In [1]:
from collections import Counter, defaultdict
import json
from pprint import pprint

In [2]:
reporttype_values = json.load(open("reporttype-values.json"))

How many categories of reports are there?

In [3]:
len(reporttype_values)

16

How many report types are per category?

In [4]:
{k:len(v[0]) for k,v in reporttype_values.items()}

{'Brand Analytics reports': 5,
 'Vendor retail analytics reports': 4,
 'Inventory reports': 13,
 'Order reports': 7,
 'Order tracking reports': 5,
 'Pending order reports': 3,
 'Returns reports': 6,
 'Performance reports': 4,
 'Settlement reports': 3,
 'Fulfillment by Amazon': 41,
 'Tax Reports': 5,
 'Browse tree report': 1,
 'Easy ship reports': 3,
 'Amazon business reports': 2,
 'Amazon pay report': 1,
 'B2B product opportunities reports': 2}

How many report types are?

In [5]:
print("Summing categories count:", sum(len(v[0]) for _,v in reporttype_values.items()))
print("Counting unique report types:", len({ v2 for _,v in reporttype_values.items() for v2 in v[0]}))

Summing categories count: 105
Counting unique report types: 101


Report types in more than a category

In [6]:
report_types_by_categories = defaultdict(list)
for category, report_types in reporttype_values.items():
    for report_type, _ in report_types[0].items():
        report_types_by_categories[report_type].append(category)
non_unique_report_types = {report_type: categories for report_type, categories in report_types_by_categories.items() if len(categories) > 1}
print("Reports in several categories: ")
print(list(non_unique_report_types.keys()))
print()
print("Categories and file formats for non-unique reports: ")
pprint({report_type: {cat: {'file_format': reporttype_values[cat][0][report_type]} for cat in categories} for report_type, categories in non_unique_report_types.items()})

Reports in several categories: 
['GET_FLAT_FILE_ALL_ORDERS_DATA_BY_LAST_UPDATE_GENERAL', 'GET_FLAT_FILE_ALL_ORDERS_DATA_BY_ORDER_DATE_GENERAL', 'GET_XML_ALL_ORDERS_DATA_BY_LAST_UPDATE_GENERAL', 'GET_XML_ALL_ORDERS_DATA_BY_ORDER_DATE_GENERAL']

Categories and file formats for non-unique reports: 
{'GET_FLAT_FILE_ALL_ORDERS_DATA_BY_LAST_UPDATE_GENERAL': {'Fulfillment by Amazon': {'file_format': {'file_format': 'tsv'}},
                                                          'Order tracking reports': {'file_format': {'file_format': 'tsv'}}},
 'GET_FLAT_FILE_ALL_ORDERS_DATA_BY_ORDER_DATE_GENERAL': {'Fulfillment by Amazon': {'file_format': {'file_format': 'tsv'}},
                                                         'Order tracking reports': {'file_format': {'file_format': 'tsv'}}},
 'GET_XML_ALL_ORDERS_DATA_BY_LAST_UPDATE_GENERAL': {'Fulfillment by Amazon': {'file_format': {'file_format': 'xml'}},
                                                    'Order tracking reports': {'file_fo

File format dictionary for REPORT_FILE_FORMATS dict in Gambit's config.py (Beware that it has unknown formats and erros)

In [7]:
file_formats = { report_type: config if config else {}
    for category, report_configs in reporttype_values.items() 
    for report_config in report_configs 
    for report_type, config in report_config.items()
}
file_formats

{'GET_BRAND_ANALYTICS_MARKET_BASKET_REPORT': {'file_format': 'json'},
 'GET_BRAND_ANALYTICS_SEARCH_TERMS_REPORT': {'file_format': 'json'},
 'GET_BRAND_ANALYTICS_REPEAT_PURCHASE_REPORT': {'file_format': 'json'},
 'GET_BRAND_ANALYTICS_ALTERNATE_PURCHASE_REPORT': {'file_format': 'json'},
 'GET_BRAND_ANALYTICS_ITEM_COMPARISON_REPORT': {'file_format': 'json'},
 'GET_VENDOR_SALES_DIAGNOSTIC_REPORT': {'file_format': 'json'},
 'GET_VENDOR_INVENTORY_HEALTH_AND_PLANNING_REPORT': {'file_format': 'json'},
 'GET_VENDOR_DEMAND_FORECAST_REPORT': {'file_format': 'json'},
 'GET_VENDOR_NET_PURE_PRODUCT_MARGIN_REPORT': {'file_format': 'json'},
 'GET_FLAT_FILE_OPEN_LISTINGS_DATA': {'file_format': 'tsv'},
 'GET_MERCHANT_LISTINGS_ALL_DATA': {'file_format': 'tsv'},
 'GET_MERCHANT_LISTINGS_DATA': {'file_format': 'tsv'},
 'GET_MERCHANT_LISTINGS_INACTIVE_DATA': {'file_format': 'tsv'},
 'GET_MERCHANT_LISTINGS_DATA_BACK_COMPAT': {'file_format': 'tsv'},
 'GET_MERCHANT_LISTINGS_DATA_LITE': {'file_format': 'tsv'},
 

In [8]:
len(file_formats)

101

File format count with unknown and errors

In [9]:
Counter(v.get("file_format", 'unknown') for v in file_formats.values()).most_common()

[('tsv', 69),
 ('xml', 12),
 ('json', 10),
 ('csv', 5),
 ('unknown', 2),
 ('xlsx', 2),
 ('pdf', 1)]

Unknown file formats

In [10]:
sum([[k2 for k2,v2 in v[0].items() if not v2] for k,v in reporttype_values.items()], [])

['GET_CONVERGED_FLAT_FILE_PENDING_ORDERS_DATA',
 'GET_V2_SELLER_PERFORMANCE_REPORT']

File format count per category with unknown and errors

In [11]:
{k:Counter([v2['file_format'] if v2 else 'unknown' for k,v2 in v[0].items()]) for k,v in reporttype_values.items()}

{'Brand Analytics reports': Counter({'json': 5}),
 'Vendor retail analytics reports': Counter({'json': 4}),
 'Inventory reports': Counter({'tsv': 13}),
 'Order reports': Counter({'tsv': 4, 'xml': 3}),
 'Order tracking reports': Counter({'tsv': 3, 'xml': 2}),
 'Pending order reports': Counter({'tsv': 1, 'xml': 1, 'unknown': 1}),
 'Returns reports': Counter({'xml': 3, 'tsv': 2, 'csv': 1}),
 'Performance reports': Counter({'tsv': 1, 'xml': 1, 'unknown': 1, 'json': 1}),
 'Settlement reports': Counter({'tsv': 2, 'xml': 1}),
 'Fulfillment by Amazon': Counter({'tsv': 39, 'xml': 2}),
 'Tax Reports': Counter({'tsv': 4, 'csv': 1}),
 'Browse tree report': Counter({'xml': 1}),
 'Easy ship reports': Counter({'pdf': 1, 'tsv': 2}),
 'Amazon business reports': Counter({'xlsx': 2}),
 'Amazon pay report': Counter({'csv': 1}),
 'B2B product opportunities reports': Counter({'csv': 2})}