In [24]:
import boto3
import larry as lry
import pandas as pd
import numpy as np

# Data prep
We'll read the data in from Excel, keep only the `INPUT:`-prefixed columns (dropping empty cells and stripping the prefix), and then group the ASINs by query string. The resulting list of per-query records will be our input to SageMaker Ground Truth.

In [45]:
# Load the spreadsheet; every row becomes a dict keyed by column header.
df = pd.read_excel("Sample+for+SMGT.xls")


def _clean_row(row):
    """Return the INPUT columns of one spreadsheet row with empty cells removed.

    Only columns whose header starts with "INPUT" are kept. The key in the
    result is the second ":"-separated piece of the header. Cells holding a
    float NaN (how pandas represents a blank cell) are dropped.
    """
    cleaned = {}
    for column, value in row.items():
        if not column.startswith("INPUT"):
            continue
        if isinstance(value, float) and np.isnan(value):
            continue
        cleaned[column.split(":")[1]] = value
    return cleaned


records = [_clean_row(row) for row in df.to_dict("records")]
print(f"Found {len(records)} records")
records[0]

Found 500 records


{'asin': 'B07SX5RM12',
 'detail_page_url': 'https://www.amazon.com/dp/B07SX5RM12',
 'image': 'https://m.media-amazon.com/images/I/71P8X7JIc-L._AC_UY879_.jpg',
 'query_string': 'guess jacket men',
 'search_alias': 'aps',
 'search_on_google_url': 'https://www.google.com/search?q=guess+jacket+men',
 'search_page_url': 'https://www.amazon.com/s?k=guess+jacket+men',
 'title': "GUESS Men's Wind & Water Resistant Hooded Puffer Jacket with Side Stretch Panels, Light Grey, XX-Lar"}

In [47]:
# Fields that describe the search itself; every other field describes the ASIN.
query_fields = ("query_string", "search_alias", "search_on_google_url", "search_page_url")

# Group records by query string. Dict insertion order keeps the queries in
# first-seen order, and each group's ASINs in record order.
grouped = {}
for record in records:
    key = record["query_string"]

    # Split the record into its query-level and ASIN-level parts in one pass.
    query_part, asin_part = {}, {}
    for field, value in record.items():
        target = query_part if field in query_fields else asin_part
        target[field] = value

    entry = grouped.get(key)
    if entry is None:
        # First time this query string is seen: its record supplies the
        # query-level details for the whole group.
        query_part["marketplace"] = "amazon.com"
        entry = {"query": query_part, "ASINs": []}
        grouped[key] = entry
    entry["ASINs"].append(asin_part)

queries = list(grouped.values())
print(f"Consolidated into {len(queries)} queries")
queries[0]

Consolidated into 373 queries


{'query': {'query_string': 'guess jacket men',
  'search_alias': 'aps',
  'search_on_google_url': 'https://www.google.com/search?q=guess+jacket+men',
  'search_page_url': 'https://www.amazon.com/s?k=guess+jacket+men',
  'marketplace': 'amazon.com'},
 'ASINs': [{'asin': 'B07SX5RM12',
   'detail_page_url': 'https://www.amazon.com/dp/B07SX5RM12',
   'image': 'https://m.media-amazon.com/images/I/71P8X7JIc-L._AC_UY879_.jpg',
   'title': "GUESS Men's Wind & Water Resistant Hooded Puffer Jacket with Side Stretch Panels, Light Grey, XX-Lar"},
  {'asin': 'B07SW1D6HX',
   'detail_page_url': 'https://www.amazon.com/dp/B07SW1D6HX',
   'image': 'https://m.media-amazon.com/images/I/411Sli+HdhL.jpg',
   'title': "GUESS Men's Color Block Hooded Puffer Jacket, Navy, Medium"},
  {'asin': 'B08KRNDL3P',
   'detail_page_url': 'https://www.amazon.com/dp/B08KRNDL3P',
   'image': 'https://m.media-amazon.com/images/I/71JJtROtlBL._AC_UY679_.jpg',
   'title': "GUESS Men's Arctic Puffer Jacket, Reflective Prism, 