In [None]:
import os
import uuid
import json
import asyncio
import requests
import pandas as pd

from sqlalchemy import create_engine
from time import perf_counter, sleep
from datetime import datetime, timedelta
from urllib.parse import quote_plus
from azure.cosmos import exceptions, CosmosClient, PartitionKey

import config_bestbuy

In [None]:
async def api_bestbuy_test(page=1, api_index=0, page_size=100, last_update_date=None):
    """Request one page from a Best Buy API endpoint and return the raw response.

    Args:
        page: Page number sent as the ``page`` query parameter.
        api_index: Index into the endpoint list: 0 ``products``, 1 ``categories``,
            2 ``stores``, 3 ``products`` filtered on ``itemUpdateDate``.
        page_size: Value sent as ``pageSize``.
        last_update_date: Interpolated into the endpoint-3 filter; not used by
            the other endpoints.

    Returns:
        The ``requests.Response`` object; decoding and status handling are left
        to the caller.
    """
    request_timeout = 30  # seconds; avoids hanging forever on a stalled connection

    key = config_bestbuy.bestbuy_api_key
    apis = ['products', 'categories', 'stores', f'products(itemUpdateDate>{last_update_date}&active=*)']
    url = f"https://api.bestbuy.com/v1/{apis[api_index]}"
    payload = {
        'apiKey': key,
        'pageSize': page_size,
        'format': 'json',
        'show': 'all',
        'page': page
        }

    # requests.get blocks the calling thread. Running it on the default executor
    # keeps the event loop free, so coroutines combined with asyncio.gather can
    # actually overlap instead of running one after another.
    loop = asyncio.get_running_loop()
    r = await loop.run_in_executor(
        None, lambda: requests.get(url, params=payload, timeout=request_timeout)
    )

    await asyncio.sleep(.2)
    if r.ok:
        print(r.json().get('currentPage'))
    else:
        # An error body has no 'currentPage'; report the status instead of raising.
        print(f'page {page}: HTTP {r.status_code}')
    return r


async def main():
    """Smoke-test ``api_bestbuy_test``.

    Fetches page 1 to print the reported ``totalPages``, then requests pages 1
    and 2 together with ``asyncio.gather`` and prints the list of responses.
    """
    api_index = 0
    page_size = 100
    last_update_date = None

    first_page = await api_bestbuy_test(
        page=1, api_index=api_index, page_size=page_size, last_update_date=last_update_date
    )
    print(first_page.json()['totalPages'])

    tasks = [
        api_bestbuy_test(page=page, api_index=api_index, page_size=page_size, last_update_date=last_update_date)
        for page in (1, 2)
    ]
    r = await asyncio.gather(*tasks)
    print(r)


# Runs the smoke test defined above. A bare top-level ``await`` is not valid in a
# plain Python script; this cell presumably relies on the notebook kernel's
# support for awaiting at cell level.
if __name__ == "__main__":
    await main()
    

In [None]:
async def to_matrix(x, n):
    """Split the integers ``1..x`` into consecutive chunks of at most ``n`` items.

    Args:
        x: Upper bound (inclusive) of the number sequence.
        n: Chunk length; the final chunk may be shorter.

    Returns:
        A list of lists, e.g. ``x=7, n=3`` gives ``[[1, 2, 3], [4, 5, 6], [7]]``.
    """
    sequence = list(range(1, x + 1))
    rows = []
    for start in range(0, len(sequence), n):
        rows.append(sequence[start:start + n])
    return rows


async def ceiling_division(n, d):
    """Return ``n / d`` rounded up, using only floor division.

    Floor-dividing by the negated divisor rounds towards negative infinity;
    negating that result turns it into a ceiling.
    """
    negated_floor = n // -d
    return -negated_floor


async def initialize(folder_index=0):
    """Prepare the output folder, Cosmos DB container and SQLite engine for a run.

    Args:
        folder_index: Index into ``['products', 'categories', 'stores',
            'products_update']`` selecting the sub-folder for this run.

    Returns:
        A tuple ``(folderpath, datename, engine, db_cols, last_update_date,
        container)`` where ``db_cols`` is the column list of the ``products``
        table and ``last_update_date`` is its ``MAX(itemUpdateDate)``. Both fall
        back to ``[]`` / ``None`` when the table cannot be read.
    """
    folders = ['products', 'categories', 'stores', 'products_update']
    datename = datetime.utcnow().strftime('%Y%m%d')

    cosmos_endpoint = config_bestbuy.bestbuy_cosmosdb_end_point
    cosmos_primary_key = config_bestbuy.bestbuy_cosmosdb_primary_key
    client = CosmosClient(cosmos_endpoint, cosmos_primary_key)
    db_name = 'BestBuyDB'
    database = client.create_database_if_not_exists(id=db_name)
    container_name = 'Products'
    container = database.create_container_if_not_exists(
        id=container_name,
        partition_key=PartitionKey(path='/department'),
        offer_throughput=400
    )

    path = config_bestbuy.path
    # Join the components instead of embedding a backslash: a literal '\\' only
    # acts as a separator on Windows and would otherwise become part of a single
    # directory name elsewhere.
    folderpath = os.path.join(path, f'best_buy_{datename}', folders[folder_index])
    # exist_ok avoids the check-then-create race of a separate os.path.exists test.
    os.makedirs(folderpath, exist_ok=True)

    db = os.path.join(config_bestbuy.path, 'bestbuy.db')
    conn_string = f'sqlite:///{db}'
    engine = create_engine(conn_string)

    # SQL Server alternative, kept for reference:
    # params = quote_plus(config_bestbuy.bestbuy_sql_odbc_string)
    # engine = create_engine(f"mssql+pyodbc:///?odbc_connect={params}")

    with engine.connect() as cnx:
        try:
            # LIMIT 0 returns no rows but still exposes the table's columns.
            sel_stmt = "SELECT * FROM products LIMIT 0"
            df_db = pd.read_sql(sql=sel_stmt, con=cnx)
            db_cols = df_db.columns.tolist()

            last_update_stmt = 'SELECT MAX(itemUpdateDate) FROM products'
            df_itemUpdateDate = pd.read_sql(sql=last_update_stmt, con=cnx)
            last_update_date = df_itemUpdateDate.iloc[0, 0]

        except Exception as e:
            # Deliberately best effort (e.g. the table may not exist yet), but
            # say so rather than hiding the reason.
            print(f'products table not readable ({type(e).__name__}: {e}); continuing without history')
            db_cols = []
            last_update_date = None

    return folderpath, datename, engine, db_cols, last_update_date, container


def insert_db(r, engine, db_cols, container, cursor_mark):
    """Flatten one products response and append it to the ``products`` table.

    The response-level fields are repeated on every product row, the frame is
    reduced to a fixed column list, each column is cast to its storage type and
    the result is appended inside a single transaction.

    Args:
        r: Response-like object whose ``json()`` returns the decoded payload with
            a ``products`` list next to the response-level fields.
        engine: SQLAlchemy engine used for the insert.
        db_cols: Currently unused.
        container: Currently unused (the Cosmos DB write below is disabled).
        cursor_mark: Currently unused (only referenced by the disabled write).

    Raises:
        KeyError: If one of the expected columns is missing from the payload.
    """
    cols = ['nextCursorMark', 'total', 'totalPages', 'queryTime', 'totalTime', 'canonicalUrl',
            'sku', 'name', 'type', 'startDate', 'new', 'activeUpdateDate', 'active', 'regularPrice',
            'salePrice', 'clearance', 'onSale', 'categoryPath', 'customerReviewCount', 'customerReviewAverage',
            'priceUpdateDate', 'itemUpdateDate', 'class', 'classId', 'subclass', 'subclassId', 'department', 'departmentId',
            'theatricalReleaseDate', 'studio', 'manufacturer', 'modelNumber', 'condition', 'artistName', 'images', 'image', 'color']
    bool_cols = ["new", "active", "clearance", "onSale"]
    int_cols = ["total", "totalPages"]
    # NOTE(review): 'theatricalReleaseDate' is cast to float here although its
    # name suggests a date; confirm against real payloads.
    float_cols = ["queryTime", "totalTime", "regularPrice", "salePrice", "customerReviewCount", "customerReviewAverage", 'theatricalReleaseDate']
    date_cols = ["startDate", "activeUpdateDate", "priceUpdateDate", "itemUpdateDate"]

    io = r.json()
    products = io.get('products') or []
    if not products:
        # Nothing to store; an empty product frame has none of the expected columns.
        return

    # Disabled Cosmos DB write path, kept for reference:
    # for product in io['products']:
    #     product['cursorMark'] = cursor_mark
    #     product['id'] = str(uuid.uuid4())
    #     container.create_item(body=product)

    # Drop the nested list by name rather than by position, so the result does
    # not depend on 'products' being the last key of the payload.
    df_meta = pd.DataFrame(io).drop(columns='products')
    df_products = pd.DataFrame(products)
    df = df_meta.merge(df_products, how='inner', left_index=True, right_index=True)
    df = df.loc[:, cols].copy()
    df.insert(0, 'request_timestamp', datetime.utcnow())

    # Assign whole columns (df[col] = ...). Writing through df.loc[:, col] sets
    # values in place and keeps the existing dtype on recent pandas, which would
    # silently undo these casts.
    for col in df.columns.tolist():
        if col in bool_cols:
            df[col] = df[col].astype('bool')
        elif col in int_cols:
            df[col] = df[col].astype('int64')
        elif col in float_cols:
            df[col] = df[col].astype('float64')
        elif col in date_cols:
            df[col] = pd.to_datetime(df[col], errors='coerce')
        else:
            df[col] = df[col].astype('str')

    # engine.begin() commits on success and rolls back on error, so a failed
    # append never leaves a partial page behind.
    with engine.begin() as cnx:
        df.to_sql(name='products', con=cnx, if_exists='append', index=False)


async def api_bestbuy(datename, folderpath, page=1, api_index=0, page_size=100, last_update_date=None):
    """Download one page from a Best Buy endpoint and save it as a JSON file.

    The coroutine first sleeps ``page / 5`` seconds, so pages scheduled together
    start their requests staggered (presumably to stay within an API rate
    limit; confirm against the API terms).

    Args:
        datename: Date stamp used in the output file name.
        folderpath: Directory the JSON file is written to.
        page: Page number sent as the ``page`` query parameter.
        api_index: Index into the endpoint list: 0 ``products``, 1 ``categories``,
            2 ``stores``, 3 ``products`` filtered on ``itemUpdateDate``.
        page_size: Value sent as ``pageSize``.
        last_update_date: Interpolated into the endpoint-3 filter.

    Returns:
        The ``requests.Response``. A file is written only for HTTP 200.
    """
    request_timeout = 30  # seconds; avoids hanging forever on a stalled connection

    key = config_bestbuy.bestbuy_api_key
    apis = ['products', 'categories', 'stores', f'products(itemUpdateDate>{last_update_date}&active=*)']
    url = f"https://api.bestbuy.com/v1/{apis[api_index]}"
    payload = {
        'apiKey': key,
        'pageSize': page_size,
        'format': 'json',
        'show': 'all',
        'page': page
        }

    delay = page/5
    t0 = perf_counter()
    await asyncio.sleep(delay)
    t1 = perf_counter()

    # requests.get blocks; run it on the default executor so the other scheduled
    # downloads keep progressing while this one waits on the network.
    loop = asyncio.get_running_loop()
    r = await loop.run_in_executor(
        None, lambda: requests.get(url, params=payload, timeout=request_timeout)
    )

    # Fall back to the requested page so the progress line below always has a
    # value, even when the request failed.
    pg = page
    if r.status_code == 200:
        body = r.json()  # decode once, reuse for the file name and the dump
        pg = body['currentPage']
        filename = f'best_buy_{datename}_{pg:05}.json'
        filepath = os.path.join(folderpath, filename)

        with open(filepath, 'w') as f:
            json.dump(body, f, indent=4)

        # insert_db(r=r, engine=engine, db_cols=db_cols, container=container, cursor_mark=nextcursorMark)
    else:
        print(f'page {page}: HTTP {r.status_code}, nothing written')

    print(f'{pg=}', f'{delay=}', f'{t1-t0=}', sep=' | ')
    return r


async def main(api_index=0, page_size=100):
    """Download every page of one Best Buy endpoint into the dated output folder.

    Page 1 is fetched first to learn ``totalPages``; the remaining pages are then
    scheduled together. Errors are printed rather than raised so that the
    elapsed-time line is always shown.

    Args:
        api_index: Endpoint selector passed to ``initialize`` and ``api_bestbuy``.
        page_size: Number of items requested per page.
    """
    t0 = perf_counter()
    # engine / db_cols / container are only needed by the currently disabled
    # database insert.
    folderpath, datename, _engine, _db_cols, last_update_date, _container = await initialize(folder_index=api_index)

    try:
        first_page = await api_bestbuy(datename=datename, folderpath=folderpath, api_index=api_index, page_size=page_size, last_update_date=last_update_date)
        pages = first_page.json()['totalPages']
        print(pages)
        # Page 1 was already downloaded and saved by the call above, so only
        # pages 2..totalPages are scheduled.
        tasks = [
            api_bestbuy(datename=datename, folderpath=folderpath, page=page, api_index=api_index, page_size=page_size, last_update_date=last_update_date)
            for page in range(2, pages + 1)
        ]
        await asyncio.gather(*tasks)

    except Exception as e:
        # Keep the best-effort behaviour, but include the exception type: a bare
        # KeyError message is only the missing key.
        print(f'{type(e).__name__}: {e}')

    print(f'fin: {perf_counter()-t0=}')
    


In [None]:
if __name__ == '__main__':
    # api_index selects the endpoint: 0 = 'products', 1 = 'categories', 2 = 'stores',
    # 3 = 'products' filtered by itemUpdateDate > last_update_date (with active=*).
    await main(api_index=0, page_size=100)
    

Tip: To query for updates or deltas since you last walked through the result set, use the itemUpdateDate attribute. To ensure that your query results include changes to a product’s active/inactive status, add active=* to your query parameters.
For example: .../v1/products(itemUpdateDate>2017-02-06T16:00:00&active=*)?format=json&pageSize=100&cursorMark=*&apiKey=YOUR_API_KEY
For example: .../v1/products(itemUpdateDate>today&active=*)?format=json&pageSize=100&cursorMark=*&apiKey=YOUR_API_KEY
Another filtered query, returning only selected attributes: "https://api.bestbuy.com/v1/products(releaseDate>today)?format=json&show=sku,name,salePrice&apiKey=YourAPIKey"

In [None]:
Page = 1
api_index = 0
page_size = 100

r = api_bestbuy_test(pg=Page, api_index=api_index, page_size=page_size)
io = r.json()
io['id'] = str(uuid.uuid4())
io


In [None]:
# Columns kept from the merged frame, in output order.
# NOTE(review): selecting with .loc raises KeyError if the response lacks any of
# these (e.g. 'nextCursorMark'); confirm the request type returns all of them.
cols = ['nextCursorMark', 'total', 'totalPages', 'queryTime', 'totalTime', 'canonicalUrl', 'sku', 'name', 'type', 'startDate', 'new', 'activeUpdateDate', 'active', 'regularPrice', 'salePrice', 'clearance', 'onSale', 'categoryPath', 'customerReviewCount', 'customerReviewAverage', 'priceUpdateDate', 'itemUpdateDate', 'class', 'classId', 'subclass', 'subclassId', 'department', 'departmentId', 'images', 'image', 'color']
io = r.json()
# Remove the nested product list by name; dropping "the last column" by position
# breaks as soon as the payload has keys after 'products'.
df_meta = pd.DataFrame(io).drop(columns='products')
df_products = pd.DataFrame(io['products'])
df = df_meta.merge(df_products, how='inner', left_index=True, right_index=True)
# .copy() so the insert below modifies an independent frame, not a slice.
df = df.loc[:, cols].copy()
df.insert(0, 'request_timestamp', datetime.utcnow())

In [None]:
# All page numbers 1..totalPages reported by the last decoded response.
# Built with range() instead of a loop over `_`, which would overwrite the
# interactive shell's last-output variable.
totalpages = set(range(1, io['totalPages'] + 1))

totalpages


In [None]:
# NOTE(review): as the last expression of the cell this renders the ODBC
# connection string in the cell output. It presumably contains credentials, so
# clear the output before sharing or committing the notebook.
config_bestbuy.bestbuy_sql_odbc_string

In [None]:

# Connect to the SQLite file 'bestbuy.db' located under config_bestbuy.path.
db = os.path.join(config_bestbuy.path, 'bestbuy.db')
conn_string = f'sqlite:///{db}'
engine = create_engine(conn_string)

# SQL Server alternative, kept for reference:
# params = quote_plus(config_bestbuy.bestbuy_sql_odbc_string)
# engine = create_engine(f"mssql+pyodbc:///?odbc_connect={params}")

with engine.connect() as cnx:
    df_test = pd.read_sql(sql="select color from products_archive", con=cnx)

# For each returned column, print the length of its longest value once rendered
# as a string.
for col in df_test.columns:
    longest = df_test[col].astype('str').map(len).max()
    print(col, longest, sep=' - ')


In [None]:
def to_matrix(x, n):
    """Split the integers ``1..x`` into numbered chunks of at most ``n`` items.

    Returns:
        A list of single-entry dicts mapping the chunk's position to its
        numbers, e.g. ``x=7, n=3`` gives ``[{0: [1, 2, 3]}, {1: [4, 5, 6]}, {2: [7]}]``.
    """
    numbers = list(range(1, x + 1))
    numbered_chunks = []
    for position, start in enumerate(range(0, len(numbers), n)):
        numbered_chunks.append({position: numbers[start:start + n]})
    return numbered_chunks


# Build the numbered batches for 2244 pages, five pages per batch.
batches = to_matrix(2244, 5)

# Each batch is a one-entry dict keyed by its own position in the list, so the
# enumerate index retrieves its page numbers.
for i, batch in enumerate(batches):
    page_numbers = batch.get(i)
    print(page_numbers, "-")

In [None]:
# Read the clock once so both dates come from the same instant (two separate
# reads could fall on either side of midnight).
today = datetime.utcnow().date()
n_days = today - timedelta(days=2)  # the date two days before today
# Dividing a timedelta by a one-hour timedelta gives the span in hours; the
# previous divisor of one second produced seconds despite the variable's name.
n_hours = (today - n_days) / timedelta(hours=1)
n_hours