In [1]:
import pandas as pd
import numpy as np
import json
import datetime

In [2]:
# returns the IDs of the products in each category
def categories(filename):
    return pd.read_csv(f'categories/{filename}', encoding='cp1251', sep=';', usecols=['код'], dtype={'код': int})['код']

# list of all the categories.csv
cat_list = ['alco.csv', 'animals.csv', 'bakery.csv', 'beverage.csv', 'chasauge.csv', 'cigarettes.csv', 'condiments.csv', 
            'confectionery.csv', 'cooked.csv', 'fish.csv', 'frozen.csv', 'fruitveg.csv', 'grocery.csv', 'household.csv',
            'kids.csv', 'meat.csv', 'milk.csv', 'others.csv','salads.csv', 'snack.csv', 'wash.csv']

In [3]:
# generator for looping through time periods
def daterange(start_date, end_date):
    if start_date < end_date:
        for day in range((end_date - start_date).days):
            yield start_date + datetime.timedelta(day)
    else:
        print('start_date exceeds end_date')

In [4]:
# generates coordinates of each check in the specified period
def gen_coord(start_date, end_date):
    
    for date in daterange(start_date, end_date):
        
        start = datetime.datetime.now()
        data = pd.read_csv(f'dates/{date}.csv')
        check_dic = {} # check_id : coordinates
        coordinates = [] # coordinates of a check
        check_list = data.Check.unique()
        product_total = 0
        
        for num, check in enumerate(check_list):
            
            prod_list = list(data[data.Check == check].Product_id)
            for category in cat_list:
                code_list = categories(category)
                
                for el in code_list:
                    product_total += prod_list.count(el)
                coordinates.append(product_total)
                product_total = 0
                
            check_dic[str(check)] = coordinates
            coordinates = []
            
        with open(f'coordinates/{date}', 'w') as file:
            json.dump(check_dic, file)
            
        end = datetime.datetime.now()
        
        print(f'{date} is finished in {end-start}')

In [6]:
start_date = datetime.date(year=2012, month=6, day=25)
end_date = datetime.date(year=2012, month=6, day=26)
gen_coord(start_date, end_date)

2012-06-25 is finished in 0:07:10.688237
