In [1]:
import pandas as pd
import os
import matplotlib.pyplot as plt
import time
import numpy as np

In [2]:
# Resolve the directory two levels above the current working directory and
# derive the data / figures locations from it.
parentDirectory = os.path.abspath(
    os.path.join(os.getcwd(), os.pardir, os.pardir)
)
DATA_DIR = f'{parentDirectory}/data/'
FIGURES_DIR = f'{parentDirectory}/figures/'

In [3]:
# Load the item-level table from the data directory.
# DATA_DIR already ends with '/', so joining yields the same path as plain
# string concatenation.
df = pd.read_parquet(
    os.path.join(DATA_DIR, 'dk_new_food_timeseries_items1.parquet')
)

In [4]:
# Country codes to aggregate, in the order they will appear in the output.
selected_codes = [
    'FR', 'DE', 'US', 'IT', 'CA',
    'GB', 'ES', 'AU', 'NG', 'KE',
    'JP', 'MX', 'BR', 'IN', 'DK',
]
# Distinct category values, in order of first appearance in df.
categories = [*df['category'].unique()]

In [5]:
# Number of distinct values found in df['category'].
len(categories)

28

In [7]:
# Exclude every row whose item name is 'Bánh mì'.
df = df[df['name'].ne('Bánh mì')]

In [8]:
# Build one record per (country, category) pair. For each pair, the selected
# field of every row's 'ts' entry is expanded into a Series and the expanded
# rows are summed element-wise.
entry_list = []

for country_code in selected_codes:
    for category in categories:
        in_country = df['country_code'] == country_code
        in_category = df['category'] == category
        df_temp = df.loc[in_country & in_category]

        entry = {'country': country_code, 'category': category}

        # (output column, key read from each 'ts' entry); order fixes the
        # column order of the resulting frame.
        for column, field in (
            ('volume_total', 'max_ratio'),
            ('volume_total_l', 'max_ratio_lo'),
            ('volume_total_h', 'max_ratio_hi'),
        ):
            expanded = df_temp['ts'].apply(lambda ts: pd.Series(ts[field]))
            entry[column] = expanded.sum()

        entry_list.append(entry)

In [9]:
# Turn the list of per-(country, category) records into a frame.
df_agg = pd.DataFrame(data=entry_list)

In [10]:
# Display the aggregated frame (one row per country/category pair).
df_agg

Unnamed: 0,country,category,volume_total,volume_total_l,volume_total_h
0,FR,dessert,2019-01-06 2039.955977 2019-01-13 2326.4...,2019-01-06 1835.859784 2019-01-13 2099.2...,2019-01-06 2268.661639 2019-01-13 2580.8...
1,FR,soft drink,2019-01-06 121.671207 2019-01-13 129.083...,2019-01-06 114.391114 2019-01-13 121.331...,2019-01-06 129.498450 2019-01-13 137.439...
2,FR,rice dish,2019-01-06 302.488958 2019-01-13 311.905...,2019-01-06 278.087672 2019-01-13 287.005...,2019-01-06 329.392011 2019-01-13 339.326...
3,FR,soup,2019-01-06 401.235196 2019-01-13 366.711...,2019-01-06 369.185323 2019-01-13 337.439...,2019-01-06 436.562920 2019-01-13 399.011...
4,FR,spice,2019-01-06 388.295133 2019-01-13 396.663...,2019-01-06 355.745896 2019-01-13 363.625...,2019-01-06 424.161427 2019-01-13 433.033...
...,...,...,...,...,...
415,DK,cocktail,2019-01-06 34.420517 2019-01-13 32.986...,2019-01-06 31.581825 2019-01-13 30.403...,2019-01-06 37.815074 2019-01-13 36.177...
416,DK,salad,2019-01-06 40.275451 2019-01-13 45.20149...,2019-01-06 37.351432 2019-01-13 42.03056...,2019-01-06 43.497477 2019-01-13 48.70743...
417,DK,sandwich,2019-01-06 12.457403 2019-01-13 10.75246...,2019-01-06 11.985026 2019-01-13 10.36668...,2019-01-06 12.999977 2019-01-13 11.19724...
418,DK,sausage,2019-01-06 10.073876 2019-01-13 10.67101...,2019-01-06 9.747320 2019-01-13 10.32124...,2019-01-06 10.449219 2019-01-13 11.06057...


In [11]:
# One record per country: the sum of that country's 'volume_total' entries
# across all of its category rows.
country_totals = [
    {'country': code, 'total': group['volume_total'].sum()}
    for code, group in df_agg.groupby('country')
]

In [12]:
# Convert the per-country records into a frame (name is reused on purpose so
# later cells can keep referring to `country_totals`).
country_totals = pd.DataFrame(data=country_totals)

In [13]:
def _sum_ts_field(frame, field):
    """Element-wise sum of one field across the 'ts' entries of `frame`.

    Each value in ``frame['ts']`` is expanded with ``pd.Series(ts[field])``
    and the expanded rows are summed.

    Parameters
    ----------
    frame : pandas.DataFrame
        Rows to aggregate; must have a 'ts' column whose values can be
        indexed by `field`.
    field : str
        Key to read from every 'ts' value (e.g. 'max_ratio').

    Returns
    -------
    The result of ``.sum()`` over the expanded rows.
    """
    return frame['ts'].apply(lambda ts: pd.Series(ts[field])).sum()


# Rebuild the per-(country, category) records, now also storing each
# category's share of its country's total.
entry_list = []

for country_code in selected_codes:
    # The country total does not depend on the category, so look it up once
    # per country instead of once per (country, category) pair.
    country_total = country_totals.loc[
        country_totals['country'] == country_code
    ].iloc[0]['total']

    for category in categories:
        entry = {}

        df_temp = df.loc[(df['country_code'] == country_code) & (df['category'] == category)]
        entry['country'] = country_code
        entry['category'] = category

        # Computed once and reused for the percentage below (previously the
        # same apply/sum was evaluated twice).
        weekly_total = _sum_ts_field(df_temp, 'max_ratio')

        entry['volume_weekly_total'] = weekly_total
        entry['volume_weekly_total_l'] = _sum_ts_field(df_temp, 'max_ratio_lo')
        entry['volume_weekly_total_h'] = _sum_ts_field(df_temp, 'max_ratio_hi')

        # Share of this category in the country's total.
        entry['volume_percent_weekly_total'] = weekly_total / country_total

        entry_list.append(entry)


In [14]:
# Replace the earlier aggregate with the frame that also carries the
# per-country share column.
df_agg = pd.DataFrame(data=entry_list)

In [15]:
# Display the aggregated frame (one row per country/category pair).
df_agg

Unnamed: 0,country,category,volume_weekly_total,volume_weekly_total_l,volume_weekly_total_h,volume_percent_weekly_total
0,FR,dessert,2019-01-06 2039.955977 2019-01-13 2326.4...,2019-01-06 1835.859784 2019-01-13 2099.2...,2019-01-06 2268.661639 2019-01-13 2580.8...,2019-01-06 0.181580 2019-01-13 0.198787 ...
1,FR,soft drink,2019-01-06 121.671207 2019-01-13 129.083...,2019-01-06 114.391114 2019-01-13 121.331...,2019-01-06 129.498450 2019-01-13 137.439...,2019-01-06 0.010830 2019-01-13 0.011030 ...
2,FR,rice dish,2019-01-06 302.488958 2019-01-13 311.905...,2019-01-06 278.087672 2019-01-13 287.005...,2019-01-06 329.392011 2019-01-13 339.326...,2019-01-06 0.026925 2019-01-13 0.026651 ...
3,FR,soup,2019-01-06 401.235196 2019-01-13 366.711...,2019-01-06 369.185323 2019-01-13 337.439...,2019-01-06 436.562920 2019-01-13 399.011...,2019-01-06 0.035715 2019-01-13 0.031334 ...
4,FR,spice,2019-01-06 388.295133 2019-01-13 396.663...,2019-01-06 355.745896 2019-01-13 363.625...,2019-01-06 424.161427 2019-01-13 433.033...,2019-01-06 0.034563 2019-01-13 0.033893 ...
...,...,...,...,...,...,...
415,DK,cocktail,2019-01-06 34.420517 2019-01-13 32.986...,2019-01-06 31.581825 2019-01-13 30.403...,2019-01-06 37.815074 2019-01-13 36.177...,2019-01-06 0.021234 2019-01-13 0.020476 ...
416,DK,salad,2019-01-06 40.275451 2019-01-13 45.20149...,2019-01-06 37.351432 2019-01-13 42.03056...,2019-01-06 43.497477 2019-01-13 48.70743...,2019-01-06 0.024845 2019-01-13 0.028058 ...
417,DK,sandwich,2019-01-06 12.457403 2019-01-13 10.75246...,2019-01-06 11.985026 2019-01-13 10.36668...,2019-01-06 12.999977 2019-01-13 11.19724...,2019-01-06 0.007685 2019-01-13 0.006674 ...
418,DK,sausage,2019-01-06 10.073876 2019-01-13 10.67101...,2019-01-06 9.747320 2019-01-13 10.32124...,2019-01-06 10.449219 2019-01-13 11.06057...,2019-01-06 0.006214 2019-01-13 0.006624 ...


In [16]:
# Persist the aggregated frame.
# NOTE(review): the path is relative, so the file lands in the current
# working directory rather than under DATA_DIR — confirm this is intended.
df_agg.to_pickle(path='dk_new_df_agg_cats2.pickle')