In [1]:
import pandas as pd
import os
import numpy as np
from scipy.stats import pearsonr
from scipy.stats import spearmanr

In [2]:
# Base directory: two levels above the current working directory.
# DATA_DIR and FIGURES_DIR both end with a trailing '/', so file names can be
# appended to them directly.
parentDirectory = os.path.abspath(os.path.join(os.getcwd(), os.pardir, os.pardir))
DATA_DIR = parentDirectory + '/data/'
FIGURES_DIR = parentDirectory + '/figures/'

def make_stars(val):
    """Map a p-value to a string of significance asterisks.

    Returns '****' for val < 0.0001, '***' for val < 0.001, '**' for
    val < 0.01, '*' for val < 0.05 and an empty string otherwise (including
    when no comparison succeeds, e.g. for NaN).
    """
    # Cut-offs are checked from strictest to loosest; the first hit wins.
    levels = (
        (0.0001, '****'),
        (0.001, '***'),
        (0.01, '**'),
        (0.05, '*'),
    )
    for cutoff, stars in levels:
        if val < cutoff:
            return stars
    return ''

In [None]:
# Load the item-level table (later cells read its 'name', 'country_code',
# 'category' and 'ts' columns). DATA_DIR already ends with '/', so joining
# yields the same path as plain concatenation.
df = pd.read_parquet(os.path.join(DATA_DIR, 'dk_new_food_timeseries_items1.parquet'))

In [None]:
# Drop every row whose item name is 'Bánh mì'.
df = df[df['name'].ne('Bánh mì')]

In [None]:
# Preview the first rows of the item table after the name filter above.
df.head()

In [None]:
# Load the mobility table (later grouped by 'country_region_code' and read via
# its 'date' and 'residential_percent_change_from_baseline' columns).
df_mobility = pd.read_csv(os.path.join(DATA_DIR, 'df_mobility.csv'))

In [None]:
# Two-letter country code -> full country name. The keys of this mapping are
# what the notebook uses to restrict `df` and `df_agg` to the analysed countries.
full_names = dict([
    ('AU', 'Australia'),
    ('BR', 'Brazil'),
    ('CA', 'Canada'),
    ('FR', 'France'),
    ('DE', 'Germany'),
    ('IN', 'India'),
    ('IT', 'Italy'),
    ('MX', 'Mexico'),
    ('ES', 'Spain'),
    ('GB', 'United Kingdom'),
    ('US', 'United States'),
    ('DK', 'Denmark'),
])

def chunker(seq, size):
    """Lazily split `seq` into consecutive slices of at most `size` elements.

    Works on anything that supports len() and slicing; the final slice is
    shorter when len(seq) is not a multiple of `size`.
    """
    starts = range(0, len(seq), size)
    return (seq[start:start + size] for start in starts)

# One record per country: (country, start_md_1, end_md_1, start_md_2).
# np.nan marks countries that have no 'start_md_2' date.
event_dicts = [
    dict(zip(('country', 'start_md_1', 'end_md_1', 'start_md_2'), record))
    for record in (
        ('AU', '2020-03-27', '2020-06-07', np.nan),
        ('BR', '2020-03-23', '2020-08-09', np.nan),
        ('CA', '2020-03-19', '2020-06-21', '2020-10-12'),
        ('DE', '2020-03-21', '2020-05-09', '2020-12-18'),
        ('DK', '2020-03-17', '2020-05-07', np.nan),
        ('ES', '2020-03-17', '2020-06-14', '2020-11-07'),
        ('FR', '2020-03-18', '2020-06-08', '2020-11-01'),
        ('GB', '2020-03-23', '2020-08-03', '2020-10-21'),
        ('IN', '2020-03-24', '2020-10-29', np.nan),
        ('IT', '2020-03-11', '2020-06-06', '2020-11-06'),
        ('JP', '2020-04-12', '2020-05-30', np.nan),
        ('KE', '2020-03-24', '2020-10-04', np.nan),
        ('MX', '2020-03-25', '2020-10-06', np.nan),
        ('NG', '2020-03-27', '2020-08-09', np.nan),
        ('US', '2020-03-21', '2020-06-11', '2020-11-26'),
    )
]

# Tabulate the records and parse the three date columns; missing dates become NaT.
df_events = pd.DataFrame(event_dicts).assign(
    start_md_1=lambda frame: pd.to_datetime(frame['start_md_1']),
    end_md_1=lambda frame: pd.to_datetime(frame['end_md_1']),
    start_md_2=lambda frame: pd.to_datetime(frame['start_md_2']),
)

# Load the aggregated table from DATA_DIR (later cells read its 'country' and
# 'volume_weekly_total' columns).
# NOTE(review): unpickling can execute arbitrary code; only load this file if
# it comes from a trusted source.
df_agg = pd.read_pickle(DATA_DIR+'df_agg_cats.pickle')

In [None]:
# Keep only the rows whose country code is one of the keys of `full_names`.
df = df[df['country_code'].isin(list(full_names))]

In [None]:
# Restrict the aggregated table to the countries listed in `full_names` and
# remember the last 46 index labels of the first row's weekly-volume series.
df_agg = df_agg[df_agg['country'].isin(list(full_names))]
studied_weeks = list(df_agg.iloc[0]['volume_weekly_total'].index)[-46:]

# For every country in the mobility table, average the residential mobility
# change over consecutive 7-row chunks, keyed by the 'date' of each chunk's
# first row. Rows 1..322 are used: 322 rows = 46 chunks of 7.
# NOTE(review): presumably one row per day, i.e. 46 weekly means - confirm.
mobility_ts = {}

for country, country_rows in df_mobility.groupby('country_region_code'):
    window = country_rows.iloc[1:323]
    mobility_ts[country] = {
        week.iloc[0]['date']: week['residential_percent_change_from_baseline'].mean()
        for week in chunker(window, 7)
    }

In [None]:
# Spearman rank correlation between each item's 'max_ratio' series and the
# chunk-averaged residential mobility of the same country.
# One record is produced per (name, country) pair, using the first row of the pair.
entry_list = []

for category, gr1 in df.groupby('name'):
    for country, gr2 in gr1.groupby('country_code'):
        first_row = gr2.iloc[0]
        # y: all mobility means for the country; x: the last 46 'max_ratio' values.
        # NOTE(review): spearmanr needs x and y of equal length, so this assumes
        # mobility_ts[country] holds exactly 46 values - confirm.
        y = list(mobility_ts[country].values())
        x = list(first_row['ts']['max_ratio'].values())[-46:]
        # Run the test once and unpack both outputs (it was previously called twice).
        corr, p_value = spearmanr(x, y)
        entry = {
            'name': category,
            'category': first_row['category'],
            'country': country,
            'corr': corr,
            'p': p_value,
        }
        entry_list.append(entry)

In [None]:
# One row per (name, country) record collected in `entry_list`.
df_results = pd.DataFrame(entry_list)

In [None]:
def _summarise_item(group):
    """Summarise the per-country correlation rows of a single item name.

    Returns a Series holding the mean correlation, the first category value,
    the distinct countries, a flag telling whether the item has one row for
    every country in `full_names`, and the per-country (country, corr, p) rows.
    """
    return pd.Series({
        'corr_avg': group['corr'].mean(),
        'category': group['category'].unique()[0],
        'C': group['country'].unique(),
        # Derived from the country mapping instead of a hard-coded 12, so the
        # flag stays correct if `full_names` is edited.
        'exists_all_countries': len(group) == len(full_names),
        'all': group[['country', 'corr', 'p']],
    })


tmp = df_results.groupby('name').apply(_summarise_item).reset_index()
# Keep only the items that were observed in every analysed country.
tmp = tmp.loc[tmp['exists_all_countries']]

In [None]:
# Ten item names with the highest average correlation.
(
    tmp
    .sort_values(by='corr_avg', ascending=False)
    .head(10)
    .loc[:, ['name', 'corr_avg', 'category']]
)

In [None]:
# For each of the ten names with the highest average correlation, print the
# name, the rounded average, the category and one line per country with its
# rounded correlation and significance stars.
for idx, item in (
    tmp.sort_values(by='corr_avg', ascending=False)
    .head(10)[['name', 'corr_avg', 'category', 'all']]
    .iterrows()
):
    print(item['name'])
    print(round(item['corr_avg'], 2))
    print(item['category'])
    for country_idx, detail in item['all'].iterrows():
        print(detail['country'], round(detail['corr'], 2), make_stars(detail['p']))
    print('----------------------------------')
    print('\n')
    

In [None]:
# Ten item names with the lowest average correlation.
(
    tmp
    .sort_values(by='corr_avg', ascending=True)
    .head(10)
    .loc[:, ['name', 'corr_avg', 'category']]
)

In [None]:
# For each of the ten names with the lowest average correlation, print the
# name, the rounded average, the category and one line per country with its
# rounded correlation and significance stars.
for idx, item in (
    tmp.sort_values(by='corr_avg', ascending=True)
    .head(10)[['name', 'corr_avg', 'category', 'all']]
    .iterrows()
):
    print(item['name'])
    print(round(item['corr_avg'], 2))
    print(item['category'])
    for country_idx, detail in item['all'].iterrows():
        print(detail['country'], round(detail['corr'], 2), make_stars(detail['p']))
    print('----------------------------------')
    print('\n')
    

In [None]:
# Rebind `df` to the modes table; from here on `df` no longer refers to the
# item-level table loaded earlier. DATA_DIR already ends with '/', so joining
# yields the same path as plain concatenation.
df = pd.read_parquet(os.path.join(DATA_DIR, 'modes_fine.parquet'))

In [None]:
# Spearman rank correlation between each mode's 'max_ratio' series and the
# chunk-averaged residential mobility of the same country.
# One record is produced per (name, country) pair, using the first row of the pair.
entry_list = []

for category, gr1 in df.groupby('name'):
    for country, gr2 in gr1.groupby('country_code'):
        first_row = gr2.iloc[0]
        # y: all mobility means for the country; x: the last 46 'max_ratio' values.
        # NOTE(review): spearmanr needs x and y of equal length, and every
        # country code in this table must be a key of mobility_ts - confirm.
        y = list(mobility_ts[country].values())
        x = list(first_row['ts']['max_ratio'].values())[-46:]
        # Run the test once and unpack both outputs (it was previously called twice).
        corr, p_value = spearmanr(x, y)
        entry = {
            'name': category,
            'category': first_row['category'],
            'country': country,
            'corr': corr,
            'p': p_value,
        }
        entry_list.append(entry)

In [None]:
# One row per (name, country) record collected in `entry_list` for the modes table.
df_results_modes = pd.DataFrame(entry_list)

In [None]:
def _summarise_mode(group):
    """Summarise the per-country correlation rows of a single mode name.

    Returns a Series with the mean correlation, the first category value, the
    distinct countries and the per-country (country, corr, p) rows.
    """
    summary = {}
    summary['corr_avg'] = group['corr'].mean()
    summary['category'] = group['category'].unique()[0]
    summary['C'] = group['country'].unique()
    summary['all'] = group[['country', 'corr', 'p']]
    return pd.Series(summary)


tmp = df_results_modes.groupby('name').apply(_summarise_mode).reset_index()


In [None]:
# All mode names ranked by average correlation, highest first.
(
    tmp
    .sort_values(by='corr_avg', ascending=False)
    .loc[:, ['name', 'corr_avg', 'category']]
)

In [None]:
# For every mode, highest average correlation first, print the name, the
# rounded average and the category. The per-country lines (rounded correlation
# plus significance stars) are printed for every name except 'Lunchbox'.
for idx, item in (
    tmp.sort_values(by='corr_avg', ascending=False)
    [['name', 'corr_avg', 'category', 'all']]
    .iterrows()
):
    print(item['name'])
    print(round(item['corr_avg'], 2))
    print(item['category'])
    if item['name'] != 'Lunchbox':
        for country_idx, detail in item['all'].iterrows():
            print(detail['country'], round(detail['corr'], 2), make_stars(detail['p']))
    print('----------------------------------')
    print('\n')