In [1]:
import pandas as pd
import numpy as np
import pandas as pd
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import math 
import seaborn as sns
import matplotlib.colors as mcolors
import statsmodels.api as sm
import statsmodels.formula.api as smf
from statsmodels.formula.api import ols
from statsmodels.formula.api import mixedlm
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
import os
import matplotlib.pyplot as mpl
import matplotlib
from scipy.stats import spearmanr

# Tableau colour names listed twice over, so up to 20 series can be coloured
# before the palette runs out.
colors = [*mcolors.TABLEAU_COLORS] * 2


# Project root: three directory levels above the current working directory.
parentDirectory = os.path.abspath(os.path.join(os.getcwd(), *([os.pardir] * 3)))
# Input and output folders; both keep the trailing slash so file names can be
# appended by plain string concatenation.
DATA_DIR = f'{parentDirectory}/data/'
FIGURES_DIR = f'{parentDirectory}/figures/'

# Two-letter country code -> display name, for the countries kept in the analysis.
full_names = dict((
    ('AU', 'Australia'),
    ('BR', 'Brazil'),
    ('CA', 'Canada'),
    ('FR', 'France'),
    ('DE', 'Germany'),
    ('IN', 'India'),
    ('IT', 'Italy'),
    ('MX', 'Mexico'),
    ('ES', 'Spain'),
    ('GB', 'United Kingdom'),
    ('US', 'United States'),
    ('DK', 'Denmark'),
    ('KE', 'Kenya'),
    ('NG', 'Nigeria'),
    ('JP', 'Japan'),
    ('SE', 'Sweden'),
    ('ID', 'Indonesia'),
    ('EG', 'Egypt'),
))

# Per-country event dates, one row per country. Field order matters: it becomes
# the key order of every dict and therefore the column order of the DataFrame
# built from this list. np.nan marks a country with no `start_md_2` date.
_EVENT_FIELDS = ('country', 'end_md_1', 'start_md_1', 'start_md_2')
_EVENT_ROWS = (
    ('AU', '2020-06-07', '2020-03-27', np.nan),
    ('BR', '2020-08-09', '2020-03-23', np.nan),
    ('CA', '2020-06-21', '2020-03-19', '2020-10-12'),
    ('DE', '2020-05-09', '2020-03-21', '2020-12-18'),
    ('DK', '2020-05-07', '2020-03-17', np.nan),
    ('EG', '2020-07-01', '2020-03-24', np.nan),
    ('ES', '2020-06-14', '2020-03-17', '2020-11-07'),
    ('FR', '2020-06-08', '2020-03-18', '2020-11-01'),
    ('GB', '2020-08-03', '2020-03-23', '2020-10-21'),
    ('ID', '2020-08-10', '2020-03-24', np.nan),
    ('IN', '2020-10-29', '2020-03-24', np.nan),
    ('IT', '2020-06-06', '2020-03-11', '2020-11-06'),
    ('JP', '2020-05-30', '2020-04-12', np.nan),
    ('KE', '2020-10-04', '2020-03-24', np.nan),
    ('MX', '2020-10-06', '2020-03-25', np.nan),
    ('NG', '2020-08-09', '2020-03-27', np.nan),
    ('SE', '2020-04-09', '2020-04-03', np.nan),
    ('US', '2020-06-11', '2020-03-21', '2020-11-26'),
)
event_dicts = [dict(zip(_EVENT_FIELDS, event_row)) for event_row in _EVENT_ROWS]

# One row per country; the three date columns are parsed from ISO strings to
# datetimes (missing `start_md_2` values become NaT).
df_events = pd.DataFrame(event_dicts)
for _date_col in ('start_md_1', 'end_md_1', 'start_md_2'):
    df_events[_date_col] = pd.to_datetime(df_events[_date_col])

# Aggregated per-(country, category) table. NOTE(review): unpickling executes
# arbitrary code, so this file must come from a trusted source.
df_agg = pd.read_pickle(f'{DATA_DIR}df_agg_cats.pickle')

In [2]:
def make_stars(val):
    """Return a string of significance stars for `val`.

    `val` is compared against fixed upper bounds (presumably it is a p-value):
    below 0.0001 -> '****', below 0.001 -> '***', below 0.01 -> '**',
    below 0.05 -> '*', otherwise the empty string.
    """
    star_levels = (
        (0.0001, '****'),
        (0.001, '***'),
        (0.01, '**'),
        (0.05, '*'),
    )
    # Bounds are ordered from strictest to loosest, so the first hit wins.
    for upper_bound, stars in star_levels:
        if val < upper_bound:
            return stars
    return ''

def make_star_ste(value, ste):
    """Return '*' when `value` lies more than two `ste` away from zero on its own side.

    That is, '*' if `value` is positive and `value - 2*ste` is still positive,
    or `value` is negative and `value + 2*ste` is still negative; otherwise ''.
    """
    margin = 2 * ste
    clearly_positive = value > 0 and value - margin > 0
    clearly_negative = value < 0 and value + margin < 0
    return '*' if (clearly_positive or clearly_negative) else ''

In [3]:
# Week labels are read from the first row's weekly series; the first 52 are
# treated as 2019 and everything after that as 2020.
_week_labels = list(df_agg.iloc[0]['volume_weekly_total'].index)
weeks_2019, weeks_2020 = _week_labels[:52], _week_labels[52:]

In [4]:
# Reshape df_agg (one row per country/category holding weekly series) into a
# long table with one record per (country, category, week).
#
# Each record carries `k`, the week offset relative to the country's
# `start_md_1` date: k = floor((days since start_md_1 + 7) / 7), so a date equal
# to start_md_1 gets k = 1. Weeks from 2019 are mapped to the 2020 week at the
# same position in the label list, so both years share the same k axis.

# Constant-time lookups instead of scanning the week lists for every record.
weeks_2020_lookup = set(weeks_2020)
first_pos_2019 = {}
for _pos, _label in enumerate(weeks_2019):
    # setdefault keeps the first position, matching list.index semantics.
    first_pos_2019.setdefault(_label, _pos)

l = []
for cnt, row in df_agg.iterrows():
    # Single lookup of this country's event row (was repeated three times).
    event = df_events.loc[df_events['country'] == row['country']].iloc[0]
    start_md = event['start_md_1']
    end_md = event['end_md_1']  # not used below; kept for parity with the original cell
    start_md2 = event['start_md_2']
    # Public missing-value check rather than comparing against the private
    # pandas NaTType class.
    has_cutoff = pd.notna(start_md2)

    weekly_records = zip(row['volume_weekly_total'].index,
                         row['volume_weekly_total'].values,
                         row['volume_percent_weekly_total'].values)

    for week_label, volume_total, volume_percent in weekly_records:
        if week_label in weeks_2020_lookup:
            year = '2020'
            date = pd.to_datetime(week_label)
        elif week_label in first_pos_2019:
            year = '2019'
            # Align the 2019 week with the 2020 week at the same position.
            date = pd.to_datetime(weeks_2020[first_pos_2019[week_label]])
        else:
            # Label belongs to neither year list: no record is produced.
            continue

        # Drop weeks that fall after the country's `start_md_2` date, if it has one.
        if has_cutoff and date > start_md2:
            continue

        l.append({
            'country': row['country'],
            'category': row['category'],
            'k': math.floor(((date - start_md).days + 7) / 7),
            'volume_total': volume_total,
            'volume_percent': volume_percent,
            'year': year,
        })

df = pd.DataFrame(l)

In [5]:
# Keep weeks within +/-30 of the reference week and only the countries listed in
# full_names. The explicit .copy() makes the result an independent frame, so the
# column assignment below does not trigger pandas' SettingWithCopyWarning.
df = df.loc[
    df['k'].between(-30, 30) & df['country'].isin(list(full_names.keys()))
].copy()
# 1 for weeks with k >= 0, 0 otherwise (vectorised instead of a per-row lambda).
df['intervention_flag'] = (df['k'] >= 0).astype('int64')

In [6]:
# Half-width of the analysis window in units of k, and the distinct categories
# in order of first appearance.
k = 30
cats = [*df['category'].unique()]

In [7]:
# Working copy restricted to the +/-k window, with volumes on a log scale.
df_temp = df.loc[df['k'].between(-k, k)].copy()
# The small offset keeps log() finite for zero volumes. np.log is applied to the
# whole column at once rather than through a per-element Python lambda; the
# float cast guards against an object-dtype column.
df_temp['volume_total'] = np.log(df_temp['volume_total'].astype(float) + 0.001)

In [8]:
# For every category, fit three OLS models that differ only in the polynomial
# degree of the time trend in k (0 = no trend, 1 = linear, 2 = quadratic), each
# with country fixed effects. From each fit, record the coefficient, standard
# error and p-value of the intervention_flag x year-2020 interaction, plus R^2.
formulas_by_degree = {
    0: 'volume_total ~ intervention_flag*year  + C(country)',
    1: 'volume_total ~ intervention_flag*k*year  + C(country)',
    2: 'volume_total ~ intervention_flag*k*year + intervention_flag*np.power(k,2)*year + C(country)',
}
effect_term = 'intervention_flag:year[T.2020]'

entries_list = []

# Group by a scalar key, not a one-element list: with ['category'], pandas >= 2
# yields 1-tuples as group names, which would end up in the `category` column.
for name, group in df_temp.groupby('category'):
    print(name)

    for degree, formula in formulas_by_degree.items():
        mod = smf.ols(formula, data=group)
        # 'hc0' requests heteroscedasticity-robust standard errors.
        res = mod.fit(cov_type='hc0')
        entry = {
            'model_degree': degree,
            'category': name,
            'alpha': res.params[effect_term],
            'ste': res.bse[effect_term],
            'pval': res.pvalues[effect_term],
            'r2': res.rsquared,
        }
        entries_list.append(entry)
    
    

beef dish
bread and flatbread
cheese
chicken dish
cocktail
dessert
egg dish
fish dish
fruit
herb
lamb dish
pasta, pizza and noodle dish
pastry and bakery product
pie
pork dish
potato dish
rice dish
salad
sandwich
sauce
sausage
snack
soft drink
soup
spice
stew
vegetable and legume
wine, beer and liquor


In [9]:
# One row per (category, model_degree) with the estimated effect and fit statistics.
df_res = pd.DataFrame(data=entries_list)

In [10]:
# Print LaTeX table rows: rank & category under degree 0 & degree 1 & degree 2,
# where categories are ranked by descending alpha within each model degree.
model_degrees = (0, 1, 2)

# Filter and sort once per degree instead of once per printed cell.
ranked_categories = {
    degree: df_res.loc[df_res['model_degree'] == degree]
                  .sort_values(by='alpha', ascending=False)['category']
                  .values
    for degree in model_degrees
}

# Take the row count from the data rather than hard-coding the number of categories.
n_rows = min(len(names) for names in ranked_categories.values())

for j in range(n_rows):
    print(str(j+1)+' &')
    for i in model_degrees:
        # Every column but the last is followed by a LaTeX column separator.
        separator = '' if i == model_degrees[-1] else ' &'
        print(ranked_categories[i][j] + separator)

    print('\\\\')

1 &
pastry and bakery product &
pastry and bakery product &
pastry and bakery product
\\
2 &
pie &
pie &
bread and flatbread
\\
3 &
dessert &
bread and flatbread &
potato dish
\\
4 &
sauce &
potato dish &
pie
\\
5 &
potato dish &
dessert &
dessert
\\
6 &
bread and flatbread &
sauce &
cheese
\\
7 &
chicken dish &
chicken dish &
sauce
\\
8 &
stew &
cheese &
chicken dish
\\
9 &
egg dish &
vegetable and legume &
pork dish
\\
10 &
vegetable and legume &
egg dish &
sausage
\\
11 &
cheese &
pork dish &
stew
\\
12 &
fruit &
pasta, pizza and noodle dish &
pasta, pizza and noodle dish
\\
13 &
herb &
fruit &
egg dish
\\
14 &
spice &
stew &
vegetable and legume
\\
15 &
sausage &
rice dish &
fruit
\\
16 &
rice dish &
spice &
rice dish
\\
17 &
pasta, pizza and noodle dish &
herb &
fish dish
\\
18 &
fish dish &
sausage &
beef dish
\\
19 &
pork dish &
fish dish &
spice
\\
20 &
salad &
snack &
herb
\\
21 &
snack &
beef dish &
soup
\\
22 &
beef dish &
salad &
snack
\\
23 &
sandwich &
sandwich &
salad
\\

In [11]:
# Rank correlation between the degree-0 and degree-2 effect estimates; the two
# vectors are paired by position (same row order within df_res).
spearmanr(
    df_res.loc[df_res['model_degree'] == 0, 'alpha'].to_numpy(),
    df_res.loc[df_res['model_degree'] == 2, 'alpha'].to_numpy(),
)

SpearmanrResult(correlation=0.8872468527640941, pvalue=3.173779725914818e-10)

In [12]:
# Rank correlation between the degree-1 and degree-2 effect estimates; the two
# vectors are paired by position (same row order within df_res).
spearmanr(
    df_res.loc[df_res['model_degree'] == 1, 'alpha'].to_numpy(),
    df_res.loc[df_res['model_degree'] == 2, 'alpha'].to_numpy(),
)

SpearmanrResult(correlation=0.9414340448823207, pvalue=8.699648067086242e-14)

In [13]:
# Rank correlation between the degree-0 and degree-1 effect estimates; the two
# vectors are paired by position (same row order within df_res).
spearmanr(
    df_res.loc[df_res['model_degree'] == 0, 'alpha'].to_numpy(),
    df_res.loc[df_res['model_degree'] == 1, 'alpha'].to_numpy(),
)

SpearmanrResult(correlation=0.9452654625068418, pvalue=3.68993332701922e-14)