In [48]:
import pandas as pd
import numpy as np
import pycountry

In [75]:
# Load the GDP growth workbook and key the rows by the 'code' column so the
# other data sets can be joined on it later.
# NOTE(review): 'code' is presumably the ISO alpha-3 country code - confirm against the sheet.
df_gdp = (
    pd.read_excel('GDP_yoy_global_28jan2021.xlsx', sheet_name='for-python')
    .set_index('code')
)

# Import the latest OxCGRT data from the team's GitHub repo
url_str = 'https://raw.githubusercontent.com/OxCGRT/covid-policy-tracker/master/data/OxCGRT_latest.csv'
df_str = pd.read_csv(url_str, parse_dates=['Date'])
# Let's focus on country aggregates and drop the subregional data (US and UK states)
df_str = df_str[df_str['RegionCode'].isnull()]
df_str = df_str[['CountryName', 'CountryCode', 'Date', 'StringencyIndex']]
# Plain reassignment instead of inplace=True: the frame above is a filtered
# slice, and mutating a slice in place can trigger SettingWithCopyWarning.
df_str = df_str.rename(columns={'CountryName': 'country', 'CountryCode': 'code', 'Date': 'date',
                                'StringencyIndex': 'stringency'})

# Half-open [start, end) windows, one per calendar quarter of 2020.
# Q4 is capped at 2021-01-01: the "latest" feed keeps growing, and without an
# upper bound every later observation would be averaged into Q4 2020.
str_quarter_bounds = {
    'q1': ('2020-01-01', '2020-04-01'),
    'q2': ('2020-04-01', '2020-07-01'),
    'q3': ('2020-07-01', '2020-10-01'),
    'q4': ('2020-10-01', '2021-01-01'),
}
str_quarterly = {}
for quarter, (start, end) in str_quarter_bounds.items():
    in_quarter = (df_str['date'] >= start) & (df_str['date'] < end)
    # Select the column *before* averaging: calling .mean() on the whole frame
    # also tries to average the string 'country' column, which raises
    # TypeError on pandas >= 2.0 (numeric_only now defaults to False).
    str_quarterly[quarter] = df_str.loc[in_quarter].groupby('code')['stringency'].mean()
str_q1, str_q2, str_q3, str_q4 = (str_quarterly[q] for q in ('q1', 'q2', 'q3', 'q4'))

# One row per country code, one column per quarter.
df_str = pd.concat([str_q1, str_q2, str_q3, str_q4], axis=1)
df_str.columns = ['stringency_q1', 'stringency_q2', 'stringency_q3', 'stringency_q4']

# Left join: only the countries present in the GDP table are kept.
df_all = df_gdp.join(df_str)

# Extract mobility data from Google's Community Mobility Reports
df_mob = pd.read_csv('https://www.gstatic.com/covid19/mobility/Global_Mobility_Report.csv')

# Source header -> short name used in the rest of the notebook.
# Columns are picked by name rather than by position: a positional iloc
# silently selects (and then mislabels) the wrong data as soon as the
# upstream CSV gains or reorders a column, whereas a missing header raises
# a KeyError here.
mob_columns = {
    'country_region_code': 'code',
    'country_region': 'country',
    'date': 'date',
    'retail_and_recreation_percent_change_from_baseline': 'retail',
    'grocery_and_pharmacy_percent_change_from_baseline': 'grocery',
    'parks_percent_change_from_baseline': 'parks',
    'transit_stations_percent_change_from_baseline': 'transit',
    'workplaces_percent_change_from_baseline': 'workplaces',
    'residential_percent_change_from_baseline': 'residential',
}

# Keep country aggregates only and drop region-level data: a country-level
# row has no sub-region and no metro area filled in.
is_country_level = (df_mob['sub_region_1'].isna()
                    & df_mob['sub_region_2'].isna()
                    & df_mob['metro_area'].isna())
df_mob = df_mob.loc[is_country_level, list(mob_columns)].rename(columns=mob_columns)

# Attach ISO alpha-3 codes so the mobility data can be joined with the other
# (alpha-3 keyed) tables.
input_countries = df_mob['country'].values

# pycountry lookup tables, built in a single pass:
#   alpha2_to_alpha3: two-letter code -> three-letter code
#   countries:        pycountry name  -> three-letter code
alpha2_to_alpha3 = {}
countries = {}
for country in pycountry.countries:
    alpha2_to_alpha3[country.alpha_2] = country.alpha_3
    countries[country.name] = country.alpha_3

# Prefer the two-letter code already present in the data: matching on the
# display name alone misses every country whose name in the data differs from
# pycountry's name, and those rows would all collapse into 'Unknown code'.
# The name lookup is kept as a fallback for rows whose code is missing or
# unrecognised (a missing code is NaN, which never matches a dictionary key).
codes = [
    alpha2_to_alpha3.get(alpha_2) or countries.get(name, 'Unknown code')
    for alpha_2, name in zip(df_mob['code'], df_mob['country'])
]

# assign() returns a new frame, so no column is written onto a filtered slice.
df_mob = df_mob.assign(iso_code=codes)[['iso_code', 'country', 'date', 'retail', 'grocery',
                                        'parks', 'transit', 'workplaces', 'residential']]

# Composite mobility index: unweighted average of the five non-residential
# categories. Plain arithmetic is used, so a row with any missing category
# gets NaN for the index.
df_mob = df_mob.assign(mobility_index=lambda x: (x.retail + x.grocery + x.parks +
                                                 x.transit + x.workplaces) / 5)
df_mob = df_mob.set_index('iso_code')
# Workplace-only view, selected by name instead of by column position.
df_workmob = df_mob[['country', 'date', 'workplaces']]

# Half-open [start, end) windows, one per calendar quarter of 2020.
# Q4 is capped at 2021-01-01 so that later observations in the continuously
# updated report are not averaged into Q4 2020. The string bounds compare
# correctly whether 'date' holds ISO-formatted strings or datetimes.
mob_quarter_bounds = {
    'q1': ('2020-01-01', '2020-04-01'),
    'q2': ('2020-04-01', '2020-07-01'),
    'q3': ('2020-07-01', '2020-10-01'),
    'q4': ('2020-10-01', '2021-01-01'),
}
mob_quarterly = {}
for quarter, (start, end) in mob_quarter_bounds.items():
    in_quarter = (df_mob['date'] >= start) & (df_mob['date'] < end)
    # Only the two numeric columns of interest are averaged: .mean() on the
    # whole frame would also hit the string 'country'/'date' columns, which
    # raises TypeError on pandas >= 2.0.
    mob_quarterly[quarter] = (df_mob.loc[in_quarter]
                              .groupby(level=0)[['mobility_index', 'workplaces']]
                              .mean())

quarter_labels = ('q1', 'q2', 'q3', 'q4')
workmob_q1, workmob_q2, workmob_q3, workmob_q4 = (
    mob_quarterly[q]['workplaces'] for q in quarter_labels)
mob_q1, mob_q2, mob_q3, mob_q4 = (
    mob_quarterly[q]['mobility_index'] for q in quarter_labels)

# One row per ISO code; composite index first, then workplace mobility.
df_mob = pd.concat([mob_q1, mob_q2, mob_q3, mob_q4, workmob_q1, workmob_q2, workmob_q3, workmob_q4],
                   axis=1)

df_mob.columns = ['mobility_q1', 'mobility_q2', 'mobility_q3', 'mobility_q4',
                  'workmobility_q1', 'workmobility_q2', 'workmobility_q3', 'workmobility_q4']

# Left join: index values absent from df_all (including the 'Unknown code'
# bucket) are dropped here.
df_all = df_all.join(df_mob)

# Cumulative confirmed cases per million (Our World in Data), sampled on the
# last day of each 2020 quarter.
url_cc = 'https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/owid-covid-data.csv'
df_cc = pd.read_csv(url_cc, parse_dates=['date'])
df_cc = df_cc[['iso_code', 'location', 'date', 'total_cases_per_million']]

# Quarter-end date -> output column name.
cc_quarter_ends = {
    '2020-03-31': 'cc_q1',
    '2020-06-30': 'cc_q2',
    '2020-09-30': 'cc_q3',
    '2020-12-31': 'cc_q4',
}
cc_dates = pd.to_datetime(list(cc_quarter_ends))
df_cc = df_cc[df_cc['date'].isin(cc_dates)]
df_cc = df_cc.pivot(index='iso_code', columns='date', values='total_cases_per_million')
# Reindex on the expected dates before relabelling: the positional rename
# below is only correct if all four columns exist in this exact order. A date
# missing from the feed becomes an all-NaN column instead of a length-mismatch
# error.
df_cc = df_cc.reindex(columns=cc_dates)
df_cc.columns = list(cc_quarter_ends.values())

# Natural log of each column, added as lncc_q1..lncc_q4. Non-positive values
# are masked to NaN first: log(0) is -inf, which would otherwise be written
# to the spreadsheet.
cc_positive = df_cc.where(df_cc > 0)
df_cc = df_cc.join(np.log(cc_positive).rename(columns=lambda name: 'ln' + name))

df_all = df_all.join(df_cc)

# Write the combined country-level table (GDP, stringency, mobility, cases).
df_all.to_excel('GDP-growth-real-time-data.xlsx')



  interactivity=interactivity, compiler=compiler, result=result)
  interactivity=interactivity, compiler=compiler, result=result)
