In [1]:
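# One-time setup (uncomment to run): installs the third-party `countryinfo` package into the kernel's environment.
#%pip install countryinfo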

In [2]:
import pandas as pd
from countryinfo import CountryInfo

In [3]:
# Raw inputs: the global deaths time series (reshaped to long form further
# down) and the mobility report.
df_mort_feo = pd.read_csv(
    '../data/time_series_covid19_deaths_global.csv',
)
# low_memory=False makes pandas parse the file in one pass rather than in
# chunks, so column dtypes are inferred from the whole column.
df_mov = pd.read_csv(
    '../data/Global_Mobility_Report.csv',
    low_memory=False,
)

In [4]:
# Reshape wide -> long: every column that is not a location identifier is
# unpivoted into pandas' default 'variable' (column header) / 'value' (cell) pair.
location_columns = ['Province/State', 'Country/Region', 'Lat', 'Long']
df_mort = df_mort_feo.melt(id_vars=location_columns)

In [5]:
# Inspect the melted frame's columns: the four id columns plus 'variable' and 'value'.
df_mort.columns

Index(['Province/State', 'Country/Region', 'Lat', 'Long', 'variable', 'value'], dtype='object')

In [6]:
# Preview the long-format rows; 'variable' holds the former column header
# (a date label such as 1/22/20) and 'value' the corresponding cell.
df_mort.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,variable,value
0,,Afghanistan,33.0,65.0,1/22/20,0
1,,Albania,41.1533,20.1683,1/22/20,0
2,,Algeria,28.0339,1.6596,1/22/20,0
3,,Andorra,42.5063,1.5218,1/22/20,0
4,,Angola,-11.2027,17.8739,1/22/20,0


In [7]:
# Collapse the Province/State rows into a single total per country and date label.
df_mort = (
    df_mort
    .groupby(['Country/Region', 'variable'], as_index=False)['value']
    .sum()
)

In [8]:
# Give the aggregated columns short, analysis-friendly names.
df_mort = df_mort.rename(
    columns={
        'Country/Region': 'country',
        'variable': 'date',
        'value': 'deaths',
    }
)

In [9]:
# Distinct country names present in the mobility data, in order of first appearance.
countries = df_mov['country_region'].unique().tolist()

In [10]:
# Rewrite a few country labels in the deaths data so they can match the
# names collected in `countries` (taken from the mobility data).
mortality_to_mobility_names = {
    'US': 'United States',
    'Korea, South': 'South Korea',
    'Taiwan*': 'Taiwan',
}
df_mort['country'] = df_mort['country'].replace(mortality_to_mobility_names)

In [11]:
# Country names that appear in the deaths data but not in `countries`.
unmatched_mask = ~df_mort['country'].isin(countries)
df_mort.loc[unmatched_mask, ['country']].drop_duplicates().values

array([['Albania'],
       ['Algeria'],
       ['Andorra'],
       ['Armenia'],
       ['Azerbaijan'],
       ['Bahamas'],
       ['Bhutan'],
       ['Brunei'],
       ['Burma'],
       ['Burundi'],
       ['Cabo Verde'],
       ['Central African Republic'],
       ['Chad'],
       ['China'],
       ['Comoros'],
       ['Congo (Brazzaville)'],
       ['Congo (Kinshasa)'],
       ["Cote d'Ivoire"],
       ['Cuba'],
       ['Cyprus'],
       ['Diamond Princess'],
       ['Djibouti'],
       ['Dominica'],
       ['Equatorial Guinea'],
       ['Eritrea'],
       ['Eswatini'],
       ['Ethiopia'],
       ['Gambia'],
       ['Grenada'],
       ['Guinea'],
       ['Guyana'],
       ['Holy See'],
       ['Iceland'],
       ['Iran'],
       ['Kosovo'],
       ['Liberia'],
       ['MS Zaandam'],
       ['Madagascar'],
       ['Malawi'],
       ['Maldives'],
       ['Mauritania'],
       ['Monaco'],
       ['Montenegro'],
       ['Morocco'],
       ['Russia'],
       ['Saint Kitts and Nevis'],
  

In [12]:
# The date labels are month/day/two-digit-year strings (e.g. '1/22/20').
# Passing the format explicitly removes any reliance on pandas' format
# inference and makes a malformed label fail loudly instead of being guessed.
df_mort['date'] = pd.to_datetime(df_mort['date'], format='%m/%d/%y')

In [13]:
# Keep only rows with no first-level sub-region.
# NOTE(review): presumably these are the country-level aggregates — verify
# against the mobility report's schema.
df_mov = df_mov.loc[df_mov['sub_region_1'].isna()]

In [14]:
# Narrow the mobility frame to the identifiers, the date and the single
# mobility measure used in this analysis.
mobility_columns = [
    'country_region_code',
    'country_region',
    'date',
    'residential_percent_change_from_baseline',
]
df_mov = df_mov[mobility_columns]

In [15]:
# Sanity check: (rows, columns) of the filtered mobility frame.
df_mov.shape

(10296, 4)

In [16]:
# Sanity check: (rows, columns) of the per-country, per-date deaths frame.
df_mort.shape

(20757, 3)

In [17]:
# Parse the mobility dates. `df_mov` was produced by filtering/selecting from
# the raw frame, so assigning a column into it in place can trigger pandas'
# SettingWithCopyWarning; `assign` builds a new frame instead and keeps the
# column order unchanged.
df_mov = df_mov.assign(date=pd.to_datetime(df_mov['date']))

In [18]:
# Inner-join mobility and deaths on (country name, date); only pairs present
# in both frames survive.
df_full = df_mov.merge(
    df_mort,
    how='inner',
    left_on=['country_region', 'date'],
    right_on=['country', 'date'],
)

In [19]:
# Size after the inner join; rows without a match on both sides are dropped.
df_full.shape

(9672, 6)

In [20]:
# Number of distinct countries that survived the join (dropna=False mirrors
# `unique()`, which would count a missing value as its own entry).
df_full['country_region'].nunique(dropna=False)

124

In [21]:
# Rebind `countries` to the names that survived the join, in order of first appearance.
countries = df_full['country_region'].drop_duplicates().tolist()

In [22]:
# Look up each country's population through countryinfo. Names that cannot be
# resolved are reported and recorded with the 'Not found' placeholder, so
# `populations` stays aligned one-to-one with `countries`.
populations = []
for c in countries:
    try:
        country = CountryInfo(c)
        resp = country.info()
        pop = resp['population']
    except Exception:
        # Catch Exception instead of using a bare `except:` so that
        # KeyboardInterrupt / SystemExit can still stop the cell.
        print(c,'Not Found')
        populations.append('Not found')
    else:
        # Only reached when the lookup succeeded.
        populations.append(pop)


Czechia Not Found
North Macedonia Not Found
Serbia Not Found


In [23]:
# Pair every country name with the result of its population lookup.
population_by_country = {
    'countries': countries,
    'population': populations,
}
df_pop = pd.DataFrame(population_by_country)

In [25]:
# Keep only countries whose population was resolved, and store it as integers.
# Unresolved lookups are marked with the string 'Not found', which leaves the
# column object-typed; coercing to numeric turns every non-numeric entry into
# NaN so it can be dropped, and the cast restores an integer dtype.
df_pop = (
    df_pop
    .assign(population=pd.to_numeric(df_pop['population'], errors='coerce'))
    .dropna(subset=['population'])
    .astype({'population': 'int64'})
)

In [26]:
# Attach the population to every (country, date) row; the inner join drops
# countries that are absent from `df_pop`.
df_full_pop = df_full.merge(
    df_pop,
    how='inner',
    left_on=['country_region'],
    right_on=['countries'],
)

In [27]:
# Size after attaching populations; countries without one no longer appear.
df_full_pop.shape

(9438, 8)

In [28]:
# List the merged columns; the country-name key from each input
# ('country_region', 'country', 'countries') is still present at this point.
df_full_pop.columns

Index(['country_region_code', 'country_region', 'date',
       'residential_percent_change_from_baseline', 'country', 'deaths',
       'countries', 'population'],
      dtype='object')

In [29]:
# Drop the redundant join keys and keep one column per quantity of interest.
output_columns = [
    'country_region',
    'date',
    'residential_percent_change_from_baseline',
    'deaths',
    'population',
]
df_full_pop = df_full_pop[output_columns]

In [30]:
# Shorten the output column names; 'date' and 'deaths' already have their
# final names. The spelling 'movility_reduction' is kept exactly as-is
# because it becomes a header in the exported CSV.
df_full_pop = df_full_pop.rename(
    columns={
        'country_region': 'country',
        'residential_percent_change_from_baseline': 'movility_reduction',
        'population': 'pop',
    }
)

In [31]:
# Write the final table to the current working directory, omitting the index column.
df_full_pop.to_csv('movility_and_mortality.csv',index=False)