In [1]:
# Imports
import plotly.express as px
from helper import *
import pandas as pd
import numpy as np
import gzip
import json

## Animated figure

In [2]:
print(get_country_dict('trust gov mobility'))



{'Italy': 'it', 'Russia': 'ru', 'Bangladesh': 'bn', 'Croatia': 'hr', 'Greece': 'el', 'Sweden': 'sv', 'Finland': 'fi', 'Norway': 'no', 'Malaysia': 'ms', 'Israel': 'he', 'Lithuania': 'lt', 'Serbia': 'sr', 'Slovakia': 'sk', 'Slovenia': 'sl', 'Turkey': 'tr', 'Bulgaria': 'bg', 'Czechia': 'cs', 'Denmark': 'da', 'Georgia': 'ka', 'Germany': 'de', 'Hungary': 'hu', 'Japan': 'ja', 'Kazakhstan': 'kk', 'South Korea': 'ko', 'Kyrgyzstan': 'ky', 'Netherlands': 'nl', 'Poland': 'pl', 'Romania': 'ro', 'Thailand': 'th', 'Mongolia': 'mn'}


In [3]:
#Create dataframe with the columns [country, trust,  day, mobility, deaths per 100k ,   pageviews per 100k"]
pageview_df = pd.read_csv("page_views_covid_related.csv.gz")
population_df = pd.read_csv("Population_countries.csv")


_,_, df_pageviews100k,_ = get_pageviews_df(pageview_df, population_df, get_country_dict('trust gov mobility'), '2020-01-22', '2020-11-22')
_, _, _, _, deaths100k, _, _,_ = get_cases_deaths_df(population_df, get_country_dict('trust gov mobility'), '2020-01-22', '2020-11-22')

In [4]:
df_pageviews100k = df_pageviews100k.transpose().stack().to_frame().reset_index().rename(columns={"level_0": "country", "date": "date", 0:"pageviews per 100k"}, errors="raise")
df_pageviews100k.head()

Unnamed: 0,country,date,pageviews per 100k
0,it,2020-01-22,40.388883
1,it,2020-01-23,46.878422
2,it,2020-01-24,38.747154
3,it,2020-01-25,54.543748
4,it,2020-01-26,55.928115


In [5]:
deaths100k = deaths100k.transpose().stack().to_frame().reset_index().rename(columns={"level_0": "country", "date": "date", 0:"deaths per 100k"}, errors="raise")
deaths100k.head()


Unnamed: 0,country,date,deaths per 100k
0,it,2020-01-22,0.0
1,it,2020-01-23,0.0
2,it,2020-01-24,0.0
3,it,2020-01-25,0.0
4,it,2020-01-26,0.0


In [6]:
df_animation = df_pageviews100k.merge(deaths100k, on=['country','date'])
df_animation.head()

Unnamed: 0,country,date,pageviews per 100k,deaths per 100k
0,it,2020-01-22,40.388883,0.0
1,it,2020-01-23,46.878422,0.0
2,it,2020-01-24,38.747154,0.0
3,it,2020-01-25,54.543748,0.0
4,it,2020-01-26,55.928115,0.0


In [7]:

# Divide trust interval into nbr_category and label the countries
def trust_category(trust, nbr_category, country_dict):
    country_dict_ = country_dict.copy()
    country_dict_cat = {}
    min_trust = float(trust.min(axis=1))
    max_trust = trust.max(axis=1)
    
    delta = float((max_trust-min_trust))/nbr_category
    
    for j in list(country_dict_.keys()):
        country_trust = float(trust[country_dict_[j]])
        for i in range(nbr_category):
            if (country_trust >= min_trust + i*delta) & (country_trust < min_trust + (i+1)*delta):
                country_dict_cat.update({country_dict_[j]:i})
                country_dict_[j] = [country_dict_[j], i]
            elif (country_trust == (min_trust + (i+1)*delta)) & (i == (nbr_category-1)):
                country_dict_cat.update({country_dict_[j]:i})
                country_dict_[j] = [country_dict_[j], i]

    return  country_dict_cat      

In [8]:
# Dowload Trust dataset
data_folder = 'data_2/'
df_trust_gov = pd.read_csv(data_folder+'share-who-trust-government.csv.zip') 
df_trust_gov = df_trust_gov.set_index("Entity")[["Trust the national government in this country"]].transpose()[COUNTRY_OWN_LANG_TRUST_GOV.keys()].rename(columns= COUNTRY_OWN_LANG_TRUST_GOV)
country_dict = get_country_dict('trust gov mobility')

# Map the trust category to the countries in df_animation
country_dict_cat =  trust_category(df_trust_gov, 5,country_dict)
df_animation['trust'] = df_animation['country'].map(country_dict_cat)

In [9]:
df_animation.head()
df_animation['date'] = pd.to_datetime(df_animation['date'])

In [10]:
df_animation.head()

Unnamed: 0,country,date,pageviews per 100k,deaths per 100k,trust
0,it,2020-01-22,40.388883,0.0,2
1,it,2020-01-23,46.878422,0.0,2
2,it,2020-01-24,38.747154,0.0,2
3,it,2020-01-25,54.543748,0.0,2
4,it,2020-01-26,55.928115,0.0,2


In [11]:
# Load mobility 
data_folder = 'data_2/'

#Download mobility and intervention files
google_mobility = pd.read_csv(data_folder+'Global_Mobility_Report.csv.zip')
interventions = pd.read_csv(data_folder+'interventions.csv.zip')

#Download file with covid pageviews #MODIFY WITH THE NEW DATA
file = gzip.open(data_folder+'aggregated_timeseries.json.gz')
data = json.load(file)



  google_mobility = pd.read_csv(data_folder+'Global_Mobility_Report.csv.zip')


In [12]:
#Change name of the columns
categories = ["Retail and Recreations", "Grocery and Pharmacy", "Parks","Transit stations", "Workplace", "Residential"]
google_mobility = google_mobility.rename(dict(zip(google_mobility.columns[8:],categories)), axis = 1)

#Transform date string to datetime
google_mobility['date'] = pd.to_datetime(google_mobility['date'])

#Delete columns about region
google_mobility = google_mobility.drop(google_mobility.iloc[:,2:7], axis = 1)

#Keep same dates as for corona wikipedia pages
google_mobility = google_mobility.loc[(google_mobility['date'] < "'2020-11-22") & (google_mobility['date'] >= "2020-01-22")]

#Regroup all data per country and per date and take the average
mobility = google_mobility.groupby(["country_region_code","country_region","date"]).mean()

#Keep only the country that we are interested in
mobility = mobility[mobility.index.get_level_values('country_region').isin(list(country_dict.keys()))]

mobility

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Retail and Recreations,Grocery and Pharmacy,Parks,Transit stations,Workplace,Residential
country_region_code,country_region,date,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
BD,Bangladesh,2020-02-15,4.000000,6.000000,10.000000,7.000000,8.000000,1.000000
BD,Bangladesh,2020-02-16,3.000000,3.000000,5.000000,4.000000,8.000000,0.000000
BD,Bangladesh,2020-02-17,1.000000,4.000000,3.000000,2.000000,5.000000,1.000000
BD,Bangladesh,2020-02-18,2.000000,3.000000,2.000000,3.000000,5.000000,1.000000
BD,Bangladesh,2020-02-19,1.000000,1.000000,3.000000,2.000000,5.000000,1.000000
...,...,...,...,...,...,...,...,...
TR,Turkey,2020-08-21,-3.732353,12.555249,61.106383,-3.496622,-14.449912,1.588957
TR,Turkey,2020-08-22,-7.729107,9.383099,57.272425,-3.795139,-3.439623,-0.557093
TR,Turkey,2020-08-23,-7.886628,7.063444,69.815534,-2.864769,-1.970149,-3.220779
TR,Turkey,2020-08-24,0.731481,14.697222,62.927481,0.321678,-12.375000,-0.388379


In [13]:
# Group together Parks, Retail and Recreations, Transit stations and Workplace by taking the mean of them
mobility['moving category'] = mobility[['Retail and Recreations', 'Parks', 'Transit stations', 'Workplace']].mean(axis=1)

#Group together Grocey and Pharmacy and Residential by taking the mean of them
mobility['covid category'] = mobility[['Grocery and Pharmacy', 'Residential']].mean(axis=1)

In [14]:
moving_cat_df = mobility['moving category'].reset_index()
moving_cat_df.head()

Unnamed: 0,country_region_code,country_region,date,moving category
0,BD,Bangladesh,2020-02-15,7.25
1,BD,Bangladesh,2020-02-16,5.0
2,BD,Bangladesh,2020-02-17,2.75
3,BD,Bangladesh,2020-02-18,3.0
4,BD,Bangladesh,2020-02-19,2.75


In [15]:
dict_ = get_country_dict('trust gov mobility')
print(dict_)

{'Italy': 'it', 'Russia': 'ru', 'Bangladesh': 'bn', 'Croatia': 'hr', 'Greece': 'el', 'Sweden': 'sv', 'Finland': 'fi', 'Norway': 'no', 'Malaysia': 'ms', 'Israel': 'he', 'Lithuania': 'lt', 'Serbia': 'sr', 'Slovakia': 'sk', 'Slovenia': 'sl', 'Turkey': 'tr', 'Bulgaria': 'bg', 'Czechia': 'cs', 'Denmark': 'da', 'Georgia': 'ka', 'Germany': 'de', 'Hungary': 'hu', 'Japan': 'ja', 'Kazakhstan': 'kk', 'South Korea': 'ko', 'Kyrgyzstan': 'ky', 'Netherlands': 'nl', 'Poland': 'pl', 'Romania': 'ro', 'Thailand': 'th', 'Mongolia': 'mn'}


In [16]:
moving_cat_df['country_region'] =moving_cat_df['country_region'].map(get_country_dict('trust gov mobility'))

In [17]:
moving_cat_df.head(50)

Unnamed: 0,country_region_code,country_region,date,moving category
0,BD,bn,2020-02-15,7.25
1,BD,bn,2020-02-16,5.0
2,BD,bn,2020-02-17,2.75
3,BD,bn,2020-02-18,3.0
4,BD,bn,2020-02-19,2.75
5,BD,bn,2020-02-20,4.25
6,BD,bn,2020-02-21,6.25
7,BD,bn,2020-02-22,6.5
8,BD,bn,2020-02-23,6.75
9,BD,bn,2020-02-24,3.5


In [18]:
df_animation = df_animation.merge(moving_cat_df, left_on=['country','date'],  right_on=['country_region','date'])
df_animation.head()

Unnamed: 0,country,date,pageviews per 100k,deaths per 100k,trust,country_region_code,country_region,moving category
0,it,2020-02-15,15.111979,0.0,2,IT,it,10.167323
1,it,2020-02-16,15.016099,0.0,2,IT,it,11.920799
2,it,2020-02-17,15.791547,0.0,2,IT,it,2.230126
3,it,2020-02-18,13.569494,0.0,2,IT,it,5.787339
4,it,2020-02-19,11.396222,0.0,2,IT,it,2.40673


In [19]:
df_animation.head()

Unnamed: 0,country,date,pageviews per 100k,deaths per 100k,trust,country_region_code,country_region,moving category
0,it,2020-02-15,15.111979,0.0,2,IT,it,10.167323
1,it,2020-02-16,15.016099,0.0,2,IT,it,11.920799
2,it,2020-02-17,15.791547,0.0,2,IT,it,2.230126
3,it,2020-02-18,13.569494,0.0,2,IT,it,5.787339
4,it,2020-02-19,11.396222,0.0,2,IT,it,2.40673


In [20]:
df_animation = df_animation.drop(columns=['country_region','country_region_code'])

In [21]:
df_animation.head()

Unnamed: 0,country,date,pageviews per 100k,deaths per 100k,trust,moving category
0,it,2020-02-15,15.111979,0.0,2,10.167323
1,it,2020-02-16,15.016099,0.0,2,11.920799
2,it,2020-02-17,15.791547,0.0,2,2.230126
3,it,2020-02-18,13.569494,0.0,2,5.787339
4,it,2020-02-19,11.396222,0.0,2,2.40673


In [22]:
moving_cat_df['country_region'].unique()

array(['bn', 'bg', 'cs', 'de', 'da', 'fi', 'ka', 'el', 'hr', 'hu', 'he',
       'it', 'ja', 'ky', 'ko', 'kk', 'lt', 'mn', 'ms', 'nl', 'no', 'pl',
       'ro', 'sr', 'ru', 'sv', 'sl', 'sk', 'th', 'tr'], dtype=object)

In [23]:
moving_cat_df.head(50)


Unnamed: 0,country_region_code,country_region,date,moving category
0,BD,bn,2020-02-15,7.25
1,BD,bn,2020-02-16,5.0
2,BD,bn,2020-02-17,2.75
3,BD,bn,2020-02-18,3.0
4,BD,bn,2020-02-19,2.75
5,BD,bn,2020-02-20,4.25
6,BD,bn,2020-02-21,6.25
7,BD,bn,2020-02-22,6.5
8,BD,bn,2020-02-23,6.75
9,BD,bn,2020-02-24,3.5


In [24]:

df_animation['date']=df_animation['date'].astype(str)
# df_animation['pageviews per 100k'] = df_animation['pageviews per 100k']*1000000

In [25]:
df_animation['pageviews per 100k']

0       15.111979
1       15.016099
2       15.791547
3       13.569494
4       11.396222
          ...    
5695     0.213526
5696     0.091511
5697     0.061007
5698     0.061007
5699     0.122015
Name: pageviews per 100k, Length: 5700, dtype: float64

In [26]:
# C
df = df_animation
px.scatter(df,x="pageviews per 100k", y="deaths per 100k" , animation_frame="date", animation_group="country",
           size="pageviews per 100k" ,color="trust", hover_name="country" )
           #log_x=True, size_max=55, range_x=[,100000], range_y=[25,90] "pageviews per 100k"

In [39]:
df_animation['month'] = pd.DatetimeIndex(df_animation['date']).month
df_animation.head()

Unnamed: 0,country,date,pageviews per 100k,deaths per 100k,trust,moving category,month
0,it,2020-02-15,15.111979,0.0,2,10.167323,2
1,it,2020-02-16,15.016099,0.0,2,11.920799,2
2,it,2020-02-17,15.791547,0.0,2,2.230126,2
3,it,2020-02-18,13.569494,0.0,2,5.787339,2
4,it,2020-02-19,11.396222,0.0,2,2.40673,2


In [44]:
def f(x):
    d = {}
    d['cumulative pageviews per 100k over month'] = x['pageviews per 100k'].sum()
    d['cumulative deaths per 100k over month'] = x['deaths per 100k'].sum()
    d['mean moving category per month'] = x['moving category'].mean()


    return pd.Series(d, index=['cumulative pageviews per 100k over month','cumulative deaths per 100k over month', 'mean moving category per month'])

grouped_df = df_animation.groupby(['month','country']).apply(f)

In [45]:
print(type(grouped_df))
grouped_df.head()

<class 'pandas.core.frame.DataFrame'>


Unnamed: 0_level_0,Unnamed: 1_level_0,cumulative pageviews per 100k over month,cumulative deaths per 100k over month,mean moving category per month
month,country,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2,bg,161.407208,0.0,5.097669
2,bn,0.556806,0.0,5.25
2,cs,1925.544347,0.0,6.41
2,da,275.594008,0.0,-1.578199
2,de,1699.497592,0.0,1.636275


In [47]:
grouped_df = grouped_df.reset_index()
grouped_df =grouped_df.merge(df_animation[['country','month','trust']], how='left', on=['month','country']).drop_duplicates()
grouped_df.head(50)

Unnamed: 0,month,country,cumulative pageviews per 100k over month,cumulative deaths per 100k over month,mean moving category per month,trust
0,2,bg,161.407208,0.0,5.097669,1
15,2,bn,0.556806,0.0,5.25,4
30,2,cs,1925.544347,0.0,6.41,1
45,2,da,275.594008,0.0,-1.578199,3
60,2,de,1699.497592,0.0,1.636275,4
75,2,el,175.065669,0.0,6.43125,1
90,2,fi,786.50261,0.0,5.078715,4
105,2,he,688.283361,0.0,6.939167,1
120,2,hr,171.62918,0.0,8.503883,1
135,2,hu,293.616026,0.0,9.620652,1


In [50]:
# C
df = grouped_df
px.scatter(df,x="mean moving category per month", y="cumulative deaths per 100k over month" , animation_frame="month", animation_group="country",
           size="cumulative pageviews per 100k over month" ,color="trust", hover_name="country", range_x=[-50,30],range_y=[0,3])