In [1]:
# Imports
import plotly.express as px
from helper import *
import pandas as pd
import numpy as np
import gzip
import json

## Animated figure

In [2]:
# Build a dataframe with the columns [country, trust, date, moving category, deaths per 100k, pageviews per 100k]

In [3]:
# Read the raw inputs: COVID-related pageviews and the population table
pageview_df = pd.read_csv("page_views_covid_related.csv.gz")
population_df = pd.read_csv("Population_countries.csv")

# Both helper calls below are given the same observation window
window_start, window_end = '2020-01-22', '2020-11-22'


def _to_long(wide_df, value_name):
    """Reshape a wide frame into long rows of (country, date, value).

    The frame is transposed and stacked; after `reset_index` the unnamed outer
    level appears as `level_0` and is relabelled `country`, and the stacked
    values (column `0`) are relabelled `value_name`. `errors="raise"` makes the
    rename fail loudly if `level_0`, `date` or `0` is missing.
    """
    stacked = wide_df.transpose().stack().to_frame().reset_index()
    return stacked.rename(columns={"level_0": "country", "date": "date", 0: value_name}, errors="raise")


# Only one element of each helper's return tuple is kept (3rd of 4, 5th of 8)
_, _, pageviews_wide, _ = get_pageviews_df(
    pageview_df, population_df, get_country_dict('trust gov mobility'), window_start, window_end
)
_, _, _, _, deaths_wide, _, _, _ = get_cases_deaths_df(
    population_df, get_country_dict('trust gov mobility'), window_start, window_end
)

df_pageviews100k = _to_long(pageviews_wide, "pageviews per 100k")
deaths100k = _to_long(deaths_wide, "deaths per 100k")

# Join the two long frames on (country, date) and parse the dates
df_animation = pd.merge(df_pageviews100k, deaths100k, on=['country','date'])
df_animation = df_animation.assign(date=lambda frame: pd.to_datetime(frame['date']))


In [4]:
# Load and preprocess the trust-in-government dataset
data_folder = 'data_2/'
trust_raw = pd.read_csv(data_folder+'share-who-trust-government.csv.zip')

# One row (the trust share) with one column per entity, restricted to the
# entities listed in COUNTRY_OWN_LANG_TRUST_GOV and renamed through that mapping
trust_by_entity = trust_raw.set_index("Entity")[["Trust the national government in this country"]].transpose()
df_trust_gov = trust_by_entity[COUNTRY_OWN_LANG_TRUST_GOV.keys()].rename(columns=COUNTRY_OWN_LANG_TRUST_GOV)

country_dict = get_country_dict('trust gov mobility')

# Attach a trust category to every row of df_animation via its country.
# NOTE(review): the 5 is presumably the number of trust categories — confirm in helper.trust_category
country_dict_cat = trust_category(df_trust_gov, 5, country_dict)
df_animation['trust'] = df_animation['country'].map(country_dict_cat)



In [5]:
data_folder = 'data_2/'

# Load the mobility and intervention files
google_mobility = pd.read_csv(data_folder+'Global_Mobility_Report.csv.zip')
interventions = pd.read_csv(data_folder+'interventions.csv.zip')

# Load the file with covid pageviews #MODIFY WITH THE NEW DATA
# The context manager closes the gzip handle as soon as the JSON is parsed.
with gzip.open(data_folder+'aggregated_timeseries.json.gz') as file:
    data = json.load(file)

# Give the mobility columns readable names.
# NOTE(review): the rename is positional and assumes the six mobility columns
# start at index 8 — verify against the version of the CSV in use.
categories = ["Retail and Recreations", "Grocery and Pharmacy", "Parks","Transit stations", "Workplace", "Residential"]
google_mobility = google_mobility.rename(dict(zip(google_mobility.columns[8:],categories)), axis = 1)

# Transform date strings to datetime
google_mobility['date'] = pd.to_datetime(google_mobility['date'])

# Drop the columns at positions 2..6 (presumably the sub-region descriptors — TODO confirm)
google_mobility = google_mobility.drop(columns=google_mobility.columns[2:7])

# Keep dates in [2020-01-22, 2020-08-01).
# NOTE(review): the pageview/death frames are built up to 2020-11-22, so the
# inner merge at the end of this cell truncates df_animation to this shorter window.
google_mobility = google_mobility.loc[(google_mobility['date'] < "2020-08-01") & (google_mobility['date'] >= "2020-01-22")]

# Average all rows that share the same country and date
mobility = google_mobility.groupby(["country_region_code","country_region","date"]).mean()

# Keep only the countries we are interested in (the keys of country_dict)
mobility = mobility[mobility.index.get_level_values('country_region').isin(list(country_dict.keys()))]

# 'moving category': row-wise mean of Retail and Recreations, Parks, Transit stations and Workplace
mobility['moving category'] = mobility[['Retail and Recreations', 'Parks', 'Transit stations', 'Workplace']].mean(axis=1)

# 'covid category': row-wise mean of Grocery and Pharmacy and Residential
mobility['covid category'] = mobility[['Grocery and Pharmacy', 'Residential']].mean(axis=1)

# Flatten the index and translate country names through the country dict so
# they match the values in df_animation['country']
moving_cat_df = mobility['moving category'].reset_index()
moving_cat_df['country_region'] =moving_cat_df['country_region'].map(get_country_dict('trust gov mobility'))

# Merge (inner join) the animation frame with the mobility data, then drop the
# now-redundant mobility key columns
df_animation = df_animation.merge(moving_cat_df, left_on=['country','date'],  right_on=['country_region','date'])
df_animation = df_animation.drop(columns=['country_region','country_region_code'])

# Store dates as strings and derive the calendar month used for the monthly aggregation
df_animation['date']=df_animation['date'].astype(str)
df_animation['month'] = pd.DatetimeIndex(df_animation['date']).month

  google_mobility = pd.read_csv(data_folder+'Global_Mobility_Report.csv.zip')


In [6]:
def f(x):
    """Summarise one group of daily rows into three monthly statistics.

    Parameters
    ----------
    x : pandas.DataFrame
        Rows of a single group; must contain the columns
        'pageviews per 100k', 'deaths per 100k' and 'moving category'.

    Returns
    -------
    pandas.Series
        The summed pageviews, the summed deaths and the mean moving category,
        in that order, labelled with the names used below.
    """
    return pd.Series({
        'cumulative pageviews per 100k over month': x['pageviews per 100k'].sum(),
        'cumulative deaths per 100k over month': x['deaths per 100k'].sum(),
        'mean moving category per month': x['moving category'].mean(),
    })

# Collapse the daily rows into one row per (month, country)
grouped_df = df_animation.groupby(['month','country']).apply(f)
grouped_df = grouped_df.reset_index()

# Attach the trust category. The lookup is reduced to its distinct
# (country, month, trust) rows *before* merging, so the merge no longer
# multiplies every monthly row by the number of days in that month only to
# drop the duplicates afterwards. The resulting rows are the same; the index
# is now a plain 0..n-1 range.
trust_lookup = df_animation[['country','month','trust']].drop_duplicates()
grouped_df = grouped_df.merge(trust_lookup, how='left', on=['month','country'])
grouped_df.head(50)

Unnamed: 0,month,country,cumulative pageviews per 100k over month,cumulative deaths per 100k over month,mean moving category per month,trust
0,2,bg,161.407208,0.0,5.097669,1
15,2,bn,0.556806,0.0,5.25,4
30,2,cs,1925.544347,0.0,6.41,1
45,2,da,275.594008,0.0,-1.578199,3
60,2,de,1699.497592,0.0,1.636275,4
75,2,el,175.065669,0.0,6.43125,1
90,2,fi,786.50261,0.0,5.078715,4
105,2,he,688.283361,0.0,6.939167,1
120,2,hr,171.62918,0.0,8.503883,1
135,2,hu,293.616026,0.0,9.620652,1


In [7]:
# Animated bubble chart: one frame per month, one bubble per country.
# Position is mean mobility (x) against cumulative deaths (y), bubble size is
# cumulative pageviews and colour is the trust category. Axis ranges are fixed
# so the frames stay comparable.
df = grouped_df
scatter_options = dict(
    x="mean moving category per month",
    y="cumulative deaths per 100k over month",
    animation_frame="month",
    animation_group="country",
    size="cumulative pageviews per 100k over month",
    color="trust",
    hover_name="country",
    range_x=[-50,30],
    range_y=[-0.1,5],
)
fig = px.scatter(df, **scatter_options)
fig