In [1]:
# Imports
import plotly.express as px
from helper import *
import pandas as pd
import numpy as np
import gzip
import json
from scipy import stats
from plotly.subplots import make_subplots
import plotly.graph_objects as go

Load Data

In [2]:
# Load mobility data for the 'trust gov mobility' country selection.
# reset_index() turns the index returned by get_mobility_df into regular columns.
mobility = get_mobility_df(get_country_dict('trust gov mobility')).reset_index()
# NOTE(review): this is not the last expression of the cell, so Jupyter does not
# render it; move it to its own cell if the preview is wanted.
mobility.head()


# Load and preprocess pageviews and deaths
pageview_df = pd.read_csv("page_views_covid_related.csv.gz")
population_df = pd.read_csv("Population_countries.csv")

# Only the first value returned by get_pageviews_df and the first two values
# returned by get_cases_deaths_df are kept; the remaining ones are discarded.
pageviews,_, _,_ = get_pageviews_df(pageview_df, population_df, get_country_dict('trust gov mobility'), '2020-01-22', '2022-07-31')
deaths, cases, _, _, _, _, _,_ = get_cases_deaths_df(population_df, get_country_dict('trust gov mobility'), '2020-01-22', '2022-07-31')

# This line was indented at top level, which raises
# "IndentationError: unexpected indent" and prevents the whole cell from running.
# NOTE(review): data_folder is not defined in the visible cells -- presumably it is
# provided by `from helper import *`; confirm.
google_mobility = pd.read_csv(data_folder+'Global_Mobility_Report.csv.zip')


In [3]:
# Functions
def mobility_category(country, mobility):
    """Display one area chart per mobility category for a single country.

    Parameters
    ----------
    country
        Value matched against the ``country_region`` column of ``mobility``;
        it is also inserted into the figure title.
    mobility : pandas.DataFrame
        Must contain the columns ``country_region``, ``country_region_code``,
        ``moving category``, ``covid category`` and ``date``. Every other
        column is drawn as its own facet.

    Returns
    -------
    None
        The figure is shown with ``fig.show()``.
    """
    # Columns that are not plotted as a mobility category.
    non_category_cols = ['country_region_code', 'country_region','moving category', 'covid category']

    # Keep the rows of the requested country, index them by date and label the
    # column axis so that it can be used as the facet variable below.
    is_requested_country = mobility['country_region']==country
    per_category = (
        mobility.loc[is_requested_country]
        .drop(columns=non_category_cols)
        .set_index('date')
        .rename_axis("Mobility category", axis = "columns")
    )

    # Two facets per row, one facet per remaining column.
    fig = px.area(per_category, facet_col="Mobility category", facet_col_wrap=2)
    chart_title = "Mobility change per category for {} in %".format(str(country))
    fig.update_layout(title=chart_title)
    fig.show()


def covid_plot(country, deaths, cases, pageviews):
    """Display pageviews, COVID cases and COVID deaths of one country as stacked line charts.

    Parameters
    ----------
    country
        Key looked up in ``get_country_dict('trust gov mobility')``; the value
        found there selects the column taken from each of the three frames.
        It is also inserted into the figure title.
    deaths, cases, pageviews
        Objects indexable by that column label (presumably pandas DataFrames
        sharing the same index -- confirm against the helper module).

    Returns
    -------
    None
        The figure is shown with ``fig.show()``.
    """
    column_key = get_country_dict('trust gov mobility')[country]

    # Put the three metrics side by side under fixed column names.
    combined = pd.concat(
        [pageviews[column_key], cases[column_key], deaths[column_key]],
        axis = 1,
    )
    combined.columns = ['Pageviews', 'COVID cases', 'COVID deaths']
    combined = combined.rename_axis("COVID related informations", axis = "columns")

    # Three rows sharing one x axis: deaths on top, cases in the middle,
    # pageviews at the bottom. Traces are added in the order listed here.
    fig = make_subplots(rows=3, cols=1,shared_xaxes=True,vertical_spacing=0.02)
    trace_specs = (
        ('Pageviews', "Wikipedia pageviews", 3),
        ('COVID cases', "COVID cases", 2),
        ('COVID deaths', "COVID deaths", 1),
    )
    for column, legend_name, row_number in trace_specs:
        line = go.Scatter(x=combined.index, y=combined[column], name=legend_name)
        fig.add_trace(line, row=row_number, col=1)

    figure_title = "The pandemic through cases, deaths and pageviews in {}".format(str(country))
    fig.update_layout(height=600, width=600, title_text=figure_title)
    fig.show()



# Case Studies
Analysis of three countries from our dataset, looking in more depth at the evolution of the pandemic across all of our metrics.
First, we look at how COVID cases, deaths and Wikipedia pageviews evolve over time. Then, we look at mobility by separating each category and plotting its evolution over time.

## Case study: Germany 

In [9]:
# Cases, deaths and Wikipedia pageviews for Germany.
covid_plot(country='Germany', deaths=deaths, cases=cases, pageviews=pageviews)

Looking at the plot above, one can see a clear distinction between pageviews, cases and deaths. In fact, the number of pageviews increases sharply compared to the other metrics during the first months of the pandemic and then decreases, with small rebounds of activity at each new wave. What is interesting is the comparison of deaths between the first and the second wave of the pandemic: it highlights the usefulness of the partial lockdown implemented on the 22nd of March 2020. Strangely at first sight, the first two waves of COVID cases are very low compared to the third wave; this is expected, however, as not enough tests were produced and available in Germany, and thus the values were heavily underestimated. In the first part of the pandemic, Germany successfully implemented contact tracing to quickly isolate cases and contain the spread, which we can see in the lower COVID cases until the third wave. However, in 2022 there was a peak of COVID cases, as people were much more carefree about COVID. Nonetheless, there were not as many deaths as in the third wave, as 77.8% of Germans were vaccinated.

In [5]:
# Mobility change per category for Germany.
mobility_category(country='Germany', mobility=mobility)


During the pandemic, Germany implemented a partial lockdown, forcing people to work from home and closing shops while still allowing them to go outside. This can be seen in the plot above, which shows the mobility evolution for each category. In fact, categories such as Retail and Recreational, Workplace and Transit station experienced a decrease from baseline. While Retail and Recreational and Transit station progressively returned to baseline over time, Workplace stayed at a roughly constant value below baseline. This is due to a restructuring of the way companies work, with more remote work being implemented. On the other hand, the Park category remained constant at the beginning due to the partial lockdown, before a clear increase at the end of the lockdown, showing that people had had enough of staying inside and went out more.

In [6]:


# # Transpose dataframes and rename columns
# df_pageviews100k = df_pageviews100k.transpose().stack().to_frame().reset_index().rename(columns={"level_0": "country", "date": "date", 0:"pageviews per 100k"}, errors="raise")
# deaths100k = deaths100k.transpose().stack().to_frame().reset_index().rename(columns={"level_0": "country", "date": "date", 0:"deaths per 100k"}, errors="raise")

# # Merge pageviews and deaths onto df_animation which will contain columns : ['Month-Year','Country',	'Cumulative pageviews per 100k','Cumulative deaths per 100k','Mobility change from baseline','Trust']
# df_animation = df_pageviews100k.merge(deaths100k, on=['country','date'])
# df_animation['date'] = pd.to_datetime(df_animation['date'])
# df_animation=df_animation.rename(columns={"country": "Country"})

In [7]:
# country = 'Italy'
# dict_ = get_country_dict('trust gov mobility')
# df = pageviews[dict_[country]]
# df = pd.concat([df,cases[dict_[country]],deaths[dict_[country]]],axis = 1)
# df.head()

In [8]:
# covid_plot('Italy', deaths, cases, pageviews)
# covid_plot('Thailand', deaths, cases, pageviews)
