In [1]:
import gc
import os
from pathlib import Path
import random
import sys

from tqdm.notebook import tqdm
import numpy as np
import pandas as pd
import scipy as sp


import matplotlib.pyplot as plt
import seaborn as sns

from IPython.core.display import display, HTML

# --- plotly ---
from plotly import tools, subplots
import plotly.offline as py
py.init_notebook_mode(connected=True)
import plotly.graph_objs as go
import plotly.express as px
import plotly.figure_factory as ff
import plotly.io as pio
pio.templates.default = "plotly_dark"

# --- models ---
from sklearn import preprocessing
from sklearn.model_selection import KFold
import lightgbm as lgb
import xgboost as xgb
import catboost as cb

# --- setup ---
pd.set_option('max_columns', 50)

In [2]:

df = pd.read_csv('https://www.gstatic.com/covid19/mobility/Global_Mobility_Report.csv')
df


Columns (4,5) have mixed types.Specify dtype option on import or set low_memory=False.



Unnamed: 0,country_region_code,country_region,sub_region_1,sub_region_2,metro_area,iso_3166_2_code,census_fips_code,place_id,date,retail_and_recreation_percent_change_from_baseline,grocery_and_pharmacy_percent_change_from_baseline,parks_percent_change_from_baseline,transit_stations_percent_change_from_baseline,workplaces_percent_change_from_baseline,residential_percent_change_from_baseline
0,AE,United Arab Emirates,,,,,,ChIJvRKrsd9IXj4RpwoIwFYv0zM,2020-02-15,0.0,4.0,5.0,0.0,2.0,1.0
1,AE,United Arab Emirates,,,,,,ChIJvRKrsd9IXj4RpwoIwFYv0zM,2020-02-16,1.0,4.0,4.0,1.0,2.0,1.0
2,AE,United Arab Emirates,,,,,,ChIJvRKrsd9IXj4RpwoIwFYv0zM,2020-02-17,-1.0,1.0,5.0,1.0,2.0,1.0
3,AE,United Arab Emirates,,,,,,ChIJvRKrsd9IXj4RpwoIwFYv0zM,2020-02-18,-2.0,1.0,5.0,0.0,2.0,1.0
4,AE,United Arab Emirates,,,,,,ChIJvRKrsd9IXj4RpwoIwFYv0zM,2020-02-19,-2.0,0.0,4.0,-1.0,2.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4973947,ZW,Zimbabwe,Midlands Province,Kwekwe,,,,ChIJRcIZ3-FJNBkRRsj55IcLpfU,2021-04-05,,,,,-54.0,
4973948,ZW,Zimbabwe,Midlands Province,Kwekwe,,,,ChIJRcIZ3-FJNBkRRsj55IcLpfU,2021-04-06,,,,,10.0,
4973949,ZW,Zimbabwe,Midlands Province,Kwekwe,,,,ChIJRcIZ3-FJNBkRRsj55IcLpfU,2021-04-07,,,,,5.0,
4973950,ZW,Zimbabwe,Midlands Province,Kwekwe,,,,ChIJRcIZ3-FJNBkRRsj55IcLpfU,2021-04-08,,,,,1.0,


In [3]:
europe_country_list =list([
    'Austria','Belgium','Bulgaria','Croatia','Cyprus','Czechia','Denmark','Estonia','Finland','France','Germany','Greece','Hungary','Ireland',
    'Italy', 'Latvia','Luxembourg','Lithuania','Malta','Norway','Netherlands','Poland','Portugal','Romania','Slovakia','Slovenia',
    'Spain', 'Sweden', 'United Kingdom', 'Iceland', 'Russia', 'Switzerland', 'Serbia', 'Ukraine', 'Belarus',
    'Albania', 'Bosnia and Herzegovina', 'Kosovo', 'Moldova', 'Montenegro', 'North Macedonia'])

In [4]:
df = df[df['country_region'].isin(europe_country_list)]
df.shape

(1335688, 15)

In [5]:
df

Unnamed: 0,country_region_code,country_region,sub_region_1,sub_region_2,metro_area,iso_3166_2_code,census_fips_code,place_id,date,retail_and_recreation_percent_change_from_baseline,grocery_and_pharmacy_percent_change_from_baseline,parks_percent_change_from_baseline,transit_stations_percent_change_from_baseline,workplaces_percent_change_from_baseline,residential_percent_change_from_baseline
182462,AT,Austria,,,,,,ChIJfyqdJZsHbUcRr8Hk3XvUEhA,2020-02-15,9.0,1.0,42.0,13.0,0.0,-2.0
182463,AT,Austria,,,,,,ChIJfyqdJZsHbUcRr8Hk3XvUEhA,2020-02-16,15.0,21.0,42.0,12.0,1.0,-2.0
182464,AT,Austria,,,,,,ChIJfyqdJZsHbUcRr8Hk3XvUEhA,2020-02-17,9.0,5.0,35.0,3.0,-4.0,0.0
182465,AT,Austria,,,,,,ChIJfyqdJZsHbUcRr8Hk3XvUEhA,2020-02-18,8.0,5.0,40.0,2.0,-4.0,0.0
182466,AT,Austria,,,,,,ChIJfyqdJZsHbUcRr8Hk3XvUEhA,2020-02-19,4.0,2.0,10.0,-1.0,-5.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3841545,UA,Ukraine,,,Kiev Metropolitan Area,,,ChIJzYwQhKf_1EAR5PjP6COXiNw,2021-04-07,-46.0,-6.0,-34.0,-46.0,-44.0,13.0
3841546,UA,Ukraine,,,Kiev Metropolitan Area,,,ChIJzYwQhKf_1EAR5PjP6COXiNw,2021-04-08,-39.0,2.0,-3.0,-39.0,-42.0,11.0
3841547,UA,Ukraine,,,Kiev Metropolitan Area,,,ChIJzYwQhKf_1EAR5PjP6COXiNw,2021-04-09,-38.0,3.0,8.0,-38.0,-41.0,9.0
3841548,UA,Ukraine,,,Kiev Metropolitan Area,,,ChIJzYwQhKf_1EAR5PjP6COXiNw,2021-04-10,-39.0,2.0,39.0,-32.0,-22.0,-2.0


In [6]:
df = df.drop(['sub_region_1', 'sub_region_2', 'country_region_code', 'sub_region_2', 'metro_area',
              'iso_3166_2_code', 'census_fips_code', 'place_id'], axis = 1)
df

Unnamed: 0,country_region,date,retail_and_recreation_percent_change_from_baseline,grocery_and_pharmacy_percent_change_from_baseline,parks_percent_change_from_baseline,transit_stations_percent_change_from_baseline,workplaces_percent_change_from_baseline,residential_percent_change_from_baseline
182462,Austria,2020-02-15,9.0,1.0,42.0,13.0,0.0,-2.0
182463,Austria,2020-02-16,15.0,21.0,42.0,12.0,1.0,-2.0
182464,Austria,2020-02-17,9.0,5.0,35.0,3.0,-4.0,0.0
182465,Austria,2020-02-18,8.0,5.0,40.0,2.0,-4.0,0.0
182466,Austria,2020-02-19,4.0,2.0,10.0,-1.0,-5.0,1.0
...,...,...,...,...,...,...,...,...
3841545,Ukraine,2021-04-07,-46.0,-6.0,-34.0,-46.0,-44.0,13.0
3841546,Ukraine,2021-04-08,-39.0,2.0,-3.0,-39.0,-42.0,11.0
3841547,Ukraine,2021-04-09,-38.0,3.0,8.0,-38.0,-41.0,9.0
3841548,Ukraine,2021-04-10,-39.0,2.0,39.0,-32.0,-22.0,-2.0


In [7]:
df = df.groupby(['date', 'country_region']).sum()

In [8]:
df

Unnamed: 0_level_0,Unnamed: 1_level_0,retail_and_recreation_percent_change_from_baseline,grocery_and_pharmacy_percent_change_from_baseline,parks_percent_change_from_baseline,transit_stations_percent_change_from_baseline,workplaces_percent_change_from_baseline,residential_percent_change_from_baseline
date,country_region,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2020-02-15,Austria,704.0,88.0,1536.0,1058.0,-41.0,-74.0
2020-02-15,Belarus,10.0,9.0,21.0,11.0,4.0,-1.0
2020-02-15,Belgium,44.0,15.0,328.0,104.0,15.0,-12.0
2020-02-15,Bosnia and Herzegovina,8.0,6.0,16.0,3.0,1.0,-1.0
2020-02-15,Bulgaria,-145.0,15.0,-959.0,-29.0,-69.0,16.0
...,...,...,...,...,...,...,...
2021-04-11,Spain,-2563.0,79.0,-1388.0,-1997.0,-1011.0,397.0
2021-04-11,Sweden,-1777.0,-780.0,-76.0,-4867.0,-547.0,224.0
2021-04-11,Switzerland,-877.0,1.0,1096.0,-420.0,-274.0,87.0
2021-04-11,Ukraine,-63.0,6.0,81.0,-39.0,-36.0,-9.0


In [9]:
df.reset_index(inplace=True)
df

Unnamed: 0,date,country_region,retail_and_recreation_percent_change_from_baseline,grocery_and_pharmacy_percent_change_from_baseline,parks_percent_change_from_baseline,transit_stations_percent_change_from_baseline,workplaces_percent_change_from_baseline,residential_percent_change_from_baseline
0,2020-02-15,Austria,704.0,88.0,1536.0,1058.0,-41.0,-74.0
1,2020-02-15,Belarus,10.0,9.0,21.0,11.0,4.0,-1.0
2,2020-02-15,Belgium,44.0,15.0,328.0,104.0,15.0,-12.0
3,2020-02-15,Bosnia and Herzegovina,8.0,6.0,16.0,3.0,1.0,-1.0
4,2020-02-15,Bulgaria,-145.0,15.0,-959.0,-29.0,-69.0,16.0
...,...,...,...,...,...,...,...,...
15142,2021-04-11,Spain,-2563.0,79.0,-1388.0,-1997.0,-1011.0,397.0
15143,2021-04-11,Sweden,-1777.0,-780.0,-76.0,-4867.0,-547.0,224.0
15144,2021-04-11,Switzerland,-877.0,1.0,1096.0,-420.0,-274.0,87.0
15145,2021-04-11,Ukraine,-63.0,6.0,81.0,-39.0,-36.0,-9.0


In [10]:
fig = px.line(df, x="date", y="retail_and_recreation_percent_change_from_baseline", color='country_region', 
              title="retail_and_recreation_percent_change_from_baseline")
fig.show()

In [11]:
df_all = df.groupby(['date']).sum()
df_all.reset_index(inplace=True)
df_all

Unnamed: 0,date,retail_and_recreation_percent_change_from_baseline,grocery_and_pharmacy_percent_change_from_baseline,parks_percent_change_from_baseline,transit_stations_percent_change_from_baseline,workplaces_percent_change_from_baseline,residential_percent_change_from_baseline
0,2020-02-15,5361.0,634.0,10171.0,8380.0,-648.0,-135.0
1,2020-02-16,4779.0,-1266.0,-3431.0,4841.0,-2152.0,-273.0
2,2020-02-17,10328.0,5024.0,20690.0,1476.0,-10400.0,2062.0
3,2020-02-18,12544.0,4393.0,25474.0,74.0,-15749.0,2363.0
4,2020-02-19,9294.0,2225.0,15443.0,-2305.0,-16340.0,1940.0
...,...,...,...,...,...,...,...
417,2021-04-07,-70086.0,6342.0,10460.0,-73897.0,-103875.0,28084.0
418,2021-04-08,-67164.0,12052.0,23146.0,-73486.0,-100591.0,27989.0
419,2021-04-09,-73057.0,14926.0,26219.0,-72329.0,-88266.0,26708.0
420,2021-04-10,-84922.0,5597.0,21187.0,-61781.0,-26513.0,11444.0


In [12]:
fig = px.line(df_all, x="date", y="retail_and_recreation_percent_change_from_baseline", 
              title="Europe retail_and_recreation_percent_change_from_baseline")
fig.show()

In [13]:
fig = px.line(df, x="date", y="grocery_and_pharmacy_percent_change_from_baseline", color='country_region', 
              title="grocery_and_pharmacy_percent_change_from_baseline")
fig.show()

In [14]:
fig = px.line(df_all, x="date", y="grocery_and_pharmacy_percent_change_from_baseline", 
              title="Europe grocery_and_pharmacy_percent_change_from_baseline")
fig.show()

In [15]:
fig = px.line(df, x="date", y="parks_percent_change_from_baseline", color='country_region', 
              title="parks_percent_change_from_baseline")
fig.show()

In [16]:
fig = px.line(df_all, x="date", y="parks_percent_change_from_baseline", 
              title="Europe parks_percent_change_from_baseline")
fig.show()

In [17]:
fig = px.line(df, x="date", y="transit_stations_percent_change_from_baseline", color='country_region', 
              title="transit_stations_percent_change_from_baseline")
fig.show()

In [18]:
fig = px.line(df_all, x="date", y="transit_stations_percent_change_from_baseline", 
              title="Europe transit_stations_percent_change_from_baseline")
fig.show()

In [19]:
fig = px.line(df, x="date", y="workplaces_percent_change_from_baseline", color='country_region', 
              title="workplaces_percent_change_from_baseline")
fig.show()

In [20]:
fig = px.line(df_all, x="date", y="workplaces_percent_change_from_baseline", 
              title="Europe workplaces_percent_change_from_baseline")
fig.show()

In [21]:
fig = px.line(df, x="date", y="residential_percent_change_from_baseline", color='country_region', 
              title="residential_percent_change_from_baseline")
fig.show()

In [22]:
fig = px.line(df_all, x="date", y="residential_percent_change_from_baseline", 
              title="Europe residential_percent_change_from_baseline")
fig.show()