# Sub-question: Travelling Behaviour
## Part 1: Data Import

To analyze the impact of the epidemic on mobility in Poland and the Netherlands, we use Google mobility data to study how visits to six categories of places changed during the epidemic relative to the baseline (traffic volume before the epidemic). The six categories are Retail & recreation, Grocery & pharmacy, Workplaces, Residential, Parks and Transit stations.

In [None]:
import pandas as pd
import numpy as np
import plotly.express as px
from matplotlib import pyplot as plt
from sklearn.linear_model import LinearRegression

### 1.1 Google Community Mobility Data

Data source: Google Community Mobility Reports

https://www.google.com/covid19/mobility/

The reports are broken down by location and display the change in visits to places such as grocery stores and parks.

In [None]:
# Import the Google Community Mobility reports for the Netherlands (NL) and
# Poland (PL): one CSV per country and year, all read from ./raw.
df_nl_2020 = pd.read_csv("./raw/2020_NL_Region_Mobility_Report.csv", delimiter=',')
df_nl_2021 = pd.read_csv("./raw/2021_NL_Region_Mobility_Report.csv", delimiter=',')
# Fixed: this file used to be read from "./data/travelling behaviour/raw/",
# unlike every other input of this notebook, so the cell could not run from the
# same working directory as the other reads.
df_nl_2022 = pd.read_csv("./raw/2022_NL_Region_Mobility_Report.csv", delimiter=',')

df_pl_2020 = pd.read_csv("./raw/2020_PL_Region_Mobility_Report.csv", delimiter=',')
df_pl_2021 = pd.read_csv("./raw/2021_PL_Region_Mobility_Report.csv", delimiter=',')
df_pl_2022 = pd.read_csv("./raw/2022_PL_Region_Mobility_Report.csv", delimiter=',')

# Stack the yearly reports into one frame per country
df_nl_m = pd.concat([df_nl_2020, df_nl_2021, df_nl_2022])
df_pl_m = pd.concat([df_pl_2020, df_pl_2021, df_pl_2022])

# Keep the national data and remove the provincial data: only rows without a
# 'sub_region_1' value are retained.
df_nl_m = df_nl_m[df_nl_m['sub_region_1'].isna()]
df_pl_m = df_pl_m[df_pl_m['sub_region_1'].isna()]

### 1.2 COVID-19 Data

Data source: WHO Coronavirus (COVID-19) Data
    
https://covid19.who.int/data

In [None]:
# Import the CSV file containing the global daily new confirmed cases per million people
df_covid_raw = pd.read_csv("./raw/Daily new confirmed cases per 1M.csv", delimiter=',')

# Keep only the rows of the two countries under study and the columns used later.
# The explicit copy makes df_covid independent of df_covid_raw, so the column
# assignment below does not raise SettingWithCopyWarning.
df_covid = df_covid_raw.loc[
    df_covid_raw['location'].isin(['Netherlands', 'Poland']),
    ['location', 'date', 'new_cases', 'new_cases_per_million', 'new_deaths', 'new_deaths_per_million'],
].copy()

# Plain column assignment replaces the column, so it takes the datetime64 dtype.
# `df.loc[:, 'date'] = ...` writes into the existing column instead and, on
# pandas >= 2.0, can leave it with its original object dtype.
df_covid['date'] = pd.to_datetime(df_covid['date'])
df_covid = df_covid.set_index('date')

df_covid

## Part 2: Data Processing
### 2.1 Google Community Mobility Data

In [None]:
# Drop the location-identifier columns from both mobility tables; they are not
# used in the rest of the notebook.
df_nl_m = df_nl_m.drop(
    columns=[
        'country_region_code', 'country_region',
        'sub_region_1', 'sub_region_2', 'metro_area',
        'iso_3166_2_code', 'census_fips_code', 'place_id',
    ]
)
df_pl_m = df_pl_m.drop(
    columns=[
        'country_region_code', 'country_region',
        'sub_region_1', 'sub_region_2', 'metro_area',
        'iso_3166_2_code', 'census_fips_code', 'place_id',
    ]
)

In [None]:
# Convert the 'date' column of both Google mobility tables to pandas datetimes.
# Plain column assignment replaces the column, so it takes the datetime64 dtype.
# `df.loc[:, 'date'] = ...` writes into the existing column instead and, on
# pandas >= 2.0, can leave it with its original object dtype.
df_nl_m['date'] = pd.to_datetime(df_nl_m['date'])
df_pl_m['date'] = pd.to_datetime(df_pl_m['date'])

# Rename the columns positionally: 'date' followed by six mobility categories,
# prefixed with the country code so both tables can share one frame.
# Assumes exactly these seven columns remain, in this order, after the earlier
# column drop -- a different column count raises a ValueError here.
df_nl_m.columns = ['date', 'NL_retail_and_recreation', 'NL_grocery_and_pharmacy', 'NL_parks', 'NL_transit', 'NL_workplaces', 'NL_residential']
df_pl_m.columns = ['date', 'PL_retail_and_recreation', 'PL_grocery_and_pharmacy', 'PL_parks', 'PL_transit', 'PL_workplaces', 'PL_residential']
display(df_nl_m)

In [None]:
# Inner-join the Dutch and Polish mobility tables on their common dates and
# use the date as the index of the combined table.
df_Mobility_Data = df_nl_m.merge(df_pl_m, on='date').set_index('date')
display(df_Mobility_Data)

### 2.2 COVID-19 Data

In [None]:
# Split the COVID-19 table by country, keeping only the per-million columns and
# giving them a country suffix.

# Netherlands: every row whose location is not Poland
df_covid_nl = (
    df_covid.loc[~df_covid['location'].isin(['Poland'])]
    .drop(columns=['location', 'new_cases', 'new_deaths'])
    .set_axis(['new_cases_per_million_nl', 'new_deaths_per_million_nl'], axis=1)
)

# Poland: every row whose location is not the Netherlands
df_covid_pl = (
    df_covid.loc[~df_covid['location'].isin(['Netherlands'])]
    .drop(columns=['location', 'new_cases', 'new_deaths'])
    .set_axis(['new_cases_per_million_pl', 'new_deaths_per_million_pl'], axis=1)
)

# Put both countries side by side in one table, aligned on the date index
df_covid = pd.concat([df_covid_nl, df_covid_pl], axis=1)
display(df_covid)

### 2.3 Merge to one dataframe

In [None]:
# Column-wise concatenation on the date index: the COVID-19 columns come first,
# then the mobility columns. The default outer join keeps dates that exist in
# only one of the two tables and fills the missing side with NaN.
df_Mobility_Data = pd.concat(
    objs=[df_covid, df_Mobility_Data],
    axis="columns",
)
display(df_Mobility_Data)

In [None]:
# Drop every row that contains at least one NaN value
df_Mobility_Data = df_Mobility_Data.dropna(axis="index", how="any")

In [None]:
# Save the merged table. The dates are stored in the index, not in a column, so
# the index has to be written out: with index=False the exported CSV contained
# no date at all and its rows could no longer be tied to a day.
# NOTE(review): this adds a leading 'date' column to the file -- confirm that any
# consumer of travel_behaviour.csv selects columns by name.
df_Mobility_Data.to_csv("./process/travel_behaviour.csv", index=True, index_label='date')