In [25]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px

# Part 1 - Data Import
Data about traffic volume per GDP is imported.
- European data on volume of traffic relative to GDP. Obtained from eurostat: `https://ec.europa.eu/eurostat/databrowser/view/ttr00001/default/table?lang=en`

Data about GDP is imported.
- European data GDP. Obtained from worldbank: `https://data.worldbank.org/indicator/NY.GDP.MKTP.CD?locations=PL & https://data.worldbank.org/indicator/NY.GDP.MKTP.CD?locations=NL & https://data.worldbank.org/indicator/NY.GDP.MKTP.CD?locations=RO
`

Data about COVID is imported.
- World data COVID. Obtained from world health organization: `https://covid19.who.int/
`

Looking at the difference in mobility due to covid between 1 wealthy country (Netherlands) and 2 less wealthy countries (Poland and Romania) to see if the decline in transport during covid was different in these two types of countries. Less wealthy countries maybe have less room for working from home, less knowledge minded service jobs for example. Yearly data is used, as this was the only available data for this question. This gives a macroscopic overview of the effect of covid.
    

In [26]:
# Import the file that contains traffic volume per GDP data, drop irrelevent data and only keep data of Netherlands, Poland and romania
file_path = 'TIL_6010_project_daniel/data/Traffic_volume/ttr00001_linear.csv.gz' 
df = pd.read_csv(file_path, delimiter=',')

# Extract data of the Netherlands, poland and romania
df_nl = df[df['geo'] == "NL"]
df_pl = df[df['geo'] == "PL"]
df_ro = df[df['geo'] == "RO"]

# Create a new dataframe to integrate the data of all three countries in the desired form
df_nl_pl = pd.merge(df_nl, df_pl, on='TIME_PERIOD')
df_nl_pl_ro = pd.merge(df_nl_pl, df_ro, on='TIME_PERIOD')

# Remove unnessecary columns 
df_nl_pl_ro = df_nl_pl_ro.drop(['unit_x', "freq_x", "LAST UPDATE_x", "OBS_FLAG_x", "DATAFLOW_x", "DATAFLOW_y", "LAST UPDATE_y", "freq_y", "unit_y", "OBS_FLAG_y", "DATAFLOW", "LAST UPDATE", "freq", "unit", "OBS_FLAG", "geo_x", "geo_y", "geo"], axis=1)

# Rename column
df_nl_pl_ro = df_nl_pl_ro.rename({
            'OBS_VALUE_x': 'Netherlands traffic volume relative to GDP',
            'OBS_VALUE_y': 'Poland traffic volume relative to GDP',
            'OBS_VALUE': 'Romania traffic relative to GDP'
        }, axis=1)

In [27]:
# Import GDP data for the netherlands, poland and romania
file_path = 'TIL_6010_project_daniel/data/Traffic_volume/GDP_data_C.csv' 
df_GDP = pd.read_csv(file_path, delimiter=';')

# Extract data of the Netherlands, poland and romania
df_GDP_nl = df_GDP[df_GDP["Country Name"] == "Netherlands"]
df_GDP_pl = df_GDP[df_GDP["Country Name"] == "Poland"]
df_GDP_ro = df_GDP[df_GDP["Country Name"] == "Romania"]

# Convert data to correct format for merging and visualizing
df_GDP_nl = df_GDP_nl.melt(id_vars=["Country Name", "Country Code", "Indicator Name", "Indicator Code"],
              var_name="TIME_PERIOD",
              value_name="GDP")
df_GDP_pl = df_GDP_pl.melt(id_vars=["Country Name", "Country Code", "Indicator Name", "Indicator Code"],
              var_name="TIME_PERIOD",
              value_name="GDP")
df_GDP_ro = df_GDP_ro.melt(id_vars=["Country Name", "Country Code", "Indicator Name", "Indicator Code"],
              var_name="TIME_PERIOD",
              value_name="GDP")

# Create a new dataframe to integrate the data of all three countries in the desired form
df_GDP_nl_pl = pd.merge(df_GDP_nl, df_GDP_pl, on='TIME_PERIOD')
df_GDP_nl_pl_ro = pd.merge(df_GDP_nl_pl, df_GDP_ro, on='TIME_PERIOD')

# Remove unnessecary columns 
df_GDP_nl_pl_ro = df_GDP_nl_pl_ro.drop(['Country Name_x', "Country Name_y", "Country Name", "Indicator Name_x", "Indicator Code_x", "Indicator Code_y", "Indicator Code", "Country Code_x", "Country Code_y", "Country Code", "Indicator Name_y", "Indicator Name"], axis=1)

# Rename column
df_GDP_nl_pl_ro = df_GDP_nl_pl_ro.rename({
            'GDP_x': 'GDP Netherlands',
            'GDP_y': 'GDP Poland',
            'GDP': 'GDP Romania'
        }, axis=1)

# Remove years before 2009 and remove 2021
df_GDP_nl_pl_ro_from_2009 = df_GDP_nl_pl_ro[df_GDP_nl_pl_ro["TIME_PERIOD"] > "2008"]
df_GDP_nl_pl_ro_excluding_2021 = df_GDP_nl_pl_ro_from_2009[df_GDP_nl_pl_ro_from_2009["TIME_PERIOD"] < "2021"]




In [28]:
# Convert Time period in df_GDP_nl_pl_ro_excluding_2021 to integer 
df_GDP_nl_pl_ro_excluding_2021['TIME_PERIOD'] = df_GDP_nl_pl_ro_excluding_2021['TIME_PERIOD'].astype(int)
df_GDP_nl_pl_ro_excluding_2021['GDP Netherlands'] = df_GDP_nl_pl_ro_excluding_2021['GDP Netherlands'].astype(float)
df_GDP_nl_pl_ro_excluding_2021['GDP Poland'] = df_GDP_nl_pl_ro_excluding_2021['GDP Poland'].astype(float)
df_GDP_nl_pl_ro_excluding_2021['GDP Romania'] = df_GDP_nl_pl_ro_excluding_2021['GDP Romania'].astype(float)

# Merge the GDP data frame and the Traffic volume relative to GDP data frame
df_GDP_Traffic_3countries = pd.merge(df_GDP_nl_pl_ro_excluding_2021, df_nl_pl_ro, on='TIME_PERIOD')

# Make dataframe with only covid years
df_GDP_Traffic_3countries_COVID = df_GDP_Traffic_3countries[df_GDP_Traffic_3countries["TIME_PERIOD"] > 2017]
## df_GDP_Traffic_3countries_COVID = df_GDP_Traffic_3countries_COVID.drop(['GDP Netherlands', "GDP Poland", "GDP Romania"], axis=1)

# Set index to time period
df_GDP_Traffic_3countries.set_index('TIME_PERIOD', inplace=True)
df_GDP_Traffic_3countries_COVID.set_index('TIME_PERIOD', inplace=True)

# Calculate percentage change of traffic volume for each year
df_GDP_Traffic_3countries_COVID = df_GDP_Traffic_3countries_COVID.pct_change()





A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/

In [31]:
# Import COVID data for the netherlands, poland and romania
file_path = 'TIL_6010_project_daniel/data/Traffic_volume/WHO-COVID-19-global-data.csv' 
df_COVID = pd.read_csv(file_path, delimiter=',')

# Extract data of the Netherlands, poland and romania
df_COVID_nl = df_COVID[df_COVID["Country"] == "Netherlands"]
df_COVID_pl = df_COVID[df_COVID["Country"] == "Poland"]
df_COVID_ro = df_COVID[df_COVID["Country"] == "Romania"]

# Create a new dataframe to integrate the data of all three countries in the desired form
df_COVID_nl_pl = pd.merge(df_COVID_nl, df_COVID_pl, on='Date_reported')
df_COVID_nl_pl_ro = pd.merge(df_COVID_nl_pl, df_COVID_ro, on='Date_reported')

# Remove unnessecary columns 
df_COVID_nl_pl_ro = df_COVID_nl_pl_ro.drop(['Country_code_x', "WHO_region_x", "Country_code_y", "WHO_region_y", "Country_code", "WHO_region", "Cumulative_cases_x", "Cumulative_cases_y", "Cumulative_cases", "Cumulative_deaths_x", "Cumulative_deaths_y", "Cumulative_deaths"], axis=1)

# Creating yearly data
df_COVID_nl_pl_ro.set_index('Date_reported', inplace=True)
df_COVID_nl_pl_ro.index = pd.to_datetime(df_COVID_nl_pl_ro.index)
df_COVID_nl_pl_ro = df_COVID_nl_pl_ro.resample('1Y').sum()

# Rename column
df_COVID_nl_pl_ro = df_COVID_nl_pl_ro.rename({
    'New_cases_x': 'COVID_cases_Netherlands',
    'New_cases_y': 'COVID_cases_Poland',
    'New_cases'  : 'COVID_cases_Romania',
    'New_deaths_x': 'COVID_deaths_Netherlands',
    'New_deaths_y': 'COVID_deaths_Poland',
    'New_deaths'  : 'COVID_deaths_Romania'
    }, axis=1)

# Remove year 2022 and 2021
df_COVID_nl_pl_ro_excluding_2022 = df_COVID_nl_pl_ro[df_COVID_nl_pl_ro.index != "2022-12-31"]
df_COVID_nl_pl_ro_excluding_2021 = df_COVID_nl_pl_ro_excluding_2022[df_COVID_nl_pl_ro_excluding_2022.index != "2021-12-31"]

# Add year 2019
dict2019={'Date_reported': [20191231], "COVID_cases_Netherlands":[0],'COVID_deaths_Netherlands':[0],'COVID_cases_Poland':[0],"COVID_deaths_Poland":[0], 'COVID_cases_Romania':[0], 'COVID_deaths_Romania':[0]}
df_2019=pd.DataFrame(dict2019)

# Format to datetime
df_2019['Date_reported'] = pd.to_datetime(df_2019['Date_reported'], format='%Y%m%d')
df_2019.set_index('Date_reported', inplace=True)

# Concatenate two dataframes
df_COVID_final = pd.concat([df_2019, df_COVID_nl_pl_ro_excluding_2021], axis=0)

# Make a year column so that graphs look better
df_COVID_final["year"] = pd.DatetimeIndex(df_COVID_final.index).year


Unnamed: 0_level_0,COVID_cases_Netherlands,COVID_deaths_Netherlands,COVID_cases_Poland,COVID_deaths_Poland,COVID_cases_Romania,COVID_deaths_Romania,year
Date_reported,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2019-12-31,0,0,0,0,0,0,2019
2020-12-31,785874,11296,1298021,28641,627941,15596,2020


In [30]:
# Make graph showing the Traffic volume relative to GDP over time for the three countries
Traffic_volume = ["Netherlands traffic volume relative to GDP", "Poland traffic volume relative to GDP", "Romania traffic relative to GDP"]
fig = px.line(df_GDP_Traffic_3countries, x=df_GDP_Traffic_3countries.index, y=Traffic_volume, title="Traffic volume relative to GDP for three countries")
fig.show()

# Make graph showing the GDP of the three countries
GDP = ["GDP Netherlands", "GDP Poland", "GDP Romania"]
fig = px.line(df_GDP_Traffic_3countries, x=df_GDP_Traffic_3countries.index, y=df_GDP_Traffic_3countries.columns[0:3], title="GDP of three countries")
fig.show()

# Make bar plot showing the percentual change in Traffic volume
fig = px.bar(df_GDP_Traffic_3countries_COVID, x=df_GDP_Traffic_3countries_COVID.index, y=Traffic_volume, title="Traffic volume percentage change for three countries", barmode='group', labels={
                     "value": "Percentage change from year before"})           
fig.show()

# Make bar plot showing the percentual change in GDP
fig = px.bar(df_GDP_Traffic_3countries_COVID, x=df_GDP_Traffic_3countries_COVID.index, y=GDP, title="GDP percentage change for three countries", barmode='group', labels={
                     "value": "Percentage change from year before"})           
fig.show()

# Make bar plot showing the number of COVID cases 2019 and 2020
Cases = ["COVID_cases_Netherlands", 'COVID_cases_Poland', 'COVID_cases_Romania']
fig = px.bar(df_COVID_final, x="year", y=Cases, title="COVID cases 2019 and 2020", barmode='group', labels={
                     "value": "COVID cases"})           
fig.show()

# Make bar plot showing the number of COVID deaths 2019 and 2020
Deaths = ["COVID_deaths_Netherlands", 'COVID_deaths_Poland', 'COVID_deaths_Romania']
fig = px.bar(df_COVID_final, x="year", y=Deaths, title="COVID deaths 2019 and 2020", barmode='group', labels={
                     "value": "COVID deaths"})           
fig.show()