In [128]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px

# Part 1 - Data Import
Data about traffic volume per GDP is imported for poland, romania and the netherlands.
- European data on volume of traffic relative to GDP. Obtained from eurostat: `https://ec.europa.eu/eurostat/databrowser/view/ttr00001/default/table?lang=en`

Data about GDP is imported for poland, romania and the netherlands.
- European data GDP. Obtained from worldbank: `https://data.worldbank.org/indicator/NY.GDP.MKTP.CD?locations=PL & https://data.worldbank.org/indicator/NY.GDP.MKTP.CD?locations=NL & https://data.worldbank.org/indicator/NY.GDP.MKTP.CD?locations=RO
`

Looking at the difference between 1 wealthy country (Netherlands) and 2 less wealthy countries (Poland and Romania) to see if the decline in transport during covid was different in these two types of countries. Less wealthy countries maybe have less room for working from home, less knowledge minded service jobs for example.
    

In [121]:
# Import the file that contains traffic volume per GDP data, drop irrelevent data and only keep data of Netherlands, Poland and romania

file_path = 'TIL_6010_project_daniel/data/ttr00001_linear.csv.gz' 
df = pd.read_csv(file_path, delimiter=',')

# Extract data of the Netherlands, poland and romania
df_nl = df[df['geo'] == "NL"]
df_pl = df[df['geo'] == "PL"]
df_ro = df[df['geo'] == "RO"]

# Create a new dataframe to integrate the data of all three countries in the desired form
df_nl_pl = pd.merge(df_nl, df_pl, on='TIME_PERIOD')
df_nl_pl_ro = pd.merge(df_nl_pl, df_ro, on='TIME_PERIOD')
##df_nl_pl_ro.set_index('TIME_PERIOD', inplace=True)

# remove unnessecary columns 
df_nl_pl_ro = df_nl_pl_ro.drop(['unit_x', "freq_x", "LAST UPDATE_x", "OBS_FLAG_x", "DATAFLOW_x", "DATAFLOW_y", "LAST UPDATE_y", "freq_y", "unit_y", "OBS_FLAG_y", "DATAFLOW", "LAST UPDATE", "freq", "unit", "OBS_FLAG", "geo_x", "geo_y", "geo"], axis=1)

# rename column
df_nl_pl_ro = df_nl_pl_ro.rename({
            'OBS_VALUE_x': 'Netherlands traffic volume relative to GDP',
            'OBS_VALUE_y': 'Poland traffic volume relative to GDP',
            'OBS_VALUE': 'Romania traffic relative to GDP'
        }, axis=1)

df_nl_pl_ro

Unnamed: 0,TIME_PERIOD,Netherlands traffic volume relative to GDP,Poland traffic volume relative to GDP,Romania traffic relative to GDP
0,2009,102.4,145.2,93.8
1,2010,100.0,100.0,100.0
2,2011,99.1,98.4,96.9
3,2012,99.9,93.6,97.9
4,2013,101.0,93.1,97.5
5,2014,99.8,91.6,100.3
6,2015,93.5,89.6,103.6
7,2016,92.6,91.0,104.7
8,2017,88.8,84.5,100.0
9,2018,92.2,82.0,100.8


In [115]:
# Import GDP data for the netherlands, poland and romania
file_path = 'TIL_6010_project_daniel/data/GDP_data_C.csv' 
df_GDP = pd.read_csv(file_path, delimiter=';')

# Extract data of the Netherlands, poland and romania
df_GDP_nl = df_GDP[df_GDP["Country Name"] == "Netherlands"]
df_GDP_pl = df_GDP[df_GDP["Country Name"] == "Poland"]
df_GDP_ro = df_GDP[df_GDP["Country Name"] == "Romania"]

# convert data to correct format for merging and visualizing
df_GDP_nl = df_GDP_nl.melt(id_vars=["Country Name", "Country Code", "Indicator Name", "Indicator Code"],
              var_name="TIME_PERIOD",
              value_name="GDP")
df_GDP_pl = df_GDP_pl.melt(id_vars=["Country Name", "Country Code", "Indicator Name", "Indicator Code"],
              var_name="TIME_PERIOD",
              value_name="GDP")
df_GDP_ro = df_GDP_ro.melt(id_vars=["Country Name", "Country Code", "Indicator Name", "Indicator Code"],
              var_name="TIME_PERIOD",
              value_name="GDP")

# Create a new dataframe to integrate the data of all three countries in the desired form
df_GDP_nl_pl = pd.merge(df_GDP_nl, df_GDP_pl, on='TIME_PERIOD')
df_GDP_nl_pl_ro = pd.merge(df_GDP_nl_pl, df_GDP_ro, on='TIME_PERIOD')

# remove unnessecary columns 
df_GDP_nl_pl_ro = df_GDP_nl_pl_ro.drop(['Country Name_x', "Country Name_y", "Country Name", "Indicator Name_x", "Indicator Code_x", "Indicator Code_y", "Indicator Code", "Country Code_x", "Country Code_y", "Country Code", "Indicator Name_y", "Indicator Name"], axis=1)

#rename column
df_GDP_nl_pl_ro = df_GDP_nl_pl_ro.rename({
            'GDP_x': 'GDP Netherlands',
            'GDP_y': 'GDP Poland',
            'GDP': 'GDP Romania'
        }, axis=1)

#remove years before 2009 and remove 2021
df_GDP_nl_pl_ro_from_2009 = df_GDP_nl_pl_ro[df_GDP_nl_pl_ro["TIME_PERIOD"] > "2008"]
df_GDP_nl_pl_ro_excluding_2021 = df_GDP_nl_pl_ro_from_2009[df_GDP_nl_pl_ro_from_2009["TIME_PERIOD"] < "2021"]

df_GDP_nl_pl_ro_excluding_2021

Unnamed: 0,TIME_PERIOD,GDP Netherlands,GDP Poland,GDP Romania
49,2009,"8,72E+11","4,40E+11","1,74E+11"
50,2010,"8,47E+11","4,80E+11","1,66E+11"
51,2011,"9,05E+11","5,28E+11","1,83E+11"
52,2012,"8,39E+11","4,99E+11","1,71E+11"
53,2013,"8,77E+11","5,21E+11","1,91E+11"
54,2014,"8,92E+11","5,42E+11","2,00E+11"
55,2015,"7,66E+11","4,78E+11","1,78E+11"
56,2016,"7,84E+11","4,73E+11","1,88E+11"
57,2017,"8,34E+11","5,27E+11","2,12E+11"
58,2018,"9,14E+11","5,87E+11","2,41E+11"


In [127]:
# Convert Time period in df_GDP_nl_pl_ro_excluding_2021 to integer 
df_GDP_nl_pl_ro_excluding_2021['TIME_PERIOD']=df_GDP_nl_pl_ro_excluding_2021['TIME_PERIOD'].astype(int)

# Merge the GDP data frame and the Traffic volume relative to GDP data frame
df_GDP_Traffic_3countries = pd.merge(df_GDP_nl_pl_ro_excluding_2021, df_nl_pl_ro, on='TIME_PERIOD')

# set index to time period
df_GDP_Traffic_3countries.set_index('TIME_PERIOD', inplace=True)
df_GDP_Traffic_3countries

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_GDP_nl_pl_ro_excluding_2021['TIME_PERIOD']=df_GDP_nl_pl_ro_excluding_2021['TIME_PERIOD'].astype(int)


Unnamed: 0_level_0,GDP Netherlands,GDP Poland,GDP Romania,Netherlands traffic volume relative to GDP,Poland traffic volume relative to GDP,Romania traffic relative to GDP
TIME_PERIOD,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2009,"8,72E+11","4,40E+11","1,74E+11",102.4,145.2,93.8
2010,"8,47E+11","4,80E+11","1,66E+11",100.0,100.0,100.0
2011,"9,05E+11","5,28E+11","1,83E+11",99.1,98.4,96.9
2012,"8,39E+11","4,99E+11","1,71E+11",99.9,93.6,97.9
2013,"8,77E+11","5,21E+11","1,91E+11",101.0,93.1,97.5
2014,"8,92E+11","5,42E+11","2,00E+11",99.8,91.6,100.3
2015,"7,66E+11","4,78E+11","1,78E+11",93.5,89.6,103.6
2016,"7,84E+11","4,73E+11","1,88E+11",92.6,91.0,104.7
2017,"8,34E+11","5,27E+11","2,12E+11",88.8,84.5,100.0
2018,"9,14E+11","5,87E+11","2,41E+11",92.2,82.0,100.8


In [135]:
# Make graph showing the Traffic volume relative to GDP over time for the three countries
Traffic_volume = ["Netherlands traffic volume relative to GDP", "Poland traffic volume relative to GDP", "Romania traffic relative to GDP"]
fig = px.line(df_GDP_Traffic_3countries, x=df_GDP_Traffic_3countries.index, y=Traffic_volume, title="Traffic volume relative to GDP for three countries")
fig.show()

# Make graph showing the GDP of the three countries
GDP = ["GDP Netherlands", "GDP Poland", "GDP Romania"]
fig = px.line(df_GDP_Traffic_3countries, x=df_GDP_Traffic_3countries.index, y=GDP, title="GDP of three countries")
fig.show()
