In [2]:
import pandas as pd
import numpy as np
import plotly as plt

import math
import scipy

import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go

import geopandas as gpd

import plotly.io as pio
import plotly.graph_objects as go   
import country_converter as coco

import plotly.io as pio
pio.renderers.default = "plotly_mimetype+notebook"

In [3]:
# Two-letter country codes that have to be rewritten before conversion
# (EL -> GR, AN -> BQ); codes_correction() applies this mapping.
country_code_fix = {"EL": "GR", "AN": "BQ"}

# Raw monthly CSV extracts: one for passengers, one for freight.
passengers = pd.read_csv("Data_Sets/avia_par_nl__custom_3564728_monthly_linear.csv")
freight = pd.read_csv("Data_Sets/avia_gor_nl__custom_3564729_monthly_linear.csv")

def top_n_year(df,n,year):
    """Keep the rows of ``df`` that belong to the top ``n`` countries of ``year``.

    The ranking is based on the sum of ``OBS_VALUE`` per ``airp_country_2``
    within ``year``. The returned frame is NOT restricted to ``year``: it
    contains every period available for the selected countries.

    Side effect: the helper columns ``airp_country_1``, ``airp_code_1``,
    ``airp_country_2``, ``airp_code_2`` (from ``airp_pr``) and ``Year``,
    ``Month`` (from ``TIME_PERIOD``) are added to ``df`` in place.

    Parameters
    ----------
    df : DataFrame with at least ``airp_pr`` (four "_"-separated parts),
        ``TIME_PERIOD`` ("YYYY-MM" strings) and ``OBS_VALUE`` columns.
    n : number of countries to keep.
    year : year to rank on, as a string (e.g. ``'2019'``), because ``Year``
        is produced by a string split.

    Returns
    -------
    DataFrame
        An independent copy of the matching rows, including the helper columns.

    Raises
    ------
    KeyError
        If ``year`` does not occur in the data.
    """
    df[["airp_country_1","airp_code_1","airp_country_2","airp_code_2"]] = df.airp_pr.str.split("_",expand=True)
    df[["Year","Month"]] = df.TIME_PERIOD.str.split("-",expand=True)

    # Aggregate only the measure column: summing the whole frame would also
    # try to "sum" the string columns, which is slow and version dependent.
    yearly_totals = df.groupby(["Year","airp_country_2"])["OBS_VALUE"].sum()
    if year not in yearly_totals.index.get_level_values("Year"):
        raise KeyError(f"year {year!r} not found in TIME_PERIOD")

    top_countries = yearly_totals.loc[year].sort_values(ascending=False).head(n).index

    # Return a copy so callers can add columns without SettingWithCopyWarning.
    return df[df["airp_country_2"].isin(top_countries)].copy()

def codes_correction(df,ISO_2):
    """Add ISO3 codes and short country names derived from a country-code column.

    ``ISO_2`` is the name of the column in ``df`` that holds the country codes.
    The column is first rewritten with the ``country_code_fix`` mapping, then
    passed to ``coco.convert`` twice to fill ``iso_3_country`` (``to='ISO3'``)
    and ``country_2_name`` (``to='name_short'``).

    ``df`` is modified in place and also returned.
    """
    df[ISO_2] = df[ISO_2].replace(country_code_fix)

    # (new column, coco target classification), filled in this order
    conversions = (
        ('iso_3_country', 'ISO3'),
        ('country_2_name', 'name_short'),
    )
    for new_column, target in conversions:
        df[new_column] = coco.convert(names=df[ISO_2], to=target)
    return df

# Restrict both datasets to the five countries (airp_country_2) with the
# largest 2019 totals.
top_cargo = top_n_year(df=freight, n=5, year='2019')
top_passengers = top_n_year(df=passengers, n=5, year='2019')

In [4]:
# COVID part
filepath = 'Data_Sets/owid-covid-data.csv'
covid_data = pd.read_csv(filepath, delimiter=',')

# Select the columns of interest. An explicit copy is taken so that adding the
# Year/Month/Day columns below writes to an independent frame rather than to a
# slice of covid_data (which is what raised SettingWithCopyWarning).
columns_of_interest = ['new_cases_smoothed_per_million','people_fully_vaccinated_per_hundred','date','location']
filtered_covid_data = covid_data[columns_of_interest].copy()
filtered_covid_data[['Year','Month','Day']] = filtered_covid_data['date'].str.split("-",expand=True)

# Countries of interest for cargo and passenger transport
cargo_countries_of_interest = ['China','United States','United Arab Emirates','Brazil','Qatar','Netherlands']
passenger_countries_of_interest = ['United Kingdom','Spain','Germany','Italy','United States','Netherlands']


def _monthly_covid_means(covid_frame, countries):
    """Return per-location monthly means of the numeric columns of ``covid_frame``.

    Rows are restricted to ``countries`` (matched on ``location``), grouped by
    ``location``/``Year``/``Month`` and averaged. A ``date`` column of the
    form "<Year>-<Month>" is added to the result.
    """
    selected = covid_frame[covid_frame['location'].isin(countries)]
    monthly = (
        selected
        .groupby(['location','Year','Month'])
        # numeric_only=True: the string columns (date, Day) cannot be averaged
        # and make a plain .mean() fail on recent pandas versions.
        .mean(numeric_only=True)
        .reset_index()
    )
    # Scalar separator instead of a one-element list: plain string concatenation.
    monthly['date'] = monthly['Year'] + '-' + monthly['Month']
    return monthly


# Monthly means for the cargo countries
cargo_filtered_covid_data = _monthly_covid_means(filtered_covid_data, cargo_countries_of_interest)

# Monthly means for the passenger countries
passenger_filtered_covid_data = _monthly_covid_means(filtered_covid_data, passenger_countries_of_interest)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [14]:
# Show the passenger rows kept by top_n_year(passengers, 5, '2019');
# the rendered table is truncated by pandas.
top_passengers


Unnamed: 0,DATAFLOW,LAST UPDATE,freq,unit,tra_meas,airp_pr,TIME_PERIOD,OBS_VALUE,OBS_FLAG,airp_country_1,airp_code_1,airp_country_2,airp_code_2,Year,Month
4910,ESTAT:AVIA_PAR_NL(1.0),23/09/22 23:00:00,M,PAS,PAS_CRD,NL_EHAM_DE_EDDB,2011-01,11395,,NL,EHAM,DE,EDDB,2011,01
4911,ESTAT:AVIA_PAR_NL(1.0),23/09/22 23:00:00,M,PAS,PAS_CRD,NL_EHAM_DE_EDDB,2011-02,11540,,NL,EHAM,DE,EDDB,2011,02
4912,ESTAT:AVIA_PAR_NL(1.0),23/09/22 23:00:00,M,PAS,PAS_CRD,NL_EHAM_DE_EDDB,2011-03,13070,,NL,EHAM,DE,EDDB,2011,03
4913,ESTAT:AVIA_PAR_NL(1.0),23/09/22 23:00:00,M,PAS,PAS_CRD,NL_EHAM_DE_EDDB,2011-04,13135,,NL,EHAM,DE,EDDB,2011,04
4914,ESTAT:AVIA_PAR_NL(1.0),23/09/22 23:00:00,M,PAS,PAS_CRD,NL_EHAM_DE_EDDB,2011-05,13598,,NL,EHAM,DE,EDDB,2011,05
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
40384,ESTAT:AVIA_PAR_NL(1.0),23/09/22 23:00:00,M,PAS,PAS_CRD,NL_EHAM_US_KSLC,2022-03,15208,,NL,EHAM,US,KSLC,2022,03
40385,ESTAT:AVIA_PAR_NL(1.0),23/09/22 23:00:00,M,PAS,PAS_CRD,NL_EHAM_US_KSLC,2022-04,19831,,NL,EHAM,US,KSLC,2022,04
40386,ESTAT:AVIA_PAR_NL(1.0),23/09/22 23:00:00,M,PAS,PAS_CRD,NL_EHAM_US_KSLC,2022-05,22648,,NL,EHAM,US,KSLC,2022,05
40387,ESTAT:AVIA_PAR_NL(1.0),23/09/22 23:00:00,M,PAS,PAS_CRD,NL_EHAM_US_KSLC,2022-06,22379,,NL,EHAM,US,KSLC,2022,06


In [9]:
# Show the per-location, per-month COVID means for the passenger countries.
passenger_filtered_covid_data

Unnamed: 0,location,Year,Month,new_cases_smoothed_per_million,people_fully_vaccinated_per_hundred,date
0,Germany,2020,01,,,2020-01
1,Germany,2020,02,0.011621,,2020-02
2,Germany,2020,03,18.258710,,2020-03
3,Germany,2020,04,43.080800,,2020-04
4,Germany,2020,05,9.641290,,2020-05
...,...,...,...,...,...,...
197,United States,2022,06,314.537467,67.019667,2022-06
198,United States,2022,07,364.569742,67.272258,2022-07
199,United States,2022,08,305.351581,67.557419,2022-08
200,United States,2022,09,190.342033,67.822333,2022-09


In [5]:


# Load the volcano CSV from the plotly/datasets GitHub repository (needs network access).
# NOTE(review): `df` is not used in the visible part of this cell and the data is
# unrelated to the aviation/COVID frames above; confirm it is still needed.
df = pd.read_csv("https://raw.githubusercontent.com/plotly/datasets/master/volcano.csv")

# Create an empty Plotly figure; nothing is added to it in the visible part of this cell.
fig = go.Figure()

