In [3]:
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px

#data taken from BITRE international airline activity from Jan 2009 to March 2024
port = pd.read_csv('port_operations.csv')
port.head(5)

Unnamed: 0,Month,Scheduled Operator,Country to/from,Passengers In,Freight In,Mail In,Passengers Out,Freight Out,Mail Out,Year
0,01/01/2009,Aerolineas Argentinas,Argentina,3021.0,4.313,0.6,1959.0,8.311,0.0,2009
1,01/01/2009,Aerolineas Argentinas,New Zealand,627.0,76.26,0.0,1821.0,68.539,0.0,2009
2,01/01/2009,Air Caledonie,New Caledonia,6658.0,4.918,0.645,5365.0,68.621,1.291,2009
3,01/01/2009,Air Canada,Canada,7489.0,174.828,0.004,6424.0,105.191,0.016,2009
4,01/01/2009,Air China,China,12458.0,201.314,18.569,11163.0,142.408,2.93,2009


In [4]:
port_year = port[(port['Year'] >= 2009) & (port['Year'] <= 2024)]
port_year
port_year.dtypes

Month                  object
Scheduled Operator     object
Country to/from        object
Passengers In         float64
Freight In            float64
Mail In               float64
Passengers Out        float64
Freight Out           float64
Mail Out              float64
Year                    int64
dtype: object

In [5]:
port_year['Month'] = pd.to_datetime(port_year['Month'], format='%d/%m/%Y')
port_year.dtypes
port_year.head(5)

port_replaced = port_year.fillna(0)
port_replaced['Country to/from'].unique()

array(['Argentina', 'New Zealand', 'New Caledonia', 'Canada', 'China',
       'Mauritius', 'Cook Islands', 'Germany', 'USA', 'Papua New Guinea',
       'Fiji', 'Tahiti', 'Vanuatu', 'Malaysia', 'Indonesia', 'Korea',
       'Singapore', 'Thailand', 'UK', 'Luxembourg', 'Hong Kong (SAR)',
       'Taiwan', 'Guam', 'United Arab Emirates', 'Philippines', 'Japan',
       'Vietnam', 'Chile', 'Nauru', 'Solomon Islands', 'India',
       'South Africa', 'Brunei', 'Tonga', 'Western Samoa', 'Qatar',
       'France', 'Reunion', 'Macau', 'Kiribati', 'Nigeria', 'Brazil',
       'Turkey', 'Kazakhstan', 'Netherlands', 'Uruguay', 'Laos',
       'Sri Lanka', 'Bahrain', 'Peru', 'Cambodia', 'East Timor', 'Italy',
       'Palau', 'Azerbaijan'], dtype=object)

In [6]:
#Mapping countries to continents
continent_mapping = {
    'Africa': ['South Africa', 'Mauritius', 'Nigeria', 'Reunion'],
    'Asia': ['Indonesia', 'Turkey', 'Macau', 'United Arab Emirates', 'Hong Kong (SAR)', 'Malaysia', 'Singapore', 'Thailand', 'China', 'Japan', 'India', 'Vietnam', 'Qatar', 'Philippines', 'Fiji', 'South Korea', 'Taiwan', 'Brunei', 'Korea', 'Sri Lanka', 'Cambodia', 'East Timor', 'Laos', 'Bahrain', 'Kazakhstan', 'Azerbaijan'],
    'Europe': ['UK', 'Germany', 'Italy', 'France', 'Luxembourg', 'Netherlands'],
    'North America': ['USA', 'Canada'],
    'Oceania': ['New Zealand', 'Tahiti', 'Guam', 'Australia', 'Western Samoa', 'Vanuatu', 'Solomon Islands', 'Nauru', 'New Caledonia', 'Papua New Guinea', 'Cook Islands', 'Tonga', 'Palau', 'Kiribati'],
    'South America': ['Argentina', 'Brazil', 'Chile', 'Peru', 'Uruguay']
}

def get_continent(country):
    for continent, countries in continent_mapping.items():
        if country in countries:
            return continent
    return 'Unknown'

#Continent column added to port_replaced
port_replaced['Continent'] = port_replaced['Country to/from'].apply(get_continent)
print(port_replaced.isnull().sum())

port_replaced['Total Passengers'] = port_replaced['Passengers In'] + port_replaced['Passengers Out']
port_replaced['Total Freight'] = port_replaced['Freight In'] + port_replaced['Freight Out']
port_replaced['Total Mail'] = port_replaced['Mail In'] + port_replaced['Mail Out']

sum_by_country = port_replaced.groupby(['Year', 'Continent']).sum()
sum_by_country.head(7)

Month                 0
Scheduled Operator    0
Country to/from       0
Passengers In         0
Freight In            0
Mail In               0
Passengers Out        0
Freight Out           0
Mail Out              0
Year                  0
Continent             0
dtype: int64


Unnamed: 0_level_0,Unnamed: 1_level_0,Passengers In,Freight In,Mail In,Passengers Out,Freight Out,Mail Out,Total Passengers,Total Freight,Total Mail
Year,Continent,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2009,Africa,45570.0,452.346,27.95,38654.0,727.469,34.149,84224.0,1179.815,62.099
2009,Asia,1968167.0,54990.274,3137.537,1669543.0,64221.469,1248.93,3637710.0,119211.743,4386.467
2009,Europe,98641.0,2871.07,388.713,98877.0,786.923,570.338,197518.0,3657.993,959.051
2009,North America,221652.0,10870.657,820.301,206458.0,3634.666,548.52,428110.0,14505.323,1368.821
2009,Oceania,770222.0,14812.581,672.893,745103.0,13957.797,560.415,1515325.0,28770.378,1233.308
2009,South America,26698.0,489.049,5.216,24526.0,289.03,12.939,51224.0,778.079,18.155
2010,Africa,172294.0,1987.616,80.598,162301.0,3511.156,151.841,334595.0,5498.772,232.439


In [7]:
#exclude Asia continent because it is the largest continent
port_replaced_except_asia_na = port_replaced[port_replaced['Continent'] != 'Asia']
sum_by_country_ea_na = port_replaced_except_asia_na.groupby(['Year', 'Continent']).sum()
sum_by_country_ea_na.head(7)

Unnamed: 0_level_0,Unnamed: 1_level_0,Passengers In,Freight In,Mail In,Passengers Out,Freight Out,Mail Out,Total Passengers,Total Freight,Total Mail
Year,Continent,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2009,Africa,45570.0,452.346,27.95,38654.0,727.469,34.149,84224.0,1179.815,62.099
2009,Europe,98641.0,2871.07,388.713,98877.0,786.923,570.338,197518.0,3657.993,959.051
2009,North America,221652.0,10870.657,820.301,206458.0,3634.666,548.52,428110.0,14505.323,1368.821
2009,Oceania,770222.0,14812.581,672.893,745103.0,13957.797,560.415,1515325.0,28770.378,1233.308
2009,South America,26698.0,489.049,5.216,24526.0,289.03,12.939,51224.0,778.079,18.155
2010,Africa,172294.0,1987.616,80.598,162301.0,3511.156,151.841,334595.0,5498.772,232.439
2010,Europe,371456.0,10891.284,1568.797,367737.0,3322.894,2233.009,739193.0,14214.178,3801.806


In [8]:
sum_by_country_ea_na = port_replaced_except_asia_na.groupby(['Year', 'Continent']).sum().reset_index().round(2)

#Calculate total passengers by continent
continent_with_max_passengers = sum_by_country_ea_na.loc[sum_by_country_ea_na['Total Passengers'].idxmax(), 'Continent']

#line graph using px
fig = px.line(sum_by_country_ea_na, x='Year', y='Total Passengers', color='Continent',
              labels={'Year': 'Year', 'Total Passengers': 'Total Passengers', 'Continent': 'Continent'},
              title='Total Yearly Passengers by Continent',
              hover_name='Continent')

#layout px
fig.update_layout(
    xaxis=dict(type='linear'),  # Slider requires numeric values for x-axis
    xaxis_title='Year',
    yaxis_title='Total Passengers',
    hovermode='x',  
    showlegend=True,
    legend_title='Continent',
    title_x=0.5,
)

#default value = continent with highest total passenger
default_visible = [False] * len(fig.data)
max_continent_index = sum_by_country_ea_na['Continent'].tolist().index(continent_with_max_passengers)
default_visible[max_continent_index] = True
for i, trace in enumerate(fig.data):
    trace.visible = default_visible[i]



#create dropdown box
buttons = []
default_checkbox = [True if i == max_continent_index else False for i in range(len(sum_by_country_ea_na['Continent'].unique()))]

for i, continent in enumerate(sum_by_country_ea_na['Continent'].unique()):
    buttons.append(
        {
            'label': continent,
            'method': 'update',
            'args': [{'visible': [True if c == continent else False for c in sum_by_country_ea_na['Continent']]}]
        }
    )

#default dropdown value to the continent with the highest total passenger
default_dropdown_index = sum_by_country_ea_na['Continent'].unique().tolist().index(continent_with_max_passengers)    
    
fig.update_layout(
    updatemenus=[
        {
            'buttons': buttons,
            'direction': 'down',
            'showactive': True,
            'active': default_dropdown_index,
            'x': 0.17,
            'xanchor': 'right',
            'y': 1.07,
            'yanchor': 'middle',
        }
    ]
)

fig.show()