In [187]:
import airportsdata
import pycountry_convert as pc
import numpy as np
import pandas as pd

In [188]:
flight_emission_df = pd.read_csv('./data/flight_emission_data.txt', header=0)
flight_emission_df.head()

airports = airportsdata.load('IATA')
flight_emission_df['Country'] = flight_emission_df['Assigned airport'].map(
    lambda x: np.nan if x not in airports.keys() else airports[x]['country']
)
flight_emission_df = flight_emission_df.dropna()

flight_emission_df['Continent'] = flight_emission_df['Country'].map(
    lambda x: pc.country_alpha2_to_continent_code(x)
)

flight_emission_df['CO2 RFI2.7 (t)'] /= flight_emission_df['# Flights']
flight_emission_df['# Flights'] /= np.sum(flight_emission_df['# Flights'])

print(flight_emission_df.head())

  Assigned airport  # Flights  CO2 RFI2.7 (t) Country Continent
0              AAL   0.000115         0.05000      DK        EU
1              AAR   0.000230         0.05000      DK        EU
2              ABJ   0.001209         1.07619      CI        AF
3              ABQ   0.000288         0.30000      US        NA
4              ABZ   0.000058         0.20000      GB        EU


In [189]:
import datetime

num_researchers = 3000

num_choices = np.random.binomial(8, 0.3, num_researchers)

simulated_flight_df = {
    'researcher_id': [],
    'travel_type': [],
    'month': [],
    'country': [],
    'continent': [],
    'emission': []
}

months = [
     datetime.datetime(2023, i, 1, 0, 0, 0, 0) for i in range(1, 13)
]

for i in range(num_researchers):
    choice_idxs = np.random.choice(np.arange(len(flight_emission_df)), size=num_choices[i], replace=True,
                                   p=flight_emission_df['# Flights'].to_numpy())
    for idx in choice_idxs:
        continent = flight_emission_df.iloc[idx]['Continent']
        travel_type = 'Train' if np.random.uniform(0, 1) < 0.5 and continent == 'EU' else 'Flight'
        emission = flight_emission_df.iloc[idx]['CO2 RFI2.7 (t)'] * 1000
        if travel_type == 'Train':
            emission /= 9.8

        simulated_flight_df['researcher_id'].append(i)
        simulated_flight_df['travel_type'].append(travel_type)
        simulated_flight_df['month'].append(np.random.choice(months))
        simulated_flight_df['country'].append(flight_emission_df.iloc[idx]['Country'])
        simulated_flight_df['continent'].append(continent)
        simulated_flight_df['emission'].append(emission)

simulated_flight_df = pd.DataFrame(simulated_flight_df)

In [190]:
simulated_flight_df.to_csv('./data/synthetic_emission_data_1.csv', index=False)

## Loading the data and computing statistics

In [191]:
import plotly.express as px

simulated_flight_df = pd.read_csv('./data/synthetic_emission_data.csv', na_filter=False)

interesting_researcher_idx = 65

simulated_flight_df[simulated_flight_df['researcher_id'] == interesting_researcher_idx]

Unnamed: 0,researcher_id,travel_type,month,country,continent,emission
155,65,Flight,2023-01-01,NL,EU,165.274463
156,65,Train,2023-10-01,GB,EU,18.579267
157,65,Flight,2023-06-01,US,,1711.538462
158,65,Flight,2023-07-01,SG,AS,2655.333333
159,65,Train,2023-07-01,NL,EU,165.274463
160,65,Flight,2023-10-01,CA,,1208.923077


In [192]:
# Filter data for researcher_id 6 and flights and trains only
df = simulated_flight_df[simulated_flight_df['researcher_id'] == interesting_researcher_idx]

# Convert month column to datetime type and extract month
df['month'] = pd.to_datetime(df['month'])
df['month'] = df['month'].dt.month_name()

grouped_df = df.groupby(['month', 'travel_type']).agg({'emission': 'sum', 'researcher_id': 'count'})
grouped_df = grouped_df.reset_index()

all_months_df = pd.DataFrame({
    'month': ['January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December'] + ['January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December'],
    'travel_type': ['Flight' for _ in range(12)] + ['Train' for _ in range(12)]
})

merged_df = pd.merge(all_months_df, grouped_df, on=['month', 'travel_type'], how='left')
merged_df = merged_df.fillna(0)

color_map = {'Train': '#579161', 'Flight': '#FC4C4C'}

fig = px.bar(merged_df, x='month', y='emission', color='travel_type',
             title=f"Travel history in 2023", color_discrete_map=color_map,
             labels={'travel_type': 'Travel Method','emission': 'Emission','month': 'Month', 'researcher_id': 'Number of trips'},
             hover_data={'emission': ':.2f', 'researcher_id': ':.i'}, barmode='stack')
fig.show()




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [193]:
grouped_df = simulated_flight_df.groupby(['researcher_id']).agg({'emission': 'sum', 'researcher_id': 'count'})

researcher_df = grouped_df[grouped_df.index == interesting_researcher_idx]
total_co2_emission = float(researcher_df['emission'])
print('Number of Conferences:', int(researcher_df['researcher_id']))
print('Total CO2 Emission:', round(total_co2_emission, 2), 'Kg')

flight_europe_df = df[(df['travel_type'] == 'Flight') & (df['continent'] == 'EU')]
co2_usage_flights_europe = np.sum(flight_europe_df['emission'])
print('You could save', round(co2_usage_flights_europe - co2_usage_flights_europe/9.8, 2), 'Kg of CO2 by going by train instead of flying within Europe')

co2_percentile = np.sum(grouped_df['emission'] < total_co2_emission)/len(grouped_df)*100
print('You are in the', int(co2_percentile), 'th percentile based on CO2 usage in this year')

Number of Conferences: 6
Total CO2 Emission: 5924.92 Kg
You could save 148.41 Kg of CO2 by going by train instead of flying within Europe
You are in the 99 th percentile based on CO2 usage in this year
