In [80]:
import datetime

import airportsdata
import numpy as np
import pandas as pd
import pycountry_convert as pc

In [81]:
# Load the per-airport flight counts and emissions (first line of the file is the header).
flight_emission_raw = pd.read_csv('./data/flight_emission_data.txt', header=0)

# IATA code -> airport record; only the record's 'country' field is used here.
airports = airportsdata.load('IATA')


def _continent_or_nan(country_code):
    """Return the continent code for an alpha-2 country code, or NaN if it cannot be mapped.

    pycountry_convert raises KeyError for codes it does not know; returning NaN
    lets such rows be dropped the same way rows with unknown airports are.
    """
    try:
        return pc.country_alpha2_to_continent_code(country_code)
    except KeyError:
        return np.nan


# Attach the country of each airport; airports missing from the IATA table get NaN.
# dropna() then removes every row that has a missing value in any column.
flight_emission_df = flight_emission_raw.assign(
    Country=flight_emission_raw['Assigned airport'].map(
        lambda code: airports[code]['country'] if code in airports else np.nan
    )
).dropna()

# Attach the continent of each country and drop rows whose country could not be mapped.
flight_emission_df = flight_emission_df.assign(
    Continent=flight_emission_df['Country'].map(_continent_or_nan)
).dropna(subset=['Continent'])

assert not flight_emission_df.empty, 'no airport could be mapped to a country and continent'

# Both right-hand sides are evaluated on the un-normalised frame before assignment:
#   'CO2 RFI2.7 (t)' becomes the emission per single flight,
#   '# Flights' becomes each airport's share of all flights (sums to 1), so it can
#   be used directly as a sampling probability.
flight_emission_df = flight_emission_df.assign(**{
    'CO2 RFI2.7 (t)': flight_emission_df['CO2 RFI2.7 (t)'] / flight_emission_df['# Flights'],
    '# Flights': flight_emission_df['# Flights'] / flight_emission_df['# Flights'].sum(),
})

print(flight_emission_df.head())

  Assigned airport  # Flights  CO2 RFI2.7 (t) Country Continent
0              AAL   0.000115         0.05000      DK        EU
1              AAR   0.000230         0.05000      DK        EU
2              ABJ   0.001209         1.07619      CI        AF
3              ABQ   0.000288         0.30000      US        NA
4              ABZ   0.000058         0.20000      GB        EU


In [82]:
# Seeded generator so that Restart & Run All reproduces the same synthetic dataset.
SEED = 42
rng = np.random.default_rng(SEED)

num_researchers = 3000

# Simulation parameters.
MAX_TRIPS = 8                  # binomial trial count for the number of trips per researcher
TRIP_PROB = 0.3                # binomial success probability per trial
TRAIN_PROB_EU = 0.2            # chance that a trip to an 'EU' destination is taken by train
TRAIN_EMISSION_DIVISOR = 9.8   # a train trip emits the flight value divided by this
                               # NOTE(review): origin of the 9.8 factor is not shown here
EMISSION_SCALE = 1000          # presumably tonnes -> kg given the '(t)' column label; TODO confirm

# Number of trips taken by each researcher.
num_choices = rng.binomial(MAX_TRIPS, TRIP_PROB, num_researchers)
total_trips = int(num_choices.sum())

# First day of every month of 2023; each trip is stamped with one of these.
months = [
    datetime.datetime(2023, i, 1, 0, 0, 0, 0) for i in range(1, 13)
]

# Draw the destination of every trip at once, weighting airports by their share
# of flights ('# Flights' is expected to hold probabilities that sum to 1).
choice_idxs = rng.choice(
    len(flight_emission_df),
    size=total_trips,
    replace=True,
    p=flight_emission_df['# Flights'].to_numpy(),
)
destinations = flight_emission_df.iloc[choice_idxs]
continents = destinations['Continent'].to_numpy()

# Only trips to 'EU' destinations can be train trips, each with probability TRAIN_PROB_EU.
is_train = (rng.random(total_trips) < TRAIN_PROB_EU) & (continents == 'EU')

flight_emissions = destinations['CO2 RFI2.7 (t)'].to_numpy() * EMISSION_SCALE
emissions = np.where(is_train, flight_emissions / TRAIN_EMISSION_DIVISOR, flight_emissions)

# Plain arrays/lists (not Series) are passed so that the repeated index labels of
# `destinations` play no role in building the frame.
simulated_flight_df = pd.DataFrame({
    # researcher i appears once per trip they take
    'researcher_id': np.repeat(np.arange(num_researchers), num_choices),
    'travel_type': np.where(is_train, 'Train', 'Flight'),
    'month': [months[m] for m in rng.integers(len(months), size=total_trips)],
    'country': destinations['Country'].to_numpy(),
    'continent': continents,
    'emission': emissions,
})

In [83]:
# Persist the synthetic trips without the row index.
# NOTE: 'NA' occurs as a continent code in this data, and a default pd.read_csv
# parses the string 'NA' as a missing value; read the file back with
# keep_default_na=False (or explicit na_values) to keep those rows intact.
simulated_flight_df.to_csv(
    path_or_buf='./data/synthetic_emission_data.csv',
    index=False,
)