In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

%matplotlib notebook
%matplotlib notebook

In [2]:
# Columns retained from the raw T-100 segment export, in the order they
# should appear in the working frame.
columns = [
    # traffic measures
    'DEPARTURES_SCHEDULED', 'DEPARTURES_PERFORMED', 'SEATS',
    'PASSENGERS', 'DISTANCE', 'AIR_TIME',
    # carrier identification
    'UNIQUE_CARRIER', 'AIRLINE_ID', 'UNIQUE_CARRIER_NAME',
    'UNIQUE_CARRIER_ENTITY', 'REGION', 'CARRIER', 'CARRIER_NAME',
    'CARRIER_GROUP', 'CARRIER_GROUP_NEW',
    # origin airport
    'ORIGIN_AIRPORT_ID', 'ORIGIN_AIRPORT_SEQ_ID', 'ORIGIN_CITY_MARKET_ID',
    'ORIGIN', 'ORIGIN_CITY_NAME', 'ORIGIN_COUNTRY', 'ORIGIN_COUNTRY_NAME',
    'ORIGIN_WAC',
    # destination airport
    'DEST_AIRPORT_ID', 'DEST_AIRPORT_SEQ_ID', 'DEST_CITY_MARKET_ID',
    'DEST', 'DEST_CITY_NAME', 'DEST_COUNTRY', 'DEST_COUNTRY_NAME',
    'DEST_WAC',
    # aircraft, calendar and service descriptors
    'AIRCRAFT_GROUP', 'AIRCRAFT_TYPE', 'AIRCRAFT_CONFIG',
    'QUARTER', 'MONTH', 'DISTANCE_GROUP', 'CLASS',
]

# Load the monthly segment records for all carriers and keep only the columns
# listed above. Per the original author's note, a carrier can have several
# rows for the same route and month (e.g. 2 rows for JFK-LAX in Jan, 1 in Feb,
# 4 in Apr); those are collapsed later in the notebook.
all_2005 = pd.read_csv('./413215462_T_T100_SEGMENT_ALL_CARRIER.csv').loc[:, columns]

In [3]:
# Airport list of the WWLMINET network (255 airports per the original note),
# read from the 'Airport List Map' sheet of the workbook.
airport_list = pd.read_excel(
    './GHG and cost LC stages System Expansion.xlsx',
    sheet_name='Airport List Map',
)

# Keep a segment only when BOTH its origin and its destination appear in the
# network's 'Code' column.
all_2005 = all_2005.loc[
    all_2005['ORIGIN'].isin(airport_list['Code'])
    & all_2005['DEST'].isin(airport_list['Code'])
]

In [4]:
# Drop rows whose CARRIER_GROUP_NEW equals 7 (the all-cargo carrier group,
# per the original note).
all_2005 = all_2005.loc[all_2005['CARRIER_GROUP_NEW'].ne(7)]

In [5]:
# Drop rows whose AIRCRAFT_CONFIG equals 2 (freight configuration, per the
# original note).
all_2005 = all_2005.loc[all_2005['AIRCRAFT_CONFIG'].ne(2)]

In [6]:
# Drop rows whose AIRCRAFT_CONFIG equals 4 (seaplane, per the original note).
all_2005 = all_2005.loc[all_2005['AIRCRAFT_CONFIG'].ne(4)]

In [7]:
# Keep only service classes F, A, C and E. Per the original note this removes
# all-cargo scheduled service and unscheduled passenger service.
all_2005 = all_2005.loc[
    all_2005['CLASS'].isin(['F', 'A', 'C', 'E'])
]

In [8]:
# Drop rows flown by international/foreign carriers: a row is kept only when
# neither CARRIER_GROUP_NEW nor CARRIER_GROUP is 0.
all_2005 = all_2005.loc[
    all_2005['CARRIER_GROUP_NEW'].ne(0) & all_2005['CARRIER_GROUP'].ne(0)
]

In [9]:
# Keep only rows that carried passengers and offered seats
# (both PASSENGERS and SEATS strictly positive).
all_2005 = all_2005.loc[
    all_2005['PASSENGERS'].gt(0) & all_2005['SEATS'].gt(0)
]

In [10]:
# Drop rows with a DISTANCE of 0 (the original note equates these with
# segments whose origin and destination are the same).
all_2005 = all_2005.loc[all_2005['DISTANCE'].ne(0)]

In [11]:
# Collapse the raw records to one row per carrier, route and month
# (e.g. a carrier ends up with a single JFK-LAX row for Jan, one for Feb, ...).
# Counts are summed; DISTANCE and AIR_TIME are averaged over the merged rows.

# Grouping keys: carrier identity, origin, destination and month.
index_column = [
    'AIRLINE_ID', 'UNIQUE_CARRIER_NAME', 'UNIQUE_CARRIER_ENTITY', 'REGION',
    'CARRIER_NAME', 'CARRIER_GROUP', 'CARRIER_GROUP_NEW',
    'ORIGIN_AIRPORT_ID', 'ORIGIN_AIRPORT_SEQ_ID', 'ORIGIN_CITY_MARKET_ID',
    'ORIGIN', 'ORIGIN_CITY_NAME', 'ORIGIN_COUNTRY', 'ORIGIN_COUNTRY_NAME',
    'ORIGIN_WAC',
    'DEST_AIRPORT_ID', 'DEST_AIRPORT_SEQ_ID', 'DEST_CITY_MARKET_ID',
    'DEST', 'DEST_CITY_NAME', 'DEST_COUNTRY', 'DEST_COUNTRY_NAME',
    'DEST_WAC',
    'MONTH',
]

monthly_2005 = (
    all_2005
    .groupby(index_column)
    .agg({
        'DEPARTURES_SCHEDULED': 'sum',
        'DEPARTURES_PERFORMED': 'sum',
        'SEATS': 'sum',
        'PASSENGERS': 'sum',
        'DISTANCE': 'mean',
        'AIR_TIME': 'mean',
    })
    # Turn the grouping keys back into ordinary columns.
    .reset_index()
)

In [12]:
# Add a directional route label "<ORIGIN>-<DEST>" (A-B is distinct from B-A).
# Built with vectorized string concatenation rather than a row-wise
# apply(axis=1): it avoids one Python call per row and always yields a Series,
# including when the frame is empty.
monthly_2005['DIRROUTE'] = monthly_2005['ORIGIN'] + '-' + monthly_2005['DEST']

In [13]:
# Merge all carriers on a directional route into a single representative
# carrier: one row per route and month (so at most 12 rows per route, e.g.
# JFK-LAX), with demand summed over every carrier flying it.
# NOTE(review): DISTANCE and AIR_TIME here are the unweighted mean of the
# per-carrier means computed earlier - confirm that is the intended statistic.

# Grouping keys: the route, its endpoints and the month.
index_column = [
    'DIRROUTE',
    'ORIGIN', 'ORIGIN_CITY_NAME',
    'DEST', 'DEST_CITY_NAME',
    'MONTH',
]

monthly_2005 = (
    monthly_2005
    .groupby(index_column)
    .agg({
        'DEPARTURES_SCHEDULED': 'sum',
        'DEPARTURES_PERFORMED': 'sum',
        'SEATS': 'sum',
        'PASSENGERS': 'sum',
        'DISTANCE': 'mean',
        'AIR_TIME': 'mean',
    })
    # Turn the grouping keys back into ordinary columns.
    .reset_index()
)

In [14]:
# "Regular ops" threshold: keep a route-month only if it had at least
# 4 performed departures (roughly one flight per week).
all_routes_regular = monthly_2005['DEPARTURES_PERFORMED'].ge(4)
monthly_2005 = monthly_2005.loc[all_routes_regular]
del all_routes_regular  # temporary mask; keep the notebook namespace unchanged

In [15]:
# Write the full-year (Jan-Dec) route-month demand table to disk.
# The frame's index is written as the first, unnamed column because
# `index=False` is not passed.
monthly_2005.to_csv(path_or_buf='./Jan2Dec_demand_2005.csv')

In [16]:
# Obtain a subset of demand data for each month 
# monthly_2005 = monthly_2005[monthly_2005['MONTH'] == 12]
# monthly_2005.to_csv('./Dec_demand_2005.csv')