<h2>Setup & Import</h2>

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Lift the column display limit so wide frames are rendered without truncation
pd.options.display.max_columns = None

In [3]:
# Load the main segment table; the cells below reshape this frame step by step
df = pd.read_csv(filepath_or_buffer='data/T_T100_SEGMENT_ALL_CARRIER.csv')

FileNotFoundError: [Errno 2] No such file or directory: 'data/T_T100_SEGMENT_ALL_CARRIER.csv'

In [None]:
# Load the code -> description lookup tables and the airport coordinate table,
# grouped by the kind of column each one is later merged against

# Carrier and service lookups
carrier_classes = pd.read_csv('data/L_CARRIER_GROUP_NEW.csv')
service_classes = pd.read_csv('data/L_SERVICE_CLASS.csv')

# Aircraft lookups
aircraft_types = pd.read_csv('data/L_AIRCRAFT_TYPE.csv')
aircraft_groups = pd.read_csv('data/L_AIRCRAFT_GROUP.csv')
aircraft_configs = pd.read_csv('data/L_AIRCRAFT_CONFIG.csv')

# Geography lookups
area_codes_table = pd.read_csv('data/L_WORLD_AREA_CODES.csv')
city_markets = pd.read_csv('data/L_CITY_MARKET_ID.csv')
airport_coords = pd.read_csv('data/T_MASTER_CORD.csv')

<h2>Data Cleaning</h2>

<h4>Carrier Classes</h4>

In [None]:
# Shorten every carrier class description to the text that precedes the word
# 'Carrier' (minus the one character directly in front of it), then store the
# shortened labels in a new 'Carrier Class' column
carrier_classes_modified = [
    description[:description.index('Carrier') - 1]
    for description in carrier_classes['Description']
]
carrier_classes['Carrier Class'] = carrier_classes_modified

In [None]:
# Inspect the updated carrier_classes table, which now also carries the
# simplified 'Carrier Class' column next to the original 'Description'
carrier_classes

In [None]:
# Join the modified carrier lookup onto the carrier group code, then remove the
# join keys and the long description so only the lookup's remaining columns are added
df = (
    df.merge(carrier_classes, how='left', left_on='CARRIER_GROUP_NEW', right_on='Code')
      .drop(columns=['Code', 'Description', 'CARRIER_GROUP_NEW'])
)

<h4>World Area Codes (WAC)</h4>

In [None]:
# Swap the origin and destination WAC codes for their lookup descriptions.
# Each pass joins the lookup on one column, overwrites that column with the
# matched description and then removes the lookup's helper columns.
for wac_column in ('ORIGIN_WAC', 'DEST_WAC'):
    df = (
        df.merge(area_codes_table, how='left', left_on=wac_column, right_on='Code')
          .assign(**{wac_column: lambda merged: merged['Description']})
          .drop(columns=['Code', 'Description'])
    )

<h4>Aircraft type</h4>

In [None]:
# Show the first five rows of the aircraft type lookup
aircraft_types.head(5)

In [None]:
# Join the aircraft type lookup on its code, overwrite the code column with the
# matched description and remove the lookup's helper columns
df = (
    df.merge(aircraft_types, how='left', left_on='AIRCRAFT_TYPE', right_on='Code')
      .assign(AIRCRAFT_TYPE=lambda merged: merged['Description'])
      .drop(columns=['Code', 'Description'])
)

<h4>City Markets</h4>

In [None]:
# Show the first five rows of the city market lookup
city_markets.head(5)

In [None]:
# Swap the origin and destination city market ids for their lookup descriptions.
# Each pass joins the lookup on one column, overwrites that column with the
# matched description and then removes the lookup's helper columns.
for market_column in ('ORIGIN_CITY_MARKET_ID', 'DEST_CITY_MARKET_ID'):
    df = (
        df.merge(city_markets, how='left', left_on=market_column, right_on='Code')
          .assign(**{market_column: lambda merged: merged['Description']})
          .drop(columns=['Code', 'Description'])
    )

<h4>Service Classes</h4>

In [None]:
# Show the first five rows of the service class lookup
service_classes.head(5)

In [None]:
# Join the service class lookup on its code, overwrite the code column with the
# matched description and remove the lookup's helper columns
df = (
    df.merge(service_classes, how='left', left_on='CLASS', right_on='Code')
      .assign(CLASS=lambda merged: merged['Description'])
      .drop(columns=['Code', 'Description'])
)

<h4>Aircraft Configs</h4>

In [None]:
# Show the first five rows of the aircraft configuration lookup
aircraft_configs.head(5)

In [None]:
# Join the aircraft configuration lookup on its code, overwrite the code column
# with the matched description and remove the lookup's helper columns
df = (
    df.merge(aircraft_configs, how='left', left_on='AIRCRAFT_CONFIG', right_on='Code')
      .assign(AIRCRAFT_CONFIG=lambda merged: merged['Description'])
      .drop(columns=['Code', 'Description'])
)

<h4>Aircraft Groups</h4>

In [None]:
# Show the first five rows of the aircraft group lookup
aircraft_groups.head(5)

In [None]:
# Join the aircraft group lookup on its code, overwrite the code column with the
# matched description and remove the lookup's helper columns
df = (
    df.merge(aircraft_groups, how='left', left_on='AIRCRAFT_GROUP', right_on='Code')
      .assign(AIRCRAFT_GROUP=lambda merged: merged['Description'])
      .drop(columns=['Code', 'Description'])
)

<h3>Add Load Factor Calculation</h3>

In [None]:
# Add a per-row Load Factor column: passengers carried divided by seats offered.
# A row with zero seats divides by zero, which pandas reports as NaN (0 / 0) or
# as +/-inf (n / 0). fillna alone only clears the NaN case, so the infinities are
# converted to NaN first; every undefined ratio then ends up as a load factor of 0
# and cannot distort later sums or averages.
df['Load Factor'] = (
    (df['PASSENGERS'] / df['SEATS'])
    .replace([np.inf, -np.inf], np.nan)
    .fillna(0)
)

<h3>Add Latitude/Longitude Coordinates</h3>

In [None]:
# Show the first five rows of the airport coordinate table
airport_coords.head(5)

In [None]:
# Attach latitude/longitude for both ends of each row by joining the airport
# coordinate table on the airport code in ORIGIN and then in DEST.
#
# The coordinate table is first reduced to one row per AIRPORT: a left merge
# emits one output row per matching lookup row, so an airport code that appears
# more than once in the lookup would otherwise multiply the matching rows of df.
# NOTE(review): keep='last' assumes that, when a code repeats, the later row in
# the file is the one to prefer — TODO confirm against the source table.
_coords_by_airport = airport_coords.drop_duplicates(subset='AIRPORT', keep='last')

for endpoint in ('ORIGIN', 'DEST'):
    df = df.merge(_coords_by_airport, left_on=endpoint, right_on='AIRPORT', how='left')
    df[endpoint + '_LATITUDE'] = df['LATITUDE']
    df[endpoint + '_LONGITUDE'] = df['LONGITUDE']
    # Remove the lookup's own columns so the next pass can merge without name clashes
    df = df.drop(columns=['AIRPORT', 'DISPLAY_AIRPORT_NAME', 'LATITUDE', 'LONGITUDE'])

<h3>Drop rows with 0 departures performed</h3>

In [None]:
# Row and column counts before the zero-departure rows are dropped
df.shape

In [None]:
# Keep only the rows that are not below one performed departure.
# The comparison has to sit inside the indexer so that it produces a boolean
# mask; the earlier form, df[df.DEPARTURES_PERFORMED] < 1, indexed df with the
# departure counts themselves and therefore could not select the intended rows.
# Negating "< 1" (instead of testing ">= 1") leaves rows with a missing
# departure count in place, matching "drop the rows where the count is < 1".
df = df[~(df['DEPARTURES_PERFORMED'] < 1)]

In [None]:
# Row and column counts after the drop, for comparison with the shape shown above
df.shape

<h2>Display Final Table</h2>

In [None]:
# Show the first ten rows of the finished table
df.head(10)

In [None]:
# Download to csv
#df.to_csv('exports/T-100_Segment_Data.csv')