In [1]:
import sys
from suds import null, WebFault
from suds.client import Client
import logging
import pandas as pd
import numpy as np
import geopy.distance
import pickle
import datetime
from sklearn.preprocessing import StandardScaler

np.seterr(divide='ignore', invalid='ignore')

{'divide': 'warn', 'over': 'warn', 'under': 'ignore', 'invalid': 'warn'}

In [2]:
username = 'drewhibbard'
key = 'e3b96bb77a74f8670797a353cdec88fdaee2aa16'
url = 'http://flightxml.flightaware.com/soap/FlightXML2/wsdl'

logging.basicConfig(level=logging.INFO)
api = Client(url, username=username, password=key)

scaler = StandardScaler()

In [3]:
flight = api.service.FlightInfoEx('AAL302')

In [6]:
timestamp = flight[1][0]['filed_departuretime']

In [15]:
datetime.datetime.fromtimestamp(timestamp)

datetime.datetime(2020, 10, 30, 17, 10)

In [8]:
tail = api.service.AirlineFlightInfo('AAL302-1603866356-airline-0320')['tailnumber']

In [5]:
for f in flight[1][0]:
    print(f)

('faFlightID', 'AAL302-1603866356-airline-0320')
('ident', 'AAL302')
('aircrafttype', 'A321')
('filed_ete', '06:01:00')
('filed_time', 1603866356)
('filed_departuretime', 1604095800)
('filed_airspeed_kts', 357)
('filed_airspeed_mach', None)
('filed_altitude', 0)
('route', None)
('actualdeparturetime', 0)
('estimatedarrivaltime', 1604117460)
('actualarrivaltime', 0)
('diverted', None)
('origin', 'KJFK')
('destination', 'KLAX')
('originName', 'John F Kennedy Intl')
('originCity', 'New York, NY')
('destinationName', 'Los Angeles Intl')
('destinationCity', 'Los Angeles, CA')


In [9]:
engine_manufacturer_lookup[tail]

KeyError: 'N102NN'

In [9]:
with open('data/airport_code_converter.pickle','rb') as read_file:
    airport_code_converter = pickle.load(read_file)

with open('data/airline_code_converter.pickle','rb') as read_file:
    airline_code_converter = pickle.load(read_file)
    
with open('data/airport_coord_lookup.pickle','rb') as read_file:
    airport_coord_lookup = pickle.load(read_file)
    
with open('tail_engine_conv.pickle','rb') as read_file:
    tail_engine_convert = pickle.load(read_file)
    
with open('data/engine_manufacturer_lookup.pickle','rb') as read_file:
    engine_manufacturer_lookup = pickle.load(read_file)
    
with open('data/aircraft_age_lookup.pickle','rb') as read_file:
    aircraft_age_lookup = pickle.load(read_file)
    
with open('data/aircraft_delay_lookup.pickle','rb') as read_file:
    aircraft_delay_lookup = pickle.load(read_file)
    
with open('data/airline_delay_lookup.pickle','rb') as read_file:
    airline_delay_lookup = pickle.load(read_file)
    
with open('data/airport_delay_lookup.pickle','rb') as read_file:
    airport_delay_lookup = pickle.load(read_file)
    
with open('data/engine_delay_lookup.pickle','rb') as read_file:
    engine_delay_lookup = pickle.load(read_file)
    
with open('data/hour_delay_lookup.pickle','rb') as read_file:
    hour_delay_lookup = pickle.load(read_file)
    
with open('data/model_delay_lookup.pickle','rb') as read_file:
    model_delay_lookup = pickle.load(read_file)
    
with open('data/month_delay_lookup.pickle','rb') as read_file:
    month_delay_lookup = pickle.load(read_file)
    
with open('data/weekday_delay_lookup.pickle','rb') as read_file:
    weekday_delay_lookup = pickle.load(read_file)
    
with open('data/model_coef.pickle','rb') as read_file:
    model_coefs = pickle.load(read_file)
    
with open('data/scaler.pickle','rb') as read_file:
    scaler = pickle.load(read_file)
    
with open('data/feature_means.pickle','rb') as read_file:
    feature_means = pickle.load(read_file)
    
with open('data/airline_name_lookup.pickle','rb') as read_file:
    airline_name_lookup = pickle.load(read_file)

In [10]:
def get_flight_info(flight_number):
    flight_info = {}
    
    flight_details = api.service.FlightInfoEx(flight_number,1)
    fa_id = flight_details[1][0]['faFlightID']
    flight_info['unique_id'] = fa_id
    
    tail = api.service.AirlineFlightInfo(fa_id)['tailnumber']
    
    flight_info['tail_num'] = tail
    flight_info['aircraft_type'] = flight_details[1][0]['aircrafttype']
    airport = airport_code_converter[flight_details[1][0]['origin']]
    flight_info['airport_orig'] = airport
    flight_info['airport_dest'] = airport_code_converter[flight_details[1][0]['destination']]
    flight_info['airline'] = airline_code_converter[flight_details[1][0]['ident'][:3]]
    
    depart_unix_time = flight_details[1][0]['filed_departuretime']
    depart_timestamp = datetime.datetime.fromtimestamp(depart_unix_time)
    
    flight_info['month'] = depart_timestamp.month
    flight_info['hour'] = depart_timestamp.hour
    flight_info['day_of_week'] = depart_timestamp.weekday() + 1
    
    weather = api.service.MetarEx(airport,howMany=1)
    
    flight_info['snow'] = int('snow' in weather['metar'][0]['cloud_friendly'].lower())
    flight_info['rain'] = int('rain' in weather['metar'][0]['cloud_friendly'].lower())
    flight_info['wind'] = round(weather['metar'][0]['wind_speed'] * 1.151,1)  # convert from nots to mph 
    flight_info['temp_f'] = round(weather['metar'][0]['temp_air']*(9/5) +32,0)  # convert from celcius to fahrenheit
    flight_info['clouds'] = weather['metar'][0]['cloud_friendly']
    
    try:
        port1_coords = airport_coord_lookup[flight_info['airport_orig']]
        port2_coords = airport_coord_lookup[flight_info['airport_dest']]
        flight_info['distance'] = round(geopy.distance.distance(port1_coords,port2_coords).miles,0)
    except:
        flight_info['distance'] = np.nan
    
    flight_info['orig_city'] = flight_details[1][0]['originCity']
    flight_info['destination_city'] = flight_details[1][0]['destinationCity']
    
    try:
        flight_info['engine'] = tail_engine_convert[tail]
        flight_info['engine_manufacturer'] = engine_manufacturer_lookup[flight_info['engine']]
    except:
        flight_info['engine'] = np.nan
        flight_info['engine_manufacturer'] = np.nan
        
    try: 
        flight_info['year_plane_made'] = aircraft_age_lookup[tail]
        flight_info['aircraft_age'] = 2020 - aircraft_age_lookup[tail]
    except:
        flight_info['year_plane_made'] = np.nan
        flight_info['aircraft_age'] = np.nan
        
    previous_flight = api.service.FlightInfoEx(flight_number,1)
    if previous_flight[1][0]['actualdeparturetime'] > previous_flight[1][0]['filed_departuretime']:
        flight_info['previous_delay'] = 1
    else:
        flight_info['previous_delay'] = 0
    
    return flight_info

In [11]:
def get_prediction_variables(flight_dict):
    features_no_scale = []
    try: 
        features_no_scale.append(flight_dict['snow'])
    except:
        features_no_scale.append(feature_means['snow_orig'])
        
    try: 
        features_no_scale.append(flight_dict['previous_delay'])
    except:
        features_no_scale.append(feature_means['previous_delay'])
        
    try: 
        features_no_scale.append(flight_dict['rain'])
    except:
        features_no_scale.append(feature_means['precip_orig'])
        
        
    features_to_scale = []
    
    try: 
        features_to_scale.append(flight_dict['distance'])
    except:
        features_to_scale.append(feature_means['distance'])
        
    try: 
        features_to_scale.append(airport_delay_lookup[flight_dict['airport_orig']])
    except:
        features_to_scale.append(feature_means['airport_delayed'])
        
    try: 
        features_to_scale.append(airline_delay_lookup[flight_dict['airline']])
    except:
        features_to_scale.append(feature_means['airline_delayed'])
        
    try: 
        features_to_scale.append(hour_delay_lookup[flight_dict['hour']])
    except:
        features_to_scale.append(feature_means['hour_delayed'])
        
    try: 
        features_to_scale.append(month_delay_lookup[flight_dict['month']])
    except:
        features_to_scale.append(feature_means['month_delayed'])
        
    try: 
        features_to_scale.append(weekday_delay_lookup[flight_dict['day_of_week']])
    except:
        features_to_scale.append(feature_means['weekday_delayed'])
        
    try: 
        features_to_scale.append(model_delay_lookup[flight_dict['aircraft_type']])
    except:
        features_to_scale.append(feature_means['model_delayed'])
        
    try: 
        features_to_scale.append(engine_delay_lookup[flight_dict['engine']])
    except:
        features_to_scale.append(feature_means['engine_delayed'])
        
    try: 
        features_to_scale.append(aircraft_delay_lookup[flight_dict['tail_num']])
    except:
        features_to_scale.append(feature_means['aircraft_delayed'])
        
    try: 
        features_to_scale.append(flight_dict['temp_f'])
    except:
        features_to_scale.append(feature_means['min_temp_orig'])
        
    try: 
        features_to_scale.append(flight_dict['wind'])
    except:
        features_to_scale.append(feature_means['avg_wing_orig'])
    
    arr_no_scale = np.array([features_no_scale])
    arr_to_scale = np.array([features_to_scale])
    scaled = scaler.transform(arr_to_scale)
    print(np.around(np.concatenate((arr_no_scale,arr_to_scale),axis=1),2))
    return np.concatenate([arr_no_scale,scaled],axis=1)

In [12]:
import math

def predict(features):
    exp = sum(features[0] * model_coefs)
    return 1/(1+math.e**(-exp))

In [13]:
flight_info = get_flight_info('AAL302')

In [14]:
flight_info

{'unique_id': 'AAL302-1603866356-airline-0320',
 'tail_num': 'N102NN',
 'aircraft_type': 'A321',
 'airport_orig': 'JFK',
 'airport_dest': 'LAX',
 'airline': 'AA',
 'month': 10,
 'hour': 17,
 'day_of_week': 5,
 'snow': 0,
 'rain': 1,
 'wind': 17.3,
 'temp_f': 52.0,
 'clouds': 'Raining',
 'distance': 2475.0,
 'orig_city': 'New York, NY',
 'destination_city': 'Los Angeles, CA',
 'engine': 'IAE',
 'engine_manufacturer': 'International Aero Engines',
 'year_plane_made': 2013,
 'aircraft_age': 7,
 'previous_delay': 0}

In [13]:
engine_delay_lookup[flight_info['engine']]

0.234006701020828

In [14]:
tail_engine_convert[flight_info['tail_num']]

'IAE'

In [44]:
flight_info['distance'] = 1000

In [50]:
flight_info

{'unique_id': 'UAL277-1603717332-fa-0002',
 'tail_num': 'N485UA',
 'aircraft_type': 'A320',
 'airport_orig': 'IAD',
 'airport_dest': 'SFO',
 'airline': 'UA',
 'month': 10,
 'hour': 6,
 'day_of_week': 4,
 'snow': 0,
 'rain': 0,
 'wind': 4.6,
 'temp_f': 50.0,
 'clouds': 'Overcast skies',
 'distance': 1000,
 'orig_city': 'Washington, DC',
 'destination_city': 'San Francisco, CA',
 'engine': 'IAE',
 'engine_manufacturer': 'International Aero Engines',
 'year_plane_made': 2001,
 'aircraft_age': 19,
 'previous_delay': 0}

In [87]:
features = get_prediction_variables(flight_info)

[[0.0e+00 0.0e+00 0.0e+00 1.0e+03 2.0e-01 2.2e-01 8.0e-02 1.6e-01 2.2e-01
  2.2e-01 2.3e-01 1.9e-01 5.0e+01 4.6e+00]]


In [88]:
predict(features)

0.25528652792834455

In [77]:
scaled = scaler.transform(features[0][3:])

ValueError: Expected 2D array, got 1D array instead:
array=[-5.21777902e-02 -1.70739183e-11  4.97868740e-01 -1.67598783e+00
 -1.05281717e+00  1.40393755e+00  1.38274275e+00  1.81131332e+00
 -1.95685907e-01 -1.54706650e-01 -9.60905056e-01].
Reshape your data either using array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample.

In [55]:
sum(features[0]*model_coefs)

-1.070612989872058

In [47]:
predict(features)

-1.070612989872058

In [81]:
new = np.array([[3000,.3,.3,.3,.3,.2,.2,.2,.2,50,8]])
new_scaled = scaler.transform(new)
new_scaled

array([[ 2.90733544,  3.29407139,  2.52601507,  1.47819742,  3.04603024,
         0.29666814,  0.27043791,  0.19233638,  0.07556308, -0.15470665,
        -0.02177229]])

In [82]:
a = np.array([[0,0,0]])
predict(np.concatenate((a,new_scaled),axis=1))

1.856310622821233

In [15]:
def get_flight_info(flight_number):
    '''
    Input: a flight number
    Returns: a dictionary with all information needed for delay prediction, as well as consumer-facing variables 
    such as airport names.
    '''
    
    flight_info = {}
    
    flight_details = api.service.FlightInfoEx(flight_number,1)
    fa_id = flight_details[1][0]['faFlightID']  # FlightAware's unique code needed to grab other info
    flight_info['unique_id'] = fa_id
    
    tail = api.service.AirlineFlightInfo(fa_id)['tailnumber']  # the FAA registration number
    
    flight_info['tail_num'] = tail
    flight_info['aircraft_type'] = flight_details[1][0]['aircrafttype'] # the aircraft model, such as Boeing 737
    airport = airport_code_converter[flight_details[1][0]['origin']]
    flight_info['airport_orig'] = airport
    flight_info['airport_dest'] = airport_code_converter[flight_details[1][0]['destination']]
    flight_info['airline'] = airline_code_converter[flight_details[1][0]['ident'][:3]]
    
    depart_unix_time = flight_details[1][0]['filed_departuretime']
    depart_timestamp = datetime.datetime.fromtimestamp(depart_unix_time)
    
    flight_info['month'] = depart_timestamp.month
    flight_info['hour'] = depart_timestamp.hour
    flight_info['day_of_week'] = depart_timestamp.weekday() + 1
    
    weather = api.service.MetarEx(airport,howMany=1)
    
    flight_info['snow'] = int('snow' in weather['metar'][0]['cloud_friendly'].lower())  # snow as a binary variable
    flight_info['rain'] = int('rain' in weather['metar'][0]['cloud_friendly'].lower())  # rain as a binary variable
    flight_info['wind'] = round(weather['metar'][0]['wind_speed'] * 1.151,1)  # convert from nots to mph 
    flight_info['temp_f'] = round(weather['metar'][0]['temp_air']*(9/5) +32,0)  # convert from celcius to fahrenheit
    flight_info['clouds'] = weather['metar'][0]['cloud_friendly']
    
    # obtain the lat and long coordinates of both airports from my original data, or query the API if unavailable
    try:
        port1_coords = airport_coord_lookup[flight_info['airport_orig']]
        port2_coords = airport_coord_lookup[flight_info['airport_dest']]
        flight_info['distance'] = round(geopy.distance.distance(port1_coords,port2_coords).miles,0)
    except:
        orig = api.service.AirportInfo(flight_details[1][0]['origin'])
        dest = api.service.AirportInfo(flight_details[1][0]['destination'])
        port1_coords = (orig['latitude'],orig['longitude'])
        port2_coords = (dest['latitude'],dest['longitude'])
        flight_info['distance'] = round(geopy.distance.distance(port1_coords,port2_coords).miles,0)
    
    flight_info['orig_city'] = flight_details[1][0]['originCity']
    flight_info['destination_city'] = flight_details[1][0]['destinationCity']
    
    # lookup the engine info based on tail number, or nan if tail number is not in the data source
    try:
        flight_info['engine'] = tail_engine_convert[tail]
        flight_info['engine_manufacturer'] = engine_manufacturer_lookup[flight_info['engine']]
    except:
        flight_info['engine'] = np.nan
        flight_info['engine_manufacturer'] = np.nan
    
    # same thing for the aircraft age
    try: 
        flight_info['year_plane_made'] = aircraft_age_lookup[tail]
        flight_info['aircraft_age'] = 2020 - aircraft_age_lookup[tail]
    except:
        flight_info['year_plane_made'] = np.nan
        flight_info['aircraft_age'] = np.nan
        
    # determine if the previous flight of that exact aircraft was delayed by comparing actual depart time to 
    # scheduled depart time
    
    previous_flight = api.service.FlightInfoEx(flight_number,1)
    if previous_flight[1][0]['actualdeparturetime'] > previous_flight[1][0]['filed_departuretime']:
        flight_info['previous_delay'] = 1
    else:
        flight_info['previous_delay'] = 0
    
    return flight_info

In [18]:
flight = get_flight_info('DAL1202')

In [19]:
flight

{'unique_id': 'DAL1202-1603944633-fa-0003',
 'tail_num': 'N815DN',
 'aircraft_type': 'B739',
 'airport_orig': 'MSP',
 'airport_dest': 'SFO',
 'airline': 'DL',
 'month': 10,
 'hour': 10,
 'day_of_week': 6,
 'snow': 0,
 'rain': 0,
 'wind': 6.9,
 'temp_f': 34.0,
 'clouds': 'Overcast skies',
 'distance': 1589.0,
 'orig_city': 'Minneapolis, MN',
 'destination_city': 'San Francisco, CA',
 'engine': nan,
 'engine_manufacturer': nan,
 'year_plane_made': nan,
 'aircraft_age': nan,
 'previous_delay': 0}

In [32]:
flights = api.service.FlightInfoEx('DAL1202',5)

In [33]:
for f in flights[1]:
    print(f)

(FlightExStruct){
   faFlightID = "DAL1202-1603944633-fa-0003"
   ident = "DAL1202"
   aircrafttype = "B739"
   filed_ete = "03:52:00"
   filed_time = 1603944633
   filed_departuretime = 1604157900
   filed_airspeed_kts = 357
   filed_airspeed_mach = None
   filed_altitude = 0
   route = None
   actualdeparturetime = 0
   estimatedarrivaltime = 1604171820
   actualarrivaltime = 0
   diverted = None
   origin = "KMSP"
   destination = "KSFO"
   originName = "Minneapolis/St Paul Intl"
   originCity = "Minneapolis, MN"
   destinationName = "San Francisco Intl"
   destinationCity = "San Francisco, CA"
 }
(FlightExStruct){
   faFlightID = "DAL1202-1603858305-fa-0002"
   ident = "DAL1202"
   aircrafttype = "B739"
   filed_ete = "03:52:00"
   filed_time = 1603858305
   filed_departuretime = 1604071500
   filed_airspeed_kts = 357
   filed_airspeed_mach = None
   filed_altitude = 360
   route = "DWN8 ABR J32 LLC LEGGS BDEGA3"
   actualdeparturetime = 0
   estimatedarrivaltime = 1604085420
   ac

In [37]:
time = flights[1][4]['filed_departuretime']

In [39]:
datetime.datetime.fromtimestamp(time).date()

datetime.date(2020, 10, 27)

In [41]:
datetime.datetime.today().date()

datetime.date(2020, 10, 29)

In [59]:
for i in range(len(flights[1])):
    if datetime.datetime.fromtimestamp(flights[1][i]['filed_departuretime']).date() == datetime.datetime.today().date():
        my_flight = flights[1][i]
        break
    else:
        my_flight = flights[1][0]

In [60]:
datetime.datetime.fromtimestamp(my_flight['filed_departuretime'])

datetime.datetime(2020, 10, 29, 10, 25)

In [55]:
datetime.datetime.fromtimestamp(flights[1][2]['filed_departuretime']).date() == datetime.datetime.today().date()

True

In [56]:
len(flights)

2

In [62]:
flights[1][0]

<suds.sudsobject.FlightExStruct at 0x7fd4182d3970>

In [63]:
for f in flights[1][0]:
    print(f)

('faFlightID', 'DAL1202-1603944633-fa-0003')
('ident', 'DAL1202')
('aircrafttype', 'B739')
('filed_ete', '03:52:00')
('filed_time', 1603944633)
('filed_departuretime', 1604157900)
('filed_airspeed_kts', 357)
('filed_airspeed_mach', None)
('filed_altitude', 0)
('route', None)
('actualdeparturetime', 0)
('estimatedarrivaltime', 1604171820)
('actualarrivaltime', 0)
('diverted', None)
('origin', 'KMSP')
('destination', 'KSFO')
('originName', 'Minneapolis/St Paul Intl')
('originCity', 'Minneapolis, MN')
('destinationName', 'San Francisco Intl')
('destinationCity', 'San Francisco, CA')


In [64]:
with open('data/airline_code_converter.pickle','rb') as read_file:
    a = pickle.load(read_file)
    
a

{'ASA': 'AS',
 'AAY': 'G4',
 'AAL': 'AA',
 'DAL': 'DL',
 'FFT': 'F9',
 'HAL': 'HA',
 'JBU': 'B6',
 'SWA': 'WN',
 'NKS': 'NK',
 'SCX': 'SY',
 'UAL': 'UA'}

In [65]:
with open('data/airline_name_lookup.pickle','rb') as read_file:
    b = pickle.load(read_file)
    
b

{'AS': 'Alaska Airlines',
 'G4': 'Allegiant Air',
 'AA': 'American Airlines',
 'DL': 'Delta Air Lines',
 'F9': 'Frontier Airlines',
 'HA': 'Hawaiian Airlines',
 'B6': 'JetBlue Airways',
 'WN': 'Southwest Airlines',
 'NK': 'Spirit Airlines',
 'SY': 'Sun Country Airlines',
 'UA': 'United Airlines'}