In [None]:
# import dependencies
import requests
import json
import pandas as pd
import datetime as dt
import numpy as np
from pprint import pprint
from collections import defaultdict

In [None]:
# import modules
from config import conn_str  
from config import api_key
from dboperations import *  

In [None]:
# Open the database connection shared by the lookup, insert and close calls in
# the cells below. conn_str is imported from config; connect2db is not defined
# in this notebook (presumably provided by the dboperations star import).
conn = connect2db(conn_str)

In [None]:
# build the ISO-formatted flight dates used as the api parameter, newest first
start_date = dt.date(2021, 4, 24)
number_of_days = 3  # 3 while testing; to be replaced by the past-3-months value

cities = ['LAX', 'ATL']

# step back one day at a time, starting with start_date itself (offset 0)
flight_dates = [
    (start_date - dt.timedelta(days=offset)).isoformat()
    for offset in range(number_of_days)
]

print(flight_dates)

In [None]:
# get aviationstack.com api response for every (arrival city, flight date) pair
# and collect the fields of each returned flight record into column-wise dicts
airlines = defaultdict(list)
flights = defaultdict(list)
airports = defaultdict(list)  # not filled in this cell; kept so the name stays defined

# keys copied from each record's nested 'departure' / 'arrival' objects, listed
# in the order the resulting flights columns are created
departure_keys = ('airport', 'timezone', 'iata', 'icao', 'terminal', 'gate', 'delay',
                  'scheduled', 'estimated', 'actual', 'estimated_runway', 'actual_runway')
arrival_keys = ('airport', 'timezone', 'iata', 'icao', 'terminal', 'gate', 'baggage', 'delay',
                'scheduled', 'estimated', 'actual', 'estimated_runway', 'actual_runway')

for city in cities:

    for flight_date in flight_dates:

        # filter on the city of the current iteration (this used to be hard-coded
        # to 'LAX', so LAX was fetched once per city and other cities never)
        params = {'access_key': api_key, 'arr_iata': city, 'flight_date': flight_date}

        # the timeout keeps a stalled connection from hanging the notebook forever
        api_link = requests.get('https://api.aviationstack.com/v1/flights',
                                params=params, timeout=30)
        response = api_link.json()

        if 'data' not in response:
            # surface the API's own error payload instead of an opaque KeyError;
            # the request URL is left out on purpose because it contains the access key
            raise RuntimeError(
                f"aviationstack returned no data for {city} on {flight_date} "
                f"(HTTP {api_link.status_code}): {response.get('error', response)}"
            )

        for data in response['data']:
            airlines['airline'].append(data['airline']['name'])
            airlines['iata'].append(data['airline']['iata'])
            airlines['icao'].append(data['airline']['icao'])

            flights['flight_number'].append(data['flight']['number'])
            flights['iata'].append(data['flight']['iata'])
            flights['icao'].append(data['flight']['icao'])

            for key in departure_keys:
                flights[f'departure_{key}'].append(data['departure'][key])

            for key in arrival_keys:
                flights[f'arrival_{key}'].append(data['arrival'][key])

In [None]:
# pprint(list (flights.items()))  

In [None]:
# create dataframes from dictionaries
# airlines and flights are the defaultdict(list) containers filled by the API
# loop; every dict key becomes one DataFrame column.
airlines_df = pd.DataFrame(airlines)
flights_df = pd.DataFrame(flights) 

# preview the first rows of the raw flights frame
flights_df.head()

In [None]:
# clean airline frame for table: keep only complete, unique airlines
airline_key_cols = ['airline', 'iata', 'icao']
airlines_df = airlines_df.dropna(subset = airline_key_cols).drop_duplicates(subset = airline_key_cols)

# a 1-based index, turned into a column by reset_index(), serves as the airline id
airlines_df.index = np.arange(1, len(airlines_df) + 1)
airlines_df = airlines_df.reset_index().rename(columns = {"index": "airline_id", "name": "airline"})

In [None]:
# preview the cleaned airlines frame (with its airline_id column) before it is inserted
airlines_df.head()

In [None]:
# insert the cleaned airlines frame into the "airlines" table over the open connection
# (insertvalues is not defined in this notebook; presumably from dboperations)
insertvalues(conn, "airlines", airlines_df)

In [None]:
# create new frame for airports: every distinct airport seen on either the
# departure or the arrival side of a flight, numbered from 1
def _airports_for(flights, side):
    """Return the distinct airports found on one side of the flights frame.

    flights -- frame holding '<side>_airport', '<side>_iata', '<side>_icao'
               and '<side>_timezone' columns
    side    -- 'departure' or 'arrival'

    The result has the side-neutral columns airport, iata, icao, timezone
    (in that order) with exact duplicate rows removed.
    """
    renames = {f'{side}_airport': 'airport', f'{side}_iata': 'iata',
               f'{side}_icao': 'icao', f'{side}_timezone': 'timezone'}
    return flights[list(renames)].rename(columns = renames).drop_duplicates()

# airports from departures
airports_d_df = _airports_for(flights_df, 'departure')

# airports from arrivals (previously derived from the departure frame by
# mistake, which silently dropped every arrival airport)
airports_a_df = _airports_for(flights_df, 'arrival')

# combine; an airport appearing on both sides is kept once
airports_df = pd.concat([airports_a_df, airports_d_df])
airports_df = airports_df.drop_duplicates(subset = ['airport', 'iata', 'icao'])

# a 1-based index, turned into a column by reset_index(), serves as the airport id
airports_df.index = np.arange(1, len(airports_df) + 1)
airports_df = airports_df.reset_index().rename(columns = {"index": "airport_id"})

In [None]:
# preview the combined airports frame (with its airport_id column) before it is inserted
airports_df.head()

In [None]:
# insert the airports frame into the "airports" table over the open connection;
# the airport id lookups in later cells query this table
insertvalues(conn, "airports", airports_df)

In [None]:
# list(flights_df.columns)

In [None]:
def find_airport_id(conn, airport):
    """Look up the airport_id of an airport by its name.

    conn    -- open database connection, passed through to executestatement
    airport -- airport name as stored in the airports.airport column

    Returns the airport_id, or None when the name is missing or the
    statement yields no row.
    """
    if airport is None:
        # nothing to look up; avoids querying for the literal string 'None'
        return None

    # Double embedded single quotes so names such as "Chicago O'Hare International"
    # stay inside the SQL string literal instead of terminating it.
    # NOTE(review): if executestatement accepts bound parameters, prefer those.
    safe_airport = str(airport).replace("'", "''")
    query = f"select airport_id from airports where airport = '{safe_airport}'"

    row = executestatement(conn, query)
    return row.get('airport_id') if row else None

In [None]:
def find_airline_id(conn, airline):
    """Look up the airline_id of an airline by its name.

    conn    -- open database connection, passed through to executestatement
    airline -- airline name as stored in the airlines.airline column

    Returns the airline_id, or None when the name is missing or the
    statement yields no row.
    """
    if airline is None:
        # nothing to look up; avoids querying for the literal string 'None'
        return None

    # Double embedded single quotes so a name containing an apostrophe stays
    # inside the SQL string literal instead of terminating it.
    # NOTE(review): if executestatement accepts bound parameters, prefer those.
    safe_airline = str(airline).replace("'", "''")
    query = f"select airline_id from airlines where airline = '{safe_airline}'"

    row = executestatement(conn, query)
    return row.get('airline_id') if row else None

In [None]:
# result = find_table_id(conn, 'airports', 'airport_id', 'Chicago O''hare International' ) #'Love Field')
# #result = find_airport_id('Chicago O''hare International')
# result

In [None]:
# departures: one row per fetched flight, described from the departure side
# maps each departure-specific column to the side-neutral name shared with arrivals
departure_renames = {'departure_terminal': 'terminal', 'departure_gate': 'gate',
                     'departure_delay': 'delay', 'departure_scheduled': 'scheduled',
                     'departure_estimated': 'estimated', 'departure_actual': 'actual',
                     'departure_estimated_runway': 'estimated_runway',
                     'departure_actual_runway': 'actual_runway'}

# all rows are kept: the former `.head()  # test` truncation limited departures
# to five rows while the arrivals cell processed every flight
flights_d_df = flights_df[['flight_number', 'iata', 'icao', 'departure_airport', *departure_renames]]

# NOTE(review): airline_id is only a placeholder; no visible cell fills it,
# although find_airline_id is defined above.
flights_d_df = flights_d_df.assign(flight_type = 'DEPARTURE', airline_id = None)

# resolve each departure airport name to its id in the airports table (one query per row)
flights_d_df['airport_id'] = flights_d_df['departure_airport']. \
                             apply(lambda element: find_airport_id(conn, element))

flights_d_df = flights_d_df.rename(columns = departure_renames)

# preview only; the full frame is combined with the arrivals in a later cell
flights_d_df.head()

In [None]:
# arrivals: one row per fetched flight, described from the arrival side
# arrival-specific fields, renamed below to the side-neutral names used for the table
arrival_fields = ['terminal', 'gate', 'baggage', 'delay', 'scheduled',
                  'estimated', 'actual', 'estimated_runway', 'actual_runway']
arrival_renames = {f'arrival_{field}': field for field in arrival_fields}

flights_a_df = flights_df[['flight_number', 'iata', 'icao', 'arrival_airport', *arrival_renames]]

# tag the rows as arrivals; airline_id is created as an empty placeholder column
flights_a_df = flights_a_df.assign(flight_type = 'ARRIVAL', airline_id = None)

# resolve each arrival airport name to its id in the airports table (one query per row)
arrival_airport_names = flights_a_df['arrival_airport']
flights_a_df['airport_id'] = arrival_airport_names.apply(lambda name: find_airport_id(conn, name))

flights_a_df = flights_a_df.rename(columns = arrival_renames)
flights_a_df.head()

In [None]:
# combine departure and arrival flights into the frame inserted into the flights table
# column order expected for the table insert; selecting exactly these columns
# also leaves out the helper columns departure_airport / arrival_airport
# (the earlier drop() call discarded its result and therefore did nothing)
flight_columns = ['flight_number', 'flight_type', 'iata', 'icao', 'airport_id', 'airline_id',
                  'terminal', 'gate', 'baggage', 'delay', 'scheduled', 'estimated', 'actual',
                  'estimated_runway', 'actual_runway']

all_flights_df = pd.concat([flights_d_df, flights_a_df])

# rows without a flight identifier or a resolved airport cannot be stored
all_flights_df = all_flights_df.dropna(subset = ['flight_number', 'iata', 'icao', 'airport_id'])

all_flights_df = all_flights_df[flight_columns]

# a 1-based index, turned into a column by reset_index(), serves as the flight id
all_flights_df.index = np.arange(1, len(all_flights_df) + 1)
all_flights_df = all_flights_df.reset_index().rename(columns = {"index": "flight_id"})

all_flights_df.head()

In [None]:
# insert the combined departure + arrival frame into the "flights" table over the open connection
insertvalues(conn, "flights", all_flights_df)

In [None]:
# release the database connection opened at the top of the notebook;
# the trailing ';' suppresses the cell's output
closeconnection(conn);