In [1]:
import requests
import json
from pprint import pprint 
import pandas as pd
from sqlalchemy import create_engine, inspect
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import Session
import sqlite3
# Declarative base for SQLAlchemy ORM models. No mapped classes are declared on it
# in the cells shown here, so its metadata is empty when create_all() is called later.
Base = declarative_base()

In [2]:
# Read the combined per-airline CSV exports into one DataFrame each.
American_Alaskan = pd.read_csv("Resources/CSV/Combined/AA_concat.csv")
Delta = pd.read_csv("Resources/CSV/Combined/Delta_Airlines.csv")
Southwest = pd.read_csv("Resources/CSV/Combined/Southwest_Airlines.csv")
United = pd.read_csv("Resources/CSV/Combined/United_Airlines.csv")

# Header fixes shared between the frames, so all four end up with matching column names.
date_header = {'Date (MM/DD/YYYY)': 'Date'}
carrier_header = {'Carrier Code': 'Carrier_Code'}
airport_headers = {'Origin Airport': 'Origin_Airport',
                   'Destination Airport': 'Destination_Airport'}

# American/Alaska: rename origin, carrier, destination and date headers, then
# build AA without the 'Unnamed: 0' column.
American_Alaskan = American_Alaskan.rename(
    columns={**airport_headers, **carrier_header, **date_header}
)
AA = American_Alaskan.drop(columns='Unnamed: 0')

# Delta: rename carrier and date headers, then overwrite the carrier column
# with the literal airline name.
Delta = (
    Delta
    .rename(columns={**carrier_header, **date_header})
    .assign(Carrier_Code="Delta Airlines")
)

# Southwest and United only need the date header renamed.
Southwest = Southwest.rename(columns=date_header)
United = United.rename(columns=date_header)

In [3]:
# Open (or create) the local SQLite database file used by the rest of the notebook.
engine = create_engine('sqlite:///flights.sqlite', echo=False)

# Inspector bound to that engine, for schema introspection.
inspector = inspect(engine)

# Emit CREATE TABLE for every model registered on Base (none are declared in the cells shown here).
Base.metadata.create_all(engine)

# ORM session bound to the same engine.
session = Session(bind=engine)

In [4]:
# Rebuild the `flights` table from scratch and load every airline into it.
con = engine
con.execute('DROP TABLE IF EXISTS flights')

# The first write creates the table. NOTE: AA is written with to_sql's default
# index handling while the appended frames pass index=False, so the table gets
# a leading `index` column that is only populated for the AA rows.
AA.to_sql('flights', con)

# The remaining carriers are appended to the table created above.
for carrier_frame in (Delta, Southwest, United):
    carrier_frame.to_sql('flights', con, if_exists='append', index=False)

# Total number of rows now stored in the database.
engine.execute("SELECT COUNT(*) FROM flights").fetchall()

[(2365604,)]

In [5]:
# The full table is too large, so remove every year other than 2018.
# One DELETE is issued per unwanted year, matching the year anywhere in the Date text.
for unwanted_year in ("2019", "2017", "2016", "2015", "2014"):
    engine.execute("DELETE FROM flights WHERE Date LIKE '%{}%'".format(unwanted_year))

# Row count remaining after the deletes.
engine.execute("SELECT COUNT(*) FROM flights").fetchall()

[(496336,)]

In [6]:
# Run SQLite's VACUUM to rebuild the main database file after the deletes above.
# The author recorded the file shrinking from 217 MB to 46,232 KB (roughly 45 MB).
engine.execute("vacuum")

<sqlalchemy.engine.result.ResultProxy at 0x121a74f28>

In [11]:
# Load the flights table into a DataFrame and keep only the columns of interest.
#
# read_sql_query keeps the SQL column names, so columns are selected and renamed
# by name. The earlier version wrapped fetchall() in a DataFrame, which discarded
# the headers and forced a mapping by position (0..18). That mapping only lined
# up because the table carries a leading `index` column, so it would silently
# pick the wrong columns if the table layout ever changed.
flights_raw = pd.read_sql_query("SELECT * FROM flights", engine)

# Source header -> short name used in the rest of the notebook, in output order.
# Columns not listed here (the stored index, flight and tail numbers, clock
# times and taxi-out time) are left out of the result.
flights_column_names = {
    'Carrier_Code': 'Carrier_Code',
    'Date': 'Date',
    'Destination_Airport': 'Destination_Airport',
    'Scheduled elapsed time (Minutes)': 'Scheduled_Elapsed_Time',
    'Actual elapsed time (Minutes)': 'Actual_Elapsed_Time',
    'Departure delay (Minutes)': 'Departure_Delay',
    'Delay Carrier (Minutes)': 'Delay_Carrier',
    'Delay Weather (Minutes)': 'Delay_Weather',
    'Delay National Aviation System (Minutes)': 'Delay_National_Aviation_Sys',
    'Delay Security (Minutes)': 'Delay_Security',
    'Delay Late Aircraft Arrival (Minutes)': 'Delay_Late_Aircraft_Arrival',
    'Origin_Airport': 'Origin_Airport',
}

# Selecting by name raises a KeyError naming any expected column that is missing,
# instead of quietly mislabelling data.
flights_db = (
    flights_raw[list(flights_column_names)]
    .rename(columns=flights_column_names)
)

flights_db

Unnamed: 0,Carrier_Code,Date,Destination_Airport,Scheduled_Elapsed_Time,Actual_Elapsed_Time,Departure_Delay,Delay_Carrier,Delay_Weather,Delay_National_Aviation_Sys,Delay_Security,Delay_Late_Aircraft_Arrival,Origin_Airport
0,American Airlines,01/01/2018,DFW,155.0,153.0,1.0,0.0,0.0,0.0,0.0,0.0,ATL
1,American Airlines,01/01/2018,DFW,156.0,132.0,-2.0,0.0,0.0,0.0,0.0,0.0,ATL
2,American Airlines,01/01/2018,DFW,153.0,135.0,-4.0,0.0,0.0,0.0,0.0,0.0,ATL
3,American Airlines,01/01/2018,DFW,151.0,161.0,-3.0,0.0,0.0,0.0,0.0,0.0,ATL
4,American Airlines,01/01/2018,LAX,317.0,296.0,-4.0,0.0,0.0,0.0,0.0,0.0,ATL
...,...,...,...,...,...,...,...,...,...,...,...,...
496331,United Airlines,12/31/2018,DEN,140.0,164.0,-13.0,0.0,0.0,0.0,0.0,0.0,PDX
496332,United Airlines,12/31/2018,ORD,228.0,243.0,-4.0,0.0,0.0,0.0,0.0,0.0,PDX
496333,United Airlines,12/31/2018,SFO,108.0,87.0,62.0,2.0,0.0,0.0,0.0,39.0,PDX
496334,United Airlines,12/31/2018,DEN,139.0,158.0,3.0,0.0,0.0,22.0,0.0,0.0,PDX


In [12]:
# Discard the per-cause delay columns, leaving the core flight fields.
delay_breakdown_columns = [
    "Delay_Carrier",
    "Delay_Weather",
    "Delay_National_Aviation_Sys",
    "Delay_Security",
    "Delay_Late_Aircraft_Arrival",
]
flights_db = flights_db.drop(columns=delay_breakdown_columns)
flights_db

Unnamed: 0,Carrier_Code,Date,Destination_Airport,Scheduled_Elapsed_Time,Actual_Elapsed_Time,Departure_Delay,Origin_Airport
0,American Airlines,01/01/2018,DFW,155.0,153.0,1.0,ATL
1,American Airlines,01/01/2018,DFW,156.0,132.0,-2.0,ATL
2,American Airlines,01/01/2018,DFW,153.0,135.0,-4.0,ATL
3,American Airlines,01/01/2018,DFW,151.0,161.0,-3.0,ATL
4,American Airlines,01/01/2018,LAX,317.0,296.0,-4.0,ATL
...,...,...,...,...,...,...,...
496331,United Airlines,12/31/2018,DEN,140.0,164.0,-13.0,PDX
496332,United Airlines,12/31/2018,ORD,228.0,243.0,-4.0,PDX
496333,United Airlines,12/31/2018,SFO,108.0,87.0,62.0,PDX
496334,United Airlines,12/31/2018,DEN,139.0,158.0,3.0,PDX


In [13]:
# Write the trimmed flights frame to disk as JSON, using pandas' 'table' orientation.
flights_db.to_json(path_or_buf=r"Resources/json/flights.json", orient='table')

In [14]:
# Group the flights by carrier and preview the first five rows of each group.
airlines = flights_db.groupby(by="Carrier_Code")
airlines.head(n=5)

Unnamed: 0,Carrier_Code,Date,Destination_Airport,Scheduled_Elapsed_Time,Actual_Elapsed_Time,Departure_Delay,Origin_Airport
0,American Airlines,01/01/2018,DFW,155.0,153.0,1.0,ATL
1,American Airlines,01/01/2018,DFW,156.0,132.0,-2.0,ATL
2,American Airlines,01/01/2018,DFW,153.0,135.0,-4.0,ATL
3,American Airlines,01/01/2018,DFW,151.0,161.0,-3.0,ATL
4,American Airlines,01/01/2018,LAX,317.0,296.0,-4.0,ATL
115941,Alaska Airlines,01/01/2018,SEA,345.0,331.0,6.0,ATL
115942,Alaska Airlines,01/01/2018,SEA,345.0,343.0,-4.0,ATL
115943,Alaska Airlines,01/02/2018,SEA,345.0,311.0,16.0,ATL
115944,Alaska Airlines,01/02/2018,SEA,345.0,328.0,10.0,ATL
115945,Alaska Airlines,01/03/2018,SEA,345.0,340.0,18.0,ATL


In [16]:
# Display the cleaned American/Alaska frame built from the CSV. This is the in-memory
# frame, so it is not affected by the deletes run against the SQLite table.
AA

Unnamed: 0,Carrier_Code,Date,Flight Number,Tail Number,Destination_Airport,Scheduled departure time,Actual departure time,Scheduled elapsed time (Minutes),Actual elapsed time (Minutes),Departure delay (Minutes),Wheels-off time,Taxi-Out time (Minutes),Delay Carrier (Minutes),Delay Weather (Minutes),Delay National Aviation System (Minutes),Delay Security (Minutes),Delay Late Aircraft Arrival (Minutes),Origin_Airport
0,American Airlines,01/01/2015,125.0,N585AA,DFW,17:55,17:54,155.0,135.0,-1.0,18:08,14.0,0.0,0.0,0.0,0.0,0.0,ATL
1,American Airlines,01/01/2015,194.0,N025AA,DFW,15:25,15:34,145.0,149.0,9.0,15:48,14.0,0.0,0.0,0.0,0.0,0.0,ATL
2,American Airlines,01/01/2015,232.0,N599AA,DFW,19:00,18:58,150.0,177.0,-2.0,19:15,17.0,0.0,0.0,25.0,0.0,0.0,ATL
3,American Airlines,01/01/2015,276.0,N4YCAA,DFW,20:15,20:29,145.0,189.0,14.0,21:40,71.0,0.0,0.0,44.0,0.0,14.0,ATL
4,American Airlines,01/01/2015,314.0,N585AA,DFW,11:55,12:10,150.0,149.0,15.0,12:26,16.0,0.0,0.0,0.0,0.0,0.0,ATL
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
853733,Alaska Airlines,12/31/2018,1936.0,N837VA,LAX,17:00,17:07,95.0,85.0,7.0,17:31,24.0,0.0,0.0,0.0,0.0,0.0,SFO
853734,Alaska Airlines,12/31/2018,1938.0,N842VA,LAX,19:00,18:52,95.0,91.0,-8.0,19:13,21.0,0.0,0.0,0.0,0.0,0.0,SFO
853735,Alaska Airlines,12/31/2018,1940.0,N522VA,LAS,19:35,19:21,94.0,89.0,-14.0,19:39,18.0,0.0,0.0,0.0,0.0,0.0,SFO
853736,Alaska Airlines,12/31/2018,1946.0,N842VA,LAX,09:00,08:53,95.0,95.0,-7.0,09:16,23.0,0.0,0.0,0.0,0.0,0.0,SFO


In [15]:
# Pull every row of the flights table and show it with no renaming or column dropping,
# so the raw positional layout of the table (columns 0..18) can be inspected.
raw_flight_rows = engine.execute("SELECT * FROM flights").fetchall()
flights_db2 = pd.DataFrame(raw_flight_rows)
flights_db2

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18
0,33.0,American Airlines,01/01/2018,607.0,N604AW,DFW,17:30,17:31,155.0,153.0,1.0,17:43,12.0,0.0,0.0,0.0,0.0,0.0,ATL
1,34.0,American Airlines,01/01/2018,610.0,N679AW,DFW,15:59,15:57,156.0,132.0,-2.0,16:08,11.0,0.0,0.0,0.0,0.0,0.0,ATL
2,35.0,American Airlines,01/01/2018,629.0,N663AW,DFW,19:40,19:36,153.0,135.0,-4.0,19:52,16.0,0.0,0.0,0.0,0.0,0.0,ATL
3,36.0,American Airlines,01/01/2018,841.0,N658AW,DFW,10:00,09:57,151.0,161.0,-3.0,10:14,17.0,0.0,0.0,0.0,0.0,0.0,ATL
4,37.0,American Airlines,01/01/2018,1249.0,N975AN,LAX,15:45,15:41,317.0,296.0,-4.0,15:57,16.0,0.0,0.0,0.0,0.0,0.0,ATL
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
496331,,United Airlines,12/31/2018,1072.0,N814UA,DEN,16:03,15:50,140.0,164.0,-13.0,16:01,11.0,0.0,0.0,0.0,0.0,0.0,PDX
496332,,United Airlines,12/31/2018,2004.0,N68817,ORD,7:05,7:01,228.0,243.0,-4.0,7:18,17.0,0.0,0.0,0.0,0.0,0.0,PDX
496333,,United Airlines,12/31/2018,2054.0,N54241,SFO,14:36,15:38,108.0,87.0,62.0,15:47,9.0,2.0,0.0,0.0,0.0,39.0,PDX
496334,,United Airlines,12/31/2018,2223.0,N69838,DEN,13:37,13:40,139.0,158.0,3.0,13:52,12.0,0.0,0.0,22.0,0.0,0.0,PDX
