## Imports

In [1]:
import pandas as pd
import numpy as np
import sqlite3
import sqlalchemy
from sqlalchemy import create_engine

In [2]:
# Relative path to the airports CSV that the next cell loads with pandas.
file_path = "../../datasets/airports.csv"

In [3]:
# Load the airports file and label its four columns ourselves.
# Because `names` is supplied, pandas treats every line as data;
# header=None only makes that default explicit.
df = pd.read_csv(
    file_path,
    header=None,
    names=['AirportID', 'Airport', 'City', 'State'],
)

In [4]:
# Show the frame as loaded; at this point City still carries the ", ST" suffix.
df

Unnamed: 0,AirportID,Airport,City,State
0,12892,LAX,"Los Angeles, CA",CA
1,12889,LAS,"Las Vegas, NV",NV
2,11995,GSO,"Greensboro/High Point, NC",NC
3,10469,AZO,"Kalamazoo, MI",MI
4,14057,PDX,"Portland, OR",OR
...,...,...,...,...
389,14534,RIW,"Riverton/Lander, WY",WY
390,12559,JST,"Johnstown, PA",PA
391,10275,ALW,"Walla Walla, WA",WA
392,14303,PUW,"Pullman, WA",WA


In [5]:
def split_city(val):
    """Return the part of a "City, ST" label that precedes the first comma.

    Parameters
    ----------
    val : str or missing value
        A label such as ``"Los Angeles, CA"``. Non-string values (for
        example the float NaN pandas uses for blank CSV cells, or None)
        are accepted and passed through.

    Returns
    -------
    str or the original value
        For strings, everything before the first comma (the whole string
        when it contains no comma). Any non-string input is returned
        unchanged.
    """
    if not isinstance(val, str):
        # Missing cells have no .split(); hand them back untouched rather
        # than raising AttributeError in the middle of an apply().
        return val
    return val.split(",")[0]

In [6]:
# Overwrite City with the value produced by split_city for each row.
df['City'] = df['City'].map(split_city)

In [7]:
# Show the frame again after the City column was overwritten in place.
df

Unnamed: 0,AirportID,Airport,City,State
0,12892,LAX,Los Angeles,CA
1,12889,LAS,Las Vegas,NV
2,11995,GSO,Greensboro/High Point,NC
3,10469,AZO,Kalamazoo,MI
4,14057,PDX,Portland,OR
...,...,...,...,...
389,14534,RIW,Riverton/Lander,WY
390,12559,JST,Johnstown,PA
391,10275,ALW,Walla Walla,WA
392,14303,PUW,Pullman,WA


In [8]:
# Fetch the airportsdata reference table straight from GitHub (needs network access).
# NOTE(review): the URL points at the `main` branch, so the contents can change
# between runs — consider pinning a commit or caching a local copy.
df_airports = pd.read_csv("https://raw.githubusercontent.com/mborsetti/airportsdata/main/airportsdata/airports.csv")

In [9]:
# Show the downloaded reference table.
df_airports

Unnamed: 0,icao,iata,name,city,subd,country,elevation,lat,lon,tz
0,00AK,,Lowell Field,Anchor Point,Alaska,US,450,59.949200,-151.695999,America/Anchorage
1,00AL,,Epps Airpark,Harvest,Alabama,US,820,34.864799,-86.770302,America/Chicago
2,00AZ,,Cordes Airport,Cordes,Arizona,US,3810,34.305599,-112.165001,America/Phoenix
3,00CA,,Goldstone /Gts/ Airport,Barstow,California,US,3038,35.350498,-116.888000,America/Los_Angeles
4,00CO,,Cass Field,Briggsdale,Colorado,US,4830,40.622200,-104.344002,America/Denver
...,...,...,...,...,...,...,...,...,...,...
28879,ZYTN,TNH,Tonghua Sanyuanpu Airport,Tonghua,Jilin,CN,1200,42.253889,125.703333,Asia/Shanghai
28880,ZYTX,SHE,Taoxian Airport,Shenyang,Liaoning,CN,198,41.639801,123.483002,Asia/Shanghai
28881,ZYXC,XEN,Xingcheng Air Base,,Liaoning,CN,30,40.580299,120.697998,Asia/Shanghai
28882,ZYYJ,YNJ,Yanji Chaoyangchuan Airport,Yanji,Jilin,CN,624,42.882801,129.451004,Asia/Shanghai


In [10]:
# Enrich the local airport list with elevation and coordinates from the
# reference table by matching the local `Airport` code against `iata`.
# NOTE(review): `iata` is blank for many reference rows and is not checked
# for uniqueness here — confirm the join cannot duplicate local airports.
airports_final = (
    df.set_index('Airport')
    .join(df_airports.set_index('iata'))
    [['AirportID', 'City', 'State', 'elevation', 'lat', 'lon']]
    .reset_index()
    # "index" is the column name reset_index() produces when the joined
    # index has no name; the remaining keys capitalise the new columns.
    .rename(
        columns={
            "index": "Airport",
            "elevation": "Elevation",
            "lat": "Lat",
            "lon": "Lon",
        }
    )
)

In [11]:
# Show the joined result.
airports_final

Unnamed: 0,Airport,AirportID,City,State,Elevation,Lat,Lon
0,ABE,10135,Allentown/Bethlehem/Easton,PA,393.0,40.652100,-75.440804
1,ABI,10136,Abilene,TX,1791.0,32.411301,-99.681900
2,ABQ,10140,Albuquerque,NM,5355.0,35.040199,-106.609001
3,ABR,10141,Aberdeen,SD,1302.0,45.449100,-98.421799
4,ABY,10146,Albany,GA,197.0,31.535500,-84.194504
...,...,...,...,...,...,...,...
389,XWA,16869,Williston,ND,2356.0,48.259833,-103.750500
390,YAK,15991,Yakutat,AK,33.0,59.503300,-139.660004
391,YKM,16101,Yakima,WA,1099.0,46.568199,-120.543999
392,YNG,16133,Youngstown/Warren,OH,1192.0,41.260700,-80.679100


In [12]:
# Write the joined table to CSV; index=False leaves the row index out of the file.
airports_final.to_csv('../../datasets/airports_final.csv', index=False)

In [13]:
# Relative path to the SQLite database file used by the cells below.
db_path = "../../dbs/delays.db"

In [14]:
# Rebuild the `airport` table from scratch so this cell can be re-run:
# any existing table is dropped, then recreated empty with the schema below.
# The columns mirror those of `airports_final`, which the next cell appends.
create_query = '''
    CREATE TABLE airport
    (
        Airport     VARCHAR,
        AirportID   INT,
        City        VARCHAR,
        State       VARCHAR,
        Elevation   REAL,
        Lat         REAL,
        Lon         REAL
    );
'''

conn = sqlite3.connect(db_path)
try:
    conn.execute("DROP TABLE IF EXISTS airport;")
    conn.execute(create_query)
    # Make the schema change durable before handing the file to SQLAlchemy.
    conn.commit()
finally:
    # Always release the file handle, even if one of the statements fails,
    # so a failed run does not leave the database open.
    conn.close()

# The SQLAlchemy engine is created in the loading cell that follows, so it
# is not created a second time here.

In [15]:
# Open a SQLAlchemy engine on the SQLite file, then append every row of
# airports_final to the existing `airport` table (the row index is not written).
engine = sqlalchemy.create_engine(f"sqlite:///{db_path}", echo=False)
airports_final.to_sql(name='airport', con=engine, index=False, if_exists='append')