In [95]:
import pandas as pd
import requests
from config import gkey

air_data = pd.read_csv("Data/final_dataset.csv")
air_data.head()

Unnamed: 0,year,month,carrier_name,airport_abbr,airport_name,city,state,total_number_flights,air_carrier_delay,weather_delay,national_aviation_system_delay,sercurity_delay,aircraft_arriving_late,flight_cancelled,flight_diverted
0,2018,September,Frontier Airlines Inc.,PBI,Palm Beach International Airport,West Palm Beach,FL,8.0,0.85,0.0,0.49,0.0,1.65,0.0,0.0
1,2018,September,Envoy Air,PBI,Palm Beach International Airport,West Palm Beach,FL,25.0,0.0,0.0,2.0,0.0,1.0,0.0,0.0
2,2018,September,United Air Lines Inc.,PBI,Palm Beach International Airport,West Palm Beach,FL,114.0,6.97,0.01,7.65,0.0,5.37,0.0,0.0
3,2018,September,Southwest Airlines Co.,PBI,Palm Beach International Airport,West Palm Beach,FL,143.0,8.14,0.0,1.49,0.0,15.36,0.0,1.0
4,2018,September,Endeavor Air Inc.,PBI,Palm Beach International Airport,West Palm Beach,FL,3.0,0.68,0.0,0.32,0.0,0.0,0.0,0.0


In [96]:
#Add 'Delay Sum' column to dataframe & preview
col_list = ["air_carrier_delay","weather_delay","national_aviation_system_delay",
            "sercurity_delay","aircraft_arriving_late","flight_cancelled","flight_diverted"]


air_data['delay_sum'] = air_data[col_list].sum(axis=1)
air_data.head()


Unnamed: 0,year,month,carrier_name,airport_abbr,airport_name,city,state,total_number_flights,air_carrier_delay,weather_delay,national_aviation_system_delay,sercurity_delay,aircraft_arriving_late,flight_cancelled,flight_diverted,delay_sum
0,2018,September,Frontier Airlines Inc.,PBI,Palm Beach International Airport,West Palm Beach,FL,8.0,0.85,0.0,0.49,0.0,1.65,0.0,0.0,2.99
1,2018,September,Envoy Air,PBI,Palm Beach International Airport,West Palm Beach,FL,25.0,0.0,0.0,2.0,0.0,1.0,0.0,0.0,3.0
2,2018,September,United Air Lines Inc.,PBI,Palm Beach International Airport,West Palm Beach,FL,114.0,6.97,0.01,7.65,0.0,5.37,0.0,0.0,20.0
3,2018,September,Southwest Airlines Co.,PBI,Palm Beach International Airport,West Palm Beach,FL,143.0,8.14,0.0,1.49,0.0,15.36,0.0,1.0,25.99
4,2018,September,Endeavor Air Inc.,PBI,Palm Beach International Airport,West Palm Beach,FL,3.0,0.68,0.0,0.32,0.0,0.0,0.0,0.0,1.0


In [97]:
# Group by airport name

airport_groups = air_data.groupby(['airport_name','airport_abbr'])['delay_sum','total_number_flights']
airport_groups_sum = airport_groups.sum()

airport_grouped_df = airport_groups_sum.reset_index()
airport_grouped_df.head()

Unnamed: 0,airport_name,airport_abbr,delay_sum,total_number_flights
0,Aberdeen Regional Airport,ABR,112.01,808.0
1,Abilene Regional Airport,ABI,498.96,2371.0
2,Abraham Lincoln Capital Airport,SPI,464.05,1898.0
3,Adak Airport,ADK,16.0,113.0
4,Akron-Canton Airport,CAK,2054.05,7353.0


In [98]:
# Add percent delay by airport anda verage percent delay
airport_grouped_df["perc_delay"]= airport_grouped_df["delay_sum"]/airport_grouped_df["total_number_flights"]
airport_grouped_df["avg_perc_delay"] = air_data["delay_sum"].sum()/air_data["total_number_flights"].sum()

# Sort airport data by 'total_number_flights' and take only top 25
sorted_airport_df = airport_grouped_df.sort_values('total_number_flights', ascending=False)

Top25_delay_df = sorted_airport_df.nlargest(25, 'total_number_flights')
Top25_delay_df.reset_index(drop=True)

Top25_delay_df.head()

Unnamed: 0,airport_name,airport_abbr,delay_sum,total_number_flights,perc_delay,avg_perc_delay
15,Atlanta International Airport,ATL,65842.93,425255.0,0.154832,0.208236
63,Chicago O'hare International Airport,ORD,92928.02,367499.0,0.252866,0.208236
77,Dallas/Fort Worth International Airport,DFW,76313.98,319212.0,0.23907,0.208236
84,Denver International Airport,DEN,54852.87,265815.0,0.206357,0.208236
57,Charlotte/Douglas International Airport,CLT,46262.06,253116.0,0.18277,0.208236


In [6]:
# Create empty list to insert latitude, longitude from request to API
lat = []
lng = []

# Get airport name into a list to iterate throught
airport_name_list = airport_grouped_df['airport_name']

# Iterate through airportnames, get lat and lng, appened to the empty lists
for x in airport_name_list:
    target_url = f"https://maps.googleapis.com/maps/api/geocode/json?address={x}&key={gkey}"
    airport_info = requests.get(target_url).json()
    
    try:
        latitude = airport_info["results"][0]["geometry"]["location"]["lat"]
    except IndexError:
        latitude = 'null'
    
    try:
        longitude = airport_info["results"][0]["geometry"]["location"]["lng"]
    except IndexError:
        longitude = 'null'
        
    lat.append(latitude)
    lng.append(longitude)



In [99]:
# Add lat and lng info into airport_grouped_df
airport_full_info = airport_grouped_df
airport_full_info["lat"] = lat
airport_full_info["lng"] = lng
airport_full_info.head()

Unnamed: 0,airport_name,airport_abbr,delay_sum,total_number_flights,perc_delay,avg_perc_delay,lat,lng
0,Aberdeen Regional Airport,ABR,112.01,808.0,0.138626,0.208236,45.4535,-98.4177
1,Abilene Regional Airport,ABI,498.96,2371.0,0.210443,0.208236,32.4119,-99.68
2,Abraham Lincoln Capital Airport,SPI,464.05,1898.0,0.244494,0.208236,39.8435,-89.6781
3,Adak Airport,ADK,16.0,113.0,0.141593,0.208236,51.88,-176.658
4,Akron-Canton Airport,CAK,2054.05,7353.0,0.279349,0.208236,40.9154,-81.4419


In [100]:
# Check types as lat and lng should be float
print(airport_full_info.dtypes)
print(len(airport_full_info))

airport_name             object
airport_abbr             object
delay_sum               float64
total_number_flights    float64
perc_delay              float64
avg_perc_delay          float64
lat                      object
lng                      object
dtype: object
351


In [101]:
# Find the rows with null under lat/lng
null_airport = airport_full_info.loc[airport_full_info['lat'] == 'null']
null_airport

Unnamed: 0,airport_name,airport_abbr,delay_sum,total_number_flights,perc_delay,avg_perc_delay,lat,lng
132,Harrison/Marion Regional Airport,CKB,242.04,906.0,0.267152,0.208236,,


In [102]:
# Only one null airport, remove that one
airport_full_info = airport_full_info.loc[airport_full_info['lat'] != 'null']

#Check by looking at lenght of dataframe
len(airport_full_info)

350

In [103]:
# Check dtypes again
airport_full_info.dtypes

airport_name             object
airport_abbr             object
delay_sum               float64
total_number_flights    float64
perc_delay              float64
avg_perc_delay          float64
lat                      object
lng                      object
dtype: object

In [107]:
# change lat and lng to float
airport_full_info['lat'] = airport_full_info["lat"].astype(float)
airport_full_info['lng'] = airport_full_info["lng"].astype(float)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


In [109]:
airport_full_info.dtypes

airport_name             object
airport_abbr             object
delay_sum               float64
total_number_flights    float64
perc_delay              float64
avg_perc_delay          float64
lat                     float64
lng                     float64
dtype: object

In [111]:
# Save final data set into new csv file
airport_full_info.to_csv('./Data/airport_lat_lng_AV.csv',index=False)