# Finding the most northern and eastern airports in the US
### First I'll load the .csv files and take a look

In [47]:
import os
from pathlib import Path

import pandas as pd

# Directory that holds airports.csv and weather.csv. It defaults to the
# location this notebook was originally written against; set the
# FLIGHTS_DATA_DIR environment variable to run it elsewhere without editing
# the cell.
DATA_DIR = Path(os.environ.get('FLIGHTS_DATA_DIR', '/Users/Maureen/Desktop/flightsdata'))

# Both frames keep pandas' default integer row index.
airports = pd.read_csv(DATA_DIR / 'airports.csv')
weather = pd.read_csv(DATA_DIR / 'weather.csv')

In [48]:
# Preview the first five airport rows to check how the columns were parsed.
airports.head(n=5)

Unnamed: 0,faa,name,lat,lon,alt,tz,dst,tzone
0,04G,Lansdowne Airport,41.130472,-80.619583,1044,-5.0,A,America/New_York
1,06A,Moton Field Municipal Airport,32.460572,-85.680028,264,-6.0,A,America/Chicago
2,06C,Schaumburg Regional,41.989341,-88.101243,801,-6.0,A,America/Chicago
3,06N,Randall Airport,41.431912,-74.391561,523,-5.0,A,America/New_York
4,09J,Jekyll Island Airport,31.074472,-81.427778,11,-5.0,A,America/New_York


In [49]:
# Preview the first five weather observations to check how the columns were parsed.
weather.head(n=5)

Unnamed: 0,origin,year,month,day,hour,temp,dewp,humid,wind_dir,wind_speed,wind_gust,precip,pressure,visib,time_hour
0,EWR,2013,1,1,0,37.04,21.92,53.97,230.0,10.35702,11.918651,0.0,1013.9,10.0,2013-01-01T00:00:00Z
1,EWR,2013,1,1,1,37.04,21.92,53.97,230.0,13.80936,15.891535,0.0,1013.0,10.0,2013-01-01T01:00:00Z
2,EWR,2013,1,1,2,37.94,21.92,52.09,230.0,12.65858,14.567241,0.0,1012.6,10.0,2013-01-01T02:00:00Z
3,EWR,2013,1,1,3,37.94,23.0,54.51,230.0,13.80936,15.891535,0.0,1012.7,10.0,2013-01-01T03:00:00Z
4,EWR,2013,1,1,4,37.94,24.08,57.04,240.0,14.96014,17.21583,0.0,1012.8,10.0,2013-01-01T04:00:00Z


### I want to make sure all the airports in the list are in the United States

In [50]:
# Count airports per time zone label: a quick proxy for spotting rows that
# may lie outside the United States.
airports['tzone'].value_counts()

America/New_York       519
America/Chicago        342
America/Anchorage      239
America/Los_Angeles    176
America/Denver         119
America/Phoenix         38
Pacific/Honolulu        18
\N                       3
Asia/Chongqing           2
America/Vancouver        2
Name: tzone, dtype: int64

### It looks like they are not all in the US. I want to exclude rows based on whether the lat/lon is in the US.
This website lists the most extreme coordinates in the US. http://www.cohp.org/extremes/extreme_points.html
I used this information to set the lat/lon min/max to filter out non-US airports.

## Filter and find the most eastern airport

In [51]:
# [min, max] bounds in degrees, taken from the US extreme points listed at
# http://www.cohp.org/extremes/extreme_points.html
coordinates_lon = [-172.44, -66.94975]
coordinates_lat = [18.91, 71.39]

# Apply BOTH bounds before ranking. Filtering on longitude alone would let an
# airport that sits inside the longitude band but outside the latitude band
# be reported as the easternmost one. `between` is inclusive at both ends.
in_lon_band = airports.lon.between(coordinates_lon[0], coordinates_lon[1])
in_lat_band = airports.lat.between(coordinates_lat[0], coordinates_lat[1])
airports = airports[in_lon_band & in_lat_band]

# NOTE(review): a bounding box is only a rough filter; a point inside the box
# is not guaranteed to be in the US, so confirm the reported airport by hand.
# Largest longitude first, so the top row is the easternmost remaining airport.
east_sort = airports.sort_values('lon', ascending=False)
east_sort.iloc[0]

faa                             EPM
name     Eastport Municipal Airport
lat                         44.9101
lon                        -67.0127
alt                              45
tz                               -5
dst                               A
tzone              America/New_York
Name: 444, dtype: object

## Filter and find the most northern airport

In [52]:
# [min, max] latitude bounds in degrees used to keep US airports only.
coordinates_lat = [18.91, 71.39]
within_lat = (airports['lat'] >= coordinates_lat[0]) & (airports['lat'] <= coordinates_lat[1])
airports = airports.loc[within_lat]

# Largest latitude first, so the top row is the northernmost remaining airport.
north_sort = airports.sort_values(by='lat', ascending=False)
north_sort.iloc[0]

faa                             BRW
name     Wiley Post Will Rogers Mem
lat                         71.2854
lon                        -156.766
alt                              44
tz                               -9
dst                               A
tzone             America/Anchorage
Name: 230, dtype: object

# The northernmost airport in the US is Wiley Post Will Rogers Mem in Alaska.
# The easternmost airport is appropriately named Eastport Municipal Airport in Maine.

# Now it's time to find the windiest New York-area airport on February 12, 2013

### Let's see what years are in the data

In [53]:
# Count observations per year to see which years the weather data covers.
weather['year'].value_counts()

2013    26130
Name: year, dtype: int64

### Great! I don't need to filter by year since all entries are from 2013. Now I'll choose only the data from February 12

In [54]:
# Keep only the observations recorded on February 12.
is_feb_12 = (weather['month'] == 2) & (weather['day'] == 12)
weather = weather.loc[is_feb_12]

# Number of remaining observations per origin airport.
weather['origin'].value_counts()

JFK    24
EWR    24
LGA    24
Name: origin, dtype: int64

### Using groupby will let me get the calculations for each airport for that day.

In [55]:
# Summary statistics of wind speed for each origin airport.
summary_stats = ['count', 'min', 'max', 'mean', 'std']
weather.groupby('origin')['wind_speed'].agg(summary_stats)

Unnamed: 0_level_0,count,min,max,mean,std
origin,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
EWR,24,0.0,1048.36058,55.668983,211.507035
JFK,24,0.0,20.71404,13.090123,6.01144
LGA,24,3.45234,23.0156,13.857309,5.703436


### OK, something's not right here. The numbers for Newark would mean they were hit with a tornado. Let's take a look at only Newark's weather:

In [56]:
# Show only the rows whose origin is EWR so the suspicious reading can be inspected.
weather.loc[weather['origin'] == 'EWR']

Unnamed: 0,origin,year,month,day,hour,temp,dewp,humid,wind_dir,wind_speed,wind_gust,precip,pressure,visib,time_hour
1007,EWR,2013,2,12,0,42.98,42.98,100.0,0.0,0.0,0.0,0.0,1004.7,0.5,2013-02-12T00:00:00Z
1008,EWR,2013,2,12,1,42.08,42.08,100.0,300.0,3.45234,3.972884,0.0,1004.7,0.5,2013-02-12T01:00:00Z
1009,EWR,2013,2,12,2,42.8,42.8,100.0,310.0,8.05546,9.270062,0.0,,1.5,2013-02-12T02:00:00Z
1010,EWR,2013,2,12,3,42.8,42.8,100.0,280.0,5.7539,6.621473,0.0,,3.0,2013-02-12T03:00:00Z
1011,EWR,2013,2,12,4,44.6,42.8,93.35,290.0,8.05546,9.270062,0.0,,10.0,2013-02-12T04:00:00Z
1012,EWR,2013,2,12,5,39.92,39.02,96.55,240.0,6.90468,7.945768,0.0,1006.9,10.0,2013-02-12T05:00:00Z
1013,EWR,2013,2,12,6,39.92,37.94,92.56,250.0,9.20624,10.594357,0.0,1007.2,10.0,2013-02-12T06:00:00Z
1014,EWR,2013,2,12,7,39.92,28.04,62.21,270.0,20.71404,23.837303,0.0,1007.8,10.0,2013-02-12T07:00:00Z
1015,EWR,2013,2,12,8,39.02,26.96,61.63,260.0,1048.36058,1206.432388,0.0,1008.3,10.0,2013-02-12T08:00:00Z
1016,EWR,2013,2,12,9,39.02,26.96,61.63,260.0,12.65858,14.567241,0.0,1008.3,10.0,2013-02-12T09:00:00Z


### Something is wrong with the data from 8am. I'm going to drop that row and rerun the stats.

In [57]:
# Wind speeds above this value are treated as recording errors.
# NOTE(review): the limit is expressed in the units of the wind_speed column
# (presumably mph; confirm). In the February 12 data the only reading above it
# is the 1048.36 value at EWR; every other reading is below 24.
MAX_PLAUSIBLE_WIND_SPEED = 200

# Remove the bad reading by value rather than with `weather.drop(1015)`: a
# hard-coded index label raises KeyError when the cell is re-run (the label is
# already gone) and depends on the row index the frame happened to get on load.
# The mask is negated, instead of testing `<=`, so rows with a missing
# wind_speed are kept, exactly as dropping a single label would keep them.
is_outlier = weather.wind_speed > MAX_PLAUSIBLE_WIND_SPEED
weather = weather[~is_outlier]

# Recompute the per-airport wind speed statistics without the outlier.
weather.groupby('origin').wind_speed.agg(['count','min','max','mean','std'])

Unnamed: 0_level_0,count,min,max,mean,std
origin,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
EWR,23,0.0,21.86482,12.508478,5.345014
JFK,24,0.0,20.71404,13.090123,6.01144
LGA,24,3.45234,23.0156,13.857309,5.703436


# Excluding the outlier at Newark, LaGuardia was the windiest by a small margin on February 12, 2013 (highest mean and highest maximum wind speed).