# Weather Exploration Notebook

In [1]:
import pandas as pd
import numpy as np

import os.path
import requests
import io

import seaborn as sns
import matplotlib.pyplot as plt
import scipy.stats as stats

import warnings
warnings.filterwarnings("ignore")

from sklearn.model_selection import cross_val_score, train_test_split, GridSearchCV
from sklearn.preprocessing import MinMaxScaler, StandardScaler, PowerTransformer
from sklearn.ensemble import GradientBoostingClassifier

import wrangle
import model

In [2]:
# Load the raw weather-events CSV (the file name indicates 2016-2019 coverage).
weather_data = pd.read_csv(filepath_or_buffer="US_WeatherEvents_2016-2019.csv")

In [11]:
# Distinct city names recorded for North Carolina events.
weather_data.loc[weather_data["State"] == "NC", "City"].unique()

array(['Davis', 'Sanford', 'Goldsboro', 'Salisbury', 'Greensboro',
       'Southport', 'Louisburg', 'Oxford', 'Stoneville', 'Elm City',
       'Smithfield', 'Statesville', 'Franklin', 'Concord', 'Monroe',
       'Richlands', 'Rockingham', 'Whiteville', 'Pope Army Airfield',
       'Washington', 'Erwin', 'Pikeville', 'Tarboro', 'Wilmington',
       'New London', 'Morrisville', 'Mount Airy', 'Jefferson',
       'Engelhard', 'Frisco', 'Andrews', 'Elizabeth City', 'New Bern',
       'Winston Salem', 'Lexington', 'Elizabethtown', 'Iron Station',
       'Rutherfordton', 'Aulander', 'Asheboro', 'Timberlake',
       'Burlington', 'Maxton', 'Wadesboro', 'North Wilkesboro', 'Hoffman',
       'Jacksonville', 'Carthage', 'Edenton', 'Kinston', 'Lumberton',
       'Clinton', 'Fayetteville', 'Manteo', 'Newport', 'Halifax',
       'Kenansville', 'Beaufort', 'Havelock', 'Cleveland', 'Gastonia',
       'Morganton', 'Greenville', 'Maple', 'New River', 'Fletcher',
       'Township 2 Berryhill', 'Kill Devi

In [13]:
# All events whose City is "Dallas" (the output shows this matches more than one state).
weather_data.loc[weather_data["City"] == "Dallas"]

Unnamed: 0,EventId,Type,Severity,StartTime(UTC),EndTime(UTC),TimeZone,AirportCode,LocationLat,LocationLng,City,County,State,ZipCode
788917,W-789030,Rain,Light,2016-01-06 15:53:00,2016-01-06 16:53:00,US/Central,KRBD,32.6809,-96.8682,Dallas,Dallas,TX,75237.0
788918,W-789031,Rain,Light,2016-01-07 01:39:00,2016-01-07 02:00:00,US/Central,KRBD,32.6809,-96.8682,Dallas,Dallas,TX,75237.0
788919,W-789032,Rain,Light,2016-01-07 02:18:00,2016-01-07 02:41:00,US/Central,KRBD,32.6809,-96.8682,Dallas,Dallas,TX,75237.0
788920,W-789033,Rain,Heavy,2016-01-07 02:41:00,2016-01-07 03:06:00,US/Central,KRBD,32.6809,-96.8682,Dallas,Dallas,TX,75237.0
788921,W-789034,Rain,Moderate,2016-01-07 03:06:00,2016-01-07 04:26:00,US/Central,KRBD,32.6809,-96.8682,Dallas,Dallas,TX,75237.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3805626,W-3806127,Rain,Light,2019-12-29 18:15:00,2019-12-29 18:55:00,US/Eastern,KPUJ,33.9120,-84.9410,Dallas,Paulding,GA,30157.0
3805627,W-3806128,Rain,Light,2019-12-29 20:35:00,2019-12-29 20:55:00,US/Eastern,KPUJ,33.9120,-84.9410,Dallas,Paulding,GA,30157.0
3805628,W-3806129,Rain,Light,2019-12-29 22:15:00,2019-12-29 22:55:00,US/Eastern,KPUJ,33.9120,-84.9410,Dallas,Paulding,GA,30157.0
3805629,W-3806130,Rain,Light,2019-12-30 01:55:00,2019-12-30 02:35:00,US/Eastern,KPUJ,33.9120,-84.9410,Dallas,Paulding,GA,30157.0


In [None]:
# we only want data for 2018

In [None]:
# We only want data for the following airports (IATA codes).

top_airports = [
    "ATL", "LAX", "ORD", "DFW", "DEN",
    "JFK", "SFO", "SEA", "LAS", "MCO",
    "EWR", "CLT", "PHX", "IAH", "MIA",
]

In [18]:
# Download the airports lookup table (airports.csv) straight from a Kaggle storage URL.
# NOTE(review): this is a signed URL carrying `Expires=` and `Signature=` query
# parameters, so it is presumably time-limited and may no longer resolve on a fresh
# Restart & Run All - consider saving the file locally and reading it from disk.
airport_codes = pd.read_csv("https://storage.googleapis.com/kagglesdsdata/datasets%2F626214%2F1116273%2Fairports.csv?GoogleAccessId=gcp-kaggle-com@kaggle-161607.iam.gserviceaccount.com&Expires=1594952131&Signature=CbuLdZR%2B9YkSTVhPslvV67WILvcTGa1EnhtSpZza10YkL2ccV6HIZLkikKrkF%2BEgwFySBM88hZ4Gp9aqZqA5s5BC%2FL7l71dpdyM8PkBWxtmvJkEYuPYxPvzwt2M10q7T4TzK1mFzUUwPz1CtvLcBdx%2F3GJgd5z4nSGepU3SkoalQHJ4JUQ0SpE5liZiCJ2SB%2FTA4fcz62TvAKv21Jwe7oq4q8CnrG6rvSjW5uRDkvKXDc1sXH2WG2CIZYyt%2FKz2B%2BJsi2iZ9AS9dAhbLO8GVOP6EhtIA9%2ForRmxuBStdXY%2BY1lojJaekb9agtbNS5wAIr%2Fkk0LUHmKPxt7b6jTiLtQ%3D%3D")

In [22]:
# Restrict the airport lookup to rows whose Country is "United States".
airport_codes = airport_codes.loc[airport_codes["Country"].eq("United States")]

In [23]:
# Preview the first five US airport rows.
airport_codes.head(n=5)

Unnamed: 0,Name,City,Country,IATA,ICAO,Latitude,Longitude
3212,Barter Island LRRS Airport,Barter Island,United States,BTI,PABA,70.134003,-143.582001
3213,Wainwright Air Station,Fort Wainwright,United States,\N,PAWT,70.613403,-159.860001
3214,Cape Lisburne LRRS Airport,Cape Lisburne,United States,LUR,PALU,68.875099,-166.110001
3215,Point Lay LRRS Airport,Point Lay,United States,PIZ,PPIZ,69.732903,-163.005005
3216,Hilo International Airport,Hilo,United States,ITO,PHTO,19.721399,-155.048004


In [27]:
# Attach airport metadata to every weather event: the event's AirportCode is
# matched against the lookup table's ICAO column; events with no match are kept
# (left join) with missing airport fields.
weather_data = pd.merge(
    weather_data,
    airport_codes,
    how="left",
    left_on="AirportCode",
    right_on="ICAO",
)

In [32]:
# IATA codes of the airports to keep.
top_airports = ["ATL", "LAX", "ORD", "DFW", "DEN", "JFK", "SFO", "SEA", "LAS", "MCO", "EWR", "CLT", "PHX", "IAH", "MIA"]

# Flag rows whose merged IATA code is one of the top airports.
# A single vectorized isin() replaces the per-code loop + fillna: the column is
# always created (the loop left it undefined when no code matched, so the
# follow-up attribute access failed), it has a real bool dtype instead of
# object, and rows with a missing IATA code come out as False.
weather_data["is_top"] = weather_data["IATA"].isin(top_airports)

In [37]:
# Keep only events at the flagged top airports.
# .copy() makes the result an independent frame, so the column assignment that
# follows does not write into a slice of the merged frame (the pattern behind
# SettingWithCopyWarning, which the global warnings filter would otherwise hide).
weather_data = weather_data[weather_data.is_top].copy()

In [43]:
# Parse the event start timestamps into datetime values.
weather_data["StartTime(UTC)"] = weather_data["StartTime(UTC)"].pipe(pd.to_datetime)

In [45]:
# Use the event start time as the row index.
weather_data = weather_data.set_index(keys="StartTime(UTC)")

In [47]:
# Keep only events that started in 2018 (partial-string match on the datetime index).
# Row selection by date string has to go through .loc: the plain df["2018"] form
# was deprecated in pandas 1.2 and removed in 2.0, where it is treated as a
# column lookup and raises KeyError.
weather_data = weather_data.loc["2018"]

In [48]:
# Preview the first five rows of the filtered frame.
weather_data.head(n=5)

Unnamed: 0_level_0,EventId,Type,Severity,EndTime(UTC),TimeZone,AirportCode,LocationLat,LocationLng,City_x,County,State,ZipCode,Name,City_y,Country,IATA,ICAO,Latitude,Longitude,is_top
StartTime(UTC),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2018-01-04 05:39:00,W-226959,Snow,Light,2018-01-04 06:29:00,US/Eastern,KJFK,40.6392,-73.7639,Jamaica,Queens,NY,11430.0,John F Kennedy International Airport,New York,United States,JFK,KJFK,40.639801,-73.7789,True
2018-01-04 07:51:00,W-226960,Snow,Light,2018-01-04 12:51:00,US/Eastern,KJFK,40.6392,-73.7639,Jamaica,Queens,NY,11430.0,John F Kennedy International Airport,New York,United States,JFK,KJFK,40.639801,-73.7789,True
2018-01-04 12:51:00,W-226961,Fog,Severe,2018-01-04 18:16:00,US/Eastern,KJFK,40.6392,-73.7639,Jamaica,Queens,NY,11430.0,John F Kennedy International Airport,New York,United States,JFK,KJFK,40.639801,-73.7789,True
2018-01-04 18:16:00,W-226962,Storm,Severe,2018-01-04 18:43:00,US/Eastern,KJFK,40.6392,-73.7639,Jamaica,Queens,NY,11430.0,John F Kennedy International Airport,New York,United States,JFK,KJFK,40.639801,-73.7789,True
2018-01-04 18:43:00,W-226963,Snow,Light,2018-01-04 23:51:00,US/Eastern,KJFK,40.6392,-73.7639,Jamaica,Queens,NY,11430.0,John F Kennedy International Airport,New York,United States,JFK,KJFK,40.639801,-73.7789,True


In [49]:
# Events whose airport city (City_y, from the airport lookup) is Charlotte.
weather_data.loc[weather_data["City_y"] == "Charlotte"]

Unnamed: 0_level_0,EventId,Type,Severity,EndTime(UTC),TimeZone,AirportCode,LocationLat,LocationLng,City_x,County,State,ZipCode,Name,City_y,Country,IATA,ICAO,Latitude,Longitude,is_top
StartTime(UTC),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2018-01-08 20:52:00,W-4437347,Rain,Light,2018-01-09 00:52:00,US/Eastern,KCLT,35.2225,-80.9543,Township 2 Berryhill,Mecklenburg,NC,28278.0,Charlotte Douglas International Airport,Charlotte,United States,CLT,KCLT,35.214001,-80.9431,True
2018-01-11 07:52:00,W-4437348,Rain,Light,2018-01-11 08:52:00,US/Eastern,KCLT,35.2225,-80.9543,Township 2 Berryhill,Mecklenburg,NC,28278.0,Charlotte Douglas International Airport,Charlotte,United States,CLT,KCLT,35.214001,-80.9431,True
2018-01-11 11:52:00,W-4437349,Rain,Light,2018-01-11 13:35:00,US/Eastern,KCLT,35.2225,-80.9543,Township 2 Berryhill,Mecklenburg,NC,28278.0,Charlotte Douglas International Airport,Charlotte,United States,CLT,KCLT,35.214001,-80.9431,True
2018-01-11 13:39:00,W-4437350,Rain,Light,2018-01-11 13:52:00,US/Eastern,KCLT,35.2225,-80.9543,Township 2 Berryhill,Mecklenburg,NC,28278.0,Charlotte Douglas International Airport,Charlotte,United States,CLT,KCLT,35.214001,-80.9431,True
2018-01-11 15:03:00,W-4437351,Rain,Light,2018-01-11 15:52:00,US/Eastern,KCLT,35.2225,-80.9543,Township 2 Berryhill,Mecklenburg,NC,28278.0,Charlotte Douglas International Airport,Charlotte,United States,CLT,KCLT,35.214001,-80.9431,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2018-12-31 11:52:00,W-4438023,Rain,Light,2018-12-31 13:44:00,US/Eastern,KCLT,35.2225,-80.9543,Township 2 Berryhill,Mecklenburg,NC,28278.0,Charlotte Douglas International Airport,Charlotte,United States,CLT,KCLT,35.214001,-80.9431,True
2018-12-31 13:52:00,W-4438024,Rain,Light,2018-12-31 14:25:00,US/Eastern,KCLT,35.2225,-80.9543,Township 2 Berryhill,Mecklenburg,NC,28278.0,Charlotte Douglas International Airport,Charlotte,United States,CLT,KCLT,35.214001,-80.9431,True
2018-12-31 14:52:00,W-4438025,Rain,Light,2018-12-31 15:52:00,US/Eastern,KCLT,35.2225,-80.9543,Township 2 Berryhill,Mecklenburg,NC,28278.0,Charlotte Douglas International Airport,Charlotte,United States,CLT,KCLT,35.214001,-80.9431,True
2018-12-31 17:52:00,W-4438026,Fog,Severe,2018-12-31 21:41:00,US/Eastern,KCLT,35.2225,-80.9543,Township 2 Berryhill,Mecklenburg,NC,28278.0,Charlotte Douglas International Airport,Charlotte,United States,CLT,KCLT,35.214001,-80.9431,True
