In [1]:
# Library Imports.
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.backends.backend_pdf import PdfPages
import seaborn as sns

# Allows plots to appear directly in the notebook.
%matplotlib inline

from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn import tree
from sklearn.ensemble import RandomForestRegressor
from sklearn import metrics
from sklearn.model_selection import cross_validate
from sklearn.model_selection import cross_val_score 

import pickle

In [2]:
# Load the OpenWeather export into a DataFrame. The comma separator and the default
# NA markers are pandas defaults, so only skipinitialspace (strip blanks that follow
# a delimiter) needs to be spelled out.
# NOTE(review): the file name says 2018, but the previewed rows start on 1/1/2017 --
# confirm which date range this file is meant to cover.
weather = pd.read_csv('cleaned_2018_OpenWeather.csv', skipinitialspace=True)

In [3]:
# Report the (rows, columns) size of the freshly loaded frame.
weather.shape

(26280, 29)

In [4]:
# Peek at the first rows to see which columns the export contains.
weather.head()

Unnamed: 0,dt,dt_iso,timezonetext,timezone,city_name,lat,lon,temp,visibility,dew_point,...,wind_gust,rain_1h,rain_3h,snow_1h,snow_3h,clouds_all,weather_id,weather_main,weather_description,weather_icon
0,1483228800,1/1/2017 0:00,0000 UTC,0,Custom location,53.345035,-6.267261,5.39,9999.0,4.35,...,,2.3,,,,75,501,Rain,moderate rain,10n
1,1483232400,1/1/2017 1:00,0000 UTC,0,Custom location,53.345035,-6.267261,5.39,9999.0,4.35,...,,1.51,,,,75,501,Rain,moderate rain,10n
2,1483236000,1/1/2017 2:00,0000 UTC,0,Custom location,53.345035,-6.267261,5.39,9999.0,4.35,...,,0.64,,,,75,500,Rain,light rain,10n
3,1483239600,1/1/2017 3:00,0000 UTC,0,Custom location,53.345035,-6.267261,4.39,9999.0,3.36,...,,0.17,,,,75,500,Rain,light rain,10n
4,1483243200,1/1/2017 4:00,0000 UTC,0,Custom location,53.345035,-6.267261,4.39,9999.0,2.42,...,,,,,,75,803,Clouds,broken clouds,04n


## Dropping Unneeded Columns

In [5]:
# Remove the raw `dt` timestamp, the timezone fields, the city name and the coordinates
# in a single call; none of them are carried into the combined dataset.
weather.drop(
    columns=['dt', 'timezonetext', 'timezone', 'city_name', 'lat', 'lon'],
    inplace=True,
)

In [6]:
# Confirm the six columns dropped in the previous cell are gone.
weather.head()

Unnamed: 0,dt_iso,temp,visibility,dew_point,feels_like,temp_min,temp_max,pressure,sea_level,grnd_level,...,wind_gust,rain_1h,rain_3h,snow_1h,snow_3h,clouds_all,weather_id,weather_main,weather_description,weather_icon
0,1/1/2017 0:00,5.39,9999.0,4.35,1.78,5.36,6.56,1019,,,...,,2.3,,,,75,501,Rain,moderate rain,10n
1,1/1/2017 1:00,5.39,9999.0,4.35,2.28,5.36,5.56,1019,,,...,,1.51,,,,75,501,Rain,moderate rain,10n
2,1/1/2017 2:00,5.39,9999.0,4.35,2.28,5.36,5.56,1019,,,...,,0.64,,,,75,500,Rain,light rain,10n
3,1/1/2017 3:00,4.39,9999.0,3.36,0.04,4.36,5.56,1019,,,...,,0.17,,,,75,500,Rain,light rain,10n
4,1/1/2017 4:00,4.39,9999.0,2.42,0.04,4.36,4.56,1019,,,...,,,,,,75,803,Clouds,broken clouds,04n


In [7]:
# Count missing values per column to spot columns that are too sparse to keep.
weather.isnull().sum()

dt_iso                     0
temp                       0
visibility                45
dew_point                  0
feels_like                 0
temp_min                   0
temp_max                   0
pressure                   0
sea_level              26280
grnd_level             26280
humidity                   0
wind_speed                 0
wind_deg                   0
wind_gust              19461
rain_1h                21253
rain_3h                26280
snow_1h                26199
snow_3h                26280
clouds_all                 0
weather_id                 0
weather_main               0
weather_description        0
weather_icon               0
dtype: int64

In [8]:
# Discard the columns that the null counts show to be entirely or mostly empty.
weather.drop(
    columns=[
        'sea_level',
        'grnd_level',
        'wind_gust',
        'rain_1h',
        'rain_3h',
        'snow_1h',
        'snow_3h',
    ],
    inplace=True,
)

In [9]:
# Confirm the mostly-null columns are gone and see what is left.
weather.head()

Unnamed: 0,dt_iso,temp,visibility,dew_point,feels_like,temp_min,temp_max,pressure,humidity,wind_speed,wind_deg,clouds_all,weather_id,weather_main,weather_description,weather_icon
0,1/1/2017 0:00,5.39,9999.0,4.35,1.78,5.36,6.56,1019,93,5.1,330,75,501,Rain,moderate rain,10n
1,1/1/2017 1:00,5.39,9999.0,4.35,2.28,5.36,5.56,1019,93,4.1,310,75,501,Rain,moderate rain,10n
2,1/1/2017 2:00,5.39,9999.0,4.35,2.28,5.36,5.56,1019,93,4.1,320,75,500,Rain,light rain,10n
3,1/1/2017 3:00,4.39,9999.0,3.36,0.04,4.36,5.56,1019,93,6.2,330,75,500,Rain,light rain,10n
4,1/1/2017 4:00,4.39,9999.0,2.42,0.04,4.36,4.56,1019,87,6.2,330,75,803,Clouds,broken clouds,04n


In [10]:
# Remove measurements and the icon code that are not carried into the combined dataset.
weather.drop(
    columns=['visibility', 'dew_point', 'temp_min', 'temp_max', 'pressure', 'weather_icon'],
    inplace=True,
)

In [13]:
# Check the remaining columns after removing visibility, dew_point, temp_min, temp_max,
# pressure and weather_icon.
weather.head()

Unnamed: 0,dt_iso,temp,feels_like,humidity,wind_speed,wind_deg,clouds_all,weather_id,weather_main,weather_description
0,1/1/2017 0:00,5.39,1.78,93,5.1,330,75,501,Rain,moderate rain
1,1/1/2017 1:00,5.39,2.28,93,4.1,310,75,501,Rain,moderate rain
2,1/1/2017 2:00,5.39,2.28,93,4.1,320,75,500,Rain,light rain
3,1/1/2017 3:00,4.39,0.04,93,6.2,330,75,500,Rain,light rain
4,1/1/2017 4:00,4.39,0.04,87,6.2,330,75,803,Clouds,broken clouds


In [14]:
# Final trim: keep only temp, humidity, wind_speed and the weather id/description
# alongside the timestamp.
weather.drop(
    columns=['feels_like', 'wind_deg', 'clouds_all', 'weather_main'],
    inplace=True,
)

In [15]:
# Verify that only the timestamp and the five retained weather columns remain.
weather.head()

Unnamed: 0,dt_iso,temp,humidity,wind_speed,weather_id,weather_description
0,1/1/2017 0:00,5.39,93,5.1,501,moderate rain
1,1/1/2017 1:00,5.39,93,4.1,501,moderate rain
2,1/1/2017 2:00,5.39,93,4.1,500,light rain
3,1/1/2017 3:00,4.39,93,6.2,500,light rain
4,1/1/2017 4:00,4.39,87,6.2,803,broken clouds


In [16]:
# Inspect the column dtypes before converting the two object (text) columns.
weather.dtypes

dt_iso                  object
temp                   float64
humidity                 int64
wind_speed             float64
weather_id               int64
weather_description     object
dtype: object

In [17]:
# Convert both text columns in one pass: the timestamp becomes a pandas string column
# (it is split with .str below) and the description becomes a categorical.
weather = weather.astype({
    'dt_iso': 'string',
    'weather_description': 'category',
})

In [18]:
# Preview the frame after the dtype conversion; the displayed values should look unchanged.
weather.head()

Unnamed: 0,dt_iso,temp,humidity,wind_speed,weather_id,weather_description
0,1/1/2017 0:00,5.39,93,5.1,501,moderate rain
1,1/1/2017 1:00,5.39,93,4.1,501,moderate rain
2,1/1/2017 2:00,5.39,93,4.1,500,light rain
3,1/1/2017 3:00,4.39,93,6.2,500,light rain
4,1/1/2017 4:00,4.39,87,6.2,803,broken clouds


In [19]:
# Separate the timestamp text into its date and time halves at the space.
date_time_parts = weather['dt_iso'].str.split(' ', expand=True)
weather[['date', 'time']] = date_time_parts

In [20]:
# Check the new `date` and `time` columns produced by the split.
weather.head()

Unnamed: 0,dt_iso,temp,humidity,wind_speed,weather_id,weather_description,date,time
0,1/1/2017 0:00,5.39,93,5.1,501,moderate rain,1/1/2017,0:00
1,1/1/2017 1:00,5.39,93,4.1,501,moderate rain,1/1/2017,1:00
2,1/1/2017 2:00,5.39,93,4.1,500,light rain,1/1/2017,2:00
3,1/1/2017 3:00,4.39,93,6.2,500,light rain,1/1/2017,3:00
4,1/1/2017 4:00,4.39,87,6.2,803,broken clouds,1/1/2017,4:00


In [21]:
# Break the date text into its three '/'-separated fields.
# NOTE(review): the column names assume month/day/year order; the previewed value
# '1/1/2017' cannot confirm that -- verify against the source data.
date_fields = weather['date'].str.split('/', expand=True)
weather[['str_Month', 'str_Day', 'str_Year']] = date_fields

In [22]:
# Check the new `str_Month`, `str_Day` and `str_Year` columns.
weather.head()

Unnamed: 0,dt_iso,temp,humidity,wind_speed,weather_id,weather_description,date,time,str_Month,str_Day,str_Year
0,1/1/2017 0:00,5.39,93,5.1,501,moderate rain,1/1/2017,0:00,1,1,2017
1,1/1/2017 1:00,5.39,93,4.1,501,moderate rain,1/1/2017,1:00,1,1,2017
2,1/1/2017 2:00,5.39,93,4.1,500,light rain,1/1/2017,2:00,1,1,2017
3,1/1/2017 3:00,4.39,93,6.2,500,light rain,1/1/2017,3:00,1,1,2017
4,1/1/2017 4:00,4.39,87,6.2,803,broken clouds,1/1/2017,4:00,1,1,2017


In [23]:
# Break the time text into hour and minute fields at the colon.
time_fields = weather['time'].str.split(':', expand=True)
weather[['str_hour', 'str_min']] = time_fields

In [24]:
# Check the new `str_hour` and `str_min` columns.
weather.head()

Unnamed: 0,dt_iso,temp,humidity,wind_speed,weather_id,weather_description,date,time,str_Month,str_Day,str_Year,str_hour,str_min
0,1/1/2017 0:00,5.39,93,5.1,501,moderate rain,1/1/2017,0:00,1,1,2017,0,0
1,1/1/2017 1:00,5.39,93,4.1,501,moderate rain,1/1/2017,1:00,1,1,2017,1,0
2,1/1/2017 2:00,5.39,93,4.1,500,light rain,1/1/2017,2:00,1,1,2017,2,0
3,1/1/2017 3:00,4.39,93,6.2,500,light rain,1/1/2017,3:00,1,1,2017,3,0
4,1/1/2017 4:00,4.39,87,6.2,803,broken clouds,1/1/2017,4:00,1,1,2017,4,0


## Create a Weather/Trip/Leave ID for Combining Weather & Trip/Leavetime Datasets

In [25]:
# Build the join key by concatenating the year, month, day and hour strings.
# NOTE(review): the parts are not zero-padded (the previews show '1', not '01'), so
# different timestamps can collapse to the same key -- e.g. the parts (2017, 1, 11, 0),
# (2017, 11, 1, 0) and (2017, 1, 1, 10) all give '20171110'. The key format is left
# unchanged here because the trip/leavetime side presumably builds its key the same
# way -- confirm, then zero-pad month/day/hour on BOTH sides.
weather['wthr_tr_lt_id'] = weather['str_Year'] + weather['str_Month'] + weather['str_Day'] + weather['str_hour']

# Surface keys shared by more than one row so the ambiguity is visible before merging.
duplicate_id_rows = int(weather['wthr_tr_lt_id'].duplicated(keep=False).sum())
if duplicate_id_rows:
    print(
        f"WARNING: {duplicate_id_rows} rows share a wthr_tr_lt_id with another row; "
        "a merge on this key will match multiple weather rows."
    )

In [26]:
# Check the new `wthr_tr_lt_id` column against its source parts.
weather.head()

Unnamed: 0,dt_iso,temp,humidity,wind_speed,weather_id,weather_description,date,time,str_Month,str_Day,str_Year,str_hour,str_min,wthr_tr_lt_id
0,1/1/2017 0:00,5.39,93,5.1,501,moderate rain,1/1/2017,0:00,1,1,2017,0,0,2017110
1,1/1/2017 1:00,5.39,93,4.1,501,moderate rain,1/1/2017,1:00,1,1,2017,1,0,2017111
2,1/1/2017 2:00,5.39,93,4.1,500,light rain,1/1/2017,2:00,1,1,2017,2,0,2017112
3,1/1/2017 3:00,4.39,93,6.2,500,light rain,1/1/2017,3:00,1,1,2017,3,0,2017113
4,1/1/2017 4:00,4.39,87,6.2,803,broken clouds,1/1/2017,4:00,1,1,2017,4,0,2017114


## Creating a Weather Dataframe to combine with Trip/Leavetime

In [27]:
# Keep only the join key and the retained weather fields, as an independent copy so
# later changes to `weather` do not affect it.
combine_columns = [
    'wthr_tr_lt_id',
    'temp',
    'humidity',
    'wind_speed',
    'weather_id',
    'weather_description',
]
weatherCombine = weather[combine_columns].copy()

In [28]:
# Preview the slimmed-down frame that will be written out for the merge.
weatherCombine.head()

Unnamed: 0,wthr_tr_lt_id,temp,humidity,wind_speed,weather_id,weather_description
0,2017110,5.39,93,5.1,501,moderate rain
1,2017111,5.39,93,4.1,501,moderate rain
2,2017112,5.39,93,4.1,500,light rain
3,2017113,4.39,93,6.2,500,light rain
4,2017114,4.39,87,6.2,803,broken clouds


## Send to CSV for Combination of Weather & Trip/Leave Data

In [29]:
# Write weatherCombine to a CSV file in the working directory; index=False leaves the
# row index out of the file.
weatherCombine.to_csv('v3c_weatherCombine_LT_2018.csv', index=False)