In [1]:
from tensorflow import keras # for building Neural Networks
from keras.models import Sequential # for creating a linear stack of layers for our Neural Network
from keras import Input # for instantiating a keras tensor
from keras.layers import Dense # for creating regular densely-connected NN layer.

# Data manipulation
import pandas as pd # for data manipulation
import numpy as np # for data manipulation

# Sklearn
import sklearn # for model evaluation
from sklearn.model_selection import train_test_split # for splitting the data into train and test samples
from sklearn.metrics import classification_report # for model evaluation metrics

# Other utilities
import sys
import os

# Assign main directory to a variable
main_dir=os.path.dirname(sys.path[0])
#print(main_dir)

In [2]:
import pandas as pd
# Set Pandas options to display more columns
pd.options.display.max_columns=50

# Read in the weather data csv
df=pd.read_csv(main_dir+'/data/weatherHistory.csv', encoding='utf-8')


In [3]:
df

Unnamed: 0,Formatted Date,Summary,Precip Type,Temperature (C),Apparent Temperature (C),Humidity,Wind Speed (km/h),Wind Bearing (degrees),Visibility (km),Loud Cover,Pressure (millibars),Daily Summary
0,2006-04-01 00:00:00.000 +0200,Partly Cloudy,rain,9.472222,7.388889,0.89,14.1197,251.0,15.8263,0.0,1015.13,Partly cloudy throughout the day.
1,2006-04-01 01:00:00.000 +0200,Partly Cloudy,rain,9.355556,7.227778,0.86,14.2646,259.0,15.8263,0.0,1015.63,Partly cloudy throughout the day.
2,2006-04-01 02:00:00.000 +0200,Mostly Cloudy,rain,9.377778,9.377778,0.89,3.9284,204.0,14.9569,0.0,1015.94,Partly cloudy throughout the day.
3,2006-04-01 03:00:00.000 +0200,Partly Cloudy,rain,8.288889,5.944444,0.83,14.1036,269.0,15.8263,0.0,1016.41,Partly cloudy throughout the day.
4,2006-04-01 04:00:00.000 +0200,Mostly Cloudy,rain,8.755556,6.977778,0.83,11.0446,259.0,15.8263,0.0,1016.51,Partly cloudy throughout the day.
...,...,...,...,...,...,...,...,...,...,...,...,...
96448,2016-09-09 19:00:00.000 +0200,Partly Cloudy,rain,26.016667,26.016667,0.43,10.9963,31.0,16.1000,0.0,1014.36,Partly cloudy starting in the morning.
96449,2016-09-09 20:00:00.000 +0200,Partly Cloudy,rain,24.583333,24.583333,0.48,10.0947,20.0,15.5526,0.0,1015.16,Partly cloudy starting in the morning.
96450,2016-09-09 21:00:00.000 +0200,Partly Cloudy,rain,22.038889,22.038889,0.56,8.9838,30.0,16.1000,0.0,1015.66,Partly cloudy starting in the morning.
96451,2016-09-09 22:00:00.000 +0200,Partly Cloudy,rain,21.522222,21.522222,0.60,10.5294,20.0,16.1000,0.0,1015.95,Partly cloudy starting in the morning.


In [4]:
df.isnull().count()

Formatted Date              96453
Summary                     96453
Precip Type                 96453
Temperature (C)             96453
Apparent Temperature (C)    96453
Humidity                    96453
Wind Speed (km/h)           96453
Wind Bearing (degrees)      96453
Visibility (km)             96453
Loud Cover                  96453
Pressure (millibars)        96453
Daily Summary               96453
dtype: int64

In [5]:
# Drop records where target RainTomorrow=NaN
df.drop("Formatted Date", axis=1, inplace=True)
df.drop("Daily Summary", axis=1, inplace=True)

In [6]:
# For other columns with missing values, fill them in with column mean
#df=df.fillna(df.mean())

In [7]:
df.dtypes

Summary                      object
Precip Type                  object
Temperature (C)             float64
Apparent Temperature (C)    float64
Humidity                    float64
Wind Speed (km/h)           float64
Wind Bearing (degrees)      float64
Visibility (km)             float64
Loud Cover                  float64
Pressure (millibars)        float64
dtype: object

In [8]:
from sklearn.preprocessing import LabelEncoder
encoder = LabelEncoder()
for i in df.columns:
    if df[i].dtypes == "object":
        df[i] = encoder.fit_transform(df[i])

In [9]:
df.describe()

Unnamed: 0,Summary,Precip Type,Temperature (C),Apparent Temperature (C),Humidity,Wind Speed (km/h),Wind Bearing (degrees),Visibility (km),Loud Cover,Pressure (millibars)
count,96453.0,96453.0,96453.0,96453.0,96453.0,96453.0,96453.0,96453.0,96453.0,96453.0
mean,16.006024,0.12178,11.932678,10.855029,0.734899,10.81064,187.509232,10.347325,0.0,1003.235956
std,4.361497,0.343032,9.551546,10.696847,0.195473,6.913571,107.383428,4.192123,0.0,116.969906
min,0.0,0.0,-21.822222,-27.716667,0.0,0.0,0.0,0.0,0.0,0.0
25%,17.0,0.0,4.688889,2.311111,0.6,5.8282,116.0,8.3398,0.0,1011.9
50%,18.0,0.0,12.0,12.0,0.78,9.9659,180.0,10.0464,0.0,1016.45
75%,19.0,0.0,18.838889,18.838889,0.89,14.1358,290.0,14.812,0.0,1021.09
max,26.0,2.0,39.905556,39.344444,1.0,63.8526,359.0,16.1,0.0,1046.38


In [10]:
df.head()

Unnamed: 0,Summary,Precip Type,Temperature (C),Apparent Temperature (C),Humidity,Wind Speed (km/h),Wind Bearing (degrees),Visibility (km),Loud Cover,Pressure (millibars)
0,19,0,9.472222,7.388889,0.89,14.1197,251.0,15.8263,0.0,1015.13
1,19,0,9.355556,7.227778,0.86,14.2646,259.0,15.8263,0.0,1015.63
2,17,0,9.377778,9.377778,0.89,3.9284,204.0,14.9569,0.0,1015.94
3,19,0,8.288889,5.944444,0.83,14.1036,269.0,15.8263,0.0,1016.41
4,17,0,8.755556,6.977778,0.83,11.0446,259.0,15.8263,0.0,1016.51


In [11]:
corr_matrix = df.corr()
corr_rel = corr_matrix["Apparent Temperature (C)"].sort_values(ascending=False)

In [12]:
corr_rel

Apparent Temperature (C)    1.000000
Temperature (C)             0.992629
Visibility (km)             0.381718
Summary                     0.142760
Wind Bearing (degrees)      0.029031
Pressure (millibars)       -0.000219
Wind Speed (km/h)          -0.056650
Precip Type                -0.521781
Humidity                   -0.602571
Loud Cover                       NaN
Name: Apparent Temperature (C), dtype: float64

df.drop('WindSpeed9am', inplace=True, axis=1)
df.drop('WindSpeed3pm', inplace=True, axis=1)
df.drop('MinTemp', inplace=True, axis=1)
df.drop('WindGustDir', inplace=True, axis=1)
df.drop('WindDir3pm', inplace=True, axis=1)
df.drop('WindDir9am', inplace=True, axis=1)
df.drop('Location', inplace=True, axis=1)
df.drop('Temp9am', inplace=True, axis=1)
df.drop('Evaporation', inplace=True, axis=1)

In [13]:
df["Loud Cover"].unique()

array([0.])

In [14]:
df.drop("Loud Cover", inplace=True, axis=1)
df.drop("Temperature (C)", inplace=True, axis=1)

In [15]:
import numpy as np
df = df.sample(frac = 1)
X=df.copy()
X.drop("Apparent Temperature (C)", inplace=True, axis=1)
y=df["Apparent Temperature (C)"].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

## Base Feedforward Neural Network I

In [16]:
X_train.shape[1]

7

In [17]:

callback = keras.callbacks.EarlyStopping(monitor='loss', patience=10)
model = Sequential(name="Model-with-All-Features") # Model
model.add(Dense(16, input_shape=(X_train.shape[1],), activation='relu'))
model.add(Dense(8, activation='relu'))
model.add(Dense(1, activation='linear'))

##### Step 4 - Compile keras model
model.compile(loss='mean_absolute_error', optimizer='adam', metrics=['mean_squared_error'])
model.fit(X_train, y_train, epochs=100, batch_size=10, verbose=1, validation_split=0.2, callbacks=[callback])

##### Step 5 - Fit keras model on the dataset

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100


Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100


Epoch 99/100
Epoch 100/100


<keras.callbacks.History at 0x7fbf9e15f3d0>

# FFNN With SCaled Features II

In [18]:
import numpy as np
df = df.sample(frac = 1)
X=df.copy()
X.drop("Apparent Temperature (C)", inplace=True, axis=1)
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X = sc.fit_transform(X)
y=df["Apparent Temperature (C)"].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

In [19]:
X

array([[ 0.45717939, -0.35501138,  0.94694531, ...,  1.28037894,
        -0.08714601,  0.07962811],
       [-0.91850209,  2.56018635,  0.84462872, ...,  1.20587916,
        -1.91141007,  0.246595  ],
       [-2.29418357, -0.35501138, -0.38317033, ...,  1.03825467,
         1.30697578,  0.13605317],
       ...,
       [ 0.68645963, -0.35501138, -1.5086528 , ..., -0.41449099,
         0.86147129,  0.17093412],
       [ 0.68645963, -0.35501138,  0.33304578, ..., -0.19099166,
         1.15335354,  0.06124722],
       [ 0.68645963, -0.35501138,  0.38420408, ...,  0.48881881,
         1.37226523,  0.06475241]])

In [20]:
model2 = Sequential(name="Model-with-All-Features") # Model
model2.add(Dense(16, input_shape=(X_train.shape[1],), activation='relu'))
model2.add(Dense(8, activation='relu'))
model2.add(Dense(1, activation='linear'))

##### Step 4 - Compile keras model
model2.compile(loss='mean_absolute_error', optimizer='adam', metrics=['mean_squared_error'])
model2.fit(X_train, y_train, epochs=100, batch_size=10, verbose=1, validation_split=0.2)

##### Step 5 - Fit keras model on the dataset

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100


Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100


Epoch 99/100
Epoch 100/100


<keras.callbacks.History at 0x7fbf9d87a5b0>

### TryOuts with Other Algorithms - RandomForest