<a href="https://colab.research.google.com/github/bsaha205/Fall_22_PML/blob/main/PML_HW_4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Task 1



In [14]:
# Tensorflow / Keras
from tensorflow import keras 
from keras.models import Sequential
from keras import Input 
from keras.layers import Dense, SimpleRNN

# Data manipulation
import pandas as pd 
import numpy as np 
import math 

# Sklearn
import sklearn 
from sklearn.model_selection import train_test_split 
from sklearn.metrics import mean_squared_error 
from sklearn.preprocessing import MinMaxScaler 

In [15]:
# read the weatherAUS.csv file
df = pd.read_csv('weatherAUS.csv', encoding='utf-8')

# drop records where either humidity reading (Humidity9am or Humidity3pm) is NaN;
# both readings are needed to build the MedHum column below
df = df.dropna(subset=['Humidity9am', 'Humidity3pm'])

# median daily humidity (with only two readings per row this is the mid point
# between Humidity9am and Humidity3pm)
df['MedHum'] = df[['Humidity9am', 'Humidity3pm']].median(axis=1)

# viewing the first few rows of the data
df.head()

Unnamed: 0,Date,Location,MinTemp,MaxTemp,Rainfall,Evaporation,Sunshine,WindGustDir,WindGustSpeed,WindDir9am,...,Humidity3pm,Pressure9am,Pressure3pm,Cloud9am,Cloud3pm,Temp9am,Temp3pm,RainToday,RainTomorrow,MedHum
0,2008-12-01,Albury,13.4,22.9,0.6,,,W,44.0,W,...,22.0,1007.7,1007.1,8.0,,16.9,21.8,No,No,46.5
1,2008-12-02,Albury,7.4,25.1,0.0,,,WNW,44.0,NNW,...,25.0,1010.6,1007.8,,,17.2,24.3,No,No,34.5
2,2008-12-03,Albury,12.9,25.7,0.0,,,WSW,46.0,W,...,30.0,1007.6,1008.7,,2.0,21.0,23.2,No,No,34.0
3,2008-12-04,Albury,9.2,28.0,0.0,,,NE,24.0,SE,...,16.0,1017.6,1012.8,,,18.1,26.5,No,No,30.5
4,2008-12-05,Albury,17.5,32.3,1.0,,,W,41.0,ENE,...,33.0,1010.8,1006.0,7.0,8.0,17.8,29.7,No,No,57.5


In [16]:
# what's the data size? -> (number of rows, number of columns) of the filtered frame
df.shape

(140186, 24)

In [17]:
# general information about the dataset: per-column non-null counts and dtypes
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 140186 entries, 0 to 145459
Data columns (total 24 columns):
 #   Column         Non-Null Count   Dtype  
---  ------         --------------   -----  
 0   Date           140186 non-null  object 
 1   Location       140186 non-null  object 
 2   MinTemp        139955 non-null  float64
 3   MaxTemp        139975 non-null  float64
 4   Rainfall       138041 non-null  float64
 5   Evaporation    80295 non-null   float64
 6   Sunshine       74323 non-null   float64
 7   WindGustDir    132203 non-null  object 
 8   WindGustSpeed  132234 non-null  float64
 9   WindDir9am     131076 non-null  object 
 10  WindDir3pm     138293 non-null  object 
 11  WindSpeed9am   139322 non-null  float64
 12  WindSpeed3pm   139396 non-null  float64
 13  Humidity9am    140186 non-null  float64
 14  Humidity3pm    140186 non-null  float64
 15  Pressure9am    127650 non-null  float64
 16  Pressure3pm    127709 non-null  float64
 17  Cloud9am       86889 non-null

In [18]:
# preprocessing data
def prep_data(data, time_step):
    """Cut a series into non-overlapping input windows and next-step targets.

    Window ``i`` holds elements ``i*time_step .. (i+1)*time_step - 1`` of
    ``data`` and its target is element ``(i+1)*time_step``, i.e. the value
    that immediately follows the window.

    Parameters
    ----------
    data : array-like supporting integer-array indexing
        The series to cut up. The final reshape to ``(windows, time_step, 1)``
        only succeeds when there is exactly one value per time step.
    time_step : int
        Number of consecutive elements in each input window.

    Returns
    -------
    X : array of shape ``(windows, time_step, 1)``
        Input windows.
    y : indexed selection of ``data``
        One target per window, taken from positions
        ``time_step, 2*time_step, ...`` below ``len(data)``.
    """
    # every time_step-th position (starting at time_step) is a target
    target_positions = np.arange(time_step, len(data), time_step)
    y = data[target_positions]

    # one window per target; trailing elements that cannot fill a window
    # followed by a target are left out
    n_windows = len(y)
    window_positions = np.arange(time_step * n_windows)
    windows_flat = data[window_positions]

    # arrange as (windows, time_step, 1)
    X = np.reshape(windows_flat, (n_windows, time_step, 1))
    return X, y

In [19]:
# select only Canberra
dfCan = df[df['Location']=='Canberra'].copy()

# select data for model (kept as a one-column DataFrame so the scaler gets 2-D input)
medhum_raw = dfCan[['MedHum']]

# create training, validation and testing samples BEFORE scaling;
# shuffle=False keeps the rows in their original order (assumed chronological)
train_raw, test_raw = train_test_split(medhum_raw, test_size=0.2, shuffle=False)
train_raw, valid_raw = train_test_split(train_raw, test_size=0.2, shuffle=False)

# scaling data: fit the scaler on the training slice only, so that the min/max of
# the validation and test slices cannot leak into training, then apply the same
# transform to the held-out slices
scaler = MinMaxScaler()
train_data = scaler.fit_transform(train_raw)
valid_data = scaler.transform(valid_raw)
test_data = scaler.transform(test_raw)

# full scaled series, kept under its original name for any later use
X = scaler.transform(medhum_raw)

print('train_data.shape:', train_data.shape)
print('valid_data.shape:', valid_data.shape)
print('test_data.shape:', test_data.shape)

# prepare input X and target y
time_step = 5
X_train, y_train = prep_data(train_data, time_step)
X_valid, y_valid = prep_data(valid_data, time_step)
X_test, y_test = prep_data(test_data, time_step)

train_data.shape: (2152, 1)
valid_data.shape: (539, 1)
test_data.shape: (673, 1)


In [20]:
# seed Python, NumPy and TensorFlow so that weight initialisation (and other
# seeded randomness) is repeatable when the notebook is re-run from a fresh kernel
keras.utils.set_random_seed(42)

# define RNN Network: input of shape (time_step, 1) -> 1-unit recurrent layer
# -> 1-unit dense layer -> 1-unit linear output (a single regression value)
model = Sequential(name="RNN-Model") 
model.add(Input(shape=(time_step,1), name='Input-Layer'))
model.add(SimpleRNN(units=1, activation='tanh', name='Hidden-Recurrent-Layer')) # Hidden Recurrent Layer with activation='tanh'
model.add(Dense(units=1, activation='tanh', name='Hidden-Layer')) # Hidden Layer with activation='tanh'
model.add(Dense(units=1, activation='linear', name='Output-Layer')) # Output Layer with activation='linear'

# compile RNN model: optimise mean squared error with Adam and also report
# MSE and MAE as metrics
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['MeanSquaredError', 'MeanAbsoluteError'])

In [21]:
# training the model; the validation split is passed in so its loss and metrics
# are reported after every epoch (it is only evaluated, never trained on)
model.fit(X_train, y_train, batch_size=32, epochs=20, validation_data=(X_valid, y_valid))

# model summary
print('-------------------- Model Summary --------------------')
model.summary()

# testing the model
# Predict the result on validation data
pred_valid = model.predict(X_valid)
# Predict the result on test data
pred_test = model.predict(X_test)

# both figures are plain mean squared errors on the scaled targets
print("")
print("Validation MSE: ", mean_squared_error(y_valid, pred_valid))
print("Test MSE: ", mean_squared_error(y_test, pred_test))

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
-------------------- Model Summary --------------------
Model: "RNN-Model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 Hidden-Recurrent-Layer (Sim  (None, 1)                3         
 pleRNN)                                                         
                                                                 
 Hidden-Layer (Dense)        (None, 1)                 2         
                                                                 
 Output-Layer (Dense)        (None, 1)                 2         
                                                                 
Total params: 7
Trainable params: 7
Non-trainable params: 0
________________________________________