

---
Please Note:

Instead of building a new model every time for every feature combination, I reinitialize the input data with the necessary changes and re-run the previously created models. I cross-verified that this method works correctly. Hence, instead of repeating code, I have re-run the models created at the beginning of this file.

I have repeated model creation twice because there is a major difference between the first and second parts: the first part involves only the temperature, whereas the second part onwards uses combinations of features. For the second part and later, I use only one model, re-running it with different data inputs.

Also, the data exploration was done alongside the ARIMA model and hence is not repeated here. However, I have carried the preprocessing forward to this file from the ARIMA file.

---



In [None]:
'''Importing necessary libraries'''

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout, Input, Flatten, SimpleRNN, Dropout

from sklearn.metrics import mean_absolute_error

from sklearn.preprocessing import MinMaxScaler

In [None]:
'''reading data from csv'''

# Load the CSV at the hard-coded path into a DataFrame. The path looks like a
# Colab Google Drive mount, so this presumably requires Drive to be mounted first.
data = pd.read_csv('/content/drive/MyDrive/Problem Set 6/ps6_trainvalid.csv')

# Data Preprocessing

In [None]:
# Parse the `datetime` column into real timestamps and promote it to the
# index; set_index removes the column from the frame in the same step.
data['datetime'] = pd.to_datetime(data['datetime'])
data.set_index('datetime', inplace=True)
data

In [None]:
# Replace missing temperature / humidity / pressure readings with the mean of
# the respective column (means are computed over the whole dataset).
temp_mean = data['temperature'].mean()
humidity_mean = data['humidity'].mean()
pressure_mean = data['pressure'].mean()

data.fillna(
    {
        'temperature': temp_mean,
        'humidity': humidity_mean,
        'pressure': pressure_mean,
    },
    inplace=True,
)

In [None]:
# Fill the remaining gaps: missing weather labels become 'sky is clear'
# (described by the author as the most common weather type), and missing wind
# direction / wind speed readings become 0.0.
data.fillna(
    {
        'weather': 'sky is clear',
        'wind_direction': 0.0,
        'wind_speed': 0.0,
    },
    inplace=True,
)

In [None]:
# Wind direction is an angle in degrees, so encode it as its cosine and sine
# components, stored in two new columns.
wind_radians = (data.wind_direction.values * np.pi) / 180
data['cos_wind'] = np.cos(wind_radians)
data['sin_wind'] = np.sin(wind_radians)

In [None]:
'''checking final dataset for any null values'''

# info() prints the non-null count and dtype of every column, which is how
# remaining nulls are spotted here.
data.info()

In [None]:
# The raw angle is redundant now that cos_wind / sin_wind exist, so remove it
# and preview the first rows of the resulting frame.
data = data.drop(columns='wind_direction')
data.head()

In [None]:
'''looking at different weather types'''

# Frequency of each distinct weather label.
data.weather.value_counts()

# Using only Temperature feature

In [None]:
# Build supervised samples from the temperature series: each sample uses
# `n_hours` consecutive readings as input and the following `n_future`
# readings as the target.
n_hours = 360
n_future = 60

# Work on a plain NumPy array so every window is a cheap array slice instead
# of a pandas Series slice.
temperature = data.temperature.to_numpy()

# Number of start positions for which both the input window and the full
# forecast window fit inside the series (non-positive -> no samples).
n_windows = len(temperature) - n_hours - n_future + 1

X = np.array([temperature[i:i + n_hours] for i in range(n_windows)])
Y = np.array([temperature[i + n_hours:i + n_hours + n_future]
              for i in range(n_windows)])

In [None]:
# Sanity check: display the shapes of the input and target window arrays.
X.shape, Y.shape

In [None]:
# Chronological split of the windows: first 60% for training, next 20% for
# validation, and whatever remains for testing.
n_samples = X.shape[0]
train_index = int(n_samples * 0.6)
val_index = int(n_samples * 0.2)  # size of the validation slice, not a position
val_end = train_index + val_index

x_train, y_train = X[:train_index], Y[:train_index]
x_val, y_val = X[train_index:val_end], Y[train_index:val_end]
x_test, y_test = X[val_end:], Y[val_end:]

x_train.shape, y_train.shape, x_val.shape, y_val.shape, x_test.shape, y_test.shape

In [None]:
# Append a trailing feature axis of size 1 to the training, validation and
# test inputs so they become (samples, timesteps, 1).
x_train, x_val, x_test = (
    split.reshape(*split.shape, 1) for split in (x_train, x_val, x_test)
)

x_train.shape, y_train.shape, x_val.shape, y_val.shape, x_test.shape, y_test.shape



---

Baseline model with dense layer

---



In [None]:
# Baseline model: flatten the (360, 1) window and map it to the 60 forecast
# values with a single Dense layer.
baseline = Sequential()
baseline.add(Flatten(input_shape=[360, 1]))
baseline.add(Dense(60))
baseline.summary()

In [None]:
# Compile the baseline with Adam (learning rate 1e-4) and an MAE loss.
# Adam is imported from tensorflow.keras so that the optimizer comes from the
# same Keras implementation as the Sequential model; mixing in the standalone
# `keras` package can fail when its version differs from the one bundled with
# TensorFlow.
from tensorflow.keras.optimizers import Adam

opt = Adam(learning_rate=0.0001)
baseline.compile(optimizer=opt, loss='mean_absolute_error')

In [None]:
'''training baseline model'''

# Fit for 20 epochs on the training split, evaluating on the validation split
# each epoch; the returned History object is kept for the loss plot.
baseline_history = baseline.fit(x_train, y_train, epochs=20, validation_data=(x_val, y_val))

In [None]:
# Plot the per-epoch training and validation loss of the baseline model.
plt.title('loss')
for history_key, curve_label in (('loss', 'training loss'),
                                 ('val_loss', 'validation loss')):
    plt.plot(baseline_history.history[history_key], label=curve_label)
plt.legend(loc='best')

In [None]:
'''making predictions'''

# Forecasts of the baseline model for every test window.
baseline_y_pred = baseline.predict(x_test)

In [None]:
# Mean absolute error of the baseline forecasts on the test split.
# y_test is already two-dimensional, so it is passed as-is.
mean_absolute_error(y_test, baseline_y_pred)



---

Simple RNN

---



In [None]:
# Recurrent model: one SimpleRNN layer (5 units, ReLU) over a single-feature
# sequence of unspecified length, followed by a Dense layer that emits the 60
# forecast values.
simpleRNN = Sequential()
simpleRNN.add(SimpleRNN(5, input_shape=[None, 1], activation='relu'))
simpleRNN.add(Dense(60))
simpleRNN.summary()

In [None]:
# Compile the SimpleRNN model with Adam (learning rate 0.01) and an MAE loss.
# Adam is taken from tensorflow.keras to match the tensorflow.keras model;
# the standalone `keras` package may be a different, incompatible version.
from tensorflow.keras.optimizers import Adam

opt = Adam(learning_rate=0.01)
simpleRNN.compile(optimizer=opt, loss='mean_absolute_error')

In [None]:
'''training phase'''

# Fit the SimpleRNN model for 10 epochs with per-epoch validation; the History
# object is kept for the loss plot.
simpleRNN_history = simpleRNN.fit(x_train, y_train, epochs=10, validation_data=(x_val, y_val))

In [None]:
# Plot the per-epoch training and validation loss of the SimpleRNN model.
# Title, labels and legend follow the baseline loss plot so the two curves
# can be told apart.
plt.title('loss')
plt.plot(simpleRNN_history.history['loss'], label='training loss')
plt.plot(simpleRNN_history.history['val_loss'], label='validation loss')
plt.legend(loc='best')

In [None]:
'''making predictions'''

# Forecasts of the SimpleRNN model for every test window.
simpleRNN_y_pred = simpleRNN.predict(x_test)

In [None]:
'''calculating mean absolute error'''

# Test-set MAE of the SimpleRNN forecasts against the true targets.
mean_absolute_error(y_test, simpleRNN_y_pred)



---

LSTM

---



In [None]:
# LSTM model: one LSTM layer (10 units, ReLU) that returns its output at every
# one of the 360 timesteps; the sequence is flattened and mapped to the 60
# forecast values by a Dense layer.
lstm = Sequential()
lstm.add(LSTM(10, activation='relu', return_sequences=True, input_shape=(360, 1)))
# lstm.add(Dropout(0.3))  # disabled in the original model definition
lstm.add(Flatten())
lstm.add(Dense(60))
lstm.summary()

In [None]:
# Compile the LSTM model with Adam (learning rate 0.005) and an MAE loss, then
# train it for 5 epochs with per-epoch validation.
# `learning_rate` replaces the deprecated `lr` keyword, which newer Keras
# releases reject. Adam is imported from tensorflow.keras to match the model.
from tensorflow.keras.optimizers import Adam

opt = Adam(learning_rate=0.005)
lstm.compile(optimizer=opt, loss='mean_absolute_error')
lstm_history = lstm.fit(x_train, y_train, epochs=5, validation_data=(x_val, y_val))

In [None]:
# Plot the per-epoch training and validation loss of the LSTM model.
# Title, labels and legend follow the baseline loss plot.
plt.title('loss')
plt.plot(lstm_history.history['loss'], label='training loss')
plt.plot(lstm_history.history['val_loss'], label='validation loss')
plt.legend(loc='best')

In [None]:
'''making predictions and calculating mean absolute error'''

# Forecast every test window with the LSTM model, then report the test-set MAE.
lstm_ypred = lstm.predict(x_test)
mean_absolute_error(y_test, lstm_ypred)

# With features

In [None]:
# Pairwise correlation between the numeric features.
# Non-numeric columns (the `weather` labels) are excluded explicitly: recent
# pandas versions raise on them instead of silently dropping them.
data.select_dtypes(include='number').corr()

In [None]:
# One-hot encode the categorical column(s) — presumably only `weather` at this
# point. dtype=float keeps every column numeric: newer pandas versions emit
# bool dummies by default, which turns the stacked window arrays into
# object dtype that Keras cannot train on.
new = pd.get_dummies(data, dtype=float)

In [None]:
# Compare the frame before and after encoding, alongside the number of
# distinct weather labels that produced the extra columns.
data.shape, new.shape, data.weather.nunique()

In [None]:
# Build supervised samples from the encoded frame: each sample uses `n_hours`
# consecutive rows of all features as input and the following `n_future`
# temperature readings as the target.
n_hours = 360
n_future = 60

# Convert once to float NumPy arrays so each window is a cheap array slice and
# the stacked result has a numeric dtype regardless of the dummy-column dtype.
features = new.to_numpy(dtype=float)
target = new.temperature.to_numpy()

# Number of start positions for which both the input window and the full
# forecast window fit inside the data (non-positive -> no samples).
n_windows = len(new) - n_hours - n_future + 1

X = np.array([features[i:i + n_hours] for i in range(n_windows)])
Y = np.array([target[i + n_hours:i + n_hours + n_future]
              for i in range(n_windows)])

In [None]:
# Sanity check: display the shapes of the input and target window arrays.
X.shape, Y.shape

In [None]:
# Chronological split of the windows: first 60% for training, next 20% for
# validation, and whatever remains for testing.
n_samples = X.shape[0]
train_index = int(n_samples * 0.6)
val_index = int(n_samples * 0.2)  # size of the validation slice, not a position
val_end = train_index + val_index

x_train, y_train = X[:train_index], Y[:train_index]
x_val, y_val = X[train_index:val_end], Y[train_index:val_end]
x_test, y_test = X[val_end:], Y[val_end:]

x_train.shape, y_train.shape, x_val.shape, y_val.shape, x_test.shape, y_test.shape

In [None]:
# Baseline for the multi-feature windows: flatten each window and map it to
# the 60 forecast values with a single Dense layer.
# The input shape is read from x_train instead of hard-coding (360, 31), since
# the feature count depends on how many columns the current input data has.
baseplusft = Sequential([
    Flatten(input_shape=x_train.shape[1:]),
    Dense(60)
])
baseplusft.summary()

In [None]:
# Compile the multi-feature baseline with Adam (learning rate 1e-4) and an MAE
# loss. Adam is taken from tensorflow.keras to match the tensorflow.keras
# model; the standalone `keras` package may be a different, incompatible version.
from tensorflow.keras.optimizers import Adam

opt = Adam(learning_rate=0.0001)
baseplusft.compile(optimizer=opt, loss='mean_absolute_error')

In [None]:
'''training phase'''

# Fit the multi-feature baseline for 20 epochs with per-epoch validation; the
# History object is kept for the loss plot.
baseplusft_history = baseplusft.fit(x_train, y_train, epochs=20, validation_data=(x_val, y_val))

In [None]:
# Plot the per-epoch training and validation loss of the multi-feature
# baseline. Title, labels and legend follow the first baseline loss plot.
plt.title('loss')
plt.plot(baseplusft_history.history['loss'], label='training loss')
plt.plot(baseplusft_history.history['val_loss'], label='validation loss')
plt.legend(loc='best')

In [None]:
'''making predictions'''
# Forecasts of the multi-feature baseline for every test window.
baseplusft_ypred = baseplusft.predict(x_test)

In [None]:
'''calculating mean absolute error'''
# Test-set MAE of the multi-feature baseline forecasts against the true targets.
mean_absolute_error(y_test, baseplusft_ypred)

In [None]:
# Recurrent model for the multi-feature windows: one SimpleRNN layer
# (24 units) followed by a Dense layer that emits the 60 forecast values.
# The input shape is read from x_train instead of hard-coding (360, 31), since
# the feature count depends on how many columns the current input data has.
simpleRNNplusft = Sequential([
    SimpleRNN(24, input_shape=x_train.shape[1:]),
    Dense(60)
])
simpleRNNplusft.summary()

In [None]:
# Compile the multi-feature SimpleRNN with Adam (learning rate 0.01) and an
# MAE loss. Adam is taken from tensorflow.keras to match the tensorflow.keras
# model; the standalone `keras` package may be a different, incompatible version.
from tensorflow.keras.optimizers import Adam

opt = Adam(learning_rate=0.01)
simpleRNNplusft.compile(optimizer=opt, loss='mean_absolute_error')

In [None]:
'''training phase'''

# Fit the multi-feature SimpleRNN for 10 epochs with per-epoch validation; the
# History object is kept for the loss plot.
simpleRNNplusft_history = simpleRNNplusft.fit(x_train, y_train, epochs=10, validation_data=(x_val, y_val))

In [None]:
# Plot the per-epoch training and validation loss of the multi-feature
# SimpleRNN. Title, labels and legend follow the baseline loss plot.
plt.title('loss')
plt.plot(simpleRNNplusft_history.history['loss'], label='training loss')
plt.plot(simpleRNNplusft_history.history['val_loss'], label='validation loss')
plt.legend(loc='best')

In [None]:
'''making predictions'''

# Forecasts of the multi-feature SimpleRNN for every test window.
simpleRNNplusft_ypred = simpleRNNplusft.predict(x_test)

In [None]:
'''calculating mean absolute error'''

# Test-set MAE of the multi-feature SimpleRNN forecasts against the true targets.
mean_absolute_error(y_test, simpleRNNplusft_ypred)

In [None]:
# LSTM model for the multi-feature windows: one LSTM layer (10 units, ReLU)
# returning its output at every timestep, flattened and mapped to the 60
# forecast values by a Dense layer.
# The input shape is read from x_train instead of hard-coding (360, 31), since
# the feature count depends on how many columns the current input data has.
lstmplusft = Sequential([
    LSTM(10, activation='relu', return_sequences=True,
         input_shape=x_train.shape[1:]),
    # Dropout(0.3),  # disabled in the original model definition
    Flatten(),
    Dense(60)
])
lstmplusft.summary()

In [None]:
# Compile the multi-feature LSTM with Adam (learning rate 0.01) and an MAE
# loss. Adam is taken from tensorflow.keras to match the tensorflow.keras
# model; the standalone `keras` package may be a different, incompatible version.
from tensorflow.keras.optimizers import Adam

opt = Adam(learning_rate=0.01)
lstmplusft.compile(optimizer=opt, loss='mean_absolute_error')

In [None]:
'''training phase'''

# Fit the multi-feature LSTM for 10 epochs with per-epoch validation; the
# History object is kept for the loss plot.
lstmplusft_history = lstmplusft.fit(x_train, y_train, epochs=10, validation_data=(x_val, y_val))

In [None]:
# Plot the per-epoch training and validation loss of the multi-feature LSTM.
# Title, labels and legend follow the baseline loss plot.
plt.title('loss')
plt.plot(lstmplusft_history.history['loss'], label='training loss')
plt.plot(lstmplusft_history.history['val_loss'], label='validation loss')
plt.legend(loc='best')

In [None]:
'''making predictions'''

# Forecasts of the multi-feature LSTM for every test window.
lstmplusft_ypred = lstmplusft.predict(x_test)

In [None]:
'''calculating mean absolute error'''

# Test-set MAE of the multi-feature LSTM forecasts against the true targets.
mean_absolute_error(y_test, lstmplusft_ypred)

# Without weather

In [None]:
# Feature set without the categorical weather label; `data` itself is left
# untouched because drop returns a new frame.
wo_weather = data.drop(columns='weather')

In [None]:
# Sanity check: rows and remaining feature columns after dropping `weather`.
wo_weather.shape

In [None]:
# Build supervised samples from the weather-free frame: each sample uses
# `n_hours` consecutive rows of all remaining features as input and the
# following `n_future` temperature readings as the target.
n_hours = 360
n_future = 60

# Convert once to float NumPy arrays so each window is a cheap array slice.
features = wo_weather.to_numpy(dtype=float)
target = wo_weather.temperature.to_numpy()

# Bounds come from `wo_weather` itself (not the encoded frame `new`), so this
# cell no longer depends on an unrelated DataFrame having the same length.
n_windows = len(wo_weather) - n_hours - n_future + 1

X = np.array([features[i:i + n_hours] for i in range(n_windows)])
Y = np.array([target[i + n_hours:i + n_hours + n_future]
              for i in range(n_windows)])

In [None]:
# Sanity check: display the shapes of the input and target window arrays.
X.shape, Y.shape

In [None]:
# Chronological split of the windows: first 60% for training, next 20% for
# validation, and whatever remains for testing.
n_samples = X.shape[0]
train_index = int(n_samples * 0.6)
val_index = int(n_samples * 0.2)  # size of the validation slice, not a position
val_end = train_index + val_index

x_train, y_train = X[:train_index], Y[:train_index]
x_val, y_val = X[train_index:val_end], Y[train_index:val_end]
x_test, y_test = X[val_end:], Y[val_end:]

x_train.shape, y_train.shape, x_val.shape, y_val.shape, x_test.shape, y_test.shape

# Without weather and wind directions

In [None]:
# Feature set without the weather label and without the two wind-direction
# components (cos_wind / sin_wind); `data` itself is left untouched.
minuswaw = data.drop(columns=['weather', 'cos_wind', 'sin_wind'])

In [None]:
# Build supervised samples from the frame without weather and wind-direction
# components: each sample uses `n_hours` consecutive rows of all remaining
# features as input and the following `n_future` temperature readings as the
# target.
n_hours = 360
n_future = 60

# Convert once to float NumPy arrays so each window is a cheap array slice.
features = minuswaw.to_numpy(dtype=float)
target = minuswaw.temperature.to_numpy()

# Bounds come from `minuswaw` itself (not the encoded frame `new`), so this
# cell no longer depends on an unrelated DataFrame having the same length.
n_windows = len(minuswaw) - n_hours - n_future + 1

X = np.array([features[i:i + n_hours] for i in range(n_windows)])
Y = np.array([target[i + n_hours:i + n_hours + n_future]
              for i in range(n_windows)])

In [None]:
# Sanity check: display the shapes of the input and target window arrays.
X.shape, Y.shape

In [None]:
# Chronological split of the windows: first 60% for training, next 20% for
# validation, and whatever remains for testing.
n_samples = X.shape[0]
train_index = int(n_samples * 0.6)
val_index = int(n_samples * 0.2)  # size of the validation slice, not a position
val_end = train_index + val_index

x_train, y_train = X[:train_index], Y[:train_index]
x_val, y_val = X[train_index:val_end], Y[train_index:val_end]
x_test, y_test = X[val_end:], Y[val_end:]

x_train.shape, y_train.shape, x_val.shape, y_val.shape, x_test.shape, y_test.shape