In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
%matplotlib inline
import math
import datetime
import tensorflow as tf
from tensorflow import keras
from google.colab import files  #Comment if not using google colab

In [None]:
# Read the observations CSV, parsing `obstime` as datetime and using it as the index,
# then keep only the first column (as a one-column DataFrame): we are interested in
# temperature forecasting.
df = (
    pd.read_csv(
        '/content/drive/My Drive/Weather Prediction and Forecasting/inter-999-interpolate_final.csv',
        parse_dates=['obstime'],
        index_col=['obstime'],
    )
    .iloc[:, :1]
)
df

In [None]:
# We have hourly temperatures but want to forecast daily temperatures, so every run
# of 24 consecutive hourly rows is collapsed into one daily mean (rounded to 2 decimals).
# NOTE(review): grouping is positional, not by timestamp - it assumes the rows are
# consecutive hourly readings starting at 00:00 on 2 June 2016; confirm against the CSV.

HOURS_PER_DAY = 24
N_DAYS = 1113    # The dataset covers 1113 days [2 June 2016 - 19 June 2019]

# One timestamp per day, used as the index of the daily frame.
add_dates = pd.date_range(start='2016-06-02', periods=N_DAYS, freq='D')

# First column of the hourly frame, limited to the rows that belong to the N_DAYS days.
hourly_temps = df.iloc[:N_DAYS * HOURS_PER_DAY, 0]

# Row k belongs to day k // 24.
day_number = np.arange(len(hourly_temps)) // HOURS_PER_DAY

# Sum per day; days with no rows at all get a sum of 0, exactly like summing an empty slice.
daily_sum = hourly_temps.groupby(day_number).sum().reindex(range(N_DAYS), fill_value=0.0)

# The divisor is always 24, even if a day has missing readings.
temp_avg = (daily_sum / float(HOURS_PER_DAY)).round(2)

df = pd.DataFrame(temp_avg.tolist(), index=add_dates, columns=['tempr'])       # Daily average temperatures

In [None]:
time_steps_CNN = 30     # number of past days in each CNN input window
train_size = 943        # Training duration - 2 June 2016 - 31 Dec 2018
test_size = len(df) - train_size    # Test duration - 1 Jan 2019 - 19 June 2019

train_CNN = df.iloc[:train_size]
# The test slice starts `time_steps_CNN` rows before the split point so that the
# first test target already has a full window of history in front of it.
test_CNN = df.iloc[train_size - time_steps_CNN:]

In [None]:
time_steps_RNN = 60     # number of past days in each RNN (GRU) input window
train_size = 943        # Training duration - 2 June 2016 - 31 Dec 2018
test_size = len(df) - train_size    # Test duration - 1 Jan 2019 - 19 June 2019

train_RNN = df.iloc[:train_size]
# The test slice starts `time_steps_RNN` rows before the split point so that the
# first test target already has a full window of history in front of it.
test_RNN = df.iloc[train_size - time_steps_RNN:]

In [None]:
time_steps_LSTM = 60    # number of past days in each LSTM input window
train_size = 943        # Training duration - 2 June 2016 - 31 Dec 2018
test_size = len(df) - train_size    # Test duration - 1 Jan 2019 - 19 June 2019

train_LSTM = df.iloc[:train_size]
# The test slice starts `time_steps_LSTM` rows before the split point so that the
# first test target already has a full window of history in front of it.
test_LSTM = df.iloc[train_size - time_steps_LSTM:]

In [None]:
def create_dataset(X, y, time_steps):
    """Turn a series into (window, next value) pairs for supervised learning.

    For every start position ``i`` with ``i + time_steps < len(X)``, the sample
    is the rows ``X.iloc[i : i + time_steps]`` and its target is
    ``y.iloc[i + time_steps]`` (the value right after the window).

    Parameters
    ----------
    X : pandas object supporting ``.iloc`` slicing and ``.values``
        Feature rows, in time order.
    y : pandas object supporting ``.iloc``
        Target values aligned positionally with ``X``.
    time_steps : int
        Number of consecutive rows in each input window.

    Returns
    -------
    tuple of numpy.ndarray
        ``(Xs, ys)`` with one entry per window; both are empty arrays when
        ``len(X) <= time_steps``.
    """
    n_windows = len(X) - time_steps
    windows = [X.iloc[start:start + time_steps].values for start in range(n_windows)]
    targets = [y.iloc[start + time_steps] for start in range(n_windows)]
    return np.array(windows), np.array(targets)

In [None]:
# Sliding windows of `time_steps_CNN` rows as inputs, the following day's `tempr` as target.
X_train_CNN, y_train_CNN = create_dataset(X=train_CNN, y=train_CNN['tempr'], time_steps=time_steps_CNN)
X_test_CNN, y_test_CNN = create_dataset(X=test_CNN, y=test_CNN['tempr'], time_steps=time_steps_CNN)
# Debug aid: print(X_train_CNN.shape, y_train_CNN.shape, X_test_CNN.shape, y_test_CNN.shape)

In [None]:
# Sliding windows of `time_steps_RNN` rows as inputs, the following day's `tempr` as target.
X_train_RNN, y_train_RNN = create_dataset(X=train_RNN, y=train_RNN['tempr'], time_steps=time_steps_RNN)
X_test_RNN, y_test_RNN = create_dataset(X=test_RNN, y=test_RNN['tempr'], time_steps=time_steps_RNN)
# Debug aid: print(X_train_RNN.shape, y_train_RNN.shape, X_test_RNN.shape, y_test_RNN.shape)

In [None]:
# Sliding windows of `time_steps_LSTM` rows as inputs, the following day's `tempr` as target.
X_train_LSTM, y_train_LSTM = create_dataset(X=train_LSTM, y=train_LSTM['tempr'], time_steps=time_steps_LSTM)
X_test_LSTM, y_test_LSTM = create_dataset(X=test_LSTM, y=test_LSTM['tempr'], time_steps=time_steps_LSTM)
# Debug aid: print(X_train_LSTM.shape, y_train_LSTM.shape, X_test_LSTM.shape, y_test_LSTM.shape)

In [None]:
# CNN model built with the Keras API of TensorFlow.
#
# Every layer is taken from the `keras` name imported at the top of the notebook
# (`from tensorflow import keras`). Importing from the standalone `keras` package
# instead (e.g. `keras.layers.convolutional`, which newer Keras releases no longer
# provide) can fail at import time or mix two different Keras installations with
# the `keras.optimizers.Adam` optimizer used below.

model1 = keras.Sequential()

# Convolution over the time axis; input is (window length, number of features).
model1.add(keras.layers.Conv1D(
    filters=64,
    kernel_size=3,
    activation='relu',
    input_shape=(X_train_CNN.shape[1], X_train_CNN.shape[2]),
))
model1.add(keras.layers.MaxPooling1D(pool_size=2))

# Dense head producing a single regression output (next-day temperature).
model1.add(keras.layers.Flatten())
model1.add(keras.layers.Dense(32, activation='relu'))
model1.add(keras.layers.Dense(1))

model1.compile(optimizer=keras.optimizers.Adam(0.001), loss='mean_squared_error')
# Debug aid: model1.summary()

In [None]:
# GRU model built with the Keras API of TensorFlow.
#
# Layers come from the `keras` name imported at the top of the notebook
# (`from tensorflow import keras`) rather than from the standalone `keras` package,
# so the model and the `keras.optimizers.Adam` optimizer below are guaranteed to
# belong to the same Keras installation.

model2 = keras.Sequential()

# Recurrent layer over the input window; input is (window length, number of features).
model2.add(keras.layers.GRU(
    100,
    activation='tanh',
    input_shape=(X_train_RNN.shape[1], X_train_RNN.shape[2]),
))
model2.add(keras.layers.Dense(64, activation='tanh'))
model2.add(keras.layers.Dense(1))    # single regression output

model2.compile(loss='mean_squared_error', optimizer=keras.optimizers.Adam(0.001))
# Debug aid: model2.summary()

In [None]:
# LSTM model built with the Keras API of TensorFlow: one LSTM layer over the
# input window followed by a single-unit dense layer for the regression output.
lstm_input_shape = (X_train_LSTM.shape[1], X_train_LSTM.shape[2])
model3 = keras.Sequential([
    keras.layers.LSTM(units=128, input_shape=lstm_input_shape),
    keras.layers.Dense(1),
])
model3.compile(
    optimizer=keras.optimizers.Adam(0.001),
    loss='mean_squared_error',
    metrics=['mae'],
)
# Debug aid: model3.summary()

In [None]:
# Train the CNN. Samples are kept in time order (shuffle=False) and 10% of the
# training windows are held out for validation; per-epoch output is silenced.
history1 = model1.fit(
    x=X_train_CNN,
    y=y_train_CNN,
    batch_size=32,
    epochs=100,
    validation_split=0.1,
    shuffle=False,
    verbose=0,
)

In [None]:
# Train the GRU model. Samples are kept in time order (shuffle=False) and 10% of
# the training windows are held out for validation; per-epoch output is silenced.
history2 = model2.fit(
    x=X_train_RNN,
    y=y_train_RNN,
    batch_size=32,
    epochs=50,
    validation_split=0.1,
    shuffle=False,
    verbose=0,
)

In [None]:
# Train the LSTM model. Samples are kept in time order (shuffle=False) and 10% of
# the training windows are held out for validation; per-epoch output is silenced.
history3 = model3.fit(
    x=X_train_LSTM,
    y=y_train_LSTM,
    batch_size=32,
    epochs=50,
    validation_split=0.1,
    shuffle=False,
    verbose=0,
)

In [None]:
# Predict on each model's own test windows and round the forecasts to 2 decimals.
y_pred_CNN = np.round(model1.predict(X_test_CNN), decimals=2)
y_pred_RNN = np.round(model2.predict(X_test_RNN), decimals=2)
y_pred_LSTM = np.round(model3.predict(X_test_LSTM), decimals=2)

In [None]:
# One frame per model holding the actual (`tempr`) and predicted (`Prediction`)
# temperatures for the test period. The first `time_steps_*` rows of each test
# slice are only history for the first window, so they are skipped.
#
# `.copy()` makes each frame own its data: adding a column to a bare slice of
# `test_*` raises SettingWithCopyWarning and leaves it unclear which object is
# being modified. `.ravel()` flattens the (n, 1) model output to one value per row.
df_test_pred_CNN = test_CNN.iloc[time_steps_CNN:].copy()
df_test_pred_CNN['Prediction'] = y_pred_CNN.ravel()

df_test_pred_RNN = test_RNN.iloc[time_steps_RNN:].copy()
df_test_pred_RNN['Prediction'] = y_pred_RNN.ravel()

df_test_pred_LSTM = test_LSTM.iloc[time_steps_LSTM:].copy()
df_test_pred_LSTM['Prediction'] = y_pred_LSTM.ravel()

In [None]:
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error

In [None]:
# CNN test-set errors, actual (`tempr`) vs. predicted (`Prediction`).
testScore_mse_CNN = mean_squared_error(
    df_test_pred_CNN['tempr'], df_test_pred_CNN['Prediction'])    # mean squared error
testScore_rmse_CNN = math.sqrt(testScore_mse_CNN)                 # root mean squared error
testScore_mae_CNN = mean_absolute_error(
    df_test_pred_CNN['tempr'], df_test_pred_CNN['Prediction'])    # mean absolute error

In [None]:
# RNN (GRU) test-set errors, actual (`tempr`) vs. predicted (`Prediction`).
testScore_mse_RNN = mean_squared_error(
    df_test_pred_RNN['tempr'], df_test_pred_RNN['Prediction'])    # mean squared error
testScore_rmse_RNN = math.sqrt(testScore_mse_RNN)                 # root mean squared error
testScore_mae_RNN = mean_absolute_error(
    df_test_pred_RNN['tempr'], df_test_pred_RNN['Prediction'])    # mean absolute error

In [None]:
# LSTM test-set errors, actual (`tempr`) vs. predicted (`Prediction`).
testScore_mse_LSTM = mean_squared_error(
    df_test_pred_LSTM['tempr'], df_test_pred_LSTM['Prediction'])    # mean squared error
testScore_rmse_LSTM = math.sqrt(testScore_mse_LSTM)                 # root mean squared error
testScore_mae_LSTM = mean_absolute_error(
    df_test_pred_LSTM['tempr'], df_test_pred_LSTM['Prediction'])    # mean absolute error

In [None]:
def _print_test_metrics(model_label, mse, rmse, mae):
    """Print the MSE / RMSE / MAE report for one model, followed by a blank gap."""
    print("Test set evaluation metrics using " + model_label + ":")
    print("Mean Squared Error = " + str(mse))
    print("Root Mean Squared Error = " + str(rmse))
    print("Mean Absolute Error = " + str(mae))
    print("\n")


_print_test_metrics("CNN", testScore_mse_CNN, testScore_rmse_CNN, testScore_mae_CNN)
_print_test_metrics("RNN", testScore_mse_RNN, testScore_rmse_RNN, testScore_mae_RNN)
_print_test_metrics("LSTM", testScore_mse_LSTM, testScore_rmse_LSTM, testScore_mae_LSTM)

In [None]:
# Smallest test RMSE over all three models. The CNN score must take part in the
# comparison, otherwise the CNN can never be selected as the best model below.
testScore_rmse = min(testScore_rmse_CNN, testScore_rmse_RNN, testScore_rmse_LSTM)

In [None]:
# Pick the model whose test RMSE equals the minimum RMSE (the best performer).
# LSTM is the fallback; the checks run from lowest to highest priority so that a
# later match overrides an earlier one (CNN wins over RNN, RNN wins over LSTM).
best_model, train, df_test_pred = "LSTM", train_LSTM, df_test_pred_LSTM
if testScore_rmse == testScore_rmse_RNN:
    best_model, train, df_test_pred = "RNN", train_RNN, df_test_pred_RNN
if testScore_rmse == testScore_rmse_CNN:
    best_model, train, df_test_pred = "CNN", train_CNN, df_test_pred_CNN

print(best_model + " gave the best results.\n")
print(df_test_pred)

 

In [None]:
# Plot the training data, the actual test temperatures and the best model's
# predictions on one time axis, then save the figure as Results.png.

# The seaborn style sheets were renamed to 'seaborn-v0_8-*' in matplotlib 3.6 and
# the old names were later removed, so pick whichever name this version provides.
poster_style = 'seaborn-v0_8-poster' if 'seaborn-v0_8-poster' in plt.style.available else 'seaborn-poster'
plt.style.use(poster_style)    # sets the size of the charts
plt.style.use('ggplot')

fig, ax = plt.subplots(figsize=(40, 17))
ax.set_facecolor('#FFFFE0')

ax.plot(train.index, train['tempr'], label="Training data", color='blue')
ax.plot(df_test_pred.index, df_test_pred['tempr'], marker='.', color='#013220', label="Actual temperatures")
ax.plot(df_test_pred.index, df_test_pred['Prediction'], 'r', label="Predicted Temperatures")

ax.set_xlabel('Timestep', size=30)
ax.set_ylabel('Temperature (deg celsius)', size=25)

# Derive the date ranges and data shares from the frames actually plotted instead
# of hard-coding them, so the title cannot drift away from the real split.
date_fmt = '%Y-%m-%d'
train_pct = int(round(100.0 * len(train) / (len(train) + len(df_test_pred))))
title_text = (
    "{} gave the best results.\n"
    "Digital Current Weather Information System (DCWIS)\n"
    " Parameter - Temperature\n"
    " Duration of Training = {} :: {} ({}% data)\n"
    " Predicted Duration = {} :: {} ({}% data)"
).format(
    best_model,
    train.index.min().strftime(date_fmt), train.index.max().strftime(date_fmt), train_pct,
    df_test_pred.index.min().strftime(date_fmt), df_test_pred.index.max().strftime(date_fmt), 100 - train_pct,
)
ax.set_title(title_text, size=25)

ax.tick_params(axis='both', labelsize=25)
ax.legend(loc='best', fontsize=20)    # a single legend call; a second call would replace the first
ax.set_xlim([df.index.min(), df.index.max()])
ax.grid(color='black', linestyle='-.', linewidth=0.9)

fig.savefig('Results.png', dpi=300)
#files.download("Results.png")   #Uncomment to download if you are working in google colab