In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
%matplotlib inline          
import math
import datetime
import tensorflow as tf
from tensorflow import keras
from google.colab import files  #Comment this line if you are not using google colab

#plt.rc('xtick',labelsize=30)
#plt.rc('ytick',labelsize=30)

  import pandas.util.testing as tm


In [2]:
# Load the hourly observations; 'obstime' is parsed to datetime and used as the index.
# NOTE(review): the displayed index starts at 2016-02-06 although the series is later treated as
# starting on 2 June 2016 -- the raw timestamps may be day-first; confirm whether dayfirst=True is needed.
csv_path = '/content/drive/My Drive/Weather Prediction and Forecasting/inter-999-interpolate_final.csv'
hourly_obs = pd.read_csv(csv_path, index_col=['obstime'], parse_dates=['obstime'])

# Only the first column (temperature) is kept: we are interested in temperature forecasting.
df = hourly_obs.iloc[:, :1]
df

Unnamed: 0_level_0,tempr
obstime,Unnamed: 1_level_1
2016-02-06 00:00:00,-13.58
2016-02-06 01:00:00,-13.95
2016-02-06 02:00:00,-13.96
2016-02-06 03:00:00,-14.16
2016-02-06 04:00:00,-14.49
...,...
2019-06-19 19:00:00,-26.06
2019-06-19 20:00:00,-26.10
2019-06-19 21:00:00,-26.75
2019-06-19 22:00:00,-27.64


In [3]:
"""
Since we have hourly temperatures and we are interested in forecasting daily temperatures,
we manually calculate the daily mean temperature.

"""

from pandas.tseries.offsets import DateOffset

HOURS_PER_DAY = 24
first_day = datetime.datetime(2016, 6, 2)    # The hourly data is treated as starting on 2 June 2016

# NOTE: this cell rebinds `df` from hourly to daily data, so re-run the loading cell
# before re-running this one.
hourly_tempr = df.iloc[:, 0]

# Days are formed positionally (every 24 consecutive rows = one day) instead of from the parsed
# timestamps. Integer-dividing the row position by 24 gives each row its day number.
day_number = np.arange(len(hourly_tempr)) // HOURS_PER_DAY

# Use the mean of the rows actually present in each day. Dividing the sum by a fixed 24 would
# bias any incomplete day (the hourly data ends at 22:00 on the last day).
temp_avg = hourly_tempr.groupby(day_number).mean().round(2).reset_index(drop=True)

# One calendar date per computed day; the day count follows the data instead of being hard-coded.
n_days = len(temp_avg)
add_dates = [first_day + DateOffset(days=x) for x in range(n_days)]

df = pd.DataFrame(temp_avg.tolist(), index=add_dates, columns=['tempr'])       # Daily average temperatures
df

Unnamed: 0,tempr
2016-06-02,-15.25
2016-06-03,-19.66
2016-06-04,-19.92
2016-06-05,-17.99
2016-06-06,-16.07
...,...
2019-06-15,-21.77
2019-06-16,-20.06
2019-06-17,-17.40
2019-06-18,-17.43


In [4]:
time_steps = 60      # Number of consecutive days in each input window

# Chronological split by row position.
train_size = 943                      # Training duration - 2 June 2016 - 31 Dec 2018
test_size = len(df) - train_size      # Test duration - 1 Jan 2019 - 19 June 2019

train = df.iloc[:train_size]
# The test frame starts `time_steps` rows before the split point so that the first window
# built from it has its target on the first row after the training period.
test = df.iloc[train_size - time_steps:]

In [5]:
# Display the training frame. To inspect the test frame instead, comment out `train`
# and uncomment `test` below.
train  
#test

Unnamed: 0,tempr
2016-06-02,-15.25
2016-06-03,-19.66
2016-06-04,-19.92
2016-06-05,-17.99
2016-06-06,-16.07
...,...
2018-12-27,1.19
2018-12-28,0.68
2018-12-29,0.45
2018-12-30,0.54


In [6]:
def create_dataset(X, y, time_steps):
    """Turn a series into sliding input windows and next-step targets.

    For every start position ``s`` in ``range(len(X) - time_steps)`` the rows
    ``X.iloc[s : s + time_steps]`` form one input sample and
    ``y.iloc[s + time_steps]`` is its target.

    Parameters
    ----------
    X : positionally indexable via ``.iloc`` (e.g. a DataFrame); supplies the windows.
    y : positionally indexable via ``.iloc`` (e.g. a Series); supplies the targets.
    time_steps : int
        Number of consecutive rows in each window.

    Returns
    -------
    tuple of numpy.ndarray
        ``(windows, targets)``. When ``len(X) <= time_steps`` no windows exist
        and both arrays are empty.
    """
    n_windows = len(X) - time_steps
    windows = [X.iloc[start:start + time_steps].values for start in range(n_windows)]
    targets = [y.iloc[start + time_steps] for start in range(n_windows)]
    return np.array(windows), np.array(targets)

In [7]:
# Build the sliding-window inputs and next-day targets for each split.
X_train, y_train = create_dataset(train, train['tempr'], time_steps)
X_test, y_test = create_dataset(test, test['tempr'], time_steps)

# Report (inputs shape, targets shape) for the training split, then the test split.
for inputs, targets in ((X_train, y_train), (X_test, y_test)):
    print(inputs.shape, targets.shape)

(883, 60, 1) (883,)
(170, 60, 1) (170,)


In [8]:
#print(X_train)  # Uncomment to see
#print(y_train)

In [9]:

#print(X_test)  # Uncomment to see
#print(y_test)

In [10]:
"""
Creating the model using keras API of tensorflow
"""
# Seed NumPy and TensorFlow before any layer is created so that weight initialisation
# (and therefore the training run) is repeatable after a kernel restart.
# NOTE(review): some GPU kernels may still be non-deterministic -- confirm if exact
# reproducibility is required.
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

model = keras.Sequential()
# One LSTM layer with 128 units; each sample has shape (time_steps, n_features),
# taken from the training windows.
model.add(keras.layers.LSTM(units = 128, input_shape=(X_train.shape[1], X_train.shape[2])))

# A single output unit: the predicted temperature for the step after the window.
model.add(keras.layers.Dense(1))
# Optimise mean squared error with Adam (learning rate 0.001); also track mean absolute error.
model.compile(loss='mean_squared_error', optimizer=keras.optimizers.Adam(0.001), metrics=['mae'])
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, 128)               66560     
_________________________________________________________________
dense (Dense)                (None, 1)                 129       
Total params: 66,689
Trainable params: 66,689
Non-trainable params: 0
_________________________________________________________________


In [None]:
"""
Train the model using appropriate batch size and epochs

"""
fit_options = dict(
    epochs=50,
    batch_size=32,
    validation_split=0.1,   # hold out 10% of the training samples for validation
    verbose=1,
    shuffle=False,          # do not shuffle the training samples between epochs
)
history = model.fit(X_train, y_train, **fit_options)

In [None]:
"""
Let's see how our model is learning after every epoch

"""
fig, ax = plt.subplots()
ax.plot(history.history['loss'], label='train')
# val_loss is measured on the validation_split held out of the training data during fit(),
# not on the test set, so it is labelled 'validation'.
ax.plot(history.history['val_loss'], label='validation')
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss (mean squared error)')
ax.legend();

In [None]:
# Making predictions on our testing data, then rounding off to 2 decimals
raw_predictions = model.predict(X_test)
y_pred = np.round(raw_predictions, decimals=2)

In [None]:
# Drop the first `time_steps` rows of `test`: they only serve as the input window for the
# first prediction and have no prediction of their own.
# .copy() makes an independent frame, so adding a column below neither raises
# SettingWithCopyWarning nor depends on whether the slice is a view of `test`.
df_test_pred = test.iloc[time_steps:].copy()
# The model output has one column; flatten it to one value per row.
df_test_pred['Prediction'] = np.ravel(y_pred)
df_test_pred               # Dataframe containing the actual and predicted temperatures

In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Actual vs. predicted temperatures on the test period.
actual = df_test_pred['tempr']
predicted = df_test_pred['Prediction']

testScore_mse = mean_squared_error(actual, predicted)     # mean square error on test data
testScore_rmse = math.sqrt(testScore_mse)                 # root mean square error, derived from the MSE above
testScore_mae = mean_absolute_error(actual, predicted)    # mean absolute error on test data

for prefix, score in (
    ("Mean Squared Error = ", testScore_mse),
    ("Root Mean Squared Error = ", testScore_rmse),
    ("Mean Absolute Error = ", testScore_mae),
):
    print(prefix + str(score))

In [None]:
import matplotlib.style as style

# The 'seaborn-poster' style was renamed 'seaborn-v0_8-poster' in newer matplotlib releases;
# use whichever name this installation provides.
poster_style = 'seaborn-v0_8-poster' if 'seaborn-v0_8-poster' in style.available else 'seaborn-poster'
style.use(poster_style) #sets the size of the charts
style.use('ggplot')

# Derive the date ranges and data shares shown in the title from the actual split,
# so the title cannot disagree with train_size / the prediction frame.
date_fmt = '%Y-%m-%d'
n_train, n_pred = len(train), len(df_test_pred)
train_pct = round(100.0 * n_train / (n_train + n_pred))
title = (
    "Digital Current Weather Information System (DCWIS)\n"
    " Parameter - Temperature\n"
    " Duration of Training = {} :: {} ({}% data)\n"
    " Predicted Duration = {} :: {} ({}% data)"
).format(
    train.index.min().strftime(date_fmt), train.index.max().strftime(date_fmt), train_pct,
    df_test_pred.index.min().strftime(date_fmt), df_test_pred.index.max().strftime(date_fmt), 100 - train_pct,
)

fig, ax = plt.subplots(figsize=(40,15))
ax.set_facecolor('#FFFFE0')
ax.plot(train.index, train['tempr'], label="Training data", color='#000080')
ax.plot(df_test_pred.index, df_test_pred['tempr'], marker='.', color='#013220', label="Actual temperatures")
ax.plot(df_test_pred.index, df_test_pred['Prediction'], 'r', label="Predicted Temperatures")
ax.set_xlabel('Date', size=30)     # the x values are the date index of the frames
ax.set_ylabel('Temperature (deg celsius)', size=25)
ax.set_title(title, size=25)
ax.tick_params(axis='both', labelsize=25)
ax.legend(loc='best', fontsize=20)
ax.set_xlim([df.index.min(), df.index.max()])
ax.grid(color='black', linestyle='-.', linewidth=0.7)
fig.savefig('lstm.png', dpi=300)
#files.download("lstm.png")   #Uncomment to download the file if you are using google colab