## Import required libraries and load the data

In [None]:
!pip install ipython-autotime --quiet

In [None]:
%load_ext autotime
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
import xarray as xr

import warnings
warnings.filterwarnings('ignore')

time: 2.06 s (started: 2022-07-26 05:13:02 +00:00)


In [None]:
# NOTE(review): '/conten' looks like a truncated path, so this call will presumably fail
# on a fresh run; `data` is also reassigned from a CSV file in the next cell — confirm
# whether this cell is still needed.
data = xr.open_dataset('/conten')

In [None]:
# Load the google.colab data_table extension (presumably only available inside Colab),
# read the CSV into a DataFrame and preview its first rows.
%load_ext google.colab.data_table
data = pd.read_csv('/content/output_1.csv') # path to your data file.
data.head()

In [None]:
# Parse the 'date' column into datetimes and use it as the row index as well;
# the column itself is kept (drop=False) because later cells still read it.
data['date'] = pd.to_datetime(data['date'])
data = data.set_index('date', drop=False)
data.head()

In [None]:
data.shape

In [None]:
data.columns

In [None]:
data.info()

## Exploratory data analysis

In [None]:
data.describe()

In [None]:
import plotly.express as px
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
#Customize the graphs or plots
sns.set_style('darkgrid')
matplotlib.rcParams['font.size'] = 10
matplotlib.rcParams['figure.figsize'] = (15,4)
matplotlib.rcParams['figure.facecolor'] = '#0000000F'

## Splitting the data 

In [None]:
data['Year'] = data['date'].dt.year

In [None]:
data.head()

In [None]:
plt.title('No. of Observations per Year')
sns.countplot(x = data.Year);

In [None]:
# Remove the helper columns so only the model features remain.
# Reassigning (instead of inplace=True) and ignoring labels that are already gone
# keeps this cell safe to run more than once.
data = data.drop(columns=['Year', 'date'], errors='ignore')
data.head()

In [None]:
data.isna().sum()

In [None]:
# Treat zeros as missing readings and record how many values per column are affected.
data = data.replace(0,np.nan)
missing_per_column = data.isna().sum()
# Fill the gaps (forward fill, then backward fill for a leading gap). NaNs left in the
# frame would be carried through the scaler into the model inputs and targets, where
# they break training and the sklearn metrics computed later.
# NOTE(review): forward/backward fill is a simple default — confirm it suits this data.
data = data.ffill().bfill()
missing_per_column

In [None]:
# Training set: the first 80% of the rows, in their existing order.
split_idx = int(len(data) * 0.8)
train = data.iloc[:split_idx].copy()
train.tail()

In [None]:
# Test set: the remaining 20% of the rows, starting where the training split ends.
split_idx = int(len(data) * 0.8)
test = data.iloc[split_idx:].copy()
test.head()

In [None]:
print(train.shape)
print(test.shape)

## Data Normalization

In [None]:
from sklearn.preprocessing import MinMaxScaler
# Fit the min-max scaler on the training rows only and scale them in the same step;
# the fitted `scaler` is reused later for the test rows.
scaler = MinMaxScaler()
train = scaler.fit_transform(train)

## Dividing the data into timeshifts

In [None]:
#Create two empty lists x_train and y_train
X_train = []
y_train = []

In [None]:
# Number of locations, derived from the column count
# (presumably 7 variables are stored per location — confirm against the data file).
n = train.shape[1] // 7
# Column positions of the 't2m_<i>' targets, one per location.
target_list = [data.columns.get_loc('t2m_{}'.format(i)) for i in range(n)]

In [None]:
# Sliding windows over the scaled training rows: each sample is the previous
# `window_size` rows (all features); its label is the target columns of the next row.
# The lists are (re)created here so that re-running this cell starts from scratch
# instead of appending duplicate samples.
window_size = 7
X_train, y_train = [], []
for i in range(window_size, train.shape[0]):
  X_train.append(train[i-window_size:i])
  y_train.append(train[i,target_list])

In [None]:
#Converting Lists into numpy arrays
X_train = np.array(X_train) 
y_train =  np.array(y_train)

In [None]:
X_train.shape, y_train.shape

## Build the Model

In [None]:
# Seed every random number generator the training run draws from. Seeding NumPy alone
# does not seed TensorFlow's own generator or Python's `random` module.
import random
import tensorflow as tf

random.seed(1337)
np.random.seed(1337)
tf.random.set_seed(1337)

In [None]:
#Import libraries
import tensorflow.keras as keras
from tensorflow.keras import Sequential
from tensorflow.keras.layers import LSTM,Dense,Dropout

In [None]:
X_train.shape[1],X_train.shape[2]

In [None]:
# Stacked LSTM regressor: three recurrent layers with dropout after the first two,
# followed by a Dense layer with one unit per target column.
model = Sequential([
    LSTM(100, return_sequences=True, activation='relu',
         input_shape=(X_train.shape[1], X_train.shape[2]),
         kernel_regularizer=keras.regularizers.l2(0.001)),
    Dropout(0.3),  # dropout layer to limit overfitting
    LSTM(200, return_sequences=True),  # second LSTM layer of the stack
    Dropout(0.3),
    LSTM(100),
    Dense(y_train.shape[1]),  # final output layer
])

In [None]:
model.summary()

In [None]:
model.compile(optimizer='adamax', loss='mse')

In [None]:
# fitting the model with train data
history = model.fit(X_train, y_train , epochs=25, batch_size=128)

In [None]:
# Plot the training loss recorded for each epoch.
# The model is compiled with loss='mse', so the axis is labelled MSE rather than RMSE.
plt.figure(figsize=(8,8))
plt.title('Loss Function')
plt.xlabel('No.of epochs')
plt.ylabel('mse')
plt.plot(history.history['loss']);

## Evaluate the Model

In [None]:
train_preds = model.predict(X_train)

In [None]:
train_preds.shape

In [None]:
from math import sqrt
from sklearn.metrics import mean_squared_error
RMSE = np.sqrt(mean_squared_error(y_train, train_preds))
print(round(RMSE,3))

In [None]:
from sklearn.metrics import r2_score
# R² on the training windows, shown as a whole-number percentage.
Score = r2_score(y_train, train_preds)
# Scale before formatting: round(Score, 2)*100 can print float noise
# such as 56.99999999999999.
print(f'{Score * 100:.0f}')

In [None]:
print(y_train[:,1])

In [None]:
# Plot the original vs. predicted target values for the training windows.
# Both arrays hold one column per target, so each plot call can draw several lines.
# Every series gets a single colour and the legend receives one handle per series;
# passing only two labels would otherwise label the first two "original" lines.
plt.figure(figsize=(30,8))
plt.title('Original vs predicted(optimizer = adamax)')
plt.xlabel('Over the data')
plt.ylabel('Temperature(air_temp)')
original_lines = plt.plot(y_train, color='C0')
predicted_lines = plt.plot(train_preds, color='C1')
plt.legend([original_lines[0], predicted_lines[0]], ['Original','Predicted'])
plt.show()

In [None]:
# Predicted-vs-original scatter for the TRAINING windows (the title previously said "Test").
plt.title('Scatter plot (Train Preds vs Original)')
plt.xlabel('Original Values')
plt.ylabel('Predicted Values')
# Colour and marker size are both taken from the original target values.
plt.scatter(y_train, train_preds, alpha=0.5,c=y_train,cmap='nipy_spectral',s=y_train)
# Reference line y = m*x with m = 1: points on it are perfect predictions.
m = 1
plt.plot(y_train, m*y_train)
plt.show()

## Preprocessing test data

In [None]:
# The 7 rows that close the training split; they provide the history for the first test windows.
split_idx = int(len(data) * 0.8)
last_7_days = data.iloc[:split_idx].tail(7).copy()
last_7_days

In [None]:
test.head()

In [None]:
# Prepend the last 7 training rows to the test rows so that the first test sample
# has a full 7-row history window.
# pd.concat is used because DataFrame.append was removed in pandas 2.0.
test_final = pd.concat([last_7_days, test])
test_final.tail()

In [None]:
#Scaling columns in the final test data.
test_final = scaler.transform(test_final)
test_final

In [None]:
#Creating two empty lists X_test and y_test
X_test = []
y_test = []

In [None]:
# Sliding windows over the scaled test rows: each sample is the previous
# `window_size` rows (all features); its label is the target columns of the next row.
# The lists are (re)created here so that re-running this cell starts from scratch
# instead of appending duplicate samples.
window_size = 7
X_test, y_test = [], []
for i in range(window_size, test_final.shape[0]):
  X_test.append(test_final[i-window_size:i])
  y_test.append(test_final[i,target_list])

In [None]:
# converting into array.
X_test , y_test = np.array(X_test) , np.array(y_test)

In [None]:
X_test.shape , y_test.shape

## Making prediction on test data

In [None]:
y_preds = model.predict(X_test)

In [None]:
y_preds.shape

## Evaluating the model on the test data

In [None]:
rmse = np.sqrt(mean_squared_error(y_test, y_preds))
print(round(rmse,2))

In [None]:
# R² on the test windows, shown as a whole-number percentage.
# The result gets its own name: assigning it to `r2_score` would shadow the imported
# sklearn function and make this cell fail the second time it is run.
test_r2 = r2_score(y_test, y_preds)
# Scale before formatting so float noise (e.g. 56.99999999999999) is not printed.
print(f'{test_r2 * 100:.0f}')

In [None]:
# Plot the original vs. predicted target values for the test windows.
# Both arrays hold one column per target, so each plot call can draw several lines.
# Every series gets a single colour and the legend receives one handle per series;
# passing only two labels would otherwise label the first two "original" lines.
plt.figure(figsize=(15,6))
plt.rcParams.update({'font.size': 9})
plt.title('Original vs predicted(optimizer = adamax)')
plt.xlabel('Over the data')
plt.ylabel('Temperature(air_temp)')
original_lines = plt.plot(y_test, color='C0')
predicted_lines = plt.plot(y_preds, color='C1')
plt.legend([original_lines[0], predicted_lines[0]], ['Original','Predicted'])
plt.show()

In [None]:
# Predicted-vs-original scatter for the test windows; colour and marker size are
# both taken from the original target values.
m = 1  # slope of the reference line y = m*x
plt.scatter(y_test, y_preds, alpha=0.5, c=y_test, cmap='nipy_spectral', s=y_test)
plt.plot(y_test, m * y_test)
plt.title('Scatter plot (Test Preds vs Original)')
plt.xlabel('Original Values')
plt.ylabel('Predicted Values')
plt.show()