In [1]:
# Multi-step Time series prediction using LSTM
# https://ghoshratul063.medium.com/multi-step-time-series-prediction-using-lstm-3d616656ef25

In [2]:
!python -m pip install tensorflow



In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from numpy import nan

from tensorflow.keras import Sequential
from tensorflow.keras.layers import LSTM, Dense

from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler

In [4]:
data= []
data = pd.read_csv('Data/household_power_consumption.txt', sep = ';',parse_dates = True,low_memory = False)
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2075259 entries, 0 to 2075258
Data columns (total 9 columns):
 #   Column                 Dtype  
---  ------                 -----  
 0   Date                   object 
 1   Time                   object 
 2   Global_active_power    object 
 3   Global_reactive_power  object 
 4   Voltage                object 
 5   Global_intensity       object 
 6   Sub_metering_1         object 
 7   Sub_metering_2         object 
 8   Sub_metering_3         float64
dtypes: float64(1), object(8)
memory usage: 142.5+ MB


In [5]:
# np.isnan(data).sum()

In [6]:
def fill_missing(data):
    one_day = 24*60
    for row in range(data.shape[0]):
        for col in range(data.shape[1]):
            if np.isnan(data[row, col]):
                data[row, col] = data[row-one_day, col]
fill_missing(data.values)
np.isnan(data).sum()

TypeError: ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''

In [None]:
data.to_csv('cleaned_data.csv')
dataset = pd.read_csv('cleaned_data.csv', parse_dates = True, index_col = 'date_time', low_memory = False)
dataset.head()
# Downsampling the data into dáy-wise bins and sum the values of the timestamps falling into a bin.

data = dataset.resample('D').sum()
fig, ax = plt.subplots(figsize=(18,18))

for i in range(len(data.columns)):
    plt.subplot(len(data.columns), 1, i+1)
    name = data.columns[i]
    plt.plot(data[name])
    plt.title(name, y=0, loc = 'right')
    plt.yticks([])
plt.show()
fig.tight_layout()

In [None]:
fig, ax = plt.subplots(figsize=(18,18))

for i in range(len(years)):
    plt.subplot(len(years), 1, i+1)
    year = years[i]
    active_power_data = data[str(year)]
    active_power_data['Global_active_power'].hist(bins = 200)
    plt.title(str(year), y = 0, loc = 'left')
plt.show()
fig.tight_layout()

In [None]:
fig, ax = plt.subplots(figsize=(18,18))

for i in range(len(data.columns)):
    plt.subplot(len(data.columns), 1, i+1)
    name = data.columns[i]
    data[name].hist(bins=200)
    plt.title(name, y=0, loc = 'right')
    plt.yticks([])
plt.show()
fig.tight_layout()

In [None]:
# Here splitting the dataset upto end of 2009 is in train dataset and remaining we keeping it in test dataset.

data_train = data.loc[:'2009-12-31', :]['Global_active_power']
data_train.head()

In [None]:
data_test = data['2010']['Global_active_power']
data_test.head()

In [None]:
data_train.shape, data_test.shape

In [None]:
data_train = np.array(data_train)
# We are splitting the data weekly wise(7days)

X_train, y_train = [], []

for i in range(7, len(data_train)-7):
    X_train.append(data_train[i-7:i])
    y_train.append(data_train[i:i+7])
X_train, y_train = np.array(X_train), np.array(y_train) #converting list into numpy array
X_train.shape, y_train.shape

In [None]:
# Normalising the dataset between 0 and 1

x_scaler = MinMaxScaler()
X_train = x_scaler.fit_transform(X_train)
X_train = X_train.reshape(1098, 7, 1) #Reshaping the shape into 3 dimensions to fit in the LSTM Model.
X_train.shape

In [None]:
# Building sequential model using Keras
reg = Sequential()
reg.add(LSTM(units = 200, activation = 'relu', input_shape=(7,1)))
reg.add(Dense(7))
reg.compile(loss='mse', optimizer='adam')
reg.fit(X_train, y_train, epochs = 100)

In [None]:
data_test = np.array(data_test)
# Here we are splitting the data weekly wise(7days)

X_test, y_test = [], []

for i in range(7, len(data_test)-7):
    X_test.append(data_test[i-7:i])
    y_test.append(data_test[i:i+7])
X_test, y_test = np.array(X_test), np.array(y_test)
X_test = x_scaler.transform(X_test)
y_test = y_scaler.transform(y_test)
X_test.shape

In [None]:
 
X_test = X_test.reshape(326,7,1)
X_test.shape

In [None]:
 
y_pred = reg.predict(X_test)
# Bringing y_pred values to their original forms by using inverse transform

y_pred = y_scaler.inverse_transform(y_pred)
y_true = y_scaler.inverse_transform(y_test)

In [None]:
def evaluate_model(y_true, y_predicted):
    scores = []
    
    #calculate scores for each day
    for i in range(y_true.shape[1]):
        mse = mean_squared_error(y_true[:, i], y_predicted[:, i])
        rmse = np.sqrt(mse)
        scores.append(rmse)
    
    #calculate score for whole prediction
    total_score = 0
    for row in range(y_true.shape[0]):
        for col in range(y_predicted.shape[1]):
            total_score = total_score + (y_true[row, col] - y_predicted[row, col])**2
    total_score = np.sqrt(total_score/(y_true.shape[0]*y_predicted.shape[1]))
    
    return total_score, scores

evaluate_model(y_true, y_pred)

In [None]:
# Standard deviation

np.std(y_true[0])
 

In [None]:
# https://ghoshratul063.medium.com/multi-step-time-series-prediction-using-lstm-3d616656ef25