# Predicting Daily Power using LSTM

## Import Libraries

In [1]:
import tensorflow as tf
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import datetime
import warnings
from sklearn.preprocessing import OrdinalEncoder
from sklearn.impute import SimpleImputer
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import MinMaxScaler

warnings.simplefilter("ignore")

In [2]:
data = pd.read_csv('datasets/power_per_day.csv')
data.head()

Unnamed: 0,date,temp,weather,wind,humidity,day_power
0,2012-01-01,12.333333,Cloudy,21.333333,89.333333,0.8
1,2012-01-02,7.787234,Scattered clouds,17.212766,81.425532,2.9
2,2012-01-03,8.208333,Fog,32.354167,78.229167,0.8
3,2012-01-04,7.354167,Scattered clouds,26.375,71.041667,2.7
4,2012-01-05,8.292683,Scattered clouds,39.170732,77.804878,1.6


In [3]:
data.tail()

Unnamed: 0,date,temp,weather,wind,humidity,day_power
2686,2019-11-15,4.125,Scattered clouds,11.854167,85.375,2.0
2687,2019-11-16,3.755556,Scattered clouds,6.622222,83.777778,4.0
2688,2019-11-17,3.916667,Sunny,6.5625,86.895833,3.0
2689,2019-11-18,6.333333,Fog,12.625,83.604167,0.0
2690,2019-11-19,4.958333,Passing clouds,10.708333,78.354167,6.0


In [4]:
# Parse the date strings into real timestamps so the column can serve as a time index.
data['date'] = pd.to_datetime(data['date'])

In [5]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2691 entries, 0 to 2690
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype         
---  ------     --------------  -----         
 0   date       2691 non-null   datetime64[ns]
 1   temp       2686 non-null   float64       
 2   weather    2691 non-null   object        
 3   wind       2679 non-null   float64       
 4   humidity   2668 non-null   float64       
 5   day_power  2691 non-null   float64       
dtypes: datetime64[ns](1), float64(4), object(1)
memory usage: 126.3+ KB


In [6]:
data.shape

(2691, 6)

In [7]:
# Index the frame by date so rows can be selected with date-string slices.
data = data.set_index('date')

In [8]:
df = data.copy()

# Chronological split: everything up to the end of 2018 is used for training,
# 2019 is held out for testing.
# ISO-formatted dates remove any day/month ambiguity, and the explicit copies
# make each split an independent frame, so the column assignments in the
# preprocessing cells below never write into a slice of `df`.
train_set = df.loc[:'2018-12-31'].copy()
test_set = df.loc['2019-01-01':].copy()

In [9]:
data.tail()

Unnamed: 0_level_0,temp,weather,wind,humidity,day_power
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2019-11-15,4.125,Scattered clouds,11.854167,85.375,2.0
2019-11-16,3.755556,Scattered clouds,6.622222,83.777778,4.0
2019-11-17,3.916667,Sunny,6.5625,86.895833,3.0
2019-11-18,6.333333,Fog,12.625,83.604167,0.0
2019-11-19,4.958333,Passing clouds,10.708333,78.354167,6.0


In [10]:
train_set.shape

(2379, 5)

In [11]:
test_set.shape

(312, 5)

## Encode categorical columns, impute and scale

In [12]:
# Turn the textual weather labels into numeric codes.
# The category-to-code mapping is learned from the training split only and
# then reused unchanged for the test split.
categorical_cols = ['weather']
ord_enc = OrdinalEncoder()
ord_enc.fit(train_set[categorical_cols])
train_set[categorical_cols] = ord_enc.transform(train_set[categorical_cols])
test_set[categorical_cols] = ord_enc.transform(test_set[categorical_cols])

In [13]:
# Fill the missing temp / wind / humidity readings with the column medians.
# Medians are computed on the training split only and applied to both splits.
numeric_cols = ['temp', 'wind', 'humidity']
imputer = SimpleImputer(strategy='median')
imputer.fit(train_set[numeric_cols])
train_set[numeric_cols] = imputer.transform(train_set[numeric_cols])
test_set[numeric_cols] = imputer.transform(test_set[numeric_cols])

In [14]:
# Separate scalers: the target scaler is needed on its own later to map
# predictions back to the original day_power units.
scale_featrues = MinMaxScaler()
scale_target = MinMaxScaler()

feature_cols = ['temp', 'wind', 'humidity']
target_cols = ['day_power']

# Both scalers learn their min/max from the training split only.
# NOTE(review): the ordinal-encoded `weather` column is not rescaled here.
scale_featrues.fit(train_set[feature_cols])
train_set[feature_cols] = scale_featrues.transform(train_set[feature_cols])
test_set[feature_cols] = scale_featrues.transform(test_set[feature_cols])

scale_target.fit(train_set[target_cols])
train_set[target_cols] = scale_target.transform(train_set[target_cols])
test_set[target_cols] = scale_target.transform(test_set[target_cols])


In [15]:
train_set.shape

(2379, 5)

In [16]:
test_set.shape

(312, 5)

## Create Sequences

In [17]:
def create_sequences(data, seq_length):
    """Slice a time-ordered table into overlapping windows for supervised learning.

    Every sample is a block of ``seq_length`` consecutive rows (all columns,
    the target included) and is labelled with the last-column value of the
    row that immediately follows that block.

    Parameters
    ----------
    data : pandas.DataFrame or 2-D array-like
        Rows in chronological order; the last column is the target.
    seq_length : int
        Number of rows per window; must be at least 1.

    Returns
    -------
    X : numpy.ndarray, shape (n_samples, seq_length, n_columns)
    y : numpy.ndarray, shape (n_samples,)
        ``n_samples`` equals ``max(len(data) - seq_length, 0)``.

    Raises
    ------
    ValueError
        If ``seq_length`` is smaller than 1 or ``data`` is not two-dimensional.
    """
    if seq_length < 1:
        raise ValueError("seq_length must be a positive integer")

    # Convert once up front: slicing a plain array per window is far cheaper
    # than slicing the DataFrame, and it lets ndarray inputs work as well.
    values = np.asarray(data)
    if values.ndim != 2:
        raise ValueError("data must be two-dimensional (rows x columns)")

    n_samples = max(len(values) - seq_length, 0)
    if n_samples == 0:
        # Too few rows for even one window: return correctly shaped empties
        # so downstream shape-dependent code still works.
        empty_X = np.empty((0, seq_length, values.shape[1]), dtype=values.dtype)
        empty_y = np.empty((0,), dtype=values.dtype)
        return empty_X, empty_y

    X = np.stack([values[start:start + seq_length] for start in range(n_samples)])
    # Target for window k is the last column of row k + seq_length.
    y = values[seq_length:, -1].copy()
    return X, y

# Each sample is built from the 7 rows that precede the row being predicted.
seq_length = 7
X_train, y_train = create_sequences(train_set, seq_length)
# Windows are built from test_set alone, so its first seq_length rows only
# serve as history and receive no prediction of their own.
X_test, y_test = create_sequences(test_set, seq_length)

# Expect (n_samples, seq_length, n_columns) for X and (n_samples,) for y.
print(X_train.shape,y_train.shape)

(2372, 7, 5) (2372,)


## Define LSTM model

In [18]:
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout

In [None]:
# Seed NumPy and TensorFlow so weight initialisation and batch shuffling are
# repeatable from run to run (some GPU kernels may still add small
# nondeterminism).
np.random.seed(42)
tf.random.set_seed(42)

# Take the feature count from the data instead of hard-coding it, so the
# model keeps working if columns are added or removed upstream.
n_features = X_train.shape[2]

# Two stacked LSTM layers followed by a single linear output unit.
# The first LSTM returns the full sequence so the second one can consume it.
model = Sequential()
model.add(LSTM(units=128, return_sequences=True, input_shape=(seq_length, n_features)))
model.add(LSTM(units=64))
model.add(Dense(units=1))

# compile the model
model.compile(optimizer='adam', loss='mean_squared_error')

# train the model; 10% of the training windows are held out for validation
history = model.fit(X_train, y_train, epochs=50, batch_size=16, validation_split=0.1)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50

## Make predictions on training and test data

In [None]:
# The model predicts in the scaled target space; map the predictions back to
# the original day_power units.
train_preds = scale_target.inverse_transform(model.predict(X_train))
test_preds = scale_target.inverse_transform(model.predict(X_test))

# Rebuild the ground truth from the scaled frames rather than from
# y_train / y_test themselves. The targets produced by create_sequences are
# the day_power values from row `seq_length` onwards, so this yields the same
# arrays while keeping the cell idempotent: re-running it no longer
# inverse-transforms values that were already converted back.
# NOTE: after this cell y_train / y_test are in original units; re-run the
# sequence-creation cell before fitting the model again.
y_train = scale_target.inverse_transform(train_set[['day_power']].to_numpy()[seq_length:])
y_test = scale_target.inverse_transform(test_set[['day_power']].to_numpy()[seq_length:])


## Evaluate the model

In [None]:
# Mean squared error between predictions and ground truth for each split.
train_mse = np.square(train_preds - y_train).mean()
test_mse = np.square(test_preds - y_test).mean()
print('Training MSE:', train_mse)
print('Testing MSE:', test_mse)


## Plot training and validation loss over epochs

In [None]:
# One curve per metric recorded by model.fit: training loss and validation loss.
for metric_key, curve_label in (('loss', 'train'), ('val_loss', 'validation')):
    plt.plot(history.history[metric_key], label=curve_label)

plt.title('Training and Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()

## Plot actual vs predicted values on training set

In [None]:
# Pair the observed and predicted training values column by column for plotting.
df_training = pd.DataFrame({
    'Actual': y_train.ravel(),
    'Predicted': train_preds.ravel(),
})

In [None]:
import plotly.express as px

# Line colours, in the order the series are listed in `y`.
line_colors = ['#002147', '#FF4F00']

fig = px.line(
    df_training,
    y=['Actual', 'Predicted'],
    title='Actual vs Predicted Values (Training Set)',
    color_discrete_sequence=line_colors,
)
fig.show()

## Plot actual vs predicted values on test set

In [None]:
# Pair the observed and predicted test values column by column for plotting.
df_testing = pd.DataFrame({
    'Actual': y_test.ravel(),
    'Predicted': test_preds.ravel(),
})

In [None]:
# Line colours, in the order the series are listed in `y`.
line_colors = ['#002147', '#FF4F00']

fig = px.line(
    df_testing,
    y=['Actual', 'Predicted'],
    title='Actual vs Predicted Values (Test Set)',
    color_discrete_sequence=line_colors,
)
fig.show()

In [None]:
train_set.head()

In [None]:
test_set.head()

In [None]:
len(test_set)

In [None]:
# Dates of the training rows only. `data.index` also contains the held-out
# 2019 rows, which anchored the forecast date range at the end of the test
# period instead of at the end of the training period.
train_dates = train_set.index

In [None]:
# Forecast horizon = the held-out period, so every prediction can be compared
# with an observed value.
n_future = len(test_set)
# Use the test rows' own timestamps. The series has missing days (2691 rows
# between 2012-01-01 and 2019-11-19), so a synthetic daily range would drift
# out of step with the actual observations.
forcast_period_dates = test_set.index[:n_future].tolist()

In [None]:
# Predict the held-out period rather than replaying the last training windows.
# The final `seq_length` training rows are prepended so that the very first
# test day also has a full history window; this yields one window per test row.
forecast_frame = pd.concat([train_set.tail(seq_length), test_set])
X_forecast = create_sequences(forecast_frame, seq_length)[0]
forcast = model.predict(X_forecast[:n_future])

In [None]:
# Map the scaled predictions back to original day_power units. The reshape
# gives the single-column 2-D layout the target scaler was fitted on.
forcast_inverse = scale_target.inverse_transform(forcast.reshape(-1,1))

In [None]:
# test_set['day_power'] was min-max scaled in the preprocessing step; undo
# that to recover the observed values in original units.
actual_inverse  = scale_target.inverse_transform(test_set[['day_power']])

In [None]:
len(forcast_inverse)

In [None]:
len(actual_inverse)

In [None]:
forcast.shape

In [None]:
# Drop the time-of-day component: keep only the calendar date of each timestamp.
forecast_dates = [stamp.date() for stamp in forcast_period_dates]

In [None]:
# Assemble dates, predictions and observed values into one frame, then turn
# the date objects into a proper datetime column for plotting.
df_forecast = pd.DataFrame({
    'Date': np.array(forecast_dates),
    'Predicted': forcast_inverse.flatten(),
    'Actual': actual_inverse.flatten(),
}).assign(Date=lambda frame: pd.to_datetime(frame['Date']))

In [None]:
df_forecast

In [None]:
import plotly.express as px

# Same colours as the training and test comparison plots, plus a title so the
# figure can be read on its own.
fig = px.line(df_forecast, x='Date', y=['Actual', 'Predicted'],
              title='Actual vs Predicted Values (Forecast Period)',
              color_discrete_sequence=['#002147', '#FF4F00'])
fig.show()