In [None]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.preprocessing import MinMaxScaler

import warnings
# Suppress every warning for the rest of the session.
# NOTE(review): with no category given this hides all Warning subclasses,
# including deprecation notices from the libraries imported above.
warnings.filterwarnings('ignore')

# Read the Data

In [None]:
# Load the training set; the 'Tarih' column is parsed to datetime while reading.
data = pd.read_csv("train.csv", parse_dates=["Tarih"])
data.head()

In [None]:
# Load the future-period frame ('Tarih' parsed to datetime) and discard the
# 'Unnamed: 0' column (presumably a saved index column — confirm).
future_df = (
    pd.read_csv("future.csv", parse_dates=["Tarih"])
    .drop(columns="Unnamed: 0")
)
future_df.head()

In [None]:
# Load 'med.csv' and convert its 'Tarih' column to datetime.
med = pd.read_csv("med.csv")
med = med.assign(Tarih=pd.to_datetime(med["Tarih"]))
med.head()

# Describe the Data

In [None]:
# (rows, columns) of the raw training frame
data.shape

In [None]:
# Column dtypes and non-null counts of the raw training frame
data.info()

In [None]:
# Summary statistics, transposed so that each described column is one row
data.describe().T

In [None]:
# Plot the Data

# LSTM Model

## Helper Function: `series_to_supervised`

In [None]:
def series_to_supervised(data, n_in=1, n_out=1, dropnan=False):
    """Frame a time series as a supervised-learning table of shifted columns.

    Parameters
    ----------
    data : list, numpy array or DataFrame
        Observations ordered in time, one row per step. Anything accepted by
        ``pd.DataFrame`` works, including 1-D sequences (treated as a single
        variable) and nested lists (one variable per inner position).
    n_in : int
        Number of lag steps used as input; produces columns ``varK(t-n_in)``
        through ``varK(t-1)``.
    n_out : int
        Number of steps used as output; produces columns ``varK(t)`` through
        ``varK(t+n_out-1)``.
    dropnan : bool
        When True, rows containing any NaN are removed. This includes the
        rows at the edges that the shifting leaves incomplete.

    Returns
    -------
    pandas.DataFrame
        One column per (variable, step) pair, lags first, then outputs.
    """
    dff = pd.DataFrame(data)
    # Count variables from the constructed frame rather than from the input's
    # type: a nested list has several columns, and a 1-D array has no shape[1].
    n_vars = dff.shape[1]
    cols, names = [], []
    # input sequence (t-n, ... t-1)
    for i in range(n_in, 0, -1):
        cols.append(dff.shift(i))
        names += ['var%d(t-%d)' % (j + 1, i) for j in range(n_vars)]
    # forecast sequence (t, t+1, ... t+n_out-1)
    for i in range(n_out):
        cols.append(dff.shift(-i))
        if i == 0:
            names += ['var%d(t)' % (j + 1) for j in range(n_vars)]
        else:
            names += ['var%d(t+%d)' % (j + 1, i) for j in range(n_vars)]
    # put it all together
    agg = pd.concat(cols, axis=1)
    agg.columns = names
    # drop rows with NaN values
    if dropnan:
        agg = agg.dropna()
    return agg

# Feature Creators

In [None]:
# Frames the feature-engineering functions are applied to; only `data` is enabled.
# Wider list kept for reference:
# all_df = [train_df , val_df , test_df , future_df , data]
all_df = [data]

In [None]:
from eval_metrics import eval_metrics
from feat_engs import create_datetimes, create_holiday_weekend, create_electricOutage_timeofDay, create_businessDay_cumulativeholidays, create_outage_rolling_percentages, create_hourly_sin_cos, create_seasons

In [None]:
# Apply every imported feature builder to each frame in `all_df`, then drop the
# 'date' column and make 'Tarih' the index.
# The helpers' return values are ignored, so they presumably add their columns
# to `df` in place — TODO confirm in feat_engs.
# NOTE(review): this cell mutates the frames in place and is likely not
# re-runnable: after one pass 'date' is gone and 'Tarih' is the index, so a
# second pass fails unless the helpers recreate those columns.
for df in all_df:
    create_datetimes(df)
    create_holiday_weekend(df)
    create_electricOutage_timeofDay(df,med)
    create_businessDay_cumulativeholidays(df)
    create_outage_rolling_percentages(df)
    create_hourly_sin_cos(df)
    create_seasons(df)
    df.drop('date' , axis=1 , inplace= True)
    df.set_index('Tarih' , inplace= True)

In [None]:
# Inspect the feature-engineered frame (indexed by 'Tarih' after the loop above)
data

In [None]:
# Re-check dtypes and non-null counts now that the feature columns exist
data.info()

In [None]:
# Pull the frame out as a float32 NumPy matrix for scaling.
values = data.to_numpy().astype('float32')

In [None]:
# normalize features
# Fit the scaler on the training rows only, so minima/maxima from the test
# period do not leak into the scaling, then apply it to every row. Rows after
# the training period may therefore fall slightly outside [0, 1].
# n_train_time must be the same row count the train/test split cell uses.
n_train_time = 1460*24
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(values[:n_train_time])
scaled = scaler.transform(values)

In [None]:
# (rows, features) of the scaled matrix
scaled.shape

In [None]:
# frame as supervised learning: one lag step in, one step out.
# dropnan=True removes the first row, whose (t-1) columns are NaN after the
# shift (and any other row containing NaN). A NaN in a training batch would
# turn the loss and the weights into NaN.
reframed = series_to_supervised(scaled, 1, 1, dropnan=True)

In [None]:
# Remove 'var1(t-1)', the lagged copy of the first variable (column 0 of the
# reframed table). Dropping by name and rebinding keeps the cell idempotent:
# the positional in-place drop removed one more column on every re-run.
reframed = reframed.drop(columns='var1(t-1)', errors='ignore')
reframed

In [None]:
# (rows, features) of `values`; in a top-to-bottom run this is still the unscaled float32 matrix
values.shape

In [None]:
# split into train and test sets
values = reframed.values

# Rows reserved for training (presumably 1460 days of hourly rows — confirm).
n_train_time = 1460*24
train = values[:n_train_time, :]
test = values[n_train_time:, :]

# split into input and outputs
# The label is 'var1(t)': the first variable at the current step. That is the
# column the prediction cell inverse-scales (scaler column 0). Taking the last
# column instead made the model learn the last variable, not the first one.
# Every other column of the reframed table is used as an input feature.
target_idx = reframed.columns.get_loc('var1(t)')
feature_idx = [i for i in range(values.shape[1]) if i != target_idx]
X_train, y_train = train[:, feature_idx], train[:, target_idx]
X_test, y_test = test[:, feature_idx], test[:, target_idx]

# reshape input to the 3D layout LSTMs expect: [samples, timesteps, features]
X_train = X_train.reshape((X_train.shape[0], 1, X_train.shape[1]))
X_test = X_test.reshape((X_test.shape[0], 1, X_test.shape[1]))
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)

## Train | Test Split

In [None]:
import tensorflow as tf

# Set the random seed so that runs are repeatable
seed_value = 53
tf.keras.utils.set_random_seed(seed_value)

# Modelling

In [None]:
from keras.models import Sequential
from keras.layers import Dense, LSTM , Dropout
from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error, mean_squared_error

In [None]:
# Network: one LSTM layer (100 units) -> 20% dropout -> single-unit dense output.
# The input shape (timesteps, features) is read from the reshaped training array;
# the train and test arrays are both reshaped to one timestep.
model = Sequential()
model.add(LSTM(units=100, input_shape=X_train.shape[1:]))
model.add(Dropout(rate=0.2))
model.add(Dense(units=1))
model.compile(optimizer='adam', loss='mean_squared_error')

# fit network: batches are taken in row order (no shuffling), and the test
# split is passed as validation data.
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=20,
    batch_size=16,
    shuffle=False,
    verbose=2,
)

In [None]:
# Plot training and validation loss per epoch on one set of axes.
fig, ax = plt.subplots()
ax.plot(history.history['loss'])
ax.plot(history.history['val_loss'])
ax.set_title('model loss')
ax.set_ylabel('loss')
ax.set_xlabel('epoch')
ax.legend(['train', 'test'], loc='upper right')
plt.show()


In [None]:
# make a prediction
yhat = model.predict(X_test)

# Flatten [samples, timesteps, features] to 2D without hard-coding the feature
# count. The former fixed width of 7, and the 7-column array handed to
# inverse_transform, only fit one specific feature count.
# test_X is not needed for the inversion below; the name is kept in case later
# cells reference it.
test_X = X_test.reshape((X_test.shape[0], -1))
test_y = y_test.reshape((len(y_test), 1))

# invert scaling for forecast and actual
# MinMaxScaler maps x -> x * scale_ + min_ per column, so a single column can be
# inverted directly instead of padding it out to the scaler's full width.
# TARGET_COL is the scaler column the label belongs to; this assumes y_test
# holds the scaled values of that column — verify against the split cell.
TARGET_COL = 0
target_min = scaler.min_[TARGET_COL]
target_scale = scaler.scale_[TARGET_COL]
inv_yhat = (yhat[:, 0] - target_min) / target_scale
inv_y = (test_y[:, 0] - target_min) / target_scale

# calculate RMSE
rmse = np.sqrt(mean_squared_error(inv_y, inv_yhat))
print('Test RMSE: %.3f' % rmse)