In [None]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

# Read the Data

In [None]:
# Load the training data from train.csv and preview its first rows.
data = pd.read_csv("train.csv")
data.head()

In [None]:
# Load future.csv and discard its 'Unnamed: 0' column before previewing.
# NOTE(review): 'Unnamed: 0' is presumably a saved row index — confirm.
future_raw = pd.read_csv("future.csv")
future_data = future_raw.drop(columns='Unnamed: 0')
future_data.head()

# Describe the Data

In [None]:
# (rows, columns) of the training frame.
data.shape

In [None]:
# Column dtypes and non-null counts as loaded, before 'Tarih' is parsed.
data.info()

In [None]:
def _index_by_timestamp(frame, column='Tarih'):
    """Return `frame` indexed by `column`, with that column parsed as datetimes.

    Parameters
    ----------
    frame : pandas.DataFrame
        Frame that holds the timestamp either as a regular column or, after a
        previous run of this cell, already as its index.
    column : str
        Name of the timestamp column.

    Returns
    -------
    pandas.DataFrame
        A new frame indexed by the parsed timestamps. When `column` is no
        longer among the columns the input is returned unchanged, which keeps
        the cell safe to re-execute (the original in-place version raised
        ``KeyError`` on a second run).
    """
    if column not in frame.columns:
        return frame
    parsed = frame.assign(**{column: pd.to_datetime(frame[column])})
    return parsed.set_index(column)


data = _index_by_timestamp(data)
future_data = _index_by_timestamp(future_data)

In [None]:
# Same summary again, now that 'Tarih' has been moved into the index.
data.info()

In [None]:
# Summary statistics, transposed so each column of `data` becomes one row.
data.describe().T

# Plot the Data

# Modelling

## Train | Test Split

In [None]:
import tensorflow as tf

In [None]:
# Set the random seed through the Keras utility before any model is built.
seed_value = 34
tf.keras.utils.set_random_seed(seed_value)

In [None]:
# Chronological hold-out: rows stamped after the cutoff form the test set,
# rows at or before it stay available for training and validation.
split_cutoff = '2022-07-23 23:00:00'
after_cutoff = data.index > split_cutoff
up_to_cutoff = data.index <= split_cutoff
X_test_temp = data.loc[after_cutoff].copy()
X_train_temp = data.loc[up_to_cutoff].copy()

In [None]:
# Hold out the last 96 rows of the training span for validation. The right-hand
# side is evaluated before either name is rebound, so both slices are taken
# from the same, un-trimmed frame.
# NOTE(review): re-running only this cell trims a further 96 rows from
# X_train_temp; re-run the split cell first.
X_val_temp, X_train_temp = X_train_temp[-96:], X_train_temp[:-96]

# Scaling

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Learn the [0, 1] scaling from the training rows only, then push every split
# through that same fitted scaler.
scaler = MinMaxScaler(feature_range=(0, 1)).fit(X_train_temp)
train_scaled = scaler.transform(X_train_temp)
test_scaled = scaler.transform(X_test_temp)
val_scaled = scaler.transform(X_val_temp)

# Reshape

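Note: the windowing below is easy to mix up. Each generator sample is a window of 24 consecutive scaled values, and its target is the value that immediately follows that window.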

In [None]:
from keras.preprocessing.sequence import TimeseriesGenerator

# Create the data generators: each split serves as both the input series and
# the target series, cut into 24-step windows and served one sample per batch.
window_kwargs = dict(length=24, batch_size=1)
train_gen = TimeseriesGenerator(train_scaled, train_scaled, **window_kwargs)
val_gen = TimeseriesGenerator(val_scaled, val_scaled, **window_kwargs)
test_gen = TimeseriesGenerator(test_scaled, test_scaled, **window_kwargs)

# Eval Metric

In [None]:
def eval_metrics(y_true , y_pred):
    """Print MAPE, R2, MAE and RMSE of `y_pred` measured against `y_true`.

    Parameters
    ----------
    y_true : array-like
        Ground-truth values.
    y_pred : array-like
        Predicted values, aligned element-wise with `y_true`.

    Returns
    -------
    None
        The four scores are only printed.
    """
    from sklearn.metrics import r2_score , mean_absolute_error , mean_squared_error , mean_absolute_percentage_error

    # Mean absolute percentage error
    mape = mean_absolute_percentage_error(y_true, y_pred)

    # Coefficient of determination
    r2 = r2_score(y_true , y_pred)

    # Mean absolute error
    mae = mean_absolute_error(y_true , y_pred)

    # Root mean squared error: the square root is applied to the MSE, so the
    # value is reported as RMSE (it was previously mislabelled "MSE").
    rmse = mean_squared_error(y_true,y_pred)**0.5

    print(f"""
          Mape Score : {mape}
          R2 Score : {r2}
          MAE Score : {mae}
          RMSE Score : {rmse}
          """)
    
def eval_plot(y_true , y_pred):
    """Plot real values and predictions as two columns of one frame.

    The real values are indexed by ``X_test[:-24].index`` and the predictions
    by ``future_data[:-24].index``; both names are read from the notebook's
    global scope. The object returned by ``DataFrame.plot`` is printed.

    NOTE(review): this requires `X_test` to expose `.index` (i.e. to be a
    pandas object) — confirm against the cell that defines it.
    """
    real_index = X_test[:-24].index
    real_frame = pd.DataFrame(data=y_true, columns=['Real Values'], index=real_index)

    pred_index = future_data[:-24].index
    pred_frame = pd.DataFrame(data=y_pred, columns=['Predicts'], index=pred_index)

    # The real-value frame loses a further 24 trailing rows before the
    # column-wise join.
    side_by_side = pd.concat([real_frame[:-24], pred_frame], axis=1)
    axes = side_by_side.plot()
    print(axes)
    
def eval_df (y_true , y_pred):
    """Print real values next to predictions in a single frame.

    The frame is indexed by ``future_data[:-24].index``, where `future_data`
    is read from the notebook's global scope.
    """
    columns = {'Real Values': y_true, 'Predicts': y_pred}
    side_by_side = pd.DataFrame(columns, index=future_data[:-24].index)
    print(side_by_side)
    
def create_submission(future_preds, num):
    """Write predictions to ``submission<num>.csv`` and expose the frame globally.

    Parameters
    ----------
    future_preds : array-like
        Predicted values, one per row of the global `future_data` index.
    num : int or str
        Suffix used for both the CSV file name and the global variable name.

    Side effects
    ------------
    Writes ``submission<num>.csv`` (without the index) to the working directory
    and binds the frame to the module-level name ``submission<num>``.
    """
    submission_df = pd.DataFrame({'Tarih': future_data.index, 'Dağıtılan Enerji (MWh)': future_preds})
    variable_name = f'submission{num}'
    submission_df.to_csv(f'{variable_name}.csv', index=False)
    globals()[variable_name] = submission_df

# Modelling Time

### Manual

In [None]:
from keras.models import Sequential
from keras.layers import LSTM, Dense

# Build the model: one 50-unit LSTM layer reading 24-step, single-feature
# windows, followed by a single-output dense layer.
model = Sequential()
model.add(LSTM(50, input_shape=(24, 1)))
model.add(Dense(1))
model.compile(loss='mse', optimizer='adam')

# Create the data generators
train_gen = TimeseriesGenerator(train_scaled, train_scaled, length=24, batch_size=1)
val_gen = TimeseriesGenerator(val_scaled, val_scaled, length=24, batch_size=1)
test_gen = TimeseriesGenerator(test_scaled, test_scaled, length=24, batch_size=1)

# Train the model
model.fit(train_gen, epochs=3, validation_data=val_gen)

# Evaluate the model. `Model.evaluate` accepts generators directly;
# `evaluate_generator` is deprecated and absent from newer Keras releases.
# verbose=0 keeps the call as quiet as `evaluate_generator` was by default.
mse = model.evaluate(test_gen, verbose=0)
print('Test MSE: %.3f' % mse)

### Automatic

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import mean_squared_error , mean_absolute_percentage_error
import numpy as np
import pandas as pd

In [None]:
# Define the functions for creating and training the LSTM model
def _build_stacked_lstm(hidden_size, initial_num_unit, learning_rate, look_back, unit_style):
    """Build and compile a stack of `hidden_size` LSTM layers topped by Dense(1).

    The first layer always has `initial_num_unit` units; deeper layers follow
    `unit_style`: halved per layer ('decrease', never below 1 unit), doubled
    per layer ('increase') or kept constant ('same').

    Raises
    ------
    ValueError
        If `unit_style` is not one of 'decrease', 'increase' or 'same'.
    """
    if unit_style == 'decrease':
        # Integer halving can reach 0 for deep stacks; clamp to one unit.
        units_at = lambda depth: max(1, initial_num_unit // 2**depth)
    elif unit_style == 'increase':
        units_at = lambda depth: initial_num_unit * 2**depth
    elif unit_style == 'same':
        units_at = lambda depth: initial_num_unit
    else:
        raise ValueError(
            f"unit_style must be 'decrease', 'increase' or 'same', got {unit_style!r}"
        )

    model = Sequential()
    for depth in range(hidden_size):
        # Every layer except the last passes its full sequence to the next one.
        layer_kwargs = {'return_sequences': depth < hidden_size - 1}
        if depth == 0:
            layer_kwargs['input_shape'] = (look_back, 1)
        model.add(LSTM(units_at(depth), **layer_kwargs))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer=Adam(learning_rate=learning_rate))
    return model


def create_lstm_model(hidden_size, initial_num_unit, learning_rate, epoch=20, batch_size = 16 , look_back = 24 , unit_style= 'decrease' , save=False):
    """Build, train and optionally save a stacked LSTM regressor.

    Parameters
    ----------
    hidden_size : int
        Number of stacked LSTM layers.
    initial_num_unit : int
        Units in the first LSTM layer.
    learning_rate : float
        Learning rate handed to the Adam optimizer.
    epoch : int
        Number of training epochs.
    batch_size : int
        Training batch size.
    look_back : int
        Number of time steps per input window (input shape is (look_back, 1)).
    unit_style : {'decrease', 'increase', 'same'}
        How the unit count changes from one layer to the next.
    save : bool
        False: fit on the globals `X_train`/`y_train`, validating on
        `X_val`/`y_val`. True: fit on the globals `X`/`y` and save the model
        under ``../models/``.

    Returns
    -------
    The trained Keras model. (Previously the `save=True` paths returned None;
    callers that ignore the return value are unaffected.)

    Raises
    ------
    ValueError
        If `unit_style` is not recognised (previously this silently returned
        None).

    Notes
    -----
    NOTE(review): `X`, `y`, `X_train`, `y_train`, `X_val`, `y_val`, `X_test`,
    `y_test` and `packager` are not defined in this cell and are presumably
    notebook globals — confirm they exist before calling.
    Only the 'decrease' save path re-fits a scaler, scores the model on the
    test split and embeds that score in the file name; the other two styles
    save without a score, exactly as before.
    """
    model = _build_stacked_lstm(hidden_size, initial_num_unit, learning_rate, look_back, unit_style)

    if not save:
        model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=epoch, batch_size=batch_size, verbose=1)
        return model

    if unit_style == 'decrease':
        sc = MinMaxScaler(feature_range=(0,1))
        train_scaled = sc.fit_transform(X_train_temp)
        test_scaled = sc.transform(X_test_temp)
        val_scaled = sc.transform(X_val_temp)

        # The return value is discarded, as in the original code.
        # NOTE(review): presumably `packager` refreshes the global arrays used
        # below — confirm.
        packager(train_scaled , val_scaled , test_scaled)

        model.fit(X, y, epochs=epoch, batch_size=batch_size, verbose=1)
        y_pred = model.predict(X_test)
        org_y_pred = sc.inverse_transform(y_pred.reshape(-1,1))
        org_y_test = sc.inverse_transform(y_test.reshape(-1,1))

        testScore = mean_absolute_percentage_error(org_y_test, org_y_pred)

        model.save(f'../models/lstm_model_{hidden_size}_{initial_num_unit}_{learning_rate}_{epoch}_{round(testScore, 4)}.h5')
    else:
        model.fit(X, y, epochs=epoch, batch_size=batch_size, verbose=1)
        model.save(f'../models/lstm_model_{hidden_size}_{initial_num_unit}_{learning_rate}_{epoch}.h5')

    return model

In [None]:
# Define empty lists to store model results
hidden_sizes = []
initial_units_list = []
learning_rates_list = []
epochs_list = []
train_scores = []
val_scores = []
test_scores = []

# Define hyperparameters to tune
hidden_layer_sizes = [2,3,4,5]
initial_num_units = [128,64,32,16]
learning_rates = [0.001, 0.01]
epochs = [5, 10, 20, 30]

# NOTE(review): this batch size is declared but was never forwarded to
# create_lstm_model, which therefore trains with its own default (16).
# Left untouched because forwarding it would change training; confirm intent.
batch_size = 1

# Define the number of previous time steps to use as input features
look_back = 24

# A configuration whose test MAPE falls below this is re-trained and saved.
SAVE_MAPE_THRESHOLD = 0.02


for hidden_size in hidden_layer_sizes:
    for initial_num_unit in initial_num_units:
        for learning_rate in learning_rates:
            for epoch in epochs:
                print(f"Hidden size : {hidden_size} , Initial Num neurons : {initial_num_unit} , Learning Rate : {learning_rate} , Epoch : {epoch}")
                model = create_lstm_model(hidden_size, initial_num_unit, learning_rate , epoch = epoch , look_back = look_back , unit_style= 'decrease' )

                # Predict on every split and map predictions and targets back
                # to the original scale before scoring.
                trainPredict = model.predict(X_train)
                valPredict = model.predict(X_val)
                testPredict = model.predict(X_test)
                org_y_train_pred = scaler.inverse_transform(trainPredict)
                org_y_val_pred = scaler.inverse_transform(valPredict)
                org_y_test_pred = scaler.inverse_transform(testPredict)

                org_y_train = scaler.inverse_transform(y_train.reshape(-1,1))
                org_y_val = scaler.inverse_transform(y_val.reshape(-1,1))
                org_y_test = scaler.inverse_transform(y_test.reshape(-1,1))

                trainScore = mean_absolute_percentage_error(org_y_train, org_y_train_pred)
                valScore = mean_absolute_percentage_error(org_y_val, org_y_val_pred)
                # Score predictions against the true test targets. The previous
                # code compared the predictions with themselves, which always
                # gave 0 and caused every configuration to be saved.
                testScore = mean_absolute_percentage_error(org_y_test, org_y_test_pred)

                # Append results to lists
                hidden_sizes.append(hidden_size)
                initial_units_list.append(initial_num_unit)
                learning_rates_list.append(learning_rate)
                epochs_list.append(epoch)
                train_scores.append(trainScore)
                val_scores.append(valScore)
                test_scores.append(testScore)
                print(f"Hidden size : {hidden_size} , Initial Num neurons : {initial_num_unit} , Learning Rate : {learning_rate} , Epoch : {epoch} , Unit Style : {'Decrease'} ")
                print(f"Train Score : {trainScore} , Val Score : {valScore} , Test Score : {testScore}")
                print("--------------------------------------------------------------------------------")
                if testScore < SAVE_MAPE_THRESHOLD:
                    # Re-train the promising configuration and persist it.
                    create_lstm_model(hidden_size , initial_num_unit , learning_rate , epoch = epoch, unit_style = 'decrease' , save= True)

# Create dataframe from results.
# NOTE(review): sorted descending, so the lowest (best) MAPE ends up last.
results_df = pd.DataFrame({
    'HiddenSize': hidden_sizes,
    'Initial Num Neurons': initial_units_list,
    'LearningRate': learning_rates_list,
    'Epoch': epochs_list,
    'TrainScore': train_scores,
    'valScore': val_scores,
    'TestScore': test_scores
}).sort_values(by='TestScore' , ascending=False)

# Print dataframe (the discarded Styler expression that preceded this line had
# no effect and was removed).
print(results_df)

In [None]:
# Display the results with the MAPE columns rendered as percentages and shaded.
# The Styler has to be the cell's final expression to be rendered (it was
# previously built and thrown away), and the percent format is limited to the
# score columns so the hyper-parameter columns keep their plain values.
score_columns = ['TrainScore', 'valScore', 'TestScore']
styled_results = (
    results_df.style
    .format("{:.2%}", subset=score_columns)
    .background_gradient(cmap="Blues", subset=score_columns)
)
styled_results

# Test | Evaluation

# Final Model & Predictions