In [2]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import mean_squared_error, mean_absolute_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, GRU
from tensorflow.keras.optimizers import Adam

In [3]:
df = pd.read_csv("dst_2014-2023.csv")
df

Unnamed: 0,Date,Dst
0,2014-01-01 00:00:00,4
1,2014-01-01 01:00:00,3
2,2014-01-01 02:00:00,1
3,2014-01-01 03:00:00,1
4,2014-01-01 04:00:00,-1
...,...,...
87643,2023-12-31 19:00:00,0
87644,2023-12-31 20:00:00,1
87645,2023-12-31 21:00:00,2
87646,2023-12-31 22:00:00,2


In [4]:
df.set_index('Date', inplace=True)
df.isna().sum()

Dst    0
dtype: int64

In [5]:
df_train_valid = df[:int(0.85 * len(df))]

In [6]:
# [[[1], [2], [3], [4], [5]]] [6]
# [[[2], [3], [4], [5], [6]]] [7]
# [[[3], [4], [5], [6], [7]]] [8]

def df_to_X_y(df, window_size=5):
  df_as_np = df.to_numpy()
  X = []
  y = []
  for i in range(len(df_as_np)-window_size):
    row = [[a] for a in df_as_np[i:i+window_size]]
    X.append(row)
    label = df_as_np[i+window_size]
    y.append(label)
  return np.array(X), np.array(y)

In [10]:
WINDOW_SIZE = 6
X, y = df_to_X_y(df_train_valid, WINDOW_SIZE)
X.shape, y.shape

((74494, 6, 1, 1), (74494, 1))

In [8]:
def create_model():
    model = Sequential([
        LSTM(64, input_shape=(WINDOW_SIZE, 1), return_sequences=True),
        LSTM(64, return_sequences=False),
        Dense(64, activation ='relu'),
        Dense(1, activation = 'linear')
    ])
    model.compile(optimizer=Adam(learning_rate=0.0001), loss='huber', metrics=['mae', 'mse'])
    return model

In [11]:
# Menggunakan TimeSeriesSplit untuk cross-validation
tscv = TimeSeriesSplit(n_splits=5)

# Menyimpan hasil cross-validation
cv_results = []

for train_index, val_index in tscv.split(X):
    X_train, X_val = X[train_index], X[val_index]
    y_train, y_val = y[train_index], y[val_index]

    # Inisialisasi scaler
    scaler_X = StandardScaler()
    scaler_y = StandardScaler()

    # Scaling data
    X_train_scaled = scaler_X.fit_transform(X_train.reshape(-1, X_train.shape[-1])).reshape(X_train.shape)
    X_val_scaled = scaler_X.transform(X_val.reshape(-1, X_val.shape[-1])).reshape(X_val.shape)

    y_train_scaled = scaler_y.fit_transform(y_train)
    y_val_scaled = scaler_y.transform(y_val)

    # Membuat dan melatih model
    model = create_model()
    model.fit(X_train_scaled, y_train_scaled, epochs=100, batch_size=32, validation_data=(X_val_scaled, y_val_scaled), verbose=1)

    # Evaluasi model
    y_val_pred_scaled = model.predict(X_val_scaled)
    y_val_pred = scaler_y.inverse_transform(y_val_pred_scaled)

    # Menghitung RMSE
    rmse = np.sqrt(mean_squared_error(y_val, y_val_pred))
    mae = mean_absolute_error(y_val, y_val_pred)
    
    cv_results.append((rmse, mae))

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [13]:
# Rata-rata dan standar deviasi hasil cross-validation
rmse_scores = [result[0] for result in cv_results]
mae_scores = [result[1] for result in cv_results]

avg_rmse = np.mean(rmse_scores)
std_rmse = np.std(rmse_scores)
avg_mae = np.mean(mae_scores)
std_mae = np.std(mae_scores)


print(f'Average RMSE: {avg_rmse}, Standard Deviation RMSE: {std_rmse}')
print(f'Average MAE: {avg_mae}, Standard Deviation MAE: {std_mae}')

Average RMSE: 3.26086270616384, Standard Deviation RMSE: 0.4954362861847178
Average MAE: 2.251209139268981, Standard Deviation MAE: 0.2805836788043266
