### **Meningkatkan Akurasi Forecasting**
  **Hyperparameter Tuning**:
   - Coba berbagai jumlah unit pada LSTM (e.g., 32, 64, 128).
   - Ubah nilai `learning_rate` pada optimizer Adam.
   - Eksperimen dengan jumlah lagging.


In [1]:
!pip install gdown

# Unduh file dari Google Drive
!gdown --id 1iePr-95ohkXLcgaXBEWNYb2R3lu_-pjz -O daily-min-temperatures.csv


Downloading...
From: https://drive.google.com/uc?id=1iePr-95ohkXLcgaXBEWNYb2R3lu_-pjz
To: /content/daily-min-temperatures.csv
100% 67.9k/67.9k [00:00<00:00, 31.5MB/s]


In [2]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from sklearn.model_selection import train_test_split

In [3]:
# Fungsi Custom untuk R^2 dan RMSE
def r2_score_tf(y_true, y_pred):
    y_true_mean = tf.reduce_mean(y_true)
    ss_total = tf.reduce_sum(tf.square(y_true - y_true_mean))
    ss_residual = tf.reduce_sum(tf.square(y_true - y_pred))
    return 1 - (ss_residual / ss_total)

def rmse_tf(y_true, y_pred):
    return tf.sqrt(tf.reduce_mean(tf.square(y_true - y_pred)))

In [4]:
# Load dataset
data = pd.read_csv("daily-min-temperatures.csv")
data['Date'] = pd.to_datetime(data['Date'])
data.set_index('Date', inplace=True)

# Normalize the data
scaler = MinMaxScaler()
data['Temp_scaled'] = scaler.fit_transform(data[['Temp']])

In [5]:
# Create lagging features
def create_lagged_features(data, lag):
    X, y = [], []
    for i in range(lag, len(data)):
        X.append(data[i-lag:i])
        y.append(data[i])
    return np.array(X), np.array(y)

lag = 3
X, y = create_lagged_features(data['Temp_scaled'].values, lag)

In [6]:
# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# Reshape for LSTM
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

In [7]:
# Build LSTM model
def build_and_train_model(units, learning_rate, lag):
    # Recreate lagged features
    X, y = create_lagged_features(data['Temp_scaled'].values, lag)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)
    X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
    X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

    # Define model
    model = Sequential([
        LSTM(units, activation='relu', input_shape=(lag, 1)),
        Dropout(0.2),
        Dense(1)
    ])
    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    model.compile(optimizer=optimizer, loss='mse')

    # Train model
    history = model.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.2, verbose=0)

    # Evaluate model
    y_pred = model.predict(X_test).flatten()
    r2 = r2_score_tf(tf.constant(y_test, dtype=tf.float32), tf.constant(y_pred, dtype=tf.float32)).numpy()
    rmse = rmse_tf(tf.constant(y_test, dtype=tf.float32), tf.constant(y_pred, dtype=tf.float32)).numpy()

    return r2, rmse, history.history['val_loss'][-1]

In [8]:
# Hyperparameter tuning
units_list = [32, 64, 128]
learning_rates = [0.001, 0.01]
lags = [3, 5, 7]

results = []

for units in units_list:
    for lr in learning_rates:
        for lag in lags:
            print(f"Training model with units={units}, learning_rate={lr}, lag={lag}")
            r2, rmse, val_loss = build_and_train_model(units, lr, lag)
            results.append((units, lr, lag, r2, rmse, val_loss))

Training model with units=32, learning_rate=0.001, lag=3


  super().__init__(**kwargs)


[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 23ms/step
Training model with units=32, learning_rate=0.001, lag=5
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 22ms/step
Training model with units=32, learning_rate=0.001, lag=7
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 20ms/step
Training model with units=32, learning_rate=0.01, lag=3
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 18ms/step
Training model with units=32, learning_rate=0.01, lag=5
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step
Training model with units=32, learning_rate=0.01, lag=7
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/step
Training model with units=64, learning_rate=0.001, lag=3
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 22ms/step
Training model with units=64, learning_rate=0.001, lag=5
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 23ms/step
Training mod

In [9]:
# Save results to DataFrame
results_df = pd.DataFrame(results, columns=['Units', 'Learning Rate', 'Lag', 'R2', 'RMSE', 'Validation Loss'])
print(results_df.sort_values(by='R2', ascending=False))

    Units  Learning Rate  Lag        R2      RMSE  Validation Loss
14    128          0.001    7  0.706197  0.084609         0.008789
8      64          0.001    7  0.703625  0.084978         0.008835
11     64          0.010    7  0.703338  0.085019         0.008609
2      32          0.001    7  0.701779  0.085242         0.008869
4      32          0.010    5  0.698196  0.085753         0.008626
7      64          0.001    5  0.695838  0.086087         0.009063
1      32          0.001    5  0.694601  0.086262         0.009187
13    128          0.001    5  0.688670  0.087096         0.009133
12    128          0.001    3  0.683104  0.087839         0.009160
3      32          0.010    3  0.680050  0.088262         0.008953
15    128          0.010    3  0.676413  0.088762         0.009069
10     64          0.010    5  0.675801  0.088877         0.009140
6      64          0.001    3  0.675183  0.088930         0.009428
9      64          0.010    3  0.674978  0.088958         0.00