In [1]:
# ==============================================================================
# --- Tahap 1: Inisialisasi dan Pemuatan Data ---
# (Kode ini identik dengan yang sudah berhasil sebelumnya)
# ==============================================================================

import pandas as pd
import numpy as np
import os
import sys
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error

# --- Configure paths ---
# The project root is taken to be the parent of the current working directory;
# it is appended to sys.path (once) so project modules can be imported.
BASE_DIR = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
if BASE_DIR not in sys.path:
    sys.path.append(BASE_DIR)

DATA_DIR = os.path.join(BASE_DIR, 'Data')
PATH_SPLIT_DATA = os.path.join(DATA_DIR, 'split_data')
PATH_PREDICTIONS = os.path.join(DATA_DIR, 'predictions')
# Make sure the output folder exists before anything is written to it.
os.makedirs(PATH_PREDICTIONS, exist_ok=True)

# --- Load data ---
train_df = pd.read_parquet(os.path.join(PATH_SPLIT_DATA, 'train_set.parquet'))
test_df = pd.read_parquet(os.path.join(PATH_SPLIT_DATA, 'test_set.parquet'))

# --- Define features and target ---
TARGET = 'konsumsi_energi'
# Columns that are never fed to the model as input features.
NON_FEATURE_COLUMNS = {
    'timestamp',
    'meter_id',
    TARGET,
    'apakah_akhir_pekan',
    'apakah_jam_kerja',
}
# Every remaining column, in the order it appears in the training frame.
FEATURES = [col for col in train_df.columns if col not in NON_FEATURE_COLUMNS]

print("Data Latih dan Uji berhasil dimuat.")
print("Fitur yang akan digunakan:", FEATURES)

Data Latih dan Uji berhasil dimuat.
Fitur yang akan digunakan: ['is_kelas', 'is_kantor', 'is_penelitian', 'avg_temp_previous_hour', 'jam', 'hari_minggu', 'hari_bulan', 'minggu_tahun', 'bulan', 'tahun', 'konsumsi_lag_1_jam', 'konsumsi_lag_24_jam']


In [2]:
# ==============================================================================
# --- Stage 2: Data Pre-processing ---
# (Identical to the other model notebooks, because the data methodology must
#  stay the same)
# ==============================================================================

# --- LSTM configuration ---
N_PAST = 24     # number of past rows fed to the model per sample
N_FUTURE = 1    # how many rows ahead of the window the target row lies

# Column order handed to the scaler: all features first, the target last.
model_columns = FEATURES + [TARGET]
n_features = len(FEATURES)

X_train, y_train, X_test, y_test = [], [], [], []
scalers = {}

# --- Process the training data ---
print("\nMemproses Data Latih...")
for meter_id, group in train_df.groupby('meter_id'):
    # One MinMaxScaler per meter, fitted on that meter's training rows only.
    meter_scaler = MinMaxScaler()
    scaled = meter_scaler.fit_transform(group[model_columns])
    scalers[meter_id] = meter_scaler
    for window_end in range(N_PAST, len(scaled) - N_FUTURE + 1):
        target_row = window_end + N_FUTURE - 1
        # Input: the N_PAST rows before window_end, feature columns only.
        X_train.append(scaled[window_end - N_PAST:window_end, :n_features])
        # Label: the scaled target of the target row, kept as a length-1 array.
        y_train.append(scaled[target_row:target_row + 1, n_features])

# --- Process the test data ---
print("Memproses Data Uji...")
test_indices = []
for meter_id, group in test_df.groupby('meter_id'):
    if meter_id not in scalers:
        # No scaler was fitted for this meter during training; skip it.
        continue
    scaled = scalers[meter_id].transform(group[model_columns])
    for window_end in range(N_PAST, len(scaled) - N_FUTURE + 1):
        target_row = window_end + N_FUTURE - 1
        X_test.append(scaled[window_end - N_PAST:window_end, :n_features])
        y_test.append(scaled[target_row:target_row + 1, n_features])
        # Remember which test_df row each sample predicts, for later alignment.
        test_indices.append(group.index[target_row])

X_train = np.array(X_train)
y_train = np.array(y_train)
X_test = np.array(X_test)
y_test = np.array(y_test)

print(f"\nBentuk data latih (X, y): {X_train.shape}, {y_train.shape}")
print(f"Bentuk data uji (X, y): {X_test.shape}, {y_test.shape}")


Memproses Data Latih...
Memproses Data Uji...

Bentuk data latih (X, y): (282555, 24, 12), (282555, 1)
Bentuk data uji (X, y): (47541, 24, 12), (47541, 1)


In [3]:
# ==============================================================================
# --- Stage 3: LSTM Model Training (1-LAYER VERSION) ---
# (THIS IS THE ONLY PART THAT DIFFERS)
# ==============================================================================

# Seed Python, NumPy and the Keras backend so that weight initialisation and
# batch shuffling are repeatable between runs of this cell.
SEED = 42
tf.keras.utils.set_random_seed(SEED)

# --- Build the model architecture ---
# The input shape is declared with an explicit Input object instead of the
# `input_shape=` layer argument, which Keras warns about inside Sequential models.
# Only one LSTM layer is used (return_sequences=False is the default), with
# 64 units so that it is comparable to the first layer of the 2-layer model.
# The output layer follows directly; its width matches the label width.
model_1_layer = Sequential([
    tf.keras.Input(shape=(X_train.shape[1], X_train.shape[2])),
    LSTM(64, activation='relu'),
    Dense(y_train.shape[1]),
])

model_1_layer.compile(optimizer='adam', loss='mse')
model_1_layer.summary()

# --- Train the model ---
# NOTE(review): Keras documents validation_split as taking the LAST fraction of
# the samples before shuffling. X_train is built meter by meter, so the
# validation set presumably contains only the last meter(s) -- confirm whether
# that is intended. Left unchanged here.
history_1_layer = model_1_layer.fit(
    X_train,
    y_train,
    epochs=20,
    batch_size=32,
    validation_split=0.1,
    verbose=1,
)

  super().__init__(**kwargs)


Epoch 1/20
[1m7947/7947[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m63s[0m 8ms/step - loss: 0.0067 - val_loss: 0.0042
Epoch 2/20
[1m7947/7947[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m58s[0m 7ms/step - loss: 0.0037 - val_loss: 0.0042
Epoch 3/20
[1m7947/7947[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m55s[0m 7ms/step - loss: 0.0035 - val_loss: 0.0043
Epoch 4/20
[1m7947/7947[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m58s[0m 7ms/step - loss: 0.0034 - val_loss: 0.0047
Epoch 5/20
[1m7947/7947[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m56s[0m 7ms/step - loss: 0.0033 - val_loss: 0.0045
Epoch 6/20
[1m7947/7947[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m62s[0m 8ms/step - loss: 0.0033 - val_loss: 0.0049
Epoch 7/20
[1m7947/7947[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m66s[0m 8ms/step - loss: 0.0032 - val_loss: 0.0039
Epoch 8/20
[1m7947/7947[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m66s[0m 8ms/step - loss: 0.0032 - val_loss: 0.0038
Epoch 9/20
[1m7

In [5]:
# ==============================================================================
# --- Stage 4: Prediction and Result Reconstruction ---
# (This code is identical, it only uses model_1_layer for prediction)
# ==============================================================================

# --- Run the predictions ---
predictions_scaled = model_1_layer.predict(X_test)

# --- Build the result DataFrame ---
# Row k of df_hasil is the test_df row predicted by sample k of X_test, so
# df_hasil, predictions_scaled and y_test are aligned by position.
df_hasil = test_df.loc[test_indices].copy()
if len(df_hasil) != len(predictions_scaled):
    # Happens when test_df has repeated index labels: .loc then returns extra
    # rows and the positional alignment no longer holds.
    raise ValueError(
        f"Jumlah baris df_hasil ({len(df_hasil)}) tidak sama dengan jumlah prediksi "
        f"({len(predictions_scaled)}); pastikan indeks test_df unik."
    )


def _inverse_target(scaler, scaled_target):
    """Map scaled target values back to the original scale.

    Each scaler was fitted on FEATURES + [TARGET], so inverse_transform needs a
    matrix with that many columns. The values are placed in the last (target)
    column of a zero-filled matrix and only that column is returned.
    """
    padded = np.zeros((len(scaled_target), len(FEATURES) + 1))
    padded[:, -1] = np.ravel(scaled_target)
    return scaler.inverse_transform(padded)[:, -1]


# --- Inverse-transform per building (meter) ---
# Results are written by row position, so they stay aligned with df_hasil
# whatever order the groups are visited in. `.indices` gives the integer row
# positions of every group directly, avoiding a per-row list search.
y_pred_inversed = np.empty(len(df_hasil), dtype=float)
y_test_inversed = np.empty(len(df_hasil), dtype=float)
for meter_id, posisi in df_hasil.groupby('meter_id').indices.items():
    # Only meters that have a fitted scaler contribute rows to test_indices.
    scaler = scalers[meter_id]
    y_pred_inversed[posisi] = _inverse_target(scaler, predictions_scaled[posisi])
    y_test_inversed[posisi] = _inverse_target(scaler, y_test[posisi])

# --- Add the result columns to the DataFrame ---
df_hasil['prediksi_lstm'] = y_pred_inversed
df_hasil = df_hasil.rename(columns={TARGET: 'target_aktual'})

# --- Final evaluation inputs ---
y_true = df_hasil['target_aktual']
y_pred = df_hasil['prediksi_lstm']

# Definisikan fungsi untuk MAPE dan sMAPE untuk menghindari pembagian dengan nol
def mean_absolute_percentage_error(y_true, y_pred, eps=1e-8):
    """Return the mean absolute percentage error (MAPE), in percent.

    Args:
        y_true: Array-like of actual values.
        y_pred: Array-like of predicted values, same length as ``y_true``.
        eps: Lower bound applied to ``|y_true|`` in the denominator so that
            actual values at (or extremely close to) zero cannot cause a
            division by zero.

    Returns:
        ``mean(|y_true - y_pred| / max(|y_true|, eps)) * 100``.

    Note:
        Actual values near zero still inflate the result massively; prefer a
        scale-robust metric such as sMAPE when the target can be close to zero.
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    # Bound the magnitude rather than adding eps to the signed value: a signed
    # offset cancels out for y_true == -eps and shrinks the denominator of
    # other negative actuals.
    denominator = np.maximum(np.abs(y_true), eps)
    return np.mean(np.abs(y_true - y_pred) / denominator) * 100

def symmetric_mean_absolute_percentage_error(y_true, y_pred):
    """Return the symmetric mean absolute percentage error (sMAPE), in percent.

    Each absolute error is doubled and divided by the sum of the absolute
    actual and predicted values; a small epsilon in the denominator keeps the
    ratio defined when both values are zero.
    """
    actual = np.asarray(y_true)
    predicted = np.asarray(y_pred)
    doubled_error = 2 * np.abs(predicted - actual)
    magnitude_sum = np.abs(actual) + np.abs(predicted) + 1e-8
    return np.mean(doubled_error / magnitude_sum) * 100

# --- Compute the evaluation metrics on the original (unscaled) values ---
mae = mean_absolute_error(y_true, y_pred)
rmse = np.sqrt(mean_squared_error(y_true, y_pred))
r2 = r2_score(y_true, y_pred)
mape = mean_absolute_percentage_error(y_true, y_pred)
smape = symmetric_mean_absolute_percentage_error(y_true, y_pred)

# A MAPE above 100% means the average error exceeds the actual values
# themselves, the typical symptom of near-zero actuals in the denominator.
# The warning is printed only then, so it can never contradict the number
# shown next to it.
MAPE_WARNING_THRESHOLD = 100.0

print(f"\n--- Evaluasi Final yang Konsisten untuk LSTM 1-Lapis ---")
print("\n")
print(f"Mean Absolute Error (MAE):       {mae:.4f}")
print("--> Penjelasan: Rata-rata selisih absolut antara prediksi dan nilai aktual. Satuannya sama dengan target (kWh). Semakin kecil, semakin baik.")
print("\n")
print(f"Root Mean Squared Error (RMSE):  {rmse:.4f}")
print("--> Penjelasan: Mirip MAE, tapi lebih menghukum kesalahan besar karena dikuadratkan. Satuannya juga kWh. Semakin kecil, semakin baik.")
print("\n")
print(f"R-squared (R²):                  {r2:.4f}")
print("--> Penjelasan: Seberapa baik model menjelaskan variasi data. Nilai 1 berarti prediksi sempurna. Semakin mendekati 1, semakin baik.")
print("\n")
print(f"Symmetric MAPE (sMAPE):          {smape:.2f}%")
print("--> Penjelasan: Versi perbaikan dari MAPE, lebih stabil jika ada nilai aktual mendekati nol. Memberikan error dalam bentuk persentase. Semakin kecil, semakin baik.")
print("\n")
print(f"Mean Absolute Percentage Error (MAPE): {mape:.2f}%")
if mape > MAPE_WARNING_THRESHOLD:
    print("--> Peringatan: Nilai MAPE sangat besar! Ini terjadi karena beberapa nilai aktual sangat mendekati nol. Gunakan sMAPE sebagai alternatif yang lebih stabil.")


# --- Save the results under a model-specific file name ---
output_filename = 'lstm_1_layer_results.parquet'
output_path = os.path.join(PATH_PREDICTIONS, output_filename)
# Only the identifying columns plus actual and predicted values are persisted.
df_hasil[['timestamp', 'meter_id', 'target_aktual', 'prediksi_lstm']].to_parquet(
    output_path, index=False
)
print(f"\nDataFrame hasil LSTM (1 Lapis) yang sudah sejajar berhasil disimpan ke:\n{output_path}")



[1m1486/1486[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step

--- Evaluasi Final yang Konsisten untuk LSTM 1-Lapis ---


Mean Absolute Error (MAE):       2.3463
--> Penjelasan: Rata-rata selisih absolut antara prediksi dan nilai aktual. Satuannya sama dengan target (kWh). Semakin kecil, semakin baik.


Root Mean Squared Error (RMSE):  5.4657
--> Penjelasan: Mirip MAE, tapi lebih menghukum kesalahan besar karena dikuadratkan. Satuannya juga kWh. Semakin kecil, semakin baik.


R-squared (R²):                  0.9593
--> Penjelasan: Seberapa baik model menjelaskan variasi data. Nilai 1 berarti prediksi sempurna. Semakin mendekati 1, semakin baik.


Symmetric MAPE (sMAPE):          20.57%
--> Penjelasan: Versi perbaikan dari MAPE, lebih stabil jika ada nilai aktual mendekati nol. Memberikan error dalam bentuk persentase. Semakin kecil, semakin baik.


Mean Absolute Percentage Error (MAPE): 23708976.16%
--> Peringatan: Nilai MAPE sangat besar! Ini terjadi karena beberapa nil