### Tahap 1: Pemuatan Data dan Inisialisasi (File ini dijalankan di Google Colab)

In [1]:
# ==============================================================================
# --- Stage 1: Connect to Google Drive ---
# ==============================================================================
# Colab-only helper module; this cell is meant to run inside Google Colab.
from google.colab import drive
# Mount the user's Drive under /content/drive. force_remount=True requests a
# fresh mount even if Drive is already mounted in this session.
drive.mount('/content/drive', force_remount=True)


Mounted at /content/drive


In [2]:
# ==============================================================================
# --- Tahap 2: Atur Path, Muat Library & Data ---
# ==============================================================================
import os
import sys
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

# --- Project root (set by hand) ---
# Location of the project on the mounted Drive; previously confirmed correct.
BASE_DIR = "/content/drive/Othercomputers/My Laptop (2)/Git/TA_SpatioTemporal"

# --- Check the root, derive the sub-folders, then load the data ---
if os.path.exists(BASE_DIR):
    print(f"✅ Path Proyek Berhasil Ditemukan: {BASE_DIR}")

    # Both working folders live under <BASE_DIR>/Data.
    data_root = os.path.join(BASE_DIR, 'Data')
    PATH_SPLIT_DATA = os.path.join(data_root, 'split_data')
    PATH_PREDICTIONS = os.path.join(data_root, 'predictions')
    # The predictions folder is created on demand; the split folder must exist.
    os.makedirs(PATH_PREDICTIONS, exist_ok=True)

    # --- Load the train/test splits ---
    # A failure is reported but not re-raised: the next cell checks whether
    # `train_df` exists before doing any work.
    train_path = os.path.join(PATH_SPLIT_DATA, 'train_set.parquet')
    test_path = os.path.join(PATH_SPLIT_DATA, 'test_set.parquet')
    try:
        train_df = pd.read_parquet(train_path)
        test_df = pd.read_parquet(test_path)
        print("\nBerhasil memuat train_set.parquet dan test_set.parquet dari Google Drive.")
    except Exception as e:
        print(f"\n[ERROR] Gagal memuat data: {e}")
else:
    print(f"❌ GAGAL: Path tidak ditemukan di: {BASE_DIR}")


✅ Path Proyek Berhasil Ditemukan: /content/drive/Othercomputers/My Laptop (2)/Git/TA_SpatioTemporal

Berhasil memuat train_set.parquet dan test_set.parquet dari Google Drive.


### Tahap 3: Pra-pemrosesan Data

In [3]:
# ==============================================================================
# --- Stage 3: Pre-process the Data into Sequences ---
# ==============================================================================
def make_windows(scaled, n_past, n_future, n_features):
    """Cut one meter's scaled matrix into sliding (input window, target) pairs.

    Args:
        scaled: 2-D array with one row per time step. The first ``n_features``
            columns are model inputs; column ``n_features`` holds the target.
        n_past: number of consecutive rows that form one input window.
        n_future: how many steps after the end of the window the target lies
            (1 means the row immediately following the window).
        n_features: number of leading feature columns in ``scaled``.

    Returns:
        A tuple ``(windows, targets, target_rows)`` of equally long lists:
        input windows of shape ``(n_past, n_features)``, targets of shape
        ``(1,)``, and the row position inside ``scaled`` each target was read
        from. All three are empty when ``scaled`` has fewer than
        ``n_past + n_future`` rows.
    """
    windows, targets, target_rows = [], [], []
    for end in range(n_past, len(scaled) - n_future + 1):
        target_row = end + n_future - 1
        windows.append(scaled[end - n_past:end, :n_features])
        # Slice (not scalar index) so every target keeps shape (1,).
        targets.append(scaled[target_row:target_row + 1, n_features])
        target_rows.append(target_row)
    return windows, targets, target_rows


if 'train_df' in locals():
    # --- Define the target and the feature columns (IMPORTANT!) ---
    TARGET = 'konsumsi_energi'
    FEATURES = [col for col in train_df.columns if col not in ['timestamp', 'meter_id', TARGET, 'apakah_akhir_pekan', 'apakah_jam_kerja']]
    print("\nFitur yang akan digunakan:", FEATURES)

    # --- LSTM windowing configuration ---
    N_PAST = 24 # Number of past rows (hours) fed to the model per sample
    N_FUTURE = 1 # Predict one step (hour) ahead

    X_train, y_train = [], []
    X_test, y_test = [], []
    scalers = {} # One fitted MinMaxScaler per meter_id, reused for the test set

    # Features first, target last: make_windows relies on this column order.
    scaled_columns = FEATURES + [TARGET]
    n_features = len(FEATURES)

    # NOTE(review): windows are cut from consecutive rows of each meter, so the
    # rows are assumed to already be in chronological order without gaps —
    # TODO confirm against the script that produced the parquet splits.

    # --- Training data: fit one scaler per meter, then window ---
    print("\nMemproses Data Latih...")
    for meter_id, group in train_df.groupby('meter_id'):
        scaler = MinMaxScaler()
        group_scaled = scaler.fit_transform(group[scaled_columns])
        scalers[meter_id] = scaler
        windows, targets, _ = make_windows(group_scaled, N_PAST, N_FUTURE, n_features)
        X_train.extend(windows)
        y_train.extend(targets)

    # --- Test data: reuse the scaler fitted on the same meter's training rows ---
    print("Memproses Data Uji...")
    # Index label in test_df of the row each test target was read from, in the
    # same order as X_test / y_test.
    test_indices = []
    for meter_id, group in test_df.groupby('meter_id'):
        # Meters never seen during training have no scaler and are skipped.
        if meter_id in scalers:
            scaler = scalers[meter_id]
            group_scaled = scaler.transform(group[scaled_columns])
            windows, targets, target_rows = make_windows(group_scaled, N_PAST, N_FUTURE, n_features)
            X_test.extend(windows)
            y_test.extend(targets)
            test_indices.extend(group.index[row] for row in target_rows)

    X_train, y_train = np.array(X_train), np.array(y_train)
    X_test, y_test = np.array(X_test), np.array(y_test)

    print(f"\nBentuk data latih (X, y): {X_train.shape}, {y_train.shape}")
    print(f"Bentuk data uji (X, y): {X_test.shape}, {y_test.shape}")
    print("\nData siap untuk dimasukkan ke model.")
else:
    print("❌ GAGAL: Variabel 'train_df' tidak ditemukan. Harap jalankan sel sebelumnya terlebih dahulu.")



Fitur yang akan digunakan: ['is_kelas', 'is_kantor', 'is_penelitian', 'avg_temp_previous_hour', 'jam', 'hari_minggu', 'hari_bulan', 'minggu_tahun', 'bulan', 'tahun', 'konsumsi_lag_1_jam', 'konsumsi_lag_24_jam']

Memproses Data Latih...
Memproses Data Uji...

Bentuk data latih (X, y): (282555, 24, 12), (282555, 1)
Bentuk data uji (X, y): (47541, 24, 12), (47541, 1)

Data siap untuk dimasukkan ke model.


### Tahap 4: Bangun & Latih Model LSTM

In [4]:
# ==============================================================================
# --- Stage 4: Build and Train the Model ---
# ==============================================================================

# Seed the Python, NumPy and TensorFlow random generators so that weight
# initialisation, dropout masks and batch shuffling repeat between runs.
# (Some GPU kernels can still be non-deterministic.)
SEED = 42
tf.keras.utils.set_random_seed(SEED)

# --- OPTION 1: single-layer architecture ---
# To use it, uncomment the four lines below and comment out OPTION 2.
# print("--- Membangun Model LSTM 1-Lapis ---")
# model = Sequential()
# model.add(LSTM(64, activation='relu', input_shape=(X_train.shape[1], X_train.shape[2])))
# model.add(Dense(y_train.shape[1]))

# --- OPTION 2: two-layer (stacked) LSTM — the architecture currently active ---
print("--- Membangun Model LSTM 2-Lapis ---")
model = Sequential()
# The first LSTM returns the full sequence so the second LSTM can consume it.
model.add(LSTM(64, activation='relu', input_shape=(X_train.shape[1], X_train.shape[2]), return_sequences=True))
model.add(Dropout(0.2))
# The second LSTM returns only its last state, which feeds the dense head.
model.add(LSTM(32, activation='relu', return_sequences=False))
model.add(Dropout(0.2))
# One output unit per target step (y_train has shape (n_samples, n_outputs)).
model.add(Dense(y_train.shape[1]))


# --- Compile and train ---
model.compile(optimizer='adam', loss='mse')
model.summary()

print("\nMemulai pelatihan model...")
# NOTE(review): Keras carves validation_split off the END of the arrays before
# shuffling. X_train is assembled meter by meter in the preprocessing cell, so
# val_loss here is measured on the last meter(s) only, not on a sample of all
# meters.
history = model.fit(X_train, y_train, epochs=20, batch_size=64, validation_split=0.1, verbose=1)
print("\nPelatihan model selesai.")


--- Membangun Model LSTM 2-Lapis ---


  super().__init__(**kwargs)



Memulai pelatihan model...
Epoch 1/20
[1m3974/3974[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 7ms/step - loss: 0.0120 - val_loss: 0.0048
Epoch 2/20
[1m3974/3974[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 6ms/step - loss: 0.0050 - val_loss: 0.0046
Epoch 3/20
[1m3974/3974[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 6ms/step - loss: 0.0046 - val_loss: 0.0044
Epoch 4/20
[1m3974/3974[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 6ms/step - loss: 0.0043 - val_loss: 0.0040
Epoch 5/20
[1m3974/3974[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 6ms/step - loss: 0.0041 - val_loss: 0.0040
Epoch 6/20
[1m3974/3974[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 6ms/step - loss: 0.0041 - val_loss: 0.0041
Epoch 7/20
[1m3974/3974[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 6ms/step - loss: 0.0040 - val_loss: 0.0041
Epoch 8/20
[1m3974/3974[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 6ms/step - loss: 0.0039 - val_l

### Tahap 5: Prediksi, Evaluasi, & Simpan Hasil

In [5]:
# ==============================================================================
# --- Stage 5: Prediction, Evaluation, and Saving the Results ---
# ==============================================================================

# --- Predict on the test windows (values are still on the scaled 0-1 axis) ---
print("Membuat prediksi pada data uji...")
predictions_scaled = model.predict(X_test)

# --- Build the result frame ---
# test_indices[k] is the test_df row whose target belongs to X_test[k], so
# row k of df_hasil lines up with predictions_scaled[k].
df_hasil = test_df.loc[test_indices].copy()
if len(df_hasil) != len(predictions_scaled):
    # .loc returns extra rows when index labels are duplicated, which would
    # silently break the row-to-prediction alignment used below.
    raise ValueError(
        f"Jumlah baris hasil ({len(df_hasil)}) tidak sama dengan jumlah prediksi "
        f"({len(predictions_scaled)}); periksa apakah indeks test_df unik."
    )

# --- Inverse-transform per meter, writing results back by row position ---
# Each meter has its own scaler, so predictions are un-scaled one meter at a
# time. A boolean mask gives the row positions directly (no per-row list
# search), and writing into a pre-allocated array keeps every prediction on its
# own row regardless of the order in which meters are visited.
meter_per_row = df_hasil['meter_id'].to_numpy()
y_pred_inversed = np.full(len(df_hasil), np.nan)
n_scaled_columns = len(FEATURES) + 1  # features + target, as seen by the scaler
for meter_id in pd.unique(meter_per_row):
    if meter_id in scalers:
        posisi = np.flatnonzero(meter_per_row == meter_id)
        preds_scaled_group = predictions_scaled[posisi]
        # The scaler expects every column it was fitted on; only the last
        # (target) column matters here, the feature columns are zero padding.
        dummy_pred = np.zeros((len(posisi), n_scaled_columns))
        dummy_pred[:, -1] = preds_scaled_group.ravel()
        y_pred_inversed[posisi] = scalers[meter_id].inverse_transform(dummy_pred)[:, -1]

# --- Attach the predictions and give the target a result-oriented name ---
df_hasil['prediksi_lstm'] = y_pred_inversed
df_hasil = df_hasil.rename(columns={TARGET: 'target_aktual'})

# --- Series used by the evaluation below ---
y_true = df_hasil['target_aktual']
y_pred = df_hasil['prediksi_lstm']

def symmetric_mean_absolute_percentage_error(y_true, y_pred):
    """Return the symmetric MAPE of ``y_pred`` against ``y_true``, in percent.

    Each element contributes ``2 * |pred - true| / (|true| + |pred| + 1e-8)``;
    the small constant keeps the ratio finite when both values are zero. The
    mean of those ratios is multiplied by 100.
    """
    actual = np.array(y_true)
    forecast = np.array(y_pred)
    numerator = 2 * np.abs(forecast - actual)
    denominator = np.abs(actual) + np.abs(forecast) + 1e-8
    return np.mean(numerator / denominator) * 100

# --- Error metrics on the original (un-scaled) target values ---
mse = mean_squared_error(y_true, y_pred)
mae = mean_absolute_error(y_true, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_true, y_pred)
smape = symmetric_mean_absolute_percentage_error(y_true, y_pred)

print(f"\n--- Evaluasi Final Model LSTM ---")
print(f"Mean Absolute Error (MAE):       {mae:.4f}")
print(f"Root Mean Squared Error (RMSE):  {rmse:.4f}")
print(f"R-squared (R²):                  {r2:.4f}")
print(f"Symmetric MAPE (sMAPE):          {smape:.2f}%")

# --- Persist the predictions ---
# Rename the file to match the model variant that was trained.
output_filename = 'lstm_2_layer_20_epochs_results.parquet'
# Only the identifying columns plus actual and predicted values are written.
kolom_hasil = ['timestamp', 'meter_id', 'target_aktual', 'prediksi_lstm']
df_tersimpan = df_hasil[kolom_hasil]
df_tersimpan.to_parquet(os.path.join(PATH_PREDICTIONS, output_filename), index=False)
print(f"\nDataFrame hasil berhasil disimpan ke:\n{os.path.join(PATH_PREDICTIONS, output_filename)}")


Membuat prediksi pada data uji...
[1m1486/1486[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step

--- Evaluasi Final Model LSTM ---
Mean Absolute Error (MAE):       2.3404
Root Mean Squared Error (RMSE):  5.4088
R-squared (R²):                  0.9601
Symmetric MAPE (sMAPE):          21.05%

DataFrame hasil berhasil disimpan ke:
/content/drive/Othercomputers/My Laptop (2)/Git/TA_SpatioTemporal/Data/predictions/lstm_2_layer_20_epochs_results.parquet
