### Tahap 1: Pemuatan Data dan Inisialisasi

In [1]:
import os

import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.layers import LSTM, Dense, Input
from tensorflow.keras.models import Sequential

In [3]:
# --- Paths ---
# Reminder carried over from the original cell: the path constants from the
# revised notebook 00 (BASE_DIR, PATH_SPLIT_DATA, PATH_PREDICTIONS, etc.) are
# meant to be used here. Until then the split data is read from this
# hardcoded absolute folder.
path_split_data = r'C:\MyFolder\Git\TA_SpatioTemporal\Data\split_data'

# --- Load the train and test splits ---
train_df, test_df = (
    pd.read_parquet(os.path.join(path_split_data, parquet_name))
    for parquet_name in ('train_set.parquet', 'test_set.parquet')
)

# --- Feature and target definition ---
TARGET = 'konsumsi_energi'
# Every column other than the timestamp, the meter identifier and the target
# is used as a model input. Nothing else is filtered out here, so flag
# columns such as 'apakah_akhir_pekan' stay in FEATURES.
_NON_FEATURE_COLUMNS = ('timestamp', 'meter_id', TARGET)
FEATURES = [col for col in train_df.columns if col not in _NON_FEATURE_COLUMNS]

print("Data Latih dan Uji berhasil dimuat.")
print("Fitur yang akan digunakan:", FEATURES)


Data Latih dan Uji berhasil dimuat.
Fitur yang akan digunakan: ['is_kelas', 'is_kantor', 'is_penelitian', 'avg_temp_previous_hour', 'jam', 'hari_minggu', 'hari_bulan', 'minggu_tahun', 'bulan', 'tahun', 'apakah_akhir_pekan', 'apakah_jam_kerja', 'konsumsi_lag_1_jam', 'konsumsi_lag_24_jam']


### Tahap 2: Pra-pemrosesan Data

In [4]:
# --- LSTM windowing configuration ---
N_PAST = 24   # number of past hourly rows fed to the model per sample (example value)
N_FUTURE = 1  # forecast horizon: predict 1 hour ahead


def _make_windows(scaled, n_past, n_future, n_features):
    """Cut one meter's scaled matrix into supervised-learning windows.

    Args:
        scaled: 2-D array whose first ``n_features`` columns are the inputs
            and whose column ``n_features`` is the target.
        n_past: number of consecutive rows in each input window.
        n_future: how many rows ahead of the window's last row the target is.
        n_features: number of leading input columns.

    Returns:
        ``(inputs, targets, target_rows)`` as three parallel lists: the
        ``(n_past, n_features)`` input windows, the length-1 target slices,
        and the positional row of each target inside ``scaled``.
    """
    steps = range(n_past, len(scaled) - n_future + 1)
    inputs = [scaled[step - n_past:step, 0:n_features] for step in steps]
    targets = [scaled[step + n_future - 1:step + n_future, n_features] for step in steps]
    target_rows = [step + n_future - 1 for step in steps]
    return inputs, targets, target_rows


X_train, y_train = [], []
X_test, y_test = [], []
scalers = {}       # meter_id -> MinMaxScaler fitted on that meter's training rows
test_indices = []  # test_df row label of every test target, in sample order

n_features = len(FEATURES)
scaled_columns = FEATURES + [TARGET]  # target goes last so it sits at column n_features

# --- Training data ---
print("\nMemproses Data Latih...")
for meter_id, group in train_df.groupby('meter_id'):
    # Fit the scaler on this meter's training rows only, and keep it so the
    # test rows of the same meter can be transformed with identical bounds.
    scaler = MinMaxScaler()
    group_scaled = scaler.fit_transform(group[scaled_columns])
    scalers[meter_id] = scaler

    windows, targets, _ = _make_windows(group_scaled, N_PAST, N_FUTURE, n_features)
    X_train.extend(windows)
    y_train.extend(targets)

# --- Test data ---
print("Memproses Data Uji...")
for meter_id, group in test_df.groupby('meter_id'):
    if meter_id not in scalers:
        # Meters without a fitted scaler (absent from the training set) are skipped.
        continue

    # Transform only: reuse the bounds learned from the training rows.
    scaler = scalers[meter_id]
    group_scaled = scaler.transform(group[scaled_columns])

    windows, targets, target_rows = _make_windows(group_scaled, N_PAST, N_FUTURE, n_features)
    X_test.extend(windows)
    y_test.extend(targets)
    # Remember the original test_df label of each target row so the
    # predictions can be joined back to the frame later.
    test_indices.extend(group.index[row] for row in target_rows)

X_train, y_train = np.array(X_train), np.array(y_train)
X_test, y_test = np.array(X_test), np.array(y_test)

print(f"\nBentuk data latih (X, y): {X_train.shape}, {y_train.shape}")
print(f"Bentuk data uji (X, y): {X_test.shape}, {y_test.shape}")



Memproses Data Latih...
Memproses Data Uji...

Bentuk data latih (X, y): (282555, 24, 14), (282555, 1)
Bentuk data uji (X, y): (47541, 24, 14), (47541, 1)


### Tahap 3: Pelatihan Model LSTM

In [5]:
# --- Reproducibility ---
# Seed the Python, NumPy and TensorFlow random generators so that weight
# initialisation and batch shuffling start from the same state on every
# "Restart & Run All" (as far as the compute backend allows).
SEED = 42
tf.keras.utils.set_random_seed(SEED)

# --- Build the model architecture ---
# The input shape is declared with an explicit Input layer rather than an
# `input_shape=` keyword on the first LSTM; Keras warns about the keyword form
# for Sequential models.
model = Sequential()
model.add(Input(shape=(X_train.shape[1], X_train.shape[2])))  # (window length, feature count)
model.add(LSTM(64, activation='relu', return_sequences=True))   # emits the full sequence for the next LSTM
model.add(LSTM(32, activation='relu', return_sequences=False))  # emits only the final state
model.add(Dense(y_train.shape[1]))                              # one output unit per target column

model.compile(optimizer='adam', loss='mse')
model.summary()

# --- Train the model ---
# validation_split=0.1 holds out a fraction of the training samples for
# validation loss reporting.
history = model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.1, verbose=1)


  super().__init__(**kwargs)


Epoch 1/10
[1m7947/7947[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m93s[0m 11ms/step - loss: 0.0068 - val_loss: 0.0042
Epoch 2/10
[1m7947/7947[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m92s[0m 12ms/step - loss: 0.0036 - val_loss: 0.0040
Epoch 3/10
[1m7947/7947[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m97s[0m 12ms/step - loss: 0.0034 - val_loss: 0.0039
Epoch 4/10
[1m7947/7947[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m111s[0m 14ms/step - loss: 0.0033 - val_loss: 0.0042
Epoch 5/10
[1m7947/7947[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m110s[0m 14ms/step - loss: 0.0032 - val_loss: 0.0045
Epoch 6/10
[1m7947/7947[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m111s[0m 14ms/step - loss: 0.0032 - val_loss: 0.0045
Epoch 7/10
[1m7947/7947[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m112s[0m 14ms/step - loss: 0.0031 - val_loss: 0.0040
Epoch 8/10
[1m7947/7947[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m114s[0m 14ms/step - loss: 0.0031 - val_loss: 0.0038
Epo

### Tahap 4: Prediksi dan Rekonstruksi Hasil (Paling Kritis)

In [7]:
# --- Run inference on the test windows ---
predictions_scaled = model.predict(X_test)

# --- Build the result frame ---
# test_indices[k] is the test_df row label targeted by the k-th test window,
# so row k of df_hasil lines up positionally with predictions_scaled[k] and
# y_test[k].
df_hasil = test_df.loc[test_indices].copy()
if len(df_hasil) != len(predictions_scaled):
    # With duplicate index labels, .loc returns extra rows and the positional
    # correspondence with the predictions is lost; fail loudly instead of
    # writing misaligned results.
    raise ValueError(
        f"Result frame has {len(df_hasil)} rows but there are "
        f"{len(predictions_scaled)} predictions; test_df index labels must be unique."
    )

# --- Inverse-transform per meter, writing results back by position ---
# Each scaler was fitted on FEATURES + [TARGET], so it expects
# len(FEATURES) + 1 columns with the target in the last one. A zero-padded
# dummy matrix carries the target column through inverse_transform.
n_scaled_columns = len(FEATURES) + 1
meter_ids = df_hasil['meter_id'].to_numpy()
y_pred_inversed = np.empty(len(df_hasil))
y_test_inversed = np.empty(len(df_hasil))

for meter_id in df_hasil['meter_id'].unique():
    # Positions (not labels) of this meter's rows; one vectorised mask replaces
    # a per-row list.index() search, which was quadratic in the test size.
    posisi = np.flatnonzero(meter_ids == meter_id)
    # Only meters present in `scalers` contributed to test_indices, so a
    # KeyError here would mean the kernel state is inconsistent.
    scaler_gedung = scalers[meter_id]

    dummy_pred = np.zeros((len(posisi), n_scaled_columns))
    dummy_pred[:, -1] = predictions_scaled[posisi].ravel()

    dummy_test = np.zeros((len(posisi), n_scaled_columns))
    dummy_test[:, -1] = y_test[posisi].ravel()

    y_pred_inversed[posisi] = scaler_gedung.inverse_transform(dummy_pred)[:, -1]
    y_test_inversed[posisi] = scaler_gedung.inverse_transform(dummy_test)[:, -1]

# --- Attach the predictions to the frame ---
df_hasil['prediksi_lstm'] = y_pred_inversed
df_hasil = df_hasil.rename(columns={TARGET: 'target_aktual'})

# --- Final evaluation on the original scale ---
mae = mean_absolute_error(df_hasil['target_aktual'], df_hasil['prediksi_lstm'])
r2 = r2_score(df_hasil['target_aktual'], df_hasil['prediksi_lstm'])
print(f"\nEvaluasi Final yang Konsisten:")
print(f"MAE: {mae:.4f}")
print(f"R-squared (R²): {r2:.4f}")

# --- Save the aligned results ---
path_prediksi = r'C:\MyFolder\Git\TA_SpatioTemporal\Data\predictions'
os.makedirs(path_prediksi, exist_ok=True)  # create the folder on first run
df_hasil[['timestamp', 'meter_id', 'target_aktual', 'prediksi_lstm']].to_parquet(
    os.path.join(path_prediksi, 'lstm_results_revised.parquet'), index=False
)
print("\nDataFrame hasil LSTM yang sudah sejajar berhasil disimpan.")


[1m1486/1486[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step

Evaluasi Final yang Konsisten:
MAE: 2.4840
R-squared (R²): 0.9579

DataFrame hasil LSTM yang sudah sejajar berhasil disimpan.


# Old

## Pembagian Data dan Pelatihan Model LSTM

In [15]:
# --- 2. Build the LSTM architecture ---
model_lstm = Sequential()
# The input layer declares the sample shape: (hours of history, feature count).
model_lstm.add(Input(shape=(X_train.shape[1], X_train.shape[2])))
# A single LSTM layer with 50 memory units.
model_lstm.add(LSTM(50, activation='relu'))
# One output neuron, i.e. one predicted value per sample.
model_lstm.add(Dense(1))

# Compile with mean-squared-error loss and the Adam optimizer.
model_lstm.compile(loss='mse', optimizer='adam')
model_lstm.summary()

# --- 3. Train the model ---
print("\nMulai melatih model LSTM...")
fit_options = dict(
    epochs=20,             # number of training passes
    batch_size=32,
    validation_split=0.1,  # hold out 10% of the training data for internal validation
    verbose=1,
)
history = model_lstm.fit(X_train, y_train, **fit_options)
print("\nModel LSTM berhasil dilatih!")



Mulai melatih model LSTM...
Epoch 1/20
[1m7947/7947[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m54s[0m 7ms/step - loss: 4.3235e-04 - val_loss: 1.8621e-04
Epoch 2/20
[1m7947/7947[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m50s[0m 6ms/step - loss: 1.5900e-04 - val_loss: 1.4187e-04
Epoch 3/20
[1m7947/7947[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 6ms/step - loss: 1.4609e-04 - val_loss: 1.2790e-04
Epoch 4/20
[1m7947/7947[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m53s[0m 7ms/step - loss: 1.4050e-04 - val_loss: 1.2294e-04
Epoch 5/20
[1m7947/7947[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m58s[0m 7ms/step - loss: 1.3823e-04 - val_loss: 1.2655e-04
Epoch 6/20
[1m7947/7947[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m53s[0m 7ms/step - loss: 1.3439e-04 - val_loss: 1.2364e-04
Epoch 7/20
[1m7947/7947[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 7ms/step - loss: 1.3031e-04 - val_loss: 1.2075e-04
Epoch 8/20
[1m7947/7947[0m [32m━━━━━━━━━━━━━━━━━━━━

## C: Evaluasi Kinerja Model LSTM

In [16]:
# --- 1. Predict on the test windows ---
predictions_scaled = model_lstm.predict(X_test)


# --- 2. Inverse transform (back to the original scale) ---
def _unscale_first_column(scaled_values):
    """Return `scaled_values` mapped back to original units via `scaler`.

    The scaler can only invert a full-width matrix, so the values are placed
    in column 0 of a zero-filled matrix with len(FEATURES) columns and column
    0 of the inverted matrix is returned.
    """
    # NOTE(review): this assumes `scaler` was fitted on exactly len(FEATURES)
    # columns with the target in column 0. The per-meter scalers created in
    # the preprocessing cell are fitted on FEATURES + [TARGET] with the target
    # last, so this looks tied to an earlier single-scaler pipeline. Confirm
    # which `scaler` and `FEATURES` are in the kernel before re-running.
    padded = np.zeros((len(scaled_values), len(FEATURES)))
    padded[:, 0] = scaled_values.flatten()
    return scaler.inverse_transform(padded)[:, 0]


predictions_inversed = _unscale_first_column(predictions_scaled)
y_test_inversed = _unscale_first_column(y_test)

# --- 3. Hitung Metrik Evaluasi pada Data Asli ---
def calculate_smape(y_true, y_pred):
    """Return the symmetric mean absolute percentage error, in percent.

    Each element contributes |y_pred - y_true| / ((|y_true| + |y_pred|) / 2).
    Elements where that denominator is zero (both values are zero) contribute
    0 instead of dividing by zero. The mean of the contributions is multiplied
    by 100.

    Args:
        y_true: array-like of actual values.
        y_pred: array-like of predicted values, same shape as ``y_true``.

    Returns:
        The sMAPE as a float percentage.
    """
    actual = np.array(y_true)
    predicted = np.array(y_pred)

    abs_error = np.abs(predicted - actual)
    half_scale = (np.abs(actual) + np.abs(predicted)) / 2

    # Pre-fill with zeros so positions skipped by `where` keep a 0 contribution.
    terms = np.zeros_like(half_scale)
    np.divide(abs_error, half_scale, out=terms, where=half_scale != 0)
    return np.mean(terms) * 100

# Error metrics computed on the inverse-transformed (original-scale) values.
mae_lstm = mean_absolute_error(y_test_inversed, predictions_inversed)
mse_lstm = mean_squared_error(y_test_inversed, predictions_inversed)
rmse_lstm = np.sqrt(mse_lstm)  # RMSE is the square root of the MSE
smape_lstm = calculate_smape(y_test_inversed, predictions_inversed)
r2_lstm = r2_score(y_test_inversed, predictions_inversed)

# Emit the report in one call; the lines match the original print order.
report_lines = [
    "\n--- Hasil Evaluasi Model LSTM ---",
    f"Mean Absolute Error (MAE): {mae_lstm:.2f}",
    f"Root Mean Square Error (RMSE): {rmse_lstm:.2f}",
    f"Symmetric Mean Absolute Percentage Error (sMAPE): {smape_lstm:.2f}%",
    f"R-squared (R²): {r2_lstm:.2f}",
]
print("\n".join(report_lines))


[1m1486/1486[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step

--- Hasil Evaluasi Model LSTM ---
Mean Absolute Error (MAE): 1.67
Root Mean Square Error (RMSE): 3.90
Symmetric Mean Absolute Percentage Error (sMAPE): 21.27%
R-squared (R²): 0.98


In [None]:
# NOTE(review): this whole cell is commented out and does nothing when run.
# The output displayed beneath it appears to come from a run made before the
# code was disabled.
# # Save the prediction results to a file
# import os
# # Define the path where the file is saved
# path_output = r'C:\MyFolder\Git\TA_SpatioTemporal\Data\predictions'
# os.makedirs(path_output, exist_ok=True) # Create the folder if it does not exist yet

# # The variable that needs to be saved is 'predictions_inversed'
# print("Menyimpan prediksi LSTM...")
# df_pred_lstm = pd.DataFrame({'prediksi_lstm': predictions_inversed})
# df_pred_lstm.to_parquet(os.path.join(path_output, 'lstm_predictions.parquet'), index=False)

# print("\nBerhasil menyimpan prediksi LSTM.")


Menyimpan prediksi LSTM...

Berhasil menyimpan prediksi LSTM.


In [18]:
# --- Persist the full LSTM results (actual values and predictions) ---

# Output folder; created on first use.
path_output = r'C:\MyFolder\Git\TA_SpatioTemporal\Data\predictions'
os.makedirs(path_output, exist_ok=True)

# Inputs this cell expects to find in the kernel:
# - df_lstm_input: the DataFrame as it was before being turned into sequences.
# - predictions_inversed: predictions on the original scale.
# - y_test_inversed: actual values on the original scale.
# NOTE(review): df_lstm_input is not defined in any cell visible here, so
# confirm it exists before running.

# 1. Number of predictions produced.
panjang_prediksi_lstm = len(predictions_inversed)

# 2. Take that many rows from the end of the pre-sequencing frame.
# NOTE(review): this assumes the test samples are exactly the trailing rows of
# df_lstm_input, in the same order as the predictions. Verify this,
# particularly if the frame mixes several meters.
df_test_asli = df_lstm_input.iloc[-panjang_prediksi_lstm:].copy()

# 3. Attach the original-scale actuals and predictions as new columns.
results_lstm = df_test_asli.assign(
    target_aktual=y_test_inversed,
    prediksi_lstm=predictions_inversed,
)

# 4. Write the completed result frame to disk.
results_lstm.to_parquet(os.path.join(path_output, 'lstm_results.parquet'), index=False)

print("\nBerhasil menyimpan hasil lengkap LSTM (termasuk data aktual dan prediksi).")



Berhasil menyimpan hasil lengkap LSTM (termasuk data aktual dan prediksi).
