In [None]:
# Import Library & Set Seed

import random
import numpy as np
import tensorflow as tf
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
import matplotlib.pyplot as plt

# Seed every random number generator used here (Python, NumPy, TensorFlow)
# so that repeated runs are reproducible.
seed_value = 7
for seed_fn in (random.seed, np.random.seed, tf.random.set_seed):
    seed_fn(seed_value)

print("Library imported & seed set.")


In [None]:
# Path of the input CSV, relative to the notebook's working directory.
csv_path = '../dataset_lstm/dataset_1.csv'
df = pd.read_csv(csv_path)

# Sanity check: row count, then a preview of the first rows.
print("Data loaded! Jumlah baris:", df.shape[0])
print(df.head())

In [None]:
# Extract & normalise the bus_voltage column

# MinMaxScaler works on 2-D input, hence the (n_samples, 1) reshape.
data_target = df['bus_voltage'].values.reshape(-1, 1)

# Normalisation.
# The scaler is fitted on the leading (training) part of the series only, so
# the min/max of the rows later used for testing never influence the
# preprocessing (avoids test-set leakage). The whole series is then
# transformed with those training statistics; test values may therefore fall
# slightly outside [0, 1], which is expected.
# TRAIN_FRACTION must match the 0.8 ratio used by the train/test split cell:
# the first 80% of rows are always covered by the first 80% of windows.
TRAIN_FRACTION = 0.8
n_fit_rows = max(1, int(TRAIN_FRACTION * len(data_target)))

scaler = MinMaxScaler()
scaler.fit(data_target[:n_fit_rows])
data_scaled = scaler.transform(data_target)
print("Data dinormalisasi.")
print("Contoh data asli:", data_target[:5].flatten())
print("Contoh data norm:", data_scaled[:5].flatten())

In [None]:
# Sliding window

WINDOW_SIZE = 10   # number of past samples in each input window (can be changed)
FUTURE_STEP = 1    # number of samples after the window used as the target

# Number of windows that still leave FUTURE_STEP target samples after them.
n_windows = len(data_scaled) - WINDOW_SIZE - FUTURE_STEP + 1
series = data_scaled[:, 0]

# Each input is WINDOW_SIZE consecutive samples; its target is the
# FUTURE_STEP samples that immediately follow.
X = np.array([series[start:start + WINDOW_SIZE] for start in range(n_windows)])
y = np.array([
    series[start + WINDOW_SIZE:start + WINDOW_SIZE + FUTURE_STEP]
    for start in range(n_windows)
])
# Add a trailing feature axis of length 1: (samples, WINDOW_SIZE, 1).
X = X.reshape((X.shape[0], X.shape[1], 1))

print(f"Sliding window done! X shape: {X.shape}, y shape: {y.shape}")


In [None]:
# Split data: the first 80% of windows (in order, no shuffling) are used for
# training and the remaining windows for testing.
split = int(0.8 * len(X))
X_train, y_train = X[:split], y[:split]
X_test, y_test = X[split:], y[split:]

print(f"Train size: {X_train.shape[0]}, Test size: {X_test.shape[0]}")


In [None]:
# Model definition and training

# One LSTM layer over (WINDOW_SIZE, 1) inputs followed by a dense layer with
# FUTURE_STEP outputs.
layer_stack = [
    LSTM(64, input_shape=(WINDOW_SIZE, 1)),
    Dense(FUTURE_STEP),
]
model = Sequential(layer_stack)
model.compile(optimizer='adam', loss='mse')
model.summary()

EPOCHS = 20
BATCH_SIZE = 32
# NOTE(review): the test split is passed as validation data, so the
# 'val_loss' curve below is measured on the test set.
history = model.fit(
    X_train,
    y_train,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    validation_data=(X_test, y_test),
    verbose=1,
)

print("Training selesai.")

# Plot loss: training vs validation MSE per epoch.
fig, ax = plt.subplots(figsize=(7, 4))
ax.plot(history.history['loss'], label='Training Loss')
ax.plot(history.history['val_loss'], label='Validation Loss')
ax.set_title('Learning Curve (Loss)')
ax.set_xlabel('Epoch')
ax.set_ylabel('MSE Loss')
ax.legend()
ax.grid(True)
plt.show()


In [None]:
# Predict on the test data and undo the scaling


def _first_step_unscaled(scaled_values):
    """Inverse-transform with the fitted scaler and return column 0 as a 1-D array."""
    return scaler.inverse_transform(scaled_values)[:, 0]


y_test_pred = model.predict(X_test)
y_test_pred_inv = _first_step_unscaled(y_test_pred)
y_test_inv = _first_step_unscaled(y_test)

print("Prediksi pada data test selesai.")
print("Contoh prediksi:", y_test_pred_inv[:5])
print("Contoh aktual:", y_test_inv[:5])


In [None]:
# Row positions in the original series for each test target: test window j
# starts at row split + j, and its first target sits WINDOW_SIZE rows later.
index_data_asli = np.arange(len(y_test)) + split + WINDOW_SIZE

# Per-sample comparison table; Error is the absolute difference between the
# (rounded) real and predicted columns.
df_hasil = pd.DataFrame({
    'No': np.arange(1, len(y_test_inv) + 1),
    'Index Data Asli': index_data_asli,
    'Data Real': np.round(y_test_inv, 7),
    'Data Predict': np.round(y_test_pred_inv, 8),
})
df_hasil['Error'] = (df_hasil['Data Real'] - df_hasil['Data Predict']).abs()

# Show five rows from the start, the middle and the end of the table.
print("5 data awal:")
print(df_hasil.head())
print('\n5 data tengah:')
mid_idx = len(df_hasil) // 2
print(df_hasil.iloc[mid_idx - 2:mid_idx + 3])
print('\n5 data akhir:')
print(df_hasil.tail())

# Sum of absolute errors over the whole test set.
total_error = df_hasil['Error'].sum()
print("\nTotal Error (sum of absolute error):", total_error)


In [None]:
# Plot actual values (all data) vs predictions (test only, at original row positions)

# NaN everywhere except the test rows, so the prediction line is drawn only
# over the test region.
predicted_full = np.full(len(df), np.nan)
predicted_full[index_data_asli] = y_test_pred_inv

fig, ax = plt.subplots(figsize=(24, 8))
ax.plot(df['bus_voltage'].values, label='Actual bus_voltage (All Data)', color='blue')
ax.plot(predicted_full, label='Predicted bus_voltage (Test only)', color='red')

# Dashed vertical marker every `interval` samples.
# NOTE(review): 3600 presumably marks a fixed-length segment of the recording - confirm.
interval = 3600
n = len(df['bus_voltage'])
for marker_pos in range(interval, n, interval):
    ax.axvline(marker_pos, color='red', linestyle='--', linewidth=1)

ax.set_title('Prediksi vs Aktual Bus Voltage (Full Data; Prediksi hanya di area Test)')
ax.set_xlabel('Sample')
ax.set_ylabel('bus_voltage')
ax.legend()
ax.grid(True)
plt.show()


In [None]:
# Plot predictions vs actual values (test set only)

fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(y_test_inv, label='Actual bus_voltage (Test)', color='blue')
ax.plot(y_test_pred_inv, label='Predicted bus_voltage (Test)', color='red', linestyle='--')

# Dashed vertical marker every `interval` test samples.
n_test = len(y_test_inv)
interval = 3600
for marker_pos in range(interval, n_test, interval):
    ax.axvline(marker_pos, color='red', linestyle='--', linewidth=1)

ax.set_title('Prediksi vs Aktual Bus Voltage (Test Set Saja)')
ax.set_xlabel('Sample (Test)')
ax.set_ylabel('bus_voltage')
ax.legend()
ax.grid(True)
plt.show()


In [None]:
# MAE computed by sklearn from the unscaled test targets and predictions
mae_otomatis = mean_absolute_error(y_test_inv, y_test_pred_inv)
print(f"MAE otomatis (sklearn): {mae_otomatis:.6f}")

# Manual MAE: the summed absolute error divided by the number of test rows
n_test_rows = df_hasil.shape[0]
mae_manual = total_error / n_test_rows
print(f"MAE manual (total error / n): {mae_manual:.6f}")


In [None]:
# Save the results table to CSV (optional)

# The file is written to the current working directory, without the
# DataFrame index column.
df_hasil.to_csv(path_or_buf='hasil_prediksi_lstm_test_final.csv', index=False)
print("Tabel hasil disimpan ke hasil_prediksi_lstm_test_final.csv")