## Import Necessary Libraries


In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import LabelEncoder, MinMaxScaler

## Load Dataset


In [None]:
# Locations of the three CSV inputs, relative to the working directory.
train_file, test_file, actual_file = (
    "dataset/train_dataset.csv",
    "dataset/test_dataset.csv",
    "dataset/actual_dataset.csv",
)

# Read each CSV into its own DataFrame.
train_dataset = pd.read_csv(train_file)
test_dataset = pd.read_csv(test_file)
actual_dataset = pd.read_csv(actual_file)

# Show the column index of every frame as a quick schema check.
for label, frame in (
    ("Train Dataset Columns", train_dataset),
    ("Test Dataset Columns", test_dataset),
    ("Actual Dataset Columns", actual_dataset),
):
    print(label, frame.columns)

## Preprocess Data


In [18]:
def preprocess_data(dataset):
    """Return a preprocessed copy of ``dataset``.

    The ``created_at`` column is parsed as ISO 8601 timestamps and the
    ``power`` column is divided by 1000. The input frame is left untouched,
    so calling this again on the same raw frame does not rescale ``power``
    a second time.

    Args:
        dataset: DataFrame containing at least ``created_at`` and ``power``.

    Returns:
        A new DataFrame with the two columns converted.
    """
    # Work on a copy so the caller's frame is never mutated.
    processed = dataset.copy()
    processed["created_at"] = pd.to_datetime(
        processed["created_at"], format="ISO8601"
    )
    # NOTE(review): the original comment called this a conversion to kWh;
    # the code only scales by 1/1000 -- confirm the unit of the raw column.
    processed["power"] = processed["power"] / 1000
    return processed

In [19]:
train_dataset = preprocess_data(train_dataset)
test_dataset = preprocess_data(test_dataset)
actual_dataset = preprocess_data(actual_dataset)

# Reshape the long-format readings into wide tables: one row per timestamp,
# one column per sensor name, cell value = power. Missing readings become 0.
train_pivot = train_dataset.pivot(index="created_at", columns="name", values="power").fillna(0)
test_pivot = test_dataset.pivot(index="created_at", columns="name", values="power").fillna(0)
actual_pivot = actual_dataset.pivot(index="created_at", columns="name", values="power").fillna(0)

# Encode 'name' column into numeric classes
label_encoder = LabelEncoder()
train_dataset["sensor_class"] = label_encoder.fit_transform(train_dataset["name"])

# Extract relevant features
features = ["voltage", "current", "power_factor"]
target = "sensor_class"

# NOTE: X_train / y_train / X_test / X_actual are rebound later by the
# sequence-building cells, which work on the scaled pivot tables instead.
X_train = train_dataset[features].values
y_train = train_dataset[target].values

X_test = test_dataset[features].values
X_actual = actual_dataset[features].values

# Normalize the data using MinMaxScaler; the scaler is fitted on the training
# pivot only and then reused for the test and actual pivots.
scaler = MinMaxScaler(feature_range=(0, 1))
train_scaled = scaler.fit_transform(train_pivot)
test_scaled = scaler.transform(test_pivot)
actual_scaled = scaler.transform(actual_pivot)

# One-hot encode target labels; the width follows the number of distinct
# sensor names seen by the encoder instead of a hard-coded 3.
y_train_encoded = tf.keras.utils.to_categorical(
    y_train, num_classes=len(label_encoder.classes_)
)

## Input Sequences and Corresponding Targets


In [5]:
def create_sequences(X, y, seq_length):
    """Build sliding-window inputs and their next-step targets.

    Window ``i`` is ``X[i : i + seq_length]`` and its target is
    ``y[i + seq_length]``, for ``i`` in ``range(len(X) - seq_length)``.

    Args:
        X: Array-like of input rows.
        y: Array-like of targets, indexed in step with ``X``.
        seq_length: Number of consecutive rows per window (must be >= 1).

    Returns:
        Tuple ``(X_sequences, y_sequences)`` of NumPy arrays. When ``X`` has
        too few rows for a single window, both arrays are empty but keep the
        trailing dimensions (``(0, seq_length, ...)`` and ``(0, ...)``) so
        that downstream shape handling still works.

    Raises:
        ValueError: If ``seq_length`` is smaller than 1.
        IndexError: If ``y`` is too short to supply a target for a window.
    """
    if seq_length < 1:
        raise ValueError("seq_length must be a positive integer")

    X = np.asarray(X)
    y = np.asarray(y)
    n_windows = len(X) - seq_length

    if n_windows <= 0:
        # np.array([]) would collapse to shape (0,); keep the full rank instead.
        return (
            np.empty((0, seq_length) + X.shape[1:], dtype=X.dtype),
            np.empty((0,) + y.shape[1:], dtype=y.dtype),
        )

    X_sequences = [X[i : i + seq_length] for i in range(n_windows)]
    y_sequences = [y[i + seq_length] for i in range(n_windows)]
    return np.array(X_sequences), np.array(y_sequences)

In [6]:
SEQ_LENGTH = 24  # Sequence length for 24 hours

# create_sequences takes (X, y, seq_length). The scaled pivot is passed as
# both X and y: each window of SEQ_LENGTH rows is an input, and the row that
# immediately follows the window is its target.
X_train, y_train = create_sequences(train_scaled, train_scaled, SEQ_LENGTH)
X_test, y_test = create_sequences(test_scaled, test_scaled, SEQ_LENGTH)

# Reshape input for the LSTM to (samples, time steps, features), with one
# feature per sensor column of the pivot table.
n_sensors = train_scaled.shape[1]
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], n_sensors))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], n_sensors))

print(f"Training set shape: {X_train.shape}, Testing set shape: {X_test.shape}")

## Define LSTM Model


In [None]:
model = tf.keras.Sequential(
    [
        tf.keras.layers.LSTM(
            64, return_sequences=False, input_shape=(SEQ_LENGTH, len(features))
        ),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Dense(
            3, activation="softmax"
        ),  # 3 classes, using softmax activation
    ]
)

## Train Model


In [None]:
# Compile model
model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])

# Train model
history = model.fit(
    X_train_seq, y_train_seq, epochs=1, batch_size=64, validation_split=0.25
)

## Predict the Test Set


In [None]:
# Run the trained network over every test window (outputs are in scaled units)
y_test_pred = model.predict(X_test)

# Undo the MinMax scaling for the forecasts first, then for the ground truth
y_test_pred_rescaled, y_test_rescaled = (
    scaler.inverse_transform(scaled_values)
    for scaled_values in (y_test_pred, y_test)
)

## Evaluate Model

In [None]:
# Score the rescaled forecasts against the rescaled ground truth.
mse = mean_squared_error(y_test_rescaled, y_test_pred_rescaled)
rmse = np.sqrt(mse)  # RMSE is derived from MSE rather than recomputed
mae = mean_absolute_error(y_test_rescaled, y_test_pred_rescaled)
r2 = r2_score(y_test_rescaled, y_test_pred_rescaled)

# One metric per output line, four decimals each.
print(
    f"Mean Squared Error (MSE): {mse:.4f}",
    f"Mean Absolute Error (MAE): {mae:.4f}",
    f"Root Mean Squared Error (RMSE): {rmse:.4f}",
    f"R² Score: {r2:.4f}",
    sep="\n",
)

## Plot Actual vs Predicted Power for Each Sensor

In [None]:
# Sensor names are the columns of the training pivot table; column i of the
# rescaled arrays corresponds to sensors[i].
sensors = train_pivot.columns
n_panels = len(sensors)  # one subplot per sensor instead of a fixed 3

plt.figure(figsize=(15, 5))
for i, sensor in enumerate(sensors):
    plt.subplot(1, n_panels, i + 1)
    plt.plot(y_test_rescaled[:, i], label='Actual')
    plt.plot(y_test_pred_rescaled[:, i], label='Predicted')
    plt.title(f'Sensor: {sensor}')
    plt.xlabel('Time Step')
    plt.ylabel('Power (kWh)')
    plt.legend()

plt.tight_layout()
plt.show()

## Validate with Actual Dataset

In [None]:
# create_sequences takes (X, y, seq_length); the targets are not needed here,
# so the scaled matrix is passed for both and the second result is discarded.
X_actual, _ = create_sequences(actual_scaled, actual_scaled, SEQ_LENGTH)
X_actual = X_actual.reshape(
    (X_actual.shape[0], X_actual.shape[1], actual_scaled.shape[1])
)

y_actual_pred = model.predict(X_actual)
y_actual_pred_rescaled = scaler.inverse_transform(y_actual_pred)

# Prediction k is made from rows k .. k + SEQ_LENGTH - 1 and therefore
# forecasts row k + SEQ_LENGTH. Drop the first SEQ_LENGTH ground-truth rows so
# both curves share the same time axis.
actual_aligned = actual_pivot.values[SEQ_LENGTH:]

# Plot Actual vs Predicted for Actual Dataset
plt.figure(figsize=(15, 5))
for i, sensor in enumerate(sensors):
    plt.subplot(1, len(sensors), i + 1)
    plt.plot(actual_aligned[:, i], label='Actual')
    plt.plot(y_actual_pred_rescaled[:, i], label='Predicted')
    plt.title(f'Sensor: {sensor} (Actual vs Predicted)')
    plt.xlabel('Time Step')
    plt.ylabel('Power (kWh)')
    plt.legend()

plt.tight_layout()
plt.show()