In [1]:
!pip install pandas numpy matplotlib scikit-learn

Collecting matplotlib
  Downloading matplotlib-3.10.8-cp312-cp312-macosx_11_0_arm64.whl.metadata (52 kB)
Collecting contourpy>=1.0.1 (from matplotlib)
  Downloading contourpy-1.3.3-cp312-cp312-macosx_11_0_arm64.whl.metadata (5.5 kB)
Collecting cycler>=0.10 (from matplotlib)
  Using cached cycler-0.12.1-py3-none-any.whl.metadata (3.8 kB)
Collecting fonttools>=4.22.0 (from matplotlib)
  Downloading fonttools-4.61.1-cp312-cp312-macosx_10_13_universal2.whl.metadata (114 kB)
Collecting kiwisolver>=1.3.1 (from matplotlib)
  Downloading kiwisolver-1.4.9-cp312-cp312-macosx_11_0_arm64.whl.metadata (6.3 kB)
Collecting pillow>=8 (from matplotlib)
  Downloading pillow-12.0.0-cp312-cp312-macosx_11_0_arm64.whl.metadata (8.8 kB)
Collecting pyparsing>=3 (from matplotlib)
  Using cached pyparsing-3.2.5-py3-none-any.whl.metadata (5.0 kB)
Downloading matplotlib-3.10.8-cp312-cp312-macosx_11_0_arm64.whl (8.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.1/8.1 MB[0m [31m8.4 MB/s[0m 

In [2]:
import os

DATA_PATH = "/content/household_power_consumption.txt"

# Report the real status of the file instead of unconditionally claiming it
# was found. This check does not raise; reading the file later in this cell
# is what fails if the path is wrong.
if os.path.isfile(DATA_PATH):
    print("Dataset found:", DATA_PATH)
else:
    print("WARNING: dataset NOT found at", DATA_PATH, "- loading it below will fail")

# ============================
# Imports
# ============================
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf

from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Input
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

# ============================
# Parameters
# ============================
# --- Data ---
TXT_FILENAME = DATA_PATH                  # semicolon-separated text file read by pd.read_csv
INPUT_COL = 'Global_active_power'         # the single column that is modelled

# --- Windowing (counted in samples of the resampled series) ---
PAST_STEPS = 30                           # length of each input window
FUTURE_STEPS = 120                        # number of steps predicted per window

# --- Chronological split (fractions of the total window count) ---
TEST_RATIO = 0.15
VAL_RATIO = 0.15

# --- Training ---
BATCH_SIZE = 128
EPOCHS = 50                               # upper bound; early stopping may end training sooner
MODEL_SAVE_PATH = 'best_simple_mlp.h5'    # target file of the ModelCheckpoint callback

# --- Reproducibility ---
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)
tf.random.set_seed(RANDOM_SEED)

# ============================
# Load Data
# ============================
# Read the raw text file; '?' and empty fields are treated as missing values.
df = pd.read_csv(
    TXT_FILENAME,
    sep=';',
    na_values=['?', ''],
    low_memory=False
)

# Build the timestamp explicitly instead of using the deprecated
# `parse_dates={'datetime': [...]}` form, which also leaves the day/month
# order to inference.
# NOTE(review): assumes Date is dd/mm/yyyy and Time is HH:MM:SS, as in the UCI
# "Individual household electric power consumption" file - confirm against
# your copy. A mismatch raises ValueError here instead of silently mis-parsing.
df['datetime'] = pd.to_datetime(
    df['Date'] + ' ' + df['Time'],
    format='%d/%m/%Y %H:%M:%S'
)

# Chronological order with the timestamp as index (no in-place mutation).
df = (
    df.drop(columns=['Date', 'Time'])
      .sort_values('datetime')
      .set_index('datetime')
)

df[INPUT_COL] = pd.to_numeric(df[INPUT_COL], errors='coerce')

# Put the data on a regular 1-minute grid ('min' replaces the deprecated 'T'
# alias); timestamps absent from the file become NaN rows.
df = df.resample('min').asfreq()

# Fill at most 60 consecutive missing samples by time-weighted interpolation,
# then drop whatever is still missing.
# NOTE(review): after this dropna the rows are no longer guaranteed to be
# exactly one minute apart, yet the windows further down are cut by row
# position, so a window can span a time gap.
df[INPUT_COL] = df[INPUT_COL].interpolate(method='time', limit=60)
df = df.dropna(subset=[INPUT_COL])

# Single-column float32 frame and its (n_samples, 1) ndarray view.
series = df[[INPUT_COL]].astype('float32')
series_values = series.values

# ============================
# Sliding Windows
# ============================
# Every window needs PAST_STEPS inputs followed by FUTURE_STEPS targets.
total_steps = len(series)
window_len = PAST_STEPS + FUTURE_STEPS
n_windows = total_steps - window_len + 1

# Validation and test sizes are rounded up; training receives the remainder.
test_windows = int(np.ceil(TEST_RATIO * n_windows))
val_windows = int(np.ceil(VAL_RATIO * n_windows))
train_windows = n_windows - test_windows - val_windows

# Fail early with a clear message instead of an IndexError further down when
# the series is too short to provide at least one training window.
if train_windows < 1:
    raise ValueError(
        f"Series too short for the requested split: {total_steps} samples give "
        f"{n_windows} windows of length {window_len} "
        f"(train={train_windows}, val={val_windows}, test={test_windows})."
    )

# Chronological split over window start positions: train, then val, then test.
# NOTE(review): consecutive starts differ by one sample, so windows next to a
# split boundary share samples with the neighbouring split.
starts = np.arange(n_windows)
train_starts = starts[:train_windows]
val_starts = starts[train_windows:train_windows + val_windows]
test_starts = starts[train_windows + val_windows:]

# ============================
# Scaling (train only)
# ============================
# One past the last sample covered by the input part of the final training window.
last_train_idx = train_starts[-1] + PAST_STEPS

# Fit the min-max range on that prefix only, so no validation or test
# sample contributes to the scaling.
scaler = MinMaxScaler().fit(series_values[:last_train_idx])

def build_xy(starts, values=None, fitted_scaler=None, past_steps=None, future_steps=None):
    """Build scaled input/target windows for the given window start positions.

    The series is scaled once and the windows are gathered with strided views,
    instead of calling ``transform`` twice per window inside a Python loop.

    Parameters
    ----------
    starts : array-like of int
        Row positions at which each window begins.
    values : ndarray of shape (n_samples, 1), optional
        Series to cut windows from. Defaults to the notebook's ``series_values``.
    fitted_scaler : object with a ``transform`` method, optional
        Already-fitted scaler. Defaults to the notebook's ``scaler``.
    past_steps : int, optional
        Input window length. Defaults to ``PAST_STEPS``.
    future_steps : int, optional
        Target window length. Defaults to ``FUTURE_STEPS``.

    Returns
    -------
    X : float64 ndarray of shape (len(starts), past_steps, 1)
        Scaled values at ``[s, s + past_steps)`` for each start ``s``.
    Y : float64 ndarray of shape (len(starts), future_steps)
        Scaled values at ``[s + past_steps, s + past_steps + future_steps)``.

    Raises
    ------
    ValueError
        If the scaled series is not a single column, or if any window would
        fall outside the series.
    """
    # Defaults are resolved at call time so the notebook globals can be
    # (re)defined after this function.
    values = series_values if values is None else values
    fitted_scaler = scaler if fitted_scaler is None else fitted_scaler
    past_steps = PAST_STEPS if past_steps is None else past_steps
    future_steps = FUTURE_STEPS if future_steps is None else future_steps

    starts = np.asarray(starts, dtype=np.intp).reshape(-1)
    if starts.size == 0:
        return np.zeros((0, past_steps, 1)), np.zeros((0, future_steps))

    # Scale the whole series once; min-max scaling is element-wise, so this
    # yields the same numbers as scaling each window separately.
    scaled = np.asarray(fitted_scaler.transform(values), dtype=np.float64)
    if scaled.ndim != 2 or scaled.shape[1] != 1:
        raise ValueError(
            f"expected a single-column series of shape (n, 1), got {scaled.shape}"
        )
    scaled = scaled[:, 0]

    window_len = past_steps + future_steps
    if starts.min() < 0 or starts.max() + window_len > scaled.shape[0]:
        raise ValueError(
            f"window starts must lie in [0, {scaled.shape[0] - window_len}] "
            f"for a series of {scaled.shape[0]} samples"
        )

    sliding = np.lib.stride_tricks.sliding_window_view
    # Row s of the first view is scaled[s : s + past_steps]; row s of the
    # second is scaled[s + past_steps : s + past_steps + future_steps].
    # Fancy indexing copies only the requested windows.
    X = sliding(scaled, past_steps)[starts][:, :, np.newaxis]
    Y = sliding(scaled[past_steps:], future_steps)[starts]
    return X, Y

# Materialise the three chronological splits in order: train, validation, test.
(X_train, Y_train), (X_val, Y_val), (X_test, Y_test) = (
    build_xy(split_starts)
    for split_starts in (train_starts, val_starts, test_starts)
)

# ============================
# Build MLP
# ============================
# Reset Keras global state so re-running this cell starts from a clean slate.
tf.keras.backend.clear_session()

# Flatten the (PAST_STEPS, 1) window, pass it through two ReLU hidden layers,
# and emit one linear output per forecast step.
hidden_units = (256, 256)
mlp_layers = [Input(shape=(PAST_STEPS, 1)), Flatten()]
mlp_layers += [Dense(units, activation='relu') for units in hidden_units]
mlp_layers.append(Dense(FUTURE_STEPS, activation='linear'))

model = Sequential(mlp_layers)

# Adam with a 1e-3 learning rate, trained directly on mean absolute error.
adam = tf.keras.optimizers.Adam(learning_rate=1e-3)
model.compile(optimizer=adam, loss='mae')

model.summary()

# ============================
# Train
# ============================
# Stop after 6 epochs without improvement and roll back to the best weights.
early_stopping = EarlyStopping(patience=6, restore_best_weights=True)
# Write the model to disk only when the monitored quantity improves.
best_checkpoint = ModelCheckpoint(MODEL_SAVE_PATH, save_best_only=True)
callbacks = [early_stopping, best_checkpoint]

fit_options = dict(
    validation_data=(X_val, Y_val),
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    shuffle=False,  # batches are drawn in the chronological order of the windows
    callbacks=callbacks,
)
model.fit(X_train, Y_train, **fit_options)

# ============================
# Predict & Inverse Transform
# ============================
def _to_original_units(scaled_2d):
    """Undo the min-max scaling of a (n_windows, FUTURE_STEPS) array.

    The scaler works on a single column, so the array is flattened to
    (n, 1), inverse-transformed, and reshaped back.
    """
    as_column = scaled_2d.reshape(-1, 1)
    return scaler.inverse_transform(as_column).reshape(-1, FUTURE_STEPS)


Y_pred = model.predict(X_test)

Y_test_inv = _to_original_units(Y_test)
Y_pred_inv = _to_original_units(Y_pred)

# ============================
# Metrics per Horizon
# ============================
def _per_horizon(metric):
    """Evaluate `metric(y_true, y_pred)` separately for each forecast step."""
    return np.array(
        [metric(Y_test_inv[:, step], Y_pred_inv[:, step]) for step in range(FUTURE_STEPS)],
        dtype=float,
    )


mae_scores = _per_horizon(mean_absolute_error)
mse_scores = _per_horizon(mean_squared_error)
r2_scores = _per_horizon(r2_score)

# Variance of the residuals across test windows, one value per forecast step.
residuals = Y_test_inv - Y_pred_inv
error_variance = residuals.var(axis=0)

# ============================
# PLOTS (MATCH FRIEND'S STYLE)
# ============================

def plot_horizon_metric(values, metric_name, ylabel, **line_kwargs):
    """Draw one per-horizon metric curve for the MLP forecaster.

    Replaces four copy-pasted plotting stanzas that differed only in the
    data, the label text, and (for one plot) the marker settings.

    Parameters
    ----------
    values : sequence of float
        One metric value per forecast step; plotted against steps 1..len(values).
    metric_name : str
        Short metric name, used in the legend entry and in the title.
    ylabel : str
        Label of the y axis.
    **line_kwargs
        Extra keyword arguments forwarded to ``Axes.plot`` (e.g. ``marker``).
    """
    fig, ax = plt.subplots(figsize=(12, 6))
    ax.plot(range(1, len(values) + 1), values, label=f'MLP - {metric_name}', **line_kwargs)
    ax.set_xlabel('Forecast Span [minutes]')
    ax.set_ylabel(ylabel)
    ax.set_title(f'{metric_name} for Multi-Step Forecasting (MLP)')
    ax.legend()
    ax.grid(False)
    plt.show()


# MAE
plot_horizon_metric(mae_scores, 'MAE', 'Mean Absolute Error (MAE)')

# MSE
plot_horizon_metric(mse_scores, 'MSE', 'Mean Squared Error (MSE)')

# R²
plot_horizon_metric(r2_scores, 'R²', 'R² Score')

# Error Variance (drawn with point markers)
plot_horizon_metric(error_variance, 'Error Variance', 'Error Variance',
                    marker='o', markersize=4)

# ============================
# Save Results
# ============================
# Bundle the per-horizon metrics with the de-scaled targets and predictions
# and write them to a single .npz archive.
results = {
    'mae': mae_scores,
    'mse': mse_scores,
    'r2': r2_scores,
    'error_variance': error_variance,
    'Y_test': Y_test_inv,
    'Y_pred': Y_pred_inv,
}
np.savez('simple_mlp_results_full.npz', **results)

print("✅ DONE — All metrics & plots generated")
print("Model saved to:", MODEL_SAVE_PATH)


Dataset found: /content/household_power_consumption.txt


ModuleNotFoundError: No module named 'tensorflow'