<a href="https://colab.research.google.com/github/alyahoang99/GANs/blob/main/predictivemodels.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
import xgboost as xgb
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Dense, Input, LSTM
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import set_random_seed

In [None]:
def create_lag_features(df, lags=7, target_col='Value'):
    """Build a supervised-learning frame of lagged features plus a target.

    For every column of ``df`` and every lag ``1..lags`` a column named
    ``'<col>_lag<k>'`` holding ``df[col].shift(k)`` is created. Columns are
    emitted feature-major: all lags of the first column, then all lags of the
    second column, and so on. The unshifted ``target_col`` is appended as
    ``'target'`` and rows containing any NaN are dropped.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame; every column is lagged.
    lags : int, default 7
        Number of past steps to include per column.
    target_col : str, default 'Value'
        Column of ``df`` used (unshifted) as the prediction target.

    Returns
    -------
    pandas.DataFrame
        New frame with ``len(df.columns) * lags`` lag columns followed by
        ``'target'``. ``df`` itself is not modified.

    Raises
    ------
    KeyError
        If ``target_col`` is not a column of ``df``.
    """
    if target_col not in df.columns:
        raise KeyError(f"target column {target_col!r} not found in DataFrame")

    # Collect every shifted Series first and concatenate once: this avoids
    # inserting columns one at a time into a growing DataFrame.
    lagged_columns = {
        f'{col}_lag{lag}': df[col].shift(lag)
        for col in df.columns
        for lag in range(1, lags + 1)
    }
    lagged_columns['target'] = df[target_col]  # target stays unshifted

    # dropna() returns a new frame, so nothing is mutated in place.
    return pd.concat(lagged_columns, axis=1).dropna()

In [None]:
def split_features_targets(df, target_col='target'):
    """Separate a frame into its feature columns and its target column.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing the features and the ``target_col`` column.
    target_col : str, default 'target'
        Name of the column to split off.

    Returns
    -------
    tuple
        ``(features, target)`` where ``features`` is ``df`` without
        ``target_col`` and ``target`` is the ``df[target_col]`` Series.
    """
    features = df.drop(target_col, axis=1)
    target = df[target_col]
    return features, target

In [None]:
def evaluate_model(y_true, y_pred, label=""):
    """Print and return MAE, RMSE and R² for a set of predictions.

    Parameters
    ----------
    y_true : array-like
        Ground-truth values.
    y_pred : array-like
        Predicted values.
    label : str, default ""
        Text placed at the start of the printed summary line.

    Returns
    -------
    tuple
        ``(mae, rmse, r2)``.
    """
    mae, rmse, r2 = (
        mean_absolute_error(y_true, y_pred),
        # RMSE is the square root of scikit-learn's mean squared error.
        np.sqrt(mean_squared_error(y_true, y_pred)),
        r2_score(y_true, y_pred),
    )
    print(f"📊 {label} — MAE: {mae:.4f}, RMSE: {rmse:.4f}, R²: {r2:.4f}")
    return mae, rmse, r2

In [None]:
# ----------------------------
# Load Data
# ----------------------------

# Input files (Google Drive paths as mounted inside Colab).
real_csv_path = '/content/drive/MyDrive/Colab Notebooks/GANs/resampleddata (1).csv'
gen_csv_path = '/content/drive/MyDrive/Colab Notebooks/GANs/RGANdata.csv'

# Real data: the Timestamp column is parsed on read, then only the three
# value columns are kept for modelling.
value_columns = ['Value', 'Value_HeatPump', 'Value_OtherAppliances']
df_real = pd.read_csv(real_csv_path, parse_dates=['Timestamp']).loc[:, value_columns]

# Generated data (treated by this notebook as already scaled)
df_gen = pd.read_csv(gen_csv_path)

In [None]:
# ----------------------------
# Scale both datasets
# ----------------------------

# Min-max scale the real data column by column.
# NOTE(review): the scaler is fit on the full real series, i.e. before the
# later train/test split — confirm this is intended (it lets test-period
# extremes influence the scaling).
scaler = MinMaxScaler()
real_values_scaled = scaler.fit_transform(df_real)
real_scaled = pd.DataFrame(real_values_scaled, columns=df_real.columns)

# The generated data is copied unchanged; the notebook treats it as already scaled.
gen_scaled = df_gen.copy()

In [None]:
# Number of past steps used as lag features for every column.
lags = 7

# Apply the same lagging to the real and the generated series.
real_lagged, gen_lagged = (
    create_lag_features(frame, lags=lags) for frame in (real_scaled, gen_scaled)
)


In [None]:
# ----------------------------
# Prepare X and y
# ----------------------------

X_real, y_real = split_features_targets(real_lagged)
X_gen, y_gen = split_features_targets(gen_lagged)

# Time-based split: the first 80% of rows train, the remaining rows test.
# No shuffling is applied.
split_idx_real = int(0.8 * len(X_real))
train_rows_real = slice(None, split_idx_real)
test_rows_real = slice(split_idx_real, None)
Xr_train, yr_train = X_real.iloc[train_rows_real], y_real.iloc[train_rows_real]
Xr_test, yr_test = X_real.iloc[test_rows_real], y_real.iloc[test_rows_real]

split_idx_gen = int(0.8 * len(X_gen))
train_rows_gen = slice(None, split_idx_gen)
test_rows_gen = slice(split_idx_gen, None)
Xg_train, yg_train = X_gen.iloc[train_rows_gen], y_gen.iloc[train_rows_gen]
Xg_test, yg_test = X_gen.iloc[test_rows_gen], y_gen.iloc[test_rows_gen]


In [None]:
# ----------------------------
# XGBoost Models
# ----------------------------

# Both regressors share the same hyper-parameters so their scores are comparable.
xgb_params = dict(n_estimators=100, random_state=42)

# Model trained and evaluated on the real data.
xgb_real = xgb.XGBRegressor(**xgb_params)
xgb_real.fit(Xr_train, yr_train)
yr_pred_xgb = xgb_real.predict(Xr_test)
evaluate_model(yr_test, yr_pred_xgb, label="XGBoost (Real Data)")

# Model trained and evaluated on the generated data.
xgb_gen = xgb.XGBRegressor(**xgb_params)
xgb_gen.fit(Xg_train, yg_train)
yg_pred_xgb = xgb_gen.predict(Xg_test)
evaluate_model(yg_test, yg_pred_xgb, label="XGBoost (Generated Data)")



📊 XGBoost (Real Data) — MAE: 0.0395, RMSE: 0.0568, R²: 0.8951
📊 XGBoost (Generated Data) — MAE: 0.1416, RMSE: 0.1937, R²: 0.4796


(0.14160121956609373, np.float64(0.19367389609477315), 0.47962736728367295)

In [None]:
def reshape_for_lstm(X, y, timesteps):
    """Turn a flat lag-feature frame into a 3-D LSTM input tensor.

    The columns of ``X`` are expected in the feature-major order produced by
    ``create_lag_features``: ``f1_lag1 .. f1_lagT, f2_lag1 .. f2_lagT, ...``.
    A plain ``reshape((n, timesteps, features))`` would misread that layout
    and mix lags with features, so the data is first reshaped to
    ``(n, features, timesteps)`` and then transposed. The time axis is also
    reversed so that each sequence runs from the oldest lag to the most
    recent one.

    NOTE: a later cell of this notebook defines ``reshape_for_lstm`` again
    with an extra ``original_features`` parameter; after a full run that
    later definition is the one in effect.

    Parameters
    ----------
    X : pandas.DataFrame
        Lag features, ``n_features * timesteps`` columns.
    y : pandas.Series
        Targets aligned with the rows of ``X``.
    timesteps : int
        Number of lags per original feature.

    Returns
    -------
    tuple
        ``(X_seq, y_values)`` with ``X_seq`` of shape
        ``(n_samples, timesteps, n_features)`` and ``y_values`` equal to
        ``y.values``.

    Raises
    ------
    ValueError
        If the number of columns of ``X`` is not a multiple of ``timesteps``.
    """
    n_samples, n_columns = X.shape
    if n_columns % timesteps != 0:
        raise ValueError(
            f"{n_columns} columns cannot be split into {timesteps} timesteps"
        )
    original_features = n_columns // timesteps

    # Columns are grouped per feature, so axis 1 is the feature, axis 2 the lag.
    per_feature = X.values.reshape((n_samples, original_features, timesteps))
    # -> (samples, timesteps, features); lag index 0 is lag1 (most recent),
    # so flip the time axis to feed the sequence oldest -> newest.
    X_seq = np.ascontiguousarray(per_feature.transpose(0, 2, 1)[:, ::-1, :])
    return X_seq, y.values

In [None]:
# LSTM Models
# ----------------------------

def reshape_for_lstm(X, y, timesteps, original_features=3):
    """Turn a flat lag-feature frame into a 3-D LSTM input tensor.

    The columns of ``X`` are expected in the feature-major order produced by
    ``create_lag_features``: ``f1_lag1 .. f1_lagT, f2_lag1 .. f2_lagT, ...``.
    A plain ``reshape((n, timesteps, features))`` would misread that layout
    and mix lags with features, so the data is first reshaped to
    ``(n, features, timesteps)`` and then transposed. The time axis is also
    reversed so that each sequence runs from the oldest lag to the most
    recent one.

    Parameters
    ----------
    X : pandas.DataFrame
        Lag features, ``original_features * timesteps`` columns.
    y : pandas.Series
        Targets aligned with the rows of ``X``.
    timesteps : int
        Number of lags per original feature.
    original_features : int, default 3
        Number of un-lagged input columns.

    Returns
    -------
    tuple
        ``(X_seq, y_values)`` with ``X_seq`` of shape
        ``(n_samples, timesteps, original_features)`` and ``y_values`` equal
        to ``y.values``.

    Raises
    ------
    AssertionError
        If ``X`` does not have ``timesteps * original_features`` columns.
    """
    expected_shape = timesteps * original_features
    assert X.shape[1] == expected_shape, f"Expected {expected_shape} features, got {X.shape[1]}"

    # Columns are grouped per feature, so axis 1 is the feature, axis 2 the lag.
    per_feature = X.values.reshape((X.shape[0], original_features, timesteps))
    # -> (samples, timesteps, features); lag index 0 is lag1 (most recent),
    # so flip the time axis to feed the sequence oldest -> newest.
    X_seq = np.ascontiguousarray(per_feature.transpose(0, 2, 1)[:, ::-1, :])
    return X_seq, y.values

# Reshape the train/test partitions into LSTM tensors
# (the number of feature columns must be consistent with `lags`).
(Xr_train_lstm, yr_train_lstm), (Xr_test_lstm, yr_test_lstm) = (
    reshape_for_lstm(X_part, y_part, timesteps=lags)
    for X_part, y_part in ((Xr_train, yr_train), (Xr_test, yr_test))
)

(Xg_train_lstm, yg_train_lstm), (Xg_test_lstm, yg_test_lstm) = (
    reshape_for_lstm(X_part, y_part, timesteps=lags)
    for X_part, y_part in ((Xg_train, yg_train), (Xg_test, yg_test))
)

def build_lstm(input_shape):
    """Build and compile a single-layer LSTM regressor.

    The input shape is declared with an explicit ``Input`` layer rather than
    an ``input_shape=`` argument on the LSTM layer; the latter makes recent
    Keras versions emit a warning when used in a ``Sequential`` model.

    Parameters
    ----------
    input_shape : tuple
        Shape of one sample, passed to ``Input(shape=...)``. The callers in
        this notebook pass ``X.shape[1:]`` of the 3-D LSTM tensors.

    Returns
    -------
    Sequential
        Model with one 64-unit tanh LSTM layer and a 1-unit dense output,
        compiled with the Adam optimizer and MSE loss.
    """
    model = Sequential([
        Input(shape=input_shape),
        LSTM(64, activation='tanh'),
        Dense(1)
    ])
    model.compile(optimizer='adam', loss='mse')
    return model

def _fit_and_score_lstm(X_train, y_train, X_test, y_test, label, seed=42):
    """Train one LSTM with early stopping and report its test metrics.

    Parameters
    ----------
    X_train, y_train : numpy.ndarray
        Training tensors; the last 10% of samples are held out by Keras as
        the validation set (``validation_split=0.1``).
    X_test, y_test : numpy.ndarray
        Test tensors used for the reported metrics.
    label : str
        Text passed to ``evaluate_model`` for the printed summary.
    seed : int, default 42
        Seed applied before the model is built so runs are repeatable.

    Returns
    -------
    tuple
        ``(model, predictions, metrics)`` where ``predictions`` is the
        flattened output of ``model.predict(X_test)`` and ``metrics`` is the
        value returned by ``evaluate_model``.
    """
    # Seed before building the model so weight initialisation is repeatable.
    set_random_seed(seed)
    model = build_lstm(X_train.shape[1:])

    # restore_best_weights keeps the weights of the best validation epoch
    # instead of those from the last (patience-delayed) epoch.
    early_stop = EarlyStopping(monitor='val_loss', patience=5,
                               restore_best_weights=True)
    model.fit(X_train, y_train, epochs=50, batch_size=16,
              validation_split=0.1, verbose=0, callbacks=[early_stop])

    predictions = model.predict(X_test).flatten()
    metrics = evaluate_model(y_test, predictions, label=label)
    return model, predictions, metrics


lstm_real, yr_pred_lstm, lstm_real_metrics = _fit_and_score_lstm(
    Xr_train_lstm, yr_train_lstm, Xr_test_lstm, yr_test_lstm,
    label="LSTM (Real Data)")

lstm_gen, yg_pred_lstm, lstm_gen_metrics = _fit_and_score_lstm(
    Xg_train_lstm, yg_train_lstm, Xg_test_lstm, yg_test_lstm,
    label="LSTM (Generated Data)")

  super().__init__(**kwargs)


[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
📊 LSTM (Real Data) — MAE: 0.0447, RMSE: 0.0645, R²: 0.8647


  super().__init__(**kwargs)


[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step
📊 LSTM (Generated Data) — MAE: 0.1295, RMSE: 0.1795, R²: 0.5532


(0.1294653652981722, np.float64(0.17947086613719068), 0.5531516205989564)