In [1]:
#LSTM bnst5dm al data ale esmha cut intervals fixed BUT many parts are edited by chatgpt should be studied
import os, random
import numpy as np, pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt
import joblib
import torch, torch.nn as nn
from torch.utils.data import Dataset, DataLoader

# ---------------- CONFIG ----------------
DATA_DIR = "/content/Cut_fixed_data"   # <- set your data folder
EXPECTED_TIMESTEPS = 200
BATCH_SIZE = 8
LR = 1e-4
EPOCHS = 300
SEED = 42
PATIENCE = 25
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
OUT_DIR = "lstm_artifacts"
os.makedirs(OUT_DIR, exist_ok=True)

# reproducibility
random.seed(SEED); np.random.seed(SEED); torch.manual_seed(SEED)
print("Device:", DEVICE)

# ---------------- LOAD DATA ----------------
def load_intervals(data_dir, expected_timesteps):
    files = sorted([os.path.join(data_dir, f) for f in os.listdir(data_dir) if f.endswith(".csv")])
    X_list, y_list, names = [], [], []
    for p in files:
        df = pd.read_csv(p)
        if df.shape[0] != expected_timesteps:
            continue
        if "BO-DI-DCCT1_getDcctCurrent" not in df.columns:
            continue
        feats = df.drop(columns=["BO-DI-DCCT1_getDcctCurrent", "Timestamp"], errors='ignore').values
        tgt = df["BO-DI-DCCT1_getDcctCurrent"].iloc[-1]
        X_list.append(feats); y_list.append(tgt); names.append(os.path.basename(p))
    X = np.array(X_list); y = np.array(y_list)
    return X, y, names

X, y, names = load_intervals(DATA_DIR, EXPECTED_TIMESTEPS)
if len(X) == 0:
    raise RuntimeError("No data loaded. Check DATA_DIR and file format.")
print(f"Loaded {len(X)} intervals, shape X={X.shape}, y={y.shape}")

# ---------------- OUTLIER-aware stratified split ----------------
# bin the target into quantiles to keep distribution similar in splits
quantiles = np.quantile(y, np.linspace(0,1,6))
y_bins = np.digitize(y, quantiles[1:-1])  # categories 0..4
try:
    X_trval, X_test, y_trval, y_test = train_test_split(X, y, test_size=0.2, random_state=SEED, stratify=y_bins)
except:
    X_trval, X_test, y_trval, y_test = train_test_split(X, y, test_size=0.2, random_state=SEED)

X_train, X_val, y_train, y_val = train_test_split(X_trval, y_trval, test_size=0.2, random_state=SEED)
print("Split sizes -> train, val, test:", X_train.shape[0], X_val.shape[0], X_test.shape[0])

# ---------------- SCALERS (fit on TRAIN only) ----------------
Ntrain, T, F = X_train.shape
scaler_X = StandardScaler().fit(X_train.reshape(-1, F))
scaler_y = StandardScaler().fit(y_train.reshape(-1,1))

def scale_X(X_in):
    n,t,f = X_in.shape
    flat = X_in.reshape(-1,f)
    flat_s = scaler_X.transform(flat)
    return flat_s.reshape(n,t,f)

X_train_s = scale_X(X_train)
X_val_s   = scale_X(X_val)
X_test_s  = scale_X(X_test)
y_train_s = scaler_y.transform(y_train.reshape(-1,1)).ravel()
y_val_s   = scaler_y.transform(y_val.reshape(-1,1)).ravel()

# ---------------- DATASET / DATALOADER ----------------
class SeqDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.float32)
    def __len__(self): return len(self.X)
    def __getitem__(self, idx): return self.X[idx], self.y[idx]

train_loader = DataLoader(SeqDataset(X_train_s, y_train_s), batch_size=BATCH_SIZE, shuffle=True)
val_loader   = DataLoader(SeqDataset(X_val_s, y_val_s), batch_size=BATCH_SIZE, shuffle=False)
test_tensor  = torch.tensor(X_test_s, dtype=torch.float32).to(DEVICE)

# ---------------- MODEL ----------------
class LSTMRegressor(nn.Module):
    def __init__(self, input_size, hidden_size=128, num_layers=2, dropout=0.2):
        super().__init__()
        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size,
                            num_layers=num_layers, batch_first=True, dropout=dropout)
        self.fc = nn.Sequential(nn.Linear(hidden_size,64), nn.ReLU(), nn.Dropout(0.2), nn.Linear(64,1))
    def forward(self, x):
        out, _ = self.lstm(x)            # out: (batch, T, hidden)
        last = out[:, -1, :]             # take final timestep
        return self.fc(last).squeeze(-1)

model = LSTMRegressor(input_size=F).to(DEVICE)
opt = torch.optim.Adam(model.parameters(), lr=LR, weight_decay=1e-6)
loss_fn = nn.SmoothL1Loss()   # Huber-like (robust)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(opt, factor=0.5, patience=10)

# ---------------- TRAIN with early stopping ----------------
best_val = float('inf'); patience = 0
for epoch in range(1, EPOCHS+1):
    model.train()
    train_losses = []
    for xb, yb in train_loader:
        xb = xb.to(DEVICE); yb = yb.to(DEVICE)
        opt.zero_grad()
        out = model(xb)
        loss = loss_fn(out, yb)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=2.0)
        opt.step()
        train_losses.append(loss.item())
    model.eval()
    val_losses = []
    with torch.no_grad():
        for xb, yb in val_loader:
            xb = xb.to(DEVICE); yb = yb.to(DEVICE)
            val_losses.append(float(loss_fn(model(xb), yb).item()))
    avg_train = float(np.mean(train_losses)); avg_val = float(np.mean(val_losses))
    scheduler.step(avg_val)
    if epoch % 10 == 0 or epoch==1:
        print(f"Epoch {epoch} train_loss={avg_train:.6f} val_loss={avg_val:.6f}")
    if avg_val + 1e-8 < best_val:
        best_val = avg_val; patience = 0
        torch.save(model.state_dict(), os.path.join(OUT_DIR, "best_lstm.pth"))
    else:
        patience += 1
        if patience >= PATIENCE:
            print("Early stopping at epoch", epoch)
            break

# ---------------- EVAL on test ----------------
model.load_state_dict(torch.load(os.path.join(OUT_DIR, "best_lstm.pth"), map_location=DEVICE))
model.eval()
with torch.no_grad():
    preds_s = model(test_tensor).cpu().numpy().ravel()
preds = scaler_y.inverse_transform(preds_s.reshape(-1,1)).ravel()

mse = mean_squared_error(y_test, preds); r2 = r2_score(y_test, preds)
print("\nFinal Test metrics: MSE=", mse, "R2=", r2)
print("Baseline (train mean) MSE=", mean_squared_error(y_test, np.full_like(y_test, np.mean(y_train))))

# ---------------- SAVE artifacts ----------------
joblib.dump(scaler_X, os.path.join(OUT_DIR, "scaler_X.pkl"))
joblib.dump(scaler_y, os.path.join(OUT_DIR, "scaler_y.pkl"))
torch.save(model.state_dict(), os.path.join(OUT_DIR, "lstm_final.pth"))
print("Saved model + scalers to", OUT_DIR)



Device: cpu
Loaded 107 intervals, shape X=(107, 200, 25), y=(107,)
Split sizes -> train, val, test: 68 17 22
Epoch 1 train_loss=0.306055 val_loss=0.179815
Epoch 10 train_loss=0.276545 val_loss=0.134025
Epoch 20 train_loss=0.229675 val_loss=0.130803
Epoch 30 train_loss=0.264877 val_loss=0.128937
Epoch 40 train_loss=0.166108 val_loss=0.054653
Epoch 50 train_loss=0.072013 val_loss=0.024627
Epoch 60 train_loss=0.118782 val_loss=0.221707
Epoch 70 train_loss=0.139208 val_loss=0.246821
Early stopping at epoch 73

Final Test metrics: MSE= 0.16570247627949575 R2= 0.9408072765935664
Baseline (train mean) MSE= 2.8372411847213352
Saved model + scalers to lstm_artifacts
