In [1]:
!pip install -q scikit-learn pandas matplotlib torch joblib

import os
import pandas as pd
import numpy as np
from glob import glob
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import balanced_accuracy_score
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
import joblib

# 1. Load the data
def load_csvs(folder, use_anomaly=True):
    """Load every ``*.csv`` file directly inside *folder* into one DataFrame.

    Files are read in sorted filename order with ``;`` as the separator and
    concatenated row-wise with a fresh integer index. The ``datetime`` and
    ``changepoint`` columns are removed when present.

    Args:
        folder: Directory containing the CSV files.
        use_anomaly: When False, the ``anomaly`` column of every file is
            overwritten (or created) with 0, i.e. all rows are labelled normal.

    Returns:
        A single ``pandas.DataFrame`` holding the rows of all files.

    Raises:
        ValueError: If *folder* contains no ``*.csv`` files.
    """
    files = sorted(glob(os.path.join(folder, '*.csv')))
    if not files:
        # pd.concat([]) would also raise ValueError, but without saying which
        # folder was empty or mistyped.
        raise ValueError(f"No CSV files found in {folder!r}")

    frames = []
    for file in files:
        frame = pd.read_csv(file, sep=';')
        # errors='ignore' keeps files that lack one of these columns loadable.
        frame = frame.drop(columns=['datetime', 'changepoint'], errors='ignore')
        if not use_anomaly:
            frame['anomaly'] = 0
        frames.append(frame)
    return pd.concat(frames, ignore_index=True)

# Root folder holding one sub-folder of ';'-separated CSV files per scenario.
base = r'D:\FinancingSupportSystem\Group B Data\Group B Task\data'

clean_dir, valve1_dir, valve2_dir, other_dir = (
    os.path.join(base, name)
    for name in ('anomaly-free', 'valve1', 'valve2', 'other')
)

# The anomaly-free recordings are forced to label 0; the others keep their labels.
df_clean = load_csvs(clean_dir, use_anomaly=False)
df_valve1 = load_csvs(valve1_dir)
df_valve2 = load_csvs(valve2_dir)
df_other = load_csvs(other_dir)

# Stack all scenarios and discard any row with a missing value.
df = pd.concat(
    [df_clean, df_valve1, df_valve2, df_other],
    ignore_index=True,
).dropna()

# Labels are the 'anomaly' column; every remaining column is a feature.
labels = df['anomaly'].values

# Scale each feature to [0, 1].
# NOTE(review): the scaler is fitted on the full dataset, before any
# train/test split is made further down -- confirm this is intended.
scaler = MinMaxScaler()
features = scaler.fit_transform(df.drop(columns='anomaly').values)
joblib.dump(scaler, 'scaler.save')  # save scaler for test set


# 2. Windowing Function

def make_windows(data, labels, window_size):
    """Cut *data* into overlapping sliding windows with one label per window.

    Window ``i`` covers rows ``i .. i + window_size - 1`` and is labelled 1 if
    any label inside that span is truthy, otherwise 0. Consecutive windows
    are shifted by one row, so ``len(data) - window_size + 1`` windows are
    produced and the final row of *data* is included in the last window.

    Args:
        data: Array-like whose first axis indexes samples.
        labels: Array-like of per-sample labels, aligned with *data*.
        window_size: Number of consecutive rows per window; must be positive.

    Returns:
        ``(X, y)`` where ``X`` has shape ``(n_windows, window_size, ...)`` and
        ``y`` has shape ``(n_windows,)``. When *data* is shorter than
        *window_size* both arrays are empty but keep these shapes.

    Raises:
        ValueError: If *window_size* is not positive.
    """
    if window_size <= 0:
        raise ValueError(f"window_size must be positive, got {window_size}")

    data = np.asarray(data)
    labels = np.asarray(labels)

    # +1 so the window ending on the very last row is not dropped.
    n_windows = len(data) - window_size + 1
    if n_windows <= 0:
        # Keep the trailing dimensions so callers can still read X.shape[2].
        empty_X = np.empty((0, window_size) + data.shape[1:], dtype=data.dtype)
        return empty_X, np.empty((0,), dtype=int)

    X = np.stack([data[i:i + window_size] for i in range(n_windows)])
    y = np.array(
        [int(np.any(labels[i:i + window_size])) for i in range(n_windows)]
    )
    return X, y

# ----------------------------
# 3. Model Definition
# ----------------------------
class SmallCNN(nn.Module):
    """Minimal 1-D convolutional binary classifier.

    One ``Conv1d`` layer (8 filters, kernel size 3) followed by ReLU and an
    adaptive average pool down to length 1, then a single linear unit whose
    output is passed through a sigmoid.
    """

    def __init__(self, input_size):
        """Create the layers; *input_size* is the number of input channels."""
        super().__init__()
        n_filters = 8
        feature_extractor = [
            nn.Conv1d(input_size, n_filters, kernel_size=3),
            nn.ReLU(),
            # Collapse the length axis to one value per filter.
            nn.AdaptiveAvgPool1d(1),
        ]
        self.conv = nn.Sequential(*feature_extractor)
        self.fc = nn.Linear(n_filters, 1)

    def forward(self, x):
        """Return the sigmoid output of the linear head for input *x*."""
        pooled = self.conv(x)
        # Drop the length-1 axis left by the pooling layer.
        flat = pooled.squeeze(-1)
        logits = self.fc(flat)
        return torch.sigmoid(logits)


# 4. Training + Evaluation Loop

# Run on the GPU when PyTorch reports CUDA as available, otherwise on the CPU.
device = 'cpu'
if torch.cuda.is_available():
    device = 'cuda'

def train_and_evaluate(window_size, epochs=100, batch_size=128, lr=0.001,
                       weight_decay=0.001, test_size=0.2, random_state=None):
    """Train a SmallCNN on windows of the module-level data and score it.

    Windows are built from the module-level ``features`` / ``labels`` arrays,
    split into stratified train and test parts, and a fresh ``SmallCNN`` is
    trained with AdamW and binary cross-entropy. The trained weights are
    written to ``model_ws{window_size}.pth`` in the working directory.

    Args:
        window_size: Number of consecutive samples per window.
        epochs: Number of passes over the training set.
        batch_size: Mini-batch size used for training and for evaluation.
        lr: AdamW learning rate.
        weight_decay: AdamW weight-decay coefficient.
        test_size: Fraction of windows held out for evaluation.
        random_state: Seed forwarded to ``train_test_split``; ``None`` keeps
            the split non-deterministic.

    Returns:
        Balanced accuracy on the held-out windows, thresholding the model
        output at 0.5.
    """
    print(f"\nTraining model with window size {window_size}...")
    X, y = make_windows(features, labels, window_size)

    # NOTE(review): consecutive windows overlap in all but one row, so a
    # random split places near-duplicates of training windows in the test
    # set. The reported score is likely optimistic; consider a chronological
    # or per-file split.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, stratify=y, random_state=random_state
    )

    model = SmallCNN(X.shape[2]).to(device)

    # Conv1d expects (batch, channels, length); windows are (batch, length, channels).
    X_train_t = torch.tensor(X_train, dtype=torch.float32).transpose(1, 2)
    y_train_t = torch.tensor(y_train, dtype=torch.float32).unsqueeze(1)
    X_test_t = torch.tensor(X_test, dtype=torch.float32).transpose(1, 2)

    train_dl = DataLoader(
        TensorDataset(X_train_t, y_train_t), batch_size=batch_size, shuffle=True
    )

    loss_fn = nn.BCELoss()
    optimizer = torch.optim.AdamW(
        model.parameters(), lr=lr, weight_decay=weight_decay
    )

    for epoch in range(epochs):
        model.train()
        for xb, yb in train_dl:
            xb, yb = xb.to(device), yb.to(device)
            pred = model(xb)
            loss = loss_fn(pred, yb)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

    # Evaluation: score in mini-batches so the whole test set never has to
    # sit on the device at once.
    model.eval()
    test_dl = DataLoader(TensorDataset(X_test_t), batch_size=batch_size)
    with torch.no_grad():
        probs = torch.cat([model(xb.to(device)).cpu() for (xb,) in test_dl])
    # Flatten (N, 1) -> (N,) so predictions line up with y_test.
    preds = (probs.numpy().ravel() > 0.5).astype(int)
    bal_acc = balanced_accuracy_score(y_test, preds)
    print(f"Balanced Accuracy (ws={window_size}): {bal_acc:.4f}")

    # Save model
    save_path = f"model_ws{window_size}.pth"
    torch.save(model.state_dict(), save_path)
    print(f"Model saved to {save_path}")
    return bal_acc


# 5. Train all required models

# Train one model per window size and record its balanced accuracy.
window_sizes = (30, 90, 270)
results = dict()
for ws in window_sizes:
    score = train_and_evaluate(ws)
    results[ws] = score

print("\nFinal Balanced Accuracies:", results)


# 6. bal_acc function

def bal_acc(model_path, window_size, X_test, y_test, scaler):
    """Score a saved SmallCNN on raw (unscaled, unwindowed) test data.

    The samples are scaled with *scaler*, cut into sliding windows with
    ``make_windows``, and classified on the CPU by the model whose weights
    are stored at *model_path*.

    Args:
        model_path: Path to a ``state_dict`` file written by ``torch.save``.
        window_size: Window length the model was trained with.
        X_test: 2-D array-like of shape ``(n_samples, n_features)``.
        y_test: Per-sample labels aligned with *X_test*.
        scaler: Fitted scaler exposing ``transform``.

    Returns:
        Balanced accuracy of the thresholded (0.5) predictions against the
        per-window labels.

    Raises:
        ValueError: If *X_test* is too short to form a single window.
    """
    X_scaled = scaler.transform(X_test)
    X, y = make_windows(X_scaled, np.asarray(y_test), window_size)
    if len(X) == 0:
        raise ValueError(
            f"X_test has {len(X_scaled)} samples, too few for window_size={window_size}"
        )

    # The feature count is axis 1 of the 2-D sample matrix; it becomes the
    # Conv1d channel count.
    model = SmallCNN(X_scaled.shape[1])
    # NOTE(review): torch.load unpickles the file; only load trusted checkpoints.
    model.load_state_dict(torch.load(model_path, map_location='cpu'))
    model.eval()

    # Conv1d expects (batch, channels, length); windows are (batch, length, channels).
    X_test_t = torch.tensor(X, dtype=torch.float32).transpose(1, 2)
    with torch.no_grad():
        probs = model(X_test_t).numpy()
    # Flatten (N, 1) -> (N,) so predictions line up with y.
    preds = (probs.ravel() > 0.5).astype(int)
    return balanced_accuracy_score(y, preds)


[notice] A new release of pip is available: 24.0 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip



Training model with window size 30...
Balanced Accuracy (ws=30): 0.5559
Model saved to model_ws30.pth

Training model with window size 90...
Balanced Accuracy (ws=90): 0.6528
Model saved to model_ws90.pth

Training model with window size 270...
Balanced Accuracy (ws=270): 0.8597
Model saved to model_ws270.pth

Final Balanced Accuracies: {30: 0.5558736918104241, 90: 0.6528314724615961, 270: 0.8596569891267796}
