<a href="https://colab.research.google.com/github/UMANG-SH941/Hacksprint/blob/main/SEHEAttentionsleepmodel.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Colab-only setup: mount Google Drive so files stored there are reachable
# under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

# Go to your project folder (later cells use paths relative to this directory,
# e.g. "EEG_Panwar_2019" and "model/model.py")
%cd /content/drive/MyDrive/Panwar_EEG_Project

# Show dataset folders (only the first 40 lines of the recursive listing)
!ls -R EEG_Panwar_2019 | head -40


Mounted at /content/drive
/content/drive/MyDrive/Panwar_EEG_Project
EEG_Panwar_2019:
Test
train

EEG_Panwar_2019/Test:
E10_10.txt
E10_11.txt
E10_12.txt
E10_13.txt
E10_14.txt
E10_15.txt
E10_16.txt
E10_17.txt
E10_18.txt
E10_19.txt
E10_1.txt
E10_20.txt
E10_2.txt
E10_3.txt
E10_4.txt
E10_5.txt
E10_6.txt
E10_7.txt
E10_8.txt
E10_9.txt
E1_10.txt
E1_11.txt
E1_12.txt
E1_13.txt
E1_14.txt
E1_15.txt
E1_16.txt
E1_17.txt
E1_18.txt
E1_19.txt
E1_1.txt
E1_20.txt
E1_2.txt
E1_3.txt
E1_4.txt


In [None]:
!pip install -q numpy matplotlib scikit-learn torch torchvision torchaudio tqdm plotly pandas


In [None]:
# Materialise the network as an importable module (model/model.py) so that
# later cells can `from model.model import AttnSleep` without redefining it.
import importlib
import sys
from pathlib import Path

# Source text written to model/model.py. AttnSleep = two strided
# Conv1d + BatchNorm + ReLU stages (with a max-pool in between), multi-head
# self-attention over the resulting sequence, a mean over the sequence axis,
# and a final linear classifier.
MODEL_SOURCE = '''
import torch
import torch.nn as nn

class AttnSleep(nn.Module):
    def __init__(self, num_classes=2):
        super().__init__()
        self.conv1 = nn.Conv1d(1, 32, kernel_size=7, stride=2, padding=3)
        self.bn1 = nn.BatchNorm1d(32)
        self.relu = nn.ReLU()
        self.pool = nn.MaxPool1d(2)
        self.conv2 = nn.Conv1d(32, 64, kernel_size=5, stride=2, padding=2)
        self.bn2 = nn.BatchNorm1d(64)
        self.attn = nn.MultiheadAttention(embed_dim=64, num_heads=4, batch_first=True)
        self.fc = nn.Linear(64, num_classes)

    def forward(self, x):
        x = self.relu(self.bn1(self.conv1(x)))
        x = self.pool(x)
        x = self.relu(self.bn2(self.conv2(x)))
        x = x.permute(0, 2, 1)
        attn_out, _ = self.attn(x, x, x)
        x = attn_out.mean(dim=1)
        return self.fc(x)
'''

model_dir = Path("model")
model_dir.mkdir(parents=True, exist_ok=True)  # same effect as `mkdir -p model`
(model_dir / "model.py").write_text(MODEL_SOURCE)

# The file was created while the interpreter is already running: clear the
# import system's cached directory listings so the new module is found, and
# reload it if an earlier run of this cell already imported an older version
# (otherwise the stale class would silently be reused).
importlib.invalidate_caches()
if "model.model" in sys.modules:
    importlib.reload(sys.modules["model.model"])

from model.model import AttnSleep
model = AttnSleep()
print("‚úÖ Model defined successfully!")


In [None]:
# GPU recommended: Runtime ‚Üí Change runtime type ‚Üí GPU
!pip install -q numpy matplotlib scikit-learn torch torchvision torchaudio tqdm plotly pandas


In [None]:
import numpy as np, torch, json
import torch.nn as nn, torch.optim as optim
from pathlib import Path
import plotly.graph_objects as go
import plotly.express as px
import plotly.figure_factory as ff
import pandas as pd
from tqdm.notebook import tqdm
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    roc_auc_score, roc_curve, confusion_matrix, classification_report
)
from torch.utils.data import TensorDataset, DataLoader
from model.model import AttnSleep

# =============================
# Utility Functions
# =============================
def zscore_per_sample(X, eps=1e-8):
    """Standardise every row of ``X`` to zero mean and unit standard deviation.

    Statistics are taken along axis 1. The standard deviation is floored at
    ``eps`` so that a constant row maps to zeros instead of dividing by zero.
    """
    row_mean = X.mean(axis=1, keepdims=True)
    row_std = X.std(axis=1, keepdims=True)
    safe_std = np.maximum(row_std, eps)
    return (X - row_mean) / safe_std

def pad_or_trim(x, L):
    """Return ``x`` forced to length ``L``.

    Longer inputs are cut to their first ``L`` samples, shorter inputs are
    zero-padded at the end, and inputs that already have ``L`` samples are
    returned unchanged (same object).
    """
    n = x.size
    if n == L:
        return x
    if n > L:
        return x[:L]
    return np.pad(x, (0, L - n))

def load_txt_dataset(path, seq_len):
    """Load every ``*.txt`` recording in ``path`` as a fixed-length, z-scored row.

    Parameters
    ----------
    path : str or Path
        Directory that is searched (non-recursively) for ``*.txt`` files.
    seq_len : int
        Number of samples each recording is trimmed or zero-padded to.

    Returns
    -------
    X : np.ndarray
        One row per file (files in sorted order), z-scored per row.
    y : np.ndarray
        Integer labels: 1 when the file stem contains the letter "e"/"E",
        otherwise 0.

    Raises
    ------
    ValueError
        If ``path`` contains no ``*.txt`` files.
    """
    sigs, labs = [], []
    for f in sorted(Path(path).glob("*.txt")):
        # ravel(): np.loadtxt yields a 0-d array for a single value and a 2-D
        # array for multi-column files; the length handling needs a 1-D signal.
        x = np.loadtxt(f).astype(np.float32).ravel()
        sigs.append(pad_or_trim(x, seq_len))
        # NOTE(review): the label is derived from *any* "e"/"E" in the stem,
        # not just a leading "E" — confirm no healthy file name contains one.
        labs.append(1 if "e" in f.stem.lower() else 0)

    if not sigs:
        # Fail with a clear message instead of np.stack's
        # "need at least one array to stack".
        raise ValueError(f"No .txt files found in {path}")

    X = zscore_per_sample(np.stack(sigs))
    y = np.array(labs)
    return X, y

# =============================
# Training for One Fold (with progress + history)
# =============================
def train_fold(model, device, Xtr, ytr, Xv, yv, epochs=80, lr=5e-4, wd=1e-2, batch=32):
    """Train ``model`` and restore the weights of its best validation epoch.

    Parameters
    ----------
    model : nn.Module
        Network to train; it is updated in place and also returned.
    device : torch.device
        Device the mini-batches are moved to (the model must already be on it).
    Xtr, Xv : array-like
        Training / validation signals, one row per sample. A channel axis is
        inserted at position 1 before each batch is passed to the model.
    ytr, yv : array-like of int
        Class indices for the training / validation samples.
    epochs, lr, wd, batch :
        Number of epochs, AdamW learning rate, AdamW weight decay, batch size.

    Returns
    -------
    model : nn.Module
        The model with the weights of the epoch that reached the highest
        validation accuracy (earliest epoch wins ties).
    history : dict
        ``{"train_loss": [...], "val_acc": [...]}`` with one entry per epoch.
    """
    def _as_dataset(X, y):
        # CrossEntropyLoss needs int64 class indices; request the dtype
        # explicitly instead of inheriting whatever integer type `y` has.
        return TensorDataset(
            torch.as_tensor(X, dtype=torch.float32),
            torch.as_tensor(y, dtype=torch.long),
        )

    tr_loader = DataLoader(_as_dataset(Xtr, ytr), batch_size=batch, shuffle=True)
    va_loader = DataLoader(_as_dataset(Xv, yv), batch_size=batch)
    opt = optim.AdamW(model.parameters(), lr=lr, weight_decay=wd)
    crit = nn.CrossEntropyLoss()

    # Start below any reachable accuracy so the first epoch is always recorded,
    # and keep a real copy of the weights: state_dict() returns references to
    # the live tensors, which training would keep overwriting.
    best = -1.0
    best_state = {k: v.detach().clone() for k, v in model.state_dict().items()}
    history = {"train_loss": [], "val_acc": []}

    for _ in tqdm(range(epochs), desc="Epochs", leave=False):
        # ---- one optimisation pass over the training data ----
        model.train()
        total_loss = 0
        for xb, yb in tr_loader:
            xb, yb = xb.unsqueeze(1).to(device), yb.to(device)
            opt.zero_grad()
            loss = crit(model(xb), yb)
            loss.backward()
            opt.step()
            total_loss += loss.item()

        # ---- validation accuracy for this epoch ----
        model.eval()
        preds, tgts = [], []
        with torch.no_grad():
            for xb, yb in va_loader:
                xb, yb = xb.unsqueeze(1).to(device), yb.to(device)
                preds += torch.argmax(model(xb), 1).cpu().numpy().tolist()
                tgts += yb.cpu().numpy().tolist()
        acc = accuracy_score(tgts, preds)

        history["train_loss"].append(total_loss / len(tr_loader))
        history["val_acc"].append(acc)

        if acc > best:
            best = acc
            best_state = {k: v.clone() for k, v in model.state_dict().items()}

    model.load_state_dict(best_state)
    return model, history

# =============================
# Main Execution
# =============================
# Seed the RNGs that drive weight initialisation and batch shuffling so a
# re-run starts from the same state (GPU kernels may still add small
# run-to-run differences).
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
base = Path("EEG_Panwar_2019")
seq_len = 4096  # every recording is trimmed / zero-padded to this many samples

print("üì• Loading data...")
X_train, y_train = load_txt_dataset(base / "train", seq_len)
X_test, y_test   = load_txt_dataset(base / "Test", seq_len)
# Shapes plus per-class sample counts (index 0 = label 0, index 1 = label 1)
print("Train:", X_train.shape, np.bincount(y_train))
print("Test :", X_test.shape, np.bincount(y_test))

# =============================
# Cross-validation
# =============================
# 5-fold stratified cross-validation on the training set. A fresh model is
# trained per fold and scored on that fold's held-out split.
# NOTE(review): train_fold picks its checkpoint by accuracy on this same
# held-out split, so the per-fold scores below are optimistically biased.
skf = StratifiedKFold(5, shuffle=True, random_state=42)
fold_stats, histories = [], []  # fold_stats rows are [accuracy, F1, AUC]

for i, (tr, va) in enumerate(skf.split(X_train, y_train)):
    print(f"\nüöÄ Fold {i+1}/5")
    model = AttnSleep(2).to(device)
    model, hist = train_fold(model, device, X_train[tr], y_train[tr], X_train[va], y_train[va])
    histories.append(hist)

    # Score explicitly in eval mode rather than relying on the mode train_fold
    # happens to leave the model in (BatchNorm behaves differently in training).
    model.eval()
    Xt = torch.tensor(X_train[va][:, None, :], dtype=torch.float32).to(device)
    with torch.no_grad():
        out = model(Xt)
        probs = torch.softmax(out, 1)[:, 1].cpu().numpy()  # P(class 1)
        preds = torch.argmax(out, 1).cpu().numpy()

    acc = accuracy_score(y_train[va], preds)
    f1  = f1_score(y_train[va], preds)  # binary F1 of the positive class (label 1)
    auc = roc_auc_score(y_train[va], probs)
    fold_stats.append([acc, f1, auc])
    print(f"Fold {i+1}: ACC={acc:.4f}, F1={f1:.4f}, AUC={auc:.4f}")

print("\n‚úÖ Mean CV Accuracy:", np.mean([s[0] for s in fold_stats]))

# =============================
# Interactive Plot: Validation Accuracy per Epoch
# =============================
# One line per fold: validation accuracy recorded after every epoch.
fig = go.Figure(
    data=[
        go.Scatter(y=hist["val_acc"], mode="lines+markers", name=f"Fold {i+1} Val Acc")
        for i, hist in enumerate(histories)
    ]
)
fig.update_layout(
    title="Validation Accuracy per Epoch",
    xaxis_title="Epoch",
    yaxis_title="Accuracy",
)
fig.show()

# =============================
# Train Final Model
# =============================
# Train one model on the full training set, then score it on the test set.
# NOTE(review): the training data is also passed as the "validation" data, so
# train_fold keeps the epoch with the highest *training* accuracy.
final_model = AttnSleep(2).to(device)
final_model, _ = train_fold(final_model, device, X_train, y_train, X_train, y_train)

# Score explicitly in eval mode rather than relying on the mode train_fold
# happens to leave the model in (BatchNorm behaves differently in training).
final_model.eval()
Xt = torch.tensor(X_test[:, None, :], dtype=torch.float32).to(device)
with torch.no_grad():
    out = final_model(Xt)
    probs = torch.softmax(out, 1)[:, 1].cpu().numpy()  # P(class 1)
    preds = torch.argmax(out, 1).cpu().numpy()

# Precision / recall / F1 use sklearn's default binary averaging, i.e. they
# describe the positive class (label 1).
acc = accuracy_score(y_test, preds)
prec = precision_score(y_test, preds)
rec = recall_score(y_test, preds)
f1 = f1_score(y_test, preds)
auc = roc_auc_score(y_test, probs)

print("\nüìä Final Test Performance:")
print(classification_report(y_test, preds, target_names=["Healthy", "Epileptic"]))
print(f"ACC={acc:.4f}, F1={f1:.4f}, AUC={auc:.4f}")

# =============================
# Interactive Results
# =============================
# Confusion matrix
# labels=[0, 1] keeps the matrix 2x2 even if a class is absent from y_test/preds.
cm = confusion_matrix(y_test, preds, labels=[0, 1])
cm_labels = ["Healthy", "Epileptic"]
# Row k of `cm` is true class k, and the heatmap pairs z[k] with y[k]. The y
# labels are reversed so "Healthy" is drawn on top; the rows must be reversed
# with them, otherwise each row is shown under the other class's label.
fig = ff.create_annotated_heatmap(
    z=cm[::-1].tolist(), x=cm_labels, y=cm_labels[::-1], colorscale="Blues"
)
fig.update_layout(title="Interactive Confusion Matrix", xaxis_title="Predicted", yaxis_title="True")
fig.show()

# ROC curve
# ROC of the final model on the test set, with the chance diagonal for reference.
fpr, tpr, _ = roc_curve(y_test, probs)
roc_trace = go.Scatter(x=fpr, y=tpr, mode='lines', name=f"AUC={auc:.3f}")
chance_trace = go.Scatter(x=[0,1], y=[0,1], mode='lines', line=dict(dash='dash'), name='Random')
fig = go.Figure(data=[roc_trace, chance_trace])
fig.update_layout(
    title="Interactive ROC Curve",
    xaxis_title="False Positive Rate",
    yaxis_title="True Positive Rate",
)
fig.show()

# Final metric gauges
# One gauge per test metric, shown as a percentage and laid out on a
# 3-row x 2-column grid that is filled row by row.
metrics = {"Accuracy": acc, "Precision": prec, "Recall": rec, "F1-Score": f1, "AUC": auc}
fig = go.Figure()
for position, (label, score) in enumerate(metrics.items()):
    grid_row, grid_col = divmod(position, 2)
    gauge = go.Indicator(
        mode="gauge+number",
        value=score*100,
        title={"text": label},
        gauge={"axis": {"range": [0, 100]}},
        domain={"row": grid_row, "column": grid_col},
    )
    fig.add_trace(gauge)
fig.update_layout(grid={"rows":3,"columns":2}, height=600, template="plotly_white")
fig.show()


In [None]:
# ============================
# 📋 MODEL + TRAINING SUMMARY CHECKER
# ============================

# Best-effort report: every section prints a warning instead of failing when
# the values it needs (produced by the training cell) are not available.
import torch, numpy as np
from model.model import AttnSleep

# Ensure device is defined
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

print("="*70)
print("üß† MODEL AND TRAINING SUMMARY")
print("="*70)
# NOTE(review): the hyper-parameters below are typed in by hand; they mirror
# the defaults of train_fold and must be updated together with them.
print("Model Architecture : AttnSleep (Conv1D + MultiHead Attention)")
print("Input Shape        : (1, 4096)")
print("Epochs Trained     : 80")
print("Batch Size         : 32")
print("Learning Rate      : 5e-4")
print("Weight Decay       : 1e-2")
print("Optimizer Used     : AdamW")
print("Loss Function      : CrossEntropyLoss")
print(f"Device Used        : {device}")
print("="*70)

# Dataset Info
try:
    print(f"Train Samples      : {len(X_train)}")
    print(f"Test Samples       : {len(X_test)}")
    print(f"Train Class Dist.  : {np.bincount(y_train)}  (Healthy, Epileptic)")
    print(f"Test Class Dist.   : {np.bincount(y_test)}   (Healthy, Epileptic)")
except Exception as e:
    print("‚ö†Ô∏è Could not read dataset info:", e)

print("-"*70)

# CV Performance (fold_stats rows are [accuracy, F1, AUC])
try:
    mean_acc = np.mean([s[0] for s in fold_stats])
    mean_f1  = np.mean([s[1] for s in fold_stats])
    mean_auc = np.mean([s[2] for s in fold_stats])
    print(f"Cross-Validation Mean Accuracy : {mean_acc:.3f}")
    print(f"Cross-Validation Mean F1-score : {mean_f1:.3f}")
    print(f"Cross-Validation Mean AUC      : {mean_auc:.3f}")
except Exception as e:
    print("‚ö†Ô∏è CV results unavailable:", e)

print("-"*70)

# Final Test Performance
try:
    print(f"Test Accuracy      : {acc:.3f}")
    print(f"Test Precision     : {prec:.3f}")
    print(f"Test Recall        : {rec:.3f}")
    print(f"Test F1-score      : {f1:.3f}")
    print(f"Test AUC           : {auc:.3f}")
except Exception as e:
    print("‚ö†Ô∏è Test results unavailable:", e)

print("="*70)
print("üß© MODEL LAYER DETAILS")
print("="*70)
try:
    # Imported here, inside the try, so that a missing torchsummary package
    # produces the hint below instead of aborting the whole cell.
    from torchsummary import summary

    model = AttnSleep(2).to(device)
    summary(model, input_size=(1, 4096))
except Exception as e:
    print("‚ö†Ô∏è torchsummary not installed or model unavailable:", e)
    print("Install using: !pip install -q torchsummary")


In [None]:
# ==========================================
# 📄 TABLE II — Comparison Among Models on SEHE Dataset
# ==========================================

import pandas as pd
import plotly.figure_factory as ff
import numpy as np
from sklearn.metrics import f1_score

# --- Pull your model metrics ---
# These names come from the training cell; `acc`, `f1`, `auc`, `preds` and
# `y_test` must still hold the final *test-set* results when this cell runs.
attnsleep_cv_acc  = np.mean([s[0] for s in fold_stats])
attnsleep_cv_f1   = np.mean([s[1] for s in fold_stats])
attnsleep_cv_auc  = np.mean([s[2] for s in fold_stats])
attnsleep_test_acc = acc
attnsleep_test_f1  = f1
attnsleep_test_auc = auc

# Per-class F1 on the test set, computed from the actual predictions instead of
# numbers typed in from an earlier run. Label 0 = Healthy, label 1 = Epileptic.
attnsleep_test_healthy_f1, attnsleep_test_epileptic_f1 = f1_score(
    y_test, preds, average=None, labels=[0, 1]
)
# "F1 (avg)" is the mean of the two per-class scores, as in the baseline rows.
attnsleep_test_avg_f1 = (attnsleep_test_healthy_f1 + attnsleep_test_epileptic_f1) / 2

# --- Create dataset with baselines + your model ---
# Baseline figures and all training times are fixed values entered by hand.
data = [
    ["SEHE (Train)", "SVM (Panwar et al., 2019)", 0.85, 0.86, 0.855, 0.855, 0.870, "8 min"],
    ["SEHE (Train)", "Random Forest", 0.87, 0.84, 0.855, 0.855, 0.872, "11 min"],
    ["SEHE (Train)", "CNN-LSTM", 0.91, 0.91, 0.910, 0.910, 0.940, "18 min"],
    # fold_stats only stores the positive-class (Epileptic) F1, so the Healthy
    # F1 of the cross-validation run is unknown and is reported as missing
    # rather than duplicated from the Epileptic score.
    # NOTE(review): for the same reason "F1 (avg)" in this row is the Epileptic
    # F1, not a class average; record per-class F1 during CV to fix it.
    ["SEHE (Train)", "AttnSleep (ours)",
     np.nan, round(attnsleep_cv_f1,3),
     round(attnsleep_cv_acc,3), round(attnsleep_cv_f1,3),
     round(attnsleep_cv_auc,3), "21 min"],

    ["SEHE (Test)", "SVM (Panwar et al., 2019)", 0.67, 0.64, 0.655, 0.655, 0.70, "8 min"],
    ["SEHE (Test)", "CNN-LSTM", 0.72, 0.69, 0.705, 0.705, 0.77, "18 min"],
    ["SEHE (Test)", "AttnSleep (ours)",
     round(attnsleep_test_healthy_f1,3), round(attnsleep_test_epileptic_f1,3),
     round(attnsleep_test_acc,3), round(attnsleep_test_avg_f1,3),
     round(attnsleep_test_auc,3), "22 min"]
]

columns = [
    "Dataset", "Method", "Healthy F1", "Epileptic F1",
    "Accuracy", "F1 (avg)", "AUC", "Avg Training Time / Fold"
]

df = pd.DataFrame(data, columns=columns)

# --- Highlight best per dataset ---
# String copy for display; missing values are shown as "n/a".
df_html = df.astype(str).mask(df.isna(), "n/a")
for dataset in df["Dataset"].unique():
    mask = df["Dataset"] == dataset
    for metric in ["Accuracy", "F1 (avg)", "AUC"]:
        best_idx = df.loc[mask, metric].idxmax()
        df_html.loc[best_idx, metric] = f"<b>{df.loc[best_idx, metric]:.3f}</b>"

# --- Create interactive Plotly table ---
# Pass the DataFrame itself: create_table then uses the column names as the
# (bold) header row. Passing `.values` made the first data row the header, and
# overwriting that header with the column names dropped the row from the table.
fig = ff.create_table(df_html, colorscale="Blues", font_colors=["black"])

fig.update_layout(
    title_text="<b>TABLE II ‚Äî Comparison Among AttnSleep and Baseline Models on the SEHE (Single-Electrode Healthy‚ÄìEpileptic) Dataset</b><br>"
               "Best results on each dataset are highlighted in bold.",
    title_x=0.5,
    width=1150,
    height=450
)

fig.show()

# --- Also print clean numeric DataFrame (for LaTeX/report copy) ---
print("\nüìä Comparison Table DataFrame (numeric):\n")
print(df)
