# In [None]:
import os, sys
sys.path.insert(0, os.path.abspath('..'))
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, ConcatDataset
from pkldataset import PKLDataset
import random
import numpy as np
import form, gen

def set_seed(seed):
    """Seed every random number generator used by this script.

    Seeds Python's ``random``, NumPy's global generator and PyTorch (plus
    all CUDA devices when CUDA is available), then switches cuDNN to
    deterministic mode with autotuning disabled.

    Args:
        seed: Integer seed handed to each generator.
    """
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)

    # Trade cuDNN speed for run-to-run repeatability.
    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False

def get_model(input_length: int = 2800, num_classes: int = 10, input_channels: int = 1):
    """Build a three-stage 1-D CNN classifier.

    Each convolutional stage is Conv1d(kernel 31, padding 15) -> BatchNorm1d
    -> ReLU -> MaxPool1d(2), with 16, 32 and 64 output channels respectively.
    The flattened features then pass through a 128-unit hidden layer with
    dropout before the final ``num_classes`` logits.

    Args:
        input_length: Number of samples along the signal axis; sizes the
            first fully connected layer.
        num_classes: Number of output logits.
        input_channels: Channel count expected by the first convolution.

    Returns:
        A freshly initialised ``CNN`` module (on the default device).
    """

    def conv_stage(in_ch: int, out_ch: int) -> nn.Sequential:
        # Kernel 31 with padding 15 preserves the length; the pool halves it.
        return nn.Sequential(
            nn.Conv1d(in_ch, out_ch, kernel_size=31, padding=15),
            nn.BatchNorm1d(out_ch),
            nn.ReLU(inplace=True),
            nn.MaxPool1d(2),
        )

    class CNN(nn.Module):
        def __init__(self):
            super().__init__()
            self.conv1 = conv_stage(input_channels, 16)
            self.conv2 = conv_stage(16, 32)
            self.conv3 = conv_stage(32, 64)

            # Three pooling layers of factor 2 shrink the length by 8.
            flat_features = 64 * (input_length // 8)
            self.fc = nn.Sequential(
                nn.Flatten(),
                nn.Linear(flat_features, 128),
                nn.ReLU(inplace=True),
                nn.Dropout(0.5),
                nn.Linear(128, num_classes),
            )

        def forward(self, x: torch.Tensor) -> torch.Tensor:
            # A 2-D input gets a channel axis inserted at position 1.
            if x.dim() == 2:
                x = x.unsqueeze(1)
            for stage in (self.conv1, self.conv2, self.conv3):
                x = stage(x)
            return self.fc(x)

    return CNN()
# Device setup: use the GPU when one is available, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
criterion = nn.CrossEntropyLoss()

# Training dataset names: ../datasets/RPDC197/train_20 ... train_600.
train_paths = [
    f"../datasets/RPDC197/train_{size}"
    for size in (20, 50, 100, 200, 300, 400, 500, 600)
]

# Validation datasets to test each model on: one val_1000 split per dataset.
val_paths = [
    f"../datasets/{name}/val_1000"
    for name in ("RPDC185", "RPDC188", "RPDC191", "RPDC194", "RPDC197")
]

# One full experiment run is performed per seed.
seeds = [101, 202, 303, 404, 505, 606, 707, 808, 909, 1001]

# Nested results dict: { train_path: { val_path: [acc, acc, ...] } }
results = {
    train_key: {val_key: [] for val_key in val_paths}
    for train_key in train_paths
}

# === Generator Pretraining ===
# NOTE(review): absolute, machine-specific Windows path; the script only runs
# where this directory exists.
train_dataset_1 = PKLDataset(
    r"C:\Users\gus07\Desktop\data hiwi\preprocessing\HC\T197\RP"
)
train_loader_1 = DataLoader(train_dataset_1, batch_size=64, shuffle=True)

# The saved generator is passed to gen.generate() again for each training set
# as pretrained_generator_path in the experiment loop.
gen.generate(
    train_loader_1,
    num_epochs=150,
    num_samples=10,
    save_new_generator_path="generator_model.pth",
)
def train_model(model, train_loader, criterion, optimizer, scheduler,
                num_epochs=100, device=device, max_grad_norm=1.0):
    """Train ``model`` and restore the weights of its lowest-loss epoch.

    Args:
        model: Network to optimise in place.
        train_loader: DataLoader yielding ``(X, Y)`` batches. ``Y`` is reduced
            with ``argmax(dim=1)`` to class indices (presumably one-hot
            labels; verify against PKLDataset).
        criterion: Loss called as ``criterion(logits, class_indices)``.
        optimizer: Optimizer stepped once per batch.
        scheduler: Learning-rate scheduler stepped once per epoch.
        num_epochs: Number of passes over ``train_loader``.
        device: Device the batches are moved to.
        max_grad_norm: Gradient-norm clipping threshold.

    Returns:
        The same ``model`` instance, loaded with the parameters recorded at
        the end of the epoch with the lowest mean training loss.
    """
    best_loss = float('inf')
    best_state = None
    for _ in range(num_epochs):
        model.train()
        running = 0.0
        for X, Y in train_loader:
            X, Y = X.to(device), Y.to(device)
            y_idx = Y.argmax(dim=1)
            optimizer.zero_grad()
            out = model(X)
            loss = criterion(out, y_idx)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)
            optimizer.step()
            # Weight by batch size so the epoch mean is per-sample.
            running += loss.item() * X.size(0)
        scheduler.step()
        epoch_loss = running / len(train_loader.dataset)
        if epoch_loss < best_loss:
            best_loss = epoch_loss
            # state_dict() returns references to the live tensors, which later
            # optimizer steps keep mutating. Clone them so this is a real
            # snapshot of the best epoch.
            best_state = {
                name: tensor.detach().clone()
                for name, tensor in model.state_dict().items()
            }
    if best_state is not None:
        model.load_state_dict(best_state)
    return model


for seed in seeds:
    print(f"\n=== Seed {seed} ===")
    set_seed(seed)

    for train_path in train_paths:
        print(f"--- Transfer Learning on {train_path} ---")
        # load real
        ds_real = PKLDataset(train_path)
        loader_real = DataLoader(ds_real, batch_size=64, shuffle=True)

        # generate synthetic under the same seed
        gen.generate(
            loader_real,
            num_epochs=150,
            num_samples=20,
            pretrained_generator_path="generator_model.pth"
        )
        form.format()

        # build combined dataset
        # NOTE(review): presumably form.format() writes the generated samples
        # into this directory; confirm against the form module.
        synth_ds = PKLDataset("synth_data/individual_samples")
        combined = ConcatDataset([ds_real, synth_ds])
        loader_comb = DataLoader(combined, batch_size=32, shuffle=True)

        # build & train model
        model = get_model().to(device)
        optimizer = optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-5)
        scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=50, gamma=0.1)

        model = train_model(model, loader_comb, criterion, optimizer, scheduler)

        # 5) evaluate & append to results
        model.eval()
        with torch.no_grad():
            for vp in val_paths:
                val_ds = PKLDataset(vp)
                val_loader = DataLoader(val_ds, batch_size=64, shuffle=False)
                correct = total = 0
                for X, Y in val_loader:
                    X, Y = X.to(device), Y.to(device)
                    y_idx = Y.argmax(dim=1)
                    preds = model(X).argmax(dim=1)
                    correct += (preds == y_idx).sum().item()
                    total += Y.size(0)
                acc = 100. * correct / total
                results[train_path][vp].append(acc)
                print(f"[{train_path} → {vp}] Seed {seed}: {acc:.2f}%")

# 6) after all seeds, report the mean and sample standard deviation (ddof=1)
#    of the accuracies for every (training set, validation set) pair
print("\n=== Summary across seeds ===")
for tp in train_paths:
    per_val = results[tp]
    for vp in val_paths:
        arr = np.asarray(per_val[vp])
        mean, std = np.mean(arr), np.std(arr, ddof=1)
        print(f"{tp} -> {vp}: Mean = {mean:.2f}%, Std = {std:.2f}%")
