ResNet Baseline Training & LogReg Classification

In [None]:

import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import accuracy_score, roc_auc_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
import numpy as np
import pandas as pd
import json
from datetime import datetime
from tqdm import tqdm
import warnings
warnings.filterwarnings('ignore')


# Project paths, expressed relative to the notebook's location in the repository
DATA_DIR = '../data/processed/all_datasets_images_rgb'
SAVE_DIR = '../data/processed/resnet_experiments'
MODELS_DIR = '../data/processed/resnet_experiments/models'
FIGURES_DIR = '../figures'

# Make sure every output location exists before anything tries to write to it
for _output_dir in (SAVE_DIR, MODELS_DIR, FIGURES_DIR):
    os.makedirs(_output_dir, exist_ok=True)

# Prefer the GPU when one is visible, otherwise fall back to the CPU
_cuda_available = torch.cuda.is_available()
device = torch.device('cuda' if _cuda_available else 'cpu')
print(f"Using device: {device}")
if _cuda_available:
    _gpu_properties = torch.cuda.get_device_properties(0)
    print(f"   GPU: {torch.cuda.get_device_name(0)}")
    print(f"   Memory: {_gpu_properties.total_memory / 1e9:.1f}GB")


In [None]:
# Training hyperparameters shared by every experiment
STANDARD_CONFIG = dict(
    learning_rate=1e-4,
    batch_size=32,
    weight_decay=1e-4,
    dropout_rate=0.2,
    epochs=20,
    patience=5,
)

# Backbones under evaluation. Each entry holds a zero-argument factory that
# builds the ImageNet-pretrained network, plus the width of the feature vector
# that network produces once its final fully connected layer is removed.
ARCHITECTURES = dict(
    resnet34=dict(
        model=lambda: torchvision.models.resnet34(weights='IMAGENET1K_V1'),
        feature_dim=512,
    ),
    resnet50=dict(
        model=lambda: torchvision.models.resnet50(weights='IMAGENET1K_V1'),
        feature_dim=2048,
    ),
    resnet50x2=dict(
        model=lambda: torchvision.models.wide_resnet50_2(weights='IMAGENET1K_V1'),
        feature_dim=2048,
    ),
)

In [None]:
class GenomicDataset(Dataset):
    """Torch dataset serving (image, label) pairs for structural-variant images.

    Every entry of ``data_list`` is a dict that provides ``'filepath'`` (a file
    readable by ``torch.load`` holding either an image tensor or a dict with an
    ``'image'`` tensor) and ``'label'`` (an integer class id).
    """

    def __init__(self, data_list, transform=None):
        self.data = data_list
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        record = self.data[idx]

        try:
            loaded = torch.load(record['filepath'], map_location='cpu')
            image = loaded['image'] if isinstance(loaded, dict) else loaded

            # Force exactly three channels: zero-pad when short, truncate when long
            channel_count = image.shape[0]
            if channel_count < 3:
                filler = torch.zeros(3 - channel_count, *image.shape[1:])
                image = torch.cat([image, filler], dim=0)
            elif channel_count > 3:
                image = image[:3]

            # Values above 1 are treated as 0-255 intensities and rescaled to [0, 1]
            if image.max() > 1:
                image = image.float() / 255.0

        except Exception as e:
            # Best effort: report the failure and substitute an all-black image
            print(f"Error loading {record['filepath']}: {e}")
            image = torch.zeros(3, 224, 224)

        if self.transform:
            # The transform pipeline operates on PIL images, so convert first
            image = self.transform(transforms.ToPILImage()(image))

        label = torch.tensor(record['label'], dtype=torch.long)
        return image, label


In [None]:
# Data loading functions

def load_all_genomic_data():
    """Index every ``.pt`` image file under ``DATA_DIR`` and derive its labels.

    Each of the three expected dataset folders is scanned. A file is kept when
    its name (without the ``.pt`` suffix) splits on ``'_'`` into at least eight
    fields and the third field is ``TP`` or ``FP``; the seventh field is taken
    as the SV type. Missing dataset folders are reported and skipped.

    Returns:
        list[dict]: one record per accepted file with the keys ``dataset``,
        ``filepath``, ``label_str``, ``svtype``, ``binary_label`` (1 for TP,
        0 for FP) and ``multiclass_label`` (0 for FP, 1 for a TP with SV type
        ``DEL``, 2 for any other TP). The list is empty when nothing is found.
    """
    from collections import Counter

    print("Loading all genomic data...")

    all_data = []
    datasets = ['HG002_GRCh37', 'HG002_GRCh38', 'HG005_GRCh38']

    for dataset_name in datasets:
        dataset_path = os.path.join(DATA_DIR, dataset_name)

        if not os.path.exists(dataset_path):
            print(f"Missing dataset: {dataset_path}")
            continue

        print(f"Loading {dataset_name}...")

        # os.listdir returns entries in arbitrary, filesystem-dependent order.
        # Sorting keeps the record order, and therefore the seeded 80/20 split
        # built from it, reproducible across machines and runs.
        # NOTE(review): checkpoints trained before this change used the raw
        # listdir order; re-create their 80/20 split with care.
        filenames = sorted(f for f in os.listdir(dataset_path) if f.endswith('.pt'))

        tp_count = 0
        fp_count = 0

        for filename in tqdm(filenames, desc=f"Processing {dataset_name}"):
            parts = filename[:-3].split('_')

            # Too few fields: the name does not follow the expected layout
            if len(parts) < 8:
                continue

            label = parts[2]  # TP or FP
            svtype = parts[6]  # INS, DEL, etc.

            if label not in ('TP', 'FP'):
                continue

            all_data.append({
                'dataset': dataset_name,
                'filepath': os.path.join(dataset_path, filename),
                'label_str': label,
                'svtype': svtype,
                'binary_label': 1 if label == 'TP' else 0,
                'multiclass_label': 0 if label == 'FP' else (1 if svtype == 'DEL' else 2)
            })

            if label == 'TP':
                tp_count += 1
            else:
                fp_count += 1

        print(f"   {tp_count} TP, {fp_count} FP = {tp_count + fp_count} total")

    total = len(all_data)
    print(f"\nTotal dataset:")
    print(f"   Samples: {total}")

    # Without this guard the percentage lines below divide by zero
    if total == 0:
        print("   No samples found - check DATA_DIR and the file naming layout")
        return all_data

    # Count labels
    tp_total = sum(1 for x in all_data if x['label_str'] == 'TP')
    fp_total = total - tp_total  # only TP and FP records are ever appended

    print(f"   TP: {tp_total} ({100*tp_total/total:.1f}%)")
    print(f"   FP: {fp_total} ({100*fp_total/total:.1f}%)")

    # Count SV types in TP
    svtype_counts = Counter(x['svtype'] for x in all_data if x['label_str'] == 'TP')
    print(f"   SV types: {dict(svtype_counts)}")

    return all_data

def create_data_splits(all_data):
    """Create the stratified 80/20 split and the leave-one-genome-out splits.

    Args:
        all_data: list of sample dicts carrying at least ``'label_str'`` and
            ``'dataset'``.

    Returns:
        dict: split name -> ``{'train': [...], 'test': [...]}``. Always holds
        ``'80_20'``; holds ``'holdout_<genome>'`` for every genome that leaves
        both a non-empty train and a non-empty test set.
    """

    splits = {}

    # 1. Random 80/20 split, stratified on TP/FP with a fixed seed
    print("\nCreating 80/20 split...")
    train_80, test_20 = train_test_split(
        all_data,
        test_size=0.2,
        stratify=[x['label_str'] for x in all_data],
        random_state=42
    )
    splits['80_20'] = {'train': train_80, 'test': test_20}
    print(f"   Train: {len(train_80)}, Test: {len(test_20)}")

    # 2. Leave-one-genome-out splits
    print("\nCreating leave-one-genome-out splits...")
    genomes = ['HG002_GRCh37', 'HG002_GRCh38', 'HG005_GRCh38']

    for test_genome in genomes:
        train_data = [x for x in all_data if x['dataset'] != test_genome]
        test_data = [x for x in all_data if x['dataset'] == test_genome]

        # A genome that was not loaded (or is the only one loaded) leaves one
        # side empty. Training on such a split can only fail after wasting an
        # epoch, so it is dropped here instead.
        if not train_data or not test_data:
            print(f"   {test_genome}: skipped (Train={len(train_data)}, Test={len(test_data)})")
            continue

        splits[f'holdout_{test_genome}'] = {'train': train_data, 'test': test_data}
        print(f"   {test_genome}: Train={len(train_data)}, Test={len(test_data)}")

    return splits

def create_transforms():
    """Build the train and test preprocessing pipelines for ImageNet backbones.

    Both pipelines resize to 224x224, convert to a tensor and normalise with
    the ImageNet channel statistics. The training pipeline additionally applies
    a random horizontal flip with probability 0.5.

    Returns:
        tuple: ``(train_transform, test_transform)``.
    """
    # Channel statistics the torchvision ImageNet weights were trained with.
    # The green-channel mean is 0.456; the earlier 0.458 was a typo.
    imagenet_mean = [0.485, 0.456, 0.406]
    imagenet_std = [0.229, 0.224, 0.225]

    train_transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.ToTensor(),
        transforms.Normalize(imagenet_mean, imagenet_std)
    ])

    test_transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(imagenet_mean, imagenet_std)
    ])

    return train_transform, test_transform


In [None]:
# ResNet Model

class FineTunedResNet(nn.Module):
    """ResNet with a frozen pretrained backbone and a trainable linear head.

    Args:
        architecture: key into ``ARCHITECTURES`` selecting the backbone.
        num_classes: number of output logits of the classifier head.
        dropout: dropout probability applied to the backbone features before
            the linear layer.
    """

    def __init__(self, architecture, num_classes, dropout=0.2):
        super().__init__()

        self.architecture = architecture
        self.num_classes = num_classes

        # Load backbone
        self.backbone = ARCHITECTURES[architecture]['model']()
        feature_dim = ARCHITECTURES[architecture]['feature_dim']

        # Remove original classifier so the backbone outputs pooled features
        self.backbone.fc = nn.Identity()

        # Freeze backbone weights and put it in inference mode
        for param in self.backbone.parameters():
            param.requires_grad = False
        self.backbone.eval()

        # Trainable classifier
        self.classifier = nn.Sequential(
            nn.Dropout(dropout),
            nn.Linear(feature_dim, num_classes)
        )

        # Count parameters
        total_params = sum(p.numel() for p in self.parameters())
        trainable_params = sum(p.numel() for p in self.parameters() if p.requires_grad)

        print(f"   {architecture}: {total_params:,} total, {trainable_params:,} trainable")

    def train(self, mode=True):
        """Switch the head between train/eval while keeping the backbone in eval.

        ``requires_grad = False`` and ``torch.no_grad()`` stop gradient updates
        but not BatchNorm running-statistic updates, which happen whenever the
        layer is in training mode. Pinning the backbone to eval mode keeps the
        frozen feature extractor truly unchanged during training.
        """
        super().train(mode)
        self.backbone.eval()
        return self

    def forward(self, x):
        # No graph is needed through the frozen backbone
        with torch.no_grad():
            features = self.backbone(x)
        return self.classifier(features)


In [None]:
# Training function

def train_resnet_model(architecture, num_classes, train_data, test_data, experiment_name):
    """Train the classifier head of one ResNet variant and save its best checkpoint.

    Args:
        architecture: key into ``ARCHITECTURES``.
        num_classes: 2 trains on each sample's ``'binary_label'``; any other
            value trains on ``'multiclass_label'``.
        train_data: list of sample dicts (``'filepath'`` plus the label keys).
        test_data: list of sample dicts evaluated after every epoch.
        experiment_name: identifier; also the sub-directory of ``MODELS_DIR``
            that receives ``best_model.pth`` and ``model_info.json``.

    Returns:
        dict with the keys ``architecture``, ``num_classes``, ``experiment``,
        ``best_test_acc`` (percent), ``final_test_auc`` (AUC of the last epoch
        run, 0 when not binary or undefined), ``history``, ``model_path``
        (``None`` if no checkpoint was written) and ``model_save_dir``.

    Raises:
        ValueError: if ``train_data`` or ``test_data`` is empty.

    NOTE(review): the held-out set drives LR scheduling, early stopping and
    checkpoint selection, so ``best_test_acc`` is an optimistic estimate.
    """

    print(f"\nTraining {architecture} ({num_classes}-class) - {experiment_name}")

    # Fail fast: an empty set would otherwise surface as a ZeroDivisionError
    # in the accuracy computation after a full pass over the data.
    if not train_data or not test_data:
        raise ValueError(
            f"{experiment_name}: train and test sets must be non-empty "
            f"(train={len(train_data)}, test={len(test_data)})"
        )

    # Create model save directory
    model_save_dir = os.path.join(MODELS_DIR, experiment_name)
    os.makedirs(model_save_dir, exist_ok=True)
    print(f"Model will be saved to: {model_save_dir}")

    # Create transforms and datasets
    train_transform, test_transform = create_transforms()

    # Set label key based on num_classes
    label_key = 'binary_label' if num_classes == 2 else 'multiclass_label'

    # Copy each record with the chosen label exposed under the 'label' key
    train_data_labeled = [{**item, 'label': item[label_key]} for item in train_data]
    test_data_labeled = [{**item, 'label': item[label_key]} for item in test_data]

    train_dataset = GenomicDataset(train_data_labeled, train_transform)
    test_dataset = GenomicDataset(test_data_labeled, test_transform)

    # DataLoaders
    train_loader = DataLoader(train_dataset, batch_size=STANDARD_CONFIG['batch_size'],
                             shuffle=True, num_workers=4, pin_memory=True)
    test_loader = DataLoader(test_dataset, batch_size=STANDARD_CONFIG['batch_size'],
                            shuffle=False, num_workers=4, pin_memory=True)

    # Model
    model = FineTunedResNet(architecture, num_classes, STANDARD_CONFIG['dropout_rate']).to(device)

    # Optimizer: only the parameters that actually receive gradients
    optimizer = torch.optim.AdamW(
        [p for p in model.parameters() if p.requires_grad],
        lr=STANDARD_CONFIG['learning_rate'],
        weight_decay=STANDARD_CONFIG['weight_decay']
    )

    # Scheduler: halve the LR when test accuracy stalls. The deprecated
    # `verbose` argument is omitted; its default is already silent.
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode='max', factor=0.5, patience=3
    )

    # Training loop
    best_test_acc = 0
    best_model_state = None
    patience_counter = 0
    history = []
    test_auc = 0  # defined even if the configured epoch count is zero

    for epoch in range(STANDARD_CONFIG['epochs']):
        # Training
        model.train()
        train_correct = 0
        train_total = 0

        for images, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}"):
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = F.cross_entropy(outputs, labels)
            loss.backward()
            optimizer.step()

            _, predicted = outputs.max(1)
            train_total += labels.size(0)
            train_correct += predicted.eq(labels).sum().item()

        train_acc = 100. * train_correct / train_total

        # Testing
        model.eval()
        test_correct = 0
        test_total = 0
        test_probs = []
        test_targets = []

        with torch.no_grad():
            for images, labels in test_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)

                _, predicted = outputs.max(1)
                test_total += labels.size(0)
                test_correct += predicted.eq(labels).sum().item()

                # For AUC (binary only)
                if num_classes == 2:
                    probs = F.softmax(outputs, dim=1)
                    test_probs.extend(probs[:, 1].cpu().numpy())
                    test_targets.extend(labels.cpu().numpy())

        test_acc = 100. * test_correct / test_total

        # AUC needs both classes present. Cast to a plain float so the
        # checkpoint and JSON metadata hold no numpy scalar objects.
        if num_classes == 2 and len(set(test_targets)) > 1:
            test_auc = float(roc_auc_score(test_targets, test_probs))
        else:
            test_auc = 0

        scheduler.step(test_acc)

        # Save best model state
        if test_acc > best_test_acc:
            best_test_acc = test_acc
            # state_dict() returns references to the live tensors, and
            # dict.copy() is shallow, so later optimizer steps would overwrite
            # the "best" weights. Clone every tensor to freeze the snapshot.
            best_model_state = {
                name: tensor.detach().cpu().clone()
                for name, tensor in model.state_dict().items()
            }
            patience_counter = 0
            print(f"   New best: {test_acc:.2f}% - Model state saved!")
        else:
            patience_counter += 1

        history.append({
            'epoch': epoch + 1,
            'train_acc': train_acc,
            'test_acc': test_acc,
            'test_auc': test_auc
        })

        print(f"   Epoch {epoch+1}: Train={train_acc:.1f}%, Test={test_acc:.1f}%, AUC={test_auc:.3f}")

        # Early stopping
        if patience_counter >= STANDARD_CONFIG['patience']:
            print(f"   Early stopping at epoch {epoch+1}")
            break

    # Save the best model to disk
    checkpoint_path = None
    if best_model_state is not None:
        checkpoint = {
            'model_state_dict': best_model_state,
            'architecture': architecture,
            'num_classes': num_classes,
            'experiment_name': experiment_name,
            'best_test_acc': best_test_acc,
            'final_test_auc': test_auc,
            'config': STANDARD_CONFIG,
            'history': history,
            'model_config': {
                'feature_dim': ARCHITECTURES[architecture]['feature_dim'],
                'dropout_rate': STANDARD_CONFIG['dropout_rate']
            }
        }

        checkpoint_path = os.path.join(model_save_dir, 'best_model.pth')
        torch.save(checkpoint, checkpoint_path)
        print(f"   Model saved to: {checkpoint_path}")

        # Save model info
        info_path = os.path.join(model_save_dir, 'model_info.json')
        model_info = {
            'experiment_name': experiment_name,
            'architecture': architecture,
            'num_classes': num_classes,
            'best_test_acc': best_test_acc,
            'final_test_auc': test_auc,
            'checkpoint_path': checkpoint_path,
            'saved_at': datetime.now().isoformat()
        }
        with open(info_path, 'w') as f:
            json.dump(model_info, f, indent=2)

        print(f"   Model info saved to: {info_path}")

    print(f"   Best test accuracy: {best_test_acc:.2f}%")

    return {
        'architecture': architecture,
        'num_classes': num_classes,
        'experiment': experiment_name,
        'best_test_acc': best_test_acc,
        'final_test_auc': test_auc,
        'history': history,
        'model_path': checkpoint_path,
        'model_save_dir': model_save_dir
    }


In [None]:

def load_saved_resnet_model(checkpoint_path):
    """Rebuild a ``FineTunedResNet`` from a checkpoint written by training.

    Args:
        checkpoint_path: path to a ``best_model.pth`` file.

    Returns:
        tuple: ``(model, checkpoint)``. The model is on ``device`` and in eval
        mode; ``checkpoint`` is the full dict stored in the file.
    """

    print(f"Loading model from: {checkpoint_path}")

    # The checkpoint stores plain-Python metadata (config, history) next to
    # the weights, so it is loaded with full unpickling, matching
    # analyze_saved_models. Full unpickling can execute arbitrary code:
    # only load checkpoints produced by this project.
    checkpoint = torch.load(checkpoint_path, map_location='cpu', weights_only=False)

    # Recreate model
    architecture = checkpoint['architecture']
    num_classes = checkpoint['num_classes']

    model = FineTunedResNet(
        architecture,
        num_classes,
        checkpoint['model_config']['dropout_rate']
    )

    # Load weights
    model.load_state_dict(checkpoint['model_state_dict'])
    model.to(device)
    # A freshly built module is in training mode; a restored model is meant
    # for inference, so dropout must be off.
    model.eval()

    print(f"Loaded {architecture} ({num_classes}-class)")
    print(f"   Best accuracy: {checkpoint['best_test_acc']:.2f}%")
    print(f"   Final AUC: {checkpoint['final_test_auc']:.3f}")

    return model, checkpoint


def analyze_saved_models():
    """Summarise every checkpoint found under ``MODELS_DIR``.

    Reads ``best_model.pth`` from each sub-directory, prints a ranking, the
    best binary and 3-class models, the overall best, and its gap to the
    hard-coded CSV-Filter reference accuracy.

    Returns:
        pandas.DataFrame | None: one row per readable checkpoint with the
        columns ``Model``, ``Classes``, ``Split``, ``Accuracy``, ``AUC`` and
        ``Full_Name``; ``None`` when the directory or all checkpoints are
        missing.
    """

    print("ANALYZING SAVED RESNET MODELS")
    print("="*50)

    if not os.path.exists(MODELS_DIR):
        print("No models directory found")
        return None

    results = []

    # Sorted so that ties in accuracy are always broken the same way
    for model_dir in sorted(os.listdir(MODELS_DIR)):
        model_path = os.path.join(MODELS_DIR, model_dir)
        checkpoint_path = os.path.join(model_path, 'best_model.pth')

        if os.path.exists(checkpoint_path):
            try:
                checkpoint = torch.load(checkpoint_path, map_location='cpu', weights_only=False)

                # Experiment names look like "<arch>_<n>class_<split>". The
                # split itself contains underscores ("80_20",
                # "holdout_HG002_GRCh38"), so take everything after "class_"
                # rather than the last "_" field, which yields "20" or an
                # ambiguous "GRCh38".
                experiment_name = checkpoint['experiment_name']
                _, marker, split_name = experiment_name.partition('class_')
                if not marker or not split_name:
                    split_name = 'unknown'

                results.append({
                    'Model': checkpoint['architecture'],
                    'Classes': f"{checkpoint['num_classes']}-class",
                    'Split': split_name,
                    'Accuracy': checkpoint['best_test_acc'],
                    'AUC': checkpoint.get('final_test_auc', 0),
                    'Full_Name': experiment_name
                })
            except Exception as e:
                # One unreadable checkpoint must not abort the whole report
                print(f"Error loading {model_dir}: {str(e)[:100]}...")

    if not results:
        print("No saved models found!")
        return None

    # Create DataFrame
    df = pd.DataFrame(results)

    print(f"Found {len(df)} saved models")

    # Performance table
    print(f"\nTOP 10 MODELS BY ACCURACY:")
    top_models = df.nlargest(10, 'Accuracy')[['Model', 'Classes', 'Split', 'Accuracy', 'AUC']]
    print(top_models.to_string(index=False, float_format='%.2f'))

    # Best by category
    print(f"\nBEST BY CATEGORY:")

    # Best binary
    binary_models = df[df['Classes'] == '2-class']
    if len(binary_models) > 0:
        binary_best = binary_models.nlargest(1, 'Accuracy').iloc[0]
        print(f"   Binary: {binary_best['Model']} ({binary_best['Split']}) - {binary_best['Accuracy']:.2f}%")

    # Best 3-class
    multiclass_models = df[df['Classes'] == '3-class']
    if len(multiclass_models) > 0:
        multiclass_best = multiclass_models.nlargest(1, 'Accuracy').iloc[0]
        print(f"   3-class: {multiclass_best['Model']} ({multiclass_best['Split']}) - {multiclass_best['Accuracy']:.2f}%")

    # Overall champion
    overall_best = df.nlargest(1, 'Accuracy').iloc[0]
    print(f"\nOVERALL CHAMPION:")
    print(f"   {overall_best['Full_Name']}: {overall_best['Accuracy']:.2f}% (AUC: {overall_best['AUC']:.3f})")

    # CSV-Filter comparison against a fixed reference accuracy (percent)
    csv_target = 94.94
    gap = csv_target - overall_best['Accuracy']
    print(f"\nCSV-FILTER COMPARISON:")
    print(f"   Target: {csv_target}%")
    print(f"   Your best: {overall_best['Accuracy']:.2f}%")
    print(f"   Gap: {gap:.2f}%")

    if gap <= 0:
        print(f"   YOU BEAT CSV-FILTER!")
    elif gap <= 2:
        print(f"   Very close! Excellent performance on realistic data.")
    else:
        print(f"   Good performance - remember you're solving a harder problem!")

    return df

In [None]:
# Run experiments

def run_all_resnet_experiments():
    """Train every architecture x class-setup x data-split combination.

    Loads the data, builds the splits, and calls ``train_resnet_model`` for
    each combination. After every successful run the accumulated results are
    rewritten to ``all_results.csv`` in ``SAVE_DIR``. A failing experiment is
    reported and skipped, so the remaining ones still run.

    Returns:
        list[dict]: the result dict of every experiment that completed.
    """

    print("RUNNING ALL RESNET EXPERIMENTS")
    print("="*60)

    # Load data
    all_data = load_all_genomic_data()
    splits = create_data_splits(all_data)

    # Experiment configuration
    architectures = list(ARCHITECTURES.keys())
    class_setups = [2, 3]  # Binary and 3-class
    split_names = list(splits.keys())

    total_experiments = len(architectures) * len(class_setups) * len(split_names)
    print(f"\nPlanning {total_experiments} experiments:")
    print(f"   Architectures: {architectures}")
    print(f"   Class setups: {class_setups}")
    print(f"   Data splits: {split_names}")

    # Run experiments
    all_results = []
    results_csv = os.path.join(SAVE_DIR, 'all_results.csv')

    for architecture in architectures:
        for num_classes in class_setups:
            for split_name in split_names:
                train_data = splits[split_name]['train']
                test_data = splits[split_name]['test']

                experiment_name = f"{architecture}_{num_classes}class_{split_name}"

                try:
                    result = train_resnet_model(architecture, num_classes, train_data, test_data, experiment_name)
                    all_results.append(result)

                    # Save intermediate results so a later crash loses nothing
                    pd.DataFrame(all_results).to_csv(results_csv, index=False)

                except Exception as e:
                    # Keep the sweep going; one bad configuration should not
                    # cancel the remaining experiments.
                    print(f"Failed {experiment_name}: {e}")

                finally:
                    # Release cached GPU memory after every experiment,
                    # including failed ones (e.g. out-of-memory), where it
                    # matters most.
                    torch.cuda.empty_cache()

    # Final summary
    print(f"\nEXPERIMENT SUMMARY:")
    print(f"   Completed: {len(all_results)}/{total_experiments}")

    if all_results:
        results_df = pd.DataFrame(all_results)

        print(f"\nBEST RESULTS:")
        best_by_arch = results_df.groupby('architecture')['best_test_acc'].max()
        for arch, acc in best_by_arch.items():
            print(f"   {arch}: {acc:.2f}%")

        overall_best = results_df.loc[results_df['best_test_acc'].idxmax()]
        print(f"\nOVERALL BEST: {overall_best['best_test_acc']:.2f}%")
        print(f"   Model: {overall_best['architecture']} ({overall_best['num_classes']}-class)")
        print(f"   Split: {overall_best['experiment']}")
        print(f"   Saved at: {overall_best['model_path']}")

        # CSV-Filter comparison against a fixed reference accuracy (percent)
        csv_filter_target = 94.94
        if overall_best['best_test_acc'] >= csv_filter_target:
            print(f"BEAT CSV-FILTER! (+{overall_best['best_test_acc'] - csv_filter_target:.2f}%)")
        else:
            print(f"Gap to CSV-Filter: {csv_filter_target - overall_best['best_test_acc']:.2f}%")

        # List all saved models
        print(f"\nSAVED MODELS:")
        for _, result in results_df.iterrows():
            if result['model_path']:
                print(f"   {result['experiment']}: {result['model_path']}")

    return all_results

In [None]:
# Logistic Regression on latents

def get_champion_model_path():
    """Locate the checkpoint of the most accurate saved binary model.

    Returns:
        tuple: ``(champion_path, champion_name)``. Both are ``None`` when no
        saved models, or no 2-class models, are available. A pair is returned
        in every case so callers can always tuple-unpack the result; the
        previous bare ``None`` raised a TypeError on unpacking.
    """

    analysis_df = analyze_saved_models()
    if analysis_df is None:
        return None, None

    # Get best binary model
    binary_models = analysis_df[analysis_df['Classes'] == '2-class']
    if len(binary_models) == 0:
        print("No binary models found!")
        return None, None

    best_binary = binary_models.nlargest(1, 'Accuracy').iloc[0]
    champion_name = best_binary['Full_Name']
    champion_path = os.path.join(MODELS_DIR, champion_name, 'best_model.pth')

    print(f"Champion model: {champion_name}")
    print(f"   Accuracy: {best_binary['Accuracy']:.2f}%")
    print(f"   Path: {champion_path}")

    return champion_path, champion_name

def load_champion_data_split(champion_name):
    """Rebuild the train/test split encoded in a champion's experiment name.

    The name is searched for one of the known split markers. Hold-out markers
    select the named genome as the test set; ``80_20`` re-runs the seeded,
    stratified random split. Every returned record gets a ``'label'`` key
    copied from its ``'binary_label'``.

    Returns:
        tuple: ``(train_data, test_data)``, or ``(None, None)`` when the name
        contains no known split marker.
    """

    print(f"Loading data split for champion: {champion_name}")

    # Checked in this order; the first marker present in the name wins
    split_markers = (
        ('holdout_HG005_GRCh38', 'HG005_GRCh38'),
        ('holdout_HG002_GRCh38', 'HG002_GRCh38'),
        ('holdout_HG002_GRCh37', 'HG002_GRCh37'),
        ('80_20', '80_20'),
    )
    test_genome = next(
        (split for marker, split in split_markers if marker in champion_name),
        None,
    )
    if test_genome is None:
        print(f"Unknown split in champion name: {champion_name}")
        return None, None

    # Reload everything, then carve out the same partition
    all_data = load_all_genomic_data()

    if test_genome != '80_20':
        train_data = [sample for sample in all_data if sample['dataset'] != test_genome]
        test_data = [sample for sample in all_data if sample['dataset'] == test_genome]
    else:
        strata = [sample['label_str'] for sample in all_data]
        train_data, test_data = train_test_split(
            all_data,
            test_size=0.2,
            stratify=strata,
            random_state=42
        )

    # Expose the binary target under the key the dataset class reads
    train_data = [{**sample, 'label': sample['binary_label']} for sample in train_data]
    test_data = [{**sample, 'label': sample['binary_label']} for sample in test_data]

    print(f"   Split: {test_genome}")
    print(f"   Train: {len(train_data)} samples")
    print(f"   Test: {len(test_data)} samples")

    return train_data, test_data

def extract_features_from_champion(feature_extractor, dataloader):
    """Run every batch through ``feature_extractor`` and gather the outputs.

    The extractor is put in eval mode and called without gradient tracking.

    Returns:
        tuple: ``(X, y)`` numpy arrays; the per-batch outputs and labels
        concatenated along axis 0.
    """

    print("Extracting latent features...")

    feature_extractor.eval()
    feature_chunks, label_chunks = [], []

    with torch.no_grad():
        for images, labels in tqdm(dataloader, desc="Extracting features"):
            # Inputs go to the compute device; outputs come back to the host
            batch_features = feature_extractor(images.to(device))

            feature_chunks.append(batch_features.cpu().numpy())
            label_chunks.append(labels.numpy())

    # Stitch the per-batch arrays together
    X = np.concatenate(feature_chunks, axis=0)
    y = np.concatenate(label_chunks, axis=0)

    print(f"   Features shape: {X.shape}")
    print(f"   Labels shape: {y.shape}")
    print(f"   Class balance: {np.bincount(y)}")

    return X, y

def train_logistic_regression_on_latents():
    """Fit logistic regression on backbone features of the champion model.

    Finds the best saved binary model, rebuilds its data split, extracts
    backbone features for train and test with the test-time transform, tries
    several regularisation strengths ``C`` and reports the most accurate one
    next to the champion's stored metrics.

    Returns:
        tuple | None: ``(best_model, X_train, X_test, y_train, y_test,
        final_accuracy, final_auc)`` with accuracy in percent, or ``None``
        when no champion checkpoint or no matching data split is available.

    NOTE(review): ``C`` is chosen by accuracy on the test set itself, so the
    reported accuracy is optimistically biased.
    """

    print("TRAINING LOGISTIC REGRESSION ON RESNET LATENTS")
    print("="*60)

    # Get champion model. The lookup may signal "nothing found" with either a
    # bare None or a (None, None) pair; accept both instead of unpacking
    # blindly.
    champion = get_champion_model_path()
    champion_path, champion_name = champion if champion is not None else (None, None)
    if champion_path is None:
        print("No champion model found. Run ResNet training first.")
        return None

    # Load champion model
    champion_model, checkpoint = load_saved_resnet_model(champion_path)

    # Extract backbone for feature extraction
    feature_extractor = champion_model.backbone

    # Load same data split as champion
    train_data, test_data = load_champion_data_split(champion_name)
    if train_data is None:
        return None

    # Create transforms and datasets
    _, test_transform = create_transforms()  # Use test transform for both

    train_dataset = GenomicDataset(train_data, test_transform)
    test_dataset = GenomicDataset(test_data, test_transform)

    # Create dataloaders
    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=False, num_workers=4)
    test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=4)

    # Extract features
    print("Extracting training features...")
    X_train, y_train = extract_features_from_champion(feature_extractor, train_loader)

    print("Extracting test features...")
    X_test, y_test = extract_features_from_champion(feature_extractor, test_loader)

    # Train logistic regression with regularization search
    print("Training logistic regression...")

    C_values = [0.001, 0.01, 0.1, 1.0, 10.0, 100.0]
    # Start below any reachable accuracy so a model is always selected, even
    # if every candidate scores 0.
    best_score = float('-inf')
    best_model = None
    best_C = None

    for C in C_values:
        model = LogisticRegression(C=C, max_iter=1000, random_state=42)
        model.fit(X_train, y_train)

        # Evaluate on test set
        y_pred = model.predict(X_test)
        y_proba = model.predict_proba(X_test)[:, 1]

        accuracy = accuracy_score(y_test, y_pred)
        auc = roc_auc_score(y_test, y_proba)

        print(f"   C={C:6.3f}: Accuracy={accuracy:.4f}, AUC={auc:.4f}")

        if accuracy > best_score:
            best_score = accuracy
            best_model = model
            best_C = C

    # Final evaluation with best model
    y_pred = best_model.predict(X_test)
    y_proba = best_model.predict_proba(X_test)[:, 1]

    final_accuracy = accuracy_score(y_test, y_pred) * 100
    final_auc = roc_auc_score(y_test, y_proba)

    print(f"\nRESULTS:")
    print(f"   Best C: {best_C}")
    print(f"   Logistic Regression Accuracy: {final_accuracy:.2f}%")
    print(f"   Logistic Regression AUC: {final_auc:.3f}")

    # Compare to original ResNet (metrics as stored in the checkpoint)
    original_accuracy = checkpoint['best_test_acc']
    original_auc = checkpoint['final_test_auc']

    print(f"\nCOMPARISON:")
    print(f"   Original ResNet: {original_accuracy:.2f}% (AUC: {original_auc:.3f})")
    print(f"   Linear on Latents: {final_accuracy:.2f}% (AUC: {final_auc:.3f})")
    print(f"   Difference: {final_accuracy - original_accuracy:.2f}%")

    if final_accuracy > original_accuracy:
        print("   Linear regression on latents BEATS the original classifier!")
    elif abs(final_accuracy - original_accuracy) < 1:
        print("   Linear regression performs similarly - latents capture most info!")
    else:
        print("   Original classifier is better - nonlinear relationships matter")

    # Classification report
    print(classification_report(y_test, y_pred, target_names=['FP', 'TP']))

    return best_model, X_train, X_test, y_train, y_test, final_accuracy, final_auc

In [None]:
# Usage banner: lists the notebook's entry points and the experiment grid
print("MAIN FUNCTIONS:")
print("   results = run_all_resnet_experiments()")
print("   analysis_df = analyze_saved_models()")
print("   linear_model, X_train, X_test, y_train, y_test, acc, auc = train_logistic_regression_on_latents()")
print()
print("EXPERIMENT DETAILS:")
print("   3 architectures (ResNet34/50/50x2)")
print("   2 class setups (binary TP/FP, 3-class FP/DEL/INS)")
print("   4 data splits (80/20 + 3 leave-one-genome-out)")
# 3 architectures x 2 class setups x 4 splits = 24 (the banner used to say 32)
print("   = 24 total experiments")
print("   Champion model used for LogReg latent classification")

# To run all experiments:
# results = run_all_resnet_experiments()

# To analyze results:
# analysis_df = analyze_saved_models()

# To train LogReg on latents:
# linear_model, X_train, X_test, y_train, y_test, acc, auc = train_logistic_regression_on_latents()

In [None]:
# Launch the full experiment sweep; `results` receives the list of result
# dicts for every experiment that completed.
results = run_all_resnet_experiments()