In [15]:
# ================================
# PORTION 3 – RAAT NOTEBOOK
# Xception Teacher → MobileNetV3 Student
# Kaggle-ready, reproducible
# ================================

# --------- Imports ---------
import os, time, random, math
from pathlib import Path
import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset


import cv2
import albumentations as A
from albumentations.pytorch import ToTensorV2

from sklearn.metrics import accuracy_score, f1_score, recall_score

# --------- Reproducibility ---------
def set_seed(seed=42):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

set_seed(42)

# --------- Constants ---------
CLASS_NAMES = ['akiec','bcc','bkl','df','mel','nv','vasc']
CLASS_TO_IDX = {c:i for i,c in enumerate(CLASS_NAMES)}

IMAGENET_MEAN = [0.485,0.456,0.406]
IMAGENET_STD  = [0.229,0.224,0.225]

IMAGE_DIR_1 = '/kaggle/input/skin-cancer-mnist-ham10000/HAM10000_images_part_1'
IMAGE_DIR_2 = '/kaggle/input/skin-cancer-mnist-ham10000/HAM10000_images_part_2'

DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

# --------- Dataset ---------
class HAM10000Dataset(Dataset):
    def __init__(self, csv_path, img_size, is_train):
        self.df = pd.read_csv(csv_path)
        self.df['label'] = self.df['dx'].map(CLASS_TO_IDX)
        self.img_size = img_size
        self.is_train = is_train

        if is_train:
            self.tf = A.Compose([
                A.Resize(img_size,img_size),
                A.HorizontalFlip(p=0.5),
                A.VerticalFlip(p=0.5),
                A.RandomRotate90(p=0.5),
                A.ColorJitter(0.2,0.2,0.2,0.1,p=0.5),
                A.Normalize(IMAGENET_MEAN, IMAGENET_STD),
                ToTensorV2()
            ])
        else:
            self.tf = A.Compose([
                A.Resize(img_size,img_size),
                A.Normalize(IMAGENET_MEAN, IMAGENET_STD),
                ToTensorV2()
            ])

    def _load_img(self, img_id):
        p1 = Path(IMAGE_DIR_1)/f"{img_id}.jpg"
        p2 = Path(IMAGE_DIR_2)/f"{img_id}.jpg"
        p = p1 if p1.exists() else p2
        img = cv2.imread(str(p))
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        return img

    def __len__(self): return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        img = self._load_img(row.image_id)
        img = self.tf(image=img)['image']
        return img, int(row.label)

# --------- Metrics ---------
def compute_metrics(y_true,y_pred):
    return {
        'acc': accuracy_score(y_true,y_pred),
        'macro_f1': f1_score(y_true,y_pred,average='macro'),
        'recall': recall_score(y_true,y_pred,average=None)
    }

# --------- Teacher: Xception ---------
import timm
import torch.nn as nn
from torch.amp import autocast, GradScaler

class XceptionTeacher(nn.Module):
    def __init__(self, num_classes=7):
        super().__init__()
        self.backbone = timm.create_model(
            "legacy_xception",
            pretrained=True,
            num_classes=num_classes
        )
        self.feature_dim = self.backbone.get_classifier().in_features

    def forward(self, x, return_feat=False):
        if return_feat:
            f = self.backbone.forward_features(x)
            f = self.backbone.global_pool(f)
            f = f.flatten(1)
            out = self.backbone.get_classifier()(f)
            return out, f
        return self.backbone(x)


    def freeze_backbone(self):
        for name, param in self.backbone.named_parameters():
            if "classifier" not in name:
                param.requires_grad = False


# --------- Student: MobileNetV3 ---------
from torchvision.models import mobilenet_v3_large, MobileNet_V3_Large_Weights

class MobileNetStudent(nn.Module):
    def __init__(self,num_classes=7):
        super().__init__()
        self.backbone = mobilenet_v3_large(weights=MobileNet_V3_Large_Weights.IMAGENET1K_V1)
        self.features = self.backbone.features
        self.pool = nn.AdaptiveAvgPool2d(1)
        self.project = nn.Linear(960,2048)
        self.fc = nn.Linear(960,num_classes)

    def forward(self,x,return_feat=False):
        f = self.features(x)
        pooled = self.pool(f).flatten(1)
        out = self.fc(pooled)
        proj = self.project(pooled)
        return (out,proj) if return_feat else out

# --------- Loss ---------
class FeatureAlignmentLoss(nn.Module):
    def forward(self,s,t):
        return ((s-t.detach())**2).mean()

# --------- Training loops ---------
def train_teacher():
    model = XceptionTeacher().to(DEVICE)
    opt = optim.AdamW(model.parameters(),lr=3e-4)
    ce = nn.CrossEntropyLoss()
    scaler = GradScaler('cuda')

    train_ds = HAM10000Dataset('/kaggle/input/data-prime/train (1).csv',224,True)
    val_ds   = HAM10000Dataset('/kaggle/input/data-prime/val.csv',224,False)

    train_ld = DataLoader(train_ds,32,True,num_workers=2)
    val_ld   = DataLoader(val_ds,32,False,num_workers=2)

    for epoch in range(20):
        model.train()
        for x,y in train_ld:
            x,y = x.to(DEVICE),y.to(DEVICE)
            opt.zero_grad()
            with autocast(device_type="cuda"):
                out = model(x)
                loss = ce(out,y)
            scaler.scale(loss).backward()
            scaler.step(opt)
            scaler.update()

        model.eval()
        preds,labels = [],[]
        with torch.no_grad():
            for x,y in val_ld:
                x = x.to(DEVICE)
                o = model(x)
                preds.append(o.argmax(1).cpu().numpy())
                labels.append(y.numpy())
        m = compute_metrics(np.concatenate(labels),np.concatenate(preds))
        print(f"Teacher Epoch {epoch+1}: Macro-F1={m['macro_f1']:.4f}")

    torch.save(model.state_dict(),'/kaggle/working/teacher_xception.pth')
    return model


def train_student_with_beta(teacher,beta):
    student = MobileNetStudent().to(DEVICE)
    ce = nn.CrossEntropyLoss()
    align = FeatureAlignmentLoss()
    opt = optim.AdamW(student.parameters(),lr=3e-4)
    scaler = GradScaler()

    train_ds = HAM10000Dataset('/kaggle/input/data-prime/train (1).csv',160,True)
    val_ds   = HAM10000Dataset('/kaggle/input/data-prime/val.csv',160,False)

    train_ld = DataLoader(train_ds,32,True,num_workers=2)
    val_ld   = DataLoader(val_ds,32,False,num_workers=2)

    history=[]
    teacher.eval()

    for epoch in range(20):
        student.train()
        for x,y in train_ld:
            x,y = x.to(DEVICE),y.to(DEVICE)
            opt.zero_grad()
            with autocast(device_type="cuda"):
                s_out,s_feat = student(x,True)
                _,t_feat = teacher(x,True)
                loss_ce = ce(s_out,y)
                loss_align = align(s_feat,t_feat)
                loss = loss_ce + beta*loss_align
            scaler.scale(loss).backward()
            scaler.step(opt)
            scaler.update()

        student.eval()
        preds,labels=[],[]
        with torch.no_grad():
            for x,y in val_ld:
                x=x.to(DEVICE)
                o = student(x)
                preds.append(o.argmax(1).cpu().numpy())
                labels.append(y.numpy())
        m = compute_metrics(np.concatenate(labels),np.concatenate(preds))
        history.append({'epoch':epoch+1,'beta':beta,'macro_f1':m['macro_f1']})
        print(f"Student β={beta} Epoch {epoch+1}: Macro-F1={m['macro_f1']:.4f}")

    torch.save(student.state_dict(),f'/kaggle/working/student_beta_{beta}.pth')
    return student, pd.DataFrame(history)

# --------- Execute ---------
teacher = train_teacher()
results=[]
for beta in [0.1,0.3,0.5]:
    _,hist = train_student_with_beta(teacher,beta)
    hist.to_csv(f'/kaggle/working/history_beta_{beta}.csv',index=False)
    results.append(hist)

print('RAAT Portion 3 complete')


Teacher Epoch 1: Macro-F1=0.6003
Teacher Epoch 2: Macro-F1=0.5601
Teacher Epoch 3: Macro-F1=0.6374
Teacher Epoch 4: Macro-F1=0.6687
Teacher Epoch 5: Macro-F1=0.6287
Teacher Epoch 6: Macro-F1=0.7002
Teacher Epoch 7: Macro-F1=0.6396
Teacher Epoch 8: Macro-F1=0.6758
Teacher Epoch 9: Macro-F1=0.6737
Teacher Epoch 10: Macro-F1=0.6931
Teacher Epoch 11: Macro-F1=0.6242
Teacher Epoch 12: Macro-F1=0.6502
Teacher Epoch 13: Macro-F1=0.6761
Teacher Epoch 14: Macro-F1=0.6841
Teacher Epoch 15: Macro-F1=0.6691
Teacher Epoch 16: Macro-F1=0.6466
Teacher Epoch 17: Macro-F1=0.6522
Teacher Epoch 18: Macro-F1=0.6471
Teacher Epoch 19: Macro-F1=0.6519
Teacher Epoch 20: Macro-F1=0.6682
Student β=0.1 Epoch 1: Macro-F1=0.5322
Student β=0.1 Epoch 2: Macro-F1=0.5971
Student β=0.1 Epoch 3: Macro-F1=0.6056
Student β=0.1 Epoch 4: Macro-F1=0.6325
Student β=0.1 Epoch 5: Macro-F1=0.6139
Student β=0.1 Epoch 6: Macro-F1=0.6211
Student β=0.1 Epoch 7: Macro-F1=0.6330
Student β=0.1 Epoch 8: Macro-F1=0.6282
Student β=0.1 Epo

In [30]:
# =============================
# PORTION 4 – VISUALIZATION & ABLATIONS (KAGGLE)
# Compatible with your provided Portion 3 code & outputs
# =============================

# --------- Imports ---------
import os
import numpy as np
import pandas as pd
import torch
import matplotlib.pyplot as plt
import seaborn as sns
from torch.utils.data import DataLoader
from sklearn.manifold import TSNE
from sklearn.metrics import confusion_matrix

# Reuse definitions from Portion 3 (must already be executed above)
# - DEVICE
# - CLASS_NAMES
# - HAM10000Dataset
# - XceptionTeacher
# - MobileNetStudent

sns.set(style="whitegrid")
OUT_DIR = "/kaggle/working"

# =====================================================
# 1. β ABLATION – MACRO-F1 CURVES
# =====================================================
plt.figure(figsize=(8,6))
for beta in [0.1, 0.3, 0.5]:
    hist = pd.read_csv(f"{OUT_DIR}/history_beta_{beta}.csv")
    plt.plot(hist['epoch'], hist['macro_f1'], label=f"β={beta}", linewidth=2)

plt.xlabel("Epoch")
plt.ylabel("Macro-F1")
plt.title("RAAT β Ablation – Validation Macro-F1")
plt.legend()
plt.tight_layout()
plt.savefig(f"{OUT_DIR}/beta_ablation_macro_f1.png", dpi=300)
plt.close()

# =====================================================
# 2. FLOPs vs ACCURACY (Paper-style Tradeoff Plot)
# =====================================================
models = ['Teacher', 'Student β=0.1', 'Student β=0.3', 'Student β=0.5']
macro_f1 = [0.7002, 0.6608, 0.6915, 0.6534]
flops = [8000, 320, 320, 320]  # MFLOPs (approx, fixed backbone)

plt.figure(figsize=(7,5))
plt.scatter(flops, macro_f1, s=120)
for i, m in enumerate(models):
    plt.annotate(m, (flops[i] + 50, macro_f1[i]))

plt.xlabel("FLOPs (MFLOPs)")
plt.ylabel("Macro-F1")
plt.title("Efficiency–Accuracy Tradeoff (RAAT)")
plt.tight_layout()
plt.savefig(f"{OUT_DIR}/flops_vs_accuracy.png", dpi=300)
plt.close()

# =====================================================
# 3. t-SNE – TEACHER vs BEST STUDENT (β=0.3)
# =====================================================
@torch.no_grad()
def extract_features(model, loader, is_teacher=False):
    model.eval()
    feats, labels = [], []
    for x, y in loader:
        x = x.to(DEVICE)
        if is_teacher:
            _, f = model(x, return_feat=True)
        else:
            _, f = model(x, return_feat=True)
        feats.append(f.cpu())
        labels.append(y)
    return torch.cat(feats).numpy(), torch.cat(labels).numpy()

val_ds = HAM10000Dataset('/kaggle/input/data-prime/val.csv', 160, False)
val_ld = DataLoader(val_ds, batch_size=32, shuffle=False)

teacher = XceptionTeacher().to(DEVICE)
teacher.load_state_dict(torch.load(f"{OUT_DIR}/teacher_xception.pth", map_location=DEVICE))

student = MobileNetStudent().to(DEVICE)
student.load_state_dict(torch.load(f"{OUT_DIR}/student_beta_0.3.pth", map_location=DEVICE))

teacher_feats, y = extract_features(teacher, val_ld, is_teacher=True)
student_feats, _ = extract_features(student, val_ld)

Z = TSNE(n_components=2, perplexity=30, random_state=42).fit_transform(
    np.vstack([teacher_feats, student_feats])
)

labels = np.concatenate([y, y])
domain = np.array(['Teacher'] * len(y) + ['Student'] * len(y))

plt.figure(figsize=(8,6))
sns.scatterplot(
    x=Z[:,0], y=Z[:,1],
    hue=domain,
    style=labels,
    alpha=0.6,
    palette='Set2'
)
plt.title("t-SNE Feature Alignment: Teacher vs Student (β=0.3)")
plt.tight_layout()
plt.savefig(f"{OUT_DIR}/tsne_teacher_vs_student.png", dpi=300)
plt.close()

# =====================================================
# 4. CONFUSION MATRIX – BEST STUDENT (β=0.3)
# =====================================================
@torch.no_grad()
def get_preds(model, loader):
    model.eval()
    yt, yp = [], []
    for x, y in loader:
        x = x.to(DEVICE)
        p = model(x).argmax(1).cpu()
        yt.append(y)
        yp.append(p)
    return torch.cat(yt), torch.cat(yp)

y_true, y_pred = get_preds(student, val_ld)
cm = confusion_matrix(y_true, y_pred, normalize='true')

plt.figure(figsize=(7,6))
sns.heatmap(
    cm,
    xticklabels=CLASS_NAMES,
    yticklabels=CLASS_NAMES,
    annot=True,
    fmt='.2f',
    cmap='Blues'
)
plt.title("Confusion Matrix – Student β=0.3")
plt.ylabel("True Label")
plt.xlabel("Predicted Label")
plt.tight_layout()
plt.savefig(f"{OUT_DIR}/confusion_matrix_student_beta_0.3.png", dpi=300)
plt.close()

print("[Portion 4] All visualizations successfully saved to /kaggle/working")


[Portion 4] All visualizations successfully saved to /kaggle/working
