In [1]:
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F
from torchvision import transforms
from torchvision.models import resnet18


import requests
from typing import Tuple
import numpy as np
import pandas as pd
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import roc_auc_score
from tqdm import tqdm

In [2]:
from google.colab import drive
drive.mount('/content/drive')
%cd /content/drive/MyDrive/Sommer25/TML/Assignment_1

Mounted at /content/drive
/content/drive/MyDrive/Sommer25/TML/Assignment_1


In [3]:
# --- Constants ---
mean = [0.2980, 0.2962, 0.2987]
std = [0.2886, 0.2875, 0.2889]
BATCH_SIZE = 64
TOKEN = "55172888"  # <-- Replace this with your actual token

# --- Dataset Classes ---
class TaskDataset(Dataset):
    def __init__(self, transform=None):
        self.ids = []
        self.imgs = []
        self.labels = []
        self.transform = transform

    def __getitem__(self, index) -> Tuple[int, torch.Tensor, int]:
        id_ = self.ids[index]
        img = self.imgs[index]
        if self.transform is not None:
            img = self.transform(img)
        label = self.labels[index]
        return id_, img, label

    def __len__(self):
        return len(self.ids)

class MembershipDataset(TaskDataset):
    def __init__(self, transform=None):
        super().__init__(transform)
        self.membership = []

    def __getitem__(self, index) -> Tuple[int, torch.Tensor, int, int]:
        id_, img, label = super().__getitem__(index)
        return id_, img, label, self.membership[index]

# --- Load the ResNet18 Target Model ---
model = resnet18(pretrained=False)
model.fc = torch.nn.Linear(512, 44)
ckpt = torch.load("./01_MIA.pt", map_location="cpu")
model.load_state_dict(ckpt)
model.eval()


transform = transforms.Compose([
    transforms.ToPILImage(),           # Convert tensor to PIL image
    transforms.ToTensor(),            # Convert back to tensor (as float in [0,1])
    transforms.Normalize(mean, std),  # Normalize using provided mean/std
])

# Load the public dataset
public_data: MembershipDataset = torch.load("pub.pt", weights_only = False)
public_data.transform = transform
public_loader = DataLoader(public_data, batch_size=BATCH_SIZE, shuffle=False)




In [4]:
len(public_loader),len(public_data)

(313, 20000)

In [5]:
public_data[0][1].shape

torch.Size([3, 32, 32])

# Extract features for attack training

In [6]:
def prediction_entropy(probs):
    return -torch.sum(probs * torch.log(probs + 1e-10), dim=1)

features = []
labels = []

with torch.no_grad():
    for ids, imgs, true_labels, memberships in tqdm(public_loader):
        outputs = model(imgs)  # logits
        probs = F.softmax(outputs, dim=1)  # probabilities
        ent = prediction_entropy(probs).cpu().numpy().reshape(-1,1)
        probs_np = probs.cpu().numpy()  # shape (batch, 44)

        # Cross entropy loss per sample
        true_labels_tensor = true_labels.long()
        losses = F.cross_entropy(outputs, true_labels_tensor, reduction='none').cpu().numpy().reshape(-1,1)

        # Combine features: probs + entropy + loss
        batch_features = np.hstack([probs_np, ent, losses])
        features.append(batch_features)
        labels.append(memberships.numpy())

features = np.vstack(features)
labels = np.hstack(labels)

100%|██████████| 313/313 [00:41<00:00,  7.50it/s]


# Train Attacker Model

In [7]:
from sklearn.model_selection import train_test_split

X_train, X_val, y_train, y_val = train_test_split(features, labels, test_size=0.2, random_state=42)

mlp = MLPClassifier(hidden_layer_sizes=(64, 32), max_iter=100, random_state=42)
mlp.fit(X_train, y_train)

val_preds = mlp.predict_proba(X_val)[:,1]

auc = roc_auc_score(y_val, val_preds)
print(f"Validation AUC: {auc:.4f}")


Validation AUC: 0.6583




In [8]:
!pip install catboost

Collecting catboost
  Downloading catboost-1.2.8-cp311-cp311-manylinux2014_x86_64.whl.metadata (1.2 kB)
Downloading catboost-1.2.8-cp311-cp311-manylinux2014_x86_64.whl (99.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m99.2/99.2 MB[0m [31m9.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: catboost
Successfully installed catboost-1.2.8


In [9]:
from sklearn.neural_network import MLPClassifier
from xgboost import XGBClassifier
from catboost import CatBoostClassifier
from sklearn.ensemble import AdaBoostClassifier

models = {
    "MLP": MLPClassifier(hidden_layer_sizes=(128, 64, 32),
                         max_iter=500,
                         learning_rate_init=0.001,
                         early_stopping=True,
                         random_state=42),
    "XGBoost": XGBClassifier(n_estimators=300,
                             learning_rate=0.05,
                             max_depth=4,
                             subsample=0.8,
                             colsample_bytree=0.8,
                             use_label_encoder=False,
                             eval_metric='logloss',
                             random_state=42),
    "CatBoost": CatBoostClassifier(iterations=500,
                                   learning_rate=0.03,
                                   depth=6,
                                   od_type='Iter',
                                   od_wait=20,
                                   verbose=0,
                                   random_seed=42),
    "AdaBoost": AdaBoostClassifier(n_estimators=200,
                                   learning_rate=1.0,
                                   random_state=42)
}

# Train, Predict, Evaluate
results = {}
for name, attack_model in models.items():
    attack_model.fit(X_train, y_train)
    y_probs = attack_model.predict_proba(X_val)[:, 1]
    auc = roc_auc_score(y_val, y_probs)
    results[name] = auc
    print(f"{name} ROC-AUC: {auc:.4f}")

# Select Best Model
best_model = max(results, key=results.get)
print(f"\n✅ Best model: {best_model} with AUC = {results[best_model]:.4f}")

MLP ROC-AUC: 0.6474


Parameters: { "use_label_encoder" } are not used.



XGBoost ROC-AUC: 0.6613
CatBoost ROC-AUC: 0.6645
AdaBoost ROC-AUC: 0.6440

✅ Best model: CatBoost with AUC = 0.6645


# Predict membership scores on private dataset

In [10]:
best_model

'CatBoost'

In [12]:
# --- Load Private Dataset ---
priv_data: MembershipDataset = torch.load("priv_out.pt", weights_only = False)
priv_features = []
with torch.no_grad():
    for i in tqdm(range(len(priv_data))):
        sample = priv_data[i]
        # Unpack only the needed parts
        if len(sample) >= 3:
            ids, img, label = sample[:3]
        else:
            continue  # skip malformed sample
        img = img.unsqueeze(0)  # Add batch dim
        label = torch.tensor([label])
        outputs = model(img)
        probs = F.softmax(outputs, dim=1)

        ent = prediction_entropy(probs).cpu().numpy().reshape(-1, 1)
        probs_np = probs.cpu().numpy()
        losses = F.cross_entropy(outputs, label.long(), reduction='none').cpu().numpy().reshape(-1, 1)

        batch_features = np.hstack([probs_np, ent, losses])
        priv_features.append(batch_features)

priv_features = np.vstack(priv_features)

# Predict membership scores
# membership_scores = mlp.predict_proba(priv_features)[:, 1]
# membership_scores = models[best_model].predict_proba(priv_features)[:, 1]

100%|██████████| 20000/20000 [02:58<00:00, 112.15it/s]


In [14]:
membership_scores = models[best_model].predict_proba(priv_features)[:, 1]

In [15]:
membership_scores

array([0.54454371, 0.55492939, 0.0325212 , ..., 0.3607481 , 0.49960944,
       0.56055112])

# Prepare submission CSV


In [17]:
df = pd.DataFrame({
    "ids": priv_data.ids,
    "score": membership_scores,
})

df.to_csv("test.csv", index=False)
response = requests.post(
    "http://34.122.51.94:9090/mia",
    files={"file": open("test.csv", "rb")},
    headers={"token": TOKEN}
)
print(response.json())

{'TPR@FPR=0.05': 0.13133333333333333, 'AUC': 0.6665335555555555}
