In [None]:
import os
import warnings

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision.models import resnet18
from torchvision import transforms
import numpy as np
import pandas as pd
import requests
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import roc_auc_score
from sklearn.preprocessing import StandardScaler, RobustScaler
from sklearn.model_selection import cross_val_score
from tqdm import tqdm

warnings.filterwarnings('ignore')

# Model and data parameters
# Three-element mean/std lists handed to transforms.Normalize when
# AdvancedMembershipInferenceAttack builds its preprocessing pipeline.
# NOTE(review): presumably per-channel statistics of the target model's
# training images — confirm against the task description.
mean = [0.2980, 0.2962, 0.2987]
std = [0.2886, 0.2875, 0.2889]

In [None]:
class TaskDataset(Dataset):
    """In-memory dataset yielding ``(id, image, label)`` triples.

    The three parallel lists start out empty and are meant to be filled by
    whoever builds the dataset. When ``transform`` is set it is applied to
    the image on every access.
    """

    def __init__(self, transform=None):
        self.ids, self.imgs, self.labels = [], [], []
        self.transform = transform

    def __len__(self):
        # The id list defines how many samples the dataset exposes.
        return len(self.ids)

    def __getitem__(self, index):
        sample_id = self.ids[index]
        image = self.imgs[index]
        if self.transform is not None:
            image = self.transform(image)
        return sample_id, image, self.labels[index]

In [None]:
class MembershipDataset(TaskDataset):
    """TaskDataset variant whose samples also carry a membership entry."""

    def __init__(self, transform=None):
        super().__init__(transform)
        self.membership = []

    def __getitem__(self, index):
        # Extend the parent's (id, image, label) triple with the membership
        # entry stored at the same position.
        base_sample = super().__getitem__(index)
        return (*base_sample, self.membership[index])

In [None]:
class AdvancedMembershipInferenceAttack:
    def __init__(self, target_model_path="./01_MIA.pt", device='cuda' if torch.cuda.is_available() else 'cpu'):
        """Store the device, load the target model and build the preprocessing pipeline.

        Args:
            target_model_path: Checkpoint path forwarded to ``load_target_model``.
            device: Device identifier. The default expression is evaluated once,
                when the class body is executed: 'cuda' if CUDA is available,
                otherwise 'cpu'.
        """
        self.device = device
        self.target_model = self.load_target_model(target_model_path)

        # Convert to a PIL image, back to a tensor, then normalize with the
        # module-level ``mean`` / ``std`` lists.
        preprocessing_steps = [
            transforms.ToPILImage(),
            transforms.ToTensor(),
            transforms.Normalize(mean=mean, std=std),
        ]
        self.transform = transforms.Compose(preprocessing_steps)

    def load_target_model(self, model_path):
        """Load the target ResNet18 model.

        Builds a torchvision ``resnet18`` whose ``fc`` head has 44 outputs,
        loads the state dict stored at ``model_path`` into it, moves it to
        ``self.device`` and switches it to eval mode.

        Args:
            model_path: Path of the checkpoint file holding the state dict.

        Returns:
            The loaded model, in eval mode, on ``self.device``.
        """
        model = resnet18()
        model.fc = torch.nn.Linear(512, 44)
        # The checkpoint goes straight into load_state_dict, so only tensors
        # are needed; weights_only=True stops torch.load from unpickling
        # arbitrary Python objects out of the file.
        state_dict = torch.load(model_path, map_location=self.device, weights_only=True)
        model.load_state_dict(state_dict)
        model.to(self.device)
        model.eval()
        return model

    def run_advanced_attack(self, public_dataset_path="./pub.pt", private_dataset_path="./priv_out.pt"):
        """Run the full attack pipeline and score the private dataset.

        Steps: load both datasets and attach ``self.transform``; extract
        target-model features for each; train 6 shadow models on the public
        dataset; extract shadow features for each dataset; fit the attack
        ensemble on the public features using ``public_data.membership`` as
        labels; score the private features.

        Args:
            public_dataset_path: File holding the pickled public dataset.
            private_dataset_path: File holding the pickled private dataset.

        Returns:
            Tuple ``(private_data.ids, membership_scores)``.
        """
        print("Loading and transforming datasets...")
        loaded = []
        for dataset_path in (public_dataset_path, private_dataset_path):
            # NOTE: weights_only=False unpickles whole Python objects, so
            # these files must come from a trusted source.
            dataset = torch.load(dataset_path, weights_only=False, map_location=self.device)
            dataset.transform = self.transform
            loaded.append(dataset)
        public_data, private_data = loaded

        # Features read off the target model itself.
        print("Extracting comprehensive features from target model using Public Data...")
        target_feats_public = self.extract_comprehensive_features(public_data)
        print("Extracting comprehensive features from target model using Private Data...")
        target_feats_private = self.extract_comprehensive_features(private_data)

        # Shadow models are trained on the public dataset only.
        print("Training advanced shadow models...")
        shadow_models = self.train_shadow_models(public_data, num_models=6)

        print("Extracting shadow model features using Public Data...")
        shadow_feats_public = self.extract_shadow_features_fast(public_data, shadow_models)
        print("Extracting shadow model features using Private Data...")
        shadow_feats_private = self.extract_shadow_features_fast(private_data, shadow_models)

        # Target-model columns first, shadow-model columns after.
        attack_inputs_public = np.hstack([target_feats_public, shadow_feats_public])
        attack_inputs_private = np.hstack([target_feats_private, shadow_feats_private])

        print("Training advanced attack models...")
        membership_labels = np.array(public_data.membership)
        attack_models = self.train_advanced_attack_models(attack_inputs_public, membership_labels)

        print("Predicting membership with advanced ensemble...")
        membership_scores = self.predict_membership_advanced(attack_inputs_private, attack_models)

        return private_data.ids, membership_scores

    def collate_fn(self, batch):
        filtered_batch = []
        for sample in batch:
            if sample[0] is None or sample[1] is None or sample[2] is None:
                print("Skipping invalid sample:", sample)
                continue
            filtered_batch.append(sample[:3])
        return torch.utils.data.default_collate(filtered_batch)

    def compute_gradient_features(self, imgs, labels):
        """Compute gradient-based features"""
        imgs = imgs.clone().detach().requires_grad_(True)
        imgs = imgs.to(self.device)
        labels = labels.to(self.device)

        logits = self.target_model(imgs)
        loss = F.cross_entropy(logits, labels, reduction='none')

        gradient_features = []
        for i in range(len(imgs)):
            # Compute gradients
            self.target_model.zero_grad()
            loss[i].backward(retain_graph=True)

            if imgs.grad is not None:
                grad_norm = torch.norm(imgs.grad[i]).item()
                grad_mean = torch.mean(torch.abs(imgs.grad[i])).item()
                grad_std = torch.std(imgs.grad[i]).item()
                gradient_features.append([grad_norm, grad_mean, grad_std])
            else:
                gradient_features.append([0.0, 0.0, 0.0])

        return np.array(gradient_features)

    def compute_layer_activations(self, imgs):
        """Extract intermediate layer activations"""
        activations = {}
        imgs = imgs.to(self.device)

        def hook_fn(name):
            def hook(module, input, output):
                activations[name] = output.detach()
            return hook

        # Register hooks for key layers
        handles = []
        handles.append(self.target_model.layer1.register_forward_hook(hook_fn('layer1')))
        handles.append(self.target_model.layer2.register_forward_hook(hook_fn('layer2')))
        handles.append(self.target_model.layer3.register_forward_hook(hook_fn('layer3')))
        handles.append(self.target_model.layer4.register_forward_hook(hook_fn('layer4')))

        # Forward pass
        _ = self.target_model(imgs)

        # Remove hooks
        for handle in handles:
            handle.remove()

        # Compute activation statistics
        activation_features = []
        for i in range(len(imgs)):
            features = []
            for layer_name in ['layer1', 'layer2', 'layer3', 'layer4']:
                if layer_name in activations:
                    act = activations[layer_name][i]
                    # Global average pooling
                    act_pooled = F.adaptive_avg_pool2d(act.unsqueeze(0), (1, 1)).squeeze()

                    # Statistical features
                    features.extend([
                        torch.mean(act_pooled).item(),
                        torch.std(act_pooled).item(),
                        torch.max(act_pooled).item(),
                        torch.min(act_pooled).item()
                    ])
            activation_features.append(features)

        return np.array(activation_features)

    def compute_advanced_confidence_features(self, logits, probs, labels):
        """Compute advanced confidence-based features"""
        features = []
        labels = labels.to(self.device)

        for i in range(len(logits)):
            logit = logits[i]
            prob = probs[i]
            label = labels[i]

            sample_features = []

            # Basic confidence features
            max_prob = torch.max(prob).item()
            max_logit = torch.max(logit).item()
            sample_features.extend([max_prob, max_logit])

            # Entropy variations
            entropy = -torch.sum(prob * torch.log(prob + 1e-8)).item()
            sample_features.append(entropy)

            # Temperature scaling effects
            for temp in [0.5, 2.0, 5.0]:
                temp_probs = F.softmax(logit / temp, dim=0)
                temp_entropy = -torch.sum(temp_probs * torch.log(temp_probs + 1e-8)).item()
                sample_features.append(temp_entropy)

            # Logit statistics
            logit_mean = torch.mean(logit).item()
            logit_std = torch.std(logit).item()
            logit_max_minus_mean = (torch.max(logit) - logit_mean).item()
            sample_features.extend([logit_mean, logit_std, logit_max_minus_mean])

            # Top-k analysis
            top_k_probs, top_k_indices = torch.topk(prob, min(5, len(prob)))
            for k in range(len(top_k_probs)):
                sample_features.append(top_k_probs[k].item())

            # Pad if necessary
            while len(top_k_probs) < 5:
                sample_features.append(0.0)

            # True class analysis
            true_class_prob = prob[label].item()
            true_class_logit = logit[label].item()
            true_class_rank = (prob > true_class_prob).sum().item() + 1
            sample_features.extend([true_class_prob, true_class_logit, true_class_rank])

            # Margin analysis
            sorted_probs, _ = torch.sort(prob, descending=True)
            if len(sorted_probs) >= 2:
                margin1 = (sorted_probs[0] - sorted_probs[1]).item()
                sample_features.append(margin1)
            else:
                sample_features.append(0.0)

            if len(sorted_probs) >= 3:
                margin2 = (sorted_probs[1] - sorted_probs[2]).item()
                sample_features.append(margin2)
            else:
                sample_features.append(0.0)

            # Prediction correctness
            pred_class = torch.argmax(logit).item()
            is_correct = 1.0 if pred_class == label.item() else 0.0
            sample_features.append(is_correct)

            # Loss-based features
            loss = F.cross_entropy(logit.unsqueeze(0), label.unsqueeze(0)).item()
            sample_features.append(loss)

            features.append(sample_features)

        return np.array(features)

    def extract_comprehensive_features(self, dataset, batch_size=32):
        """Build the target-model feature matrix for every sample of ``dataset``.

        For each batch the confidence, gradient and activation feature
        blocks are computed and joined column-wise; the per-batch matrices
        are then stacked row-wise.

        Args:
            dataset: Dataset whose samples start with (id, image, label).
            batch_size: Number of samples per batch.

        Returns:
            ``np.ndarray`` with one row per collated sample.
        """
        loader = DataLoader(dataset, batch_size=batch_size, shuffle=False, collate_fn=self.collate_fn)
        per_batch_matrices = []

        for batch in tqdm(loader, desc="Extracting features: "):
            # Tolerate a fourth (membership) entry by dropping it.
            if len(batch) == 4:
                batch = batch[:3]
            _, imgs, labels = batch

            imgs = imgs.to(self.device)
            labels = labels.to(self.device)

            # Plain predictions do not need autograd.
            with torch.no_grad():
                logits = self.target_model(imgs)
                probs = F.softmax(logits, dim=1)

            confidence_block = self.compute_advanced_confidence_features(logits, probs, labels)

            # Runs outside no_grad because it differentiates w.r.t. the input.
            gradient_block = self.compute_gradient_features(imgs.clone(), labels)

            with torch.no_grad():
                activation_block = self.compute_layer_activations(imgs)

            per_batch_matrices.append(
                np.hstack([confidence_block, gradient_block, activation_block])
            )

        return np.vstack(per_batch_matrices)

    def train_shadow_models(self, public_dataset, num_models=8):
        """Train more diverse shadow models"""
        shadow_models = []
        total_samples = len(public_dataset)

        for i in range(num_models):
            print(f"Training shadow model {i+1}/{num_models}")

            # Create different training strategies
            if i % 2 == 0:
                # Random subset
                subset_size = int(total_samples * (0.4 + 0.3 * np.random.random()))
                indices = np.random.choice(total_samples, subset_size, replace=False)
            else:
                # Class-balanced subset
                subset_size = int(total_samples * 0.5)
                indices = self.get_balanced_subset(public_dataset, subset_size)

            subset_data = torch.utils.data.Subset(public_dataset, indices)

            # Train with different hyperparameters
            lr = 0.001 * (0.5 + np.random.random())
            epochs = 3 + np.random.randint(0, 5)

            shadow_model = self.train_single_shadow_model(subset_data, lr=lr, epochs=epochs)
            shadow_models.append(shadow_model)

        return shadow_models

    def get_balanced_subset(self, dataset, subset_size):
        """Get a class-balanced subset"""
        # Group indices by class
        class_indices = {}
        for idx in range(len(dataset)):
            if hasattr(dataset, 'membership'):
                _, _, label, _ = dataset[idx]
            else:
                _, _, label = dataset[idx]

            label = label.item() if torch.is_tensor(label) else label
            if label not in class_indices:
                class_indices[label] = []
            class_indices[label].append(idx)

        # Sample from each class
        selected_indices = []
        samples_per_class = subset_size // len(class_indices)

        for class_label, indices in class_indices.items():
            if len(indices) >= samples_per_class:
                selected = np.random.choice(indices, samples_per_class, replace=False)
            else:
                selected = indices
            selected_indices.extend(selected)

        return np.array(selected_indices[:subset_size])

    def train_single_shadow_model(self, train_data, lr=0.001, epochs=5):
        """Train one ResNet18 shadow model from scratch on ``train_data``.

        Uses AdamW (weight decay 1e-4), cross-entropy with label smoothing
        0.1 and a cosine learning-rate schedule stepped once per epoch.
        Batches hold 64 shuffled samples and an incomplete final batch is
        dropped. Each batch is flipped along its last dimension with
        probability 0.5.

        Args:
            train_data: Dataset yielding (id, image, label[, membership]).
            lr: Initial learning rate.
            epochs: Number of passes over the data.

        Returns:
            The trained model, switched to eval mode.
        """
        shadow = resnet18()
        shadow.fc = torch.nn.Linear(512, 44)
        shadow.to(self.device)

        optimizer = torch.optim.AdamW(shadow.parameters(), lr=lr, weight_decay=1e-4)
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=epochs)
        loss_fn = nn.CrossEntropyLoss(label_smoothing=0.1)
        loader = DataLoader(train_data, batch_size=64, shuffle=True, drop_last=True)

        shadow.train()
        for _ in range(epochs):
            for batch in loader:
                # Drop the membership entry when the dataset provides one.
                if len(batch) == 4:
                    batch = batch[:3]
                _, imgs, labels = batch

                imgs = imgs.to(self.device)
                labels = labels.to(self.device)

                # Batch-level augmentation: flip along the last (width) axis.
                flip_batch = np.random.random() > 0.5
                if flip_batch:
                    imgs = torch.flip(imgs, dims=[3])

                optimizer.zero_grad()
                batch_loss = loss_fn(shadow(imgs), labels)
                batch_loss.backward()
                optimizer.step()

            scheduler.step()

        shadow.eval()
        return shadow

    def extract_shadow_features_fast(self, dataset, shadow_models):
        """Fast shadow feature extraction"""
        dataloader = DataLoader(dataset, batch_size=64, shuffle=False, collate_fn=self.collate_fn)
        all_shadow_features = []

        for model_idx, shadow_model in enumerate(shadow_models):
            print(f"Extracting from shadow model {model_idx + 1}")

            model_features = []
            with torch.no_grad():
                for batch in dataloader:
                    if len(batch) == 4:
                        _, imgs, labels, _ = batch
                    else:
                        _, imgs, labels = batch

                    imgs = imgs.to(self.device)
                    labels = labels.to(self.device)

                    logits = shadow_model(imgs)
                    probs = F.softmax(logits, dim=1)

                    # Key shadow features
                    max_probs = torch.max(probs, dim=1)[0]
                    entropies = -torch.sum(probs * torch.log(probs + 1e-8), dim=1)
                    losses = F.cross_entropy(logits, labels, reduction='none')

                    # True class probabilities
                    true_class_probs = probs.gather(1, labels.unsqueeze(1)).squeeze()

                    batch_features = torch.stack([
                        max_probs,
                        entropies,
                        losses,
                        true_class_probs
                    ], dim=1)

                    model_features.append(batch_features.cpu().numpy())

            all_shadow_features.append(np.vstack(model_features))

        return np.hstack(all_shadow_features)

    def train_advanced_attack_models(self, features, labels):
        """Fit the attack ensemble: four classifiers under each of two scalers.

        For a ``StandardScaler`` and a ``RobustScaler`` (each fitted on
        ``features``), a random forest, a gradient-boosting model, an RBF
        SVM and an MLP are trained on the scaled features. Seeds are 42 for
        the first scaler and 43 for the second.

        Args:
            features: 2-D feature matrix, one row per sample.
            labels: Membership labels aligned with ``features``.

        Returns:
            List of ``(fitted_model, fitted_scaler)`` pairs, ordered by
            scaler and then by classifier.
        """
        fitted_pairs = []

        for i, scaler in enumerate((StandardScaler(), RobustScaler())):
            scaled = scaler.fit_transform(features)
            seed = 42 + i

            # (progress message, unfitted estimator), fitted in this order.
            candidates = (
                (
                    "Trained Random Forest with scalar ",
                    RandomForestClassifier(
                        n_estimators=500,
                        max_depth=15,
                        min_samples_split=10,
                        min_samples_leaf=5,
                        max_features='sqrt',
                        bootstrap=True,
                        random_state=seed,
                    ),
                ),
                (
                    "Trained Gradient Boosting with scalar ",
                    GradientBoostingClassifier(
                        n_estimators=300,
                        learning_rate=0.05,
                        max_depth=8,
                        min_samples_split=10,
                        min_samples_leaf=5,
                        random_state=seed,
                    ),
                ),
                (
                    "Trained SVC with scalar ",
                    SVC(
                        C=10.0,
                        kernel='rbf',
                        gamma='scale',
                        probability=True,
                        random_state=seed,
                    ),
                ),
                (
                    "Trained Multi Level Perceptron with scalar ",
                    MLPClassifier(
                        hidden_layer_sizes=(256, 128, 64),
                        activation='relu',
                        solver='adam',
                        alpha=0.001,
                        learning_rate_init=0.001,
                        max_iter=500,
                        random_state=seed,
                    ),
                ),
            )

            for message, estimator in candidates:
                estimator.fit(scaled, labels)
                print(message, i)
                # The scaler travels with the model so prediction can apply
                # the same scaling.
                fitted_pairs.append((estimator, scaler))

        return fitted_pairs

    def predict_membership_advanced(self, features, models):
        """Score samples with a weighted average over the attack ensemble.

        Every model scores the features after its own scaler is applied.
        Tree ensembles weigh 1.5, SVMs 1.2 and everything else 1.0. The
        weighted mean is finally raised to the power 0.8.

        Args:
            features: 2-D feature matrix, one row per sample.
            models: ``(model, scaler)`` pairs as produced by
                ``train_advanced_attack_models``.

        Returns:
            1-D ``np.ndarray`` of membership scores, one per sample.
        """
        per_model_scores = []
        per_model_weights = []

        for model, scaler in models:
            scaled = scaler.transform(features)

            if not hasattr(model, 'predict_proba'):
                # No probabilities available: min-max scale the decision
                # values into [0, 1].
                raw = model.decision_function(scaled)
                score = (raw - raw.min()) / (raw.max() - raw.min() + 1e-8)
            else:
                score = model.predict_proba(scaled)[:, 1]
            per_model_scores.append(score)

            # Fixed per-type weights.
            if isinstance(model, (RandomForestClassifier, GradientBoostingClassifier)):
                model_weight = 1.5
            elif isinstance(model, SVC):
                model_weight = 1.2
            else:
                model_weight = 1.0
            per_model_weights.append(model_weight)

        score_matrix = np.array(per_model_scores)
        weight_vector = np.array(per_model_weights)
        weight_vector = weight_vector / weight_vector.sum()

        blended = np.average(score_matrix, axis=0, weights=weight_vector)

        # Monotone power transform (exponent 0.8) of the blended score.
        return np.power(blended, 0.8)

In [None]:
def main():
    """Run the attack, write the scores to ``test.csv`` and submit the file.

    The submission token is read from the ``MIA_TOKEN`` environment variable;
    when it is unset, the value previously hard-coded here is used so
    existing runs behave as before.
    """
    # Initialize advanced attack
    attack = AdvancedMembershipInferenceAttack()
    ids, scores = attack.run_advanced_attack()

    # Prepare submission
    df = pd.DataFrame({
        "ids": ids,
        "score": scores
    })

    df.to_csv("test.csv", index=None)

    # Submit results. Prefer supplying the token through the environment so
    # it does not have to live in the notebook.
    token = os.environ.get("MIA_TOKEN", "96005201")
    # The context manager closes the upload handle once the request is done.
    with open("test.csv", "rb") as submission_file:
        response = requests.post(
            "http://34.122.51.94:9090/mia",
            files={"file": submission_file},
            headers={"token": token}
        )
    print(response.json())

    print(f"Attack completed. Submission saved to test.csv")
    print(f"Score statistics: Mean={np.mean(scores):.3f}, Std={np.std(scores):.3f}")
    print(f"Score range: [{np.min(scores):.3f}, {np.max(scores):.3f}]")

if __name__ == "__main__":
    main()

In [None]:
# Re-submit an already written score file without re-running the attack.
# NOTE(review): this reads /content/test.csv while main() writes a relative
# "test.csv" — the two only match when the working directory is /content.
# The token comes from MIA_TOKEN when set, else the previous literal value.
with open("/content/test.csv", "rb") as submission_file:
    response = requests.post(
        "http://34.122.51.94:9090/mia",
        files={"file": submission_file},
        headers={"token": os.environ.get("MIA_TOKEN", "96005201")}
    )
print(response.json())