In [1]:
import torch
import torch.nn as nn
import torchvision.models as models
import torchvision.transforms as transforms
import torch.optim as optim
from torch.utils.data import DataLoader
import numpy as np
from typing import Tuple
from torch.utils.data import Dataset 
import pandas as pd
import requests
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split

In [2]:
class TaskDataset(Dataset):
    def __init__(self, transform=None):
        self.ids = []
        self.imgs = []
        self.labels = []
        self.transform = transform

    def __getitem__(self, index) -> Tuple[int, torch.Tensor, int]:
        id_ = self.ids[index]
        img = self.imgs[index]
        if self.transform is not None:
            img = self.transform(img)
        label = self.labels[index]
        return id_, img, label

    def __len__(self):
        return len(self.ids)


class MembershipDataset(TaskDataset):
    def __init__(self, transform=None):
        super().__init__(transform)
        self.membership = []

    def __getitem__(self, index) -> Tuple[int, torch.Tensor, int, int]:
        id_, img, label = super().__getitem__(index)
        return id_, img, label, self.membership[index]

In [3]:
transform = transforms.Compose([
    transforms.ToTensor(),
])
public_dataset: MembershipDataset(transform=transform) = torch.load("./pub.pt")
public_dataset = DataLoader(public_dataset, batch_size=64, shuffle=True)


In [4]:
shadow_model_res18 = models.resnet18(pretrained=False)
shadow_model_res18.fc = nn.Linear(512, 44)

checkpoint = torch.load("./shadow_model_res18.pt")
shadow_model_res18.load_state_dict(checkpoint.state_dict())



<All keys matched successfully>

In [5]:
shadow_model_vgg = models.vgg16(pretrained=False)
shadow_model_vgg.classifier[6] = nn.Linear(4096, 44)

checkpoint = torch.load("./shadow_model_vgg.pt")
shadow_model_vgg.load_state_dict(checkpoint.state_dict())


<All keys matched successfully>

In [6]:
shadow_model_dense = models.densenet121(pretrained=False)
shadow_model_dense.classifier = nn.Linear(1024, 44)

checkpoint = torch.load("./shadow_model_dense.pt")
shadow_model_dense.load_state_dict(checkpoint.state_dict())

<All keys matched successfully>

In [7]:
shadow_model_resnext = models.resnext50_32x4d(pretrained=False)
shadow_model_resnext.fc = nn.Linear(2048, 44)

checkpoint = torch.load("./shadow_model_resnext.pt")
shadow_model_resnext.load_state_dict(checkpoint.state_dict())

<All keys matched successfully>

In [8]:
shadow_model_effnet = models.efficientnet_b0(pretrained=False)
shadow_model_effnet.classifier[1] = nn.Linear(1280, 44)

checkpoint = torch.load("./shadow_model_effnet.pt")
shadow_model_effnet.load_state_dict(checkpoint.state_dict())

<All keys matched successfully>

In [9]:
shadow_model_mobilenet = models.mobilenet_v2(pretrained=False)
shadow_model_mobilenet.classifier[1] = nn.Linear(1280, 44)

checkpoint = torch.load("./shadow_model_mobilenet.pt")
shadow_model_mobilenet.load_state_dict(checkpoint.state_dict())

<All keys matched successfully>

In [10]:
shadow_model_squeezenet = models.squeezenet1_0(pretrained=False)
shadow_model_squeezenet.classifier[1] = nn.Conv2d(512, 44, kernel_size=(1, 1), stride=(1, 1))

checkpoint = torch.load("./shadow_model_squeezenet.pt")
shadow_model_squeezenet.load_state_dict(checkpoint.state_dict())

<All keys matched successfully>

In [11]:
shadow_model_res34 = models.resnet34(pretrained=False)
shadow_model_res34.fc = nn.Linear(512, 44)

checkpoint = torch.load("./shadow_model_res34.pt")
shadow_model_res34.load_state_dict(checkpoint.state_dict())

<All keys matched successfully>

In [12]:
shadow_model_dense_adamw = models.densenet121(pretrained=False)
shadow_model_dense_adamw.classifier = nn.Linear(1024, 44)

checkpoint = torch.load("./shadow_model_dense_adamw.pt")
shadow_model_dense_adamw.load_state_dict(checkpoint.state_dict())

<All keys matched successfully>

In [13]:
shadow_model_res18_sgd = models.resnet18(pretrained=False)
shadow_model_res18_sgd.fc = nn.Linear(512, 44)

checkpoint = torch.load("./shadow_model_res18_sgd.pt")
shadow_model_res18_sgd.load_state_dict(checkpoint.state_dict())

<All keys matched successfully>

In [14]:
shadow_model_shufflenet = models.shufflenet_v2_x1_0(pretrained=False)
shadow_model_shufflenet.fc = nn.Linear(1024, 44)

checkpoint = torch.load("./shadow_model_shufflenet.pt")
shadow_model_shufflenet.load_state_dict(checkpoint.state_dict())

<All keys matched successfully>

In [15]:
def extract_features(model, data_loader):
    model.eval() 
    features_list = []
    labels_list = []
    member_list = []
    with torch.no_grad():
        for ids, inputs, labels, membership in data_loader:
            features = model(inputs).cpu().numpy()
            features_list.append(features)
            labels_list.append(labels.numpy())
            member_list.append(membership.numpy())
    features = np.concatenate(features_list)
    labels = np.concatenate(labels_list)
    membership = np.concatenate(member_list)
    return features, labels, membership

In [16]:
shadow_models = [
    shadow_model_res18, 
    shadow_model_vgg, 
    shadow_model_dense, 
    shadow_model_effnet, 
    shadow_model_resnext, 
    shadow_model_mobilenet,
    shadow_model_squeezenet,
    shadow_model_shufflenet,
    shadow_model_res18_sgd,
    shadow_model_dense_adamw,
    shadow_model_res34
]

In [17]:
from sklearn.preprocessing import StandardScaler
import torch
from sklearn.model_selection import train_test_split

X_train_list = []
y_train_list = []
X_test_list = []
y_test_list = []

for shadow_model in shadow_models:
    features, _, membership = extract_features(shadow_model, public_dataset)
    X_train, X_test, y_train, y_test = train_test_split(features, membership, test_size=0.15, random_state=42)
    X_train_list.extend(X_train) 
    y_train_list.extend(y_train)  
    X_test_list.extend(X_test)    
    y_test_list.extend(y_test)    

X_train_tensor = torch.tensor(X_train_list, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train_list, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test_list, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test_list, dtype=torch.float32)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_tensor)
X_test_scaled = scaler.transform(X_test_tensor)

  X_train_tensor = torch.tensor(X_train_list, dtype=torch.float32)


In [18]:
print(X_train_scaled.shape)
print(X_test_scaled.shape)
print(y_train_tensor.shape)
print(y_test_tensor.shape)

(187000, 44)
(33000, 44)
torch.Size([187000])
torch.Size([33000])


In [20]:
# import warnings

# # Suppress all warnings
# warnings.filterwarnings('ignore')
# from sklearn.ensemble import RandomForestClassifier
# from sklearn.model_selection import GridSearchCV
# from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
# from sklearn.feature_selection import SelectFromModel
# from sklearn.pipeline import Pipeline
# import numpy as np

# rf = RandomForestClassifier(random_state=42)
# param_grid = {
#     'n_estimators': [50, 100, 200, 300],
#     'max_depth': [None, 10, 20, 30, 40],
#     'min_samples_split': [2, 5, 10, 15],
#     'min_samples_leaf': [1, 2, 4, 6],
#     'max_features': ['auto', 'sqrt', 'log2']
# }
# grid_search = GridSearchCV(estimator=rf, param_grid=param_grid, cv=5, scoring='accuracy', n_jobs=-1, verbose=2)
# grid_search.fit(X_train_scaled, y_train)
# best_rf = grid_search.best_estimator_
# y_pred_proba = best_rf.predict_proba(X_test_scaled)
# logits = np.log(y_pred_proba[:, 1] / (1 - y_pred_proba[:, 1]))
# threshold = 0
# y_pred = (logits > threshold).astype(int)
# print(y_pred)
# print("Best Parameters:", grid_search.best_params_)
# print("Accuracy:", accuracy_score(y_test, y_pred))
# print("Precision:", precision_score(y_test, y_pred))
# print("Recall:", recall_score(y_test, y_pred))
# print("F1 Score:", f1_score(y_test, y_pred))
# print("ROC AUC Score:", roc_auc_score(y_test, y_pred_proba[:, 1]))

In [20]:
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import precision_score, recall_score, accuracy_score
from sklearn.svm import SVC
import numpy as np

log_reg = LogisticRegression(max_iter=1000)

voting_clf = VotingClassifier(estimators=[
    ('lr', log_reg),
    ('rf', RandomForestClassifier()),
    ('svc', SVC(probability=True))
], voting='soft')

print("Starting Training")
voting_clf.fit(X_train_scaled, y_train_tensor)
print("Finished Training")
print("Starting Evaluation")
y_pred_voting = voting_clf.predict_proba(X_test_scaled)
logits_voting = np.log(y_pred_voting[:, 1] / (1 - y_pred_voting[:, 1]))
threshold = 0
y_pred_voting = (logits_voting > threshold).astype(int)

print("Voting Classifier Accuracy:", accuracy_score(y_test_tensor, y_pred_voting))
print("Voting Classifier Precision:", precision_score(y_test_tensor, y_pred_voting))
print("Voting Classifier Recall:", recall_score(y_test_tensor, y_pred_voting))
print("Ending Evaluation")


Starting Training


In [49]:
# Logistic Regression 
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import precision_score, recall_score, accuracy_score

binary_classifier = LogisticRegression()
binary_classifier.fit(X_train_scaled, y_train)
print(X_test_scaled.shape)
y_pred_log = binary_classifier.predict_proba(X_test_scaled)
logits_log = np.log(y_pred_log[:, 1] / (1 - y_pred_log[:, 1]))
threshold = 0
y_pred_logistic= (logits_log > threshold).astype(int)

print(y_pred_logistic.shape)
print(y_test.shape)

print("Accuracy:", accuracy_score(y_test, y_pred_logistic))

(4000, 44)
(4000,)
(4000,)
Accuracy: 0.5065


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [18]:
og_model = models.resnet18(pretrained=False)
og_model.fc = torch.nn.Linear(512, 44)

ckpt = torch.load("./01_MIA_67.pt", map_location="cpu")

og_model.load_state_dict(ckpt)



<All keys matched successfully>

In [19]:
priv_out_dataset: MembershipDataset = torch.load("./priv_out.pt")
mem_outputs = []

mem_out = og_model(torch.stack(priv_out_dataset.imgs))
imgs_out = torch.tensor(mem_out, dtype=torch.float32)


  imgs_out = torch.tensor(mem_out, dtype=torch.float32)


In [33]:
y_pred_voting = voting_clf.predict_proba(imgs_out)
logits_pred_voting = np.log(y_pred_voting[:, 1] / (1 - y_pred_voting[:, 1]))

In [34]:
print(logits_pred_voting)

[-0.5608155  -0.63304793  0.70806656 ...  0.02555459  0.61898369
  0.31641305]


In [47]:
y_pred_log = binary_classifier.predict_proba(imgs_out)
logits_log = np.log(y_pred_log[:, 1] / (1 - y_pred_log[:, 1]))

In [48]:
df = pd.DataFrame({
    "ids": priv_out_dataset.ids,
    "score": logits_log
})
df.to_csv("test_log.csv", index=None)

In [2]:
response = requests.post("http://34.71.138.79:9090/mia", files={"file": open("test_log.csv", "rb")}, headers={"token": "40034445"})
print(response.json())

{'TPR@FPR=0.05': 0.049, 'AUC': 0.4913101111111111}
