"""
    Detector for <<Model inversion attacks that exploit confidence information and basic countermeasures>>
    [https://dl.acm.org/doi/pdf/10.1145/2810103.2813677]

    Arguments:
    model_path: a trained neural network with torch model type (.pt)
    
    user_dict: a dictionary that contains user's id and password

    blacklist: a list that contains blocked users

    L2_norm: calculate L2_norm for (i) and (i+1) input

    cosine_similarity: calculate cosine similarity for (i) and (i+1) input

    input_tensors: inputs from user with user_id

    attack_flag: flag that marks potential attack behavior
"""

In [127]:
import torch
import numpy as np
import pandas as pd
from PIL import Image
from datetime import datetime


class MIAttack_detector:
    """Wrap a trained model and log how similar consecutive queries are.

    Every image passed to ``predict`` is kept in ``self.input_tensors``.
    ``save_to_csv`` then appends one row describing the newest input: its
    L2 distance and cosine similarity to the previous input, plus a danger
    score (0-2) counting how many of the two measures look suspicious.
    """

    # Column order of the CSV log written by save_to_csv.
    _CSV_COLUMNS = ["user_id", "input_tensor", "L2_norms", "cosine_sims", "danger_level", "timestamp"]

    def __init__(self, model_type, model_path, user_dict, blacklist):
        """
        model_type: as PyTorch requires, the neural network (usually a class) has to be
            defined before torch.load() is called; the argument is accepted for that
            reason only, the loaded model is what gets used
        model_path: path of the saved torch model (.pt)
        user_dict: dictionary that maps user_id to password
        blacklist: list that contains banned users
        """
        # NOTE(security): torch.load unpickles the file, which can execute
        # arbitrary code. Only load checkpoints from a trusted source.
        self.model = torch.load(model_path)
        self.model.eval()
        self.user_dict = user_dict
        self.blacklist = blacklist
        # Inputs seen so far, in the order predict() received them.
        self.input_tensors = []

    def user_varification(self, user_id, user_password):
        """
        Return user_id when the user is not blacklisted and the password matches
        user_dict; return None otherwise (blocked, unknown user or wrong password).
        """
        if user_id in self.blacklist:
            print("user is blocked.")
            # A blocked user must not get the same return value as a verified one.
            return None
        if user_id in self.user_dict and self.user_dict[user_id] == user_password:
            print("user is verificated.")
            return user_id
        return None

    def L2_norm(self, input_tensor):
        """
        calculate the l2 norm of a tensor (returned as a 0-dim tensor)
        """
        return torch.linalg.norm(input_tensor)

    def cosine_similarity(self, tensor1, tensor2):
        """
        calculate cosine similarity between 2 tensors (both are flattened first);
        returns a Python float
        """
        flat1 = torch.flatten(tensor1)
        flat2 = torch.flatten(tensor2)
        similarity = torch.nn.functional.cosine_similarity(flat1, flat2, dim=0)
        return similarity.item()

    def predict(self, img_path):
        """
        Load the image at img_path as grayscale, run it through the model and
        remember the input tensor. Returns the model output as a numpy array.
        """
        # The context manager closes the underlying file once the pixels are read.
        with Image.open(img_path) as img:
            input_array = np.asarray(img.convert("L"))

        # A grayscale image gives a 2-D array; prepend the channel axis so the
        # tensor has shape (1, height, width). No transpose is needed.
        input_array = np.expand_dims(input_array, axis=0)
        input_tensor = torch.tensor(input_array).type(torch.float32)

        # prediction
        with torch.no_grad():
            output = self.model(input_tensor)
        # Only inputs the model actually processed are recorded.
        self.input_tensors.append(input_tensor)
        return output.numpy()

    def danger_level(self, l2_value, cosine_value, l2_ratio=0.13, cosine_threshold=0.9):
        """
        Score how suspicious a pair of consecutive inputs is (0, 1 or 2).

        l2_value: L2 distance between the two inputs
        cosine_value: cosine similarity between the two inputs
        l2_ratio: the L2 distance counts as suspicious when it is at most
            l2_ratio * (number of elements of the first recorded input)
        cosine_threshold: the similarity counts as suspicious when it is at
            least this value

        Raises IndexError if no input has been recorded yet.
        """
        image_size = self.input_tensors[0].numel()
        level = 0
        if l2_value <= image_size * l2_ratio:
            level += 1
        if cosine_value >= cosine_threshold:
            level += 1
        return level

    def save_to_csv(self, user_id, filename):
        """
        Append one record about the newest input to the CSV log `filename`.

        The first recorded input has nothing to be compared with, so its L2,
        cosine and danger fields are written as 0. The header row is written
        only when the log file does not exist yet or is empty, so repeated
        runs never put a second header in the middle of the file.

        Raises IndexError if predict() has not recorded any input yet.
        """
        import os  # local import: only needed for the header check below

        if not self.input_tensors:
            raise IndexError("no input recorded yet; call predict() before save_to_csv()")

        latest = self.input_tensors[-1]
        if len(self.input_tensors) == 1:
            l2_value, cosine_value, level = 0, 0, 0
        else:
            previous = self.input_tensors[-2]
            # Compute each measure once and reuse it for the danger score.
            l2_value = self.L2_norm(latest - previous).numpy()
            cosine_value = self.cosine_similarity(latest, previous)
            level = self.danger_level(l2_value, cosine_value)

        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        data = [[user_id, latest.shape, l2_value, cosine_value, level, timestamp]]
        df = pd.DataFrame(data=data, columns=self._CSV_COLUMNS)

        if isinstance(filename, (str, os.PathLike)):
            write_header = not os.path.exists(filename) or os.path.getsize(filename) == 0
        else:
            # Not a filesystem path (e.g. an open buffer): header on the first record only.
            write_header = len(self.input_tensors) == 1
        df.to_csv(filename, mode='a', header=write_header, index=False)


In [128]:
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    """Small CNN: two 3x3 convolutions, two linear layers, 10 softmax outputs."""

    def __init__(self):
        super().__init__()
        self.conv_1 = nn.Conv2d(1, 32, kernel_size=3, stride=1)
        self.conv_2 = nn.Conv2d(32, 64, kernel_size=3, stride=1)
        # 5 * 5 * 64 is the flattened feature size a 28x28 input reaches
        # after the two conv + 2x2 pooling stages in forward().
        self.fc_1 = nn.Linear(5 * 5 * 64, 128)
        self.fc_2 = nn.Linear(128, 10)

    def forward(self, x):
        """Return softmax probabilities over the 10 outputs, one row per sample."""
        features = F.max_pool2d(F.relu(self.conv_1(x)), kernel_size=2, stride=2)
        features = F.max_pool2d(F.relu(self.conv_2(features)), kernel_size=2, stride=2)

        # Collapse everything into rows of fc_1's input width.
        flat = features.view(-1, self.fc_1.in_features)
        hidden = F.relu(self.fc_1(flat))
        logits = self.fc_2(hidden)
        return F.softmax(logits, dim=1)
    
# Saved model file handed to torch.load() by the detector.
model_path = "/Users/songjunzhe/Desktop/python/ml_attack/myNet_mnist_98_7300.pt"
# One known account (id -> password) and nobody blocked.
user_dict = {"Admin": 123456}
blacklist = list()
detector = MIAttack_detector(
    model_type=Net(),
    model_path=model_path,
    user_dict=user_dict,
    blacklist=blacklist,
)


In [129]:
# Check the credentials; the returned id is what the logging loop records.
user_id = detector.user_varification(
    user_id="Admin",
    user_password=123456,
)

user is verificated.


In [130]:
# Feed images iter=1.png .. iter=10.png to the detector one by one and log
# each query right after it has been answered.
img_path_template = "/Users/songjunzhe/Desktop/python/ml_attack/mi-attack-result/miface-0-iter300/iter={}.png"
usage_log_path = "/Users/songjunzhe/Desktop/python/ml_attack/detector_usage_data.csv"
for i in range(1, 11):
    user_img_path = img_path_template.format(i)
    output = detector.predict(img_path=user_img_path)
    print(output)
    detector.save_to_csv(user_id=user_id, filename=usage_log_path)

[[1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]]
[[1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]]
[[1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]]
[[1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]]
[[1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]]
[[1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]]
[[1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]]
[[1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]]
[[1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]]
[[1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]]


In [131]:
# Load the usage log back, taking the first row as column names, and display it.
df = pd.read_csv(
    "/Users/songjunzhe/Desktop/python/ml_attack/detector_usage_data.csv",
    header=0,
)
df

Unnamed: 0,user_id,input_tensor,L2_norms,cosine_sims,danger_level,timestamp
0,Admin,"torch.Size([1, 28, 28])",0.0,0.0,0,2023-04-06 16:54:02
1,Admin,"torch.Size([1, 28, 28])",28.670542,0.99999,2,2023-04-06 16:54:02
2,Admin,"torch.Size([1, 28, 28])",17.262676,0.999996,2,2023-04-06 16:54:02
3,Admin,"torch.Size([1, 28, 28])",16.941074,0.999997,2,2023-04-06 16:54:02
4,Admin,"torch.Size([1, 28, 28])",27.513634,0.999991,2,2023-04-06 16:54:02
5,Admin,"torch.Size([1, 28, 28])",30.016663,0.999989,2,2023-04-06 16:54:02
6,Admin,"torch.Size([1, 28, 28])",29.478806,0.99999,2,2023-04-06 16:54:02
7,Admin,"torch.Size([1, 28, 28])",34.770676,0.999986,2,2023-04-06 16:54:02
8,Admin,"torch.Size([1, 28, 28])",35.39774,0.999985,2,2023-04-06 16:54:02
9,Admin,"torch.Size([1, 28, 28])",36.04164,0.999985,2,2023-04-06 16:54:02
