https://www.tensorflow.org/responsible_ai/privacy/tutorials/privacy_report

https://github.com/tensorflow/privacy/tree/master/tensorflow_privacy/privacy/privacy_tests/membership_inference_attack

Perform first category EMD during inference phase

I want a generalized approach where I can use TF reports on PyTorch models.

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from tensorflow import keras
import tensorflow as tf

# Fully connected MNIST classifier that outputs class probabilities
class MNISTClassifier(nn.Module):
    """Three-layer perceptron over flattened 28x28 inputs.

    ``forward`` ends with a softmax over dim 1, so the model returns
    per-class probabilities (each row sums to 1) rather than raw logits.
    """

    def __init__(self):
        super().__init__()
        self.reshape = nn.Flatten()
        self.fc1 = nn.Linear(28 * 28, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 10)
        self.nonlinear = nn.Softmax(dim=1)

    def forward(self, x):
        """Return a ``(batch, 10)`` tensor of class probabilities for ``x``."""
        flat = self.reshape(x)
        hidden = self.fc1(flat).relu()
        hidden = self.fc2(hidden).relu()
        logits = self.fc3(hidden)
        return self.nonlinear(logits)

# Load the MNIST dataset
# Images are converted to tensors and then normalized with the mean/std constants below
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
    ]
)

# Only the training split is created with download=True
train_data = datasets.MNIST(root='data', train=True, download=True, transform=transform)
test_data = datasets.MNIST(root='data', train=False, transform=transform)

# 64 * 20 = 1280 samples per batch; only the training loader shuffles
train_loader = DataLoader(dataset=train_data, batch_size=64 * 20, shuffle=True)
test_loader = DataLoader(dataset=test_data, batch_size=64 * 20, shuffle=False)

# Train the model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = MNISTClassifier().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

epochs = 1
for epoch in range(epochs):
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        output = model(images)
        # The model already returns softmax probabilities, whereas
        # CrossEntropyLoss applies log-softmax internally and expects logits.
        # Passing log-probabilities makes that internal log-softmax a
        # renormalization only, so the loss is the true cross-entropy instead
        # of one computed over doubly-softmaxed scores.
        # clamp_min guards against log(0) producing -inf / NaN gradients.
        log_probabilities = torch.log(output.clamp_min(1e-12))
        loss = criterion(log_probabilities, labels)
        loss.backward()
        optimizer.step()


2023-04-19 08:24:53.623649: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Move the trained model's parameters to the CPU
model.to('cpu')

MNISTClassifier(
  (reshape): Flatten(start_dim=1, end_dim=-1)
  (fc1): Linear(in_features=784, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=64, bias=True)
  (fc3): Linear(in_features=64, out_features=10, bias=True)
  (nonlinear): Softmax(dim=1)
)

In [3]:
import numpy as np
from sklearn.metrics import accuracy_score
from scipy.interpolate import CubicSpline
import numpy as np
from PyEMD import EMD
from sklearn.kernel_approximation import RBFSampler


# Koopman observables: non-linear sines of the monomials x**0 .. x**n
def koopman_observables(x, n):
    """Stack ``sin(x**i)`` for ``i = 0 .. n`` vertically.

    The ``i = 0`` term is ``sin(1)`` everywhere because ``x**0 == 1``.
    For a 1-D ``x`` the result has ``n + 1`` rows, one per power.
    """
    stacked_rows = []
    for power in range(n + 1):
        stacked_rows.append(np.sin(x**power))
    return np.vstack(stacked_rows)


def approxim(output, *, sampling_ratio=3, gamma=0.6, n_components=3,
             random_state=None):
    """Re-approximate each row of a model output via spline + lifted linear fit.

    For every row of ``output`` the function:

    1. fits a cubic spline through the row's values,
    2. evaluates the spline on a grid ``sampling_ratio`` times denser,
    3. lifts the upsampled values with random Fourier features
       (``RBFSampler``),
    4. fits a linear operator between consecutive lifted samples and applies
       it to all lifted samples,
    5. keeps the first lifted coordinate and takes every
       ``sampling_ratio``-th sample, which restores the row's original length.

    Args:
        output: ``torch.Tensor`` whose rows are processed independently. It is
            detached and copied to the CPU first, so tensors that require
            grad or live on another device are accepted.
        sampling_ratio: positive integer upsampling factor for the spline
            grid (default 3).
        gamma: ``gamma`` passed to ``RBFSampler`` (default 0.6).
        n_components: ``n_components`` passed to ``RBFSampler`` (default 3).
        random_state: ``random_state`` passed to ``RBFSampler``. With the
            default ``None`` the random features are re-drawn on every fit,
            so repeated calls on the same input give different results; pass
            an integer for reproducible output.

    Returns:
        ``torch.Tensor`` created from a NumPy array holding one processed row
        per input row.
    """
    # .cpu() is a no-op for CPU tensors and lets CUDA outputs be passed directly
    rows = output.detach().cpu().numpy()

    processed_rows = []
    for row in rows:
        n_samples = row.shape[0]

        # Pseudo-time axis: the position within the row plays the role of time
        coarse_grid = np.linspace(0, n_samples, n_samples)
        dense_grid = np.linspace(0, n_samples, n_samples * sampling_ratio)

        # Spline through the original samples, evaluated on the denser grid
        dense_values = CubicSpline(coarse_grid, row)(dense_grid)

        # Feature map used for the lifted (EDMD-style) representation
        feature_map = RBFSampler(
            gamma=gamma, n_components=n_components, random_state=random_state
        )
        phi = feature_map.fit_transform(dense_values.reshape(-1, 1))

        # Linear operator fitted between consecutive lifted samples
        phi_now, phi_next = phi[:-1, :], phi[1:, :]
        # NOTE(review): the factor order here is kept exactly as originally
        # written; confirm it matches the intended EDMD formulation
        K = phi_now.T @ phi_next @ np.linalg.pinv(phi_now.T @ phi_now)

        # Apply the operator to every lifted sample; keep the first coordinate
        lifted_approx = (K @ phi.T).T
        first_coordinate = lifted_approx[:, 0]

        # Downsample back to the original number of samples.
        # NOTE(review): the dense grid has n_samples * sampling_ratio points,
        # so the kept points do not fall exactly on the coarse grid positions
        # (other than the first) — confirm this is intended
        processed_rows.append(first_coordinate[::sampling_ratio])

    return torch.from_numpy(np.array(processed_rows))

In [4]:
# Count the number of trainable parameters
def count_parameters(model):
    """Return the total element count of ``model`` parameters with ``requires_grad`` set."""
    total = 0
    for param in model.parameters():
        if param.requires_grad:
            total += param.numel()
    return total

# Report the trainable-parameter count, formatted with thousands separators
print(f'The model has {count_parameters(model):,} trainable parameters')


The model has 109,386 trainable parameters


In [5]:
# Bind the CPU-resident model to the name used for the inference experiments
target_model = model.to('cpu')

In [6]:
# Switch the model to evaluation mode before running inference
target_model.eval()

MNISTClassifier(
  (reshape): Flatten(start_dim=1, end_dim=-1)
  (fc1): Linear(in_features=784, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=64, bias=True)
  (fc3): Linear(in_features=64, out_features=10, bias=True)
  (nonlinear): Softmax(dim=1)
)

In [7]:
import time

# Single random input shaped like one MNIST image (batch of 1)
input_tensor = torch.randn((1, 1, 28, 28))

model.eval()
# Perform a warm-up run to avoid potential overhead caused by initial device setup
with torch.no_grad():
    _ = approxim(target_model(input_tensor))

num_iterations = 1000  # Choose a suitable number of iterations to average over

# perf_counter is a monotonic, high-resolution clock meant for measuring short
# durations; time.time() follows the system clock and can jump if it is adjusted
start_time = time.perf_counter()
with torch.no_grad():
    for _ in range(num_iterations):
        _ = approxim(target_model(input_tensor))
end_time = time.perf_counter()

# Average seconds per call: model forward pass plus approxim post-processing
inference_latency = (end_time - start_time) / num_iterations
#convert to microseconds
inference_latency = inference_latency * 1000000
print(f'Inference latency: {inference_latency:.20f} micro-seconds')



Inference latency: 888.13185691833496093750 micro-seconds
