# SplitGuard: Detecting and Mitigating Training-Hijacking Attacks in Split Learning

In [1]:
import numpy as np
import torch
import random
import torch.nn as nn
from torchvision import transforms, datasets
from torchvision.utils import save_image
import math
import itertools
import statistics
import pickle
# import architectures_torch as architectures

from models import *
from util import *

from tqdm.notebook import tqdm
from torchvision.models import resnet18

import matplotlib.pyplot as plt
import pandas as pd
import time
import io

# Prefer the GPU when PyTorch can see one; otherwise run everything on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Running on', device)

Running on cuda


In [2]:
# Build the train/test loaders for the 'cifar' dataset and print the length of
# the training loader (782 in the recorded run).
# `load_dataset` is not defined in this notebook; presumably it comes from the
# star-imports of `models` / `util` -- TODO confirm.
trainloader, testloader = load_dataset('cifar')
print(len(trainloader))

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to sg_data/data/cifar/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:02<00:00, 62599232.90it/s]


Extracting sg_data/data/cifar/cifar-10-python.tar.gz to sg_data/data/cifar
Files already downloaded and verified
782


In [3]:
# initialization of pickle loader
class CPU_Unpickler(pickle.Unpickler):
    """Unpickler that maps pickled torch storages onto the CPU.

    The lookup of ``torch.storage._load_from_bytes`` is intercepted and replaced
    by a loader that calls ``torch.load(..., map_location='cpu')``; every other
    global is resolved by the stock ``pickle.Unpickler``.

    NOTE(security): unpickling can execute arbitrary code, so only use this on
    files from a trusted source.
    """

    def find_class(self, module, name):
        """Resolve ``module.name``, swapping in a CPU-mapping storage loader."""
        wants_storage_loader = (module == 'torch.storage' and name == '_load_from_bytes')
        if not wants_storage_loader:
            return super().find_class(module, name)
        # Storage bytes are re-read through torch.load so that map_location applies.
        return lambda b: torch.load(io.BytesIO(b), map_location='cpu')

1) Loading FSHA and honest gradients

In [4]:
def load_gradients(file_name, number_of_files, print_info=True):
    """Load pickled gradient lists from a numbered series of files.

    Files are opened as ``file_name + "0"``, ``file_name + "1"``, ... up to
    ``number_of_files - 1``. When CUDA is available the files are read with
    plain ``pickle.load``; otherwise ``CPU_Unpickler`` is used so that the
    stored tensors are mapped onto the CPU.

    NOTE(security): pickle can execute arbitrary code while loading; only point
    this at files from a trusted source.

    Args:
        file_name: Common path prefix of the pickled files.
        number_of_files: How many consecutively numbered files to read.
        print_info: When true, print a short summary of what was loaded.

    Returns:
        A list with one entry per file, each entry being whatever object the
        corresponding file contained (a list of gradients in the recorded runs).

    Raises:
        OSError: If one of the files cannot be opened.
    """
    grads_original = []

    for pickled_file in range(number_of_files):
        # The context manager closes the handle even when unpickling fails.
        with open(file_name + str(pickled_file), 'rb') as file:
            if torch.cuda.is_available():
                gradients_list = pickle.load(file)
            else:
                gradients_list = CPU_Unpickler(file).load()

        grads_original.append(gradients_list)

    if print_info:
        print("Data structure of a gradient: ",type(grads_original))
        # Only index into the first epoch / first gradient when they exist.
        if grads_original and len(grads_original[0]) > 0:
            print("Gradients brief info: {} epochs, {} gradients in an epoch, single gradient length in size  {}".format(
                len(grads_original), len(grads_original[0]), len(grads_original[0][0])
            ))
        else:
            print("Gradients brief info: {} epochs, no gradients available to describe".format(
                len(grads_original)
            ))
        print()

    return grads_original

In [5]:
# Load the gradients recorded from the honest server: 100 pickled files named
# 'cifar10_HONEST/honest_cifar_grads_0' ... '_99', one list entry per file.
honest_grads_original =  load_gradients('cifar10_HONEST/honest_cifar_grads_', 100)

Data structure of a gradient:  <class 'list'>
Gradients brief info: 100 epochs, 782 gradients in an epoch, single gradient length in size  1728



In [31]:
# Load the gradients recorded from the FSHA (attacking) server: 100 pickled files named
# 'cifar10_newAtt_fake/FSHA_newAtt_cifar10_fakes_0' ... '_99', one list entry per file.
fsha_grads_original =  load_gradients('cifar10_newAtt_fake/FSHA_newAtt_cifar10_fakes_', 100)




Data structure of a gradient:  <class 'list'>
Gradients brief info: 100 epochs, 79 gradients in an epoch, single gradient length in size  1728



2) Client NN training

In [None]:
torch.cuda.empty_cache()
REPS = 2 # number of reps to average scores from
model_str = 'resnet'
optimizer = 'adam'
dataset = 'cifar'
# Drop the loaders created earlier in the notebook before building fresh ones.
del trainloader, testloader
trainloader, testloader = load_dataset(dataset)
print(dataset, model_str)

# other params
NUM_CLASSES = 100 if dataset == 'cifar100' else 10
EPOCHS = 1 # number of epochs to run simulation for
SETUPS = ['honest']
# Percentage of batches that are (randomly) used per epoch. Defined up front so
# that the summary print below works even if the loader yields no batches.
client_data_rate = 50

# One list of [gradient, setup] pairs per repetition.
client_collected_grads = []
# Pre-initialised so the final summary does not fail when REPS is 0.
client_temp_collected_grads = []

for rep in tqdm(range(REPS)):
    client_temp_collected_grads = []
    start_time_nn_train = time.time()

    for adv_type in SETUPS:

        model = get_models(model_str,  dataset, device)

        client_opt, server_opt = get_optims(optimizer, model, model)
        criterion = nn.CrossEntropyLoss()

        for epoch in range(EPOCHS):
            for item in tqdm(trainloader, leave=False):
                # random percentage selection method: skip roughly
                # (100 - client_data_rate)% of the batches
                if random.random() >= client_data_rate/100:
                    continue

                images, labels = item[0].to(device), item[1].to(device)
                client_opt.zero_grad()
                server_opt.zero_grad()

                pred = model(images)
                loss = criterion(pred, labels)

                # loss backward, collect client grad
                loss.backward()
                # Gradient of the model's first parameter tensor, flattened to 1-D.
                client_grad = list(model.parameters())[0].grad.detach().clone().flatten()

                # NOTE(review): neither optimizer is stepped, so the weights never
                # change and every gradient comes from the freshly built model.
                # Confirm this is intended for a cell titled "training".
                client_temp_collected_grads.append([client_grad,adv_type]) # collect train grads

    end_time_nn_train = time.time()
    print("Setup {} training is completed. Elasped time: {}".format(rep,round(end_time_nn_train-start_time_nn_train,3)))
    client_collected_grads.append(client_temp_collected_grads)


# NOTE(review): the value printed as "Number of epochs" is the number of
# repetitions (one entry is appended to client_collected_grads per rep).
print("Number of epochs: ", len(client_collected_grads))
# The gradient count reported here is that of the last repetition only.
print("Data rate and number of gradients in the trained NN model while collecting gradients for AD: ({}%, {})".format(
  client_data_rate, len(client_temp_collected_grads)))

torch.cuda.empty_cache()


Files already downloaded and verified
Files already downloaded and verified
cifar resnet


  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/782 [00:00<?, ?it/s]

Setup 0 training is completed. Elasped time: 26.233


  0%|          | 0/782 [00:00<?, ?it/s]

Setup 1 training is completed. Elasped time: 18.938
Number of epochs:  2
Data rate and number of gradients in the trained NN model while collecting gradients for AD: (50%, 392)


3) Converting Tensor gradients to NumPy Array


In [7]:
def grad_tensor_to_numpy_converter(original_gradients, print_info=True):
    """Convert a nested list of tensors into a nested list of NumPy arrays.

    Each element is converted with ``.detach().cpu().numpy()``; the
    epoch -> gradient nesting of the input is preserved.

    Args:
        original_gradients: Sequence of epochs, each a sequence of tensors.
        print_info: When true, print the element types before/after the
            conversion and a brief size summary.

    Returns:
        A list of lists with the same layout as ``original_gradients`` whose
        leaves are the converted arrays.
    """
    new_gradients_nparray = [
        [gradient.detach().cpu().numpy() for gradient in epoch_gradients]
        for epoch_gradients in original_gradients
    ]

    if print_info:
        # Only index into the first epoch / first gradient when they exist.
        if new_gradients_nparray and len(new_gradients_nparray[0]) > 0:
            print("Initial D.S. of one gradient in an epoch: ",type(original_gradients[0][0]))
            print("After conversion, D.S. of one gradient in an epoch: ",type(new_gradients_nparray[0][0]))
            print("Gradients brief info: {} epochs, {} gradients, single gradient length in size  {}".format(
                len(new_gradients_nparray), len(new_gradients_nparray[0]), len(new_gradients_nparray[0][0])
            ))
        else:
            print("Gradients brief info: {} epochs, no gradients available to describe".format(
                len(new_gradients_nparray)
            ))
        print()

    return new_gradients_nparray




In [32]:
# Convert both gradient collections (honest and FSHA) from torch tensors to NumPy
# arrays; each call also prints a before/after summary because print_info defaults to True.
honest_grads_nparray = grad_tensor_to_numpy_converter(honest_grads_original)
fsha_grads_nparray = grad_tensor_to_numpy_converter(fsha_grads_original)


Initial D.S. of one gradient in an epoch:  <class 'torch.Tensor'>
After conversion, D.S. of one gradient in an epoch:  <class 'numpy.ndarray'>
Gradients brief info: 100 epochs, 782 gradients, single gradient length in size  1728

Initial D.S. of one gradient in an epoch:  <class 'torch.Tensor'>
After conversion, D.S. of one gradient in an epoch:  <class 'numpy.ndarray'>
Gradients brief info: 100 epochs, 79 gradients, single gradient length in size  1728



4) Selecting desired reduced data rate

In [33]:
# random percentage selection method: each honest gradient is kept with
# probability data_rate_honest_gradients / 100 (100 keeps everything).
data_rate_honest_gradients = 100

# NOTE(review): the sampling uses the unseeded global `random` state, so any
# rate below 100 gives a different subset on every run.
honest_grads_reduced_data_rate = [
    [
        gradient_honest
        for gradient_honest in epoch_gradients
        if random.random() < data_rate_honest_gradients/100
    ]
    for epoch_gradients in honest_grads_nparray
]

print("Number of honest epochs after reduced data rate: {}".format(len(honest_grads_reduced_data_rate)))

# check number of gradients and rates for first 10 epoch
print("First 10 epoch number of gradients and data rates:")
# Bounded by the number of epochs so that short runs do not raise IndexError.
for i in range(min(10, len(honest_grads_reduced_data_rate))):
    kept_count = len(honest_grads_reduced_data_rate[i])
    full_count = len(honest_grads_nparray[i])
    # Round after scaling to percent: round(x, 2)*100 can print artefacts such
    # as 56.99999999999999. An empty source epoch is reported as 0.0%.
    kept_percent = float(round(100 * kept_count / full_count)) if full_count else 0.0
    print("({}, {}%) ".format(kept_count, kept_percent), end= " ")

Number of honest epochs after reduced data rate: 100
First 10 epoch number of gradients and data rates:
(782, 100.0%)  (782, 100.0%)  (782, 100.0%)  (782, 100.0%)  (782, 100.0%)  (782, 100.0%)  (782, 100.0%)  (782, 100.0%)  (782, 100.0%)  (782, 100.0%)  

5) Checking gradient and epoch sizes before training LOF

In [34]:
# Sanity check before fitting LOF: for the FSHA and the (sub-sampled) honest
# collections, print the number of epochs, the size of the first epoch and the
# length of one sample gradient.
# NOTE(review): the sample gradients are picked with hard-coded indices
# ([9][20] and [2][3]); smaller collections will raise IndexError here.
print("Number of FSHA Epochs: ",len(fsha_grads_nparray))
print("Length of one FSHA Epoch: ",len(fsha_grads_nparray[0]))
print("Length of one FSHA Grad: ",len(fsha_grads_nparray[9][20]))
print()
print("Number of Honest Epochs: ",len(honest_grads_reduced_data_rate))
print("Length of one Honest Epoch: ",len(honest_grads_reduced_data_rate[0]))
print("Length of one Honest Grad: ",len(honest_grads_reduced_data_rate[2][3]))

Number of FSHA Epochs:  100
Length of one FSHA Epoch:  79
Length of one FSHA Grad:  1728

Number of Honest Epochs:  100
Length of one Honest Epoch:  782
Length of one Honest Grad:  1728


6) Anomaly Detection using Local Outlier Factor (LOF)

In [None]:
from sklearn.neighbors import LocalOutlierFactor


def _fit_epoch_lof(train_grads):
    """Fit a novelty-mode LOF on one epoch of honest gradients.

    ``n_neighbors`` is ``len(train_grads) - 1``, i.e. every other training
    gradient counts as a neighbour.
    """
    detector = LocalOutlierFactor(n_neighbors = (len(train_grads)-1), novelty = True)
    return detector.fit(train_grads)


def _first_flagged_window(detector, epoch_grads, window_size):
    """Return the end index of the first window with more outliers than inliers.

    Windows are ``epoch_grads[end - window_size : end]`` for ``end`` running
    from ``window_size`` to ``len(epoch_grads)`` inclusive, so the final
    gradients of the epoch are evaluated as well. Returns ``None`` when no
    window is flagged or the epoch is shorter than one window.
    """
    if len(epoch_grads) < window_size:
        return None

    # Novelty predictions are made per sample, so the epoch is scored once and
    # the windows slide over the resulting labels (+1 inlier, -1 outlier).
    is_outlier = detector.predict(epoch_grads) == -1

    for window_end in range(window_size, len(epoch_grads) + 1):
        outliers_in_window = int(is_outlier[window_end - window_size : window_end].sum())
        inliers_in_window = window_size - outliers_in_window
        if outliers_in_window > inliers_in_window:
            return window_end
    return None


window_size_list = [1, 10]

for window_size in window_size_list:
    Total_TP_ind1 = 0
    Total_FP_ind1 = 0
    t_detection_point = 0

    # get TPR(True Positive Rates) results
    # train LOF with (N)th honest epoch and predict (N)th FSHA epoch
    for epoch_fsha in range(len(fsha_grads_nparray) - 1): # for FSHA EPOCHS
        lof = _fit_epoch_lof(honest_grads_reduced_data_rate[epoch_fsha])
        fsha_grad = _first_flagged_window(lof, fsha_grads_nparray[epoch_fsha], window_size)

        if fsha_grad is not None:
            Total_TP_ind1 +=1
            # NOTE(review): the detection index is normalised by len(trainloader),
            # not by the length of the FSHA epoch being tested -- confirm intent.
            t_detection_point += fsha_grad/len(trainloader)

    print("Window size: ", window_size)
    print("Avr TPR:", Total_TP_ind1/(len(fsha_grads_nparray)-1))
    # Averaged over all tested epochs; undetected epochs contribute 0.
    print("t: ", t_detection_point/(len(fsha_grads_nparray)-1))
    print("--------------------------------")

    # get FPR(False Positive Rates) results
    # train LOF with (N)th honest epoch and predict (N+1)th honest epoch
    for epoch_honest in range(len(honest_grads_nparray)-1):
        lof = _fit_epoch_lof(honest_grads_reduced_data_rate[epoch_honest])
        # The tested epoch is taken from the full (not sub-sampled) honest gradients.
        honest_grad = _first_flagged_window(lof, honest_grads_nparray[epoch_honest+1], window_size)

        if honest_grad is not None:
            print("**Honest Epoch: {} | Attack is detected in ({})th honest gradient.".format(epoch_honest, honest_grad))
            Total_FP_ind1 +=1

    print("Window size: ", window_size)
    print("Avr FPR:", Total_FP_ind1/(len(honest_grads_nparray)-1))
    print("--------------------------------")


Window size:  1
Avr TPR: 0.0
t:  0.0
--------------------------------
