# Feature Engineering with SHAP values Experiment 2

## Google Colab

In [None]:
from google.colab import drive
drive.flush_and_unmount()
drive.mount('/content/drive', force_remount=True)

import sys
sys.path.append('/content/drive/My Drive/Colab Notebooks')
sys.path.append('/content/drive/My Drive/Colab Notebooks/federated_learning')

!pip install shap==0.40.0

In [None]:
import sklearn

## Experimental Setup

In [3]:
from federated_learning.utils import SHAPUtil, experiment_util, Visualizer
from federated_learning import ClientPlane, Configuration, ObserverConfiguration
from federated_learning.server import Server
from datetime import datetime

In [None]:
def cos_similarity_values(s_client, s_server):
    import numpy as np
    cos_similarity = [[] for i in range(10)]
    shap_subtract = np.subtract(s_client, s_server)
    for row_idx, row in enumerate(shap_subtract):
        for img_idx, image in enumerate(row):
                cos_similarity[row_idx].append(round(np.sum(image.flatten()), 3))

    print(np.matrix(cos_similarity))

In [None]:
from scipy import spatial
import numpy

In [None]:
scipy.__version__


In [None]:
numpy.__version__

In [None]:
def cos_similarity_values(s_client, s_server):
    from scipy import spatial
    import numpy as np
    cos_similarity_server = [[] for i in range(10)]
    cos_similarity_client = [[] for i in range(10)]
    shap_subtract = np.subtract(s_client, s_server)
    for row_idx, row in enumerate(s_server):
        for img_idx, image in enumerate(row):
                cos_similarity_server[row_idx].append(np.sum(image.flatten()))
    for row_idx, row in enumerate(s_client):
        for img_idx, image in enumerate(row):
                cos_similarity_client[row_idx].append(np.sum(image.flatten()))
    spatial.distance.cosine(np.array(cos_similarity_server).flatten(), np.array(cos_similarity_client).flatten())
    return spatial.distance.cosine(np.array(cos_similarity_server).flatten(), np.array(cos_similarity_client).flatten())

In [None]:
def cos_similarity_values(s_client, s_server):
    from scipy import spatial
    import numpy as np
    cos_similarity = [[] for i in range(10)]
    similarity_sum = [[] for i in range(10)]
    shap_subtract = np.subtract(s_client, s_server)
    for row_idx, row in enumerate(s_client):
        for img_idx, image in enumerate(row):
                cos_similarity[row_idx].append(spatial.distance.cosine(image.flatten(),s_server[row_idx][img_idx].flatten()))
                
    return np.sum(cos_similarity)
    


In [None]:
# Works for MNIST
def cos_similarity_values(s_client, s_server):
    from scipy import spatial
    import numpy as np
    cos_similarity = [[] for i in range(10)]
    differences_sum = [[] for i in range(10)]
    shap_subtract = np.subtract(s_client, s_server)
    for row_idx, row in enumerate(s_client):
        for img_idx, image in enumerate(row):
                cos_similarity[row_idx].append(spatial.distance.cosine(image.flatten(),s_server[row_idx][img_idx].flatten()))
                differences_sum[row_idx].append(np.sum(shap_subtract[row_idx][img_idx].flatten()))
    
    return np.sum(cos_similarity), np.array(differences_sum).diagonal()[np.argmax(np.abs(np.array(differences_sum).diagonal()))]
    


In [None]:
def cos_similarity_values(s_client, s_server):
    from scipy import spatial
    import numpy as np
    cos_similarity = [[] for i in range(10)]
    similarity_sum = [[] for i in range(10)]
    shap_subtract = np.subtract(s_client, s_server)
    for row_idx, row in enumerate(s_client):
        for img_idx, image in enumerate(row):
                cos_similarity[row_idx].append(spatial.distance.cosine(image.flatten(),s_server[row_idx][img_idx].flatten()))
                similarity_sum[row_idx].append(np.sum(shap_subtract[row_idx][img_idx].flatten()))
    argmax = np.argmax(np.array(cos_similarity).diagonal())
    print(np.array(cos_similarity).diagonal()[argmax] * np.array(similarity_sum).diagonal()[argmax])
    return np.sum(cos_similarity), np.array(cos_similarity).diagonal().dot(np.array(similarity_sum).diagonal())
    


In [42]:
def cos_similarity_values(s_client, s_server):
    from scipy import spatial
    import numpy as np
    cos_similarity = [[] for i in range(10)]
    differences_sum = [[] for i in range(10)]
    shap_subtract = np.subtract(s_client, s_server)
    for row_idx, row in enumerate(s_client):
        for img_idx, image in enumerate(row):
                cos_similarity[row_idx].append(spatial.distance.cosine(image.flatten(),s_server[row_idx][img_idx].flatten()))
                differences_sum[row_idx].append(np.sum(shap_subtract[row_idx][img_idx].flatten()))
    #print(cos_similarity[5][5], cos_similarity[5][4],cos_similarity[4][5])
    return np.sum(cos_similarity), np.sum(np.abs(differences_sum)), np.max(cos_similarity), np.abs(np.array(differences_sum).flatten()[np.argmax(np.abs(np.array(differences_sum).flatten()))])
    


# MNIST

In [4]:
from federated_learning.nets import MNISTCNN
from federated_learning.dataset import MNISTDataset
import os
config = Configuration()
config.POISONED_CLIENTS = 0
config.DATA_POISONING_PERCENTAGE = 1
config.DATASET = MNISTDataset
config.MODELNAME = config.MNIST_NAME
config.NETWORK = MNISTCNN
observer_config = ObserverConfiguration()
observer_config.experiment_type = "shap_fl_poisoned"
observer_config.experiment_id = 1
observer_config.test = False
observer_config.datasetObserverConfiguration = "MNIST"
neutral_label = 2

In [None]:
# Google Colab Settigns
config.TEMP = os.path.join('/content/drive/My Drive/Colab Notebooks/temp')
config.FMNIST_DATASET_PATH = os.path.join('/content/data/fmnist')
config.MNIST_DATASET_PATH = os.path.join('/content/data/mnist')
config.CIFAR10_DATASET_PATH = os.path.join('/content/data/cifar10')
config.VM_URL = "none"

In [5]:
data = config.DATASET(config)
shap_util = SHAPUtil(data.test_dataloader) 
server = Server(config, observer_config,data.train_dataloader, data.test_dataloader, shap_util)
visualizer = Visualizer(shap_util)

MNIST training data loaded.
MNIST test data loaded.


## Experiment Setup 

In [None]:
import numpy as np
import copy
import torch
import os
for i in range(200):
    if (i+1) in [2, 5,10,75,100,200]:
        file = "./temp/models/ex6/MNIST_round_{}.model".format(i+1)
        if not os.path.exists(os.path.dirname(file)):
                os.makedirs(os.path.dirname(file))
        torch.save(server.net.state_dict(), file)
    experiment_util.set_rounds(client_plane, server, i+1)
    experiment_util.run_round(client_plane, server, i+1)

## Experiment

In [63]:
import torch
config.FROM_LABEL = 3
config.TO_LABEL = 8
shap_images = [config.FROM_LABEL ,config.TO_LABEL]
for j in [100]:
    data = config.DATASET(config)
    client_plane = ClientPlane(config, observer_config, data, shap_util)
    model_file = file = "./temp/models/ex6/MNIST_round_{}.model".format(j)
    server.net =  MNISTCNN()
    server.net.load_state_dict(torch.load(model_file))

    server.test()
    recall, precision, accuracy = server.analize_test()
    print("Original", recall, precision, accuracy)
    server_shap = server.get_shap_values()

    config.POISONED_CLIENTS = 100
    experiment_util.update_configs(client_plane, server, config, observer_config)
    print(len(client_plane.clients[0].train_dataloader.dataset.dataset.targets[client_plane.clients[0].train_dataloader.dataset.dataset.targets == 5]))

    client_plane.poison_clients()
    clean_clients = experiment_util.select_random_clean(client_plane, config, 100)
    poisoned_clients = experiment_util.select_poisoned(client_plane, 100)
    clean_dis = []
    poisoned_dis = []
    clean_diff = []
    poisoned_diff = []
    clean_max_diff = []
    poisoned_max_diff = []
    clean_max_dis = []
    poisoned_max_dis = []
    print("Clean")
    print(len(client_plane.clients[0].train_dataloader.dataset.dataset.targets[client_plane.clients[0].train_dataloader.dataset.dataset.targets == 5]))
    for idx, i in enumerate(clean_clients[:100]):
        client_plane.update_clients(server.get_nn_parameters())
        client_plane.clients[i].train(j+1)
        clean_client_shap = client_plane.clients[i].get_shap_values()
        distance, diff, dis_max, diff_max = cos_similarity_values(clean_client_shap, server_shap)
        clean_dis.append(distance)
        clean_diff.append(diff)
        clean_max_dis.append(dis_max)
        clean_max_diff.append(diff_max)
        if (idx+1)%25 == 0:
            print(clean_dis[idx-25:idx])
    
    clean = {
        "dis" : clean_dis, 
        "diff" : clean_diff,
        "max_dis" : clean_max_dis,
        "max_diff" : clean_max_diff
    }

    print("Poisoned")
    server.net =  MNISTCNN()
    server.net.load_state_dict(torch.load(model_file))
    for idx, i in enumerate(poisoned_clients[:100]):
        client_plane.update_clients(server.get_nn_parameters())    
        client_plane.clients[i].train(j+1)
        poisoned_client_shap = client_plane.clients[i].get_shap_values()
        distance, diff, dis_max, diff_max = cos_similarity_values(poisoned_client_shap, server_shap)
        poisoned_dis.append(distance)
        poisoned_diff.append(diff)
        poisoned_max_dis.append(dis_max)
        poisoned_max_diff.append(diff_max)
        if (idx+1)%25 == 0:
            print(poisoned_dis[idx-25:idx])
    print(len(client_plane.clients[poisoned_clients[0]].train_dataloader.dataset.dataset.targets[client_plane.clients[poisoned_clients[0]].train_dataloader.dataset.dataset.targets == 5]))
    client_plane.reset_default_client_nets()
    client_plane.reset_poisoning_attack()
    
    poisoned = {
        "dis" : poisoned_dis, 
        "diff" : poisoned_diff,
        "max_dis" : poisoned_max_dis,
        "max_diff" : poisoned_max_diff
    }

MNIST training data loaded.
MNIST test data loaded.
Create 200 clients with dataset of size 300

Test set: Average loss: 0.0002, Accuracy: 9625/10000 (96%)

Original tensor([0.9929, 0.9833, 0.9612, 0.9525, 0.9562, 0.9720, 0.9729, 0.9523, 0.9446,
        0.9366]) tensor([0.9615, 0.9867, 0.9350, 0.9649, 0.9812, 0.9527, 0.9739, 0.9459, 0.9664,
        0.9565]) 0.9625
5421
Poison 100/200 clients
Flip 100.0% of the 3 labels to 8
[114 152 131  48  30  65  58  39 110 111 125 113  95 126 109 132  86 120
  25 171  33  77 166   0  10  91 135  12 121  28 192  34  92 142 148 104
  94 119 185  75  41  56  69 102  27 150 129  37  90 140 146  44  87 141
 175  88  50 193 112   3 178 154  74 136  97  26   9 180  21 127 107   6
 177 139  67  93  85 194 105  78  63 165 115 106 174  20  46 186   7 157
  36 100  24  54 108  35 161  23  31  43]
20/100 clients poisoned
40/100 clients poisoned
60/100 clients poisoned
80/100 clients poisoned
100/100 clients poisoned
Clean
5421
[]
[24.97977832954254, 16.2242632

In [34]:
print(clean_dis)

[15.530316455622417, 18.244994262760798, 20.836231860556786, 19.975176716495582, 17.67435601202746, 15.24317492068116, 18.80911652975068, 23.3779652276858, 18.95633340237264, 20.499526765716134, 18.186258815564354, 20.035178491146503, 18.396957921604173, 16.903583453221326, 20.284596315727768, 17.742191459164772, 20.418248917800376, 20.866013884757056, 16.872173573636594, 19.205362967662523, 19.18864946929456, 19.66393926118082, 17.655000348279188, 18.333519263186254, 17.366433898086978, 21.908157897900765, 18.326555985507238, 21.155069017772053, 19.318674771060333, 27.129678178967836, 17.957733497164877, 21.44502716965343, 17.007277753044963, 19.045306927595064, 19.640173197600777, 20.0775339796562, 20.63190986840273, 19.16170179565608, 20.219699594867684, 16.16571347255154, 20.744657731466468, 17.963838393081662, 16.477355364441372, 23.91165657303493, 18.41753628302295, 23.23727536645302, 23.145276770511607, 19.490551919275156, 17.636783745889097, 20.054525204752043, 18.8895608940746

In [64]:
import pprint
pp = pprint.PrettyPrinter(width=6000, compact=True)
pp.pprint(clean)

{'diff': [0.6633873930856102, 1.0323399182147122, 1.0516210630335632, 0.5827687656117053, 1.4623795216739777, 0.7263965098683743, 0.6677118209243585, 0.7456121382634658, 0.6842120236088863, 1.4424620763281681, 1.576993231884808, 1.3521888985980914, 1.5570048251377049, 1.2074823305945057, 0.9154437713262125, 1.3610847716914511, 0.8174158031731882, 1.0274446284162828, 0.7115386128128637, 1.3212946757635928, 1.0712023337206227, 1.2577817601184487, 1.412887035518747, 1.0984166856316118, 2.201627087737092, 1.0338549820214116, 1.3088756239566104, 1.5039355433383736, 0.9997456826917023, 1.1648831256143959, 1.0544449378368372, 1.4101974027543704, 0.8090811487959471, 1.2742144684429006, 1.5085113248033464, 0.9257820314557704, 0.9548976605769518, 1.6984789366019721, 1.1211888876768934, 1.7285291895305153, 1.2107672704819972, 1.5223410880754888, 1.6840484905238653, 0.6851265244526191, 0.9096266359026743, 1.859776132352248, 1.001439081637975, 0.6765615208964901, 1.0696522068233487, 0.7554157671885

In [62]:
pp.pprint(poisoned)

{'diff': [4.350447908102634, 4.11401355537956, 4.242383100333176, 3.640177293428475, 4.384246363766371, 3.750533066355017, 4.541517332440831, 3.6616529941570772, 4.040543904914584, 4.374157515563593, 4.222564876780909, 4.044961584015893, 4.757022614559643, 4.734185069292219, 4.855368524577328, 3.8115248926852026, 5.274711963241388, 4.120536787873713, 4.508849922332602, 4.18158604350975, 3.6870939056857774, 4.06506934169024, 4.82095122993415, 5.258245826039327, 4.5829809112678594, 4.61635426927449, 5.343014713716545, 5.554582087546924, 4.002896179401662, 4.025771496940882, 4.320368156812647, 4.098414303250997, 4.066426162565429, 4.80466567846995, 4.597297633695247, 3.908485555693767, 4.132137257949262, 4.788074281707418, 5.05412834128604, 3.8376499274764493, 4.135299860679261, 4.398360805259879, 4.714425849063594, 4.681396065148586, 4.945574860344331, 4.297160582858492, 4.52326105579217, 5.074047309558155, 3.0072632028195057, 4.367139185778366, 4.170946024847293, 5.6101574572636395, 3.8

# Fashion MNIST

In [None]:
from federated_learning.nets import FMNISTCNN
from federated_learning.dataset import FMNISTDataset
import os
config = Configuration()
config.POISONED_CLIENTS = 0
config.DATA_POISONING_PERCENTAGE = 1
config.DATASET = FMNISTDataset
config.MODELNAME = config.FMNIST_NAME
config.NETWORK = FMNISTCNN
observer_config = ObserverConfiguration()
observer_config.experiment_type = "shap_fl_poisoned"
observer_config.experiment_id = 1
observer_config.test = False
observer_config.datasetObserverConfiguration = "MNIST"
neutral_label = 2

In [None]:
# Google Colab Settigns
config.TEMP = os.path.join('/content/drive/My Drive/Colab Notebooks/temp')
config.FMNIST_DATASET_PATH = os.path.join('/content/data/fmnist')
config.MNIST_DATASET_PATH = os.path.join('/content/data/mnist')
config.CIFAR10_DATASET_PATH = os.path.join('/content/data/cifar10')
config.VM_URL = "none"

In [None]:
data = config.DATASET(config)
shap_util = SHAPUtil(data.test_dataloader) 
server = Server(config, observer_config,data.train_dataloader, data.test_dataloader, shap_util)
client_plane = ClientPlane(config, observer_config, data, shap_util)
visualizer = Visualizer(shap_util)

In [None]:
import numpy as np
import copy
import torch
import os
for i in range(200):
    if (i+1) in [2, 5,10,75,100,200]:
        file = "/content/drive/My Drive/Colab Notebooks/temp/models/ex6/FMNIST_round_{}.model".format(i+1)
        if not os.path.exists(os.path.dirname(file)):
                os.makedirs(os.path.dirname(file))
        torch.save(server.net.state_dict(), file)
    experiment_util.set_rounds(client_plane, server, i+1)
    experiment_util.run_round(client_plane, server, i+1)

In [None]:
import torch
config.FROM_LABEL = 5
config.TO_LABEL = 4
shap_images = [config.FROM_LABEL ,config.TO_LABEL]
for j in [100]:
    data = config.DATASET(config)
    client_plane = ClientPlane(config, observer_config, data, shap_util)
    model_file = file = "/content/drive/My Drive/Colab Notebooks/temp/models/ex6/FMNIST_round_{}.model".format(j)
    server.net = FMNISTCNN()
    server.net.load_state_dict(torch.load(model_file))

    server.test()
    recall, precision, accuracy = server.analize_test()
    print("Original", recall, precision, accuracy)
    server_shap = server.get_shap_values()

    config.POISONED_CLIENTS = 100
    experiment_util.update_configs(client_plane, server, config, observer_config)
    print(len(client_plane.clients[0].train_dataloader.dataset.dataset.targets[client_plane.clients[0].train_dataloader.dataset.dataset.targets == 5]))

    client_plane.poison_clients()
    clean_clients = experiment_util.select_random_clean(client_plane, config, 100)
    poisoned_clients = experiment_util.select_poisoned(client_plane, 100)
    clean_dis = []
    poisoned_dis = []
    clean_diff = []
    poisoned_diff = []
    clean_max_diff = []
    poisoned_max_diff = []
    clean_max_dis = []
    poisoned_max_dis = []
    print("Clean")
    print(len(client_plane.clients[0].train_dataloader.dataset.dataset.targets[client_plane.clients[0].train_dataloader.dataset.dataset.targets == 5]))
    for idx, i in enumerate(clean_clients[:100]):
        client_plane.update_clients(server.get_nn_parameters())
        client_plane.clients[i].train(j+1)
        clean_client_shap = client_plane.clients[i].get_shap_values()
        distance, diff, dis_max, diff_max = cos_similarity_values(clean_client_shap, server_shap)
        clean_dis.append(distance)
        clean_diff.append(diff)
        clean_max_dis.append(dis_max)
        clean_max_diff.append(diff_max)
        if (idx+1)%25 == 0:
            print(clean_dis[idx-25:idx])
    
    clean = {
        "dis" : clean_dis, 
        "diff" : clean_diff,
        "max_dis" : clean_max_dis,
        "max_diff" : clean_max_diff
    }

    print("Poisoned")
    server.net =  FMNISTCNN()
    server.net.load_state_dict(torch.load(model_file))
    for idx, i in enumerate(poisoned_clients[:100]):
        client_plane.update_clients(server.get_nn_parameters())    
        client_plane.clients[i].train(j+1)
        poisoned_client_shap = client_plane.clients[i].get_shap_values()
        distance, diff, dis_max, diff_max = cos_similarity_values(poisoned_client_shap, server_shap)
        poisoned_dis.append(distance)
        poisoned_diff.append(diff)
        poisoned_max_dis.append(dis_max)
        poisoned_max_diff.append(diff_max)
        if (idx+1)%25 == 0:
            print(poisoned_dis[idx-25:idx])
    print(len(client_plane.clients[poisoned_clients[0]].train_dataloader.dataset.dataset.targets[client_plane.clients[poisoned_clients[0]].train_dataloader.dataset.dataset.targets == 5]))
    client_plane.reset_default_client_nets()
    client_plane.reset_poisoning_attack()
    
    poisoned = {
        "dis" : poisoned_dis, 
        "diff" : poisoned_diff,
        "max_dis" : poisoned_max_dis,
        "max_diff" : poisoned_max_diff
    }

In [None]:
print(clean_distance)

In [None]:
print(poisoned_distance)

In [None]:
print(clean_diff)

In [None]:
print(poisoned_diff)

In [None]:
print(clean_max)

In [None]:
print(poisoned_max)