# Feature Engineering with SHAP values Experiment 2

## Google Colab

In [None]:
# Google Colab only: unmount any existing Drive mount, then mount Drive again
# at /content/drive.
from google.colab import drive
drive.flush_and_unmount()
drive.mount('/content/drive', force_remount=True)

# Add the notebook folder and the federated_learning folder on Drive to the
# module search path so the project imports in the cells below resolve.
import sys
sys.path.append('/content/drive/My Drive/Colab Notebooks')
sys.path.append('/content/drive/My Drive/Colab Notebooks/federated_learning')

# IPython shell escape (not plain Python): install the pinned SHAP release.
!pip install shap==0.40.0

In [None]:
import sklearn

## Experimental Setup

In [1]:
from federated_learning.utils import SHAPUtil, experiment_util, Visualizer
from federated_learning import ClientPlane, Configuration, ObserverConfiguration
from federated_learning.server import Server
from datetime import datetime

In [None]:
def cos_similarity_values(s_client, s_server):
    """Print, per image, the rounded sum of ``s_client - s_server``.

    Despite its name, this variant computes no cosine measure: it subtracts
    the two inputs element-wise, sums every entry of each
    ``difference[row][image]`` and prints the sums (rounded to 3 decimals)
    as a matrix.

    Args:
        s_client: Nested array-like indexed as ``[row][image]``.
        s_server: Array-like that ``np.subtract`` can combine with
            ``s_client``.

    Returns:
        None. The matrix is only printed.
    """
    import numpy as np

    shap_subtract = np.subtract(s_client, s_server)
    # One output row per input row; the row count comes from the data instead
    # of a fixed 10, so inputs with fewer or more rows work as well.
    summed_differences = [
        [round(np.sum(image.flatten()), 3) for image in row]
        for row in shap_subtract
    ]

    print(np.matrix(summed_differences))

In [None]:
from scipy import spatial
import numpy

In [None]:
# `from scipy import spatial` does not bind the name `scipy`, so import the
# package itself before reading its version.
import scipy
scipy.__version__


In [None]:
# Show the installed NumPy version (relies on the `import numpy` cell above).
numpy.__version__

In [None]:
def cos_similarity_values(s_client, s_server):
    """Return the cosine distance between per-image sums of both inputs.

    Every ``values[row][image]`` entry is collapsed to the sum of its
    elements. The sums of each input are flattened into one vector, and the
    cosine distance (``scipy.spatial.distance.cosine``, i.e. ``1 - cosine
    similarity``) between the server vector and the client vector is
    returned.

    Args:
        s_client: Nested array-like indexed as ``[row][image]``.
        s_server: Nested array-like with the same layout as ``s_client``.

    Returns:
        The cosine distance between the two vectors of per-image sums.
    """
    from scipy import spatial
    import numpy as np

    def _image_sums(values):
        # The row count follows the input rather than a fixed 10, so the
        # nested list is never padded with empty rows.
        return np.array(
            [[np.sum(np.ravel(image)) for image in row] for row in values]
        ).flatten()

    # Computed once; the original evaluated this twice and discarded the first.
    return spatial.distance.cosine(_image_sums(s_server), _image_sums(s_client))

In [None]:
def cos_similarity_values(s_client, s_server):
    """Return the sum of per-image cosine distances between both inputs.

    For every ``[row][image]`` position the flattened client entry is
    compared with the flattened server entry using
    ``scipy.spatial.distance.cosine`` (``1 - cosine similarity``). All
    distances are added up.

    Args:
        s_client: Nested array-like indexed as ``[row][image]``; each entry
            must offer ``.flatten()``.
        s_server: Same layout as ``s_client``.

    Returns:
        The total of all per-image cosine distances.
    """
    from scipy import spatial
    import numpy as np

    # Rows are taken from the input instead of a fixed count of 10.
    distances = [
        [
            spatial.distance.cosine(
                image.flatten(), s_server[row_idx][img_idx].flatten()
            )
            for img_idx, image in enumerate(row)
        ]
        for row_idx, row in enumerate(s_client)
    ]
    return np.sum(distances)
    


In [None]:
# Works for MNIST
def cos_similarity_values(s_client, s_server):
    """Return the summed cosine distance and the dominant diagonal difference.

    For each ``[row][image]`` position two numbers are collected: the cosine
    distance (``scipy.spatial.distance.cosine``) between the flattened
    client and server entries, and the sum of the element-wise difference
    ``s_client - s_server`` for that entry.

    Args:
        s_client: Nested array-like indexed as ``[row][image]``; each entry
            must offer ``.flatten()``.
        s_server: Same layout as ``s_client``.

    Returns:
        A tuple ``(total_distance, diagonal_difference)``: the sum of all
        cosine distances, and the signed entry on the diagonal of the
        difference-sum matrix that has the largest absolute value.
    """
    from scipy import spatial
    import numpy as np

    shap_subtract = np.subtract(s_client, s_server)
    cos_similarity = []
    differences_sum = []
    # Rows are appended as they are seen, so the row count follows the input
    # instead of a fixed 10.
    for row_idx, row in enumerate(s_client):
        cos_row = []
        diff_row = []
        for img_idx, image in enumerate(row):
            cos_row.append(
                spatial.distance.cosine(
                    image.flatten(), s_server[row_idx][img_idx].flatten()
                )
            )
            diff_row.append(np.sum(shap_subtract[row_idx][img_idx].flatten()))
        cos_similarity.append(cos_row)
        differences_sum.append(diff_row)

    diagonal = np.array(differences_sum).diagonal()
    return np.sum(cos_similarity), diagonal[np.argmax(np.abs(diagonal))]
    


In [None]:
def cos_similarity_values(s_client, s_server):
    """Return the summed cosine distance and a distance-weighted difference.

    For each ``[row][image]`` position the cosine distance
    (``scipy.spatial.distance.cosine``) between the flattened client and
    server entries and the sum of the element-wise difference
    ``s_client - s_server`` are collected. Only the diagonals of the two
    resulting matrices enter the second return value.

    Side effect: prints the product of the largest diagonal cosine distance
    and the difference sum at the same diagonal position.

    Args:
        s_client: Nested array-like indexed as ``[row][image]``; each entry
            must offer ``.flatten()``.
        s_server: Same layout as ``s_client``.

    Returns:
        A tuple ``(total_distance, weighted_difference)``: the sum of all
        cosine distances, and the dot product of the cosine-distance
        diagonal with the difference-sum diagonal.
    """
    from scipy import spatial
    import numpy as np

    shap_subtract = np.subtract(s_client, s_server)
    cos_similarity = []
    similarity_sum = []
    # The row count follows the input instead of a fixed 10.
    for row_idx, row in enumerate(s_client):
        cos_row = []
        sum_row = []
        for img_idx, image in enumerate(row):
            cos_row.append(
                spatial.distance.cosine(
                    image.flatten(), s_server[row_idx][img_idx].flatten()
                )
            )
            sum_row.append(np.sum(shap_subtract[row_idx][img_idx].flatten()))
        cos_similarity.append(cos_row)
        similarity_sum.append(sum_row)

    # Build each diagonal once and reuse it.
    cos_diagonal = np.array(cos_similarity).diagonal()
    sum_diagonal = np.array(similarity_sum).diagonal()
    argmax = np.argmax(cos_diagonal)
    print(cos_diagonal[argmax] * sum_diagonal[argmax])
    return np.sum(cos_similarity), cos_diagonal.dot(sum_diagonal)
    


In [26]:
def cos_similarity_values(s_client, s_server):
    """Return summed and maximal cosine distance plus the dominant difference.

    For each ``[row][image]`` position the cosine distance
    (``scipy.spatial.distance.cosine``) between the flattened client and
    server entries and the sum of the element-wise difference
    ``s_client - s_server`` are collected.

    Side effect: when positions ``[5][5]``, ``[5][4]`` and ``[4][5]`` exist,
    their cosine distances are printed. The experiment cells set
    FROM_LABEL = 5 and TO_LABEL = 4, so these are presumably the entries of
    interest for that label flip. Smaller inputs skip the print instead of
    failing.

    Args:
        s_client: Nested array-like indexed as ``[row][image]``; each entry
            must offer ``.flatten()``.
        s_server: Same layout as ``s_client``.

    Returns:
        A tuple ``(total_distance, max_distance, dominant_difference)``: the
        sum of all cosine distances, the largest single cosine distance, and
        the signed difference sum with the largest absolute value over all
        positions.
    """
    from scipy import spatial
    import numpy as np

    shap_subtract = np.subtract(s_client, s_server)
    cos_similarity = []
    differences_sum = []
    # The row count follows the input instead of a fixed 10.
    for row_idx, row in enumerate(s_client):
        cos_row = []
        diff_row = []
        for img_idx, image in enumerate(row):
            cos_row.append(
                spatial.distance.cosine(
                    image.flatten(), s_server[row_idx][img_idx].flatten()
                )
            )
            diff_row.append(np.sum(shap_subtract[row_idx][img_idx].flatten()))
        cos_similarity.append(cos_row)
        differences_sum.append(diff_row)

    # Debug output for the fixed 5/4 positions; guarded so that inputs with
    # fewer than six rows or columns do not raise IndexError.
    if len(cos_similarity) > 5 and all(len(cos_similarity[r]) > 5 for r in (4, 5)):
        print(cos_similarity[5][5], cos_similarity[5][4], cos_similarity[4][5])

    flat_differences = np.array(differences_sum).flatten()
    dominant_difference = flat_differences[np.argmax(np.abs(flat_differences))]
    return np.sum(cos_similarity), np.max(cos_similarity), dominant_difference
    


# MNIST

In [3]:
# MNIST experiment configuration: choose dataset, model name and network
# class, then set up the observer metadata.
from federated_learning.nets import MNISTCNN
from federated_learning.dataset import MNISTDataset
import os
config = Configuration()
# No clients are poisoned initially; the experiment cell raises this later.
config.POISONED_CLIENTS = 0
# 1 corresponds to flipping 100% of the source labels (see the
# "Flip 100.0% ..." line in the captured experiment output).
config.DATA_POISONING_PERCENTAGE = 1
config.DATASET = MNISTDataset
config.MODELNAME = config.MNIST_NAME
config.NETWORK = MNISTCNN
observer_config = ObserverConfiguration()
observer_config.experiment_type = "shap_fl_poisoned"
observer_config.experiment_id = 1
observer_config.test = False
observer_config.datasetObserverConfiguration = "MNIST"
# NOTE(review): neutral_label is not referenced in the visible cells.
neutral_label = 2

In [None]:
# Google Colab settings: temp directory on the mounted Drive, dataset
# directories under /content/data, and no VM URL.
config.TEMP = '/content/drive/My Drive/Colab Notebooks/temp'
config.FMNIST_DATASET_PATH = '/content/data/fmnist'
config.MNIST_DATASET_PATH = '/content/data/mnist'
config.CIFAR10_DATASET_PATH = '/content/data/cifar10'
config.VM_URL = "none"

In [4]:
# Build the dataset, the SHAP helper, the server, the client plane and the
# visualizer from the configuration above.
data = config.DATASET(config)
shap_util = SHAPUtil(data.test_dataloader)
server = Server(config, observer_config, data.train_dataloader, data.test_dataloader, shap_util)
# The training loop below passes client_plane to experiment_util, so it has
# to exist before that cell runs (the Fashion MNIST section creates it in the
# same place).
client_plane = ClientPlane(config, observer_config, data, shap_util)
visualizer = Visualizer(shap_util)

MNIST training data loaded.
MNIST test data loaded.


## Experiment Setup 

In [None]:
import numpy as np
import copy
import torch
import os

# Run 200 federated rounds and write the server model to disk at selected
# round numbers.
# NOTE(review): the snapshot is written before round i+1 is executed, so
# "MNIST_round_N.model" holds the weights as they were when round N started.
# Confirm this is the intended naming.
for i in range(200):
    if (i+1) in [2, 5, 10, 75, 100, 200]:
        file = "./temp/models/ex6/MNIST_round_{}.model".format(i+1)
        # exist_ok avoids the separate exists() check and its race window.
        os.makedirs(os.path.dirname(file), exist_ok=True)
        torch.save(server.net.state_dict(), file)
    experiment_util.set_rounds(client_plane, server, i+1)
    experiment_util.run_round(client_plane, server, i+1)

## Experiment

In [35]:
import torch


def _count_label(client, label):
    """Return how many training targets of ``client`` equal ``label``."""
    targets = client.train_dataloader.dataset.dataset.targets
    return len(targets[targets == label])


# Label-flipping attack under test: samples labelled FROM_LABEL are relabelled
# as TO_LABEL on the poisoned clients.
config.FROM_LABEL = 5
config.TO_LABEL = 4
shap_images = [config.FROM_LABEL, config.TO_LABEL]
for j in [100]:
    # Fresh dataset and client plane, and the server model restored from the
    # snapshot saved for round j.
    data = config.DATASET(config)
    client_plane = ClientPlane(config, observer_config, data, shap_util)
    model_file = file = "./temp/models/ex6/MNIST_round_{}.model".format(j)
    server.net = MNISTCNN()
    server.net.load_state_dict(torch.load(model_file))

    # Baseline metrics and SHAP values of the restored server model.
    server.test()
    recall, precision, accuracy = server.analize_test()
    print("Original", recall, precision, accuracy)
    server_shap = server.get_shap_values()

    config.POISONED_CLIENTS = 100
    experiment_util.update_configs(client_plane, server, config, observer_config)
    print(_count_label(client_plane.clients[0], config.FROM_LABEL))

    client_plane.poison_clients()
    clean_clients = experiment_util.select_random_clean(client_plane, config, 100)
    poisoned_clients = experiment_util.select_poisoned(client_plane, 100)
    clean_distance = []
    poisoned_distance = []
    clean_diff = []
    poisoned_diff = []
    clean_max = []
    poisoned_max = []
    print("Clean")
    print(_count_label(client_plane.clients[0], config.FROM_LABEL))
    for idx, i in enumerate(clean_clients[:100]):
        # Reset the clients to the server weights, train one client and
        # compare its SHAP values with the server's.
        client_plane.update_clients(server.get_nn_parameters())
        client_plane.clients[i].train(j+1)
        clean_client_shap = client_plane.clients[i].get_shap_values()
        distance, distance_max, diag_diff = cos_similarity_values(clean_client_shap, server_shap)
        clean_distance.append(distance)
        clean_max.append(distance_max)
        clean_diff.append(diag_diff)
        if (idx+1) % 25 == 0:
            # Show the 25 most recent values. The former slice [idx-25:idx]
            # was empty for the first batch and always dropped the newest one.
            print(clean_distance[-25:])

    print("Poisoned")
    server.net = MNISTCNN()
    server.net.load_state_dict(torch.load(model_file))
    for idx, i in enumerate(poisoned_clients[:100]):
        client_plane.update_clients(server.get_nn_parameters())
        client_plane.clients[i].train(j+1)
        poisoned_client_shap = client_plane.clients[i].get_shap_values()
        distance, distance_max, diag_diff = cos_similarity_values(poisoned_client_shap, server_shap)
        poisoned_distance.append(distance)
        poisoned_max.append(distance_max)
        poisoned_diff.append(diag_diff)
        if (idx+1) % 25 == 0:
            print(poisoned_distance[-25:])
    print(_count_label(client_plane.clients[poisoned_clients[0]], config.FROM_LABEL))
    client_plane.reset_default_client_nets()
    client_plane.reset_poisoning_attack()

MNIST training data loaded.
MNIST test data loaded.
Create 200 clients with dataset of size 300

Test set: Average loss: 0.0002, Accuracy: 9625/10000 (96%)

Original tensor([0.9929, 0.9833, 0.9612, 0.9525, 0.9562, 0.9720, 0.9729, 0.9523, 0.9446,
        0.9366]) tensor([0.9615, 0.9867, 0.9350, 0.9649, 0.9812, 0.9527, 0.9739, 0.9459, 0.9664,
        0.9565]) 0.9625
5421
Poison 100/200 clients
Flip 100.0% of the 5 labels to 4
[ 89 184  69 106 178  95  51   7 161  88 146  68  82 167  45  46   9  75
 122  65 195  74  49  25  81  84 194 100   4 179 102 124  11 127 128  73
  20 109  27 168  79  44  12  16  63 176  93 189 126 183  42  38   0 153
 181 165   3  31  22  86   6 185 144  53  29 115 197  19 187 137  58 120
 123 101   5 158  80 182 129  59  98  26  30 108   1 150 193 140  39 166
 141 116 180  21  24 135  23 170 143  17]
20/100 clients poisoned
40/100 clients poisoned
60/100 clients poisoned
80/100 clients poisoned
100/100 clients poisoned
Clean
2691
0.0040680450574385185 0.005099650

In [36]:
# Per-client `distance` values collected for the clean clients in the experiment cell.
print(clean_distance)

[17.93892382846402, 26.563642406228805, 18.3705900218161, 20.872262888093573, 16.960495123810148, 18.118409259435516, 23.47984501099413, 17.17799674105117, 22.958780323689986, 21.682625965237115, 20.851989449588405, 21.551130352999866, 18.56618702690269, 15.990075457053678, 21.78156462807071, 21.9750453885165, 17.023969208744674, 17.363270708582732, 17.079906892845564, 18.926629292085565, 20.947859575683957, 19.124299170121148, 20.38732819116176, 20.30466353076803, 23.884589922762974, 19.566280196234796, 19.958299955978564, 18.030332572291808, 18.936057814642357, 18.2657761427648, 18.0956362514172, 22.481463442191085, 21.57119750582589, 18.457332853836405, 15.639827344061315, 18.09943777593432, 21.87813063196303, 16.41600814807535, 19.579592305840233, 17.22845988594389, 19.828894009149867, 23.678572487946845, 14.863518306324872, 14.978255323609243, 13.144944352740202, 20.038971450298128, 22.738216777157135, 18.412194733746475, 17.066562366254185, 21.51492418803346, 19.250582673602672, 

In [37]:
# Per-client `distance_max` values collected for the clean clients in the experiment cell.
print(clean_max)

[1.649774482736913, 1.7654770007331577, 1.712704644532058, 1.7939483093533615, 1.5818061950354707, 1.725276857937665, 1.5681986155360836, 1.760060518990715, 1.7048822986169618, 1.772665098270899, 1.6587928647156605, 1.814671029683393, 1.7674505633985964, 1.6976609196644359, 1.7589970390964216, 1.8089232369219848, 1.74061663810699, 1.9045846303735656, 1.5180913418214517, 1.833323586068004, 1.68984712906225, 1.8115836457302126, 1.7512127568847151, 1.7415146317468875, 1.929674295408574, 1.7959195281402578, 1.6603518699857844, 1.9231256419057696, 1.9625986534483881, 1.8593039247602714, 1.698382876850192, 1.7449142808035871, 1.7343673890359839, 1.6895871489326817, 1.6337241889818856, 1.7219631596359282, 1.8354807154917254, 1.8434128700478003, 1.6506616153128064, 1.7434599798239692, 1.8496132360812945, 1.7958884941114654, 1.8284507094801399, 1.6401149280736504, 1.3979263183986346, 1.5440771709127552, 1.8021827629517209, 1.7806529474562462, 1.78869111718224, 1.7281916902183954, 1.728912676026

In [38]:
# Per-client `diag_diff` values collected for the clean clients in the experiment cell.
print(clean_diff)

[0.21352032019190226, -0.2975621300304141, -0.08217269246192371, 0.14903339890484535, 0.22709506674418378, 0.10821700155903091, -0.05235284207381652, 0.110217747266392, 0.10060400337627495, -0.07540740894047238, -0.18044058365833182, 0.1516121076250574, 0.129774183087207, -0.19673233917706945, 0.21233397653339736, -0.14134806876681338, 0.17371695692335898, 0.1420155898403639, 0.19289433331036143, 0.23063015584729207, -0.24668814401750794, -0.2316966268141094, -0.19213991491541593, 0.21308265196301512, -0.25265994034927175, -0.15839428076898088, 0.1032796839389915, 0.1370174577925205, 0.19952608114687642, 0.13137484262118804, 0.11980457692538593, 0.09174463948513933, -0.25827886564558966, -0.2459849209468299, -0.13690909263826612, -0.165274484586039, -0.08372843569016464, 0.17104563684855623, 0.18531420661985865, -0.198090634203691, -0.12638828377512645, -0.22162291837177506, -0.1447279447640737, 0.12626094977496827, 0.20978549156116877, -0.10283207471666245, -0.2041980662091767, 0.1516

In [39]:
# Per-client `distance` values collected for the poisoned clients in the experiment cell.
print(poisoned_distance)

[29.692014655619513, 27.711717278786477, 32.30634826370024, 28.911543138185266, 31.878641116839134, 30.546124809262384, 30.022697293930218, 23.716318736193227, 26.668983464518956, 28.63532294192695, 30.194592926268133, 27.783418906960875, 29.974352074308225, 29.018640858864515, 34.1658778666774, 34.31084054085106, 25.953097814057937, 28.76648307013253, 26.935432781761257, 27.77052227038345, 26.563661398039674, 25.77801219645026, 34.38884326832455, 27.96949240325978, 27.700580960197733, 33.32916500854085, 29.384908671330884, 29.410451527947856, 29.021421839288788, 34.25511273283632, 30.222580274874844, 27.038798869151506, 24.822975373722134, 36.5927787186891, 33.06601006971876, 28.255649277001595, 31.49521167409784, 31.515206488822944, 31.071394892136812, 33.94513648236053, 34.08058559684026, 28.05201615649249, 34.01641113114225, 28.237911428908607, 29.357269465375243, 27.367556551779604, 30.40758671239194, 28.51258777284626, 28.95401938861313, 30.47088773605214, 29.05373364808217, 32.3

In [40]:
# Per-client `distance_max` values collected for the poisoned clients in the experiment cell.
print(poisoned_max)

[1.8959470363570985, 1.8054083653048412, 1.7729398597428885, 1.7068250085687209, 1.7724764014281282, 1.7680975223817978, 1.9370454866308253, 1.8589555653285377, 1.8901669733628028, 1.669790865263323, 1.6993803401393048, 1.6513734196073135, 1.7681309373850256, 1.8104953756809556, 1.888120166764776, 1.7065104451123434, 1.8576999126817992, 1.8302523613569726, 1.7948228114653255, 1.8075440216587166, 1.882081916945175, 1.6872682573974522, 1.822775154361306, 1.8320683901412402, 1.829789661473991, 1.6602996728655608, 1.7969770564278078, 1.731028713435679, 1.7747742578682537, 1.7825014064691993, 1.7181501516520457, 1.7481599030585784, 1.8148551724568867, 1.9512532265619922, 1.842761000492056, 1.7875836351204994, 1.8386826718670948, 1.7901669844424641, 1.7217866595253106, 1.777636044021769, 1.8206728036981588, 1.8105531890428228, 1.6580164547743208, 1.733647663393283, 1.8115166965296123, 1.808304792232021, 1.6888730759726198, 1.7542453268519567, 1.6935076956474506, 1.7709334627965798, 1.6889567

In [41]:
# Per-client `diag_diff` values collected for the poisoned clients in the experiment cell.
print(poisoned_diff)

[-0.8729501796932272, -0.6868213936351838, -0.8272646708027991, -0.8156487782262514, -0.7327927227714666, -0.7677277672838209, -0.7250215240875697, -0.5991965754546031, -0.7266792340402923, -0.834750120667344, -0.844938891801495, -0.8065530101389535, -0.7248797359390498, -0.8313376555710812, -0.7228010452826983, -0.856284073963865, -0.6442699145352248, -0.4757093756023507, -0.7900296273183476, -0.7875787590316805, -0.6307925275752037, -0.8709834790335356, -0.7884791659096623, -0.6574909993667681, -0.7087352061186225, -0.5552494861281372, -0.8329266102448567, -0.8497686057211643, -0.8442684483655356, -0.8113043970002043, -0.7786279760687895, -0.7382968092906371, -0.6355243095804144, -0.7939781934454658, -0.9153734454192423, -0.8572905089807524, -0.7856452436845451, -0.8064017657035256, -0.588343458558434, -0.8846772579712667, -0.6250578968781348, -0.7400341644456141, -0.8544397491008642, -0.8197247871279857, -0.6145757910707228, -0.834720791827973, -0.4848549828021842, -0.88188458549059

# Fashion MNIST

In [None]:
# Fashion MNIST experiment configuration: choose dataset, model name and
# network class, then set up the observer metadata.
from federated_learning.nets import FMNISTCNN
from federated_learning.dataset import FMNISTDataset
import os
config = Configuration()
# No clients are poisoned initially; the experiment cell raises this later.
config.POISONED_CLIENTS = 0
config.DATA_POISONING_PERCENTAGE = 1
config.DATASET = FMNISTDataset
config.MODELNAME = config.FMNIST_NAME
config.NETWORK = FMNISTCNN
observer_config = ObserverConfiguration()
observer_config.experiment_type = "shap_fl_poisoned"
observer_config.experiment_id = 1
observer_config.test = False
# NOTE(review): this value is still "MNIST" although the section configures
# Fashion MNIST; possibly a copy-paste leftover. Confirm against the observer.
observer_config.datasetObserverConfiguration = "MNIST"
# NOTE(review): neutral_label is not referenced in the visible cells.
neutral_label = 2

In [None]:
# Google Colab settings: temp directory on the mounted Drive, dataset
# directories under /content/data, and no VM URL.
config.TEMP = '/content/drive/My Drive/Colab Notebooks/temp'
config.FMNIST_DATASET_PATH = '/content/data/fmnist'
config.MNIST_DATASET_PATH = '/content/data/mnist'
config.CIFAR10_DATASET_PATH = '/content/data/cifar10'
config.VM_URL = "none"

In [None]:
# Build the dataset, the SHAP helper, the server, the client plane and the
# visualizer from the configuration above. Order matters: later objects take
# the earlier ones as constructor arguments.
data = config.DATASET(config)
shap_util = SHAPUtil(data.test_dataloader) 
server = Server(config, observer_config,data.train_dataloader, data.test_dataloader, shap_util)
client_plane = ClientPlane(config, observer_config, data, shap_util)
visualizer = Visualizer(shap_util)

In [None]:
import numpy as np
import copy
import torch
import os

# Run 200 federated rounds and write the server model to Drive at selected
# round numbers.
# NOTE(review): the snapshot is written before round i+1 is executed, so
# "FMNIST_round_N.model" holds the weights as they were when round N started.
# Confirm this is the intended naming.
for i in range(200):
    if (i+1) in [2, 5, 10, 75, 100, 200]:
        file = "/content/drive/My Drive/Colab Notebooks/temp/models/ex6/FMNIST_round_{}.model".format(i+1)
        # exist_ok avoids the separate exists() check and its race window.
        os.makedirs(os.path.dirname(file), exist_ok=True)
        torch.save(server.net.state_dict(), file)
    experiment_util.set_rounds(client_plane, server, i+1)
    experiment_util.run_round(client_plane, server, i+1)

In [None]:
import torch


def _count_label(client, label):
    """Return how many training targets of ``client`` equal ``label``."""
    targets = client.train_dataloader.dataset.dataset.targets
    return len(targets[targets == label])


# Label-flipping attack under test: samples labelled FROM_LABEL are relabelled
# as TO_LABEL on the poisoned clients.
config.FROM_LABEL = 5
config.TO_LABEL = 4
shap_images = [config.FROM_LABEL, config.TO_LABEL]
for j in [100]:
    # Fresh dataset and client plane, and the server model restored from the
    # snapshot saved for round j.
    data = config.DATASET(config)
    client_plane = ClientPlane(config, observer_config, data, shap_util)
    model_file = file = "/content/drive/My Drive/Colab Notebooks/temp/models/ex6/FMNIST_round_{}.model".format(j)
    server.net = FMNISTCNN()
    server.net.load_state_dict(torch.load(model_file))

    # Baseline metrics and SHAP values of the restored server model.
    server.test()
    recall, precision, accuracy = server.analize_test()
    print("Original", recall, precision, accuracy)
    server_shap = server.get_shap_values()

    config.POISONED_CLIENTS = 100
    experiment_util.update_configs(client_plane, server, config, observer_config)
    print(_count_label(client_plane.clients[0], config.FROM_LABEL))

    client_plane.poison_clients()
    clean_clients = experiment_util.select_random_clean(client_plane, config, 100)
    poisoned_clients = experiment_util.select_poisoned(client_plane, 100)
    clean_distance = []
    poisoned_distance = []
    clean_diff = []
    poisoned_diff = []
    clean_max = []
    poisoned_max = []
    print("Clean")
    print(_count_label(client_plane.clients[0], config.FROM_LABEL))
    for idx, i in enumerate(clean_clients[:100]):
        # Reset the clients to the server weights, train one client and
        # compare its SHAP values with the server's.
        client_plane.update_clients(server.get_nn_parameters())
        client_plane.clients[i].train(j+1)
        clean_client_shap = client_plane.clients[i].get_shap_values()
        # The most recently defined cos_similarity_values returns three
        # values (sum, max, dominant difference), as in the MNIST experiment;
        # unpacking only two raised ValueError.
        distance, distance_max, diag = cos_similarity_values(clean_client_shap, server_shap)
        clean_distance.append(distance)
        clean_max.append(distance_max)
        clean_diff.append(diag)
        if (idx+1) % 25 == 0:
            # Show the 25 most recent values. The former slice [idx-25:idx]
            # was empty for the first batch and always dropped the newest one.
            print(clean_distance[-25:])

    print("Poisoned")
    server.net = FMNISTCNN()
    server.net.load_state_dict(torch.load(model_file))
    for idx, i in enumerate(poisoned_clients[:100]):
        client_plane.update_clients(server.get_nn_parameters())
        client_plane.clients[i].train(j+1)
        poisoned_client_shap = client_plane.clients[i].get_shap_values()
        distance, distance_max, diag = cos_similarity_values(poisoned_client_shap, server_shap)
        poisoned_distance.append(distance)
        poisoned_max.append(distance_max)
        poisoned_diff.append(diag)
        if (idx+1) % 25 == 0:
            print(poisoned_distance[-25:])
    print(_count_label(client_plane.clients[poisoned_clients[0]], config.FROM_LABEL))
    client_plane.reset_default_client_nets()
    client_plane.reset_poisoning_attack()

In [None]:
# Per-client `distance` values collected for the clean clients in the Fashion MNIST experiment cell.
print(clean_distance)

In [None]:
# Per-client `distance` values collected for the poisoned clients in the Fashion MNIST experiment cell.
print(poisoned_distance)

In [None]:
# Per-client `diag` values collected for the clean clients in the Fashion MNIST experiment cell.
print(clean_diff)

In [None]:
# Per-client `diag` values collected for the poisoned clients in the Fashion MNIST experiment cell.
print(poisoned_diff)