# Analysis of Split ECG latent space for Model Poisoning Detection

## Distance Based Per-Class First Moments

In [1]:
import numpy as np
import torch 
import pickle
import pandas as pd

In [5]:

# Returns the cosine similarity between two vectors
def cosine_similarity(vector1, vector2):
    return np.dot(vector1, vector2) / (np.linalg.norm(vector1) * np.linalg.norm(vector2))

# Returns the euclidean similarity between two vectors
def euclidean_similarity(vector1, vector2):	
    return np.linalg.norm(vector1 - vector2)

# Returns the manhattan similarity between two vectors
def manhattan_similarity(vector1, vector2):	
    return np.sum(np.abs(vector1 - vector2))	

# Returns the jaccard similarity between two vectors	
def jaccard_similarity(vector1, vector2):	
    return np.sum(np.minimum(vector1, vector2)) / np.sum(np.maximum(vector1, vector2))

# Weights classes of a multi-label dataset based on the number of samples in each class. Every unique combination of labels is considered a class. Uses softmax to normalize the weights.
def class_weights(dataset):		
    # Get the number of samples in each class
    class_counts = np.zeros(dataset.num_classes)
    for i in range(dataset.num_classes):
        class_counts[i] = np.sum(dataset.labels[:, i])
    
    # Normalize the weights
    class_weights = np.exp(class_counts)
    class_weights = class_weights / np.sum(class_weights)
    
    return class_weights	

# Gets a pandas series of 5D numpy arrays. Filters out all arrays, which have a value of 0 in the 3rd dimension.
def filter_labels(df, idx, val=1):	
    return df[df.label.apply(lambda x: x[idx] == val)]

# Gets a numpy array as input. Returns a numpy array with all possible 2-combinations of the input. The order of the combinations is not important and combinations with the same elements are not included.
def get_similarities(array, similarity_functions):	
    similarities = {}	
    for s in similarity_functions:
        similarities[s] = []
    
    for i in range(len(array)):
        for j in range(i+1, len(array)):
            for s in similarity_functions:
                similarities[s].append(locals()[s+"_similarity"](array[i], array[j]))
    return np.array(similarities)	

In [29]:
# Gets a numpy array with n rows. Returns the pairwise cosine similarity between all rows. The result is a list of n(n-1)/2 values.
def get_similarities(array, ):			
    return np.array([cosine_similarity(array[i], array[j]) for i in range(len(array)) for j in range(i+1, len(array))])	

In [59]:
image_path = "/home/mohammad/indaStudentTower2Backup/Split_ECG_Classification/latent_space/single_client_honest/client_1.pickle"

In [60]:
client1 = pickle.load(open(image_path, "rb"))
samples = client1["samples"]
class_counts = samples.label.sum(axis=0)
samples_30 = samples[samples.epoch == 30]
max_class_1 = filter_labels(samples_30, np.argmax(class_counts))
max_class_0 = filter_labels(samples_30, np.argmax(class_counts), 0)
pooled_vectors = np.array(list(max_class_1.client_output_pooled))
combinations = get_combinations(pooled_vectors)

: 

: 

In [61]:
# Hint: Current methof has quadratic complexity. Search for ways to improve it.
9475 * (9475-1 ) /2

44883075.0

In [3]:
def foo():
    print("bar")

In [4]:
locals()["foo"]()

bar


### Case 1: Honest Client

In [30]:
cos_sim = get_cosine_similarity(pooled_vectors)

In [35]:
np.mean(cos_sim)

0.9240416362367839

In [36]:
np.var(cos_sim)

0.0025126748094221434

In [14]:
cos_sim = cosine_similarity(pooled_vectors, pooled_vectors.T)

In [28]:
cos_sim[3][1]

6.0279220274143594e-05

In [35]:
pooled_vectors.shape

(9475, 24)

In [26]:
sim = pairwise_similarity(pooled_vectors, cosine_similarity)

In [27]:
sim.shape

(9475, 9475)

In [None]:
sim[0][0]

In [28]:
sim

array([[0.        , 0.9039967 , 0.92422921, ..., 0.93199027, 0.95175634,
        0.95940484],
       [0.        , 0.        , 0.92796726, ..., 0.97648788, 0.95105033,
        0.96545122],
       [0.        , 0.        , 0.        , ..., 0.96173151, 0.94985534,
        0.94585569],
       ...,
       [0.        , 0.        , 0.        , ..., 0.        , 0.97933296,
        0.9797038 ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.98479301],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ]])

In [36]:
np.cov(pooled_vectors)

array([[120.22450007, 119.42928379,  93.80921672, ..., 147.57123968,
        116.19818361, 203.38387522],
       [119.42928379, 144.81136211, 102.24885159, ..., 169.05863278,
        127.16930033, 225.26501639],
       [ 93.80921672, 102.24885159,  82.71137638, ..., 126.34615963,
         96.69917432, 169.31727868],
       ...,
       [147.57123968, 169.05863278, 126.34615963, ..., 206.9149232 ,
        156.62530814, 273.8333732 ],
       [116.19818361, 127.16930033,  96.69917432, ..., 156.62530814,
        123.55340639, 212.25461511],
       [203.38387522, 225.26501639, 169.31727868, ..., 273.8333732 ,
        212.25461511, 374.51973956]])

In [38]:
sim2 = pairwise_similarity_1d(pooled_vectors, cosine_similarity)

ValueError: shapes (9475,1,24) and (1,9475,24) not aligned: 24 (dim 2) != 9475 (dim 1)

In [39]:
pooled_vectors.shape

(9475, 24)

In [42]:
cosine_similarity(pooled_vectors, pooled_vectors.T).shape

(9475, 9475)

In [44]:
get_similarities(pooled_vectors, "cosine")

array([[2830.07704501, 2784.5157626 , 2145.72346015, ..., 3425.943581  ,
        2708.53567258, 4789.79988423],
       [2784.5157626 , 3352.48940652, 2344.82963801, ..., 3906.79176009,
        2945.75660517, 5246.02527076],
       [2145.72346015, 2344.82963801, 1904.53896848, ..., 2900.13676477,
        2217.49195628, 3873.79064062],
       ...,
       [3425.943581  , 3906.79176009, 2900.13676477, ..., 4774.62645892,
        3620.00961442, 6353.02882379],
       [2708.53567258, 2945.75660517, 2217.49195628, ..., 3620.00961442,
        2861.66835972, 4943.91441687],
       [4789.79988423, 5246.02527076, 3873.79064062, ..., 6353.02882379,
        4943.91441687, 8807.09474908]])