In [1]:
import sys
import os
import pathlib

parent_dir = pathlib.Path.cwd().parent
sys.path.append(str(parent_dir))

from multiprocessing import Pool
import numpy as np
import pandas as pd
import pickle
from sklearn.metrics import confusion_matrix, accuracy_score

from tqdm.notebook import tqdm
from deepdiff import DeepDiff


from utils.partition import weight_update_statistics
from utils.flower_detection import mal_agents_update_statistics
from utils.data_crunch import load_meta_metrics, metrics_to_WL


pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)

In [2]:
#case = "manyR"
#case = "R0manyD"
case = "R1manyD"

cosine = True

#Rs = [0, 1, 3, 10, 30, 100, 300, 1_000, 3_000]
SERVER_ROUNDS = 20
suffix_str = ""
if cosine:
    # Kappas for cosine similarity
    kappas = np.round(np.concatenate((np.arange(0,0.3, 0.02), np.arange(0.3, 1, 0.1))),3)   
    cosine_str = '_cosine'
else:
    # Kappas for Euclidean distance
    kappas = np.round(np.concatenate((np.arange(0, 1, 0.2), np.arange(1,10,1), np.arange(10,55,5))),3)
    cosine_str = ''
print(kappas, len(kappas))

df_handle = f"df_{case}{cosine_str}{suffix_str}.csv"
print(df_handle)

[0.   0.02 0.04 0.06 0.08 0.1  0.12 0.14 0.16 0.18 0.2  0.22 0.24 0.26
 0.28 0.3  0.4  0.5  0.6  0.7  0.8  0.9 ] 22
df_R1manyD_cosine.csv


In [3]:
output_dir = "/home/salesort/Documents/644_CMPUT/outputs"
out_dirs = []
file_names = ["metrics_dict_01.pkl", "metrics_dict_02.pkl",
              "metrics_dict_03.pkl", "metrics_dict_04.pkl",
              "metrics_dict_05.pkl", "metrics_dict_06.pkl",
              "metrics_dict_07.pkl", "metrics_dict_08.pkl",
              "metrics_dict_09.pkl", "metrics_dict_10.pkl",
              "metrics_dict_11.pkl", "metrics_dict_12.pkl",
              "metrics_dict_13.pkl", "metrics_dict_14.pkl",
              "metrics_dict_15.pkl", "metrics_dict_16.pkl",
              "metrics_dict_17.pkl", "metrics_dict_18.pkl",
              "metrics_dict_19.pkl", "metrics_dict_20.pkl",]

if case == "manyR":
    Rs = [0, 1, 3, 10, 30, 100, 300, 1_000, 3_000]
    Ds = [0.1]
    out_dirs = ["Results_r00000",
                "Results_r00001",
                "Results_r00003",
                "Results_r00010",
                "Results_r00030",
                "Results_r00100",
                "Results_r00300",
                "Results_r01000",
                "Results_r03000",]

elif case == "R0manyD":
    Rs = [0]
    Ds = [0.10, 0.50, 0.75, 0.90, 0.99]
    out_dirs = ["Results_r00000_prop0-10",
                "Results_r00000_prop0-50",
                "Results_r00000_prop0-75",
                "Results_r00000_prop0-90",
                "Results_r00000_prop0-99",]
elif case == "R1manyD":
    Rs = [1]
    Ds = [0.10, 0.50, 0.75, 0.90, 0.99]
    out_dirs = ["Results_r00001_prop0-10",
                "Results_r00001_prop0-50",
                "Results_r00001_prop0-75",
                "Results_r00001_prop0-90",
                "Results_r00001_prop0-99",]

classifier_keys = {}
for R in Rs:
    for D in Ds:
        classifier_keys[f"R{str(R)}D{str(D)}"] = {"R": R, "D": D}
keys = list(classifier_keys.keys())
print(classifier_keys)
print(keys)
assert len(keys) == len(out_dirs)

#for key_i, out_dir in enumerate(out_dirs):
#    print(f"Key: {keys[key_i]}, {out_dir}")
#    handles_at_dir = [f"{output_dir}/{out_dir}/{file_name}" for file_name in file_names]
#    meta_metrics = {}
#    for handle_i, handle_file in enumerate(handles_at_dir):
#        with open(handle_file, "rb") as handle:
#            print("\t", handle_file)
#            #metrics = pickle.load(handle)
#        meta_metrics[handle_i] = metrics

{'R1D0.1': {'R': 1, 'D': 0.1}, 'R1D0.5': {'R': 1, 'D': 0.5}, 'R1D0.75': {'R': 1, 'D': 0.75}, 'R1D0.9': {'R': 1, 'D': 0.9}, 'R1D0.99': {'R': 1, 'D': 0.99}}
['R1D0.1', 'R1D0.5', 'R1D0.75', 'R1D0.9', 'R1D0.99']


In [4]:
l_classifier_res = []

for key_i, out_dir in enumerate(out_dirs):
    print(f"{key_i}/{len(keys)}")
    meta_metrics = {}
    #meta_metrics = load_meta_metrics(R)
    
    handles_at_dir = [f"{output_dir}/{out_dir}/{file_name}" for file_name in file_names]
    for handle_i, handle_file in enumerate(handles_at_dir):
        with open(handle_file, "rb") as handle:
            metrics = pickle.load(handle)
        meta_metrics[handle_i] = metrics

    results = {}
    WL_all = [metrics_to_WL(meta_metrics[rnd]) for rnd in meta_metrics.keys()]

    for kappa in tqdm(kappas):
        result_at_kappa = []
        for rnd in meta_metrics.keys():
            WL = WL_all[rnd]
            detected_round = weight_update_statistics(WL, kappa=kappa, cosine=cosine)
            result_at_kappa.append(detected_round)
        results[kappa] = result_at_kappa

    results_uni = {}
    for kappa in kappas:
        results_uni[kappa] = np.concatenate(results[kappa])
    
    R = classifier_keys[keys[key_i]]["R"]
    if R == 0:
        Y_true = [0] * 200
    else:    
        Y_true = [0, 0, 0, 1, 0, 0, 0, 0 ,0 ,0] * 20

    for kappa in results_uni.keys():
        accu = accuracy_score(Y_true, results_uni[kappa])
        cmtx = confusion_matrix(Y_true, results_uni[kappa])
        if all(results_uni[kappa] == 0) and all(np.array(Y_true) == 0):
            TN = cmtx[0,0]
            FN = TP = FP = 0 
        else:
            TN = cmtx[0,0]
            FN = cmtx[1,0]
            TP = cmtx[1,1]
            FP = cmtx[0,1]
        l_classifier_res.append([classifier_keys[keys[key_i]]["R"], classifier_keys[keys[key_i]]["D"], float(kappa), accu, TN, FN, TP, FP])

0/5


  0%|          | 0/22 [00:00<?, ?it/s]

1/5


  0%|          | 0/22 [00:00<?, ?it/s]

2/5


  0%|          | 0/22 [00:00<?, ?it/s]

3/5


  0%|          | 0/22 [00:00<?, ?it/s]

4/5


  0%|          | 0/22 [00:00<?, ?it/s]

In [5]:
df_classifier = pd.DataFrame(l_classifier_res, columns = ["R", "D", "kappa", "accu", "TN", "FN", "TP", "FP"])

In [6]:
df_classifier["Recall"] = df_classifier["TP"]/(df_classifier["TP"] + df_classifier["FN"]) # TP/(TP+FN)
df_classifier["Precision"] = df_classifier["TP"]/(df_classifier["TP"] + df_classifier["FP"]) # TP/(TP+FP)
df_classifier["TPR"] = df_classifier["TP"]/(df_classifier["TP"] + df_classifier["FN"]) # TP/(TP+FN)
df_classifier["FPR"] = df_classifier["FP"]/(df_classifier["FP"] + df_classifier["TN"]) # FP/(FP+TN)
# Accuracy = (TP+TN)/(TP+TN+FP+FN)
df_classifier["F1"] = 2*df_classifier["TP"]/(2*df_classifier["TP"]+df_classifier["FP"]+df_classifier["FN"])# F1 = 2*TP / (2*TP+FP+FN)

In [7]:
df_classifier.head(200)

Unnamed: 0,R,D,kappa,accu,TN,FN,TP,FP,Recall,Precision,TPR,FPR,F1
0,1,0.1,0.0,0.1,0,0,20,180,1.0,0.1,1.0,1.0,0.181818
1,1,0.1,0.02,0.99,178,0,20,2,1.0,0.909091,1.0,0.011111,0.952381
2,1,0.1,0.04,1.0,180,0,20,0,1.0,1.0,1.0,0.0,1.0
3,1,0.1,0.06,1.0,180,0,20,0,1.0,1.0,1.0,0.0,1.0
4,1,0.1,0.08,1.0,180,0,20,0,1.0,1.0,1.0,0.0,1.0
5,1,0.1,0.1,0.99,180,2,18,0,0.9,1.0,0.9,0.0,0.947368
6,1,0.1,0.12,0.975,180,5,15,0,0.75,1.0,0.75,0.0,0.857143
7,1,0.1,0.14,0.97,180,6,14,0,0.7,1.0,0.7,0.0,0.823529
8,1,0.1,0.16,0.945,180,11,9,0,0.45,1.0,0.45,0.0,0.62069
9,1,0.1,0.18,0.93,180,14,6,0,0.3,1.0,0.3,0.0,0.461538


In [8]:
df_classifier.to_csv(f'/home/salesort/Documents/644_CMPUT/res_analysis/{df_handle}', index = False)

---

## Tests

In [31]:
# 1min 40s
results = {}
WL_all = [metrics_to_WL(meta_metrics[rnd]) for rnd in meta_metrics.keys()]

for kappa in tqdm(kappas):
    result_at_kappa = []
    for rnd in meta_metrics.keys():
        WL = WL_all[rnd]
        detected_round = weight_update_statistics(WL, kappa=kappa)
        result_at_kappa.append(detected_round)
    results[str(np.round(kappa, 1))] = result_at_kappa
    

  0%|          | 0/21 [00:00<?, ?it/s]

In [63]:
results_uni = {}
for kappa in results.keys():
    results_uni[kappa] = np.concatenate(results[kappa])
    
if R == 0:
    Y_true = [0] * 200
else:    
    Y_true = [0, 0, 0, 1, 0, 0, 0, 0 ,0 ,0] * 20


l_classifier_res = []

for kappa in results_uni.keys():
    accu = accuracy_score(Y_true, results_uni[kappa])
    cmtx = confusion_matrix(Y_true, results_uni[kappa])
    if all(results_uni[kappa] == 0) and all(np.array(Y_true) == 0):
        TN = cmtx[0,0]
        FN = TP = FP = 0 
    else:
        TN = cmtx[0,0]
        FN = cmtx[1,0]
        TP = cmtx[1,1]
        FP = cmtx[0,1]
    l_classifier_res.append([R, float(kappa), accu, TN, FN, TP, FP])
    

0.0
0.1
0.2
0.3
0.4
0.5
0.6
0.7
0.8
0.9
1.0
5.0
10.0
15.0
20.0
25.0
30.0
35.0
40.0
45.0
50.0


In [65]:
df_at_R = pd.DataFrame(l_classifier_res, columns = ["R", "kappa", "accu", "TN", "FN", "TP", "FP"])
df_at_R.head(200)

Unnamed: 0,R,kappa,accu,TN,FN,TP,FP
0,0,0.0,0.0,0,0,0,200
1,0,0.1,0.015,3,0,0,197
2,0,0.2,0.04,8,0,0,192
3,0,0.3,0.085,17,0,0,183
4,0,0.4,0.195,39,0,0,161
5,0,0.5,0.37,74,0,0,126
6,0,0.6,0.56,112,0,0,88
7,0,0.7,0.69,138,0,0,62
8,0,0.8,0.835,167,0,0,33
9,0,0.9,0.91,182,0,0,18


In [61]:
if all(results_uni['5.0'] == 0) and all(np.array(Y_true) == 0):
    print("Zero")

Zero


In [50]:
confusion_matrix(Y_true, results_uni['5.0'])

array([[200]])

In [31]:
def multiply(a, b=2, c=1):
    return a * b *c

In [32]:
from multiprocessing import Pool
with Pool(5) as p:
    results = p.starmap(multiply, [(1,3), (2,3), (3,3)])
print(results)

[3, 6, 9]


In [25]:
a = [1,2,3]
b = [11, 12, 13]
c = [21, 22, 23]

df = pd.DataFrame(np.array([a,b,c]).T, columns= ["A", "B", "C"])
df.head()

Unnamed: 0,A,B,C
0,1,11,21
1,2,12,22
2,3,13,23


In [5]:
print(len(WL))
for clientw in WL:
    print(len(clientw))

10
3382346
3382346
3382346
3382346
3382346
3382346
3382346
3382346
3382346
3382346


In [21]:
for server_round in meta_metrics.keys():
    print(server_round)
    print(meta_metrics[server_round].keys())
    for client in range(10):
        print(f"Client {client}: Layers {len(meta_metrics[server_round][str(client)][0])}")

0
dict_keys(['3', '7', '4', '9', '5', '8', '6', '1', '0', '2'])
Client 0: Layers 8
Client 1: Layers 8
Client 2: Layers 8
Client 3: Layers 8
Client 4: Layers 8
Client 5: Layers 8
Client 6: Layers 8
Client 7: Layers 8
Client 8: Layers 8
Client 9: Layers 8
1
dict_keys(['4', '5', '9', '7', '3', '0', '2', '6', '1', '8'])
Client 0: Layers 8
Client 1: Layers 8
Client 2: Layers 8
Client 3: Layers 8
Client 4: Layers 8
Client 5: Layers 8
Client 6: Layers 8
Client 7: Layers 8
Client 8: Layers 8
Client 9: Layers 8
2
dict_keys(['7', '8', '1', '3', '0', '9', '5', '4', '2', '6'])
Client 0: Layers 8
Client 1: Layers 8
Client 2: Layers 8
Client 3: Layers 8
Client 4: Layers 8
Client 5: Layers 8
Client 6: Layers 8
Client 7: Layers 8
Client 8: Layers 8
Client 9: Layers 8
3
dict_keys(['6', '1', '5', '4', '2', '7', '0', '9', '8', '3'])
Client 0: Layers 8
Client 1: Layers 8
Client 2: Layers 8
Client 3: Layers 8
Client 4: Layers 8
Client 5: Layers 8
Client 6: Layers 8
Client 7: Layers 8
Client 8: Layers 8
Cli

In [87]:
clients = {1: 'b', 2: 'c', 0: 'a'}
keys = clients.keys()

In [88]:
for i in range(3):
    print(clients[i])

a
b
c


In [99]:
x = np.array([1,0,0])
cli_order = [int(x) for x in clients.keys()]
print(cli_order)

[1, 2, 0]


In [100]:
res = np.zeros(len(x))
for i, order in enumerate(cli_order):
    res[order] = x[i]
print(res)


[0. 1. 0.]


In [None]:
# New upload function for more cases
def load_meta_metrics(R, 
                      server_rounds = 20,
                      output_dir = "/home/salesort/Documents/644_CMPUT/outputs",
                      res_temp = "Results_r",
                      file_temp = "metrics_dict_",
                      Rs = [0, 1, 3, 10, 30, 100, 300, 1000, 3000]):
    R_dirs = [f"{res_temp}{ri:05d}" for ri in Rs]
    file_names = [f"{file_temp}{rnd:02d}.pkl" for rnd in range(1,server_rounds+1)]

    meta_metrics = {}
    for i in range(server_rounds):
        with open(f"{output_dir}/{res_temp}{R:05d}/{file_names[i]}", "rb") as handle:
            metrics = pickle.load(handle)
        meta_metrics[i]= metrics

    return meta_metrics


In [48]:
#case = "manyR"
#case = "R0manyD"
case = "R1manyD"

In [49]:
output_dir = "/home/salesort/Documents/644_CMPUT/outputs"
out_dirs = []
file_names = ["metrics_dict_01.pkl", "metrics_dict_02.pkl",
              "metrics_dict_03.pkl", "metrics_dict_04.pkl",
              "metrics_dict_05.pkl", "metrics_dict_06.pkl",
              "metrics_dict_07.pkl", "metrics_dict_08.pkl",
              "metrics_dict_09.pkl", "metrics_dict_10.pkl",
              "metrics_dict_11.pkl", "metrics_dict_12.pkl",
              "metrics_dict_13.pkl", "metrics_dict_14.pkl",
              "metrics_dict_15.pkl", "metrics_dict_16.pkl",
              "metrics_dict_17.pkl", "metrics_dict_18.pkl",
              "metrics_dict_19.pkl", "metrics_dict_20.pkl",]

if case == "manyR":
    Rs = [0, 1, 3, 10, 30, 100, 300, 1_000, 3_000]
    Ds = [0.1]
    out_dirs = ["Results_r00000",
                "Results_r00001",
                "Results_r00003",
                "Results_r00010",
                "Results_r00030",
                "Results_r00100",
                "Results_r00300",
                "Results_r01000",
                "Results_r03000",]

elif case == "R0manyD":
    Rs = [0]
    Ds = [0.10, 0.50, 0.75, 0.90, 0.99]
    out_dirs = ["Results_r00000_prop0-10",
                "Results_r00000_prop0-50",
                "Results_r00000_prop0-75",
                "Results_r00000_prop0-90",
                "Results_r00000_prop0-99",]
elif case == "R1manyD":
    Rs = [1]
    Ds = [0.10, 0.50, 0.75, 0.90, 0.99]
    out_dirs = ["Results_r00001_prop0-10",
                "Results_r00001_prop0-50",
                "Results_r00001_prop0-75",
                "Results_r00001_prop0-90",
                "Results_r00001_prop0-99",]

classifier_keys = {}
for R in Rs:
    for D in Ds:
        classifier_keys[f"R{str(R)}D{str(D)}"] = {"R": R, "D": D}
keys = list(classifier_keys.keys())
print(dict_keys)
print(keys)
assert len(keys) == len(out_dirs)

for key_i, out_dir in enumerate(out_dirs):
    print(f"Key: {keys[key_i]}, {out_dir}")
    handles_at_dir = [f"{output_dir}/{out_dir}/{file_name}" for file_name in file_names]
    meta_metrics = {}
    for handle_i, handle_file in enumerate(handles_at_dir):
        with open(handle_file, "rb") as handle:
            print("\t", handle_file)
            #metrics = pickle.load(handle)
        meta_metrics[handle_i] = metrics
    


{'R1D0.1': {'R': 1, 'D': 0.1}, 'R1D0.5': {'R': 1, 'D': 0.5}, 'R1D0.75': {'R': 1, 'D': 0.75}, 'R1D0.9': {'R': 1, 'D': 0.9}, 'R1D0.99': {'R': 1, 'D': 0.99}}
['R1D0.1', 'R1D0.5', 'R1D0.75', 'R1D0.9', 'R1D0.99']
Key: R1D0.1, Results_r00001_prop0-10
	 /home/salesort/Documents/644_CMPUT/outputs/Results_r00001_prop0-10/metrics_dict_01.pkl
	 /home/salesort/Documents/644_CMPUT/outputs/Results_r00001_prop0-10/metrics_dict_02.pkl
	 /home/salesort/Documents/644_CMPUT/outputs/Results_r00001_prop0-10/metrics_dict_03.pkl
	 /home/salesort/Documents/644_CMPUT/outputs/Results_r00001_prop0-10/metrics_dict_04.pkl
	 /home/salesort/Documents/644_CMPUT/outputs/Results_r00001_prop0-10/metrics_dict_05.pkl
	 /home/salesort/Documents/644_CMPUT/outputs/Results_r00001_prop0-10/metrics_dict_06.pkl
	 /home/salesort/Documents/644_CMPUT/outputs/Results_r00001_prop0-10/metrics_dict_07.pkl
	 /home/salesort/Documents/644_CMPUT/outputs/Results_r00001_prop0-10/metrics_dict_08.pkl
	 /home/salesort/Documents/644_CMPUT/output