# Data Visualization

## 0. Importing Libraries

In [1]:
import json
import os
import sys
from sklearn import metrics
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from plotly.validators.scatter.marker import SymbolValidator
import numpy as np
import pandas as pd

# base_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
# sys.path.append(base_path)


## 1. Basic Processing Block

In [2]:
username = "fnx11"

# just for notebook
base_path = f'/home/{username}/thesis/codes/CosDefence/'

# for picking data from the repo
json_folder = os.path.join(base_path, 'json_files/')

# for picking data from results folder
# json_folder = f'/home/{username}/thesis/codes/Results/iter3/'

plots_folder = base_path + 'plots/'

file_p_frac0 = json_folder + 'mnist_C0.1_P0.0_FDRS30_LR0.001_OptAdam.txt'
file_p_frac10 = json_folder + 'mnist_C0.1_P0.1_FDRS30_LR0.001_OptAdam.txt'
file_p_frac20 = json_folder + 'mnist_C0.1_P0.2_FDRS30_LR0.001_OptAdam.txt'
file_p_frac40 = json_folder + 'mnist_C0.1_P0.4_FDRS30_LR0.001_OptAdam.txt'

files_dict = {}
c_fracs = [0.1, 0.2, 0.4, 1.0]
p_fracs = [0.0, 0.1, 0.2, 0.4]
atts = [0, 1]
for c_frac in c_fracs:
    for p_frac in p_fracs:
        for att in atts:
            dict_key = f'file_c{int(c_frac*100)}_p{int(p_frac*100):02}_a{att}'
#             print(dict_key)
            files_dict[dict_key] = json_folder + f'basic_C{c_frac}_P{p_frac}_Att{att}_FDRS400_LR0.001_OptAdam.txt'

def get_poisoned_client_nums(file_name):
    result_data = {}
    with open(file_name) as result_file:
        result_data = json.load(result_file)
    
    return result_data['poisoned_client_sel']

def get_training_losses(file_name):
    result_data = {}
    with open(file_name) as result_file:
        result_data = json.load(result_file)
    
    return result_data['avg_training_losses']

def get_test_data_df(file_name):
    result_data = {}
    with open(file_name) as result_file:
        result_data = json.load(result_file)

    testing_rounds = len(result_data['testing_losses'])
    testing_every = result_data['config']['testing_every']
    round_num = [i*testing_every for i in range(1, testing_rounds+1)]

    test_data_df = pd.DataFrame()
    test_data_df['round_num'] = round_num
    test_data_df['testing_loss'] = result_data['testing_losses']
    test_data_df['total_acc'] = result_data['total_accuracies']
    
    avg_precision = []
    avg_recall = []
    avg_f1score = []
    for j in range(testing_rounds):
        avg_precision.append(result_data['avg_metric_vals'][j][0])
        avg_recall.append(result_data['avg_metric_vals'][j][1])
        avg_f1score.append(result_data['avg_metric_vals'][j][2])
    test_data_df['avg_precision'] = avg_precision
    test_data_df['avg_recall'] = avg_recall
    test_data_df['avg_f1score'] = avg_f1score
    
        
    for i in range(10):
        class_name = f'class_{i}'
        temp_class_acc = []
        temp_class_precision = []
        temp_class_recall = []
        temp_class_f1score = []
        for j in range(testing_rounds):
            temp_class_acc.append(result_data['class_accuracies'][j][i])
            temp_class_precision.append(result_data['class_precisions'][j][i])
            temp_class_recall.append(result_data['class_recalls'][j][i])
            temp_class_f1score.append(result_data['class_f1scores'][j][i])
            
        test_data_df[class_name+'_acc'] = temp_class_acc
        test_data_df[class_name+'_precision'] = temp_class_precision
        test_data_df[class_name+'_recall'] = temp_class_recall
        test_data_df[class_name+'_f1score'] = temp_class_f1score
        
    return test_data_df

def get_final_test_data(file_name):
    result_data = {}
    with open(file_name) as result_file:
        result_data = json.load(result_file)
        
    predictions = result_data['final_test_data']['predictions']
    ground_truths = result_data['final_test_data']['ground_truths']
    total_acc = result_data['final_test_data']['total_acc']
    classes_acc = result_data['final_test_data']['classes_acc']
    testing_loss = result_data['final_test_data']['testing_loss']
    
    return total_acc, classes_acc, predictions, ground_truths, testing_loss

# saving all the data in a dataframe so that we can easily plot it with plottly


## 1. Plotting Training Loss of Clients

Clients are picked randomly there is no gurantee that all the clients have equal number of training epochs.

In [3]:
avg_training_losses = get_training_losses(file_p_frac20)

FileNotFoundError: [Errno 2] No such file or directory: '/home/fnx11/thesis/codes/CosDefence/json_files/mnist_C0.1_P0.2_FDRS30_LR0.001_OptAdam.txt'

In [15]:
fig = go.Figure()
# for i in range(100):
#     losses = client_training_losses[i][:100]
#     fig.add_trace(go.Scatter(x=np.arange(len(losses)), y=losses,
#                     mode='lines+markers',
#                     name=f'client{i}'))

fig.add_trace(go.Scatter(x=np.arange(len(avg_training_losses)), y=avg_training_losses,
                    mode='lines',
                    name=f'Training Loss'))

fig.update_layout(title='Training Losses',
                   xaxis_title='Epoch',
                   yaxis_title='Loss')
fig.show()

## 2. Plotting Testing Loss and Accuracies

Testing and accuracy calculation is done at every 10th round(can be changed).

In [16]:
data_df = get_test_data_df(file_p_frac20)
data_df.head(40) # we know that we trained for 800 rounds and tested every 10 round.

Unnamed: 0,round_num,testing_loss,total_acc,avg_precision,avg_recall,avg_f1score,class_0_acc,class_0_precision,class_0_recall,class_0_f1score,...,class_7_recall,class_7_f1score,class_8_acc,class_8_precision,class_8_recall,class_8_f1score,class_9_acc,class_9_precision,class_9_recall,class_9_f1score
0,3,0.050803,51.19,0.691059,0.5119,0.446473,98.061224,0.486582,0.980612,0.650423,...,0.257782,0.407379,0.0,1.0,0.0,0.0,3.171457,0.888889,0.031715,0.061244
1,6,0.029256,72.85,0.74957,0.7285,0.707174,97.755102,0.778229,0.977551,0.866576,...,0.761673,0.813506,53.080082,0.810345,0.530801,0.641439,18.334985,0.728346,0.18335,0.292953
2,9,0.023956,77.22,0.799095,0.7722,0.755298,95.918367,0.911736,0.959184,0.934858,...,0.874514,0.874088,62.217659,0.73633,0.622177,0.674457,78.39445,0.560993,0.783944,0.653989
3,12,0.017194,83.46,0.844055,0.8346,0.832335,95.918367,0.926108,0.959184,0.942356,...,0.853113,0.883182,73.305955,0.795987,0.73306,0.763228,77.502478,0.699463,0.775025,0.735308
4,15,0.015321,85.26,0.858032,0.8526,0.848557,97.244898,0.925243,0.972449,0.948259,...,0.910506,0.888467,77.61807,0.789144,0.776181,0.782609,77.106046,0.784274,0.77106,0.777611
5,18,0.012611,87.55,0.878318,0.8755,0.87464,97.755102,0.914995,0.977551,0.945239,...,0.893969,0.896585,84.086242,0.794374,0.840862,0.816958,81.169475,0.806897,0.811695,0.809289
6,21,0.012262,87.62,0.882813,0.8762,0.87505,97.653061,0.921078,0.976531,0.947994,...,0.88716,0.903418,89.938398,0.748718,0.899384,0.817164,84.44004,0.787431,0.8444,0.814921
7,24,0.012186,87.89,0.886135,0.8789,0.876756,96.938776,0.93412,0.969388,0.951427,...,0.885214,0.912738,86.960986,0.798303,0.86961,0.832432,87.809713,0.754043,0.878097,0.811355
8,27,0.009698,90.35,0.904738,0.9035,0.902887,97.755102,0.922929,0.977551,0.949455,...,0.91537,0.90874,84.188912,0.886486,0.841889,0.863612,87.314172,0.847115,0.873142,0.859932
9,30,0.00838,91.79,0.91826,0.9179,0.91754,98.061224,0.9303,0.980612,0.954794,...,0.904669,0.925373,86.2423,0.910076,0.862423,0.885609,86.323092,0.903527,0.863231,0.882919


### A. Comparing All accuracies

In [17]:
# Create traces
fig = go.Figure()
fig.add_trace(go.Scatter(x=data_df['round_num'], y=data_df['total_acc'],
                    mode='lines+markers',
                    name='total_acc'))
for i in range(10):
    class_name = f'class_{i}_acc'
    fig.add_trace(go.Scatter(x=data_df['round_num'], y=data_df[class_name],
                        mode='lines+markers',
                        name=class_name))

fig.update_layout(title='Accuracy Evolution',
                   xaxis_title='Round Num',
                   yaxis_title='Accuracy')
fig.show()


### B. Comparing Accuracy of Flipped classes(Source and Target)

In [18]:
# Create traces
fig = go.Figure()
fig.add_trace(go.Scatter(x=data_df['round_num'], y=data_df['total_acc'],
                    mode='lines+markers',
                    name='total_acc'))

fig.add_trace(go.Scatter(x=data_df['round_num'], y=data_df['class_2_acc'],
                    mode='lines+markers',
                    name='flipped_source'))

fig.add_trace(go.Scatter(x=data_df['round_num'], y=data_df['class_9_acc'],
                    mode='lines+markers',
                    name='flipped_target'))

fig.update_layout(title='Accuracy Evolution',
                   xaxis_title='Round Num',
                   yaxis_title='Accuracy')
fig.show()

### C. Testing Loss

In [19]:
# Create traces
fig = go.Figure()
fig.add_trace(go.Scatter(x=data_df['round_num'], y=data_df['testing_loss'],
                    mode='lines+markers',
                    name='testing_loss'))
fig.update_layout(title='Testing Loss over the Training',
                   xaxis_title='Round Num',
                   yaxis_title='Loss')
fig.show()

## 3. Compare Different Poisoned Data Scenarios

Read 4 different files with different compare the accuracies and taining losses

In [20]:
data0_df = get_test_data_df(file_p_frac0)
data10_df = get_test_data_df(file_p_frac10)
data20_df = get_test_data_df(file_p_frac20)
data40_df = get_test_data_df(file_p_frac40)
# all_data_dfs = [data0_df[:20], data10_df[:20], data20_df[:20], data40_df[:20]]
all_data_dfs = [data0_df, data10_df, data20_df, data40_df]

### A. Comparing Total Acc, Flipped Source and Flipped Target in different scenarios

In [21]:
# Create traces
fig = go.Figure()
for i, data_df in enumerate(all_data_dfs):
    
    # to make data40 and plot curves make better sense
    if i==3:
        i += 1

    fig.add_trace(go.Scatter(x=data_df['round_num'], y=data_df['total_acc'],
                        mode='lines+markers',
                        name=f'total_acc{i*10}'))

    fig.add_trace(go.Scatter(x=data_df['round_num'], y=data_df['class_2_acc'],
                        mode='lines+markers',
                        name=f'flipped_source{i*10}'))

    fig.add_trace(go.Scatter(x=data_df['round_num'], y=data_df['class_9_acc'],
                        mode='lines+markers',
                        name=f'flipped_target{i*10}'))

fig.update_layout(title='Accuracy Evolution',
                   xaxis_title='Round Num',
                   yaxis_title='Accuracy')
fig.show()

### Separate sub plots

In [22]:
fig = make_subplots(rows=2, cols=2)

# Top left
fig.add_trace(go.Scatter(x=data_df['round_num'], y=data_df['total_acc'],
                    mode='lines+markers',
                    name='total_acc0'), row=1, col=1)

fig.add_trace(go.Scatter(x=data_df['round_num'], y=data_df['class_2_acc'],
                    mode='lines+markers',
                    name='flipped_source0'), row=1, col=1)

fig.add_trace(go.Scatter(x=data_df['round_num'], y=data_df['class_9_acc'],
                    mode='lines+markers',
                    name='flipped_target0'), row=1, col=1)

# Top right
fig.add_trace(go.Scatter(x=data_df['round_num'], y=data_df['total_acc'],
                    mode='lines+markers',
                    name='total_acc10'), row=1, col=2)

fig.add_trace(go.Scatter(x=data_df['round_num'], y=data_df['class_2_acc'],
                    mode='lines+markers',
                    name='flipped_source10'), row=1, col=2)

fig.add_trace(go.Scatter(x=data_df['round_num'], y=data_df['class_9_acc'],
                    mode='lines+markers',
                    name='flipped_target10'), row=1, col=2)

# Bottom left
fig.add_trace(go.Scatter(x=data_df['round_num'], y=data_df['total_acc'],
                    mode='lines+markers',
                    name='total_acc20'), row=2, col=1)

fig.add_trace(go.Scatter(x=data_df['round_num'], y=data_df['class_2_acc'],
                    mode='lines+markers',
                    name='flipped_source20'), row=2, col=1)

fig.add_trace(go.Scatter(x=data_df['round_num'], y=data_df['class_9_acc'],
                    mode='lines+markers',
                    name='flipped_target20'), row=2, col=1)

# Bottom right
fig.add_trace(go.Scatter(x=data_df['round_num'], y=data_df['total_acc'],
                    mode='lines+markers',
                    name='total_acc40'), row=2, col=2)

fig.add_trace(go.Scatter(x=data_df['round_num'], y=data_df['class_2_acc'],
                    mode='lines+markers',
                    name='flipped_source40'), row=2, col=2)

fig.add_trace(go.Scatter(x=data_df['round_num'], y=data_df['class_9_acc'],
                    mode='lines+markers',
                    name='flipped_target40'), row=2, col=2)

fig.update_layout(title='Accuracy Evolution',
                   xaxis_title='Round Num',
                   yaxis_title='Accuracy')
fig.show()


## 4. Calculate Precision, Recall and F1-Measure of each class

In [12]:
total_acc, classes_acc, predictions, ground_truths, testing_loss = get_final_test_data(file_p_frac0)
confusion_matrix = metrics.confusion_matrix(ground_truths, predictions)
clf_report = metrics.classification_report(ground_truths, predictions, digits=3)
print(confusion_matrix)
print(clf_report)

[[372  51  53  46  18  10   6  57 264 123]
 [ 12 467   4  49   2   7   9  23 106 321]
 [ 74  17 141 128 258  59 173  75  46  29]
 [ 38  24  94 340  66  81 201  81  20  55]
 [ 52   8  84  78 368  47 227  97  20  19]
 [ 16  18 103 275 140  84 252  63  28  21]
 [ 10   9  93 182 212  66 341  45   8  34]
 [ 35  22  44 137 135  36 114 394  13  70]
 [147 112  24  48   6   5   2  11 520 125]
 [ 31 201   8  37   3   8   6  40 115 551]]
              precision    recall  f1-score   support

           0      0.473     0.372     0.416      1000
           1      0.503     0.467     0.484      1000
           2      0.218     0.141     0.171      1000
           3      0.258     0.340     0.293      1000
           4      0.305     0.368     0.333      1000
           5      0.208     0.084     0.120      1000
           6      0.256     0.341     0.293      1000
           7      0.445     0.394     0.418      1000
           8      0.456     0.520     0.486      1000
           9      0.409     

In [13]:
total_acc, classes_acc, predictions, ground_truths, testing_loss = get_final_test_data(file_p_frac10)
confusion_matrix = metrics.confusion_matrix(ground_truths, predictions)
clf_report = metrics.classification_report(ground_truths, predictions, digits=3)
print(confusion_matrix)
print(clf_report)

[[   0    0    0    0    0    0    0    0    0 1000]
 [   0    0    0    0    0    0    0    0    0 1000]
 [   0    0    0    0    0    0    0    0    0 1000]
 [   0    0    0    0    0    0    0    0    0 1000]
 [   0    0    0    0    0    0    0    0    0 1000]
 [   0    0    0    0    0    0    0    0    0 1000]
 [   0    0    0    0    0    0    0    0    0 1000]
 [   0    0    0    0    0    0    0    0    0 1000]
 [   0    0    0    0    0    0    0    0    0 1000]
 [   0    0    0    0    0    0    0    0    0 1000]]
              precision    recall  f1-score   support

           0      0.000     0.000     0.000      1000
           1      0.000     0.000     0.000      1000
           2      0.000     0.000     0.000      1000
           3      0.000     0.000     0.000      1000
           4      0.000     0.000     0.000      1000
           5      0.000     0.000     0.000      1000
           6      0.000     0.000     0.000      1000
           7      0.000     0.000   


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.



In [14]:
total_acc, classes_acc, predictions, ground_truths, testing_loss = get_final_test_data(file_p_frac20)
confusion_matrix = metrics.confusion_matrix(ground_truths, predictions)
clf_report = metrics.classification_report(ground_truths, predictions, digits=3)
print(confusion_matrix)
print(clf_report)

[[446  40  90   5   5   4  75  45 100 190]
 [ 75 270  15  19   0   3  61  28  79 450]
 [ 87   3 186  20   4   7 572  45  15  61]
 [ 43  10 102  77   1  13 596  47   3 108]
 [ 50   2  91   5   3   0 734  59   7  49]
 [ 27   9 111  85   1  14 629  39  10  75]
 [ 13   2  63  28   1   4 811  28   1  49]
 [ 39   8  70  15   3   8 438 262   5 152]
 [291  73  33  10   4   8  37  16 278 250]
 [ 93  78  13  20   2   5  70  39  53 627]]
              precision    recall  f1-score   support

           0      0.383     0.446     0.412      1000
           1      0.545     0.270     0.361      1000
           2      0.240     0.186     0.210      1000
           3      0.271     0.077     0.120      1000
           4      0.125     0.003     0.006      1000
           5      0.212     0.014     0.026      1000
           6      0.202     0.811     0.323      1000
           7      0.431     0.262     0.326      1000
           8      0.505     0.278     0.358      1000
           9      0.312     

In [15]:
total_acc, classes_acc, predictions, ground_truths, testing_loss = get_final_test_data(file_p_frac40)
confusion_matrix = metrics.confusion_matrix(ground_truths, predictions)
clf_report = metrics.classification_report(ground_truths, predictions, digits=3)
print(confusion_matrix)
print(clf_report)

[[   0    0    0    0    0    0    0    0    0 1000]
 [   0    0    0    0    0    0    0    0    0 1000]
 [   0    0    0    0    0    0    0    0    0 1000]
 [   0    0    0    0    0    0    0    0    0 1000]
 [   0    0    0    0    0    0    0    0    0 1000]
 [   0    0    0    0    0    0    0    0    0 1000]
 [   0    0    0    0    0    0    0    0    0 1000]
 [   0    0    0    0    0    0    0    0    0 1000]
 [   0    0    0    0    0    0    0    0    0 1000]
 [   0    0    0    0    0    0    0    0    0 1000]]
              precision    recall  f1-score   support

           0      0.000     0.000     0.000      1000
           1      0.000     0.000     0.000      1000
           2      0.000     0.000     0.000      1000
           3      0.000     0.000     0.000      1000
           4      0.000     0.000     0.000      1000
           5      0.000     0.000     0.000      1000
           6      0.000     0.000     0.000      1000
           7      0.000     0.000   


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.



## 5. Varying c_frac, p_frac and attack
c_frac={0.1, 0.2, 0.4, 1.0} and p_frac = {0.0, 0.1, 0.2, 0.4} and attack = {(2, 9), (0, 2)}  bird-> truck, airplane -> bird.

### First keeping c_per same and varying p_per, plotting total_acc and source_acc
c_frac = 0.1, 0.2, 0.4, 1.0
c_per = 10, 20, 40, 100

In [16]:
for k in [10, 20, 40, 100]:
    all_data_dfs = []
    for j in [0, 10, 20, 40]:
        file_name = f'file_c{k:02}_p{j:02}_a0'
        all_data_dfs.append(get_test_data_df(files_dict[file_name]))
    fig = go.Figure()
    for i, data_df in enumerate(all_data_dfs):
        # to make data40 and plot curves make better sense
        if i==3:
            i += 1
            
        i = i*10
#         fig.add_trace(go.Scatter(x=data_df['round_num'], y=data_df['total_acc'],
#                             mode='lines', line={'dash': 'dash'},
#                             name=f'total_acc_c_per{k:02}_p_per{i:02}'))

        fig.add_trace(go.Scatter(x=data_df['round_num'], y=data_df['total_acc'],
                            mode='lines+markers',
                            name=f'total_acc_p_per{i:02}'))

        fig.add_trace(go.Scatter(x=data_df['round_num'], y=data_df['class_2_acc'],
                            mode='lines+markers', marker_symbol = 'square',
                            name=f'flipped_source_p_per{i:02}'))


    fig.update_layout(title=f'Effect on Accuracy when c_per={k:02} p_per increased from 0 to 40',
                       xaxis_title='Round Num',
                       yaxis_title='Accuracy')
    
    fig.write_image(plots_folder + file_name +".png", width=800)
    fig.show()

### Now let's compare two attack types
first we see for same c_per = 20 and p_per = 20, plot att0 and att1

In [17]:
atts = [0, 1]
all_data_dfs = []
all_data_dfs.append(get_test_data_df(files_dict['file_c20_p20_a0']))
all_data_dfs.append(get_test_data_df(files_dict['file_c20_p20_a1']))

# Create traces
fig = go.Figure()
for att, data_df in zip(atts, all_data_dfs):

    fig.add_trace(go.Scatter(x=data_df['round_num'], y=data_df['total_acc'],
                        mode='lines+markers',
                        name=f'total_acc_att{att}'))

    fig.add_trace(go.Scatter(x=data_df['round_num'], y=data_df['class_2_acc'],
                        mode='lines+markers',
                        name=f'flipped_source_att{att}'))

#     fig.add_trace(go.Scatter(x=data_df['round_num'], y=data_df['class_9_acc'],
#                         mode='lines+markers',
#                         name=f'flipped_target_att{att}'))

fig.update_layout(title='Accuracy Evolution',
                   xaxis_title='Round Num',
                   yaxis_title='Accuracy')
fig.show()


## Plotting avg training loss and testing loss
when p_per increases for c_per=20

In [18]:
fig = go.Figure()
for j in [0, 10, 20, 40]:
    file_name = f'file_c20_p{j:02}_a0'
    avg_training_losses =  get_training_losses(files_dict[file_name])
    
    fig.add_trace(go.Scatter(x=np.arange(len(avg_training_losses)), y=avg_training_losses,
                        mode='lines',
                        name=f'c_per_20_p_per_{j:02}'))

fig.update_layout(title='Training Losses',
                   xaxis_title='Fed Round',
                   yaxis_title='Loss')
fig.show()

fig = go.Figure()
for j in [0, 10, 20, 40]:
    file_name = f'file_c20_p{j:02}_a0'
    data_df =  get_test_data_df(files_dict[file_name])
    
    fig.add_trace(go.Scatter(x=data_df['round_num'], y=data_df['testing_loss'],
                    mode='lines+markers',
                    name=f'testing_loss_c20_p{j:02}'))

fig.update_layout(title='Testing Loss over the Training',
                   xaxis_title='Round Num',
                   yaxis_title='Loss')
fig.show()