# Graphs depicting rate of blacklisting for liars given fixed lies

This file allows us to measure how many liars are blacklisted, depending on the choice of liars for a fixed set of lies.

There are multiple configurable variables (see below)

The generated data can be created by the graph_var_liars_test.go file.

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
#configs
lowerBoundLies = 1000  #lower bound on difference between true latency and lie told about it
upperBoundLies = 5000 #upper bound on difference between true latency and lie told about it
nbNodes = 100
nbLiars = 33
nbLiarCombinations = 100 #nb different combinations of liars chosen throughout test
randomLiars = False      #whether the liars are chosen randomly or within cluster
withSuspects = True      #activate enhanced blacklisting

random = "random_liars"
if not randomLiars:
    random = "clustered_liars"

filename = "test_" + \
    str(nbNodes) + "_nodes_" +\
    str(nbLiars) + "_liars" +\
    "_var_liars_distance_" + str(upperBoundLies) +\
    "_" + random +\
    "_" + str(nbLiarCombinations) + "_combinations"

if withSuspects:
    filename += "_with_suspects"

In [3]:
data= pd.read_csv("data/" + filename + ".csv")

In [4]:
data

Unnamed: 0,node,is_liar,is_blacklisted,lie,cluster
0,N0,True,True,2308,0
1,N0,True,True,2540,0
2,N0,True,True,3071,0
3,N0,True,True,4974,0
4,N0,True,True,2315,0
5,N0,True,True,3556,0
6,N0,True,True,2353,0
7,N0,True,True,3061,0
8,N0,True,True,4828,0
9,N0,True,True,3823,0


In [5]:
data["grouped"] = list(zip(data.is_liar, data.is_blacklisted))
grouped_data = data[["node", "grouped"]]

In [6]:
grouped_by_node = grouped_data.groupby("node").agg(lambda x: set(x.values.tolist()))

In [7]:
grouped_by_node["nb_situations"] = grouped_by_node['grouped'].map(lambda x: len(x))

In [8]:
grouped_by_node.sort_values(by="nb_situations", ascending=False)

Unnamed: 0_level_0,grouped,nb_situations
node,Unnamed: 1_level_1,Unnamed: 2_level_1
N99,"{(False, False), (True, True)}",2
N32,"{(False, False), (True, True)}",2
N64,"{(False, False), (True, True)}",2
N31,"{(False, False), (True, True)}",2
N96,"{(False, False)}",1
N66,"{(False, False)}",1
N74,"{(False, False)}",1
N73,"{(False, False)}",1
N72,"{(False, False)}",1
N71,"{(False, False)}",1


In [9]:
data[["node", "is_liar", "is_blacklisted"]].groupby(["is_liar", "is_blacklisted"]).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,node
is_liar,is_blacklisted,Unnamed: 2_level_1
False,False,6700
True,True,273900


In [10]:
lie_effects = data[["lie", "is_blacklisted"]][data["lie"] > 0]

In [11]:
lie_effects.groupby("lie").count().reset_index()

Unnamed: 0,lie,is_blacklisted
0,1000,100
1,1001,100
2,1005,200
3,1006,300
4,1007,100
5,1009,100
6,1010,100
7,1019,100
8,1021,100
9,1022,100


In [12]:
lie_effects.groupby("lie").agg(lambda x: set(x.values.tolist())).reset_index().sort_values(by="lie")

Unnamed: 0,lie,is_blacklisted
0,1000,{True}
1,1001,{True}
2,1005,{True}
3,1006,{True}
4,1007,{True}
5,1009,{True}
6,1010,{True}
7,1019,{True}
8,1021,{True}
9,1022,{True}


In [13]:
bl_by_lie = lie_effects.groupby(['lie', 'is_blacklisted']).size().unstack(fill_value=0).reset_index().sort_values(by="lie")

In [14]:
bl_by_lie

is_blacklisted,lie,True
0,1000,100
1,1001,100
2,1005,200
3,1006,300
4,1007,100
5,1009,100
6,1010,100
7,1019,100
8,1021,100
9,1022,100


In [15]:
bl_by_lie['lie_buckets'] = pd.qcut(bl_by_lie['lie'], 10)

In [16]:
bl_by_lie[["lie_buckets", True, False]].groupby("lie_buckets").sum().reset_index()

KeyError: '[False] not in index'

In [None]:
fig, ax = plt.subplots(figsize=(20, 10))
bl_by_lie[["lie_buckets", True, False]]\
    .groupby("lie_buckets").sum().reset_index()\
    .plot.bar(stacked=True, ax=ax, rot=0, 
              x="lie_buckets", title="How often a specific lie range got nodes blacklisted")

ax.set_ylabel("Number of times a lie was told by a node and whether that node was blacklisted  over 100 sets, liars random")
ax.set_xlabel("Lie range")

In [None]:
fig.savefig('graphs/range/'+ filename +".png")

In [None]:
bl_by_lie["Percentage"] = (bl_by_lie[True]/(bl_by_lie[True] + bl_by_lie[False])).replace([np.inf, -np.inf], 0.0)

In [None]:
bl_by_lie

In [None]:
fig1, ax1 = plt.subplots(figsize=(20, 10))
bl_by_lie[["Percentage", "lie_buckets"]]\
    .groupby("lie_buckets").mean().reset_index()\
    .plot(kind="bar",x="lie_buckets", ax=ax1, yticks=[0, 0.2, 0.4, 0.6, 0.8, 1.0],  rot=0,
          title="percentage of nodes telling a given lie that got blacklisted over 100 sets, liars random")

ax1.set_ylabel("Percentage of nodes who told a lie in a given range that got blacklisted")
ax1.set_xlabel("Lie ranges")

In [None]:
fig1.savefig('graphs/percentage/'+filename+'_percentage.png')