In [1]:
import pandas as pd
import plotly.express as px

from src.wrapper import create_data, simulate, read_accuracy, anonimize, utility


In [2]:
# Generate the background knowledge and the data to be anonymized.
# The last two positional arguments are echoed by the tool's log as alpha_e and
# alpha_v, in that order (0.5, 0.75 is reported as alpha_v=0.75, alpha_e=0.5).
for exp_name, alpha_e, alpha_v in [
    ('exp1', 0.8, 0.8),
    ('exp2', 0.5, 0.75),
    ('exp3', 0.4, 0.6),
]:
    create_data('enron', 2000, exp_name, 1, 'ns09', 1, alpha_e, alpha_v)


@ 2021-06-11 16:53:31.701

Creating test data for: enron [exp1]

	(Nodes: 36692, edges: 183831)

	Export params: exports=1, export_size=2000

	Perturb. params: algo=ns09, variants=1, alpha_v=0.8, alpha_e=0.8

	Export: ./output/enron_2.0k_exp1/SimuData/e0.tgf, nodes: 2002, edges: 31146

		Variant: v0

			g_src nodes: 1786, edges: 22430

			g_tar nodes: 1781, edges: 22936

			node overlap: 1570

@ 2021-06-11 16:53:33.068

Creating test data for: enron [exp2]

	(Nodes: 36692, edges: 183831)

	Export params: exports=1, export_size=2000

	Perturb. params: algo=ns09, variants=1, alpha_v=0.75, alpha_e=0.5

	Export: ./output/enron_2.0k_exp2/SimuData/e0.tgf, nodes: 2002, edges: 33380

		Variant: v0

			g_src nodes: 1648, edges: 17437

			g_tar nodes: 1661, edges: 17095

			node overlap: 1362

@ 2021-06-11 16:53:34.389

Creating test data for: enron [exp3]

	(Nodes: 36692, edges: 183831)

	Export params: exports=1, export_size=2000

	Perturb. params: algo=ns09, variants=1, alpha_v=0.6, alpha_e=0

In [3]:
# De-anonymization.
# Note that the attack is considerably more successful when the background
# knowledge is better.
for exp_name in ('exp1', 'exp2', 'exp3'):
    simulate('enron', 2000, exp_name, 'ns09', 1, 'random.25', 50, 0.01)


@ 2021-06-11 16:53:42.185

Simulating on data derived from: enron [exp1]

	Parameters: algo=ns09, num_rounds=1, seed_type=random.25, seed_count=50

	Parameters of ns09: theta=0.01

	Exports: 1, perturbed: 1

Simulating: e0_v0

	g_src nodes: 1786, edges: 22430

	g_tar nodes: 1781, edges: 22936

	node overlap: 1570

	#0

	    50 seed nodes (from top 25%)

	    Propagate: [++++++]

	    6 rounds, 9.46 sec(s) 

	    TP=71.46%, FP=1.91%, N/A=26.62%, FP/TP=2.67%

	    Total error (w/mappings out of GT): 5.1%)

@ 2021-06-11 16:53:52.12

Simulating on data derived from: enron [exp2]

	Parameters: algo=ns09, num_rounds=1, seed_type=random.25, seed_count=50

	Parameters of ns09: theta=0.01

	Exports: 1, perturbed: 1

Simulating: e0_v0

	g_src nodes: 1648, edges: 17437

	g_tar nodes: 1661, edges: 17095

	node overlap: 1362

	#0

	    50 seed nodes (from top 25%)

	    Propagate: [++++++++]

	    8 rounds, 4.85 sec(s) 

	    TP=62.78%, FP=8.44%, N/A=28.78%, FP/TP=13.45%

	    Total error (w/mappin

In [4]:
# Read the accuracies of the three experiments into one table.
# Rows are collected in a plain list and the DataFrame is built once:
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, and growing
# a frame row by row copies it on every iteration.
records = []
for i in range(3):
    experiment = 'exp' + str(i + 1)
    # only the first and third entries of the 'avg' result are needed here
    TPR, _, FPR, _ = read_accuracy('enron', 2000, experiment, 'ns09')['avg']
    records.append({'experiment': experiment, 'anon': 'none', 'deanon': 'ns09', 'TPR': TPR, 'FPR': FPR})

# explicit column list keeps the column order stable, even with no rows
df = pd.DataFrame(records, columns=['experiment', 'anon', 'deanon', 'TPR', 'FPR'])
df


Unnamed: 0,experiment,anon,deanon,TPR,FPR
0,exp1,none,ns09,71.464968,1.910828
1,exp2,none,ns09,62.77533,8.443465
2,exp3,none,ns09,6.666667,2.095238


In [5]:
# Plot the results on the scoreboard: FPR on the x axis, TPR on the y axis,
# one colour per experiment.
fig = px.scatter(
    df,
    x='FPR',
    y='TPR',
    color='experiment',
    range_y=[0, 100],
    labels=dict(
        FPR="Incorrect matches (FPR)",
        TPR="Correct matches (TPR)",
    ),
    title="Scoreboard: Correct vs Incorrect matches",
)
fig.show()


  ## 2. rész
  - egy perturb
  - több deanon összehasonlítása (nar, blb, grh, KL, DV?)
  - accuracy mennyi
  - scoreboard

In [6]:
# parameters used by the following cells of this part
network = 'enron'
size = 3000
experiment = '2_exp'


In [7]:
# Create the test data for this part; the tool's log reports this call as
# exports=1 with perturbation algo=sample.
create_data(network, size, experiment, 1, 'sample', 1, 1, 0.95)


@ 2021-06-11 16:54:03.195

Creating test data for: enron [2_exp]

	(Nodes: 36692, edges: 183831)

	Export params: exports=1, export_size=3000

	Perturb. params: algo=sample

	Export: ./output/enron_3.0k_2_exp/SimuData/e0.tgf, nodes: 3002, edges: 45224

		Variant: v0

			g_src nodes: 2852, edges: 41249

			g_tar nodes: 2852, edges: 40276

			node overlap: 2707



In [8]:
# this is the ns09 algorithm
simulate(network, size, experiment, 'ns09', 1, 'random.25', 50, 0.01)
# next up is blb (run in the following cell): how well does it do?

@ 2021-06-11 16:54:05.254

Simulating on data derived from: enron [2_exp]

	Parameters: algo=ns09, num_rounds=1, seed_type=random.25, seed_count=50

	Parameters of ns09: theta=0.01

	Exports: 1, perturbed: 1

Simulating: e0_v0

	g_src nodes: 2852, edges: 41249

	g_tar nodes: 2852, edges: 40276

	node overlap: 2707

	#0

	    50 seed nodes (from top 25%)

	    Propagate: [++++++]

	    6 rounds, 15.43 sec(s) 

	    TP=77.02%, FP=0.11%, N/A=22.87%, FP/TP=0.14%

	    Total error (w/mappings out of GT): 1.59%)



In [9]:
# Run the blb de-anonymization on the same data; the tool's log reports the
# '0.1,0.5' parameter string as theta=0.1, delta=0.5.
simulate(network, size, experiment, 'blb', 1, 'random.25', 50, '0.1,0.5')

@ 2021-06-11 16:54:21.464

Simulating on data derived from: enron [2_exp]

	Parameters: algo=blb, num_rounds=1, seed_type=random.25, seed_count=50

	Parameters of blb: theta=0.1, delta=0.5

	Exports: 1, perturbed: 1

Simulating: e0_v0

	g_src nodes: 2852, edges: 41249

	g_tar nodes: 2852, edges: 40276

	node overlap: 2707

	#0

	    50 seed nodes (from top 25%)

	    Propagate: [++++++]

	    6 rounds, 21.63 sec(s) 

	    TP=77.8%, FP=0.3%, N/A=21.91%, FP/TP=0.38%

	    Total error (w/mappings out of GT): 1.85%)



In [10]:
# this is grh (TODO) -> implement KL
simulate(network, size, experiment, 'grh', 1, 'random.25', 50, 0.1)

@ 2021-06-11 16:54:43.872

Simulating on data derived from: enron [2_exp]

	Parameters: algo=grh, num_rounds=1, seed_type=random.25, seed_count=50

	Parameters of grh: theta=0.1

	Exports: 1, perturbed: 1

Simulating: e0_v0

	g_src nodes: 2852, edges: 41249

	g_tar nodes: 2852, edges: 40276

	node overlap: 2707

	#0

	    50 seed nodes (from top 25%)

	    Propagate: [++++++++]

	    8 rounds, 27.33 sec(s) 

	    TP=57.44%, FP=0%, N/A=42.56%, FP/TP=0%

	    Total error (w/mappings out of GT): 0%)



In [11]:
# Collect the accuracy of each de-anonymization algorithm into one table.
# Rows are gathered in a list and the DataFrame is built once: DataFrame.append
# was deprecated in pandas 1.4 and removed in 2.0, and growing a frame row by
# row copies it on every iteration.
records = []
for deanon in ['ns09', 'blb', 'grh']:
    # only the first and third entries of the 'avg' result are needed here
    TPR, _, FPR, _ = read_accuracy(network, size, experiment, deanon)['avg']
    records.append({'experiment': experiment, 'anon': 'none', 'deanon': deanon, 'TPR': TPR, 'FPR': FPR})

# explicit column list keeps the column order stable, even with no rows
df = pd.DataFrame(records, columns=['experiment', 'anon', 'deanon', 'TPR', 'FPR'])
df

Unnamed: 0,experiment,anon,deanon,TPR,FPR
0,2_exp,none,ns09,77.022534,0.110824
1,2_exp,none,blb,77.798301,0.29553
2,2_exp,none,grh,57.443665,0.0


In [12]:
# Scoreboard for this part: one marker symbol per de-anonymization algorithm.
fig = px.scatter(
    df,
    x='FPR',
    y='TPR',
    symbol='deanon',
    range_y=[0, 100],
    labels=dict(
        FPR="Incorrect matches (FPR)",
        TPR="Correct matches (TPR)",
    ),
    title="Scoreboard: Correct vs Incorrect matches",
)
fig.show()

 ## 3. rész
 - 1 perturb, 3 anon, 1 deanon, utility measure

In [13]:
# parameters used by the following cells of this part
network = 'wiki'
size = 2000
experiment = '3_exp'
deanon = 'ns09'

In [14]:
# Create the test data for this part; the tool's log reports this call as
# algo=ns09, variants=1, alpha_v=0.75, alpha_e=0.5.
create_data(network, size, experiment, 1, 'ns09', 1, 0.5, 0.75)

@ 2021-06-11 16:55:15.826

Creating test data for: wiki [3_exp]

	(Nodes: 7115, edges: 100762)

	Export params: exports=1, export_size=2000

	Perturb. params: algo=ns09, variants=1, alpha_v=0.75, alpha_e=0.5

	Export: ./output/wiki_2.0k_3_exp/SimuData/e0.tgf, nodes: 2002, edges: 65100

		Variant: v0

			g_src nodes: 1742, edges: 33228

			g_tar nodes: 1741, edges: 32505

			node overlap: 1486



In [15]:

# parameter passed to each anonymization method
params = {
    'sw': 0.1,
    'kda': 50,
    'dp': 50
}

# Results are collected as a list of rows and turned into a DataFrame once at
# the end: DataFrame.append was deprecated in pandas 1.4 and removed in 2.0,
# and growing a frame row by row copies it on every iteration.
records = []

for anon in ['sw', 'kda', 'dp']:
    # anonymize the graph
    anonimize(network, size, experiment, anon, params[anon])

    # calculate utility loss (the 'lcc' measure)
    util = utility(network, size, experiment, 'lcc')

    # run the de-anonymization algorithm on the anonymized graph
    simulate(network, size, experiment, deanon, 1, 'random.25', 50, 0.01)

    # read back the results; only the first and third 'avg' entries are needed
    TPR, _, FPR, _ = read_accuracy(network, size, experiment, deanon)['avg']
    records.append({'experiment': experiment, 'anon': anon, 'deanon': deanon, 'TPR': TPR, 'FPR': FPR, 'utility': util})

# explicit column list keeps the column order stable, even with no rows
df = pd.DataFrame(records, columns=['experiment', 'anon', 'deanon', 'TPR', 'FPR', 'utility'])
df


/home/topi/Code/MNB/tutorial/output/wiki_2.0k_3_exp/SimuData/e0_v0_tar_orig.tgf /home/topi/Code/MNB/tutorial/output/wiki_2.0k_3_exp/SimuData/e0_v0_tar.tgf 0.1

174

174

0

no avaliable

@ 2021-06-11 16:55:19.127

Simulating on data derived from: wiki [3_exp]

	Parameters: algo=ns09, num_rounds=1, seed_type=random.25, seed_count=50

	Parameters of ns09: theta=0.01

	Exports: 1, perturbed: 1

Simulating: e0_v0

	g_src nodes: 1742, edges: 33228

	g_tar nodes: 1741, edges: 32505

	node overlap: 1486

	#0

	    50 seed nodes (from top 25%)

	    Propagate: [++++++++]

	    8 rounds, 12.2 sec(s) 

	    TP=88.96%, FP=6.06%, N/A=4.98%, FP/TP=6.81%

	    Total error (w/mappings out of GT): 15.81%)

here

newDegree Distrub 

493 50

140 50

105 50

85 50

73 50

64 50

57 50

53 50

49 50

45 50

41 50

38 50

36 50

33 50

31 50

29 50

27 50

26 50

24 50

23 50

21 50

20 50

19 50

17 50

16 50

15 50

14 50

13 50

12 50

10 50

9 50

8 50

7 50

4 50

2 41

a

a

a

a

a

a

a

a

a

a

a

Unnamed: 0,experiment,anon,deanon,TPR,FPR,utility
0,3_exp,sw,ns09,88.963661,6.056528,1.0
1,3_exp,kda,ns09,76.514132,14.199192,0.847891
2,3_exp,dp,ns09,76.716016,10.565276,0.786118


In [16]:
# Scoreboard for this part: one colour per anonymization method.
fig = px.scatter(
    df,
    x='FPR',
    y='TPR',
    color='anon',
    range_y=[0, 100],
    labels=dict(
        FPR="Incorrect matches (FPR)",
        TPR="Correct matches (TPR)",
    ),
    title="Scoreboard: Correct vs Incorrect matches",
)
fig.show()

 ## 4. rész
 - 1 perturb
 - 3 anon
 - 3 deanon
 - scoreboard
 - utility

In [17]:
# parameters used by the following cells of this part
network = 'wiki'
size = 2000
experiment = '4_exp'
nseed = 50


In [18]:
# Parameters, keyed by method name. Anonymization, de-anonymization and
# utility methods share one lookup table.
params = {
    # anon
    'sw': 0.1,
    'kda': 50,
    'dp': 50,
    # deanon
    'ns09': 0.01,
    'grh': 0.1,
    'blb': '0.1,0.5',
    # util
    'inf': 5
}

# fixed background knowledge
create_data(network, size, experiment, 1, 'ns09', 1, 0.5, 0.75)

# Results are collected as lists of rows and turned into DataFrames once at the
# end: DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, and
# growing a frame row by row copies it on every iteration. Two separate lists
# also avoid starting both result tables from one shared empty frame.
accuracy_records = []
utility_records = []

for anon in ['sw', 'kda', 'dp']:
    # anonymize the graph
    anonimize(network, size, experiment, anon, params[anon])

    # calculate utility loss for the anonymization method
    for util in ['inf', 'deg', 'lcc']:
        # only 'inf' has an entry in params; the other measures receive None
        param = params.get(util)
        value = utility(network, size, experiment, util, param)
        utility_records.append({'anon': anon, 'value': value, 'util': util})

    for deanon in ['ns09', 'grh', 'blb']:
        # run the de-anonymization algorithm
        simulate(network, size, experiment, deanon, 1, 'random.25', nseed, params[deanon])
        # read back the results; only the first and third 'avg' entries are needed
        TPR, _, FPR, _ = read_accuracy(network, size, experiment, deanon)['avg']
        accuracy_records.append({'experiment': experiment, 'anon': anon, 'deanon': deanon, 'TPR': TPR, 'FPR': FPR})

# explicit column lists keep the column order stable and in line with the
# accuracy tables built in the earlier parts
df = pd.DataFrame(accuracy_records, columns=['experiment', 'anon', 'deanon', 'TPR', 'FPR'])
df_util = pd.DataFrame(utility_records, columns=['anon', 'value', 'util'])
df


@ 2021-06-11 16:56:09.986

Creating test data for: wiki [4_exp]

	(Nodes: 7115, edges: 100762)

	Export params: exports=1, export_size=2000

	Perturb. params: algo=ns09, variants=1, alpha_v=0.75, alpha_e=0.5

	Export: ./output/wiki_2.0k_4_exp/SimuData/e0.tgf, nodes: 2002, edges: 62975

		Variant: v0

			g_src nodes: 1732, edges: 32482

			g_tar nodes: 1739, edges: 31868

			node overlap: 1483

/home/topi/Code/MNB/tutorial/output/wiki_2.0k_4_exp/SimuData/e0_v0_tar_orig.tgf /home/topi/Code/MNB/tutorial/output/wiki_2.0k_4_exp/SimuData/e0_v0_tar.tgf 0.1

173

173

0

no avaliable

@ 2021-06-11 16:56:16.955

Simulating on data derived from: wiki [4_exp]

	Parameters: algo=ns09, num_rounds=1, seed_type=random.25, seed_count=50

	Parameters of ns09: theta=0.01

	Exports: 1, perturbed: 1

Simulating: e0_v0

	g_src nodes: 1732, edges: 32482

	g_tar nodes: 1739, edges: 31868

	node overlap: 1483

	#0

	    50 seed nodes (from top 25%)

	    Propagate: [+++++++++]

	    9 rounds, 13.34 sec(s) 

	

Unnamed: 0,FPR,TPR,anon,deanon,experiment
0,5.664194,89.952798,sw,ns09,4_exp
1,0.0,57.855698,sw,grh,4_exp
2,2.225219,92.245448,sw,blb,4_exp
3,13.553608,78.354686,kda,ns09,4_exp
4,0.067431,3.304113,kda,grh,4_exp
5,6.271072,82.805125,kda,blb,4_exp
6,8.968307,81.186784,dp,ns09,4_exp
7,0.202293,42.279164,dp,grh,4_exp
8,5.529332,80.242751,dp,blb,4_exp


In [19]:
# Scoreboard: colour encodes the de-anonymization algorithm, marker symbol the
# anonymization method.
fig = px.scatter(
    df,
    x='FPR',
    y='TPR',
    color='deanon',
    symbol='anon',
    range_y=[0, 100],
    labels=dict(
        FPR="Incorrect matches (FPR)",
        TPR="Correct matches (TPR)",
    ),
    title="Scoreboard: Correct vs Incorrect matches",
)
fig.show()

In [20]:
# Bar chart of the utility values per anonymization technique.
# barmode='group' changes the default stacking so the measures sit side by side.
fig = px.bar(
    df_util,
    x="anon",
    y="value",
    color='util',
    barmode='group',
    labels=dict(
        util="Utility",
        anon="Anonimization technique",
    ),
    title="Utility loss for each technique",
)
fig.show()
