# Experiments

In [None]:
import math
import matplotlib.pyplot as plt 
import main
import datareader as dr

In [None]:
# Load every dataset once up front. Each main.setup call yields a pair that is
# kept as (data, JSim) and reused by the experiment cells below.
(data_ml, JSim_ml), (data_lfm, JSim_lfm) = (
    main.setup(path) for path in ('movielens.dat', 'lastfm.dat')
)
(data_art_low, JSim_art_low), (data_art_med, JSim_art_med), (data_art_high, JSim_art_high) = (
    main.setup(path)
    for path in (
        'Artificial_data/artificial_data_2000.dat',
        'Artificial_data/artificial_data_200.dat',
        'Artificial_data/artificial_data_100.dat',
    )
)

## Accuracy of similarity estimations

### MSE Bucket PrivMin Movielens 

In [None]:
# Sweep configuration: K is forwarded positionally to main.main, Eps holds the
# epsilon values on the x-axis, and every setting is repeated `runs` times.
K = 100
Eps = [10, 20, 40, 60, 80]
runs = 10
buckets = [2, 3, 4]

fig, ax = plt.subplots()

# One curve per bucket count; each point is the MSE averaged over `runs` repetitions.
for n_buckets in buckets:
    mse_curve = []
    for epsilon in Eps:
        mean_mse = 0
        for _ in range(runs):
            result = main.main(data_ml, JSim_ml, 'bucket', K, eps=epsilon, num_buckets=n_buckets)
            mean_mse += result.MSE() / runs
        mse_curve.append(mean_mse)
    ax.plot(Eps, mse_curve, label=f'|B| = {n_buckets}')

ax.set_xlim(Eps[0], Eps[-1])
ax.legend()
ax.set_xlabel(r'$\epsilon$')
ax.set_ylabel('MSE')

### MSE Bucket PrivMin LastFM

In [None]:
# Sweep configuration: K is forwarded positionally to main.main, Eps holds the
# epsilon values on the x-axis, and every setting is repeated `runs` times.
K = 100
Eps = [10, 20, 40, 60, 80]
runs = 10
buckets = [2, 3, 4]

fig, ax = plt.subplots()

# One curve per bucket count; each point is the MSE averaged over `runs` repetitions.
# Only the MSE is needed here, so PRF() is not evaluated in this cell.
for bucket in buckets:
    MSEs = []
    for eps in Eps:
        mean_mse = 0
        for _ in range(runs):
            analysis = main.main(data_lfm, JSim_lfm, 'bucket', K, eps=eps, num_buckets=bucket)
            mean_mse += analysis.MSE() / runs
        MSEs.append(mean_mse)
    ax.plot(Eps, MSEs, label=f'|B| = {bucket}')

ax.set_xlim([Eps[0], Eps[-1]])
ax.legend()
ax.set_xlabel(r'$\epsilon$')
ax.set_ylabel('MSE')

### MSE Noisy Secure MinHash Movielens

In [None]:
# Sweep configuration for the 'noisy' runs on the Movielens data.
Eps = [10, 15, 20, 30, 40]
l = 100
num_hashes = l
K = 1
alpha = 1
runs = 10
# Each delta is 1 / (c * A), where A is the average set size reported by dr.avg_set_size.
C = [1000, 10]
A = dr.avg_set_size(data_ml)
Deltas = [1 / (scale * A) for scale in C]

fig, ax = plt.subplots()

# One curve per delta; each point is the MSE averaged over `runs` repetitions.
for delta_value in Deltas:
    mse_curve = []
    for epsilon in Eps:
        mean_mse = 0
        for _ in range(runs):
            result = main.main(data_ml, JSim_ml, 'noisy', K, l=l, delta=delta_value, alpha=alpha, eps=epsilon)
            mean_mse += result.MSE() / runs
        mse_curve.append(mean_mse)
    ax.plot(Eps, mse_curve, label=rf'$\delta$ = {delta_value:0.1e}')

ax.set_xlim(Eps[0], Eps[-1])
ax.legend()
ax.set_xlabel(r'$\epsilon$')
ax.set_ylabel('MSE')

### MSE Noisy Secure MinHash LastFM

In [None]:
# Sweep configuration for the 'noisy' runs on the Last.FM data.
K = 1
Eps = [10, 15, 20, 30, 40]
l = 100
alpha = 1
runs = 10
# Each delta is 1 / (c * A), where A is the average set size reported by dr.avg_set_size.
C = [1000, 100, 10]
A = dr.avg_set_size(data_lfm)
Deltas = [1 / (c * A) for c in C]

fig, ax = plt.subplots()

# One curve per delta; each point is the MSE averaged over `runs` repetitions.
for delta in Deltas:
    MSEs = []
    for eps in Eps:
        mean_mse = 0
        for _ in range(runs):
            # Last.FM
            analysis = main.main(data_lfm, JSim_lfm, 'noisy', K, l=l, delta=delta, alpha=alpha, eps=eps)
            mean_mse += analysis.MSE() / runs
        MSEs.append(mean_mse)

    lab = r'$\delta$ = ' + '{:0.1e}'.format(delta)
    ax.plot(Eps, MSEs, label=lab)

ax.set_xlim([Eps[0], Eps[-1]])
ax.legend()
ax.set_xlabel(r'$\epsilon$')
ax.set_ylabel('MSE')

### Joint MSE Movielens

In [None]:
# Shared configuration for both methods on the Movielens data.
num_hashes = 100
runs = 10
Eps = [10, 20, 30, 40]
data, JSim = data_ml, JSim_ml

# Blue shades are used for the bucket curves, green shades for the noisy curves.
color_b = ['blue', 'deepskyblue', 'darkblue', 'lightblue']
color_n = ['green', 'yellowgreen', 'darkgreen']

fig, ax = plt.subplots()

# Bucket PrivMin: K is set to num_hashes, one curve per bucket count.
buckets = [2, 3]
K = num_hashes

for n_buckets, curve_color in zip(buckets, color_b):
    mse_curve = []
    for epsilon in Eps:
        mean_mse = 0
        for _ in range(runs):
            result = main.main(data, JSim, 'bucket', K, eps=epsilon, num_buckets=n_buckets)
            mean_mse += result.MSE() / runs
        mse_curve.append(mean_mse)
    ax.plot(Eps, mse_curve, label=f'buckets = {n_buckets}', color=curve_color)

# Noisy Secure MinHash: K is 1 and l is set to num_hashes, one curve per delta.
K = 1
l = num_hashes
alpha = 1
C = [1000, 10]
A = dr.avg_set_size(data)
Deltas = [1 / (scale * A) for scale in C]

for delta_value, curve_color in zip(Deltas, color_n):
    mse_curve = []
    for epsilon in Eps:
        mean_mse = 0
        for _ in range(runs):
            result = main.main(data, JSim, 'noisy', K, l=l, delta=delta_value, alpha=alpha, eps=epsilon)
            mean_mse += result.MSE() / runs
        mse_curve.append(mean_mse)
    ax.plot(Eps, mse_curve, label=rf'$\delta$ = {delta_value:0.1e}', color=curve_color)

ax.set_xlim(Eps[0], Eps[-1])
ax.legend()
ax.set_xlabel(r'$\epsilon$')
ax.set_ylabel('MSE')



### Joint MSE Last.FM

In [None]:
# Shared configuration for both methods on the Last.FM data.
num_hashes = 100
runs = 10
Eps = [10, 20, 30, 40]
data, JSim = data_lfm, JSim_lfm

# Blue shades are used for the bucket curves, green shades for the noisy curves.
color_b = ['blue', 'deepskyblue', 'darkblue', 'lightblue']
color_n = ['green', 'yellowgreen', 'darkgreen']

fig, ax = plt.subplots()

# Bucket PrivMin: K is set to num_hashes, one curve per bucket count.
buckets = [2, 3]
K = num_hashes

for n_buckets, curve_color in zip(buckets, color_b):
    mse_curve = []
    for epsilon in Eps:
        mean_mse = 0
        for _ in range(runs):
            result = main.main(data, JSim, 'bucket', K, eps=epsilon, num_buckets=n_buckets)
            mean_mse += result.MSE() / runs
        mse_curve.append(mean_mse)
    ax.plot(Eps, mse_curve, label=f'buckets = {n_buckets}', color=curve_color)

# Noisy Secure MinHash: K is 1 and l is set to num_hashes, one curve per delta.
K = 1
l = num_hashes
alpha = 1
C = [1000, 10]
A = dr.avg_set_size(data)
Deltas = [1 / (scale * A) for scale in C]

for delta_value, curve_color in zip(Deltas, color_n):
    mse_curve = []
    for epsilon in Eps:
        mean_mse = 0
        for _ in range(runs):
            result = main.main(data, JSim, 'noisy', K, l=l, delta=delta_value, alpha=alpha, eps=epsilon)
            mean_mse += result.MSE() / runs
        mse_curve.append(mean_mse)
    ax.plot(Eps, mse_curve, label=rf'$\delta$ = {delta_value:0.1e}', color=curve_color)

ax.set_xlim(Eps[0], Eps[-1])
ax.legend()
ax.set_xlabel(r'$\epsilon$')
ax.set_ylabel('MSE')

# Utility of Similarity Estimations

### Utility Bucket PrivMin LastFM

In [None]:
# Sweep configuration for the 'bucket' runs on the Last.FM data.
K = 100
Eps = [10, 50, 100, 200]
runs = 10
buckets = [2, 3, 4]

fig, ax = plt.subplots()

# One curve per bucket count; each point averages the second value returned by
# PRF() (plotted as recall) over `runs` repetitions.
for n_buckets in buckets:
    recall_curve = []
    for epsilon in Eps:
        mean_recall = 0
        for _ in range(runs):
            result = main.main(data_lfm, JSim_lfm, 'bucket', K, eps=epsilon, num_buckets=n_buckets)
            _precision, recall, _f1 = result.PRF()
            mean_recall += recall / runs
        recall_curve.append(mean_recall)
    ax.plot(Eps, recall_curve, label=f'b = {n_buckets}')

ax.set_xlim(Eps[0], Eps[-1])
ax.legend()
ax.set_xlabel(r'$\epsilon$')
ax.set_ylabel('Recall')

### Utility Bucket PrivMin Movielens

In [None]:
# Sweep configuration for the 'bucket' runs on the Movielens data.
K = 100
Eps = [10, 50, 100, 200]
runs = 10
buckets = [2, 3, 4]

fig, ax = plt.subplots()

# One curve per bucket count; each point averages the second value returned by
# PRF() (plotted as recall) over `runs` repetitions.
for n_buckets in buckets:
    recall_curve = []
    for epsilon in Eps:
        mean_recall = 0
        for _ in range(runs):
            result = main.main(data_ml, JSim_ml, 'bucket', K, eps=epsilon, num_buckets=n_buckets)
            _precision, recall, _f1 = result.PRF()
            mean_recall += recall / runs
        recall_curve.append(mean_recall)
    ax.plot(Eps, recall_curve, label=f'b = {n_buckets}')

ax.set_xlim(Eps[0], Eps[-1])
ax.legend()
ax.set_xlabel(r'$\epsilon$')
ax.set_ylabel('Recall')

### Utility Noisy Secure MinHash Movielens

In [None]:
# Sweep configuration for the 'noisy' runs on the Movielens data.
K = 1
Eps = [10, 20, 40, 100]
l = 100
alpha = 1
runs = 10
# Each delta is 1 / (c * A), where A is the average set size reported by dr.avg_set_size.
C = [1000, 100, 10]
A = dr.avg_set_size(data_ml)
Deltas = [1 / (scale * A) for scale in C]

fig, ax = plt.subplots()

# One curve per delta; each point averages the second value returned by PRF()
# (plotted as recall) over `runs` repetitions.
for delta_value in Deltas:
    recall_curve = []
    for epsilon in Eps:
        mean_recall = 0
        for _ in range(runs):
            result = main.main(data_ml, JSim_ml, 'noisy', K, l=l, delta=delta_value, alpha=alpha, eps=epsilon)
            _precision, recall, _f1 = result.PRF()
            mean_recall += recall / runs
        recall_curve.append(mean_recall)
    ax.plot(Eps, recall_curve, label=rf'$\delta$ = {delta_value:0.1e}')

ax.set_xlim(Eps[0], Eps[-1])
ax.legend()
ax.set_xlabel(r'$\epsilon$')
ax.set_ylabel('Recall')

### Utility Noisy Secure MinHash Last.FM

In [None]:
# Sweep configuration for the 'noisy' runs on the Last.FM data.
K = 1
Eps = [10, 20, 40, 100]
l = 100
alpha = 1
runs = 10
# Each delta is 1 / (c * A). A must be the average set size of the dataset that
# is actually evaluated below (Last.FM), not of Movielens.
C = [1000, 100, 10]
A = dr.avg_set_size(data_lfm)
Deltas = [1 / (c * A) for c in C]

fig, ax = plt.subplots()

# One curve per delta; each point averages the second value returned by PRF()
# (plotted as recall) over `runs` repetitions.
for delta in Deltas:
    recalls = []
    for eps in Eps:
        mean_recall = 0
        for _ in range(runs):
            # Last.FM
            analysis = main.main(data_lfm, JSim_lfm, 'noisy', K, l=l, delta=delta, alpha=alpha, eps=eps)
            prec, rec, f1 = analysis.PRF()
            mean_recall += rec / runs
        recalls.append(mean_recall)

    lab = r'$\delta$ = ' + '{:0.1e}'.format(delta)
    ax.plot(Eps, recalls, label=lab)

ax.set_xlim([Eps[0], Eps[-1]])
ax.legend()
ax.set_xlabel(r'$\epsilon$')
ax.set_ylabel('Recall')

### Joint Utility Movielens

In [None]:
# Shared configuration for both methods on the Movielens data.
num_hashes = 100
runs = 10
Eps = [10, 20, 30, 40]
data, JSim = data_ml, JSim_ml

# Blue shades are used for the bucket curves, green shades for the noisy curves.
color_b = ['blue', 'deepskyblue', 'darkblue', 'lightblue']
color_n = ['green', 'yellowgreen', 'darkgreen']

buckets = [2, 3]
K = num_hashes

fig, ax = plt.subplots()

# Bucket PrivMin: K is num_hashes, one recall curve per bucket count.
for n_buckets, curve_color in zip(buckets, color_b):
    recall_curve = []
    for epsilon in Eps:
        mean_recall = 0
        for _ in range(runs):
            result = main.main(data, JSim, 'bucket', K, eps=epsilon, num_buckets=n_buckets)
            _precision, recall, _f1 = result.PRF()
            mean_recall += recall / runs
        recall_curve.append(mean_recall)
    ax.plot(Eps, recall_curve, label=f'buckets = {n_buckets}', color=curve_color)

# Noisy Secure MinHash: K is 1 and l is set to num_hashes, one recall curve per delta.
K = 1
l = num_hashes
alpha = 1
C = [1000, 10]
A = dr.avg_set_size(data)
Deltas = [1 / (scale * A) for scale in C]

for delta_value, curve_color in zip(Deltas, color_n):
    recall_curve = []
    for epsilon in Eps:
        mean_recall = 0
        for _ in range(runs):
            result = main.main(data, JSim, 'noisy', K, l=l, delta=delta_value, alpha=alpha, eps=epsilon)
            _precision, recall, _f1 = result.PRF()
            mean_recall += recall / runs
        recall_curve.append(mean_recall)
    ax.plot(Eps, recall_curve, label=rf'$\delta$ = {delta_value:0.1e}', color=curve_color)

ax.set_xlim(Eps[0], Eps[-1])
ax.legend()
ax.set_xlabel(r'$\epsilon$')
ax.set_ylabel('Recall')

### Joint Utility Last.FM

In [None]:
# Shared configuration for both methods on the Last.FM data.
num_hashes = 100
runs = 10
Eps = [10, 20, 30, 40]
data, JSim = data_lfm, JSim_lfm

# Blue shades are used for the bucket curves, green shades for the noisy curves.
color_b = ['blue', 'deepskyblue', 'darkblue', 'lightblue']
color_n = ['green', 'yellowgreen', 'darkgreen']

buckets = [2, 3]
K = num_hashes

fig, ax = plt.subplots()

# Bucket PrivMin: K is num_hashes, one recall curve per bucket count.
for n_buckets, curve_color in zip(buckets, color_b):
    recall_curve = []
    for epsilon in Eps:
        mean_recall = 0
        for _ in range(runs):
            result = main.main(data, JSim, 'bucket', K, eps=epsilon, num_buckets=n_buckets)
            _precision, recall, _f1 = result.PRF()
            mean_recall += recall / runs
        recall_curve.append(mean_recall)
    ax.plot(Eps, recall_curve, label=f'buckets = {n_buckets}', color=curve_color)

# Noisy Secure MinHash: K is 1 and l is set to num_hashes, one recall curve per delta.
K = 1
l = num_hashes
alpha = 1
C = [1000, 10]
A = dr.avg_set_size(data)
Deltas = [1 / (scale * A) for scale in C]

for delta_value, curve_color in zip(Deltas, color_n):
    recall_curve = []
    for epsilon in Eps:
        mean_recall = 0
        for _ in range(runs):
            result = main.main(data, JSim, 'noisy', K, l=l, delta=delta_value, alpha=alpha, eps=epsilon)
            _precision, recall, _f1 = result.PRF()
            mean_recall += recall / runs
        recall_curve.append(mean_recall)
    ax.plot(Eps, recall_curve, label=rf'$\delta$ = {delta_value:0.1e}', color=curve_color)

ax.set_xlim(Eps[0], Eps[-1])
ax.legend()
ax.set_xlabel(r'$\epsilon$')
ax.set_ylabel('Recall')

## Artificial data MSE

In [None]:
# Configuration shared with the artificial-data plotting cell that follows.
num_hashes = 100
runs = 10
Eps = [20, 50, 100]

# Blue shades are used for the bucket curves, green shades for the noisy curves.
color_b = ['blue', 'deepskyblue', 'darkblue', 'lightblue']
color_n = ['green', 'yellowgreen', 'darkgreen']

# The three artificial datasets loaded at the top of the notebook, as
# [data, JSim] pairs, with the panel title used for each one.
datasets = [[data_art_low, JSim_art_low], [data_art_med, JSim_art_med], [data_art_high, JSim_art_high]]
label_data = ['low sim', 'medium sim', 'high sim']

In [None]:
# MSE of both methods on each artificial dataset, one panel per dataset.
buckets = [2, 3]

fig, ax = plt.subplots(1, 3, figsize=(15,5), sharex='col', sharey='row')

for dt_nr, (data, JSim) in enumerate(datasets):

    # Bucket PrivMin
    # K must be reset for every dataset: the noisy section below sets K = 1,
    # which would otherwise carry over into the bucket runs of the next dataset.
    K = num_hashes

    for bucket, color in zip(buckets, color_b):
        MSEs = []
        for eps in Eps:
            mean_mse = 0
            for _ in range(runs):
                analysis = main.main(data, JSim, 'bucket', K, eps=eps, num_buckets=bucket)
                mean_mse += analysis.MSE() / runs
            MSEs.append(mean_mse)
        lab = 'buckets = ' + str(bucket)
        print('-------', dt_nr, 'b--------')
        ax[dt_nr].plot(Eps, MSEs, label=lab, color=color)

    # Noisy Secure MinHash
    K = 1
    l = num_hashes
    alpha = 1
    # Each delta is 1 / (c * A), with A the average set size of the current dataset.
    C = [1000, 10]
    A = dr.avg_set_size(data)
    Deltas = [1 / (c * A) for c in C]

    for delta, color in zip(Deltas, color_n):
        MSEs = []
        for eps in Eps:
            mean_mse = 0
            for _ in range(runs):
                analysis = main.main(data, JSim, 'noisy', K, l=l, delta=delta, alpha=alpha, eps=eps)
                mean_mse += analysis.MSE() / runs
            MSEs.append(mean_mse)

        lab = r'$\delta$ = ' + '{:0.1e}'.format(delta)
        print('-------', dt_nr, 'n--------')
        ax[dt_nr].plot(Eps, MSEs, label=lab, color=color)

    ax[dt_nr].set(xlabel=r'$\epsilon$', ylabel="MSE", title=label_data[dt_nr])

# NOTE: the shared legend is built from the first panel only, so the delta
# values it shows are those computed from the first dataset's A.
handles, labels = ax[0].get_legend_handles_labels()
fig.legend(handles, labels, loc='upper right', borderaxespad=4)
plt.tight_layout()