## Correlation between PageRank and Retrievability

### Collection: Wikipedia

##### Imports

In [7]:
# imports
import csv
import pickle

import rbo
import scipy
import scipy.stats

##### load data

In [3]:
pagerank_path = './PageRank/pagerank-pageids.tsv'

# pageid (int) -> PageRank score (float); the file holds one tab-separated
# "<pageid>\t<score>" pair per line.
pagerank = {}
# newline='' hands newline handling to the csv module, as its documentation
# recommends for any file object passed to csv.reader.
with open(pagerank_path, 'r', newline='') as f:
    reader = csv.reader(f, delimiter='\t')
    for row in reader:
        if not row:
            # csv.reader yields [] for blank lines (e.g. a trailing newline);
            # skip them instead of failing with IndexError.
            continue
        # pageid -> pagerank_score
        pagerank[int(row[0])] = float(row[1])

In [4]:
# Locations of the pickled retrievability scores r(d), one file per query set.
rd_aol_path = './Retrievability/allrd-pageids-aol.pickle'
rd_artificial_path = './Retrievability/allrd-pageids-artificial.pickle'
norm_rd_artificial_path = './Retrievability/norm_allrd_pageids_artificial.pickle'


def _unpickle(path):
    """Return the object stored in the pickle file at `path`.

    NOTE: unpickling can execute arbitrary code, so only use this on files
    produced by a trusted pipeline.
    """
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Later cells index these as <name>[cutoff_key][pageid].
allrd_aol = _unpickle(rd_aol_path)
allrd_artificial = _unpickle(rd_artificial_path)
norm_allrd_artificial = _unpickle(norm_rd_artificial_path)

#### Pearson's Correlation Coefficient

In [18]:
print("\nPearson's Correlation Coefficient calculation\n")

# Each entry pairs the label used in the printed line with a mapping
# cutoff_key -> {pageid: r(d)}. One data-driven loop replaces three
# copy-pasted ones; labels are kept verbatim so the output is unchanged.
rd_variants = [
    ('r(d) (artifical queries)', allrd_artificial),
    ('normalized r(d) (artifical queries)', norm_allrd_artificial),
    ('r(d) (AOL queries)', allrd_aol),
]

# Loop-invariant: the set of page ids that have a PageRank score.
pagerank_ids = set(pagerank)

for idx, (label, rd_by_cutoff) in enumerate(rd_variants):
    if idx:
        print()  # blank line between variants
    # Cutoff keys end in "_<int>"; order them numerically by that suffix.
    for c in sorted(rd_by_cutoff, key=lambda x: int(x.split('_')[-1])):
        rd_scores = rd_by_cutoff[c]
        # Only pages present in both score sets can be paired; iterating the
        # ids in sorted order keeps the two lists aligned element by element.
        common_pageids = sorted(pagerank_ids & set(rd_scores))
        pagerank_list = [pagerank[pageid] for pageid in common_pageids]
        rd_list = [rd_scores[pageid] for pageid in common_pageids]
        # Pearson's correlation computation
        rho, pval = scipy.stats.pearsonr(pagerank_list, rd_list)
        cutoff = c.split('_')[-1]
        # NOTE(review): Pearson's coefficient is printed as "rho" here while the
        # AOL-vs-artificial section prints it as "r"; text kept verbatim so
        # re-runs match the recorded output.
        print(f'For PageRank vs {label} for c = {cutoff}:\t rho = {rho:.4f}\t\tp-value = {pval}')


Pearson's Correlation Coefficient calculation

For PageRank vs r(d) (artifical queries) for c = 10:	 rho = 0.0260		p-value = 0.0
For PageRank vs r(d) (artifical queries) for c = 20:	 rho = 0.0273		p-value = 0.0
For PageRank vs r(d) (artifical queries) for c = 30:	 rho = 0.0276		p-value = 0.0
For PageRank vs r(d) (artifical queries) for c = 50:	 rho = 0.0280		p-value = 0.0
For PageRank vs r(d) (artifical queries) for c = 100:	 rho = 0.0293		p-value = 0.0

For PageRank vs normalized r(d) (artifical queries) for c = 10:	 rho = -0.0052		p-value = 4.084338189182967e-33
For PageRank vs normalized r(d) (artifical queries) for c = 20:	 rho = -0.0063		p-value = 1.0064916893265469e-53
For PageRank vs normalized r(d) (artifical queries) for c = 30:	 rho = -0.0073		p-value = 8.302183401044182e-74
For PageRank vs normalized r(d) (artifical queries) for c = 50:	 rho = -0.0086		p-value = 1.4315280237211882e-103
For PageRank vs normalized r(d) (artifical queries) for c = 100:	 rho = -0.0102		p-value 

#### Spearman's Rank Correlation Coefficient

In [23]:
print("\nSpearman's Correlation Coefficient calculation\n")

# Each entry pairs the label used in the printed line with a mapping
# cutoff_key -> {pageid: r(d)}. Labels are kept verbatim so the output text
# is unchanged.
rd_variants = [
    ('r(d) (artifical queries)', allrd_artificial),
    ('normalized r(d) (artifical queries)', norm_allrd_artificial),
    ('r(d) (AOL queries)', allrd_aol),
]

# Loop-invariant: the set of page ids that have a PageRank score.
pagerank_ids = set(pagerank)

for idx, (label, rd_by_cutoff) in enumerate(rd_variants):
    if idx:
        print()  # blank line between variants
    # Cutoff keys end in "_<int>"; order them numerically by that suffix.
    for c in sorted(rd_by_cutoff, key=lambda x: int(x.split('_')[-1])):
        rd_scores = rd_by_cutoff[c]
        common_pageids = sorted(pagerank_ids & set(rd_scores))
        pagerank_list = [pagerank[pageid] for pageid in common_pageids]
        rd_list = [rd_scores[pageid] for pageid in common_pageids]
        # The pairs are intentionally NOT pre-sorted: spearmanr ranks each
        # variable itself and the coefficient does not depend on the order in
        # which the (pagerank, r(d)) pairs are supplied, so the former joint
        # sort only cost time and memory. This also matches the Spearman cell
        # of the AOL-vs-artificial section.
        # Spearman's correlation computation
        corr, pval = scipy.stats.spearmanr(pagerank_list, rd_list)
        cutoff = c.split('_')[-1]
        # NOTE(review): Spearman's coefficient is printed as "r" here while the
        # AOL-vs-artificial section prints it as "rho"; text kept verbatim so
        # re-runs match the recorded output.
        print(f'For PageRank vs {label} for c = {cutoff}:\t r = {corr:.4f}\t\tp-value = {pval}')


Spearman's Correlation Coefficient calculation

For PageRank vs r(d) (artifical queries) for c = 10:	 r = 0.1755		p-value = 0.0
For PageRank vs r(d) (artifical queries) for c = 20:	 r = 0.2045		p-value = 0.0
For PageRank vs r(d) (artifical queries) for c = 30:	 r = 0.2196		p-value = 0.0
For PageRank vs r(d) (artifical queries) for c = 50:	 r = 0.2371		p-value = 0.0
For PageRank vs r(d) (artifical queries) for c = 100:	 r = 0.2567		p-value = 0.0

For PageRank vs normalized r(d) (artifical queries) for c = 10:	 r = -0.1480		p-value = 0.0
For PageRank vs normalized r(d) (artifical queries) for c = 20:	 r = -0.1268		p-value = 0.0
For PageRank vs normalized r(d) (artifical queries) for c = 30:	 r = -0.1166		p-value = 0.0
For PageRank vs normalized r(d) (artifical queries) for c = 50:	 r = -0.1052		p-value = 0.0
For PageRank vs normalized r(d) (artifical queries) for c = 100:	 r = -0.0947		p-value = 0.0

For PageRank vs r(d) (AOL queries) for c = 10:	 r = 0.0654		p-value = 0.0
For PageRank 

#### Kendall Rank Correlation Coefficient

In [24]:
print("\nKendall's Correlation Coefficient calculation\n")

# Each entry pairs the label used in the printed line with a mapping
# cutoff_key -> {pageid: r(d)}. Labels are kept verbatim so the output text
# is unchanged.
rd_variants = [
    ('r(d) (artifical queries)', allrd_artificial),
    ('normalized r(d) (artifical queries)', norm_allrd_artificial),
    ('r(d) (AOL queries)', allrd_aol),
]

# Loop-invariant: the set of page ids that have a PageRank score.
pagerank_ids = set(pagerank)

for idx, (label, rd_by_cutoff) in enumerate(rd_variants):
    if idx:
        print()  # blank line between variants
    # Cutoff keys end in "_<int>"; order them numerically by that suffix.
    for c in sorted(rd_by_cutoff, key=lambda x: int(x.split('_')[-1])):
        rd_scores = rd_by_cutoff[c]
        common_pageids = sorted(pagerank_ids & set(rd_scores))
        pagerank_list = [pagerank[pageid] for pageid in common_pageids]
        rd_list = [rd_scores[pageid] for pageid in common_pageids]
        # The pairs are intentionally NOT pre-sorted: Kendall's tau is defined
        # on concordant/discordant pairs and is unaffected by the order in
        # which the (pagerank, r(d)) pairs are supplied, so the former joint
        # sort only cost time and memory.
        # NOTE(review): the recorded output of this cell contains a stray SciPy
        # source line from what looks like a warning raised inside kendalltau;
        # the reported p-values may be affected -- confirm with a current SciPy.
        # Kendall's correlation computation
        corr, pval = scipy.stats.kendalltau(pagerank_list, rd_list)
        cutoff = c.split('_')[-1]
        print(f'For PageRank vs {label} for c = {cutoff}:\t tau = {corr:.4f}\t\tp-value = {pval}')


Kendall's Correlation Coefficient calculation



  (2 * xtie * ytie) / m + x0 * y0 / (9 * m * (size - 2)))


For PageRank vs r(d) (artifical queries) for c = 10:	 tau = 0.1223		p-value = 0.0
For PageRank vs r(d) (artifical queries) for c = 20:	 tau = 0.1406		p-value = 0.0
For PageRank vs r(d) (artifical queries) for c = 30:	 tau = 0.1504		p-value = 0.0
For PageRank vs r(d) (artifical queries) for c = 50:	 tau = 0.1618		p-value = 0.0
For PageRank vs r(d) (artifical queries) for c = 100:	 tau = 0.1748		p-value = 0.0

For PageRank vs normalized r(d) (artifical queries) for c = 10:	 tau = -0.1002		p-value = 0.0
For PageRank vs normalized r(d) (artifical queries) for c = 20:	 tau = -0.0856		p-value = 0.0
For PageRank vs normalized r(d) (artifical queries) for c = 30:	 tau = -0.0786		p-value = 0.0
For PageRank vs normalized r(d) (artifical queries) for c = 50:	 tau = -0.0709		p-value = 0.0
For PageRank vs normalized r(d) (artifical queries) for c = 100:	 tau = -0.0638		p-value = 0.0

For PageRank vs r(d) (AOL queries) for c = 10:	 tau = 0.0456		p-value = 0.0
For PageRank vs r(d) (AOL queries) for c

#### Rank Biased Overlap (RBO) measure

In [6]:
print("RBO measure calculation\n")

# Each entry pairs the label used in the printed line with a mapping
# cutoff_key -> {pageid: r(d)}. Labels are kept verbatim so the output text
# is unchanged.
rd_variants = [
    ('r(d) (artifical queries)', allrd_artificial),
    ('Normalized r(d) (artifical queries)', norm_allrd_artificial),
    ('r(d) (AOL queries)', allrd_aol),
]

# Loop-invariant work, hoisted: the PageRank ordering does not depend on the
# variant or cutoff, so sort once instead of once per cutoff per variant.
pagerank_ids = set(pagerank)
pagerank_ranked_ids = [
    pageid
    for pageid, _ in sorted(pagerank.items(), key=lambda x: x[1], reverse=True)
]

for idx, (label, rd_by_cutoff) in enumerate(rd_variants):
    if idx:
        print()  # blank line between variants
    # Cutoff keys end in "_<int>"; order them numerically by that suffix.
    for c in sorted(rd_by_cutoff, key=lambda x: int(x.split('_')[-1])):
        common_pageids = pagerank_ids & set(rd_by_cutoff[c])
        # Both rankings are restricted to the shared page ids and ordered by
        # descending score. Filtering the pre-sorted PageRank ids yields the
        # same order as sorting and filtering inside the loop.
        pagerank_list = [pageid for pageid in pagerank_ranked_ids if pageid in common_pageids]
        rd_list = [
            pageid
            for pageid, _ in sorted(dict(rd_by_cutoff[c]).items(), key=lambda x: x[1], reverse=True)
            if pageid in common_pageids
        ]
        # RBO measure computation
        # NOTE(review): .rbo() is called with the library's default arguments;
        # confirm those defaults are the intended RBO configuration.
        rbo_measure = rbo.RankingSimilarity(pagerank_list, rd_list).rbo()
        cutoff = c.split('_')[-1]
        print(f'For PageRank vs {label} for c = {cutoff}:\t\t rbo = {rbo_measure:.4f}')

RBO measure calculation

For PageRank vs r(d) (artifical queries) for c = 10:		 rbo = 0.5568
For PageRank vs r(d) (artifical queries) for c = 20:		 rbo = 0.5627
For PageRank vs r(d) (artifical queries) for c = 30:		 rbo = 0.5661
For PageRank vs r(d) (artifical queries) for c = 50:		 rbo = 0.5702
For PageRank vs r(d) (artifical queries) for c = 100:		 rbo = 0.5753

For PageRank vs Normalized r(d) (artifical queries) for c = 10:		 rbo = 0.4770
For PageRank vs Normalized r(d) (artifical queries) for c = 20:		 rbo = 0.4817
For PageRank vs Normalized r(d) (artifical queries) for c = 30:		 rbo = 0.4840
For PageRank vs Normalized r(d) (artifical queries) for c = 50:		 rbo = 0.4863
For PageRank vs Normalized r(d) (artifical queries) for c = 100:		 rbo = 0.4879

For PageRank vs r(d) (AOL queries) for c = 10:		 rbo = 0.5228
For PageRank vs r(d) (AOL queries) for c = 20:		 rbo = 0.5255
For PageRank vs r(d) (AOL queries) for c = 30:		 rbo = 0.5275
For PageRank vs r(d) (AOL queries) for c = 50:		 r

## Correlation between Retrievability on Real Queries vs Artificial Queries

##### Imports

In [8]:
# imports
import pickle
import csv
import scipy
import rbo

##### load data

In [None]:
# Pickled retrievability scores r(d): AOL queries, artificial queries, and the
# normalized artificial-query variant.
# NOTE: unpickling can execute arbitrary code; only load trusted files.
rd_aol_path = './Retrievability/allrd-pageids-aol.pickle'
rd_artificial_path = './Retrievability/allrd-pageids-artificial.pickle'
norm_rd_artificial_path = './Retrievability/norm_allrd_pageids_artificial.pickle'

# Read the three files one after another, in the order the names are unpacked.
unpickled = []
for pickle_path in (rd_aol_path, rd_artificial_path, norm_rd_artificial_path):
    with open(pickle_path, 'rb') as f:
        unpickled.append(pickle.load(f))

allrd_aol, allrd_artificial, norm_allrd_artificial = unpickled

#### Pearson's Correlation Coefficient

In [12]:
print("\nPearson's Correlation Coefficient calculation\n")

# Each entry pairs the label used in the printed line with a mapping
# cutoff_key -> {pageid: r(d)} for one artificial-query variant. One
# data-driven loop replaces two copy-pasted ones; output text is unchanged.
artificial_variants = [
    ('Artificial r(d)', allrd_artificial),
    ('Artificial "Normalized" r(d)', norm_allrd_artificial),
]

for idx, (label, artificial_by_cutoff) in enumerate(artificial_variants):
    if idx:
        print()  # blank line between variants
    # Cutoff keys end in "_<int>"; order them numerically by that suffix.
    for c in sorted(artificial_by_cutoff, key=lambda x: int(x.split('_')[-1])):
        aol_scores = allrd_aol[c]
        artificial_scores = artificial_by_cutoff[c]
        # Only pages scored under both query sets can be paired; iterating the
        # ids in sorted order keeps the two lists aligned element by element.
        common_pageids = sorted(set(aol_scores) & set(artificial_scores))
        aol_rd_list = [aol_scores[pageid] for pageid in common_pageids]
        artificial_rd_list = [artificial_scores[pageid] for pageid in common_pageids]
        # Pearson's correlation computation
        rho, pval = scipy.stats.pearsonr(aol_rd_list, artificial_rd_list)
        cutoff = c.split('_')[-1]
        print(f'For AOL r(d) vs {label} for c = {cutoff}:\t r = {rho:.4f}\t\tp-value = {pval}')


Pearson's Correlation Coefficient calculation

For AOL r(d) vs Artificial r(d) for c = 10:	 r = 0.1695		p-value = 0.0
For AOL r(d) vs Artificial r(d) for c = 20:	 r = 0.2080		p-value = 0.0
For AOL r(d) vs Artificial r(d) for c = 30:	 r = 0.2318		p-value = 0.0
For AOL r(d) vs Artificial r(d) for c = 50:	 r = 0.2631		p-value = 0.0
For AOL r(d) vs Artificial r(d) for c = 100:	 r = 0.3065		p-value = 0.0

For AOL r(d) vs Artificial "Normalized" r(d) for c = 10:	 r = 0.0514		p-value = 0.0
For AOL r(d) vs Artificial "Normalized" r(d) for c = 20:	 r = 0.0578		p-value = 0.0
For AOL r(d) vs Artificial "Normalized" r(d) for c = 30:	 r = 0.0593		p-value = 0.0
For AOL r(d) vs Artificial "Normalized" r(d) for c = 50:	 r = 0.0608		p-value = 0.0
For AOL r(d) vs Artificial "Normalized" r(d) for c = 100:	 r = 0.0626		p-value = 0.0


#### Spearman's Rank Correlation Coefficient

In [10]:
print("\nSpearman's Correlation Coefficient calculation\n")

# Each entry pairs the label used in the printed line with a mapping
# cutoff_key -> {pageid: r(d)} for one artificial-query variant. One
# data-driven loop replaces two copy-pasted ones; output text is unchanged.
artificial_variants = [
    ('Artificial r(d)', allrd_artificial),
    ('Artificial "Normalized" r(d)', norm_allrd_artificial),
]

for idx, (label, artificial_by_cutoff) in enumerate(artificial_variants):
    if idx:
        print()  # blank line between variants
    # Cutoff keys end in "_<int>"; order them numerically by that suffix.
    for c in sorted(artificial_by_cutoff, key=lambda x: int(x.split('_')[-1])):
        aol_scores = allrd_aol[c]
        artificial_scores = artificial_by_cutoff[c]
        # Only pages scored under both query sets can be paired; iterating the
        # ids in sorted order keeps the two lists aligned element by element.
        common_pageids = sorted(set(aol_scores) & set(artificial_scores))
        aol_rd_list = [aol_scores[pageid] for pageid in common_pageids]
        artificial_rd_list = [artificial_scores[pageid] for pageid in common_pageids]
        # Spearman's correlation computation
        rho, pval = scipy.stats.spearmanr(aol_rd_list, artificial_rd_list)
        cutoff = c.split('_')[-1]
        print(f'For AOL r(d) vs {label} for c = {cutoff}:\t rho = {rho:.4f}\t\tp-value = {pval}')


Spearman's Correlation Coefficient calculation

For AOL r(d) vs Artificial r(d) for c = 10:	 rho = 0.3066		p-value = 0.0
For AOL r(d) vs Artificial r(d) for c = 20:	 rho = 0.3938		p-value = 0.0
For AOL r(d) vs Artificial r(d) for c = 30:	 rho = 0.4482		p-value = 0.0
For AOL r(d) vs Artificial r(d) for c = 50:	 rho = 0.5140		p-value = 0.0
For AOL r(d) vs Artificial r(d) for c = 100:	 rho = 0.5893		p-value = 0.0

For AOL r(d) vs Artificial "Normalized" r(d) for c = 10:	 rho = 0.0856		p-value = 0.0
For AOL r(d) vs Artificial "Normalized" r(d) for c = 20:	 rho = 0.0908		p-value = 0.0
For AOL r(d) vs Artificial "Normalized" r(d) for c = 30:	 rho = 0.0907		p-value = 0.0
For AOL r(d) vs Artificial "Normalized" r(d) for c = 50:	 rho = 0.0921		p-value = 0.0
For AOL r(d) vs Artificial "Normalized" r(d) for c = 100:	 rho = 0.1034		p-value = 0.0


#### Kendall Rank Correlation Coefficient

In [13]:
print("\nKendall's Correlation Coefficient calculation\n")

# Each entry pairs the label used in the printed line with a mapping
# cutoff_key -> {pageid: r(d)} for one artificial-query variant. One
# data-driven loop replaces two copy-pasted ones; output text is unchanged.
artificial_variants = [
    ('Artificial r(d)', allrd_artificial),
    ('Artificial "Normalized" r(d)', norm_allrd_artificial),
]

for idx, (label, artificial_by_cutoff) in enumerate(artificial_variants):
    if idx:
        print()  # blank line between variants
    # Cutoff keys end in "_<int>"; order them numerically by that suffix.
    for c in sorted(artificial_by_cutoff, key=lambda x: int(x.split('_')[-1])):
        aol_scores = allrd_aol[c]
        artificial_scores = artificial_by_cutoff[c]
        common_pageids = sorted(set(aol_scores) & set(artificial_scores))
        aol_rd_list = [aol_scores[pageid] for pageid in common_pageids]
        artificial_rd_list = [artificial_scores[pageid] for pageid in common_pageids]
        # The pairs are intentionally NOT pre-sorted: Kendall's tau is defined
        # on concordant/discordant pairs and is unaffected by the order in
        # which the pairs are supplied, so the former joint sort only cost
        # time and memory.
        # NOTE(review): the recorded output of this cell contains a stray SciPy
        # source line from what looks like a warning raised inside kendalltau;
        # the reported p-values may be affected -- confirm with a current SciPy.
        # Kendall's correlation computation
        rho, pval = scipy.stats.kendalltau(aol_rd_list, artificial_rd_list)
        cutoff = c.split('_')[-1]
        print(f'For AOL r(d) vs {label} for c = {cutoff}:\t tau = {rho:.4f}\t\tp-value = {pval}')


Kendall's Correlation Coefficient calculation



  (2 * xtie * ytie) / m + x0 * y0 / (9 * m * (size - 2)))


For AOL r(d) vs Artificial r(d) for c = 10:	 tau = 0.2208		p-value = 0.0
For AOL r(d) vs Artificial r(d) for c = 20:	 tau = 0.2785		p-value = 0.0
For AOL r(d) vs Artificial r(d) for c = 30:	 tau = 0.3157		p-value = 0.0
For AOL r(d) vs Artificial r(d) for c = 50:	 tau = 0.3625		p-value = 0.0
For AOL r(d) vs Artificial r(d) for c = 100:	 tau = 0.4194		p-value = 0.0

For AOL r(d) vs Artificial "Normalized" r(d) for c = 10:	 tau = 0.0593		p-value = 0.0
For AOL r(d) vs Artificial "Normalized" r(d) for c = 20:	 tau = 0.0620		p-value = 0.0
For AOL r(d) vs Artificial "Normalized" r(d) for c = 30:	 tau = 0.0614		p-value = 0.0
For AOL r(d) vs Artificial "Normalized" r(d) for c = 50:	 tau = 0.0620		p-value = 0.0
For AOL r(d) vs Artificial "Normalized" r(d) for c = 100:	 tau = 0.0695		p-value = 0.0


#### Rank Biased Overlap (RBO) measure

In [15]:
print("RBO measure calculation\n")

# Each entry pairs the label used in the printed line with a mapping
# cutoff_key -> {pageid: r(d)} for one artificial-query variant. One
# data-driven loop replaces two copy-pasted ones; output text is unchanged.
artificial_variants = [
    ('Artificial r(d)', allrd_artificial),
    ('Artificial "Normalized" r(d)', norm_allrd_artificial),
]

# cutoff_key -> page ids ordered by descending AOL r(d). The AOL ordering for
# a cutoff is the same for every variant, so it is sorted once and reused.
aol_ranked_ids = {}

for idx, (label, artificial_by_cutoff) in enumerate(artificial_variants):
    if idx:
        print()  # blank line between variants
    # Cutoff keys end in "_<int>"; order them numerically by that suffix.
    for c in sorted(artificial_by_cutoff, key=lambda x: int(x.split('_')[-1])):
        common_pageids = set(allrd_aol[c]) & set(artificial_by_cutoff[c])
        if c not in aol_ranked_ids:
            aol_ranked_ids[c] = [
                pageid
                for pageid, _ in sorted(dict(allrd_aol[c]).items(), key=lambda x: x[1], reverse=True)
            ]
        # Both rankings are restricted to the shared page ids and ordered by
        # descending score.
        artificial_rd_list = [
            pageid
            for pageid, _ in sorted(dict(artificial_by_cutoff[c]).items(), key=lambda x: x[1], reverse=True)
            if pageid in common_pageids
        ]
        aol_rd_list = [pageid for pageid in aol_ranked_ids[c] if pageid in common_pageids]
        # RBO similarity measure computation
        # NOTE(review): .rbo() is called with the library's default arguments;
        # confirm those defaults are the intended RBO configuration.
        rbo_measure = rbo.RankingSimilarity(aol_rd_list, artificial_rd_list).rbo()
        cutoff = c.split('_')[-1]
        print(f'For AOL r(d) vs {label} for c = {cutoff}:\t rbo = {rbo_measure:.4f}')

RBO measure calculation

For AOL r(d) vs Artificial r(d) for c = 10:	 rbo = 0.5898
For AOL r(d) vs Artificial r(d) for c = 20:	 rbo = 0.6106
For AOL r(d) vs Artificial r(d) for c = 30:	 rbo = 0.6235
For AOL r(d) vs Artificial r(d) for c = 50:	 rbo = 0.6397
For AOL r(d) vs Artificial r(d) for c = 100:	 rbo = 0.6594

For AOL r(d) vs Artificial "Normalized" r(d) for c = 10:	 rbo = 0.5242
For AOL r(d) vs Artificial "Normalized" r(d) for c = 20:	 rbo = 0.5253
For AOL r(d) vs Artificial "Normalized" r(d) for c = 30:	 rbo = 0.5253
For AOL r(d) vs Artificial "Normalized" r(d) for c = 50:	 rbo = 0.5255
For AOL r(d) vs Artificial "Normalized" r(d) for c = 100:	 rbo = 0.5276
