In [1]:
# Load IPython's autoreload extension so the %autoreload magic is available.
%load_ext autoreload


In [2]:
# Mode 2: reload all modules (except any excluded with %aimport) before each
# cell runs, so edits to the imported project modules take effect without
# restarting the kernel.
%autoreload 2

In [3]:
import numpy as np
from experiments.founder_rank.helpers import *
import experiments.founder_rank.metrics as metric
import experiments.founder_rank.learner as learn

In [4]:
import experiments.founder_rank.data.baseline as baseline
import experiments.founder_rank.data.random as random
import experiments.founder_rank.data.naive as naive
import experiments.founder_rank.data.graph_metrics as graph_metrics

In [5]:
# Reference datasets, loaded together; later cells look entries up by name
# (e.g. datasets['baseline']) and register additional entries in the same
# collection.
reference_sources = [baseline, random, naive]
datasets = get_datasets(reference_sources)

# The graph-metrics data is loaded on its own via the single-dataset helper;
# its columns are sliced into feature subsets further down.
with_graph_metrics = get_dataset(graph_metrics)

In [6]:
# The scorer and the regression helper are both built from the same baseline
# dataset.
baseline_dataset = datasets['baseline']
metrics = metric.Metrics(baseline_dataset)
learner = learn.Learner(baseline_dataset)

In [15]:
# Two-column slices of the graph-metrics table: column 0 plus one metric
# column each.
# NOTE(review): column 0 is kept in every slice -- presumably an id or target
# column; confirm against the graph_metrics data module.
only_pr = with_graph_metrics.take([0, 1], axis=1)
only_betweenness = with_graph_metrics.take([0, 2], axis=1)
only_closeness = with_graph_metrics.take([0, 3], axis=1)

# Pairwise correlation of every column from index 1 onward. rowvar=False
# treats each column as a variable and each row as an observation. Left as
# the last expression so the matrix is displayed as the cell's output.
metric_columns = with_graph_metrics[:, 1:]
np.corrcoef(metric_columns, rowvar=False)

array([[1.        , 0.91344508, 0.53108472],
       [0.91344508, 1.        , 0.51204197],
       [0.53108472, 0.51204197, 1.        ]])

In [14]:
# Named feature subsets to regress on. Each value lists column indexes of
# with_graph_metrics; column 0 is prepended to every subset in the loop below.
VERSIONS = {
    'only_pr': [1],
    'only_betweenness': [2],
    'only_closeness': [3],
    'pr_and_closeness': [1, 3],
    'weighted': [1, 2, 3],
}

# For each subset: slice the columns, fit a linear regression through the
# learner, register the returned dataset under the subset's name so the
# metric cells below include it, and print the fit summary.
for name, indexes in VERSIONS.items():
    features = with_graph_metrics.take([0, *indexes], axis=1)
    dataset, coeff, r2 = learner.linear_regression(features)
    datasets[name] = dataset
    print('{n}: r^2={r2}, coeff={c}'.format(n=name, r2=r2, c=coeff))

only_pr: r^2=-0.1331416774430687, coeff=[1.49142913]
only_betweenness: r^2=-0.296141680638095, coeff=[1.70210538]
only_closeness: r^2=0.3480972302779506, coeff=[0.4716454]
pr_and_closeness: r^2=0.3916299801281544, coeff=[0.42304162 0.38994958]
weighted: r^2=0.39264296907325147, coeff=[0.30290779 0.15360332 0.3912759 ]


In [8]:
# Print the idcg value held by the Metrics object (presumably the ideal DCG
# that ndcg is normalised by -- confirm in the metrics module), then report
# metrics.ndcg for every dataset registered in `datasets`.
print(metrics.idcg)
for_each_dataset(datasets, metrics.ndcg)

1.452553083084042e+168
baseline: 1.0
random: 0.2704474479896671
naive: 0.4482823680815219
only_pr: 0.5271513090577453
only_betweenness: 0.4009498350291547
only_closeness: 0.3754596919790231
pr_and_closeness: 0.5336061817283828
weighted: 0.44929269008633355


In [9]:
# Report precision at several cutoffs. The lambda fixes the current cutoff so
# for_each_dataset is handed a one-argument scorer; it is invoked within the
# same iteration, so it always sees the intended value of n.
for n in (5, 10, 20, 50):
    print('n = {n}'.format(n=n))
    for_each_dataset(datasets, lambda a: metrics.precision_at(n, a))

n = 5
baseline: 1.0
random: 0.2
naive: 0.2
only_pr: 0.2
only_betweenness: 0.2
only_closeness: 0.4
pr_and_closeness: 0.2
weighted: 0.2
n = 10
baseline: 1.0
random: 0.1
naive: 0.3
only_pr: 0.1
only_betweenness: 0.3
only_closeness: 0.2
pr_and_closeness: 0.2
weighted: 0.3
n = 20
baseline: 1.0
random: 0.05
naive: 0.2
only_pr: 0.2
only_betweenness: 0.25
only_closeness: 0.3
pr_and_closeness: 0.2
weighted: 0.2
n = 50
baseline: 1.0
random: 0.04
naive: 0.38
only_pr: 0.34
only_betweenness: 0.38
only_closeness: 0.38
pr_and_closeness: 0.38
weighted: 0.38


In [10]:
# Report metrics.tau for every dataset in `datasets`. The recorded output
# shows one `name: KendalltauResult(correlation, pvalue)` line per dataset.
for_each_dataset(datasets, metrics.tau)

baseline: KendalltauResult(correlation=1.0, pvalue=2.676644885516571e-273)
random: KendalltauResult(correlation=0.006891758846354318, pvalue=0.8076705167919815)
naive: KendalltauResult(correlation=0.48454019549172156, pvalue=1.1463387315990024e-65)
only_pr: KendalltauResult(correlation=0.4056099303102257, pvalue=1.4829043811878737e-46)
only_betweenness: KendalltauResult(correlation=0.4432282517068526, pvalue=3.032208617194526e-55)
only_closeness: KendalltauResult(correlation=0.498388062006525, pvalue=2.288007691431624e-69)
pr_and_closeness: KendalltauResult(correlation=0.4879506830627466, pvalue=1.437857017879871e-66)
weighted: KendalltauResult(correlation=0.4904602871244442, pvalue=3.09217692129376e-67)


In [11]:
# Report metrics.rho for every dataset in `datasets`. The recorded output
# shows one `name: SpearmanrResult(correlation, pvalue)` line per dataset.
for_each_dataset(datasets, metrics.rho)

baseline: SpearmanrResult(correlation=1.0, pvalue=0.0)
random: SpearmanrResult(correlation=0.010010892448288154, pvalue=0.8134690167658944)
naive: SpearmanrResult(correlation=0.6734281623160935, pvalue=5.608369452835146e-75)
only_pr: SpearmanrResult(correlation=0.5741339698962857, pvalue=3.048662395906977e-50)
only_betweenness: SpearmanrResult(correlation=0.6238186739061509, pvalue=1.6703223496457425e-61)
only_closeness: SpearmanrResult(correlation=0.6908477913567607, pvalue=2.343937514807284e-80)
pr_and_closeness: SpearmanrResult(correlation=0.6773326738672653, pvalue=3.762866363594985e-76)
weighted: SpearmanrResult(correlation=0.679677480480464, pvalue=7.279808422349716e-77)


In [12]:
# Report metrics.rmse for every dataset in `datasets` (presumably root mean
# squared error against the baseline the Metrics object was built from --
# confirm in the metrics module).
for_each_dataset(datasets, metrics.rmse)

baseline: 0.0
random: 0.25540572915869914
naive: 0.08274775113989331
only_pr: 0.14914810365195647
only_betweenness: 0.18892948559512504
only_closeness: 0.1359636565815242
pr_and_closeness: 0.10400477111947302
weighted: 0.10309881317230686


In [13]:
# Report metrics.mae for every dataset in `datasets` (presumably mean
# absolute error against the baseline the Metrics object was built from --
# confirm in the metrics module).
for_each_dataset(datasets, metrics.mae)

baseline: 0.0
random: 0.2344563094356908
naive: 0.06373322143862112
only_pr: 0.11721879798217179
only_betweenness: 0.1519589998141315
only_closeness: 0.09644202184152607
pr_and_closeness: 0.07861928356344625
weighted: 0.07870874698647526
