In [1]:
%load_ext autoreload
%autoreload 2

import os,sys,inspect
# Make the directory above this notebook importable so that `edge_prop` can be imported below.
#
# The kernel's working directory is used as the notebook's location (assumes the kernel was
# started in the notebook's folder, which is the Jupyter default). A notebook cell has no real
# source file: inspect.getfile(inspect.currentframe()) returns a synthetic per-cell name, and
# on recent ipykernel releases a path inside a temporary directory, so deriving the parent
# directory from it points sys.path at the wrong place.
current_dir = os.getcwd()
parent_dir = os.path.dirname(current_dir)

# Keep the parent directory at the front of sys.path, but do not stack a duplicate entry
# every time this cell is re-run.
if sys.path[:1] != [parent_dir]:
    sys.path.insert(0, parent_dir)

%matplotlib inline

In [17]:
import networkx as nx
import numpy as np
from edge_prop.data_loader import DataLoader
from edge_prop.models.dense_edge_propagation import DenseEdgeProp
from edge_prop.models.dense_baseline import DenseBasline
from sklearn.metrics import accuracy_score
from edge_prop.constants import DATASET2PATH
from itertools import product

### Experiment on real-world data

In [4]:
# Load the Slashdot dataset. The loader returns the graph, an array of labels (y_true) and
# the indices used to select the evaluation subset (test_indices).
# NOTE(review): the meaning of the positional 1_000 passed to load_data() is not visible in
# this notebook (presumably a cap on how much of the dataset is loaded) — confirm against
# DataLoader.load_data.
path = DATASET2PATH['slashdot']
graph, y_true, test_indices = DataLoader(path).load_data(1_000)
# Labels restricted to the evaluation indices; the accuracy cells compare predictions to this.
y_test = y_true[test_indices]

#### Our model

In [7]:
# Build the edge-propagation model for the Slashdot graph with max_iter=50 and alpha=0.8.
# graph.y_attr is forwarded as the first constructor argument (presumably the name of the
# graph attribute that holds the labels — confirm against DenseEdgeProp).
edge_prop = DenseEdgeProp(graph.y_attr, max_iter=50, alpha=0.8)
# fit() is the last expression of the cell, so its return value (the model) is displayed.
edge_prop.fit(graph)

HBox(children=(FloatProgress(value=0.0, description='Fitting model', max=50.0, style=ProgressStyle(description…

  mat = (B[:, np.newaxis, :] + B[np.newaxis, :, :]) / (D[:, np.newaxis] + D[np.newaxis, :])[:, :, np.newaxis]
  mat = (B[:, np.newaxis, :] + B[np.newaxis, :, :]) / (D[:, np.newaxis] + D[np.newaxis, :])[:, :, np.newaxis]
  mat[adj_mat != 0] = mat[adj_mat != 0] / np.sum(mat[adj_mat != 0], axis=-1, keepdims=True)







<edge_prop.models.dense_edge_propagation.DenseEdgeProp at 0x19f28fb3488>

In [9]:
# Keep only the predictions at the evaluation indices so they line up with y_test.
y_pred = edge_prop.predict()[test_indices]
# Report the fraction of evaluation entries whose predicted label matches the true label.
print(f"Accuracy: {accuracy_score(y_test, y_pred)}")

Accuracy: 0.595243701436308


#### Baseline

In [5]:
# Baseline model on the same Slashdot graph, using the same max_iter=50 and alpha=0.8 as the
# edge-propagation model above so that the two accuracies are comparable.
baseline = DenseBasline(graph.y_attr, max_iter=50, alpha=0.8)
# fit() is the last expression of the cell, so its return value (the model) is displayed.
baseline.fit(graph)

  last_Y = last_Y / np.sum(last_Y, axis=-1)[:, np.newaxis]


HBox(children=(FloatProgress(value=0.0, description='Fitting model', max=50.0, style=ProgressStyle(description…

  last_Y = B / D[:, np.newaxis]





<edge_prop.models.dense_baseline.DenseBasline at 0x19f2a4c1188>

In [6]:
# Overwrites y_pred with the baseline's predictions at the evaluation indices.
y_pred = baseline.predict()[test_indices]
# Report the baseline's accuracy against the same y_test used for the edge-propagation model.
print(f"Accuracy: {accuracy_score(y_test, y_pred)}")

Accuracy: 0.5939879130366533


### Wikipedia dataset

In [9]:
# Load the Wikipedia dataset. This rebinds path, graph, y_true, test_indices and y_test, so
# every cell below works on this dataset instead of Slashdot.
path = DATASET2PATH['wiki']
# Field names and types handed to the loader: an integer 'label' and a string 'time'
# (presumably the per-edge columns of the raw file — confirm against DataLoader).
dtype_tuples = [('label', int), ('time', str)]
# test_size=0.5 is passed to the loader (presumably the fraction of entries held out).
graph, y_true, test_indices = DataLoader(path, dtype_tuples=dtype_tuples, test_size=0.5).load_data()
# Labels restricted to the evaluation indices; the accuracy cells compare predictions to this.
y_test = y_true[test_indices]

#### Our model

In [14]:
# Edge-propagation model for the Wikipedia graph with max_iter=50 and alpha=1.
edge_prop = DenseEdgeProp(graph.y_attr, max_iter=50, alpha=1)
# fit() is the last expression of the cell, so its return value (the model) is displayed.
edge_prop.fit(graph)

HBox(children=(FloatProgress(value=0.0, description='Fitting model', max=50.0, style=ProgressStyle(description…




<edge_prop.models.dense_edge_propagation.DenseEdgeProp at 0x1eae0cf1688>

In [15]:
# Keep only the predictions at the evaluation indices so they line up with y_test.
y_pred = edge_prop.predict()[test_indices]
# Report the fraction of evaluation entries whose predicted label matches the true label.
print(f"Accuracy: {accuracy_score(y_test, y_pred)}")



Accuracy: 0.7179609337779895


#### Baseline

In [12]:
# Baseline model for the Wikipedia graph.
# NOTE(review): this uses max_iter=200 and alpha=0.8, whereas the edge-propagation model for
# the same dataset is built with max_iter=50 and alpha=1, so the two accuracies in this
# section are not a like-for-like comparison — confirm this is intentional.
baseline = DenseBasline(graph.y_attr, max_iter=200, alpha=0.8)
# fit() is the last expression of the cell, so its return value (the model) is displayed.
baseline.fit(graph)

Fitting model:   0%|                                                                                                                            | 0/200 [00:00<?, ?iter/s, dif=0.000277]


<edge_prop.models.dense_baseline.DenseBasline at 0x1eaddf8dd48>

In [13]:
# Overwrites y_pred with the baseline's predictions at the evaluation indices.
y_pred = baseline.predict()[test_indices]
# Report the baseline's accuracy against the same y_test used for the edge-propagation model.
print(f"Accuracy: {accuracy_score(y_test, y_pred)}")

Accuracy: 0.7063879625218358




### Run Experiment

In [27]:
# Grid experiment on the Wikipedia dataset: every alpha is paired with every test_size, and
# both models are fitted and scored on each pair.
#
# NOTE(review): the dataset is reloaded for every (alpha, test_size) pair. Whether the loader
# produces the same split on each call is not visible from this notebook — confirm, otherwise
# different alphas are scored on different splits.
# NOTE(review): the output saved under this cell labels the two columns the opposite way round
# from the output of the identical print loop in the next cell, so one of the two saved
# outputs is stale — re-run both before quoting numbers.
alphas = [0, 0.5, 0.8, 1]
test_sizes = [0.25, 0.5, 0.75]

path = DATASET2PATH['wiki']
dtype_tuples = [('label', int), ('time', str)]

# Maps (alpha, test_size) -> (edge-propagation accuracy, baseline accuracy).
results = {}

# alpha is the outer loop and test_size the inner one, so pairs are visited (and stored) in
# the order: all test sizes for the first alpha, then all test sizes for the next, and so on.
for alpha in alphas:
    for test_size in test_sizes:
        # Fresh load for this pair.
        loader = DataLoader(path, dtype_tuples=dtype_tuples, test_size=test_size)
        graph, y_true, test_indices = loader.load_data()
        y_test = y_true[test_indices]

        # Edge-propagation model.
        edge_prop = DenseEdgeProp(graph.y_attr, max_iter=50, alpha=alpha)
        edge_prop.fit(graph)
        y_pred = edge_prop.predict()[test_indices]
        our_accuracy = accuracy_score(y_test, y_pred)

        # Baseline model, same graph and same hyper-parameters.
        baseline = DenseBasline(graph.y_attr, max_iter=50, alpha=alpha)
        baseline.fit(graph)
        y_pred = baseline.predict()[test_indices]
        baseline_accuracy = accuracy_score(y_test, y_pred)

        results[(alpha, test_size)] = (our_accuracy, baseline_accuracy)

# One summary line per pair, in insertion order; accuracies are rounded to three decimals.
for (alpha, test_size), (new_acc, baseline_acc) in results.items():
    print(f"alpha={alpha}, test_size={test_size}, \t Baseline: {round(baseline_acc,3)}\t New Model:{round(new_acc,3)}")

HBox(children=(FloatProgress(value=0.0, description='Fitting model', max=50.0, style=ProgressStyle(description…




Fitting model: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [00:00<00:00, 480.65iter/s, dif=1.52]


HBox(children=(FloatProgress(value=0.0, description='Fitting model', max=50.0, style=ProgressStyle(description…




Fitting model: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [00:00<00:00, 490.89iter/s, dif=1.51]


HBox(children=(FloatProgress(value=0.0, description='Fitting model', max=50.0, style=ProgressStyle(description…




Fitting model: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [00:00<00:00, 515.35iter/s, dif=1.52]


HBox(children=(FloatProgress(value=0.0, description='Fitting model', max=50.0, style=ProgressStyle(description…




Fitting model:   0%|                                                                                                                             | 0/50 [00:00<?, ?iter/s, dif=0.000723]


HBox(children=(FloatProgress(value=0.0, description='Fitting model', max=50.0, style=ProgressStyle(description…




Fitting model:   0%|                                                                                                                             | 0/50 [00:00<?, ?iter/s, dif=0.000622]


HBox(children=(FloatProgress(value=0.0, description='Fitting model', max=50.0, style=ProgressStyle(description…




Fitting model: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [00:00<00:00, 490.86iter/s, dif=0.00185]


HBox(children=(FloatProgress(value=0.0, description='Fitting model', max=50.0, style=ProgressStyle(description…




Fitting model:   0%|                                                                                                                             | 0/50 [00:00<?, ?iter/s, dif=0.000534]


HBox(children=(FloatProgress(value=0.0, description='Fitting model', max=50.0, style=ProgressStyle(description…




Fitting model:   0%|                                                                                                                             | 0/50 [00:00<?, ?iter/s, dif=0.000745]


HBox(children=(FloatProgress(value=0.0, description='Fitting model', max=50.0, style=ProgressStyle(description…




Fitting model:   0%|                                                                                                                             | 0/50 [00:00<?, ?iter/s, dif=0.000541]


HBox(children=(FloatProgress(value=0.0, description='Fitting model', max=50.0, style=ProgressStyle(description…




Fitting model:   0%|                                                                                                                             | 0/50 [00:00<?, ?iter/s, dif=0.000977]


HBox(children=(FloatProgress(value=0.0, description='Fitting model', max=50.0, style=ProgressStyle(description…




Fitting model:   0%|                                                                                                                               | 0/50 [00:00<?, ?iter/s, dif=0.0005]






HBox(children=(FloatProgress(value=0.0, description='Fitting model', max=50.0, style=ProgressStyle(description…




Fitting model:   0%|                                                                                                                             | 0/50 [00:00<?, ?iter/s, dif=0.000795]








alpha=0, test_size=0.25, 	 Baseline: 0.783	 New Model:0.783
alpha=0, test_size=0.5, 	 Baseline: 0.781	 New Model:0.781
alpha=0, test_size=0.75, 	 Baseline: 0.779	 New Model:0.779
alpha=0.5, test_size=0.25, 	 Baseline: 0.686	 New Model:0.685
alpha=0.5, test_size=0.5, 	 Baseline: 0.716	 New Model:0.713
alpha=0.5, test_size=0.75, 	 Baseline: 0.748	 New Model:0.744
alpha=0.8, test_size=0.25, 	 Baseline: 0.684	 New Model:0.678
alpha=0.8, test_size=0.5, 	 Baseline: 0.718	 New Model:0.705
alpha=0.8, test_size=0.75, 	 Baseline: 0.751	 New Model:0.731
alpha=1, test_size=0.25, 	 Baseline: 0.689	 New Model:0.678
alpha=1, test_size=0.5, 	 Baseline: 0.721	 New Model:0.699
alpha=1, test_size=0.75, 	 Baseline: 0.749	 New Model:0.713


In [28]:
# Print the experiment summary again from the stored results (same loop as the one that ends
# the experiment cell). Each value is (edge-propagation accuracy, baseline accuracy).
for key, scores in results.items():
    alpha, test_size = key
    new_acc, baseline_acc = scores
    print(f"alpha={alpha}, test_size={test_size}, \t Baseline: {round(baseline_acc,3)}\t New Model:{round(new_acc,3)}")

alpha=0, test_size=0.25, 	 Baseline: 0.783	 New Model:0.783
alpha=0, test_size=0.5, 	 Baseline: 0.781	 New Model:0.781
alpha=0, test_size=0.75, 	 Baseline: 0.779	 New Model:0.779
alpha=0.5, test_size=0.25, 	 Baseline: 0.685	 New Model:0.686
alpha=0.5, test_size=0.5, 	 Baseline: 0.713	 New Model:0.716
alpha=0.5, test_size=0.75, 	 Baseline: 0.744	 New Model:0.748
alpha=0.8, test_size=0.25, 	 Baseline: 0.678	 New Model:0.684
alpha=0.8, test_size=0.5, 	 Baseline: 0.705	 New Model:0.718
alpha=0.8, test_size=0.75, 	 Baseline: 0.731	 New Model:0.751
alpha=1, test_size=0.25, 	 Baseline: 0.678	 New Model:0.689
alpha=1, test_size=0.5, 	 Baseline: 0.699	 New Model:0.721
alpha=1, test_size=0.75, 	 Baseline: 0.713	 New Model:0.749


In [29]:
# Refit both models with alpha=0, the setting at which the experiment table reports identical
# accuracy for the two models.
# NOTE(review): `graph` is not loaded in this cell; it is whatever the last executed loading
# code left behind. When the notebook is run top to bottom that is the final iteration of the
# experiment loop (test_size=0.75) — load the dataset explicitly here if another split is meant.
edge_prop = DenseEdgeProp(graph.y_attr, max_iter=50, alpha=0)
edge_prop.fit(graph)

baseline = DenseBasline(graph.y_attr, max_iter=50, alpha=0)
# Only this last expression is displayed as the cell's output.
baseline.fit(graph)

HBox(children=(FloatProgress(value=0.0, description='Fitting model', max=50.0, style=ProgressStyle(description…




Fitting model: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [00:00<00:00, 533.41iter/s, dif=0.031]


<edge_prop.models.dense_baseline.DenseBasline at 0x1ea80185c48>

In [None]:
# Difference between the two models' predict_proba() outputs, using the models fitted in the
# previous cell (presumably to check whether they coincide at alpha=0, not just in accuracy).
# NOTE(review): this cell has no execution count or saved output — it has not been run.
edge_prop.predict_proba() - baseline.predict_proba()