In [1]:
import numpy as np
import zipfile

from segk import segk
from evaluation import evaluate_clustering, evaluate_classification

import warnings
# NOTE(review): no category or module filter is passed, so this suppresses every
# warning raised after this cell runs, not only those of a specific library.
warnings.filterwarnings('ignore')

In [2]:
# Which synthetic benchmark archive to load.
configuration = 'varied' # basic, varied, basic_perturbed or varied_perturbed

# Number of graphs read from the archive; members are indexed 0 .. n_graphs-1.
n_graphs = 20


def _read_edgelist(archive, member):
    """Parse one edge-list member of ``archive`` into a list of ``(int, int)`` pairs.

    Every non-blank line is split on arbitrary whitespace and its first two
    fields are converted to ints. The result therefore does not depend on how
    the line is terminated (LF, CRLF, or no terminator on the last line).
    """
    pairs = list()
    with archive.open(member) as f:
        for line in f:
            fields = line.decode("utf-8").split()
            if not fields:
                continue  # tolerate blank lines, e.g. an extra newline at EOF
            pairs.append((int(fields[0]), int(fields[1])))
    return pairs


def _read_labels(archive, member):
    """Parse one label member of ``archive`` (one integer per line) into a NumPy array."""
    labels = list()
    with archive.open(member) as f:
        for line in f:
            text = line.decode("utf-8").strip()
            if text:  # skip blank lines instead of failing on int('')
                labels.append(int(text))
    return np.array(labels)


edges = list()
y = list()
with zipfile.ZipFile('datasets/synthetic/'+configuration+'.zip') as z:
    for i in range(n_graphs):
        prefix = configuration+'_'+str(i)
        edges.append(_read_edgelist(z, prefix+'.edgelist'))
        y.append(_read_labels(z, prefix+'_class_labels.txt'))

# Node ids of each graph are taken to be 0 .. (number of labels - 1).
nodes = [list(range(len(labels))) for labels in y]

In [3]:
# Display names, listed in the same order as the embeddings built per graph below.
algorithms = ["SEGK-SP", "SEGK-WL"]

# For every metric: one list of per-graph scores for each algorithm.
avg_homogeneity = [list() for _ in algorithms]
avg_completeness = [list() for _ in algorithms]
avg_silhouette = [list() for _ in algorithms]
avg_accs = [list() for _ in algorithms]
avg_f1 = [list() for _ in algorithms]

for graph_idx in range(20):
    graph_nodes = nodes[graph_idx]
    graph_edges = edges[graph_idx]
    graph_labels = y[graph_idx]

    # Embed the graph once per kernel; the list order must match `algorithms`.
    E_segk_sp = segk(graph_nodes, graph_edges, radius=2, dim=10, kernel='shortest_path')
    E_segk_wl = segk(graph_nodes, graph_edges, radius=2, dim=10, kernel='weisfeiler_lehman')
    embeddings = [E_segk_sp, E_segk_wl]

    # Both evaluators take all embeddings at once; each returned sequence is
    # indexed by algorithm position.
    homogeneity, completeness, silhouette = evaluate_clustering(embeddings, graph_labels)
    accs, f1 = evaluate_classification(embeddings, graph_labels)

    # Pair every accumulator with this graph's scores for the same metric.
    score_pairs = (
        (avg_homogeneity, homogeneity),
        (avg_completeness, completeness),
        (avg_silhouette, silhouette),
        (avg_accs, accs),
        (avg_f1, f1),
    )
    for history, scores in score_pairs:
        for algo_idx in range(len(algorithms)):
            history[algo_idx].append(scores[algo_idx])

# Report the mean of each metric over all graphs, one section per algorithm.
for algo_idx, algo_name in enumerate(algorithms):
    print("\n"+algo_name)
    report = (
        ("Homogeneity:", avg_homogeneity),
        ("Completeness:", avg_completeness),
        ("Silhouette:", avg_silhouette),
        ("Accuracy:", avg_accs),
        ("F1-score:", avg_f1),
    )
    for caption, history in report:
        print(caption, np.mean(history[algo_idx]))


SEGK-SP
Homogeneity: 0.9840582207584688
Completeness: 0.9534026268575037
Silhouette: 0.944734630872291
Accuracy: 0.9982195652173914
F1-score: 0.9961055626911669

SEGK-WL
Homogeneity: 0.978258501295483
Completeness: 0.9501782427108607
Silhouette: 0.9519399106686516
Accuracy: 0.9962108695652174
F1-score: 0.9923358062907912
