<a href="https://colab.research.google.com/github/giordamaug/BIONETdatasets/blob/main/TUD/notebooks/NetPro2Vec example.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>
[![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/giordamaug/BIONETdatasets/main?filepath=TUD%2Fnotebooks%2FNetPro2Vec+example.ipynb)

# Cloning BIONETdatasets repository

In [None]:
!git clone http://github.com/giordamaug/BIONETdatasets

# Install Netpro2vec and python-igraph (if required)

In [1]:
# Install python-igraph (if required)
!pip install -q git+https://github.com/cds-group/Netpro2vec.git
!pip install -q python-igraph

[33mDEPRECATION: Configuring installation scheme with distutils config files is deprecated and will no longer work in the near future. If you are using a Homebrew or Linuxbrew Python, please see discussion at https://github.com/Homebrew/homebrew-core/issues/76621[0m


# Load the dataset

In [1]:
import os
import shutil
import sys

# Name of the dataset to load; the matching archive is read from the cloned
# repository under BIONETdatasets/TUD/datasets/.
dataname = 'MUTAG'

# Make the repository's `wrappers` package importable before importing from it.
sys.path.append(os.path.abspath('BIONETdatasets/TUD'))
from wrappers.igraph_wrapper import MyTUDataset

# No extraction directory is given, so the archive is unpacked into the
# current working directory; the dataset is then read from the folder named
# after the dataset.
shutil.unpack_archive(filename=f'BIONETdatasets/TUD/datasets/{dataname}.zip')
dataset = MyTUDataset(
    dataname,
    path=f'{dataname}',
    verbose=True,
)

# Show the first graph as a quick sanity check of what was loaded.
# NOTE(review): the recorded output of this cell shows graph "BRAIN_1" and 702
# graphs, so it appears to come from a run with a different `dataname` --
# re-run the cell to refresh the output.
print(dataset.graph_list[0])

Getting node indices:: 100%|██████████| 702/702 [00:27<00:00, 25.46it/s]
Loading nodes:: 100%|██████████| 702/702 [00:04<00:00, 163.13it/s]
Loading edges:: 100%|██████████| 6257628/6257628 [4:51:53<00:00, 357.30it/s]


Loading node labels...
Loading edge labels...
Loading edge attributes...
Loading graph labels...
IGRAPH UN-- 3928 7967 -- BRAIN_1
+ attr: class (g), name (g), label (v), name (v), attributes (e), label (e)
+ edges (vertex names):
   0 -- 1, 2080
   1 -- 0, 2, 3013
   2 -- 1, 3013, 3014
   3 -- 19, 228
   4 -- 5, 2079
   5 -- 4, 6, 3015
   6 -- 5, 3015, 3016
   7 -- 19, 228
   8 -- 10, 3017, 3018
   9 -- 10, 3017, 3019
  10 -- 8, 9, 3020
  11 -- 12
  12 -- 11
  13 -- 21, 26, 27, 260, 290, 302, 1678, 2251, 2253, 3099
  14 -- 22, 27, 282, 3096
  15 -- 27, 235, 247, 2720, 2725, 3099
  16 -- 17, 21, 27, 278, 3095
  17 -- 16, 18, 27, 619, 620, 695, 719, 752, 753, 1678, 2954, 3017, 3254, 3278
  18 -- 17, 21, 27
  19 -- 3, 7, 27, 290
  20 -- 27, 247, 273, 2721, 2722, 3099
  21 -- 13, 16, 18, 24, 25, 27, 235, 249, 261, 277, 279, 285, 287, 1678, 2281,
2920, 3032, 3095, 3629, 3631
  22 -- 14, 24, 27, 251, 262, 272, 273, 283, 286, 787, 3096
  23 -- 27, 268, 269, 288, 1678, 1690, 1782, 2249, 2251, 

# Create model and evaluate

In [30]:
import sys, os
import numpy as np
sys.path.append(os.path.abspath('.'))
from netpro2vec.Netpro2vec import Netpro2vec
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, accuracy_score, roc_auc_score
from wrappers.igraph_wrapper import MyTUDataset

# Hold-out configuration shared by both experiments: the same test fraction
# and the same seed are used for the transductive and the inductive split.
TEST_SIZE = 0.10
SPLIT_SEED = 42

# Netpro2vec hyper-parameters used for this run.
params = {"agg_by": [1], "cut_off": [0.1], "dimensions": 512, "encodew": False, "epochs": 400, "extractor": [1], "min_count": 2, "prob_type": ["ndd"], "save_vocab": False, "seed": 1, "verbose": False, "vertex_attribute": "label", "workers": 4}
# Alternative setting for the PROTEINS dataset, kept for reference:
#PROTEINS params = {"agg_by": [1,0], "cut_off": [0,0], "dimensions": 256, "encodew": False, "epochs": 200, "extractor": [2,2], "min_count": 2, "prob_type": ["ndd","tm1"], "save_vocab": False, "seed": 1, "verbose": False, "vertex_attribute": "label", "workers": 4}


def evaluate_linear_svc(X_train, X_test, y_train, y_test):
    """Fit a linear SVC on the training embeddings and report test results.

    Prints the confusion matrix followed by the accuracy on the test set.

    Parameters
    ----------
    X_train, X_test : embeddings used for fitting and for prediction.
    y_train, y_test : labels matching ``X_train`` and ``X_test``.

    Returns
    -------
    (clf, preds) : the fitted classifier and its predictions for ``X_test``.
    """
    clf = SVC(kernel='linear')
    preds = clf.fit(X_train, y_train).predict(X_test)
    print(confusion_matrix(y_test, preds),f'Acc. {accuracy_score(y_test, preds)}')
    return clf, preds


# Transductive embedding: the embedding model is fitted on ALL graphs, and the
# train/test split is applied afterwards to the resulting embedding.
print("TRANSDUCTIVE EMBEDDING:")
model = Netpro2vec(**params)
model.fit(dataset.graph_list)
X = model.get_embedding()
y = dataset.graph_labels
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=TEST_SIZE, random_state=SPLIT_SEED)
clf, preds = evaluate_linear_svc(X_train, X_test, y_train, y_test)

# Inductive embedding: the graphs are split first, the embedding model is
# fitted on the training graphs only, and the test graphs are embedded with
# infer_vector.
print("INDUCTIVE EMBEDDING:")
G_train, G_test, y_train, y_test = train_test_split(dataset.graph_list, y, test_size=TEST_SIZE, random_state=SPLIT_SEED)
model = Netpro2vec(**params)
model.fit(G_train)
X_train = model.get_embedding()
# NOTE(review): epochs=0 is passed straight to Netpro2vec.infer_vector; confirm
# against the library what a value of 0 means there.
X_test = np.array(model.infer_vector(G_test,epochs=0))
clf, preds = evaluate_linear_svc(X_train, X_test, y_train, y_test)

TRANSDUCTIVE EMBEDDING:
[[12  4]
 [ 1 53]] Acc. 0.9285714285714286
INDUCTIVE EMBEDDING:
[[13  3]
 [ 4 50]] Acc. 0.9
