# Tutorial: GraphRicciCurvature

This is a walkthrough tutorial for GraphRicciCurvature and a demonstration of how to apply Ricci curvature to tasks such as community detection. Please make sure you have the latest version of the package installed before running this tutorial.

- Try this tutorial with interactive jupyter notebooks:

    - [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/saibalmars/GraphRicciCurvature/master?filepath=notebooks%2Ftutorial.ipynb)
    
    - [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/saibalmars/GraphRicciCurvature/blob/master/notebooks/tutorial.ipynb) (Faster, but Google account required.)



## Preparation:
### Load library

In [1]:
# colab setting
!pip install GraphRicciCurvature

import networkx as nx
import numpy as np
import math
import importlib

# matplotlib setting
%matplotlib inline
import matplotlib.pyplot as plt

# to print logs in jupyter notebook
import logging
logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.ERROR)

# load GraphRicciCuravture package
from GraphRicciCurvature.OllivierRicci import OllivierRicci

Collecting GraphRicciCurvature
  Downloading https://files.pythonhosted.org/packages/f9/a2/fd0d69f0aa003de7f1b93734d6d71a4e9dfec5e2c2b0ff682e1e80e1acf5/GraphRicciCurvature-0.4.5-py3-none-any.whl
Collecting pot (from GraphRicciCurvature)
[?25l  Downloading https://files.pythonhosted.org/packages/92/2e/2c11981114d0e37a3ac2e42486b4c1cd9aaa5951d4ea2da8adf416753f46/POT-0.7.0-cp37-cp37m-macosx_10_9_x86_64.whl (145kB)
[K    100% |████████████████████████████████| 153kB 1.5MB/s 
[?25hCollecting cvxpy (from GraphRicciCurvature)
[?25l  Downloading https://files.pythonhosted.org/packages/f3/ee/53cb23c078967564a42848e613d327ff19495678b8551a01d5341f56c772/cvxpy-1.1.1-cp37-cp37m-macosx_10_9_x86_64.whl (815kB)
[K    100% |████████████████████████████████| 819kB 3.3MB/s 
[?25hCollecting networkit>=6.1 (from GraphRicciCurvature)
[?25l  Downloading https://files.pythonhosted.org/packages/26/f9/a058af193cd2058c4326519693700eb6a093cc35e47e0a8ef4e122a452fa/networkit-7.0.tar.gz (1.9MB)
[K    100% |█

In [17]:
import torch
from torch_geometric.datasets import Planetoid
from torch_geometric.utils.convert import from_networkx, to_networkx
# Load the Cora dataset through Planetoid, using /tmp/Cora as the dataset root.
dataset = Planetoid(name='Cora', root='/tmp/Cora')

ModuleNotFoundError: No module named 'torch_sparse'

In [16]:
# Take the first element of the dataset as the working graph object
# (presumably the single Cora graph — confirm against the dataset docs).
data = dataset[0]

NameError: name 'dataset' is not defined

In [None]:
# Convert the dataset graph to a NetworkX graph so it can be passed to OllivierRicci.
data_nx = to_networkx(data)
# Set up the Ollivier-Ricci computation with alpha=0.5 and INFO-level logging.
orc = OllivierRicci(data_nx, alpha=0.5, verbose="INFO")
orc.compute_ricci_curvature()
# Work on a copy of the graph held by `orc` so later edits do not touch `orc.G`.
G_orc = orc.G.copy()
# Edge-attribute mapping for "ricciCurvature" as returned by nx.get_edge_attributes.
ricci_curvatures = nx.get_edge_attributes(G_orc, "ricciCurvature")

In [6]:
# `nx.info` was deprecated in NetworkX 2.7 and removed in 3.0, so report the
# headline figures through graph methods that exist in every release.
print('Type:', type(data_nx).__name__)
print('Number of nodes:', data_nx.number_of_nodes())
print('Number of edges:', data_nx.number_of_edges())

Name: 
Type: DiGraph
Number of nodes: 2708
Number of edges: 10556
Average in degree:   3.8981
Average out degree:   3.8981


In [None]:
# Convert the NetworkX graph `G_orc` back with torch_geometric's from_networkx.
# NOTE(review): whether the "ricciCurvature" edge attribute is carried over
# depends on from_networkx's attribute handling — confirm.
data_ric = from_networkx(G_orc)

In [12]:
from xgboost import XGBClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.multiclass import OneVsRestClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import RepeatedStratifiedKFold, StratifiedKFold, StratifiedShuffleSplit, cross_val_score, cross_val_predict, GridSearchCV
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_curve, roc_auc_score, confusion_matrix, fbeta_score
from sklearn.model_selection import RepeatedStratifiedKFold, StratifiedKFold, StratifiedShuffleSplit, cross_val_score, cross_val_predict, GridSearchCV, LeaveOneOut

  data = yaml.load(f.read()) or {}
  defaults = yaml.load(f)


In [14]:
from time import time

In [15]:
# Repeated stratified k-fold: 5 folds x 5 repeats = 25 train/test splits.
# A fixed random_state makes the fold assignment, and therefore every
# cross-validated score computed with `cv`, reproducible across kernel restarts.
cv = RepeatedStratifiedKFold(
    n_splits=5,
    n_repeats=5,
    random_state=42,
)

In [None]:
# Hyper-parameter grid for the SVC grid search. The `estimator__` prefix
# addresses the classifier wrapped inside OneVsRestClassifier.
params_grid_svc = {
    'estimator__kernel': ['rbf', 'linear'],
    'estimator__gamma': [0.001, 0.1, 1],
    'estimator__C': [0.01, 1, 10],
}

In [None]:
# Hyper-parameter grid for the logistic-regression grid search: only the
# regularisation parameter C of the wrapped estimator is varied.
params_grid_lr = {
    'estimator__C': [0.01, 1, 10],
}

In [None]:
# Build one grid search per model family; both share the same `cv` splitter.
svm_model = GridSearchCV(
    OneVsRestClassifier(SVC(kernel='rbf')),
    params_grid_svc,
    cv=cv,
)
lr_model = GridSearchCV(
    OneVsRestClassifier(LogisticRegression(solver='liblinear')),
    params_grid_lr,
    cv=cv,
)

# Fit the SVC search on all node features/labels and report wall-clock time.
t_svm = time()
svm_model.fit(data.x.numpy(), data.y.numpy())
print('SVC training took {}'.format(time() - t_svm))

# Same for the logistic-regression search.
t_lr = time()
lr_model.fit(data.x.numpy(), data.y.numpy())
print('LR training took {}'.format(time() - t_lr))

In [None]:
# Display the best cross-validated score found by the SVC grid search.
svm_model.best_score_

In [None]:
# Display the best cross-validated score found by the logistic-regression grid search.
lr_model.best_score_

In [None]:
# Weighted-average F1 of the predictions `yhat` against the test-mask labels.
# NOTE(review): `yhat` is only assigned in cells that appear further down in
# this notebook, so this cell fails on a fresh kernel under Run All — confirm
# the intended cell order.
f1_score(data.y[data.test_mask].numpy(), yhat, average='weighted')

0.5392738343154487

In [None]:
# Score `clf` on the test-mask features and labels.
# NOTE(review): `clf` is never assigned in the visible part of this notebook;
# it must already exist in the kernel for this cell to run — confirm where it
# is meant to be defined.
clf.score(data.x[data.test_mask].numpy(), data.y[data.test_mask].numpy())

0.542

In [None]:
# Accumulator for leave-one-out summaries; the leave-one-out cell appends one
# [acc, tpr, tnr] row (in percent) per run.
loo_results = []

In [None]:
# Leave-one-out evaluation: refit `clf` once per sample and keep the prediction
# for the single held-out sample.
# NOTE(review): `clf` is not assigned in the visible part of this notebook; it
# must already exist in the kernel for this cell to run.

# Convert the tensors once instead of on every iteration.
features = data.x.numpy()
labels = data.y.numpy()

loo = LeaveOneOut()
predict = []

for train_index, test_index in loo.split(features):
    X_train, X_test = features[train_index], features[test_index]
    y_train, y_test = labels[train_index], labels[test_index]
    predict.append(clf.fit(X_train, y_train).predict(X_test)[0])

# Confusion matrix, computed once: rows are true classes, columns predictions.
cm = confusion_matrix(labels, predict).astype('float')

# One-vs-rest rates per class. Unpacking the raveled matrix into four names
# only works for a 2x2 matrix (and even then yields tnr, fpr, fnr, tpr in that
# order), so derive the rates from the matrix for any number of classes.
true_pos = np.diag(cm)
actual_pos = cm.sum(axis=1)
false_pos = cm.sum(axis=0) - true_pos
actual_neg = cm.sum() - actual_pos
tpr_per_class = true_pos / actual_pos
tnr_per_class = (actual_neg - false_pos) / actual_neg

if cm.shape == (2, 2):
    # Binary problem: report the rates of the positive class (second label).
    tpr, tnr = tpr_per_class[1], tnr_per_class[1]
else:
    # Multiclass problem: macro-average the one-vs-rest rates.
    tpr, tnr = tpr_per_class.mean(), tnr_per_class.mean()

# `acc` is the mean of tpr and tnr (balanced accuracy in the binary case).
loo_results.append([np.round(((tpr + tnr) * 100) / 2, 2),
                    np.round(tpr * 100, 2), np.round(tnr * 100, 2)])
print(' acc', np.round((tpr + tnr) / 2, 2),
      ' tpr', np.round(tpr, 2),
      ' tnr', np.round(tnr, 2))

<generator object BaseCrossValidator.split at 0x7fbf57516d58>


In [None]:
# Train `clf` on the training-mask nodes, then predict the test-mask nodes.
clf.fit(
    data.x[data.train_mask].numpy(),
    data.y[data.train_mask].numpy(),
)
yhat = clf.predict(data.x[data.test_mask].numpy())

In [None]:
# `nx.info` was deprecated in NetworkX 2.7 and removed in 3.0, so report the
# headline figures through graph methods that exist in every release.
print('Type:', type(G_orc).__name__)
print('Number of nodes:', G_orc.number_of_nodes())
print('Number of edges:', G_orc.number_of_edges())

Name: 
Type: DiGraph
Number of nodes: 2708
Number of edges: 10556
Average in degree:   3.8981
Average out degree:   3.8981


In [13]:
# One-vs-rest logistic regression (liblinear solver, C=0.1) fitted on the
# training-mask nodes only.
lr = OneVsRestClassifier(
    LogisticRegression(solver='liblinear', C=0.1)
).fit(
    data.x[data.train_mask].numpy(),
    data.y[data.train_mask].numpy(),
)

NameError: name 'data' is not defined

In [None]:
# Fit `clf` on the training-mask nodes and store its test-mask predictions in
# `yhat` (same statements as the earlier fit/predict cell).
clf.fit(
    data.x[data.train_mask].numpy(),
    data.y[data.train_mask].numpy(),
)
yhat = clf.predict(data.x[data.test_mask].numpy())