# Scikit-Graph
### A simple graph machine learning example using sklearn and scikit-graph's transformers.

In [2]:
# Importing the packages and functions.
import scikitgraph as sg
import pandas as pd
import networkx as nx
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import accuracy_score

In [3]:
# Fix NumPy's global random seed so the stochastic steps that rely on it are repeatable.
SEED = 31415
np.random.seed(SEED)

In [4]:
# Generate the example graph and the node-level dataframe.
G = nx.karate_club_graph()

# Each row of the dataframe corresponds to a node and each column to a feature.
# In this case there are no features, just the name of the node, taken from the
# graph itself so the rows always line up with the graph's nodes.
f = pd.DataFrame(data={'name': list(G.nodes)})

# The target of our machine learning model: 1 when the node's 'club' attribute is
# 'Officer', 0 otherwise. Attributes are read through `G.nodes[...]` because the
# older `G.node` alias was removed in networkx 2.4.
f["target"] = [1 if G.nodes[node]['club'] == 'Officer' else 0 for node in G.nodes]

In [5]:
# Instantiate the scikit-graph transformers used as pipeline steps.

# Graph-based transformers: each one is constructed with the graph G.
Degree = sg.Degree(G)
Pagerank = sg.Pagerank(G)
Centrality = sg.Centrality(G)
Communities_label_propagation = sg.Communities_label_propagation(G)
Communities_greedy_modularity = sg.Communities_greedy_modularity(G)

# DropName takes no arguments; presumably it removes the 'name' column before the
# classifier sees the data -- confirm against the scikit-graph documentation.
DropName = sg.DropName()

In [6]:
# Create the pipeline object: the graph transformers run in the order listed,
# 'DropName' follows them, and the SVM classifier is the final step.
steps = [
    ("Degree", Degree),
    ("Pagerank", Pagerank),
    ("Centrality", Centrality),
    ("CLP", Communities_label_propagation),
    ("CGM", Communities_greedy_modularity),
    ("DropName", DropName),
    ('SVM', SVC()),
]

pipeline = Pipeline(steps)

In [7]:
# Separate the target from the remaining columns, then hold out 20% of the rows
# as a test set, stratified on the target.
Y = f['target']
X = f.drop(columns=['target'])
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=30,
    stratify=Y,
)

In [8]:
# Run a randomized search with cross-validation to find the best hyperparameters
# for the support vector machine (SVM) step of the pipeline.
# Keys follow the '<step name>__<parameter>' form, so they address the 'SVM' step.
parameteres = {
    'SVM__C': [0.001, 0.1, 10, 100, 10e5],  # note: 10e5 is 1,000,000
    'SVM__gamma': [0.1, 0.01],
}
random_search = RandomizedSearchCV(
    estimator=pipeline,
    param_distributions=parameteres,
    n_iter=4,
    cv=3,
)
random_search.fit(X_train, y_train)



RandomizedSearchCV(cv=3, error_score='raise-deprecating',
                   estimator=Pipeline(memory=None,
                                      steps=[('Degree',
                                              Degree(G=<networkx.classes.graph.Graph object at 0x7f516fb107f0>)),
                                             ('Pagerank',
                                              Pagerank(G=<networkx.classes.graph.Graph object at 0x7f516fb107f0>)),
                                             ('Centrality',
                                              Centrality(G=<networkx.classes.graph.Graph object at 0x7f516fb107f0>)),
                                             ('CLP',
                                              Communities_lab...
                                                  gamma='auto_deprecated',
                                                  kernel='rbf', max_iter=-1,
                                                  probability=False,
                              

In [9]:
# Score the best model found by the search on the held-out test set.
best_model = random_search.best_estimator_
y_pred = best_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print('Score: ', accuracy)

Score:  1.0
