In [1]:
from xgboost import XGBClassifier
from torch_geometric.datasets import Planetoid
import torch_geometric.transforms as T
from torch_geometric.utils import add_self_loops
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression, SGDClassifier
from sklearn.svm import SVC

dataset = Planetoid(root='/tmp/Cora', name='Cora', split="public")
dataset.transform = T.NormalizeFeatures()

X =  dataset[0].x 
y =  dataset[0].y 

test =  dataset[0].test_mask
train = dataset[0].train_mask 
val =  dataset[0].val_mask

edge_index = dataset[0].edge_index 
edge_index = add_self_loops(edge_index)[0]


clf_1 = XGBClassifier( tree_method='gpu_hist', 
                           n_estimators=1100,
                           max_depth=2,
                    random_state=42,
                    eta=0.3,
                    reg_lambda=0.001,
                           min_child_weight = 1,
                           max_delta_step= 3,
                           sampling_method= "uniform")
   
clf_2 = XGBClassifier( tree_method='gpu_hist', 
                           n_estimators=900,
                           max_depth=2,
                       random_state=42,
                       reg_lambda=0.2953684210526316,
                       eta=0.2733333333333333,
                           min_child_weight = 2,
                           max_delta_step= 4,
                           sampling_method= "uniform",
                      subsample=0.5)

clf_3 = SVC(probability=True, C=100, kernel="linear", degree=1)

def user_function(kwargs):
    return  kwargs["updated_features"] + kwargs["summed_neighbors"]

user_functions = [user_function, user_function,  user_function]
clfs = [clf_1, clf_2, clf_3]
hops_list = [0, 3,  8]
attention_configs = [ None, 
                     {'inter_layer_normalize': True,
                     'use_pseudo_attention':True,
                     'cosine_eps':.01,
                     'dropout_attn': None},
                     {'inter_layer_normalize': True,
                     'use_pseudo_attention':True,
                     'cosine_eps':.01,
                     'dropout_attn': None}
                    ]



In [25]:
from pycaret.classification import *
import numpy as np
import pandas as pd
train_df = pd.DataFrame(np.concatenate((np.expand_dims(y[train], axis=1), X[train]), axis=1))
val_df = pd.DataFrame(np.concatenate((np.expand_dims(y[val], axis=1), X[val]), axis=1))
test_df = pd.DataFrame(np.concatenate((np.expand_dims(y[test], axis=1), X[test]), axis=1))

train_df.rename(columns={0: 'y'}, inplace= True)
val_df.rename(columns={0: 'y'}, inplace= True)
test_df.rename(columns={0: 'y'}, inplace= True)

df_setup = setup(pd.DataFrame(np.concatenate((np.expand_dims(y, axis=1), X), axis=1)), target=0)

Unnamed: 0,Description,Value
0,Session id,5819
1,Target,0
2,Target type,Multiclass
3,Original data shape,"(2708, 1434)"
4,Transformed data shape,"(2708, 1434)"
5,Transformed train set shape,"(1895, 1434)"
6,Transformed test set shape,"(813, 1434)"
7,Numeric features,1433
8,Preprocess,True
9,Imputation type,simple


In [26]:
best = compare_models()

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
et,Extra Trees Classifier,0.7547,0.9428,0.7547,0.7705,0.7505,0.6942,0.6993,0.206
rf,Random Forest Classifier,0.7457,0.9382,0.7457,0.7709,0.7425,0.6826,0.6887,0.127
catboost,CatBoost Classifier,0.7415,0.9403,0.7415,0.7627,0.7385,0.6777,0.6834,14.67
svm,SVM - Linear Kernel,0.7208,0.0,0.7208,0.7494,0.7154,0.655,0.662,0.271
lightgbm,Light Gradient Boosting Machine,0.7167,0.9259,0.7167,0.7263,0.7132,0.6495,0.6522,26.155
gbc,Gradient Boosting Classifier,0.7151,0.9287,0.7151,0.7399,0.7107,0.6425,0.6512,2.809
ridge,Ridge Classifier,0.6544,0.0,0.6544,0.7376,0.6373,0.5533,0.5834,0.193
dt,Decision Tree Classifier,0.6269,0.7707,0.6269,0.6372,0.6259,0.543,0.5446,0.249
ada,Ada Boost Classifier,0.5256,0.7151,0.5256,0.5837,0.5269,0.4122,0.4173,0.154
nb,Naive Bayes,0.5208,0.7061,0.5208,0.5322,0.5169,0.4033,0.4072,0.23


In [27]:
finalize_model(best)
evaluate_model(best)

interactive(children=(ToggleButtons(description='Plot Type:', icons=('',), options=(('Pipeline Plot', 'pipelin…

In [28]:
predict_model(best)

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,Extra Trees Classifier,0.7614,0.949,0.7614,0.7711,0.7572,0.7024,0.7073


Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,1427,1428,1429,1430,1431,1432,1433,0,prediction_label,prediction_score
1745,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,...,0.045455,0.0,0.0,0.0,0.0,0.000000,0.0,2.0,3,0.43
1446,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,...,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,3.0,3,0.88
2054,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,...,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0,0.41
880,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,...,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,0.0,0,0.43
1859,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,...,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,3.0,3,0.87
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
51,0.000000,0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,...,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,3.0,3,0.44
1100,0.055556,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,...,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,3.0,5,0.73
2549,0.000000,0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,...,0.000000,0.0,0.0,0.0,0.0,0.000000,0.0,4.0,4,0.81
2109,0.000000,0.0,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,...,0.000000,0.0,0.0,0.0,0.0,0.047619,0.0,6.0,6,0.64
