### Graphical Bayesian predictive classification

Read a classification dataset and compare the predictive classifiers with a range of existing classifiers.
BayesPred is the standard Bayesian predictive classifier and GraphPred is the graphical predictive classifier.

In [6]:
import pandas as pd
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from trilearn.graph_predictive import GraphPredictive
from sklearn.model_selection import train_test_split
# Each display label is declared next to the estimator it describes and the
# pairs are then unzipped into the two parallel lists `names` and
# `classifiers`. BayesPred and GraphPred are both GraphPredictive instances;
# BayesPred is the one constructed with standard_bayes=True.
names, classifiers = map(list, zip(*[
    ("Nearest Neighbors", KNeighborsClassifier(3)),
    ("Linear SVM", SVC(kernel="linear", C=0.025)),
    ("RBF SVM", SVC(gamma=2, C=1)),
    ("Gaussian Process",
     GaussianProcessClassifier(1.0 * RBF(1.0), warm_start=True)),
    ("Decision Tree", DecisionTreeClassifier(max_depth=5)),
    ("Random Forest",
     RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1)),
    ("Neural Net", MLPClassifier(alpha=1)),
    ("AdaBoost", AdaBoostClassifier()),
    ("Naive Bayes", GaussianNB()),
    ("LDA", LinearDiscriminantAnalysis(solver='lsqr', shrinkage='auto')),
    ("QDA", QuadraticDiscriminantAnalysis()),
    ("BayesPred", GraphPredictive(standard_bayes=True)),
    ("GraphPred", GraphPredictive(n_particles=100, n_pgibbs_samples=5)),
]))

In [8]:
# Load the full dataset. The "y" column is used as the target; every other
# column is passed to the classifiers as a feature.
df_full = pd.read_csv("classification_full_dataset.csv")

# Hold out 30% of the rows for testing; random_state pins the split so the
# same rows are selected on every run.
x_train, x_test, y_train, y_test = train_test_split(
    df_full.drop(["y"], axis=1), df_full["y"],
    test_size=0.3, random_state=1)

# Comparison: fit every classifier on the training split and print its score
# on the held-out split, one "<name> <score>" line per classifier.
# `.values` is used instead of `.get_values()`, which was deprecated in
# pandas 0.25 and removed in 1.0; `.values` yields the underlying NumPy array
# on both old and new pandas.
# `print(...)` with a single argument behaves the same on Python 2 and 3,
# whereas the `print expr` statement form is a SyntaxError on Python 3.
for name, clf in zip(names, classifiers):
    clf.fit(x_train.values, y_train.values)
    print(str(name) + " " + str(clf.score(x_test.values, y_test.values)))

Nearest Neighbors 0.6666666666666666
Linear SVM 0.5238095238095238
RBF SVM 0.40476190476190477
Gaussian Process 0.6666666666666666
Decision Tree 0.47619047619047616
Random Forest 0.5952380952380952
Neural Net 0.7380952380952381
AdaBoost 0.5476190476190477
Naive Bayes 0.5476190476190477
LDA 0.5952380952380952
QDA 0.7619047619047619


Particle Gibbs samples:   0%|          | 0/5 [00:00<?, ?it/s]

BayesPred 0.7619047619047619


Particle Gibbs samples: 100%|██████████| 5/5 [00:07<00:00,  1.54s/it]
Particle Gibbs samples: 100%|██████████| 5/5 [00:08<00:00,  1.65s/it]


GraphPred 0.8571428571428571
