In [10]:
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
%matplotlib inline
import matplotlib.pyplot as plt
from matplotlib.patches import Patch
from matplotlib.lines import Line2D
import plotly.graph_objects as go
import numpy as np
from sklearn.metrics import accuracy_score
from SamBA.samba import NeighborHoodClassifier
from SamBA.distances import EuclidianDist
from SamBA.relevances import *

def plot_2d(X, y, margins=None):
    """Build a Plotly scatter figure with one marker trace per distinct label.

    Parameters
    ----------
    X : array indexed as ``X[rows, :]``
        Column 0 is used for the horizontal axis and column 1 for the
        vertical axis.
    y : array of labels aligned with the rows of ``X``
        Every distinct value found by ``np.unique`` gets its own trace.
    margins : optional
        Accepted for callers that pass it, but not used by this function.

    Returns
    -------
    The ``go.Figure`` holding the traces. The figure is only built here;
    displaying it is left to the caller.
    """
    figure = go.Figure()
    for cls in np.unique(y):
        # Row indices of the samples that carry the current label.
        rows = np.nonzero(y == cls)[0]
        points = X[rows, :]
        # The legend entry shows the label plus one (label 0 -> "Class 1").
        trace = go.Scatter(
            x=points[:, 0],
            y=points[:, 1],
            name="Class {}".format(cls + 1),
            mode="markers",
            marker=dict(size=5),
        )
        figure.add_trace(trace)
    return figure

# One shared RNG drives data generation, the split and the AdaBoost baseline,
# so a fresh "Restart & Run All" reproduces the same numbers.
rs = np.random.RandomState(7)

# Noisy two-moons data set with 1000 samples.
X, y = make_moons(n_samples=1000, shuffle=True, noise=0.1, random_state=rs)
# train_size=0.02 keeps only 2% of the samples (20 points) for training;
# everything else is held out for testing.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=0.02, shuffle=True, random_state=rs
)

# SamBA model built on depth-1 decision trees (stumps).
# NOTE(review): the stump is not seeded here; how NeighborHoodClassifier
# handles the base estimator's random_state is not visible from this
# notebook -- confirm before seeding it.
classifier = NeighborHoodClassifier(
    base_estimator=DecisionTreeClassifier(max_depth=1, splitter='best'),
    n_estimators=15,
    estimator_params=tuple(),
    keep_selected_features=True,
    relevance=ExpRelevance(),
    distance=EuclidianDist(),
    b=2,
    forced_diversity=False,
)

classifier.fit(X_train, y_train, save_data=True)
# preds_train is not used below; it is kept because later cells may read it.
preds_train = classifier.predict(X_train)
preds_test = classifier.predict(X_test)
print("SamBA test acc: {}".format(accuracy_score(y_test, preds_test)))

# Show the (tiny) training set the models were fitted on.
fig = plot_2d(X_train, y_train)
fig.show()

# AdaBoost baseline on the same kind of stump.
from sklearn.ensemble import AdaBoostClassifier
# The stump is passed positionally: the keyword for this first parameter was
# renamed from `base_estimator` to `estimator` in newer scikit-learn releases,
# and the positional form works with both.
# random_state makes the baseline reproducible (tree split ties are otherwise
# broken randomly).
ada = AdaBoostClassifier(
    DecisionTreeClassifier(max_depth=1, splitter='best'),
    n_estimators=20,
    random_state=rs,
)

ada.fit(X_train, y_train)
# `preds` holds the AdaBoost test predictions; a later cell plots it.
preds = ada.predict(X_test)
print("Adaboost test acc: ",accuracy_score(y_test, preds))

[0.49821992 0.50178008]
[-1.  1.]
SamBA test acc: 0.8336734693877551


Adaboost test acc:  0.823469387755102


In [11]:
# Test points coloured by the SamBA predictions. The title distinguishes this
# figure from the AdaBoost one; update_layout returns the figure, so it is
# still displayed as the cell's output.
plot_2d(X_test, preds_test).update_layout(title="SamBA predictions on the test set")

In [12]:
# Test points coloured by the AdaBoost predictions (`preds`). The title
# distinguishes this figure from the SamBA one; update_layout returns the
# figure, so it is still displayed as the cell's output.
plot_2d(X_test, preds).update_layout(title="AdaBoost predictions on the test set")