In [1]:
import sys
sys.path.insert(0, 'build/lib')

from turbocat import TurboCatClassifier as tc
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
import numpy as np

In [2]:
X, y = load_wine(return_X_y=True)
print(f'Dataset shape: {X.shape}')
print(f'Classes: {np.unique(y)}')
X, y

Dataset shape: (178, 13)
Classes: [0 1 2]


(array([[1.423e+01, 1.710e+00, 2.430e+00, ..., 1.040e+00, 3.920e+00,
         1.065e+03],
        [1.320e+01, 1.780e+00, 2.140e+00, ..., 1.050e+00, 3.400e+00,
         1.050e+03],
        [1.316e+01, 2.360e+00, 2.670e+00, ..., 1.030e+00, 3.170e+00,
         1.185e+03],
        ...,
        [1.327e+01, 4.280e+00, 2.260e+00, ..., 5.900e-01, 1.560e+00,
         8.350e+02],
        [1.317e+01, 2.590e+00, 2.370e+00, ..., 6.000e-01, 1.620e+00,
         8.400e+02],
        [1.413e+01, 4.100e+00, 2.740e+00, ..., 6.100e-01, 1.600e+00,
         5.600e+02]]),
 array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2,
 

In [3]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.6, stratify=y, random_state=42)
print(f'Train: {len(y_train)}, Test: {len(y_test)}')

Train: 71, Test: 107


In [4]:
model = tc(
    n_estimators=10,
    max_depth=4,
    n_jobs=1,
    verbosity=0
)

history = model.fit(X_train, y_train)
print(f'Trees: {model.n_trees}')

Trees: 30


In [5]:
# Test predict
preds = model.predict(X_test)
print(f'Predictions: {preds}')
print(f'Unique: {np.unique(preds, return_counts=True)}')

Predictions: [1 1 2 0 2 2 1 1 0 2 1 2 1 2 0 1 2 2 0 0 2 2 1 2 2 0 1 2 1 0 2 0 1 0 1 2 1
 0 2 0 1 1 1 0 1 0 2 1 1 0 1 2 0 1 2 0 0 2 0 1 1 1 1 2 0 1 1 0 0 1 1 1 1 2
 0 0 0 0 2 0 0 0 1 1 0 1 2 0 1 2 2 0 1 2 0 0 1 2 1 2 0 0 2 1 1 0 2]
Unique: (array([0, 1, 2], dtype=int32), array([36, 40, 31]))


In [6]:
# Test predict_proba
proba = model.predict_proba(X_test)
print(f'Proba shape: {proba.shape}')
print(f'First 5 proba:\n{proba[:5]}')

Proba shape: (107, 3)
First 5 proba:
[[0.18235715 0.6402931  0.17734975]
 [0.26379997 0.4672279  0.26897213]
 [0.18403001 0.3087004  0.5072696 ]
 [0.43853593 0.32407597 0.23738806]
 [0.19102407 0.26990163 0.53907436]]


In [7]:
# Accuracy
print(f'Accuracy: {(preds == y_test).mean():.4f}')

Accuracy: 0.9626
