In [3]:
import time

import numpy as np
import pandas as pd
import sklearn.datasets, sklearn.metrics, sklearn.model_selection, sklearn.tree

import subprocess, sys


In [2]:
!cd ../ && make print-SOURCES

SOURCES = cpp/conf.cpp cpp/criterion.cpp cpp/factories.cpp cpp/gbt.cpp cpp/lltrees.cpp cpp/metrics.cpp cpp/node.cpp cpp/threadpool.cpp cpp/tree.cpp cpp/wrapper.cpp


In [78]:
!cd ../ && make clean
!cd ../ && make
subprocess.call([sys.executable, "lltrees_python_import_for_debug.py"])

rm -rf build
mkdir -p build
g++ -g -o build/conf.o cpp/conf.cpp -I/usr/include/python3.10 -I/usr/include/python3.10  -Wno-unused-result -Wsign-compare -g      -fstack-protector-strong -Wformat -Werror=format-security  -DNDEBUG -g -fwrapv -O2 -Wall -fPIC -c -std=c++17 -DBOOST_BIND_GLOBAL_PLACEHOLDERS -DBOOST_ALLOW_DEPRECATED_HEADERS
g++ -g -o build/criterion.o cpp/criterion.cpp -I/usr/include/python3.10 -I/usr/include/python3.10  -Wno-unused-result -Wsign-compare -g      -fstack-protector-strong -Wformat -Werror=format-security  -DNDEBUG -g -fwrapv -O2 -Wall -fPIC -c -std=c++17 -DBOOST_BIND_GLOBAL_PLACEHOLDERS -DBOOST_ALLOW_DEPRECATED_HEADERS
g++ -g -o build/factories.o cpp/factories.cpp -I/usr/include/python3.10 -I/usr/include/python3.10  -Wno-unused-result -Wsign-compare -g      -fstack-protector-strong -Wformat -Werror=format-security  -DNDEBUG -g -fwrapv -O2 -Wall -fPIC -c -std=c++17 -DBOOST_BIND_GLOBAL_PLACEHOLDERS -DBOOST_ALLOW_DEPRECATED_HEADERS
g++ -g -o build/gbt.o cpp/gbt.cpp 

0

In [5]:
sys.path.append('/home/alexandre/Desktop/lltrees/build')
import lltrees

# Regression

In [6]:
X, Y = sklearn.datasets.make_regression(n_samples=1000, n_features=8, n_informative=5, n_targets=1, noise=1, random_state=42)
X_train, X_test, Y_train, Y_test = sklearn.model_selection.train_test_split(X, Y, test_size=0.3, random_state=42)

## Tree

In [7]:
conf ={
    'mode' : 'regression',
    'epochs' : 1,
    'learning_rate' : 1,
    'metric' : 'mae', 
    'max_depth' : 5,
    'min_leaf_size' : 2,
    'criterion' : "absolute_error",
    'verbose' : 1, 
}
my_lltree = lltrees.lltree()
my_lltree.set_conf(conf)
my_lltree.get_conf()
time.sleep(0.1)

start_time = time.time()
my_lltree.fit(X_train, Y_train, X_test, Y_test)
print("FIT --- %s seconds ---" % (time.time() - start_time))

start_time = time.time()
YP = my_lltree.predict(X_test)
print("PREDICT --- %s seconds ---" % (time.time() - start_time))

print("rmse: %.2f" % np.sqrt(sklearn.metrics.mean_squared_error(Y_test,YP)))
print("mae: %.2f" % sklearn.metrics.mean_absolute_error(Y_test,YP))
print("r2: %.2f" % sklearn.metrics.r2_score(Y_test,YP))

-----------------------------------------
mode :              regression
epochs :            1
learning_rate :     1
metric :            mae
criterion :         absolute_error
max_depth :         5
min_leaf_size :     2
verbose :           1
-----------------------------------------
Type of Training Data : float64
Configuration mode : regression
FIT --- 0.05489802360534668 seconds ---
PREDICT --- 8.392333984375e-05 seconds ---
rmse: 63.46
mae: 49.21
r2: 0.74
Epoch : 1     Metric Train : 38.4851 Metric va : 49.2101 Residuals (sum) : -2355.95


In [8]:
my_lltree = sklearn.tree.DecisionTreeRegressor(max_depth = 5, criterion ="absolute_error", min_samples_split=2, random_state = 0)

start_time = time.time()
my_lltree.fit(X_train, Y_train)
print("FIT --- %s seconds ---" % (time.time() - start_time))

start_time = time.time()
YP = my_lltree.predict(X_test)
print("PREDICT --- %s seconds ---" % (time.time() - start_time))

print("rmse: %.2f" % np.sqrt(sklearn.metrics.mean_squared_error(Y_test,YP)))
print("mae: %.2f" % sklearn.metrics.mean_absolute_error(Y_test,YP))
print("r2: %.2f" % sklearn.metrics.r2_score(Y_test,YP))

FIT --- 0.023116588592529297 seconds ---
PREDICT --- 0.0002295970916748047 seconds ---
rmse: 63.68
mae: 49.35
r2: 0.74


## GBT

In [9]:
conf ={
    'mode' : 'regression',
    'epochs' : 50,
    'learning_rate' : 0.1,
    'metric' : 'mae',
    'max_depth' : 2,
    'min_leaf_size' : 2,
    'criterion' : "absolute_error", 
    'verbose' : 0, 
}
my_lltree = lltrees.lltree()
my_lltree.set_conf(conf)
my_lltree.get_conf()
time.sleep(0.1)

start_time = time.time()
my_lltree.fit(X_train, Y_train, X_test, Y_test)
print("FIT --- %s seconds ---" % (time.time() - start_time))

start_time = time.time()
YP = my_lltree.predict(X_test)
print("PREDICT --- %s seconds ---" % (time.time() - start_time))

print("rmse: %.2f" % np.sqrt(sklearn.metrics.mean_squared_error(Y_test,YP)))
print("mae: %.2f" % sklearn.metrics.mean_absolute_error(Y_test,YP))
print("r2: %.2f" % sklearn.metrics.r2_score(Y_test,YP))

-----------------------------------------
mode :              regression
epochs :            50
learning_rate :     0.1
metric :            mae
criterion :         absolute_error
max_depth :         2
min_leaf_size :     2
verbose :           0
-----------------------------------------
FIT --- 1.632075309753418 seconds ---
PREDICT --- 0.0002887248992919922 seconds ---
rmse: 41.05
mae: 32.04
r2: 0.89
Type of Training Data : float64
Configuration mode : regression


In [10]:
from sklearn.ensemble import GradientBoostingRegressor
my_lltree = GradientBoostingRegressor(learning_rate = 0.1, n_estimators =50, max_depth = 2, min_samples_leaf=2, min_samples_split=2,
                                 loss = 'absolute_error', criterion = 'squared_error',
                                 random_state = 0)

start_time = time.time()
my_lltree.fit(X_train, Y_train)
print("FIT --- %s seconds ---" % (time.time() - start_time))

start_time = time.time()
YP = my_lltree.predict(X_test)
print("PREDICT --- %s seconds ---" % (time.time() - start_time))

print("rmse: %.2f" % np.sqrt(sklearn.metrics.mean_squared_error(Y_test,YP)))
print("mae: %.2f" % sklearn.metrics.mean_absolute_error(Y_test,YP))
print("r2: %.2f" % sklearn.metrics.r2_score(Y_test,YP))

FIT --- 0.0948491096496582 seconds ---
PREDICT --- 0.0006465911865234375 seconds ---
rmse: 52.46
mae: 38.62
r2: 0.82


# Classification

In [11]:
X, Y = sklearn.datasets.make_classification(n_samples=1000, n_features=8, n_informative=5, n_classes=2, random_state=42)
X_train, X_test, Y_train, Y_test = sklearn.model_selection.train_test_split(X, Y, test_size=0.3, random_state=42)

## Tree

In [12]:
from sklearn.tree import DecisionTreeClassifier
my_lltree = DecisionTreeClassifier(max_depth = 5, min_samples_split=5,
                                 splitter = 'best', criterion = 'gini',
                                 random_state = 0)

start_time = time.time()
my_lltree.fit(X_train, Y_train)
print("FIT --- %s seconds ---" % (time.time() - start_time))

start_time = time.time()
YP = my_lltree.predict(X_test)
print("PREDICT --- %s seconds ---" % (time.time() - start_time))

print("accuracy_score: %.2f" % np.sqrt(sklearn.metrics.accuracy_score(Y_test,YP)))
print("log_loss: %.2f" % sklearn.metrics.log_loss(Y_test,YP))
print("f1_score: %.2f" % sklearn.metrics.f1_score(Y_test,YP))

FIT --- 0.0031003952026367188 seconds ---
PREDICT --- 0.0003752708435058594 seconds ---
accuracy_score: 0.94
log_loss: 3.80
f1_score: 0.90


In [13]:
conf ={
    'mode' : 'classic_classification',
    'epochs' : 1,
    'learning_rate' : 1,
    'metric' : 'accuracy',
    'max_depth' : 5,
    'min_leaf_size' : 5,
    'criterion' : "absolute_error",  
    'verbose' : 0  
}
my_lltree = lltrees.lltree()
my_lltree.set_conf(conf)
my_lltree.get_conf()
time.sleep(0.5)

start_time = time.time()
my_lltree.fit(X_train, Y_train.astype(np.int32), X_test, Y_test.astype(np.int32))
print("FIT --- %s seconds ---" % (time.time() - start_time))

start_time = time.time()
YP = my_lltree.predict(X_test)
print("PREDICT --- %s seconds ---" % (time.time() - start_time))

print("accuracy_score: %.2f" % np.sqrt(sklearn.metrics.accuracy_score(Y_test,YP)))
print("log_loss: %.2f" % sklearn.metrics.log_loss(Y_test,YP))
print("f1_score: %.2f" % sklearn.metrics.f1_score(Y_test,YP))

-----------------------------------------
mode :              classic_classification
epochs :            1
learning_rate :     1
metric :            accuracy
criterion :         absolute_error
max_depth :         5
min_leaf_size :     5
verbose :           0
-----------------------------------------
Type of Training Data : int32
Configuration mode : classic_classification
FIT --- 0.10984277725219727 seconds ---
PREDICT --- 0.00010371208190917969 seconds ---
Gbt_classic_classification fit
All the distinct element for classification in sorted order are: 0 1 
accuracy_score: 0.94
log_loss: 4.26
f1_score: 0.88


## GBT

In [14]:
conf ={
    'mode' : 'adaboost_classification',
    'epochs' : 15,
    'learning_rate' : 1,
    'metric' : 'accuracy',
    'max_depth' : 1,
    'min_leaf_size' : 1,
    'criterion' : "gini",  
    'verbose' : 0,  
}

my_lltree = lltrees.lltree()
my_lltree.set_conf(conf)
my_lltree.get_conf()
time.sleep(0.1)

start_time = time.time()
my_lltree.fit(X_train, Y_train.astype(np.int32), X_test, Y_test.astype(np.int32))
print("FIT --- %s seconds ---" % (time.time() - start_time))

start_time = time.time()
YP = my_lltree.predict(X_test)
print("PREDICT --- %s seconds ---" % (time.time() - start_time))

print("accuracy_score: %.2f" % np.sqrt(sklearn.metrics.accuracy_score(Y_test,YP)))
print("log_loss: %.2f" % sklearn.metrics.log_loss(Y_test,YP))
print("f1_score: %.2f" % sklearn.metrics.f1_score(Y_test,YP))

-----------------------------------------
mode :              adaboost_classification
epochs :            15
learning_rate :     1
metric :            accuracy
criterion :         gini
max_depth :         1
min_leaf_size :     1
verbose :           0
-----------------------------------------
FIT --- 1.205146312713623 seconds ---
PREDICT --- 0.0002071857452392578 seconds ---
Type of Training Data : int32
Configuration mode : adaboost_classification
Gbt_classification fit
All the distinct element for classification in sorted order are: 0 1 
accuracy_score: 0.85
log_loss: 9.56
f1_score: 0.69


In [15]:
conf ={
    'mode' : 'classic_classification',
    'epochs' : 50,
    'learning_rate' : 0.1,
    'metric' : 'accuracy',
    'max_depth' : 2,
    'min_leaf_size' : 1,
    'criterion' : "absolute_error",  
    'verbose' : 0,  
}
my_lltree = lltrees.lltree()
my_lltree.set_conf(conf)
my_lltree.get_conf()
time.sleep(0.1)

start_time = time.time()
my_lltree.fit(X_train, Y_train.astype(np.int32), X_test, Y_test.astype(np.int32))
print("FIT --- %s seconds ---" % (time.time() - start_time))

start_time = time.time()
YP = my_lltree.predict(X_test)
print("PREDICT --- %s seconds ---" % (time.time() - start_time))

print("accuracy_score: %.2f" % np.sqrt(sklearn.metrics.accuracy_score(Y_test,YP)))
print("log_loss: %.2f" % sklearn.metrics.log_loss(Y_test,YP))
print("f1_score: %.2f" % sklearn.metrics.f1_score(Y_test,YP))

-----------------------------------------
mode :              classic_classification
epochs :            50
learning_rate :     0.1
metric :            accuracy
criterion :         absolute_error
max_depth :         2
min_leaf_size :     1
verbose :           0
-----------------------------------------
FIT --- 3.157616138458252 seconds ---
PREDICT --- 0.0005590915679931641 seconds ---
Type of Training Data : int32
Configuration mode : classic_classification
Gbt_classic_classification fit
All the distinct element for classification in sorted order are: 0 1 
accuracy_score: 0.95
log_loss: 3.68
f1_score: 0.90


In [16]:
from sklearn.ensemble import GradientBoostingClassifier
my_lltree = GradientBoostingClassifier(learning_rate = 0.1, n_estimators =50, max_depth = 2,
                                 loss = 'log_loss', criterion = 'friedman_mse',
                                 random_state = 0)

start_time = time.time()
my_lltree.fit(X_train, Y_train)
print("FIT --- %s seconds ---" % (time.time() - start_time))

start_time = time.time()
YP = my_lltree.predict(X_test)
print("PREDICT --- %s seconds ---" % (time.time() - start_time))

print("accuracy_score: %.2f" % np.sqrt(sklearn.metrics.accuracy_score(Y_test,YP)))
print("log_loss: %.2f" % sklearn.metrics.log_loss(Y_test,YP))
print("f1_score: %.2f" % sklearn.metrics.f1_score(Y_test,YP))

FIT --- 0.05683398246765137 seconds ---
PREDICT --- 0.0004990100860595703 seconds ---
accuracy_score: 0.96
log_loss: 2.99
f1_score: 0.92
