In [1]:
import time

import numpy as np
import pandas as pd
import sklearn.datasets, sklearn.metrics, sklearn.model_selection, sklearn.tree

import subprocess, sys

In [6]:
# Sanity check before building: ask the Makefile in the parent directory to
# print its SOURCES variable, i.e. the list of C++ files it will compile.
!cd ../ && make print-SOURCES

SOURCES = cpp/criterion.cpp cpp/factories.cpp cpp/gbt.cpp cpp/lltrees.cpp cpp/metrics.cpp cpp/tree.cpp cpp/wrapper.cpp


In [95]:
# Rebuild from scratch: `make clean` removes the parent directory's build/
# folder, then `make` recreates it and recompiles the C++ sources.
!cd ../ && make clean
!cd ../ && make 
# Run the helper script with the same interpreter as this kernel.
# subprocess.call returns the script's exit code, which becomes the cell
# output (0 means the script exited normally).
# NOTE(review): judging by its name the script only imports the built module
# as a smoke test -- confirm.
subprocess.call([sys.executable, "lltrees_python_import_for_debug.py"])

rm -rf build
mkdir -p build


g++ -g -o build/criterion.o cpp/criterion.cpp -I/usr/include/python3.10 -I/usr/include/python3.10  -Wno-unused-result -Wsign-compare -g      -fstack-protector-strong -Wformat -Werror=format-security  -DNDEBUG -g -fwrapv -O2 -Wall -fPIC -c -std=c++20 -DBOOST_BIND_GLOBAL_PLACEHOLDERS -DBOOST_ALLOW_DEPRECATED_HEADERS
g++ -g -o build/factories.o cpp/factories.cpp -I/usr/include/python3.10 -I/usr/include/python3.10  -Wno-unused-result -Wsign-compare -g      -fstack-protector-strong -Wformat -Werror=format-security  -DNDEBUG -g -fwrapv -O2 -Wall -fPIC -c -std=c++20 -DBOOST_BIND_GLOBAL_PLACEHOLDERS -DBOOST_ALLOW_DEPRECATED_HEADERS
g++ -g -o build/gbt.o cpp/gbt.cpp -I/usr/include/python3.10 -I/usr/include/python3.10  -Wno-unused-result -Wsign-compare -g      -fstack-protector-strong -Wformat -Werror=format-security  -DNDEBUG -g -fwrapv -O2 -Wall -fPIC -c -std=c++20 -DBOOST_BIND_GLOBAL_PLACEHOLDERS -DBOOST_ALLOW_DEPRECATED_HEADERS
g++ -g -o build/lltrees.o cpp/lltrees.cpp -I/usr/include/python3

0

# make_regression

In [4]:
# Synthetic regression problem: 1000 rows, 8 features (5 of them informative),
# a single target and a little Gaussian noise. The seed is fixed so every run
# sees the same data.
X, Y = sklearn.datasets.make_regression(
    n_samples=1000,
    n_features=8,
    n_informative=5,
    n_targets=1,
    noise=1,
    random_state=42,
)
# Hold out 30% of the rows for evaluation; the split is seeded as well.
X_train, X_test, Y_train, Y_test = sklearn.model_selection.train_test_split(
    X,
    Y,
    test_size=0.3,
    random_state=42,
)

In [5]:
# Make the freshly built extension importable. The build cell runs `make` from
# the parent directory, which writes into that directory's build/ folder, so a
# path relative to the notebook's working directory replaces the former
# machine-specific absolute path and works on any checkout.
LLTREES_BUILD_DIR = '../build'
if LLTREES_BUILD_DIR not in sys.path:  # do not grow sys.path every time the cell is re-run
    sys.path.append(LLTREES_BUILD_DIR)
import lltrees

# One epoch with learning_rate 1: a single tree, using the same depth and
# criterion as the sklearn DecisionTreeRegressor cell that follows.
conf = {
    'epochs' : 1,
    'learning_rate' : 1,
    'metric' : 'mae', # Possible metrics for the moment : mae, mse
    'max_depth' : 5,
    'min_leaf_size' : 2,
    'criterion' : "absolute_error",  # Possible criteria for the moment : variance, absolute_error
    'verbose' : 0,
}
my_lltree = lltrees.lltree()
my_lltree.set_conf(conf)
my_lltree.get_conf()  # prints the configuration actually stored by the model

# time.perf_counter() is the high-resolution, monotonic clock meant for
# measuring short intervals; time.time() is wall-clock time and can jump.
start_time = time.perf_counter()
# The test split is passed as the 3rd/4th arguments.
# NOTE(review): these appear to be the validation set used while fitting -- confirm.
my_lltree.fit(X_train, Y_train, X_test, Y_test)
print("FIT --- %s seconds ---" % (time.perf_counter() - start_time))

start_time = time.perf_counter()
YP = my_lltree.predict(X_test)
print("PREDICT --- %s seconds ---" % (time.perf_counter() - start_time))

# Hold-out quality: RMSE is the square root of the mean squared error.
print("rmse: %.2f" % np.sqrt(sklearn.metrics.mean_squared_error(Y_test,YP)))
print("mae: %.2f" % sklearn.metrics.mean_absolute_error(Y_test,YP))
print("r2: %.2f" % sklearn.metrics.r2_score(Y_test,YP))

FIT --- 0.0576777458190918 seconds ---
PREDICT --- 8.654594421386719e-05 seconds ---
rmse: 97.54
mae: 77.71
r2: 0.38
-----------------------------------------
mode :              regression
epochs :            1
learning_rate :     1
metric :            mae
criterion :         absolute_error
max_depth :         5
min_leaf_size :     2
verbose :           0
-----------------------------------------
Type of Training Data : float64
Configuration mode : regression
0 0 -35.4794 700 300 700
-145.717
-106.205
-46.1597
-79.3083
-42.911
-77.0408
-26.0728
-222.452
-47.5454
-14.3811
22.6894
79.199
67.3929
46.6714
54.8255
50.7381
-47.1084
-56.1447
99.4178
128.028
138.67
mae::get 138.67 175.878 52356.3
mae::get 138.67 81.8746 23311.9
-42.911 -46.1597 -35.4794 700 300 700
-42.911 -46.1597 7.43154 700 300 700


In [6]:
# Reference implementation: scikit-learn's single decision tree with the same
# depth and split criterion as the lltrees run above.
# NOTE: the variable keeps the name `my_lltree` even though it now holds a
# scikit-learn estimator, not an lltrees model.
my_lltree = sklearn.tree.DecisionTreeRegressor(
    max_depth=5,
    criterion="absolute_error",
    random_state=0,
)

# time.perf_counter() is the high-resolution, monotonic clock meant for
# measuring short intervals; time.time() is wall-clock time and can jump.
start_time = time.perf_counter()
my_lltree.fit(X_train, Y_train)
print("FIT --- %s seconds ---" % (time.perf_counter() - start_time))

start_time = time.perf_counter()
YP = my_lltree.predict(X_test)
print("PREDICT --- %s seconds ---" % (time.perf_counter() - start_time))

# Show a few raw predictions, then the same hold-out metrics as the lltrees cell.
print(YP[0:10])
print("rmse: %.2f" % np.sqrt(sklearn.metrics.mean_squared_error(Y_test,YP)))
print("mae: %.2f" % sklearn.metrics.mean_absolute_error(Y_test,YP))
print("r2: %.2f" % sklearn.metrics.r2_score(Y_test,YP))

FIT --- 0.025666236877441406 seconds ---
PREDICT --- 0.00044417381286621094 seconds ---
[  -5.25635722  127.29805997 -110.4560012   122.24421396 -110.4560012
  -39.90414254  106.73581898  -39.90414254  223.68925908  240.37517626]
rmse: 63.68
mae: 49.35
r2: 0.74


In [7]:
# Boosted configuration: 50 epochs with a shrinkage of 0.1.
# NOTE(review): the output recorded for this cell prints `learning_rate : 0`
# and ends with r2 = -0.00, which suggests the 0.1 given here is truncated to
# an integer somewhere between set_conf and the C++ side -- confirm and fix
# in the wrapper before trusting these numbers.
conf = {
    'epochs' : 50,
    'learning_rate' : 0.1,
    'metric' : 'mae', # Possible metrics for the moment : mae, mse
    'max_depth' : 5,
    'min_leaf_size' : 2,
    'criterion' : "absolute_error",  # Possible criteria for the moment : variance, absolute_error
    'verbose' : 0,
}
my_lltree = lltrees.lltree()
my_lltree.set_conf(conf)
my_lltree.get_conf()  # prints the configuration actually stored by the model

# time.perf_counter() is the high-resolution, monotonic clock meant for
# measuring short intervals; time.time() is wall-clock time and can jump.
start_time = time.perf_counter()
# No validation data is passed here; the recorded output reports
# "No Validate Data, will use Training Data." in that case.
my_lltree.fit(X_train, Y_train)
print("FIT --- %s seconds ---" % (time.perf_counter() - start_time))

start_time = time.perf_counter()
YP = my_lltree.predict(X_test)
print("PREDICT --- %s seconds ---" % (time.perf_counter() - start_time))

# Hold-out quality: RMSE is the square root of the mean squared error.
print("rmse: %.2f" % np.sqrt(sklearn.metrics.mean_squared_error(Y_test,YP)))
print("mae: %.2f" % sklearn.metrics.mean_absolute_error(Y_test,YP))
print("r2: %.2f" % sklearn.metrics.r2_score(Y_test,YP))

FIT --- 3.09287428855896 seconds ---
PREDICT --- 0.0004191398620605469 seconds ---
rmse: 123.99
mae: 101.51
r2: -0.00
-----------------------------------------
mode :              regression
epochs :            50
learning_rate :     0
metric :            mae
criterion :         absolute_error
max_depth :         5
min_leaf_size :     2
verbose :           0
-----------------------------------------
Type of Training Data : float64
Configuration mode : regression
No Validate Data, will use Training Data.
0 0 -35.4794 700 700 700
-119.058
-66.3371
-72.0952
0.688991
-42.911
-71.9097
-38.7297
-130.19
1.88323
-1.72046
129.138
-14.3811
110.278
76.5077
25.0161
57.4411
67.6968
58.2411
123.421
mae::get 0 175.878 66593.7
mae::get 0 175.878 66593.7
0 0 -35.4794 700 700 700
0 0 -35.4794 700 700 700
-45.802
-61.2591
-128.181
-47.0886
-71.9097
-38.7297
-130.19
1.88323
-1.72046
64.078
61.0629
111.106
30.1322
76.0072
142.43
149.404
mae::get 0 175.878 66593.7
mae::get 0 175.878 66593.7
0 0 -35.4794 700

In [8]:
from sklearn.ensemble import GradientBoostingRegressor

# Reference implementation: scikit-learn gradient boosting with the same
# number of estimators (50) and learning rate (0.1) as the boosted lltrees
# run above, optimising the absolute-error loss.
# NOTE: the variable keeps the name `my_lltree` even though it now holds a
# scikit-learn estimator, not an lltrees model.
my_lltree = GradientBoostingRegressor(
    learning_rate=0.1,
    n_estimators=50,
    loss='absolute_error',
    criterion='squared_error',
    random_state=0,
)

# time.perf_counter() is the high-resolution, monotonic clock meant for
# measuring short intervals; time.time() is wall-clock time and can jump.
start_time = time.perf_counter()
my_lltree.fit(X_train, Y_train)
print("FIT --- %s seconds ---" % (time.perf_counter() - start_time))

start_time = time.perf_counter()
YP = my_lltree.predict(X_test)
print("PREDICT --- %s seconds ---" % (time.perf_counter() - start_time))

# Hold-out quality: RMSE is the square root of the mean squared error.
print("rmse: %.2f" % np.sqrt(sklearn.metrics.mean_squared_error(Y_test,YP)))
print("mae: %.2f" % sklearn.metrics.mean_absolute_error(Y_test,YP))
print("r2: %.2f" % sklearn.metrics.r2_score(Y_test,YP))

FIT --- 0.14905500411987305 seconds ---
PREDICT --- 0.0008254051208496094 seconds ---
rmse: 44.56
mae: 31.80
r2: 0.87


# make_classification

In [9]:
# Synthetic binary classification problem: 1000 rows, 8 features (5 of them
# informative), two classes. The seed is fixed so every run sees the same data.
X, Y = sklearn.datasets.make_classification(
    n_samples=1000,
    n_features=8,
    n_informative=5,
    n_classes=2,
    random_state=42,
)
# Hold out 30% of the rows for evaluation; the split is seeded as well.
X_train, X_test, Y_train, Y_test = sklearn.model_selection.train_test_split(
    X,
    Y,
    test_size=0.3,
    random_state=42,
)

In [10]:
# NOTE(review): classification experiment, currently disabled (every line is
# commented out). Points to check before re-enabling it:
#   - the conf keys below (metric_name, lltree_max_depth, lltree_min_size_split,
#     lltree_criterion) differ from the keys the regression cells pass to
#     set_conf (metric, max_depth, min_leaf_size, criterion) -- confirm which
#     set the wrapper currently expects;
#   - np.sqrt() around accuracy_score looks like a copy-paste from the rmse
#     line and should probably be dropped;
#   - log_loss is normally computed from predicted probabilities rather than
#     hard labels -- confirm what predict() returns in classification mode.
# conf ={
#     'epochs' : 50,
#     'learning_rate' : 0.1,
#     'metric_name' : 'mae', # Possible metrics for the moment : mae, mse
#     'lltree_max_depth' : 3,
#     'lltree_min_size_split' : 1,
#     'lltree_criterion' : "gini",  # Criteria used by the regression cells : variance, absolute_error (gini support to be confirmed)
#     'verbose' : 1,
# }
# my_lltree = lltrees.lltree()
# my_lltree.set_conf(conf)
# my_lltree.get_conf()

# start_time = time.time()
# my_lltree.fit(X_train, Y_train)
# print("FIT --- %s seconds ---" % (time.time() - start_time))

# start_time = time.time()
# YP = my_lltree.predict(X_test)
# print("PREDICT --- %s seconds ---" % (time.time() - start_time))

# print("accuracy_score: %.2f" % np.sqrt(sklearn.metrics.accuracy_score(Y_test,YP)))
# print("log_loss: %.2f" % sklearn.metrics.log_loss(Y_test,YP))
# print("f1_score: %.2f" % sklearn.metrics.f1_score(Y_test,YP))