# **Test RF with ObliqueTree from TorshaMajumder**

https://github.com/TorshaMajumder/Ensembles_of_Oblique_Decision_Trees 

https://github.com/valevalerio/Ensemble_Of_Oblique_Decision_Trees

In [1]:
# Google Drive folders added to the import path; presumably these hold the
# `midas` package and the external TorshaMajumder oblique-tree library that the
# notebook imports (confirm against the Drive layout).
FOLDERS = ['/content/drive/MyDrive/',
           '/content/drive/MyDrive/midas',
           '/content/drive/MyDrive/Testes e Experimentos/Testes/lib.external',
           '/content/drive/MyDrive/Testes e Experimentos/Testes/lib.external/TorshaMajumder',
           '/content/drive/MyDrive/Testes e Experimentos/Testes/lib.external/TorshaMajumder/Decision_trees']
import sys

# Append only the folders that are still missing, so re-running this cell does
# not keep stacking duplicate entries onto sys.path.
for f in FOLDERS:
  if f not in sys.path:
    sys.path.append(f)

import pandas as pd

import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.tree import DecisionTreeClassifier

from midas.v4.ensemble import RFClassifier

In [2]:
# Download the agr_a stream dataset from the scikit-multiflow dataset repository.
df_stream = pd.read_csv('https://github.com/scikit-multiflow/streaming-datasets/raw/master/agr_a.csv',
                        engine='c', low_memory=True, memory_map=True)

# The label is selected by name, so the features are defined the same way
# (everything except the label) instead of "all columns but the last one".
# This keeps the label out of X even if the column order of the CSV changes.
TARGET_COLUMN = 'class'
X = df_stream.drop(columns=TARGET_COLUMN)
y = df_stream[TARGET_COLUMN]

# NOTE(review): test_size=0.70 trains on 30% of the rows and evaluates on the
# remaining 70% -- confirm this split direction is intentional.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.70, random_state=100)

## Testa as árvores oblíquas individualmente

In [None]:
from TorshaMajumder.Decision_trees.CO2 import ContinuouslyOptimizedObliqueTree, CO2Classifier
from TorshaMajumder.Decision_trees.segmentor import Twoing, MSE, MeanSegmentor
from TorshaMajumder.Decision_trees.split_criteria import gini, twoing

# Single-tree baseline: Continuous Optimization of Oblique Splits (CO2) by [Norouzi et al.]
# Reference call kept from the original notes:
# CO2Classifier(MSE(),MeanSegmentor(), max_depth=50, min_samples_split=2, nu=4.0, tau=10, tol=1e-3, eta=0.01)
co2_params = dict(impurity=MSE(),
                  segmentor=MeanSegmentor(),
                  max_depth=5,
                  min_samples_split=2,
                  nu=1.0,
                  tau=10,
                  tol=1e-3,
                  eta=0.1)
co2t = ContinuouslyOptimizedObliqueTree(**co2_params)

# The tree is trained and queried with NumPy copies of the pandas objects.
co2t.fit(X_train.to_numpy(copy=True),
         y_train.to_numpy(copy=True))
y_pred = co2t.predict(X_test.to_numpy(copy=True))

# Hold-out accuracy of the single tree.
acc = accuracy_score(y_test, y_pred)
print('Acc: ', acc)

Acc:  0.5279442857142858


In [None]:
from TorshaMajumder.Decision_trees.HouseHolder_CART import HouseHolderCART, HHCartClassifier
from TorshaMajumder.Decision_trees.segmentor import Twoing, MSE, MeanSegmentor
from TorshaMajumder.Decision_trees.split_criteria import gini, twoing

# Single-tree baseline: HouseHolder CART-A by [Wickramarachchi et al.]
# Reference call kept from the original notes:
# HHCartClassifier(MSE(),MeanSegmentor(), max_depth=50, min_samples_split=2, alpha=None))
hhc_params = dict(impurity=MSE(),
                  segmentor=MeanSegmentor(),
                  max_depth=5,
                  min_samples_split=2,
                  method='eig',
                  tau=10,
                  alpha=None)
hhc = HouseHolderCART(**hhc_params)

# Train and predict on NumPy copies of the pandas objects.
hhc.fit(X_train.to_numpy(copy=True),
        y_train.to_numpy(copy=True))
y_pred = hhc.predict(X_test.to_numpy(copy=True))

# Hold-out accuracy of the single tree.
acc = accuracy_score(y_test, y_pred)
print('Acc: ', acc)

Acc:  0.6686614285714285


In [None]:
from TorshaMajumder.Decision_trees.NDT import *
from TorshaMajumder.Decision_trees.segmentor import Twoing, MSE, MeanSegmentor
from TorshaMajumder.Decision_trees.split_criteria import gini, twoing

# Single-tree baseline: Non-Linear Decision Trees by [Ittner et al.]
ndt_params = dict(criterion="gini",
                  max_depth=5,
                  min_samples_split=2,
                  min_features_split=1)
ndtc = NDTClassifier(**ndt_params)

# Train and predict on NumPy copies of the pandas objects.
ndtc.fit(X_train.to_numpy(copy=True),
         y_train.to_numpy(copy=True))
y_pred = ndtc.predict(X_test.to_numpy(copy=True))

# Hold-out accuracy of the single tree.
acc = accuracy_score(y_test, y_pred)
print('Acc: ', acc)

In [None]:
from TorshaMajumder.Decision_trees.Oblique_Classifier_1 import *
from TorshaMajumder.Decision_trees.segmentor import Twoing, MSE, MeanSegmentor
from TorshaMajumder.Decision_trees.split_criteria import gini, twoing

# Single-tree baseline: Oblique Classifier 1 (OC1) by [Murthy et al.]
oc1_params = dict(criterion="gini",
                  max_depth=3,
                  min_samples_split=2,
                  min_features_split=1)
obc1 = ObliqueClassifier1(**oc1_params)

# Train and predict on NumPy copies of the pandas objects.
obc1.fit(X_train.to_numpy(copy=True),
         y_train.to_numpy(copy=True))
y_pred = obc1.predict(X_test.to_numpy(copy=True))

# Hold-out accuracy of the single tree.
acc = accuracy_score(y_test, y_pred)
print('Acc: ', acc)

In [None]:
from TorshaMajumder.Decision_trees.RandCART import *
from TorshaMajumder.Decision_trees.segmentor import Twoing, MSE, MeanSegmentor
from TorshaMajumder.Decision_trees.split_criteria import gini, twoing

# Single-tree baseline: Randomized CART
# Reference call kept from the original notes:
# RandCARTClassifier(impurity=MSE(), segmentor=MeanSegmentor(), max_depth=50, min_samples_split=2)
rand_cart_params = dict(impurity=MSE(),
                        segmentor=MeanSegmentor(),
                        max_depth=5,
                        min_samples_split=2,
                        compare_with_cart=False)
rcc = Rand_CART(**rand_cart_params)

# Train and predict on NumPy copies of the pandas objects.
rcc.fit(X_train.to_numpy(copy=True),
        y_train.to_numpy(copy=True))
y_pred = rcc.predict(X_test.to_numpy(copy=True))

# Hold-out accuracy of the single tree.
acc = accuracy_score(y_test, y_pred)
print('Acc: ', acc)

Acc:  0.61188


In [None]:
from TorshaMajumder.Decision_trees.WODT import *
from TorshaMajumder.Decision_trees.segmentor import Twoing, MSE, MeanSegmentor
from TorshaMajumder.Decision_trees.split_criteria import gini, twoing

# Single-tree baseline: Weighted Oblique Decision Trees by [Bin-Bin Yang et al.]
# Reference call kept from the original notes:
# WeightedObliqueDecisionTreeClassifier(max_depth=50, min_samples_split=2, max_features='all')
wodt_params = dict(max_depth=50,
                   min_samples_split=2,
                   max_features='all')
wodt = WeightedObliqueDecisionTreeClassifier(**wodt_params)

# Train and predict on NumPy copies of the pandas objects.
wodt.fit(X_train.to_numpy(copy=True),
         y_train.to_numpy(copy=True))
y_pred = wodt.predict(X_test.to_numpy(copy=True))

# Hold-out accuracy of the single tree.
acc = accuracy_score(y_test, y_pred)
print('Acc: ', acc)

Acc:  0.6096614285714286


## Testa com a implementação do RF

In [None]:
from TorshaMajumder.Decision_trees.CO2 import ContinuouslyOptimizedObliqueTree, CO2Classifier
from TorshaMajumder.Decision_trees.segmentor import Twoing, MSE, MeanSegmentor
from TorshaMajumder.Decision_trees.split_criteria import gini, twoing

# Random forest built on Continuous Optimization of Oblique Splits (CO2) trees.

# Keyword arguments handed to RFClassifier as `params_estimators`.
cfg_base_estimator = dict(impurity=MSE(),
                          segmentor=MeanSegmentor(),
                          max_depth=3,
                          min_samples_split=2,
                          nu=1.0,
                          tau=10,
                          tol=1e-3,
                          eta=0.1)

rf = RFClassifier(base_estimator=ContinuouslyOptimizedObliqueTree,
                  n_estimators=10,
                  params_estimators=cfg_base_estimator,
                  enable_logger=True)

# Point the forest's logger at a file dedicated to this experiment.
log_path = '/content/drive/MyDrive/Testes e Experimentos/Testes/rf.co2.debuging.log'
rf._setup_logger(log_file=log_path)

# Unlike the single-tree cells, the forest receives the pandas objects directly.
rf = rf.fit(X_train, y_train)
y_pred = rf.predict(X_test)

# Hold-out accuracy of the ensemble.
acc = accuracy_score(y_test, y_pred)
print('Acc with oblique tree', acc)

INFO:rflog:Tree index [1] build with the feature index [0, 8, 1] and data index [230065 250494  37253 ... 274949 143113  23736]

INFO:rflog:Tree index [0] build with the feature index [6, 0, 3] and data index [230065 250494  37253 ... 274949 143113  23736]

INFO:rflog:Tree index [2] build with the feature index [4, 8, 6] and data index [230065 250494  37253 ... 274949 143113  23736]

INFO:rflog:Tree index [4] build with the feature index [1, 8, 5] and data index [230065 250494  37253 ... 274949 143113  23736]

INFO:rflog:Tree index [3] build with the feature index [8, 1, 5] and data index [230065 250494  37253 ... 274949 143113  23736]

INFO:rflog:Tree index [6] build with the feature index [8, 7, 1] and data index [230065 250494  37253 ... 274949 143113  23736]

INFO:rflog:Tree index [5] build with the feature index [3, 0, 1] and data index [230065 250494  37253 ... 274949 143113  23736]

INFO:rflog:Tree index [8] build with the feature index [0, 5, 4] and data index [230065 250494  3

In [4]:
from TorshaMajumder.Decision_trees.HouseHolder_CART import HouseHolderCART, HHCartClassifier
from TorshaMajumder.Decision_trees.segmentor import Twoing, MSE, MeanSegmentor
from TorshaMajumder.Decision_trees.split_criteria import gini, twoing

# Random forest built on HouseHolder CART-A trees [Wickramarachchi et al.].

# Keyword arguments handed to RFClassifier as `params_estimators`.
cfg_base_estimator = dict(impurity=MSE(),
                          segmentor=MeanSegmentor(),
                          max_depth=3,
                          min_samples_split=2,
                          method='eig',
                          tau=10,
                          alpha=None)

rf = RFClassifier(base_estimator=HouseHolderCART,
                  n_estimators=100,
                  params_estimators=cfg_base_estimator,
                  enable_logger=True)

# Point the forest's logger at a file dedicated to this experiment.
log_path = '/content/drive/MyDrive/Testes e Experimentos/Testes/rf.hhc.debuging.log'
rf._setup_logger(log_file=log_path)

# Unlike the single-tree cells, the forest receives the pandas objects directly.
rf = rf.fit(X_train, y_train)
y_pred = rf.predict(X_test)

# Hold-out accuracy of the ensemble.
acc = accuracy_score(y_test, y_pred)
print('Acc with oblique tree', acc)

INFO:rflog:Tree index [1] build with the feature index [7, 3, 0] and data index [230065 250494  37253 ... 274949 143113  23736]

INFO:rflog:Tree index [0] build with the feature index [2, 1, 3] and data index [230065 250494  37253 ... 274949 143113  23736]

INFO:rflog:Tree index [3] build with the feature index [0, 1, 6] and data index [230065 250494  37253 ... 274949 143113  23736]

INFO:rflog:Tree index [2] build with the feature index [2, 1, 8] and data index [230065 250494  37253 ... 274949 143113  23736]

INFO:rflog:Tree index [5] build with the feature index [3, 6, 8] and data index [230065 250494  37253 ... 274949 143113  23736]

INFO:rflog:Tree index [4] build with the feature index [1, 3, 2] and data index [230065 250494  37253 ... 274949 143113  23736]

INFO:rflog:Tree index [6] build with the feature index [1, 5, 8] and data index [230065 250494  37253 ... 274949 143113  23736]

INFO:rflog:Tree index [7] build with the feature index [3, 2, 4] and data index [230065 250494  3

Acc with oblique tree 0.6332371428571428


In [5]:
from TorshaMajumder.Decision_trees.RandCART import *
from TorshaMajumder.Decision_trees.segmentor import Twoing, MSE, MeanSegmentor
from TorshaMajumder.Decision_trees.split_criteria import gini, twoing

# Random forest built on Randomized CART trees.

# Keyword arguments handed to RFClassifier as `params_estimators`.
cfg_base_estimator = dict(impurity=MSE(),
                          segmentor=MeanSegmentor(),
                          max_depth=3,
                          min_samples_split=2,
                          compare_with_cart=False)

rf = RFClassifier(base_estimator=Rand_CART,
                  n_estimators=100,
                  params_estimators=cfg_base_estimator,
                  enable_logger=True)

# Point the forest's logger at a file dedicated to this experiment.
log_path = '/content/drive/MyDrive/Testes e Experimentos/Testes/rf.rc.debuging.log'
rf._setup_logger(log_file=log_path)

# Unlike the single-tree cells, the forest receives the pandas objects directly.
rf = rf.fit(X_train, y_train)
y_pred = rf.predict(X_test)

# Hold-out accuracy of the ensemble.
acc = accuracy_score(y_test, y_pred)
print('Acc with oblique tree', acc)

INFO:rflog:Tree index [0] build with the feature index [5, 8, 0] and data index [230065 250494  37253 ... 274949 143113  23736]

INFO:rflog:Tree index [1] build with the feature index [3, 0, 8] and data index [230065 250494  37253 ... 274949 143113  23736]

INFO:rflog:Tree index [2] build with the feature index [4, 3, 0] and data index [230065 250494  37253 ... 274949 143113  23736]

INFO:rflog:Tree index [3] build with the feature index [2, 1, 7] and data index [230065 250494  37253 ... 274949 143113  23736]

INFO:rflog:Tree index [5] build with the feature index [1, 2, 0] and data index [230065 250494  37253 ... 274949 143113  23736]

INFO:rflog:Tree index [4] build with the feature index [3, 2, 5] and data index [230065 250494  37253 ... 274949 143113  23736]

INFO:rflog:Tree index [6] build with the feature index [3, 5, 4] and data index [230065 250494  37253 ... 274949 143113  23736]

INFO:rflog:Tree index [7] build with the feature index [5, 4, 8] and data index [230065 250494  3

Acc with oblique tree 0.6073142857142857


In [6]:
from TorshaMajumder.Decision_trees.WODT import *
from TorshaMajumder.Decision_trees.segmentor import Twoing, MSE, MeanSegmentor
from TorshaMajumder.Decision_trees.split_criteria import gini, twoing

# Random forest built on Weighted Oblique Decision Trees [Bin-Bin Yang et al.].

# Keyword arguments handed to RFClassifier as `params_estimators`.
cfg_base_estimator = dict(max_depth=3,
                          min_samples_split=2,
                          max_features='all')

rf = RFClassifier(base_estimator=WeightedObliqueDecisionTreeClassifier,
                  n_estimators=100,
                  params_estimators=cfg_base_estimator,
                  enable_logger=True)

# Point the forest's logger at a file dedicated to this experiment.
log_path = '/content/drive/MyDrive/Testes e Experimentos/Testes/rf.wodt.debuging.log'
rf._setup_logger(log_file=log_path)

# Unlike the single-tree cells, the forest receives the pandas objects directly.
rf = rf.fit(X_train, y_train)
y_pred = rf.predict(X_test)

# Hold-out accuracy of the ensemble.
acc = accuracy_score(y_test, y_pred)
print('Acc with oblique tree', acc)

INFO:rflog:Tree index [1] build with the feature index [2, 8, 7] and data index [230065 250494  37253 ... 274949 143113  23736]

INFO:rflog:Tree index [2] build with the feature index [3, 1, 0] and data index [230065 250494  37253 ... 274949 143113  23736]

INFO:rflog:Tree index [0] build with the feature index [2, 7, 4] and data index [230065 250494  37253 ... 274949 143113  23736]

INFO:rflog:Tree index [4] build with the feature index [5, 6, 7] and data index [230065 250494  37253 ... 274949 143113  23736]

INFO:rflog:Tree index [5] build with the feature index [4, 5, 0] and data index [230065 250494  37253 ... 274949 143113  23736]

INFO:rflog:Tree index [6] build with the feature index [5, 0, 6] and data index [230065 250494  37253 ... 274949 143113  23736]

INFO:rflog:Tree index [3] build with the feature index [3, 8, 0] and data index [230065 250494  37253 ... 274949 143113  23736]

INFO:rflog:Tree index [7] build with the feature index [0, 5, 4] and data index [230065 250494  3

Acc with oblique tree 0.5279442857142858
