<center><h1  style="color:white; background-color:#000000; border-radius: 0px; padding:25px;"> Rules extraction </h1></center>

This notebook illustrates rule extraction from various tree-based models, using the iris data set.

In [None]:
import os

# Work from the parent of the notebook's directory
# (presumably the repository root, so the local `woodtapper` package resolves — confirm).
# The starting directory is remembered the first time this cell runs, so that
# re-running the cell in the same kernel does not climb one level higher each time.
NOTEBOOK_DIR = globals().get("NOTEBOOK_DIR", os.getcwd())
os.chdir(os.path.join(NOTEBOOK_DIR, os.pardir))
import time

import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import average_precision_score, accuracy_score,roc_auc_score
from sklearn.ensemble import RandomForestClassifier,GradientBoostingClassifier

from woodtapper.extract_rules import SirusClassifier,GbExtractorClassifier
from woodtapper.extract_rules.visualization import show_rules


## Load data :

In [None]:
# Load the iris data set and wrap the feature matrix in a DataFrame so that
# every column carries its feature name.
iris = load_iris()
X = pd.DataFrame(data=iris.data, columns=iris.feature_names)
y = iris.target

# Hold out a test split; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=0,
)

## Random forest : 

In [None]:
# Baseline: a plain RandomForestClassifier fitted on the training split and
# scored on the held-out test split.
clf_rf = RandomForestClassifier(max_depth=10, random_state=0)

# Time only the fit. perf_counter() is a monotonic, high-resolution clock, so
# the measured duration cannot be distorted by system clock adjustments the
# way a time.time() difference can.
fit_start = time.perf_counter()
clf_rf.fit(X_train, y_train)
fit_seconds = time.perf_counter() - fit_start

y_pred_rf = clf_rf.predict(X_test)
y_pred_proba_rf = clf_rf.predict_proba(X_test)

# The full probability matrix is passed to the ranking metrics because the
# target still has more than two classes at this point.
print('PR AUC :', average_precision_score(y_test, y_pred_proba_rf))
print('ROC AUC :', roc_auc_score(y_test, y_pred_proba_rf,average='micro',multi_class='ovr'))
print('Accuracy :', accuracy_score(y_test, y_pred_rf))
print('Fitting time = ',fit_seconds ,'s')

In [None]:
## RandomForestClassifier rules extraction
# SirusClassifier is fitted and scored on the same train/test split as the
# random forest above.
RFSirus = SirusClassifier(
    n_estimators=1000,
    max_depth=2,
    quantile=10,
    p0=0.0,
    num_rule=10,
    random_state=0,
    splitter="quantile",
)

# Time only the fit. perf_counter() is a monotonic, high-resolution clock, so
# the measured duration cannot be distorted by system clock adjustments the
# way a time.time() difference can.
fit_start = time.perf_counter()
RFSirus.fit(X_train,y_train)
fit_seconds = time.perf_counter() - fit_start

y_pred_sirus = RFSirus.predict(X_test)
y_pred_proba_sirus = RFSirus.predict_proba(X_test)
print('PR AUC :', average_precision_score(y_test, y_pred_proba_sirus))
print('ROC AUC :', roc_auc_score(y_test, y_pred_proba_sirus,average='micro',multi_class='ovr'))
print('Accuracy :', accuracy_score(y_test, y_pred_sirus))
print('Fitting time = ',fit_seconds ,'s')

In [None]:
# Display the rules of the fitted SirusClassifier for class index 2.
# NOTE(review): max_rules=20 is larger than the num_rule=10 the model was
# built with, so presumably at most 10 rules appear — confirm.
show_rules(
    RFSirus,
    max_rules=20,
    target_class_index=2,
)

## Gradient boosting (GB) :

In [None]:
# Merge class 2 into class 1 so the target becomes binary (0 vs. 1); the
# gradient-boosting cells score with column 1 of predict_proba only.
# The label arrays are modified in place: re-running the earlier multi-class
# cells after this one will see the binary labels.
for split_labels in (y_train, y_test):
    split_labels[split_labels == 2] = 1

In [None]:
# Training labels after class 2 was merged into class 1: only 0 and 1 should remain.
y_train

In [None]:
# Test labels after class 2 was merged into class 1: only 0 and 1 should remain.
y_test

In [None]:
# Baseline: a plain GradientBoostingClassifier fitted on the training split
# and scored on the held-out test split.
GB = GradientBoostingClassifier(
    n_estimators=100,
    max_depth=3,
    criterion="squared_error",
    random_state=19,
)

# Time only the fit. perf_counter() is a monotonic, high-resolution clock, so
# the measured duration cannot be distorted by system clock adjustments the
# way a time.time() difference can.
fit_start = time.perf_counter()
GB.fit(X_train, y_train)
fit_seconds = time.perf_counter() - fit_start

y_pred_gb = GB.predict(X_test)
y_pred_proba_gb = GB.predict_proba(X_test)

# The ranking metrics are given a 1-D score here: column 1 of the
# probability matrix.
print('PR AUC :', average_precision_score(y_test, y_pred_proba_gb[:,1]))
print('ROC AUC :', roc_auc_score(y_test, y_pred_proba_gb[:,1]))
print('Accuracy :', accuracy_score(y_test, y_pred_gb))
print('Fitting time = ',fit_seconds ,'s')

In [None]:
# GradientBoostingClassifier rules extraction
# GbExtractorClassifier is fitted and scored on the same train/test split as
# the gradient-boosting model above.
GBSirus = GbExtractorClassifier(
    n_estimators=1000,
    max_depth=3,
    p0=0.01,
    quantile=10,
    learning_rate=0.1,
    subsample=1.0,
    criterion="squared_error",
    loss="log_loss",
    random_state=19,
    splitter="quantile",
)

# Time only the fit. perf_counter() is a monotonic, high-resolution clock, so
# the measured duration cannot be distorted by system clock adjustments the
# way a time.time() difference can.
fit_start = time.perf_counter()
GBSirus.fit(X_train, y_train)
fit_seconds = time.perf_counter() - fit_start

y_pred_gb_rules = GBSirus.predict(X_test)
y_pred_proba_gb_rules = GBSirus.predict_proba(X_test)

# The ranking metrics are given a 1-D score here: column 1 of the
# probability matrix.
print('PR AUC :', average_precision_score(y_test, y_pred_proba_gb_rules[:,1]))
print('ROC AUC :', roc_auc_score(y_test, y_pred_proba_gb_rules[:,1]))
print('Accuracy :', accuracy_score(y_test, y_pred_gb_rules))
print('Fitting time = ',fit_seconds ,'s')

In [None]:
# Display up to 25 rules of the fitted GbExtractorClassifier for class index 1.
show_rules(
    GBSirus,
    max_rules=25,
    target_class_index=1,
)