In [1]:
# Standard library.
import os
import pickle  # used explicitly below to load the pickled rulesets
import sys

# Make the project sources importable; this must run before any project import.
sys.path.append("src")
from src import *

# Third-party.
import numpy as np  # used explicitly below for the random test instance
import pandas as pd
from prettytable import from_csv

# Project modules.
# NOTE(review): the wildcard imports below supply names such as DATASET_INFO,
# DATA_FP, n_fold_rules_fp and predict_explain used later in the notebook;
# presumably they should become explicit imports once their home modules are confirmed.
from model.generation.helpers import init_dataset_dir
from model.generation.helpers.split_data import load_data
from cross_validations import *
from evaluate_rules.predict_explain import print_ruleset
from functionality_helpers import *
from model.generation import generate_data

  from pandas.core.index import Index as PandasIndex


# Breast Cancer UCI dataset

- Input: 30 real-valued (normalised to [0,1]) input features describing characteristics of the cell nuclei present in the digitised images of breast mass

- Output: Malignant (encoded as 0) and Benign (encoded as 1)

In [2]:
# Dataset identifier; later cells also use it as the dataset's directory name under ../data/.
dataset_name = 'breast_cancer_uci'
# Name of the label column in the raw CSV.
target_col_name = 'diagnosis'
# Load the raw Breast Cancer UCI table from the preprocessing folder.
data = pd.read_csv('../data/preprocessing/raw_data/breast_cancer_uci.csv')

In [3]:
# (rows, columns) of the raw table, as a quick sanity check on the load.
data.shape

(569, 31)

In [4]:
# Class balance of the label column. Uses the `target_col_name` constant
# defined with the dataset configuration instead of repeating the literal
# column name, so the two cannot drift apart.
data[target_col_name].value_counts()

1    357
0    212
Name: diagnosis, dtype: int64

In [5]:
# Root folder that holds every dataset directory.
path_to_data_folder = '../data/'
# Set up the directory layout for this dataset under the data root.
# NOTE(review): the behaviour lives in model.generation.helpers.init_dataset_dir;
# the recorded output suggests it reports directories that already exist and
# presumably creates missing ones — confirm.
init_dataset_dir.run(dataset_name=dataset_name, path_to_data_folder=path_to_data_folder)

Directory  ../data/breast_cancer_uci/  already exists
Directory  ../data/breast_cancer_uci/neural_network_initialisation  already exists
Directory  ../data/breast_cancer_uci/cross_validation  already exists


In [6]:
# Folder of this dataset, derived from the shared data root so the '../data/'
# literal is defined in exactly one place (`path_to_data_folder`).
data_path = path_to_data_folder + dataset_name + '/'

In [7]:
# Write the raw table to <data_path>/data.csv without the DataFrame index column.
data.to_csv(data_path + 'data.csv', index=False)

# Classification using DNN

In [8]:
# Load the feature matrix X and labels y through the project helper.
# NOTE(review): DATASET_INFO and DATA_FP are not defined in this notebook;
# presumably they come from one of the wildcard imports — confirm they refer
# to this dataset and to the data.csv file written in the previous cell.
X, y = load_data(DATASET_INFO, DATA_FP)

In [9]:
# Run the data-generation pipeline with data splitting and per-fold data
# generation switched on, and with grid search and the best-initialisation
# search switched off.
# NOTE(review): the exact effect of each flag is defined in
# model.generation.generate_data; the recorded output shows 5 models being
# trained, one per fold — confirm the flag semantics there.
generate_data.run(X=X, y=y,
                  split_data_flag=True,
                  grid_search_flag=False,
                  find_best_initialisation_flag=False,
                  generate_fold_data_flag=True)

5
Cleared contents of file ../data/breast_cancer_uci/neural_network_initialisation/data_split_indices.txt
Split data into train/test split for initialisation.
Directory  ../data/breast_cancer_uci/cross_validation/5_folds/  already exists
Directory  ../data/breast_cancer_uci/cross_validation/5_folds/rule_extraction/MOD_DeepRED_C5/  already exists
Directory  ../data/breast_cancer_uci/cross_validation/5_folds/rule_extraction/MOD_DeepRED_C5/rules_extracted/  already exists
Directory  ../data/breast_cancer_uci/cross_validation/5_folds/trained_models/  already exists
Cleared contents of file ../data/breast_cancer_uci/cross_validation/5_folds/data_split_indices.txt
Split data into 5 folds.
Training model 0/5


 1 1 1 1 0 0 1 0 0 1 1 1 1 0 1 0 0 1 1 1 1 1 0 0 1 0 1 0 0 1 1 1 0 0 1 0 0
 1 1 1 0 1 1 0 0 1 0 0 1 0 1 1 0 1 1 1 1 1 0 0 0 0 1 1 0 1 0 0 0 1 0 1 1 0
 1 1 1 1 0 1 1 1 1 1 1 1 0 1 1 1 0 1 0 1 1 1 0 1 1 1 0 1 1 0 0 0 1 0 0 1 1
 1 0 0 0 0 0 1 0 0 1 0 1 0 1 1 1 0 0 0 0 1 1 0 0 1 1 1 1 1 1 1 1 0 0 1 0 1
 1 0 0 1 0 1 1 1 0 1 1 1 1 0 0 0 0 0 0 0 0 0 1 1 1 1 0 1 0 1 1 0 0 1 0 0 1
 1 1 1 1 1 1 0 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 0 1 1 1 0 0 0 1 1
 1 1 0 1 0 1 1 1 1 1 1 1 1 1 1 0 0 0 1 1 1 1 1 1 1 1 1 0 1 0 0 0 1 0 0 1 1
 1 1 0 1 1 1 1 1 0 1 1 0 1 1 0 0 1 1 1 1 1 1 0 1 1 1 1 1 1 0 1 1 1 1 1 0 1
 1 0 1 1 1 1 1 1 1 1 1 1 0 1 0 1 0 1 1 1 1 1 0 0 1 1 1 0 1 0 1 1 1 1 1 1 1
 0 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 1 1 0 1 1 1 1 1 0 0 1 0 1 1 1
 1 1 0 1 1 0 0 1 0 0 1 1 0 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 0 0 0 0 0 1] as keyword args. From version 0.25 passing these as positional arguments will result in an error


Training model 1/5


 1 1 1 1 1 0 0 1 0 0 1 1 0 0 1 1 1 1 0 0 1 0 1 0 0 1 1 1 0 0 1 0 0 0 1 1 0
 1 1 1 1 1 0 0 1 1 1 1 0 1 1 1 1 1 1 1 1 0 0 1 0 1 1 1 0 1 0 1 0 0 1 0 1 1
 0 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 0 1 1 0 0 1 0 1 1 0 0 1 1 0 0 1 1 1 1 0 0
 0 1 0 1 0 1 1 1 0 1 0 0 1 0 0 0 0 0 0 0 1 0 1 0 1 0 1 0 0 0 1 1 0 0 1 1 1
 0 1 1 1 1 0 1 0 1 1 1 1 0 1 1 1 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 0
 1 0 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 0 1
 1 1 1 0 0 1 1 1 1 0 1 1 0 1 1 1 0 1 1 1 1 0 0 1 1 1 1 1 1 1 0 0 0 1 0 0 1
 1 1 1 0 1 1 1 1 0 1 1 1 0 1 1 0 1 1 1 1 0 1 1 1 1 1 1 0 1 1 1 1 0 1 0 1 1
 1 1 1 1 1 1 1 1 1 0 1 0 1 0 1 1 1 1 0 1 1 0 1 0 1 0 0 1 1 1 1 1 1 1 0 0 1
 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 0 1 1 1 0 1 1 1 1 0 1 0 1 0 1 1 1 0
 0 1 0 1 0 1 1 1 0 1 1 1 1 1 1 1 1 1 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 0 0 0 0 0 0] as keyword args. From version 0.25 passing these as positional arguments will result in an error


Training model 2/5


 1 0 1 1 0 1 0 0 1 1 1 0 1 0 0 1 0 1 0 0 1 0 0 0 1 1 0 0 1 1 1 1 1 0 0 1 1
 1 0 1 1 1 1 0 1 0 1 1 1 1 1 1 1 0 0 0 1 0 0 1 0 0 1 0 1 0 0 1 0 0 1 1 1 0
 1 1 0 1 1 1 1 1 1 1 1 0 0 1 1 0 0 1 1 0 0 1 1 1 1 0 1 1 0 0 0 1 1 1 1 0 1
 1 0 1 0 0 0 0 1 0 0 0 0 1 1 0 0 0 0 1 0 0 1 1 0 1 1 1 1 1 0 0 1 1 0 1 1 0
 0 1 1 1 0 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 0 1 0 1 1 0 1 1 0
 1 0 0 1 1 1 1 1 1 1 1 1 0 1 1 0 1 0 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 0 1 0
 1 1 1 0 0 1 1 1 0 0 1 0 1 1 1 0 1 1 1 1 0 0 0 1 1 1 1 1 1 1 1 1 0 1 0 0 0
 0 0 1 1 1 1 1 1 1 1 1 0 1 1 1 1 0 0 1 1 1 1 1 1 1 1 1 0 1 1 1 1 0 1 1 1 1
 1 1 1 1 1 1 0 1 0 0 1 1 1 1 0 1 1 1 0 1 1 0 1 0 1 1 1 1 1 1 1 0 0 1 1 1 1
 1 1 0 1 1 1 1 1 1 1 1 0 1 1 1 1 1 0 1 0 1 1 0 1 1 1 1 0 0 1 1 0 1 1 1 1 1
 1 1 1 0 1 0 1 1 1 1 1 1 1 1 1 1 1 1 0 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 0 0 0 0 1] as keyword args. From version 0.25 passing these as positional arguments will result in an error


Training model 3/5


 0 0 0 1 0 1 1 1 1 1 0 0 0 1 1 1 1 1 0 1 1 1 0 1 0 0 1 0 1 1 0 0 1 0 0 1 1
 0 1 1 0 0 1 1 1 0 1 1 1 1 1 1 0 1 1 1 1 1 0 0 1 0 0 1 1 1 0 0 1 0 1 0 0 1
 1 0 1 0 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 0 0 1 1 1 0 0 1 0 1 1 1 0 1 1 0 0 1
 1 0 1 1 0 1 1 0 0 1 0 0 0 0 1 0 1 0 1 1 0 1 0 0 0 1 1 0 1 1 1 0 1 1 1 1 0
 0 1 1 0 1 0 1 0 1 1 0 1 1 1 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 0 1 0 1
 0 1 1 0 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 0 1 0 1 1 1 1 1 1 1 1 1 1 1 0
 1 0 1 1 1 0 0 1 1 1 1 0 1 0 1 0 1 1 0 1 1 1 1 1 1 0 0 1 1 1 1 1 1 1 1 1 0
 0 1 0 0 0 1 0 0 1 1 1 1 0 1 1 1 1 0 1 1 1 0 1 1 0 0 1 1 1 1 1 0 1 1 1 1 1
 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 0 0 1 0 1 1 1 1 0 1 1 0 1 0 1 0 1 0 1 1
 1 1 0 0 1 1 1 1 1 0 1 1 1 1 1 1 1 1 0 1 1 1 1 1 0 1 1 1 1 1 0 0 1 0 1 0 1
 1 1 0 1 1 0 1 0 0 1 1 0 1 1 1 1 1 1 1 1 1 0 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 0 0 0 1] as keyword args. From version 0.25 passing these as positional arguments will result in an error


Training model 4/5


 1 1 1 1 0 0 1 1 1 1 0 1 0 0 1 1 1 1 0 1 0 0 1 1 0 1 1 1 0 0 0 0 0 1 1 0 0
 0 1 1 0 0 1 1 1 0 1 1 0 1 1 1 1 1 1 1 1 0 0 1 0 0 1 1 1 0 0 1 0 1 0 0 1 0
 0 1 0 1 1 0 1 1 1 0 1 1 1 1 1 1 1 1 1 0 1 1 1 1 0 0 1 0 0 0 1 1 0 0 1 1 1
 0 1 0 0 1 0 1 0 1 1 0 1 1 0 1 0 1 0 0 0 1 0 1 0 1 1 0 1 0 0 0 1 0 1 0 1 1
 1 1 0 0 1 1 0 1 1 0 0 1 0 1 1 1 1 1 1 1 1 0 1 0 0 0 0 0 0 0 0 0 0 0 1 1 1
 1 1 1 0 1 1 1 1 1 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 0 0 1 1 1 1 1 1 1 1
 1 1 1 0 1 0 1 0 1 1 1 0 0 0 1 1 0 1 0 1 0 1 1 1 1 1 1 1 0 0 1 1 1 1 1 1 1
 1 1 1 0 0 1 0 0 1 1 1 1 0 1 1 1 1 0 1 0 1 1 1 1 1 0 1 1 1 1 0 1 1 0 1 1 0
 1 1 1 1 1 1 1 1 1 1 1 0 1 0 0 0 1 1 1 1 1 1 0 0 1 1 1 1 1 1 1 1 1 1 0 1 1
 1 1 1 1 0 1 1 1 1 1 1 1 0 1 1 1 1 1 1 0 1 0 0 1 1 1 1 0 0 1 0 1 1 1 1 0 1
 1 0 1 0 1 0 0 1 1 0 1 1 1 1 1 1 1 1 1 0 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 0 0 0 0 0 0 1] as keyword args. From version 0.25 passing these as positional arguments will result in an error


Cleaning up temporary files...done


# Rule extraction using REM-D

In [10]:
# Cross-validate REM-D with both rule extraction and rule evaluation enabled.
cross_validate_rem_d(extract_rules_flag=True, evaluate_rules_flag=True)

Computed layerwise activations.
Extracting layer 2 rules:
.
.
Substituting layer 2 rules   Rule Premise Length:  1
    premise: 1
done
Extracting layer 1 rules:
.
.
Substituting layer 1 rules   Rule Premise Length:  1
    premise: 1
done
Extracting layer 0 rules:
...
...
Substituting layer 0 rules   Rule Premise Length:  2
    premise: 1
    premise: 2
done
Extracting layer 2 rules:
.
.
Substituting layer 2 rules   Rule Premise Length:  1
    premise: 1
done
Extracting layer 1 rules:
.
.
Substituting layer 1 rules   Rule Premise Length:  1
    premise: 1
done
Extracting layer 0 rules:
...
...
Substituting layer 0 rules   Rule Premise Length:  2
    premise: 1
    premise: 2
done
Saving fold 0/5 rules extracted...done
Saving fold 0/5 results...done
Computed layerwise activations.
Extracting layer 2 rules:
.
.
Substituting layer 2 rules   Rule Premise Length:  1
    premise: 1
done
Extracting layer 1 rules:
.
.
Substituting layer 1 rules   Rule Premise Length:  1
    premise: 1
done
Extr

In [11]:
# Read the per-fold REM-D results CSV into a PrettyTable and display it.
with open(N_FOLD_RESULTS_FP, "r") as results_file:
    rem_d_results_table = from_csv(results_file)

print(rem_d_results_table)

+---------+--------+--------+--------+--------+--------+---------------+----------------+-----------+--------------+
|   fold  | nn_acc | nn_auc | re_acc | re_auc | re_fid | re_time (sec) | re_memory (MB) | rules_num | rules_av_len |
+---------+--------+--------+--------+--------+--------+---------------+----------------+-----------+--------------+
|   0.0   | 0.965  | 0.965  | 0.939  | 0.919  | 0.956  |     1.131     |     355.21     |    24.0   |    2.792     |
|   1.0   | 0.939  | 0.939  | 0.956  | 0.956  | 0.965  |     0.947     |     72.029     |    12.0   |     2.25     |
|   2.0   | 0.956  | 0.956  |  0.93  | 0.944  | 0.939  |     1.467     |     90.059     |    42.0   |    3.405     |
|   3.0   | 0.991  | 0.991  | 0.974  | 0.974  | 0.965  |      0.95     |     77.77      |    20.0   |     3.3      |
|   4.0   | 0.991  | 0.991  | 0.956  | 0.955  | 0.947  |     1.152     |    199.162     |    69.0   |    6.261     |
| average | 0.968  | 0.968  | 0.951  |  0.95  | 0.954  |     1.1

In [12]:
# Print the rules extracted from fold 1.
# Fold indices are 0-based (folds 0..4). The comment and variable name
# previously said "fold 3" although index 1 was loaded; they now match the
# file that is actually read.
# NOTE: pickle.load can execute arbitrary code; only load rule files that
# were produced by this project.

fold_to_print = 1
with open(n_fold_rules_fp(fold_to_print), 'rb') as rules_file:
    ruleset_fold_1 = pickle.load(rules_file)
print(print_ruleset(ruleset_fold_1))


If (perimeter_3 <= 0.34) AND (texture_1 <= 0.36) AND (smoothness_3 <= 0.35) Then B
If (concavepoints_3 <= 0.46) AND (perimeter_3 <= 0.31) AND (area_3 <= 0.14) Then B
If (perimeter_3 <= 0.33) AND (area_3 <= 0.14) AND (smoothness_3 <= 0.35) Then B
If (concavepoints_3 <= 0.46) AND (perimeter_3 <= 0.34) AND (texture_1 <= 0.36) Then B
If (smoothness_3 <= 0.42) AND (perimeter_3 <= 0.31) AND (area_3 <= 0.14) Then B
If (perimeter_3 <= 0.34) AND (smoothness_3 <= 0.42) AND (texture_1 <= 0.36) Then B
If (perimeter_3 > 0.34) Then M
If (perimeter_3 > 0.33) Then M
If (smoothness_3 > 0.35) AND (perimeter_3 > 0.31) Then M
If (texture_1 > 0.36) AND (area_3 > 0.14) Then M
If (smoothness_3 > 0.42) AND (concavepoints_3 > 0.46) Then M
If (radius_3 > 0.34) Then M



In [13]:
# Classify one randomly generated sample with the ruleset of a randomly
# chosen fold, then show the explanation that led to the prediction.
# The seed is fixed so the sample and the chosen fold are reproducible.

np.random.seed(114)
instance = np.random.uniform(low=0, high=1, size=30)
fold = np.random.randint(5)

with open(n_fold_rules_fp(fold), mode='rb') as pickled_rules:
    rules = pickle.load(pickled_rules)

prediction, explanation = predict_explain(rules, instance)
explanation_text = print_explanation(prediction, explanation)
print(explanation_text)


If (concavepoints_3 <= 0.54) AND (radius_3 <= 0.28) Then B
If (concavepoints_3 <= 0.37) Then B



In [14]:
# Reports three things:
#
# 1. The top recurring features across all 5 folds.
#
# 2. The top recurring features across all 5 folds, for each class.
#
# 3. The frequency of each operator for every feature appearing in the
#    ruleset, for each class, e.g.
#    {'B': {'concavepoints_3': [0, 28]}, 'M': {'area_3': [53, 0]}}
#    In this example the feature 'concavepoints_3' appears with the
#    greater-than operator 0 times and with the less-than operator 28 times
#    for class Benign.

compute_top_recurring_features_across_folds(flag=True)

features recurrence across folds:
concavepoints_3
texture_3
concavepoints_1
area_3
smoothness_3
radius_3
perimeter_3
compactness_2
compactness_3
concavity_3
symmerty_3
compactness_1
concavity_2
area_2
texture_1
radius_2
smoothness_1
fractaldimension_1
fractaldimension_3
smoothness_2
concavity_1
texture_2
perimeter_1


features recurrence per class across folds {'B': ['concavepoints_3', 'perimeter_3', 'area_3', 'texture_3', 'texture_1', 'smoothness_3', 'radius_3', 'compactness_3', 'concavepoints_1', 'smoothness_1', 'fractaldimension_1', 'fractaldimension_3', 'area_2', 'radius_2', 'concavity_3', 'perimeter_1', 'concavity_1', 'compactness_1'], 'M': ['concavepoints_3', 'concavepoints_1', 'texture_3', 'area_3', 'smoothness_3', 'compactness_2', 'symmerty_3', 'radius_3', 'concavity_3', 'compactness_3', 'compactness_1', 'concavity_2', 'perimeter_3', 'area_2', 'texture_1', 'radius_2', 'smoothness_1', 'fractaldimension_1', 'smoothness_2', 'texture_2', 'concavity_1', 'fractaldimension_3']}


top 

# Classification using decision tree and rule extraction using REM-T

In [15]:
# Cross-validate REM-T with rule extraction and evaluation enabled.
# NOTE(review): DT=True presumably selects the decision-tree model (the
# results are later read from N_FOLD_RESULTS_DT_FP) — confirm in cross_validations.
cross_validated_rem_t(X, y, extract_evaluate_rules_flag=True, DT=True)

Saving fold 0/5 rules extracted...done
Evaulating rules extracted from fold 0/5...tree Rules accuracy 0.921053
Rules Auc 0.913691
done
Saving fold 1/5 rules extracted...done
Evaulating rules extracted from fold 1/5...tree Rules accuracy 0.912281
Rules Auc 0.906649
done
Saving fold 2/5 rules extracted...done
Evaulating rules extracted from fold 2/5...tree Rules accuracy 0.912281
Rules Auc 0.915675
done
Saving fold 3/5 rules extracted...done
Evaulating rules extracted from fold 3/5...

 1 1 1 1 0 0 1 0 0 1 1 1 1 0 1 0 0 1 1 1 1 1 0 0 1 0 1 0 0 1 1 1 0 0 1 0 0
 1 1 1 0 1 1 0 0 1 0 0 1 0 1 1 0 1 1 1 1 1 0 0 0 0 1 1 0 1 0 0 0 1 0 1 1 0
 1 1 1 1 0 1 1 1 1 1 1 1 0 1 1 1 0 1 0 1 1 1 0 1 1 1 0 1 1 0 0 0 1 0 0 1 1
 1 0 0 0 0 0 1 0 0 1 0 1 0 1 1 1 0 0 0 0 1 1 0 0 1 1 1 1 1 1 1 1 0 0 1 0 1
 1 0 0 1 0 1 1 1 0 1 1 1 1 0 0 0 0 0 0 0 0 0 1 1 1 1 0 1 0 1 1 0 0 1 0 0 1
 1 1 1 1 1 1 0 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 0 1 1 1 0 0 0 1 1
 1 1 0 1 0 1 1 1 1 1 1 1 1 1 1 0 0 0 1 1 1 1 1 1 1 1 1 0 1 0 0 0 1 0 0 1 1
 1 1 0 1 1 1 1 1 0 1 1 0 1 1 0 0 1 1 1 1 1 1 0 1 1 1 1 1 1 0 1 1 1 1 1 0 1
 1 0 1 1 1 1 1 1 1 1 1 1 0 1 0 1 0 1 1 1 1 1 0 0 1 1 1 0 1 0 1 1 1 1 1 1 1
 0 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 1 1 0 1 1 1 1 1 0 0 1 0 1 1 1
 1 1 0 1 1 0 0 1 0 0 1 1 0 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 0 0 0 0 0 1] as keyword args. From version 0.25 passing these as positional arguments will result in an error
 1 1 1 1 1 0 0 1 0 0 1 1 0 0 1 1 1 1 0 0 1 0 1 0 0 1 1

tree Rules accuracy 0.903509
Rules Auc 0.913690
done
Saving fold 4/5 rules extracted...done
Evaulating rules extracted from fold 4/5...tree Rules accuracy 0.938053
Rules Auc 0.931254
done


 1 1 1 1 0 0 1 1 1 1 0 1 0 0 1 1 1 1 0 1 0 0 1 1 0 1 1 1 0 0 0 0 0 1 1 0 0
 0 1 1 0 0 1 1 1 0 1 1 0 1 1 1 1 1 1 1 1 0 0 1 0 0 1 1 1 0 0 1 0 1 0 0 1 0
 0 1 0 1 1 0 1 1 1 0 1 1 1 1 1 1 1 1 1 0 1 1 1 1 0 0 1 0 0 0 1 1 0 0 1 1 1
 0 1 0 0 1 0 1 0 1 1 0 1 1 0 1 0 1 0 0 0 1 0 1 0 1 1 0 1 0 0 0 1 0 1 0 1 1
 1 1 0 0 1 1 0 1 1 0 0 1 0 1 1 1 1 1 1 1 1 0 1 0 0 0 0 0 0 0 0 0 0 0 1 1 1
 1 1 1 0 1 1 1 1 1 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 0 0 1 1 1 1 1 1 1 1
 1 1 1 0 1 0 1 0 1 1 1 0 0 0 1 1 0 1 0 1 0 1 1 1 1 1 1 1 0 0 1 1 1 1 1 1 1
 1 1 1 0 0 1 0 0 1 1 1 1 0 1 1 1 1 0 1 0 1 1 1 1 1 0 1 1 1 1 0 1 1 0 1 1 0
 1 1 1 1 1 1 1 1 1 1 1 0 1 0 0 0 1 1 1 1 1 1 0 0 1 1 1 1 1 1 1 1 1 1 0 1 1
 1 1 1 1 0 1 1 1 1 1 1 1 0 1 1 1 1 1 1 0 1 0 0 1 1 1 1 0 0 1 0 1 1 1 1 0 1
 1 0 1 0 1 0 0 1 1 0 1 1 1 1 1 1 1 1 1 0 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 0 0 0 0 0 0 1] as keyword args. From version 0.25 passing these as positional arguments will result in an error


In [16]:
# Read the per-fold decision-tree REM-T results CSV into a PrettyTable and display it.
with open(N_FOLD_RESULTS_DT_FP, "r") as results_file:
    dt_results_table = from_csv(results_file)

print(dt_results_table)

+---------+--------+--------+-----------+--------------+
|   fold  | re_acc | re_auc | rules_num | rules_av_len |
+---------+--------+--------+-----------+--------------+
|   0.0   | 0.921  | 0.914  |    17.0   |    4.588     |
|   1.0   | 0.912  | 0.907  |    18.0   |    4.389     |
|   2.0   | 0.912  | 0.916  |    20.0   |     4.7      |
|   3.0   | 0.904  | 0.914  |    19.0   |    4.368     |
|   4.0   | 0.938  | 0.931  |    21.0   |    4.905     |
| average | 0.917  | 0.916  |    19.0   |     4.59     |
+---------+--------+--------+-----------+--------------+


# Classification using Random Forest and rule extraction using REM-T

In [17]:
# Cross-validate REM-T with rule extraction and evaluation enabled.
# NOTE(review): DT=False presumably selects the random-forest model (the
# results are later read from N_FOLD_RESULTS_RF_FP) — confirm in cross_validations.
cross_validated_rem_t(X, y, extract_evaluate_rules_flag=True, DT=False)

 1 1 1 1 0 0 1 0 0 1 1 1 1 0 1 0 0 1 1 1 1 1 0 0 1 0 1 0 0 1 1 1 0 0 1 0 0
 1 1 1 0 1 1 0 0 1 0 0 1 0 1 1 0 1 1 1 1 1 0 0 0 0 1 1 0 1 0 0 0 1 0 1 1 0
 1 1 1 1 0 1 1 1 1 1 1 1 0 1 1 1 0 1 0 1 1 1 0 1 1 1 0 1 1 0 0 0 1 0 0 1 1
 1 0 0 0 0 0 1 0 0 1 0 1 0 1 1 1 0 0 0 0 1 1 0 0 1 1 1 1 1 1 1 1 0 0 1 0 1
 1 0 0 1 0 1 1 1 0 1 1 1 1 0 0 0 0 0 0 0 0 0 1 1 1 1 0 1 0 1 1 0 0 1 0 0 1
 1 1 1 1 1 1 0 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 0 1 1 1 0 0 0 1 1
 1 1 0 1 0 1 1 1 1 1 1 1 1 1 1 0 0 0 1 1 1 1 1 1 1 1 1 0 1 0 0 0 1 0 0 1 1
 1 1 0 1 1 1 1 1 0 1 1 0 1 1 0 0 1 1 1 1 1 1 0 1 1 1 1 1 1 0 1 1 1 1 1 0 1
 1 0 1 1 1 1 1 1 1 1 1 1 0 1 0 1 0 1 1 1 1 1 0 0 1 1 1 0 1 0 1 1 1 1 1 1 1
 0 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 1 1 0 1 1 1 1 1 0 0 1 0 1 1 1
 1 1 0 1 1 0 0 1 0 0 1 1 0 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 0 0 0 0 0 1] as keyword args. From version 0.25 passing these as positional arguments will result in an error


Saving fold 0/5 rules extracted...done
Evaulating rules extracted from fold 0/5...tree Rules accuracy 0.938596
Rules Auc 0.923190
done


 1 1 1 1 1 0 0 1 0 0 1 1 0 0 1 1 1 1 0 0 1 0 1 0 0 1 1 1 0 0 1 0 0 0 1 1 0
 1 1 1 1 1 0 0 1 1 1 1 0 1 1 1 1 1 1 1 1 0 0 1 0 1 1 1 0 1 0 1 0 0 1 0 1 1
 0 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 0 1 1 0 0 1 0 1 1 0 0 1 1 0 0 1 1 1 1 0 0
 0 1 0 1 0 1 1 1 0 1 0 0 1 0 0 0 0 0 0 0 1 0 1 0 1 0 1 0 0 0 1 1 0 0 1 1 1
 0 1 1 1 1 0 1 0 1 1 1 1 0 1 1 1 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 0
 1 0 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 0 1
 1 1 1 0 0 1 1 1 1 0 1 1 0 1 1 1 0 1 1 1 1 0 0 1 1 1 1 1 1 1 0 0 0 1 0 0 1
 1 1 1 0 1 1 1 1 0 1 1 1 0 1 1 0 1 1 1 1 0 1 1 1 1 1 1 0 1 1 1 1 0 1 0 1 1
 1 1 1 1 1 1 1 1 1 0 1 0 1 0 1 1 1 1 0 1 1 0 1 0 1 0 0 1 1 1 1 1 1 1 0 0 1
 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 0 1 1 1 0 1 1 1 1 0 1 0 1 0 1 1 1 0
 0 1 0 1 0 1 1 1 0 1 1 1 1 1 1 1 1 1 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 0 0 0 0 0 0] as keyword args. From version 0.25 passing these as positional arguments will result in an error


Saving fold 1/5 rules extracted...done
Evaulating rules extracted from fold 1/5...tree Rules accuracy 0.956140
Rules Auc 0.951032
done


 1 0 1 1 0 1 0 0 1 1 1 0 1 0 0 1 0 1 0 0 1 0 0 0 1 1 0 0 1 1 1 1 1 0 0 1 1
 1 0 1 1 1 1 0 1 0 1 1 1 1 1 1 1 0 0 0 1 0 0 1 0 0 1 0 1 0 0 1 0 0 1 1 1 0
 1 1 0 1 1 1 1 1 1 1 1 0 0 1 1 0 0 1 1 0 0 1 1 1 1 0 1 1 0 0 0 1 1 1 1 0 1
 1 0 1 0 0 0 0 1 0 0 0 0 1 1 0 0 0 0 1 0 0 1 1 0 1 1 1 1 1 0 0 1 1 0 1 1 0
 0 1 1 1 0 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 0 1 0 1 1 0 1 1 0
 1 0 0 1 1 1 1 1 1 1 1 1 0 1 1 0 1 0 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 0 1 0
 1 1 1 0 0 1 1 1 0 0 1 0 1 1 1 0 1 1 1 1 0 0 0 1 1 1 1 1 1 1 1 1 0 1 0 0 0
 0 0 1 1 1 1 1 1 1 1 1 0 1 1 1 1 0 0 1 1 1 1 1 1 1 1 1 0 1 1 1 1 0 1 1 1 1
 1 1 1 1 1 1 0 1 0 0 1 1 1 1 0 1 1 1 0 1 1 0 1 0 1 1 1 1 1 1 1 0 0 1 1 1 1
 1 1 0 1 1 1 1 1 1 1 1 0 1 1 1 1 1 0 1 0 1 1 0 1 1 1 1 0 0 1 1 0 1 1 1 1 1
 1 1 1 0 1 0 1 1 1 1 1 1 1 1 1 1 1 1 0 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 0 0 0 0 1] as keyword args. From version 0.25 passing these as positional arguments will result in an error


Saving fold 2/5 rules extracted...done
Evaulating rules extracted from fold 2/5...tree Rules accuracy 0.956140
Rules Auc 0.965278
done


 0 0 0 1 0 1 1 1 1 1 0 0 0 1 1 1 1 1 0 1 1 1 0 1 0 0 1 0 1 1 0 0 1 0 0 1 1
 0 1 1 0 0 1 1 1 0 1 1 1 1 1 1 0 1 1 1 1 1 0 0 1 0 0 1 1 1 0 0 1 0 1 0 0 1
 1 0 1 0 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 0 0 1 1 1 0 0 1 0 1 1 1 0 1 1 0 0 1
 1 0 1 1 0 1 1 0 0 1 0 0 0 0 1 0 1 0 1 1 0 1 0 0 0 1 1 0 1 1 1 0 1 1 1 1 0
 0 1 1 0 1 0 1 0 1 1 0 1 1 1 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 0 1 0 1
 0 1 1 0 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 0 1 0 1 1 1 1 1 1 1 1 1 1 1 0
 1 0 1 1 1 0 0 1 1 1 1 0 1 0 1 0 1 1 0 1 1 1 1 1 1 0 0 1 1 1 1 1 1 1 1 1 0
 0 1 0 0 0 1 0 0 1 1 1 1 0 1 1 1 1 0 1 1 1 0 1 1 0 0 1 1 1 1 1 0 1 1 1 1 1
 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 0 0 1 0 1 1 1 1 0 1 1 0 1 0 1 0 1 0 1 1
 1 1 0 0 1 1 1 1 1 0 1 1 1 1 1 1 1 1 0 1 1 1 1 1 0 1 1 1 1 1 0 0 1 0 1 0 1
 1 1 0 1 1 0 1 0 0 1 1 0 1 1 1 1 1 1 1 1 1 0 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 0 0 0 1] as keyword args. From version 0.25 passing these as positional arguments will result in an error


Saving fold 3/5 rules extracted...done
Evaulating rules extracted from fold 3/5...tree Rules accuracy 0.956140
Rules Auc 0.955357
done


 1 1 1 1 0 0 1 1 1 1 0 1 0 0 1 1 1 1 0 1 0 0 1 1 0 1 1 1 0 0 0 0 0 1 1 0 0
 0 1 1 0 0 1 1 1 0 1 1 0 1 1 1 1 1 1 1 1 0 0 1 0 0 1 1 1 0 0 1 0 1 0 0 1 0
 0 1 0 1 1 0 1 1 1 0 1 1 1 1 1 1 1 1 1 0 1 1 1 1 0 0 1 0 0 0 1 1 0 0 1 1 1
 0 1 0 0 1 0 1 0 1 1 0 1 1 0 1 0 1 0 0 0 1 0 1 0 1 1 0 1 0 0 0 1 0 1 0 1 1
 1 1 0 0 1 1 0 1 1 0 0 1 0 1 1 1 1 1 1 1 1 0 1 0 0 0 0 0 0 0 0 0 0 0 1 1 1
 1 1 1 0 1 1 1 1 1 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 0 0 1 1 1 1 1 1 1 1
 1 1 1 0 1 0 1 0 1 1 1 0 0 0 1 1 0 1 0 1 0 1 1 1 1 1 1 1 0 0 1 1 1 1 1 1 1
 1 1 1 0 0 1 0 0 1 1 1 1 0 1 1 1 1 0 1 0 1 1 1 1 1 0 1 1 1 1 0 1 1 0 1 1 0
 1 1 1 1 1 1 1 1 1 1 1 0 1 0 0 0 1 1 1 1 1 1 0 0 1 1 1 1 1 1 1 1 1 1 0 1 1
 1 1 1 1 0 1 1 1 1 1 1 1 0 1 1 1 1 1 1 0 1 0 0 1 1 1 1 0 0 1 0 1 1 1 1 0 1
 1 0 1 0 1 0 0 1 1 0 1 1 1 1 1 1 1 1 1 0 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 0 0 0 0 0 0 1] as keyword args. From version 0.25 passing these as positional arguments will result in an error


Saving fold 4/5 rules extracted...done
Evaulating rules extracted from fold 4/5...tree Rules accuracy 0.964602
Rules Auc 0.962106
done


In [18]:
# Read the per-fold random-forest REM-T results CSV into a PrettyTable and display it.
with open(N_FOLD_RESULTS_RF_FP, "r") as results_file:
    rf_results_table = from_csv(results_file)

print(rf_results_table)

+---------+--------+--------+-----------+--------------+
|   fold  | re_acc | re_auc | rules_num | rules_av_len |
+---------+--------+--------+-----------+--------------+
|   0.0   | 0.939  | 0.923  |   409.0   |    4.892     |
|   1.0   | 0.956  | 0.951  |   417.0   |    4.799     |
|   2.0   | 0.956  | 0.965  |   443.0   |    4.921     |
|   3.0   | 0.956  | 0.955  |   395.0   |    4.673     |
|   4.0   | 0.965  | 0.962  |   439.0   |    4.845     |
| average | 0.954  | 0.951  |   420.6   |    4.826     |
+---------+--------+--------+-----------+--------------+
