In [1]:
from lorem_new import LOREM
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from collections import defaultdict

# Load the dataset; the path is relative to the notebook's working directory.
dataset = pd.read_csv('datasets/coinfeccion.csv')

# Features are every column except the target. Categorical columns are one-hot
# encoded, dropping the first level of each so the dummies are not redundant.
X = dataset.drop(columns=['COINFECCION'])
X = pd.get_dummies(X, drop_first=True)
y = dataset['COINFECCION']

# Split the dataset into training (70%) and test (30%) sets; the seed makes the split reproducible.
train_set, test_set, train_label, test_label = train_test_split(X, y, test_size=0.3, random_state=42)

# Define and train the RandomForestClassifier.
# max_features='sqrt' is what 'auto' meant for classifiers; 'auto' was deprecated in
# scikit-learn 1.1 and removed in 1.3, so spelling it out keeps the cell runnable on
# current releases without changing the model.
rf = RandomForestClassifier(bootstrap=False, class_weight='balanced',
                       criterion='gini', max_depth=500, max_features='sqrt',
                       min_samples_leaf=10, min_samples_split=50,
                       n_estimators=100, random_state=0)
rf.fit(train_set, train_label)

# Performance on the training set
predictions_train = rf.predict(train_set)
report_train = classification_report(train_label, predictions_train)
print('Classification report for train set:', report_train)

# Performance on the held-out test set
predictions_test = rf.predict(test_set)
report_test = classification_report(test_label, predictions_test)
print('Classification report for test set:', report_test)

# --- LOREM configuration -----------------------------------------------------
neigh_type = 'cfs'  # neighbourhood generation to use (random, genetic, geneticp, cfs, rndgen)
binary = 'binary_from_dts'  # how to merge the trees: binary_from_dts / binary_from_bb build a binary tree, nari builds an n-ary tree
cxpb = 0.7  # value to set for the genetic generation
mutpb = 0.5  # value to set for the genetic generation
ngen = 2  # number of neighbourhood generations to run
runs = 2  # how many neighbourhoods and trees to create and then merge
class_name = "COINFECCION"
class_values = [0, 1]  # values that the target may take

# Both name lists are the (already one-hot encoded) column Index of the test set.
feature_names = test_set.columns
real_feature_names = test_set.columns

# Positional index of every column; all columns are declared numeric.
numeric_columns = [test_set.columns.get_loc(col) for col in test_set.columns]

# Build {real feature position: {value label: encoded column position}} by walking
# both name lists in parallel. Since the two lists are the same Index here, every
# step takes the exact-match path and each feature maps to its own position.
features_map = defaultdict(dict)
enc_pos = 0
real_pos = 0
n_encoded = len(feature_names)
n_real = len(real_feature_names)

while enc_pos < n_encoded and real_pos < n_real:
    encoded_name = feature_names[enc_pos]
    real_name = real_feature_names[real_pos]
    exact_match = encoded_name == real_name

    # Encoded column does not belong to the current real feature: move on to the next one.
    if not (exact_match or encoded_name.startswith(real_name)):
        real_pos += 1
        continue

    # Strip the "<real name>=" prefix (if present) to obtain the value label.
    value_label = encoded_name.replace('%s=' % real_name, '')
    features_map[real_pos][value_label] = enc_pos
    enc_pos += 1
    # An exact match consumes the real feature too; a prefix match may be followed
    # by further encoded columns of the same real feature.
    if exact_match:
        real_pos += 1
        
# Extra keyword arguments forwarded to LOREM for the neighbourhood generator.
# The original literal listed "forced_balance_ratio" twice with the same value;
# Python keeps only the last occurrence, so the duplicate entry is dropped.
neigh_kwargs = {
    "balance": False,
    "sampling_kind": "gaussian",
    "kind": "gaussian_global",
    "downward_only": True,
    "redo_search": True,
    "forced_balance_ratio": 0.5,
    "cut_radius": True,
    "n": 800,
    "normalize": 'minmax',
    "n_batch": 5,
    "datas": train_set.values
}

# Build the explainer around the trained random forest.
# NOTE(review): rf.predict_proba is passed both as the third positional argument and
# as bb_predict_proba=...; confirm against the LOREM signature that both are needed.
explainer = LOREM(train_set.values, rf.predict, rf.predict_proba, feature_names, class_name, class_values, numeric_columns, features_map,
                      neigh_type=neigh_type, categorical_use_prob=True, continuous_fun_estimation=True, size=1000,
                      ocr=0.1, multi_label=False, one_vs_rest=False, random_state=42, verbose=True,
                      Kc=train_set, bb_predict_proba=rf.predict_proba, K_transformed=train_set, discretize=True,
                      encdec=None, binary=binary, **neigh_kwargs)

# Explain the row whose index *label* is 0. .loc is label-based, so this raises
# KeyError whenever that row was assigned to the training split instead.
# NOTE(review): the recorded run of this cell ends with "ZeroDivisionError: float
# division by zero" after the parallel run starts; the cause is inside LOREM and
# is not visible from this cell.
explanation = explainer.explain_instance_stable(test_set.loc[0].values.reshape(1,-1), 150, runs=runs, n_jobs=2)

# Print the factual rule followed by each counterfactual rule.
print(explanation.rule)
for c in explanation.crules:
    print(c)

Classification report for train set:               precision    recall  f1-score   support

           0       0.77      0.84      0.81        32
           1       0.76      0.67      0.71        24

    accuracy                           0.77        56
   macro avg       0.77      0.76      0.76        56
weighted avg       0.77      0.77      0.77        56

Classification report for test set:               precision    recall  f1-score   support

           0       0.71      0.86      0.77        14
           1       0.75      0.55      0.63        11

    accuracy                           0.72        25
   macro avg       0.73      0.70      0.70        25
weighted avg       0.73      0.72      0.71        25

Neigh kind  cfs
sampling kind  gaussian_global
binary sampling search  800 5
binary sampling search  800 5


[Parallel(n_jobs=2)]: Using backend ThreadingBackend with 2 concurrent workers.


ZeroDivisionError: float division by zero