In [286]:
import numpy as np
import pandas as pd
import collections
from sklearn import tree 
from sklearn import ensemble
from sklearn import datasets
from sklearn.tree import _tree
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

In [287]:
def get_rules(tree, feature_names, class_names):
    tree_ = tree.tree_
    feature_name = [
        feature_names[i] if i != _tree.TREE_UNDEFINED else "undefined!"
        for i in tree_.feature
    ]

    paths = []
    path = []
    
    def recurse(node, path, paths):
        
        if tree_.feature[node] != _tree.TREE_UNDEFINED:
            name = feature_name[node]
            threshold = tree_.threshold[node]
            p1, p2 = list(path), list(path)
            p1 += [(name, "<=", threshold)]
            recurse(tree_.children_left[node], p1, paths)
            p2 += [(name, ">", threshold)]
            recurse(tree_.children_right[node], p2, paths)
        else:
            path += [(tree_.value[node], tree_.n_node_samples[node], node)]
            paths += [path]
            
    recurse(0, path, paths)

    # sort by samples count
    # samples_count = [p[-1][1] for p in paths]
    # ii = list(np.argsort(samples_count))
    # paths = [paths[i] for i in reversed(ii)]
    
    rules = []
    for path in paths:
        rule = list()
        
        for p in path[:-1]:
            # if rule != "if ":
            #     rule += " and "
            rule += [p,]
        # rule += " then "
        if class_names is None:
            # print("Here", path[-1])
            response = {'pred': path[-1][0][0][0], 'leaf': path[-1][2]}
        else:
            classes = path[-1][0][0]
            l = np.argmax(classes)
            response = {"class": class_names[l], "proba": {100.0*classes[l]/np.sum(classes)}}
        n_samples = path[-1][1]
        rules += [[rule, response, n_samples],]
        
    return rules

In [288]:
def interval_intersection(intervals):
    if not intervals:
        return None
    
    # Initialize with the first interval
    current_intersection = intervals[0]
    
    for interval in intervals[1:]:
        # Find the intersection of the current_intersection and the next interval
        start = max(current_intersection[0], interval[0])
        end = min(current_intersection[1], interval[1])
        
        # If there is no intersection, return None
        if start > end:
            return None
        
        # Update the current_intersection
        current_intersection = [start, end]
    
    return current_intersection

# # Example usage:
# intervals = [(1, 5), (2, 6), (3, 7), (4, 8)]
# intersection = interval_intersection(intervals)
# print(f"The intersection of the intervals is: {intersection}")

In [289]:
def rule_to_interval(rule):
    features = dict()
    for feat, op, val in rule:
        if feat in features:
            if op == '<=':
                features[feat].append([-np.inf, val])
            elif op == '>':
                features[feat].append([val, np.inf])
        else:
            if op == '<=':
                features[feat] = [[-np.inf, val], ]
            elif op == '>':
                features[feat]= [[val, np.inf], ]
    for feat in features:
        # print(feat, features)
        features[feat] = interval_intersection(features[feat])
    return features

In [290]:
# Load iris dataset
iris = datasets.load_iris()

# Split the data into features and target
dataset = iris['data']
labels = iris['target']

feature_names = iris['feature_names']
class_names = iris['target_names']

In [291]:
 # Create a decision tree classifier object
clf = ensemble.RandomForestClassifier(random_state=42)
 
# Train the decision tree classifier
clf = clf.fit(dataset, labels)

In [292]:
clfs = clf.estimators_

In [293]:
# Convert the decision tree to human readable rules
rules_set = [get_rules(clfs[i], feature_names, class_names) for i in range(len(clfs))]

In [294]:
for rules in rules_set:
    for rule in rules:
        rule[0] = list(rule_to_interval(rule[0]).items())

In [295]:
rules_set

[[[[('petal width (cm)', [-inf, 0.800000011920929])],
   {'class': 'setosa', 'proba': {100.0}},
   31],
  [[('petal width (cm)', [0.800000011920929, 1.449999988079071]),
    ('petal length (cm)', [-inf, 5.400000095367432])],
   {'class': 'versicolor', 'proba': {100.0}},
   25],
  [[('petal width (cm)', [1.449999988079071, 1.75]),
    ('petal length (cm)', [-inf, 4.950000047683716])],
   {'class': 'versicolor', 'proba': {100.0}},
   7],
  [[('petal width (cm)', [1.449999988079071, 1.75]),
    ('petal length (cm)', [4.950000047683716, 5.400000095367432]),
    ('sepal width (cm)', [-inf, 2.600000023841858])],
   {'class': 'virginica', 'proba': {100.0}},
   1],
  [[('petal width (cm)', [1.449999988079071, 1.75]),
    ('petal length (cm)', [4.950000047683716, 5.400000095367432]),
    ('sepal width (cm)', [2.600000023841858, inf])],
   {'class': 'versicolor', 'proba': {100.0}},
   1],
  [[('petal width (cm)', [0.800000011920929, 1.75]),
    ('petal length (cm)', [5.400000095367432, inf])],
 

In [296]:
# Load iris dataset
diabetes = datasets.load_diabetes()

# Split the data into features and target
dataset = diabetes['data']
labels = diabetes['target']

feature_names = diabetes['feature_names']
# class_names = diabetes['target_names']

In [297]:
 # Create a decision tree classifier object
clf = ensemble.RandomForestRegressor(random_state=42)
 
# Train the decision tree classifier
clfs = clf.fit(dataset, labels)

In [298]:
# Convert the decision tree to human readable rules
rules_set = [get_rules(clfs[i], feature_names, class_names) for i in range(len(clfs))]

In [302]:
rules_set[0][0]

[[('bmi', [-inf, -0.07626373693346977]),
  ('s5', [-inf, -0.04762060381472111]),
  ('bp', [-inf, 0.02531522512435913]),
  ('s6', [-inf, -0.09220404922962189]),
  ('s2', [-inf, -0.03341159410774708])],
 {'class': 'setosa', 'proba': {100.0}},
 1]

In [300]:
for rules in rules_set:
    for rule in rules:
        rule[0] = list(rule_to_interval(rule[0]).items())
        print(rule[0])

[('bmi', [-inf, -0.07626373693346977]), ('s5', [-inf, -0.04762060381472111]), ('bp', [-inf, 0.02531522512435913]), ('s6', [-inf, -0.09220404922962189]), ('s2', [-inf, -0.03341159410774708])]
[('bmi', [-inf, -0.07626373693346977]), ('s5', [-inf, -0.04762060381472111]), ('bp', [-inf, 0.02531522512435913]), ('s6', [-0.09220404922962189, 0.01756178354844451]), ('s2', [-inf, -0.03341159410774708])]
[('bmi', [-inf, -0.07626373693346977]), ('s5', [-inf, -0.04762060381472111]), ('bp', [-inf, 0.02531522512435913]), ('s6', [-inf, 0.01756178354844451]), ('s2', [-0.03341159410774708, inf])]
[('bmi', [-0.07626373693346977, -0.06117437034845352]), ('s5', [-inf, -0.04762060381472111]), ('bp', [-inf, 0.02531522512435913]), ('s6', [-inf, 0.01756178354844451]), ('s4', [-inf, -0.05794394388794899]), ('s1', [-inf, -0.04903933219611645])]
[('bmi', [-0.07626373693346977, -0.06117437034845352]), ('s5', [-inf, -0.04762060381472111]), ('bp', [-inf, 0.02531522512435913]), ('s6', [-inf, 0.01756178354844451]), ('

In [304]:
rules_set[0][0]

[[('bmi', [-inf, -0.07626373693346977]),
  ('s5', [-inf, -0.04762060381472111]),
  ('bp', [-inf, 0.02531522512435913]),
  ('s6', [-inf, -0.09220404922962189]),
  ('s2', [-inf, -0.03341159410774708])],
 {'class': 'setosa', 'proba': {100.0}},
 1]