In [4]:
import numpy as np
from math import log2

In [38]:
# Simple CN2-style learner: each rule is a conjunction of one or two attribute tests (see get_best_rule)
# TBD - generalise to a learner that takes a conjunction of attributes per rule up to an arbitrary depth

class CN2:
    """Greedy sequential-covering rule learner in the style of CN2.

    ``learn`` repeatedly picks the best-scoring rule on the remaining
    examples, stores it, and removes the examples it covers.  ``predict``
    returns the label of the first stored rule that applies to an example,
    falling back to the majority class of the training targets.

    Candidate rules are conjunctions of one or two
    ``(attribute, operator, value)`` tests and are built with ``DeepRule``,
    which must be defined in the same namespace before ``learn`` is called.
    """

    # Comparison operators tried for every (attribute, value) pair, in order.
    OPERATORS = ("==", ">", "<", ">=", "<=")

    class Rule:
        """Single-condition rule: ``example[attribute] <operator> threshold``."""

        def __init__(self, attribute, operator, threshold, classes, default_class):
            self.attribute = attribute
            self.operator = operator
            self.threshold = threshold
            # Stored as an array so the elementwise comparison in
            # update_counts also works when a plain list is passed in.
            self.classes = np.asarray(classes)
            self.default_class = default_class
            self.class_counts = np.zeros(len(classes))
            self.strength = 0
            self.class_label = default_class

        def applies(self, example):
            """Return whether ``example`` satisfies this rule's condition.

            Raises:
                ValueError: if ``self.operator`` is not one of
                    ``==``, ``>``, ``<``, ``>=``, ``<=``.
            """
            value = example[self.attribute]
            if self.operator == "==":
                return value == self.threshold
            elif self.operator == ">":
                return value > self.threshold
            elif self.operator == "<":
                return value < self.threshold
            elif self.operator == ">=":
                return value >= self.threshold
            elif self.operator == "<=":
                return value <= self.threshold
            else:
                raise ValueError("Invalid operator: {}".format(self.operator))

        def update_counts(self, example):
            """Increment the count of the class stored in ``example[-1]``."""
            class_label = example[-1]
            index = np.where(self.classes == class_label)
            self.class_counts[index] += 1

    def __init__(self, significance=0.5, min_coverage=2):
        """Create an untrained learner.

        Args:
            significance: minimum score a rule must reach to be accepted by
                ``get_best_rule``.
            min_coverage: minimum number of examples a candidate rule must
                cover to be scored at all.
        """
        self.significance = significance
        self.min_coverage = min_coverage
        self.rules = []
        # Defined up front so predict() before learn() returns None instead
        # of raising AttributeError.
        self.default_class = None

    def learn(self, data, target):
        """Learn an ordered rule list from ``data`` and ``target``.

        Args:
            data: 2-D array-like of attribute values, one row per example.
            target: 1-D array-like of class labels.  ``np.bincount`` is used
                on it, so labels must be non-negative integers.
        """
        data = np.asarray(data)
        target = np.asarray(target)

        self.rules = []
        self.classes = np.unique(target)
        self.attributes = list(range(data.shape[1]))
        # The class label is appended as the last column of every example.
        self.examples = np.hstack((data, target.reshape(-1, 1)))
        self.default_class = np.argmax(np.bincount(target))

        while len(self.examples) > 0:
            rule = self.get_best_rule(self.examples)
            if rule is None:
                break
            covered_examples = self.apply_rule(rule, self.examples)
            if len(covered_examples) == 0:
                # A rule that removes nothing would make this loop spin forever.
                break
            self.rules.append(rule)
            self.examples = np.delete(self.examples, covered_examples, axis=0)

    def _candidate_conditions(self, examples):
        """List every (attribute, operator, value) test observable in ``examples``."""
        conditions = []
        for attribute in self.attributes:
            for value in np.unique(examples[:, attribute]):
                for operator in self.OPERATORS:
                    conditions.append((attribute, operator, value))
        return conditions

    def _score_candidate(self, conditions, examples):
        """Build and score a rule; return ``(rule, score)`` or ``None`` if it covers too little."""
        rule = DeepRule(conditions, self.classes, self.default_class)
        covered_examples = self.apply_rule(rule, examples)
        if len(covered_examples) == 0 or len(covered_examples) < self.min_coverage:
            return None
        return rule, self.calculate_score(examples, covered_examples, rule)

    def get_best_rule(self, examples):
        """Return the best-scoring rule of depth one or two, or ``None``.

        ``None`` is returned when no candidate reaches ``self.significance``.
        On ties the candidate found first wins, so shorter rules are
        preferred over longer ones.
        """
        best_rule = None
        best_score = -1
        # Computed once: the candidate tests are the same at both depths.
        conditions = self._candidate_conditions(examples)

        # depth = 1
        seeds = []
        for condition in conditions:
            scored = self._score_candidate([condition], examples)
            if scored is None:
                # A conjunction covers a subset of what its seed covers, so a
                # seed below the coverage floor can never yield a valid rule.
                continue
            rule, score = scored
            seeds.append(rule)
            if score > best_score:
                best_score = score
                best_rule = rule

        # depth = 2
        for seed in seeds:
            for condition in conditions:
                scored = self._score_candidate(seed.conditions + [condition], examples)
                if scored is None:
                    continue
                rule, score = scored
                if score > best_score:
                    best_score = score
                    best_rule = rule

        if best_score < self.significance:
            return None
        return best_rule

    def apply_rule(self, rule, examples):
        """Return the row indices of ``examples`` to which ``rule`` applies."""
        mask = np.array([rule.applies(example) for example in examples])
        return np.where(mask)[0]

    def calculate_score(self, examples, covered_examples, rule):
        """Score ``rule`` as the fraction of covered examples in its majority class.

        Side effects: sets ``rule.class_label`` to the majority class of the
        covered examples, recomputes ``rule.class_counts`` and stores the
        score in ``rule.strength``.

        Args:
            examples: 2-D array whose last column holds the class label.
            covered_examples: row indices of ``examples`` covered by ``rule``.
            rule: rule object exposing ``update_counts`` and ``class_counts``.

        Returns:
            The rule's strength; ``0.0`` when nothing is covered.
        """
        covered_rows = examples[covered_examples]
        if len(covered_rows) == 0:
            rule.strength = 0.0
            return rule.strength

        # Labels are cast to int because bincount needs integer input and the
        # stacked example array may be floating point.
        covered_targets = covered_rows[:, -1].astype(int)
        rule.class_label = np.argmax(np.bincount(covered_targets))
        index = np.where(self.classes == rule.class_label)

        # Start from zero so scoring the same rule twice does not double count.
        rule.class_counts.fill(0)
        for row in covered_rows:
            rule.update_counts(row)
        rule.strength = np.sum(rule.class_counts[index]) / np.sum(rule.class_counts)
        return rule.strength

    def predict(self, example):
        """Return the label of the first rule that applies to ``example``.

        Rules are tried in the order they were learned; the default class is
        returned only when none of them applies (including when there are no
        rules at all).
        """
        for rule in self.rules:
            if rule.applies(example):
                return rule.class_label
        return self.default_class


In [39]:
### To be completed and tested in a proper CN2 implementation that modifies the class above
class DeepRule:
    """Conjunctive rule: every ``(attribute, operator, threshold)`` test must hold.

    Attributes:
        conditions: list of ``(attribute, operator, threshold)`` tuples, where
            ``attribute`` indexes into an example and ``operator`` is one of
            ``==``, ``>``, ``<``, ``>=``, ``<=``.
        classes: array of the known class labels.
        class_counts: per-class tally filled by ``update_counts``, aligned
            with ``classes``.
        strength: score of the rule, initially 0.
        class_label: label the rule predicts, initially ``default_class``.
    """

    def __init__(self, conditions, classes, default_class):
        self.conditions = conditions
        # Stored as an array so the elementwise comparison in update_counts
        # also works when a plain list is passed in.
        self.classes = np.asarray(classes)
        self.default_class = default_class
        self.class_counts = np.zeros(len(classes))
        self.strength = 0
        self.class_label = default_class

    def applies(self, example):
        """Return True when ``example`` satisfies every condition.

        A rule with no conditions applies to everything.

        Raises:
            ValueError: if a condition uses an unsupported operator.
        """
        for attr, op, threshold in self.conditions:
            value = example[attr]
            # Each branch tests for a *violation* of the condition; the
            # ordering tests keep the negated form of the original code.
            if op == '==':
                violated = value != threshold
            elif op == '<=':
                violated = value > threshold
            elif op == '>=':
                violated = value < threshold
            elif op == '<':
                violated = value >= threshold
            elif op == '>':
                violated = value <= threshold
            else:
                raise ValueError("Invalid operator: {}".format(op))
            if violated:
                return False
        return True

    def update_counts(self, example):
        """Increment the count of the class stored in ``example[-1]``."""
        class_label = example[-1]
        index = np.where(self.classes == class_label)
        self.class_counts[index] += 1

    def __repr__(self):
        return ' AND '.join([f'{attr} {op} {threshold}' for attr, op, threshold in self.conditions]) + f' => {self.class_label}'
