In [13]:
import numpy as np
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from graphviz import Digraph

In [14]:
def gini_index(groups, classes):
    """Return the size-weighted Gini impurity of a candidate split.

    Args:
        groups: Iterable of row groups (e.g. the ``(left, right)`` pair
            produced by a split). The class label of a row is read from
            its last element, ``row[-1]``.
        classes: Class labels to score; a label missing from a group
            simply contributes a proportion of zero.

    Returns:
        float: Sum over non-empty groups of ``(1 - sum(p_c ** 2))`` weighted
        by the group's share of all rows. ``0.0`` means every group is pure.
    """
    n_instances = float(sum(len(group) for group in groups))
    gini = 0.0
    for group in groups:
        size = float(len(group))
        # An empty group carries no weight and would divide by zero below.
        if size == 0:
            continue
        # Extract the labels once per group rather than once per class.
        labels = [row[-1] for row in group]
        score = 0.0
        for class_val in classes:
            p = labels.count(class_val) / size
            score += p * p
        gini += (1.0 - score) * (size / n_instances)
    return gini

def test_split(index, value, dataset):
    left, right = list(), list()
    for row in dataset:
        if row[index] < value:
            left.append(row)
        else:
            right.append(row)
    return left, right

def get_best_split(dataset):
    """Exhaustively search for the split with the lowest Gini impurity.

    Every feature column (all positions except the last, which holds the
    class label) is tried, using each distinct value found in that column
    as a threshold.

    Args:
        dataset: Non-empty list of rows; feature values must be hashable
            (plain numbers are), and ``row[-1]`` is the class label.

    Returns:
        dict: ``{'index': feature position, 'value': threshold,
        'groups': (left, right)}`` for the best split found. Ties keep the
        first candidate encountered (features in order, then rows in order).
    """
    class_values = list(set(row[-1] for row in dataset))
    best_index, best_value, best_score, best_groups = None, None, float('inf'), None
    for index in range(len(dataset[0]) - 1):
        # A repeated threshold yields the same partition and score, and can
        # never beat its first occurrence under the strict '<' test below,
        # so skipping it changes nothing except the amount of work done.
        tried = set()
        for row in dataset:
            value = row[index]
            if value in tried:
                continue
            tried.add(value)
            groups = test_split(index, value, dataset)
            gini = gini_index(groups, class_values)
            if gini < best_score:
                best_index, best_value, best_score, best_groups = index, value, gini, groups
    return {'index': best_index, 'value': best_value, 'groups': best_groups}

In [15]:
def to_terminal(group):
    """Return the most frequent class label in ``group``.

    The label of each row is its last element. Raises ``ValueError`` (from
    ``max``) when ``group`` is empty.
    """
    labels, votes = [], {}
    for row in group:
        label = row[-1]
        labels.append(label)
        votes[label] = votes.get(label, 0) + 1
    # Candidates are iterated from a set built from the full label list, so
    # ties resolve to whichever label that set yields first.
    return max(set(labels), key=votes.__getitem__)

def split(node, max_depth, min_size, depth):
    """Recursively grow the subtree below ``node``, mutating it in place.

    ``node`` must carry a ``'groups'`` entry holding its ``(left, right)``
    row lists. That entry is removed and replaced by ``'left'`` and
    ``'right'`` entries, each either a class label (leaf) or a nested node
    dict.

    Args:
        node: Split dict as returned by ``get_best_split``.
        max_depth: Depth at which both children are forced to be leaves.
        min_size: A side with this many rows or fewer becomes a leaf.
        depth: Depth of ``node`` itself (the root is built with 1).
    """
    left_rows, right_rows = node['groups']
    del node['groups']

    # Degenerate split: one side is empty, so both children share one leaf.
    if not (left_rows and right_rows):
        shared_leaf = to_terminal(left_rows + right_rows)
        node['left'] = node['right'] = shared_leaf
        return

    # Depth budget exhausted: stop growing and label both sides.
    if depth >= max_depth:
        node['left'], node['right'] = to_terminal(left_rows), to_terminal(right_rows)
        return

    # Otherwise handle each side independently, left before right.
    for side, rows in (('left', left_rows), ('right', right_rows)):
        if len(rows) > min_size:
            node[side] = get_best_split(rows)
            split(node[side], max_depth, min_size, depth + 1)
        else:
            node[side] = to_terminal(rows)

def build_tree(train, max_depth, min_size):
    """Build a decision tree from ``train`` and return its root node dict.

    The root split is chosen with ``get_best_split`` and then expanded in
    place by ``split``, starting at depth 1.
    """
    tree = get_best_split(train)
    split(tree, max_depth, min_size, depth=1)
    return tree

In [16]:
def predict(node, row):
    """Classify ``row`` by walking the tree rooted at ``node``.

    At each node dict the row goes left when ``row[node['index']]`` is
    strictly below ``node['value']`` and right otherwise. The walk stops at
    the first child that is not a dict, which is returned as the prediction.
    """
    current = node
    while True:
        goes_left = row[current['index']] < current['value']
        child = current['left'] if goes_left else current['right']
        if not isinstance(child, dict):
            return child
        current = child

In [17]:
def subsample(dataset, ratio=1.0):
    """Draw a bootstrap sample (with replacement) from ``dataset``.

    Args:
        dataset: Sequence of rows to draw from.
        ratio: Sample size as a fraction of ``len(dataset)``; the product
            is rounded with ``round``.

    Returns:
        list: The drawn rows. Rows are referenced, not copied, and each is
        picked with one ``np.random.randint`` call on NumPy's global RNG.
    """
    n_sample = round(len(dataset) * ratio)
    return [
        dataset[np.random.randint(len(dataset))]
        for _ in range(n_sample)
    ]

def export_tree_to_dot(node, graph=None, node_id=0):
    """Add the (sub)tree rooted at ``node`` to a graph and return the graph.

    Args:
        node: A node dict with ``'index'``, ``'value'``, ``'left'`` and
            ``'right'`` entries, or any non-dict value, which is drawn as a
            leaf.
        graph: Graph object to draw into; a new ``Digraph`` is created when
            omitted.
        node_id: Integer id of ``node``. The children of id ``i`` get ids
            ``2*i + 1`` (left) and ``2*i + 2`` (right).

    Returns:
        The graph that was drawn into.
    """
    if graph is None:
        graph = Digraph()

    this_id = str(node_id)

    # Leaves are drawn as ellipses labelled with their stored value.
    if not isinstance(node, dict):
        graph.node(this_id, f"Leaf: {node}", shape="ellipse")
        return graph

    graph.node(this_id, f"X{node['index']} < {node['value']:.3f}")
    # The child subtree is emitted before the edge that points to it.
    for offset, branch, answer in ((1, 'left', "Yes"), (2, 'right', "No")):
        child_id = node_id * 2 + offset
        graph = export_tree_to_dot(node[branch], graph, child_id)
        graph.edge(this_id, str(child_id), label=answer)

    return graph

In [18]:
class RandomForest:
    """Bagged ensemble of decision trees with majority-vote prediction.

    Each tree is built by ``build_tree`` on a bootstrap sample drawn by
    ``subsample``; predictions are the most common vote across the trees.
    """

    def __init__(self, n_trees, max_depth, min_size, sample_size):
        """Store the hyper-parameters; no tree is built until ``fit``.

        Args:
            n_trees: Number of trees to train.
            max_depth: Passed to ``build_tree`` as the depth limit.
            min_size: Passed to ``build_tree`` as the minimum node size.
            sample_size: Passed to ``subsample`` as the sampling ratio.
        """
        self.n_trees = n_trees
        self.max_depth = max_depth
        self.min_size = min_size
        self.sample_size = sample_size
        self.trees = []

    def fit(self, train):
        """Discard any existing trees and train ``n_trees`` new ones.

        A progress line is printed after each tree.
        """
        self.trees = []
        for tree_number in range(1, self.n_trees + 1):
            bootstrap = subsample(train, self.sample_size)
            self.trees.append(build_tree(bootstrap, self.max_depth, self.min_size))
            print(f"Tree {tree_number} trained.")

    def bagging_predict(self, row):
        """Return the label predicted most often for ``row`` by the trees."""
        votes = []
        for tree in self.trees:
            votes.append(predict(tree, row))
        return max(set(votes), key=votes.count)

    def predict(self, test):
        """Return a list with one majority-vote prediction per row of ``test``."""
        return list(map(self.bagging_predict, test))

    def visualize_tree(self, tree_index):
        """Render one tree to ``tree_<index>`` as PNG and return the graph.

        Prints a message and returns ``None`` when ``tree_index`` is not
        below the number of trained trees.
        """
        if not tree_index < len(self.trees):
            print("Invalid tree index.")
            return None
        dot = export_tree_to_dot(self.trees[tree_index])
        dot.render(f"tree_{tree_index}", format="png", cleanup=False)
        return dot

    def visualize_all_trees(self):
        """Render every trained tree in order, announcing each one first."""
        for tree_number, _ in enumerate(self.trees, start=1):
            print(f"Visualizing Tree {tree_number}")
            self.visualize_tree(tree_number - 1)

In [20]:
# Load the Wine dataset and append the target as the last column, which is
# where the tree code above reads each row's class label (row[-1]).
data = load_wine()
dataset = np.c_[data.data, data.target]
train, test = train_test_split(dataset, test_size=0.3, random_state=42)

# The bootstrap samples are drawn with np.random.randint on NumPy's global
# RNG, so seed it here; without this the forest (and the accuracy printed
# below) changes on every run even though the train/test split is fixed.
np.random.seed(42)
rf = RandomForest(n_trees=5, max_depth=5, min_size=10, sample_size=0.8)
rf.fit(train.tolist())

y_test = [row[-1] for row in test]
y_pred = rf.predict(test.tolist())

accuracy = accuracy_score(y_test, y_pred)
print(f'Random Forest Accuracy: {accuracy * 100:.2f}%')

# Writes one Graphviz source file plus a PNG per tree (tree_0 ... tree_4).
rf.visualize_all_trees()

Tree 1 trained.
Tree 2 trained.
Tree 3 trained.
Tree 4 trained.
Tree 5 trained.
Random Forest Accuracy: 94.44%
Visualizing Tree 1
Visualizing Tree 2
Visualizing Tree 3
Visualizing Tree 4
Visualizing Tree 5


In [12]:
import numpy as np

# Toy binary labels used to work through the standard classification metrics.
y_true = np.array([1, 0, 1, 1, 0, 1, 0, 0, 1, 0])
y_pred = np.array([1, 0, 0, 1, 0, 1, 1, 0, 1, 0])

# Confusion-matrix cells. np.count_nonzero reduces the boolean mask inside
# NumPy instead of iterating the array element by element with builtin sum.
tp = np.count_nonzero((y_true == 1) & (y_pred == 1))
tn = np.count_nonzero((y_true == 0) & (y_pred == 0))
fp = np.count_nonzero((y_true == 0) & (y_pred == 1))
fn = np.count_nonzero((y_true == 1) & (y_pred == 0))

# Each ratio falls back to 0 when its denominator is empty.
precision = tp / (tp + fp) if (tp + fp) != 0 else 0
recall = tp / (tp + fn) if (tp + fn) != 0 else 0
accuracy = (tp + tn) / len(y_true)
f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) != 0 else 0
# The true-positive rate is the same ratio as recall: TP / (TP + FN).
tpr = recall
fpr = fp / (fp + tn) if (fp + tn) != 0 else 0

print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"Accuracy: {accuracy:.2f}")
print(f"F1 Score: {f1:.2f}")
print(f"TPR (Sensitivity): {tpr:.2f}")
print(f"FPR: {fpr:.2f}")

Precision: 0.80
Recall: 0.80
Accuracy: 0.80
F1 Score: 0.80
TPR (Sensitivity): 0.80
FPR: 0.20
