In [11]:
# Clone only the `prune-experiment` branch of the TreeBasedModel repository into the current directory.
!git clone -b prune-experiment https://github.com/atikul-islam-sajib/TreeBasedModel.git

In [11]:
# Switch the working directory to the cloned repository; the later `pip install -e .` cell
# installs from whatever directory is current.
# NOTE(review): the absolute /content prefix looks Colab-specific — confirm before running elsewhere.
%cd /content/TreeBasedModel

In [11]:
!pip install shap

In [11]:
!pip install -e . --verbose

In [14]:
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
import pandas as pd
from sklearn.metrics import accuracy_score
from TreeModelsFromScratch.RandomForest import RandomForest

# Create a seeded synthetic classification dataset and wrap it in pandas containers
raw_features, raw_labels = make_classification(
    n_samples=200,
    n_features=20,
    n_informative=10,
    n_redundant=10,
    n_clusters_per_class=2,
    random_state=42,
)
X = pd.DataFrame(raw_features)
y = pd.Series(raw_labels)

# Hold out a quarter of the rows for evaluation (seeded, so the split is repeatable)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)

# Configure the from-scratch random forest classifier
rf = RandomForest(
    n_trees=100,
    max_depth=10,
    min_samples_split=2,
    min_samples_leaf=1,
    n_feature="sqrt",
    bootstrap=True,
    oob=True,
    criterion="gini",
    treetype="classification",
    random_state=42,
)

# Train on the training split only
rf.fit(X_train, y_train)

# Function to write details to a file
def write_tree_details_to_file(filename, rf, stage):
    """Append a leaf-node listing for every tree of a forest to a text report.

    The section starts with a header and the ``max_depth_`` of the first tree
    only (not an aggregate over the forest), followed by one sub-section per
    tree listing each node whose ``is_leaf_node`` entry is truthy.

    Parameters
    ----------
    filename : str or path-like
        Report file. It is opened in append mode, so existing content is kept.
    rf : object
        Forest exposing ``trees``, a sequence of tree objects. Each tree must
        provide ``max_depth_`` and ``node_id_dict``, a mapping from node id to
        a dict with the keys ``'is_leaf_node'``, ``'depth'`` and ``'samples'``.
    stage : str
        Label interpolated into the headings (the notebook uses ``'before'``
        and ``'after'``).

    Notes
    -----
    A forest with no trees produces the header with a depth of ``N/A`` instead
    of raising ``IndexError`` after part of the header has been written.
    """
    trees = rf.trees
    # Resolve the depth before touching the file so that an empty forest
    # cannot leave a half-written header behind.
    first_tree_depth = trees[0].max_depth_ if len(trees) > 0 else "N/A"

    with open(filename, 'a') as file:
        file.write(f"\nTree structure {stage} pruning:\n")
        file.write(f"Tree depth {stage} pruning: {first_tree_depth}\n")

        for i, tree in enumerate(trees):
            file.write(f"\nTree {i} {stage} pruning:\n")
            # Only leaves are reported; internal split nodes are skipped.
            file.writelines(
                f"Node ID: {node_id}, Depth: {node_info['depth']}, Samples: {node_info['samples']}\n"
                for node_id, node_info in tree.node_id_dict.items()
                if node_info['is_leaf_node']
            )

# Single source of truth for the report location used by every writer below
REPORT_PATH = 'tree_details.txt'

# Start from an empty report. Every writer below opens the file in append
# mode, so without this reset each re-run of the cell would stack another
# copy of the report on top of the previous run's output.
with open(REPORT_PATH, 'w'):
    pass

# Write details before pruning
write_tree_details_to_file(REPORT_PATH, rf, 'before')

# Prune the forest with a higher min_samples_leaf value to force more pruning.
# The return value is not used; `rf` itself is what the following steps read.
rf.prune(min_samples_leaf=30)

# Write details after pruning
write_tree_details_to_file(REPORT_PATH, rf, 'after')

# Evaluate the model on the held-out split
y_pred = rf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

# NOTE(review): rf.oob_score is read after prune(); whether prune() refreshes it
# or it still reflects the unpruned forest is not visible here — confirm.
accuracy_line = f'Accuracy: {accuracy:.2f}'
oob_line = f'OOB Score: {rf.oob_score:.2f}'

# Write evaluation results to the file
with open(REPORT_PATH, 'a') as file:
    file.write(f'\n{accuracy_line}\n')
    file.write(f'{oob_line}\n')

# Echo the same two lines to the notebook output
print(f'\n{accuracy_line}')
print(oob_line)


Accuracy: 0.80
OOB Score: 0.82
