# Breast Cancer Dataset Classification with apenet's Random Forest

This notebook demonstrates how to build, train, and evaluate a Random Forest model with the apenet library, using scikit-learn's Breast Cancer dataset as an example. A single decision tree is trained alongside it for comparison.


## Import Libraries

In [1]:
import os
# Switch the working directory to the parent folder before the apenet imports below.
# NOTE(review): presumably the notebook sits one level below the repository root,
# so that `apenet` becomes importable from there — confirm.
# The path is relative to the current directory, so re-running this cell without
# restarting the kernel moves up one more level each time.
os.chdir('..')

import numpy as np

# Data Loading
from sklearn.datasets import load_breast_cancer

# Data Visualization
import matplotlib.pyplot as plt

# Import our library
from apenet.rf.tree import DecisionTreeClassifier
from apenet.rf.forest import RandomForestClassifier
from apenet.utils.data import train_test_split
from apenet.utils.metrics import accuracy
from apenet.eye.tree import plot_feature_importance

## Set Random Generator

In [2]:
# Fixed seed so the notebook is reproducible under Restart Kernel -> Run All:
# this generator is handed to train_test_split and RandomForestClassifier below,
# so the split, the fitted forest and the reported accuracies all depend on it.
# Change SEED to draw a different (but still repeatable) run.
SEED = 42
rng = np.random.default_rng(SEED)

## Load Data

In [3]:
# Load the dataset bundle once, then unpack the pieces used by the later cells.
cancer = load_breast_cancer()
X, y = cancer.data, cancer.target  # model inputs and targets
feature_names, class_names = cancer.feature_names, cancer.target_names

## Preprocess Data

In [4]:
# Split into train and test sets (test_size=0.25), drawing from the shared generator.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    rng=rng,
)

## Train Models

In [5]:
# Single decision tree (max_depth=5), fitted for comparison with the forest.
dt = DecisionTreeClassifier(max_depth=5)
dt.fit(X_train, y_train)

# Random forest: 20 trees, max_depth=20, using the shared generator.
rf = RandomForestClassifier(
    n_trees=20,
    max_depth=20,
    rng=rng,
)
# Kept as the cell's last expression, so its return value is what the cell displays.
rf.fit(X_train, y_train)

<apenet.rf.forest.RandomForestClassifier at 0x11c331d30>

## Predict and Evaluate

In [6]:
# Predict on the held-out set with both fitted models (tree first, then forest).
dt_preds, rf_preds = (model.predict(X_test) for model in (dt, rf))

# Report each model's accuracy against the true test labels, to four decimals.
print(f"Decision Tree Accuracy: {accuracy(y_test, dt_preds):.4f}")
print(f"Random Forest Accuracy: {accuracy(y_test, rf_preds):.4f}")

Decision Tree Accuracy: 0.9366
Random Forest Accuracy: 0.9789
