In [41]:
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, f1_score
import numpy as np

class CustomRandomForestClassifier(RandomForestClassifier):
    def __init__(self, n_trees=None, **kwargs):
        self.n_trees = n_trees  # Store the number of trees used during training
        super().__init__(**kwargs)  # Call the parent constructor with any additional arguments

    def custom_predict(self, X, return_proba=False):
        """
        Predict the class probabilities or classes using the specified number of trees
        and aggregate the predictions like a traditional Random Forest.

        Parameters:
        - X: Input features for prediction.
        - return_proba: If True, return class probabilities. If False, return predicted classes.

        Returns:
        - y_prob_or_pred: Predicted class probabilities or predicted classes aggregated across trees.
        """
        if self.n_trees is None:
            raise ValueError("Number of trees used during training is not available.")

        y_prob_or_pred = []

        for estimator in self.estimators_[:self.n_trees]:
            # Predict class probabilities for each tree
            if return_proba:
                y_prob_tree = estimator.predict_proba(X)
            else:
                y_prob_tree = estimator.predict(X)
            y_prob_or_pred.append(y_prob_tree)

        if not return_proba:
            # Compute predicted classes based on majority voting
            y_pred_or_proba = np.mean(y_prob_or_pred, axis=0)
            y_pred_or_proba = np.round(y_pred_or_proba)
        else:
            # Aggregate class probabilities by averaging
            y_pred_or_proba = np.mean(y_prob_or_pred, axis=0)

        return y_pred_or_proba

# Generate toy data for binary classification
X, y = make_classification(n_samples=1000, n_features=20, n_classes=2, n_clusters_per_class=1, random_state=42)

# Split the data into training and testing sets
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Example usage:
# Create a CustomRandomForestClassifier with a larger number of trees and matching hyperparameters
rf = CustomRandomForestClassifier(n_estimators=1000, n_trees=1000, random_state=42, max_depth=10, min_samples_split=2, min_samples_leaf=1)

# Fit the classifier to your training data
rf.fit(X_train, y_train)

# Initialize lists to store accuracy and F1 scores for each tree
tree_accuracies = []
tree_f1_scores = []

# Get predictions and calculate accuracy and F1 score for each tree
for estimator in rf.estimators_[:rf.n_trees]:
    y_pred_tree = estimator.predict(X_test)
    accuracy_tree = accuracy_score(y_test, y_pred_tree)
    f1_tree = f1_score(y_test, y_pred_tree, average = 'micro')
    tree_accuracies.append(accuracy_tree)
    tree_f1_scores.append(f1_tree)

# Print accuracy and F1 score for each tree
for tree_num, (accuracy, f1) in enumerate(zip(tree_accuracies, tree_f1_scores), start=1):
    print(f"Tree {tree_num} - Accuracy: {accuracy:.4f}, F1 Score: {f1:.4f}")

Tree 1 - Accuracy: 0.9100, F1 Score: 0.9100
Tree 2 - Accuracy: 0.8850, F1 Score: 0.8850
Tree 3 - Accuracy: 0.9250, F1 Score: 0.9250
Tree 4 - Accuracy: 0.9200, F1 Score: 0.9200
Tree 5 - Accuracy: 0.8750, F1 Score: 0.8750
Tree 6 - Accuracy: 0.8850, F1 Score: 0.8850
Tree 7 - Accuracy: 0.8800, F1 Score: 0.8800
Tree 8 - Accuracy: 0.9300, F1 Score: 0.9300
Tree 9 - Accuracy: 0.9100, F1 Score: 0.9100
Tree 10 - Accuracy: 0.8100, F1 Score: 0.8100
Tree 11 - Accuracy: 0.9250, F1 Score: 0.9250
Tree 12 - Accuracy: 0.9250, F1 Score: 0.9250
Tree 13 - Accuracy: 0.8600, F1 Score: 0.8600
Tree 14 - Accuracy: 0.8800, F1 Score: 0.8800
Tree 15 - Accuracy: 0.8750, F1 Score: 0.8750
Tree 16 - Accuracy: 0.9200, F1 Score: 0.9200
Tree 17 - Accuracy: 0.9200, F1 Score: 0.9200
Tree 18 - Accuracy: 0.9250, F1 Score: 0.9250
Tree 19 - Accuracy: 0.9050, F1 Score: 0.9050
Tree 20 - Accuracy: 0.8900, F1 Score: 0.8900
Tree 21 - Accuracy: 0.9200, F1 Score: 0.9200
Tree 22 - Accuracy: 0.8950, F1 Score: 0.8950
Tree 23 - Accuracy:

In [42]:
from sklearn.metrics import roc_auc_score

# Create a CustomRandomForestClassifier with a larger number of trees and matching hyperparameters
rf = CustomRandomForestClassifier(n_estimators=200, n_trees=200, random_state=42, max_depth=10, min_samples_split=2, min_samples_leaf=1)

# Fit the classifier to your training data
rf.fit(X_train, y_train)

# Get the final predictions on your test data
final_predictions = rf.custom_predict(X_test)

# Calculate the ROC AUC score
roc_auc = roc_auc_score(y_test, final_predictions)

# You can also calculate the accuracy and F1 score if needed
final_accuracy = accuracy_score(y_test, final_predictions)
final_f1_score = f1_score(y_test, final_predictions)

print(f"Final Accuracy: {final_accuracy:.4f}")
print(f"Final F1 Score: {final_f1_score:.4f}")
print(f"ROC AUC Score: {roc_auc:.4f}")

Final Accuracy: 0.9650
Final F1 Score: 0.9626
ROC AUC Score: 0.9639


In [44]:
RF = RandomForestClassifier(n_estimators=200,random_state=42, max_depth=10, min_samples_split=2, min_samples_leaf=1)
RF.fit(X_train, y_train)

predicted = RF.predict(X_test)

print(accuracy_score(predicted, y_test))
print(roc_auc_score(y_test, final_predictions))
print(f1_score(y_test, final_predictions))

0.96
0.9639175257731959
0.9625668449197862
