In [31]:
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_auc_score, accuracy_score, f1_score
import numpy as np

class CustomRandomForestClassifier(RandomForestClassifier):
    def __init__(self, n_trees=None, **kwargs):
        self.n_trees = n_trees  # Store the number of trees used during training
        super().__init__(**kwargs)  # Call the parent constructor with any additional arguments

    def custom_predict(self, X, return_proba=True):
        """
        Predict the class probabilities or classes for each sample in X using the number of trees used during training.

        Parameters:
        - X: Input features for prediction.
        - return_proba: If True, return class probabilities. If False, return predicted classes.

        Returns:
        - y_prob_or_pred: List of predicted class probabilities or predicted classes for each tree.
        """
        if self.n_trees is None:
            raise ValueError("Number of trees used during training is not available.")

        y_prob_or_pred = []

        for estimator in self.estimators_[:self.n_trees]:
            # Predict class probabilities for each tree
            y_prob_tree = estimator.predict_proba(X)
            y_prob_or_pred.append(y_prob_tree)

        if not return_proba:
            # Compute predicted classes based on the probabilities
            y_pred_or_proba = [np.argmax(y_prob, axis=1) for y_prob in y_prob_or_pred]
        else:
            y_pred_or_proba = y_prob_or_pred

        return y_pred_or_proba

    def custom_scores(self, X, y):
        """
        Score the classifier using the number of trees used during training for ROC AUC, accuracy, and F1.

        Parameters:
        - X: Input features for scoring.
        - y: Ground truth labels.

        Returns:
        - scores: Dictionary containing scores for ROC AUC, accuracy, and F1 for each tree.
        """
        scores = {'roc_auc': [], 'accuracy': [], 'f1': []}
        y_preds = self.custom_predict(X, return_proba=True)  # Use class probabilities

        if isinstance(y_preds[0], list):
            # If y_preds is a list of lists, flatten it to a 2D array
            y_preds = [np.concatenate(y_pred_list, axis=1) for y_pred_list in y_preds]

        y_pred_avg = np.mean(y_preds, axis=0)  # Average the predicted probabilities across trees

        for y_pred in y_preds:
            # Check if it's a binary or multiclass classification task
            if len(np.unique(y)) == 2:
                binary = True
            else:
                binary = False

            # Calculate ROC AUC score
            if binary:
                roc_auc = roc_auc_score(y, y_pred[:, 1])  # Use the probabilities of the positive class
            else:
                roc_auc = roc_auc_score(y, y_pred, multi_class='ovr', average='macro')
            scores['roc_auc'].append(roc_auc)

            # Calculate accuracy score
            accuracy = accuracy_score(y, np.argmax(y_pred, axis=1))
            scores['accuracy'].append(accuracy)

            # Calculate F1 score
            f1 = f1_score(y, np.argmax(y_pred, axis=1), average='macro')
            scores['f1'].append(f1)

        return scores

# Generate toy data for classification
X, y = make_classification(n_samples=1000, n_features=20, n_classes=4, n_clusters_per_class=1, random_state=42)

# Split the data into training and testing sets
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Example usage:
# Create a CustomRandomForestClassifier with a specified number of trees for training
rf = CustomRandomForestClassifier(n_estimators=100, n_trees=50, random_state=42)

# Fit the classifier to your training data
rf.fit(X_train, y_train)

# Get scores using the number of trees used during training for ROC AUC, accuracy, and F1
scores = rf.custom_scores(X_test, y_test)

# Print scores for each tree
for tree_num, tree_scores in enumerate(zip(*scores.values()), start=1):
    print(f"Tree {tree_num}:")
    for metric, score in zip(scores.keys(), tree_scores):
        print(f"   {metric}: {score:.4f}")

    # Print the probability array and predicted class for a specific sample (e.g., sample 1)
    sample_index = 0  # Change this index to print information for a different sample
    y_prob_sample = np.array([y_pred[sample_index] for y_pred in rf.custom_predict(X_test, return_proba=True)])
    y_pred_sample = np.array([y_pred[sample_index] for y_pred in rf.custom_predict(X_test, return_proba=False)])

    print(f"   Sample {sample_index + 1} - Probability Array: {y_prob_sample}")
    print(f"   Sample {sample_index + 1} - Predicted Class: {y_pred_sample}")
    print("_"*100,'\n')

Tree 1:
   roc_auc: 0.8428
   accuracy: 0.7600
   f1: 0.7607
   Sample 1 - Probability Array: [[0. 1. 0. 0.]
 [0. 1. 0. 0.]
 [0. 0. 0. 1.]
 [0. 1. 0. 0.]
 [0. 0. 0. 1.]
 [0. 0. 0. 1.]
 [0. 0. 0. 1.]
 [0. 1. 0. 0.]
 [0. 0. 0. 1.]
 [0. 0. 0. 1.]
 [1. 0. 0. 0.]
 [0. 0. 0. 1.]
 [0. 0. 0. 1.]
 [0. 1. 0. 0.]
 [0. 0. 0. 1.]
 [0. 0. 0. 1.]
 [1. 0. 0. 0.]
 [1. 0. 0. 0.]
 [0. 0. 0. 1.]
 [0. 0. 1. 0.]
 [0. 1. 0. 0.]
 [0. 1. 0. 0.]
 [0. 1. 0. 0.]
 [1. 0. 0. 0.]
 [0. 1. 0. 0.]
 [0. 0. 0. 1.]
 [0. 1. 0. 0.]
 [0. 0. 0. 1.]
 [0. 1. 0. 0.]
 [0. 0. 0. 1.]
 [0. 0. 0. 1.]
 [0. 0. 0. 1.]
 [1. 0. 0. 0.]
 [0. 1. 0. 0.]
 [1. 0. 0. 0.]
 [0. 0. 0. 1.]
 [0. 1. 0. 0.]
 [0. 1. 0. 0.]
 [0. 0. 1. 0.]
 [0. 0. 1. 0.]
 [0. 0. 0. 1.]
 [0. 0. 0. 1.]
 [0. 0. 1. 0.]
 [0. 0. 1. 0.]
 [0. 1. 0. 0.]
 [0. 1. 0. 0.]
 [0. 0. 0. 1.]
 [0. 1. 0. 0.]
 [0. 0. 1. 0.]
 [1. 0. 0. 0.]]
   Sample 1 - Predicted Class: [1 1 3 1 3 3 3 1 3 3 0 3 3 1 3 3 0 0 3 2 1 1 1 0 1 3 1 3 1 3 3 3 0 1 0 3 1
 1 2 2 3 3 2 2 1 1 3 1 2 0]
_____________________

#Binary

In [32]:
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_auc_score, accuracy_score, f1_score
import numpy as np

class CustomRandomForestClassifier(RandomForestClassifier):
    def __init__(self, n_trees=None, **kwargs):
        self.n_trees = n_trees  # Store the number of trees used during training
        super().__init__(**kwargs)  # Call the parent constructor with any additional arguments

    def custom_predict(self, X, return_proba=True):
        """
        Predict the class probabilities or classes for each sample in X using the number of trees used during training.

        Parameters:
        - X: Input features for prediction.
        - return_proba: If True, return class probabilities. If False, return predicted classes.

        Returns:
        - y_prob_or_pred: List of predicted class probabilities or predicted classes for each tree.
        """
        if self.n_trees is None:
            raise ValueError("Number of trees used during training is not available.")

        y_prob_or_pred = []

        for estimator in self.estimators_[:self.n_trees]:
            # Predict class probabilities for each tree
            y_prob_tree = estimator.predict_proba(X)
            y_prob_or_pred.append(y_prob_tree)

        if not return_proba:
            # Compute predicted classes based on the probabilities
            y_pred_or_proba = [np.argmax(y_prob, axis=1) for y_prob in y_prob_or_pred]
        else:
            y_pred_or_proba = y_prob_or_pred

        return y_pred_or_proba

    def custom_scores(self, X, y):
        """
        Score the classifier using the number of trees used during training for ROC AUC, accuracy, and F1.

        Parameters:
        - X: Input features for scoring.
        - y: Ground truth labels.

        Returns:
        - scores: Dictionary containing scores for ROC AUC, accuracy, and F1 for each tree.
        """
        scores = {'roc_auc': [], 'accuracy': [], 'f1': []}
        y_preds = self.custom_predict(X, return_proba=True)  # Use class probabilities

        if isinstance(y_preds[0], list):
            # If y_preds is a list of lists, flatten it to a 2D array
            y_preds = [np.concatenate(y_pred_list, axis=1) for y_pred_list in y_preds]

        y_pred_avg = np.mean(y_preds, axis=0)  # Average the predicted probabilities across trees

        for y_pred in y_preds:
            # Check if it's a binary or multiclass classification task
            if len(np.unique(y)) == 2:
                binary = True
            else:
                binary = False

            # Calculate ROC AUC score
            if binary:
                roc_auc = roc_auc_score(y, y_pred[:, 1])  # Use the probabilities of the positive class
            else:
                roc_auc = roc_auc_score(y, y_pred, multi_class='ovr', average='macro')
            scores['roc_auc'].append(roc_auc)

            # Calculate accuracy score
            accuracy = accuracy_score(y, np.argmax(y_pred, axis=1))
            scores['accuracy'].append(accuracy)

            # Calculate F1 score
            f1 = f1_score(y, np.argmax(y_pred, axis=1), average='macro')
            scores['f1'].append(f1)

        return scores

# Generate toy data for classification
X, y = make_classification(n_samples=500, n_features=10, n_classes=2, n_clusters_per_class=1, random_state=42)

# Split the data into training and testing sets
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Example usage:
# Create a CustomRandomForestClassifier with a specified number of trees for training
rf = CustomRandomForestClassifier(n_estimators=100, n_trees=50, random_state=42)

# Fit the classifier to your training data
rf.fit(X_train, y_train)

# Get scores using the number of trees used during training for ROC AUC, accuracy, and F1
scores = rf.custom_scores(X_test, y_test)

# Print scores for each tree
for tree_num, tree_scores in enumerate(zip(*scores.values()), start=1):
    print(f"Tree {tree_num}:")
    for metric, score in zip(scores.keys(), tree_scores):
        print(f"   {metric}: {score:.4f}")

    # Print the probability array and predicted class for a specific sample (e.g., sample 1)
    sample_index = 0  # Change this index to print information for a different sample
    y_prob_sample = np.array([y_pred[sample_index] for y_pred in rf.custom_predict(X_test, return_proba=True)])
    y_pred_sample = np.array([y_pred[sample_index] for y_pred in rf.custom_predict(X_test, return_proba=False)])

    print(f"   Sample {sample_index + 1} - Probability Array: {y_prob_sample}")
    print(f"   Sample {sample_index + 1} - Predicted Class: {y_pred_sample}")
    print("_"*100,'\n')

Tree 1:
   roc_auc: 0.8337
   accuracy: 0.8300
   f1: 0.8286
   Sample 1 - Probability Array: [[0. 1.]
 [1. 0.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [0. 1.]
 [1. 0.]]
   Sample 1 - Predicted Class: [1 0 1 0 0 1 0 0 1 0 0 1 0 1 1 0 1 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0
 0 0 1 1 1 1 0 1 1 0 1 1 0]
____________________________________________________________________________________________________ 

Tree 2:
   roc_auc: 0.8833
   accuracy: 0.8800
   f1: 0.8788
   Sample 1 - Probability Array: [[0. 1.]
 [1. 0.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [0. 1.]
 [1. 0.]
 [0. 1.