In [1]:
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_auc_score

class CustomRandomForestClassifier(RandomForestClassifier):
    def __init__(self, n_trees=None, **kwargs):
        self.n_trees = n_trees  # Store the number of trees used during training
        super().__init__(**kwargs)  # Call the parent constructor with any additional arguments

    def custom_predict(self, X):
        """
        Predict the class probabilities for each sample in X using the number of trees used during training.

        Parameters:
        - X: Input features for prediction.

        Returns:
        - y_prob: Predicted class probabilities.
        """
        if self.n_trees is None:
            raise ValueError("Number of trees used during training is not available.")

        # Aggregate predicted class probabilities from selected trees
        y_probs = [estimator.predict_proba(X)[:, 1] for estimator in self.estimators_[:self.n_trees]]
        y_prob = sum(y_probs) / self.n_trees  # Average the predicted probabilities

        return y_prob

    def custom_score(self, X, y):
        """
        Score the classifier using the number of trees used during training.

        Parameters:
        - X: Input features for scoring.
        - y: Ground truth labels.

        Returns:
        - score: ROC AUC score of the classifier.
        """
        y_prob = self.custom_predict(X)
        score = roc_auc_score(y, y_prob)
        return score

# Generate toy data
X, y = make_classification(n_samples=1000, n_features=20, random_state=42)

# Split the data into training and testing sets
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Example usage:
# Create a CustomRandomForestClassifier with a specified number of trees for training
rf = CustomRandomForestClassifier(n_estimators=100, n_trees=50, random_state=42)

# Fit the classifier to your training data
rf.fit(X_train, y_train)

# Score using the number of trees used during training
score = rf.custom_score(X_test, y_test)

print("ROC AUC with 50 trees:", score)

ROC AUC with 50 trees: 0.9396040598934781


In [2]:
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_auc_score
import numpy as np

class CustomRandomForestClassifier(RandomForestClassifier):
    def __init__(self, n_trees=None, **kwargs):
        self.n_trees = n_trees  # Store the number of trees used during training
        super().__init__(**kwargs)  # Call the parent constructor with any additional arguments

    def custom_predict(self, X):
        """
        Predict the class probabilities and classes for each sample in X using the number of trees used during training.

        Parameters:
        - X: Input features for prediction.

        Returns:
        - y_prob: List of predicted class probabilities for each tree.
        - y_pred: List of predicted classes for each tree.
        """
        if self.n_trees is None:
            raise ValueError("Number of trees used during training is not available.")

        y_prob = []
        y_pred = []

        for estimator in self.estimators_[:self.n_trees]:
            # Predict class probabilities for each tree
            y_prob_tree = estimator.predict_proba(X)[:, 1]
            y_prob.append(y_prob_tree)

            # Predict classes for each tree
            y_pred_tree = estimator.predict(X)
            y_pred.append(y_pred_tree)

        return y_prob, y_pred

    def custom_score(self, X, y):
        """
        Score the classifier using the number of trees used during training.

        Parameters:
        - X: Input features for scoring.
        - y: Ground truth labels.

        Returns:
        - score: ROC AUC score of the classifier.
        """
        y_probs, _ = self.custom_predict(X)

        # Calculate ROC AUC score for each tree and return as a list
        scores = [roc_auc_score(y, y_prob) for y_prob in y_probs]

        return scores

# Generate toy data
X, y = make_classification(n_samples=1000, n_features=20, random_state=42)

# Split the data into training and testing sets
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Example usage:
# Create a CustomRandomForestClassifier with a specified number of trees for training
rf = CustomRandomForestClassifier(n_estimators=100, n_trees=50, random_state=42)

# Fit the classifier to your training data
rf.fit(X_train, y_train)

# Get class probabilities and classes for each tree
y_prob_list, y_pred_list = rf.custom_predict(X_test)

# Score using the number of trees used during training
scores = rf.custom_score(X_test, y_test)

for tree_num, (y_prob, y_pred, score) in enumerate(zip(y_prob_list, y_pred_list, scores), start=1):
    print(f"Tree {tree_num}: ROC AUC = {score:.4f}")
    # You can also access y_prob and y_pred for each tree here if needed


Tree 1: ROC AUC = 0.8460
Tree 2: ROC AUC = 0.8173
Tree 3: ROC AUC = 0.7570
Tree 4: ROC AUC = 0.8011
Tree 5: ROC AUC = 0.7074
Tree 6: ROC AUC = 0.7705
Tree 7: ROC AUC = 0.7946
Tree 8: ROC AUC = 0.7616
Tree 9: ROC AUC = 0.7817
Tree 10: ROC AUC = 0.7789
Tree 11: ROC AUC = 0.7563
Tree 12: ROC AUC = 0.8589
Tree 13: ROC AUC = 0.7710
Tree 14: ROC AUC = 0.8119
Tree 15: ROC AUC = 0.7878
Tree 16: ROC AUC = 0.8023
Tree 17: ROC AUC = 0.8212
Tree 18: ROC AUC = 0.8086
Tree 19: ROC AUC = 0.7845
Tree 20: ROC AUC = 0.8025
Tree 21: ROC AUC = 0.8004
Tree 22: ROC AUC = 0.7696
Tree 23: ROC AUC = 0.8462
Tree 24: ROC AUC = 0.8198
Tree 25: ROC AUC = 0.7771
Tree 26: ROC AUC = 0.7771
Tree 27: ROC AUC = 0.7630
Tree 28: ROC AUC = 0.8000
Tree 29: ROC AUC = 0.8359
Tree 30: ROC AUC = 0.7724
Tree 31: ROC AUC = 0.8481
Tree 32: ROC AUC = 0.8341
Tree 33: ROC AUC = 0.8287
Tree 34: ROC AUC = 0.7925
Tree 35: ROC AUC = 0.8474
Tree 36: ROC AUC = 0.8065
Tree 37: ROC AUC = 0.8247
Tree 38: ROC AUC = 0.7757
Tree 39: ROC AUC = 0.

In [3]:
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_auc_score

class CustomRandomForestClassifier(RandomForestClassifier):
    def __init__(self, n_trees=None, **kwargs):
        self.n_trees = n_trees  # Store the number of trees used during training
        super().__init__(**kwargs)  # Call the parent constructor with any additional arguments

    def custom_predict(self, X):
        """
        Predict the class probabilities and classes for each sample in X using the number of trees used during training.

        Parameters:
        - X: Input features for prediction.

        Returns:
        - y_prob: List of predicted class probabilities for each tree.
        - y_pred: List of predicted classes for each tree.
        """
        if self.n_trees is None:
            raise ValueError("Number of trees used during training is not available.")

        y_prob = []
        y_pred = []

        for estimator in self.estimators_[:self.n_trees]:
            # Predict class probabilities for each tree
            y_prob_tree = estimator.predict_proba(X)
            y_prob.append(y_prob_tree)

            # Predict classes for each tree
            y_pred_tree = estimator.predict(X)
            y_pred.append(y_pred_tree)

        return y_prob, y_pred

    def custom_score(self, X, y):
        """
        Score the classifier using the number of trees used during training.

        Parameters:
        - X: Input features for scoring.
        - y: Ground truth labels.

        Returns:
        - scores: List of ROC AUC scores for each tree.
        """
        y_probs, _ = self.custom_predict(X)

        # Calculate ROC AUC score for each tree and return as a list
        scores = [roc_auc_score(y, y_prob[:, 1]) for y_prob in y_probs]

        return scores

# Generate toy data
X, y = make_classification(n_samples=1000, n_features=20, random_state=42)

# Split the data into training and testing sets
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Example usage:
# Create a CustomRandomForestClassifier with a specified number of trees for training
rf = CustomRandomForestClassifier(n_estimators=100, n_trees=50, random_state=42)

# Fit the classifier to your training data
rf.fit(X_train, y_train)

# Get class probabilities and classes for each tree
y_prob_list, y_pred_list = rf.custom_predict(X_test)

# Score using the number of trees used during training
scores = rf.custom_score(X_test, y_test)

for tree_num, (y_prob, y_pred, score) in enumerate(zip(y_prob_list, y_pred_list, scores), start=1):
    print(f"Tree {tree_num}: ROC AUC = {score:.4f}")

    # Print probability and predicted class for the first sample
    print(f"   Sample 1 - Probability: {y_prob[0, 1]:.4f}, Predicted Class: {y_pred[0]}")

    # You can loop through more samples or trees as needed


Tree 1: ROC AUC = 0.8460
   Sample 1 - Probability: 1.0000, Predicted Class: 1.0
Tree 2: ROC AUC = 0.8173
   Sample 1 - Probability: 0.0000, Predicted Class: 0.0
Tree 3: ROC AUC = 0.7570
   Sample 1 - Probability: 1.0000, Predicted Class: 1.0
Tree 4: ROC AUC = 0.8011
   Sample 1 - Probability: 1.0000, Predicted Class: 1.0
Tree 5: ROC AUC = 0.7074
   Sample 1 - Probability: 1.0000, Predicted Class: 1.0
Tree 6: ROC AUC = 0.7705
   Sample 1 - Probability: 1.0000, Predicted Class: 1.0
Tree 7: ROC AUC = 0.7946
   Sample 1 - Probability: 0.0000, Predicted Class: 0.0
Tree 8: ROC AUC = 0.7616
   Sample 1 - Probability: 1.0000, Predicted Class: 1.0
Tree 9: ROC AUC = 0.7817
   Sample 1 - Probability: 1.0000, Predicted Class: 1.0
Tree 10: ROC AUC = 0.7789
   Sample 1 - Probability: 1.0000, Predicted Class: 1.0
Tree 11: ROC AUC = 0.7563
   Sample 1 - Probability: 1.0000, Predicted Class: 1.0
Tree 12: ROC AUC = 0.8589
   Sample 1 - Probability: 1.0000, Predicted Class: 1.0
Tree 13: ROC AUC = 0.7710

In [5]:
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_auc_score
import numpy as np

class CustomRandomForestClassifier(RandomForestClassifier):
    def __init__(self, n_trees=None, **kwargs):
        self.n_trees = n_trees  # Store the number of trees used during training
        super().__init__(**kwargs)  # Call the parent constructor with any additional arguments

    def custom_predict(self, X):
        """
        Predict the class probabilities for each sample in X using the number of trees used during training.

        Parameters:
        - X: Input features for prediction.

        Returns:
        - y_prob: List of predicted class probabilities for each tree.
        - y_pred: List of predicted classes for each tree.
        """
        if self.n_trees is None:
            raise ValueError("Number of trees used during training is not available.")

        y_prob = []
        y_pred = []

        for estimator in self.estimators_[:self.n_trees]:
            # Predict class probabilities for each tree
            y_prob_tree = estimator.predict_proba(X)
            y_prob.append(y_prob_tree)

            # Predict classes for each tree
            y_pred_tree = estimator.predict(X)
            y_pred.append(y_pred_tree)

        return y_prob, y_pred

    def custom_score(self, X, y):
        """
        Score the classifier using the number of trees used during training.

        Parameters:
        - X: Input features for scoring.
        - y: Ground truth labels.

        Returns:
        - scores: List of ROC AUC scores for each tree.
        """
        y_probs, _ = self.custom_predict(X)

        # Calculate ROC AUC score for each tree and return as a list
        scores = [roc_auc_score(y, y_prob, multi_class='ovr') for y_prob in y_probs]

        return scores

# Generate toy data for multi-class classification
X, y = make_classification(n_samples=1000, n_features=20, n_classes=3, n_clusters_per_class=1, random_state=42)

# Split the data into training and testing sets
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Example usage:
# Create a CustomRandomForestClassifier with a specified number of trees for training
rf = CustomRandomForestClassifier(n_estimators=100, n_trees=50, random_state=42)

# Fit the classifier to your training data
rf.fit(X_train, y_train)

# Get class probabilities and classes for each tree
y_prob_list, y_pred_list = rf.custom_predict(X_test)

# Score using the number of trees used during training
scores = rf.custom_score(X_test, y_test)

for tree_num, (y_prob, y_pred, score) in enumerate(zip(y_prob_list, y_pred_list, scores), start=1):
    print(f"Tree {tree_num}: ROC AUC = {score:.4f}")

    # Determine the class with the highest probability for the first sample
    max_prob_class = np.argmax(y_prob[0])
    print(f"   Sample 1 - Probability Array: {y_prob[0]}, Predicted Class: {max_prob_class}")

    # You can loop through more samples or trees as needed


Tree 1: ROC AUC = 0.8767
   Sample 1 - Probability Array: [0. 0. 1.], Predicted Class: 2
Tree 2: ROC AUC = 0.8607
   Sample 1 - Probability Array: [0. 0. 1.], Predicted Class: 2
Tree 3: ROC AUC = 0.8713
   Sample 1 - Probability Array: [0. 0. 1.], Predicted Class: 2
Tree 4: ROC AUC = 0.8677
   Sample 1 - Probability Array: [0. 1. 0.], Predicted Class: 1
Tree 5: ROC AUC = 0.7799
   Sample 1 - Probability Array: [0. 1. 0.], Predicted Class: 1
Tree 6: ROC AUC = 0.8608
   Sample 1 - Probability Array: [0. 1. 0.], Predicted Class: 1
Tree 7: ROC AUC = 0.8705
   Sample 1 - Probability Array: [0. 1. 0.], Predicted Class: 1
Tree 8: ROC AUC = 0.8325
   Sample 1 - Probability Array: [0. 0. 1.], Predicted Class: 2
Tree 9: ROC AUC = 0.8811
   Sample 1 - Probability Array: [0. 0. 1.], Predicted Class: 2
Tree 10: ROC AUC = 0.8564
   Sample 1 - Probability Array: [0. 0. 1.], Predicted Class: 2
Tree 11: ROC AUC = 0.8331
   Sample 1 - Probability Array: [0. 0. 1.], Predicted Class: 2
Tree 12: ROC AUC = 

In [7]:
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
import numpy as np

class CustomRandomForestClassifier(RandomForestClassifier):
    def __init__(self, n_trees=None, **kwargs):
        self.n_trees = n_trees  # Store the number of trees used during training
        super().__init__(**kwargs)  # Call the parent constructor with any additional arguments

    def custom_predict(self, X):
        """
        Predict the class probabilities for each sample in X using the number of trees used during training.

        Parameters:
        - X: Input features for prediction.

        Returns:
        - y_prob: List of predicted class probabilities for each tree.
        - y_pred: List of predicted classes for each tree.
        """
        if self.n_trees is None:
            raise ValueError("Number of trees used during training is not available.")

        y_prob = []
        y_pred = []

        for estimator in self.estimators_[:self.n_trees]:
            # Predict class probabilities for each tree
            y_prob_tree = estimator.predict_proba(X)
            y_prob.append(y_prob_tree)

            # Predict classes for each tree
            y_pred_tree = estimator.predict(X)
            y_pred.append(y_pred_tree)

        return y_prob, y_pred

    def custom_score(self, X, y):
        """
        Score the classifier using the number of trees used during training.

        Parameters:
        - X: Input features for scoring.
        - y: Ground truth labels.

        Returns:
        - scores: List of ROC AUC scores for each tree.
        """
        y_probs, _ = self.custom_predict(X)

        # Calculate ROC AUC score for each tree and return as a list
        scores = [roc_auc_score(y, y_prob, multi_class='ovr') for y_prob in y_probs]

        return scores

    def get_probability_and_predicted_class(self, X, sample_index):
        """
        Get class probabilities and predicted class with the highest probability for a specific sample.

        Parameters:
        - X: Input features for prediction.
        - sample_index: Index of the sample for which to retrieve probabilities and predicted class.

        Returns:
        - y_prob_sample: List of predicted class probabilities for the specified sample for each tree.
        - y_pred_sample: List of predicted classes for the specified sample for each tree.
        - max_prob_class: Predicted class with the highest probability for the specified sample.
        """
        y_prob_list, y_pred_list = self.custom_predict(X)
        y_prob_sample = [y_prob[sample_index] for y_prob in y_prob_list]
        y_pred_sample = [y_pred[sample_index] for y_pred in y_pred_list]
        max_prob_class = np.argmax(np.mean(y_prob_sample, axis=0))
        return y_prob_sample, y_pred_sample, max_prob_class

# Generate toy data for multi-class classification
X, y = make_classification(n_samples=1000, n_features=20, n_classes=3, n_clusters_per_class=1, random_state=42)

# Split the data into training and testing sets
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create a CustomRandomForestClassifier with a specified number of trees for training
rf = CustomRandomForestClassifier(n_estimators=100, n_trees=50, random_state=42)

# Fit the classifier to your training data
rf.fit(X_train, y_train)

# Get class probabilities and predicted classes for the first sample
sample_index = 0
y_prob_sample, y_pred_sample, max_prob_class = rf.get_probability_and_predicted_class(X_test, sample_index)

print(f"Sample {sample_index + 1} - Predicted Class: {max_prob_class}")
print("Class Probabilities:")
for class_idx, prob in enumerate(y_prob_sample[0]):
    print(f"Class {class_idx}: {prob:.4f}")

Sample 1 - Predicted Class: 2
Class Probabilities:
Class 0: 0.0000
Class 1: 0.0000
Class 2: 1.0000


In [9]:
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_auc_score
import numpy as np

class CustomRandomForestClassifier(RandomForestClassifier):
    def __init__(self, n_trees=None, **kwargs):
        self.n_trees = n_trees  # Store the number of trees used during training
        super().__init__(**kwargs)  # Call the parent constructor with any additional arguments

    def custom_predict(self, X, return_proba=True):
        """
        Predict the class probabilities or classes for each sample in X using the number of trees used during training.

        Parameters:
        - X: Input features for prediction.
        - return_proba: If True, return class probabilities. If False, return predicted classes.

        Returns:
        - y_prob_or_pred: List of predicted class probabilities or classes for each tree.
        """
        if self.n_trees is None:
            raise ValueError("Number of trees used during training is not available.")

        y_prob_or_pred = []

        for estimator in self.estimators_[:self.n_trees]:
            if return_proba:
                # Predict class probabilities for each tree
                y_prob_tree = estimator.predict_proba(X)
                y_prob_or_pred.append(y_prob_tree)
            else:
                # Predict classes for each tree
                y_pred_tree = estimator.predict(X)
                y_prob_or_pred.append(y_pred_tree)

        return y_prob_or_pred

    def custom_score(self, X, y):
        """
        Score the classifier using the number of trees used during training.

        Parameters:
        - X: Input features for scoring.
        - y: Ground truth labels.

        Returns:
        - scores: List of ROC AUC scores for each tree.
        """
        y_probs = self.custom_predict(X, return_proba=True)

        # Calculate ROC AUC score for each tree and return as a list
        scores = [roc_auc_score(y, y_prob, multi_class='ovr') for y_prob in y_probs]

        return scores


# Generate toy data for multi-class classification
X, y = make_classification(n_samples=1000, n_features=20, n_classes=3, n_clusters_per_class=1, random_state=42)

# Split the data into training and testing sets
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Example usage:
# Create a CustomRandomForestClassifier with a specified number of trees for training
rf = CustomRandomForestClassifier(n_estimators=100, n_trees=50, random_state=42)

# Fit the classifier to your training data
rf.fit(X_train, y_train)

# Get class probabilities for each tree
y_prob_list = rf.custom_predict(X_test, return_proba=True)

# Get predicted classes for each tree
y_pred_list = rf.custom_predict(X_test, return_proba=False)

# Score using the number of trees used during training
scores = rf.custom_score(X_test, y_test)

for tree_num, (y_prob, y_pred, score) in enumerate(zip(y_prob_list, y_pred_list, scores), start=1):
    print(f"Tree {tree_num}: ROC AUC = {score:.4f}")

    # Determine the class with the highest probability for the first sample
    max_prob_class = np.argmax(y_prob[0])
    print(f"   Sample 1 - Probability Array: {y_prob[0]}, Predicted Class: {max_prob_class}")

    # You can loop through more samples or trees as needed


Tree 1: ROC AUC = 0.8767
   Sample 1 - Probability Array: [0. 0. 1.], Predicted Class: 2
Tree 2: ROC AUC = 0.8607
   Sample 1 - Probability Array: [0. 0. 1.], Predicted Class: 2
Tree 3: ROC AUC = 0.8713
   Sample 1 - Probability Array: [0. 0. 1.], Predicted Class: 2
Tree 4: ROC AUC = 0.8677
   Sample 1 - Probability Array: [0. 1. 0.], Predicted Class: 1
Tree 5: ROC AUC = 0.7799
   Sample 1 - Probability Array: [0. 1. 0.], Predicted Class: 1
Tree 6: ROC AUC = 0.8608
   Sample 1 - Probability Array: [0. 1. 0.], Predicted Class: 1
Tree 7: ROC AUC = 0.8705
   Sample 1 - Probability Array: [0. 1. 0.], Predicted Class: 1
Tree 8: ROC AUC = 0.8325
   Sample 1 - Probability Array: [0. 0. 1.], Predicted Class: 2
Tree 9: ROC AUC = 0.8811
   Sample 1 - Probability Array: [0. 0. 1.], Predicted Class: 2
Tree 10: ROC AUC = 0.8564
   Sample 1 - Probability Array: [0. 0. 1.], Predicted Class: 2
Tree 11: ROC AUC = 0.8331
   Sample 1 - Probability Array: [0. 0. 1.], Predicted Class: 2
Tree 12: ROC AUC = 

In [12]:
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_auc_score
import numpy as np

class CustomRandomForestClassifier(RandomForestClassifier):
    def __init__(self, n_trees=None, **kwargs):
        self.n_trees = n_trees  # Store the number of trees used during training
        super().__init__(**kwargs)  # Call the parent constructor with any additional arguments

    def custom_predict(self, X, return_proba=True):
        """
        Predict the class probabilities or classes for each sample in X using the number of trees used during training.

        Parameters:
        - X: Input features for prediction.
        - return_proba: If True, return class probabilities. If False, return predicted classes.

        Returns:
        - y_prob_or_pred: List of predicted class probabilities or classes for each tree.
        """
        if self.n_trees is None:
            raise ValueError("Number of trees used during training is not available.")

        y_prob_or_pred = []

        for estimator in self.estimators_[:self.n_trees]:
            if return_proba:
                # Predict class probabilities for each tree
                y_prob_tree = estimator.predict_proba(X)
                y_prob_or_pred.append(y_prob_tree)
            else:
                # Predict classes for each tree
                y_pred_tree = estimator.predict(X)
                y_prob_or_pred.append(y_pred_tree)

        return y_prob_or_pred

    def custom_score(self, X, y):
        """
        Score the classifier using the number of trees used during training.

        Parameters:
        - X: Input features for scoring.
        - y: Ground truth labels.

        Returns:
        - scores: List of ROC AUC scores for each tree.
        """
        y_probs = self.custom_predict(X, return_proba=True)

        # Calculate ROC AUC score for each tree and return as a list
        scores = [roc_auc_score(y, y_prob, multi_class='ovr') for y_prob in y_probs]

        return scores

    def print_tree_information(self, X, y, sample_index):
        """
        Print tree-specific information for a specific sample.

        Parameters:
        - X: Input features for prediction.
        - y: Ground truth labels.
        - sample_index: Index of the sample for which to print the information.
        """
        y_prob_list = self.custom_predict(X, return_proba=True)
        max_prob_class = np.argmax(np.mean(y_prob_list, axis=0)[sample_index])
        print(f"Sample {sample_index + 1} - Probability Array: {y_prob_list[0][sample_index]}, Predicted Class: {max_prob_class}")

# Generate toy data for multi-class classification
X, y = make_classification(n_samples=1000, n_features=20, n_classes=3, n_clusters_per_class=1, random_state=42)

# Split the data into training and testing sets
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Example usage:
# Create a CustomRandomForestClassifier with a specified number of trees for training
rf = CustomRandomForestClassifier(n_estimators=100, n_trees=50, random_state=42)

# Fit the classifier to your training data
rf.fit(X_train, y_train)

# Score using the number of trees used during training
scores = rf.custom_score(X_test, y_test)

for tree_num, score in enumerate(scores, start=1):
    print(f"Tree {tree_num}: ROC AUC = {score:.4f}")

    # Determine whether to print detailed information for a specific sample
    sample_index = 0  # Change this index to print information for a specific sample
    should_print_detailed_info = True  # Set to True to print detailed info, False to skip
    if should_print_detailed_info:
        rf.print_tree_information(X_test, y_test, sample_index)

Tree 1: ROC AUC = 0.8767
Sample 1 - Probability Array: [0. 0. 1.], Predicted Class: 2
Tree 2: ROC AUC = 0.8607
Sample 1 - Probability Array: [0. 0. 1.], Predicted Class: 2
Tree 3: ROC AUC = 0.8713
Sample 1 - Probability Array: [0. 0. 1.], Predicted Class: 2
Tree 4: ROC AUC = 0.8677
Sample 1 - Probability Array: [0. 0. 1.], Predicted Class: 2
Tree 5: ROC AUC = 0.7799
Sample 1 - Probability Array: [0. 0. 1.], Predicted Class: 2
Tree 6: ROC AUC = 0.8608
Sample 1 - Probability Array: [0. 0. 1.], Predicted Class: 2
Tree 7: ROC AUC = 0.8705
Sample 1 - Probability Array: [0. 0. 1.], Predicted Class: 2
Tree 8: ROC AUC = 0.8325
Sample 1 - Probability Array: [0. 0. 1.], Predicted Class: 2
Tree 9: ROC AUC = 0.8811
Sample 1 - Probability Array: [0. 0. 1.], Predicted Class: 2
Tree 10: ROC AUC = 0.8564
Sample 1 - Probability Array: [0. 0. 1.], Predicted Class: 2
Tree 11: ROC AUC = 0.8331
Sample 1 - Probability Array: [0. 0. 1.], Predicted Class: 2
Tree 12: ROC AUC = 0.8903
Sample 1 - Probability Arr

#BEST

In [52]:
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_auc_score, accuracy_score, f1_score
import numpy as np

class CustomRandomForestClassifier(RandomForestClassifier):
    def __init__(self, n_trees=None, **kwargs):
        self.n_trees = n_trees  # Store the number of trees used during training
        super().__init__(**kwargs)  # Call the parent constructor with any additional arguments

    def custom_predict(self, X, return_proba=True):
        """
        Predict the class probabilities or classes for each sample in X using the number of trees used during training.

        Parameters:
        - X: Input features for prediction.
        - return_proba: If True, return class probabilities. If False, return predicted classes.

        Returns:
        - y_prob_or_pred: List of predicted class probabilities or predicted classes for each tree.
        """
        if self.n_trees is None:
            raise ValueError("Number of trees used during training is not available.")

        y_prob_or_pred = []

        for estimator in self.estimators_[:self.n_trees]:
            # Predict class probabilities for each tree
            y_prob_tree = estimator.predict_proba(X)
            y_prob_or_pred.append(y_prob_tree)

        if not return_proba:
            # Compute predicted classes based on the probabilities
            y_pred_or_proba = [np.argmax(y_prob, axis=1) for y_prob in y_prob_or_pred]
        else:
            y_pred_or_proba = y_prob_or_pred

        return y_pred_or_proba

    def custom_scores(self, X, y):
        """
        Score the classifier using the number of trees used during training for ROC AUC, accuracy, and F1.

        Parameters:
        - X: Input features for scoring.
        - y: Ground truth labels.

        Returns:
        - scores: Dictionary containing scores for ROC AUC, accuracy, and F1 for each tree.
        """
        scores = {'roc_auc': [], 'accuracy': [], 'f1': []}
        y_preds = self.custom_predict(X, return_proba=True)  # Use class probabilities

        if isinstance(y_preds[0], list):
            # If y_preds is a list of lists, flatten it to a 2D array
            y_preds = [np.concatenate(y_pred_list, axis=1) for y_pred_list in y_preds]

        y_pred_avg = np.mean(y_preds, axis=0)  # Average the predicted probabilities across trees

        for y_pred in y_preds:
            # Calculate ROC AUC score for each tree
            roc_auc = roc_auc_score(y, y_pred, multi_class='ovr', average='macro')
            scores['roc_auc'].append(roc_auc)

            # Calculate accuracy score for each tree
            accuracy = accuracy_score(y, np.argmax(y_pred, axis=1))
            scores['accuracy'].append(accuracy)

            # Calculate macro F1 score for each tree
            f1 = f1_score(y, np.argmax(y_pred, axis=1), average='macro')
            scores['f1'].append(f1)

        return scores


# Generate toy data for multi-class classification
X, y = make_classification(n_samples=1000, n_features=20, n_classes=3, n_clusters_per_class=1, random_state=42)

# Split the data into training and testing sets
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Example usage:
# Create a CustomRandomForestClassifier with a specified number of trees for training
rf = CustomRandomForestClassifier(n_estimators=100, n_trees=50, random_state=42)

# Fit the classifier to your training data
rf.fit(X_train, y_train)

# Get scores using the number of trees used during training for ROC AUC, accuracy, and F1
scores = rf.custom_scores(X_test, y_test)

# Print scores for each tree
for tree_num, tree_scores in enumerate(zip(*scores.values()), start=1):
    print(f"Tree {tree_num}:")
    for metric, score in zip(scores.keys(), tree_scores):
        print(f"   {metric}: {score:.4f}")

    # Print the probability array and predicted class for a specific sample (e.g., sample 1)
    sample_index = 0  # Change this index to print information for a different sample
    y_prob_sample = np.array([y_pred[sample_index] for y_pred in rf.custom_predict(X_test, return_proba=True)])
    y_pred_sample = np.array([y_pred[sample_index] for y_pred in rf.custom_predict(X_test, return_proba=False)])

    print(f"   Sample {sample_index + 1} - Probability Array: {y_prob_sample}")
    print(f"   Sample {sample_index + 1} - Predicted Class: {y_pred_sample}")


Tree 1:
   roc_auc: 0.8767
   accuracy: 0.8350
   f1: 0.8357
   Sample 1 - Probability Array: [[0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 1. 0.]
 [0. 0. 1.]
 [1. 0. 0.]
 [0. 0. 1.]
 [0. 1. 0.]
 [0. 0. 1.]
 [0. 1. 0.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 1. 0.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [1. 0. 0.]
 [0. 0. 1.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]]
   Sample 1 - Predicted Class: [2 2 2 1 1 1 1 2 2 2 2 2 2 1 1 2 2 2 1 1 2 2 2 2 2 2 1 2 0 2 1 2 1 2 2 1 2
 2 2 2 2 2 0 2 1 1 2 2 2 2]
Tree 2:
   roc_auc: 0.8607
   accuracy: 0.8150
   f1: 0.8131
   Sample 1 - Probability Array: [[0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 

###Final Code that should be submitted


In [63]:
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_auc_score, accuracy_score, f1_score
import numpy as np

class CustomRandomForestClassifier(RandomForestClassifier):
    def __init__(self, n_trees=None, **kwargs):
        self.n_trees = n_trees  # Store the number of trees used during training
        super().__init__(**kwargs)  # Call the parent constructor with any additional arguments

    def custom_predict(self, X, return_proba=True):
        """
        Predict the class probabilities or classes for each sample in X using the number of trees used during training.

        Parameters:
        - X: Input features for prediction.
        - return_proba: If True, return class probabilities. If False, return predicted classes.

        Returns:
        - y_prob_or_pred: List of predicted class probabilities or predicted classes for each tree.
        """
        if self.n_trees is None:
            raise ValueError("Number of trees used during training is not available.")

        y_prob_or_pred = []

        for estimator in self.estimators_[:self.n_trees]:
            # Predict class probabilities for each tree
            y_prob_tree = estimator.predict_proba(X)
            y_prob_or_pred.append(y_prob_tree)

        if not return_proba:
            # Compute predicted classes based on the probabilities
            y_pred_or_proba = [np.argmax(y_prob, axis=1) for y_prob in y_prob_or_pred]
        else:
            y_pred_or_proba = y_prob_or_pred

        return y_pred_or_proba

    def custom_scores(self, X, y):
        """
        Score the classifier using the number of trees used during training for ROC AUC, accuracy, and F1.

        Parameters:
        - X: Input features for scoring.
        - y: Ground truth labels.

        Returns:
        - scores: Dictionary containing scores for ROC AUC, accuracy, and F1 for each tree.
        """
        scores = {'roc_auc': [], 'accuracy': [], 'f1': []}
        y_preds = self.custom_predict(X, return_proba=True)  # Use class probabilities

        if isinstance(y_preds[0], list):
            # If y_preds is a list of lists, flatten it to a 2D array
            y_preds = [np.concatenate(y_pred_list, axis=1) for y_pred_list in y_preds]

        y_pred_avg = np.mean(y_preds, axis=0)  # Average the predicted probabilities across trees

        for y_pred in y_preds:
            # Check if it's a binary or multiclass classification task
            if len(np.unique(y)) == 2:
                binary = True
            else:
                binary = False

            # Calculate ROC AUC score
            if binary:
                roc_auc = roc_auc_score(y, y_pred[:, 1])  # Use the probabilities of the positive class
            else:
                roc_auc = roc_auc_score(y, y_pred, multi_class='ovr', average='macro')
            scores['roc_auc'].append(roc_auc)

            # Calculate accuracy score
            accuracy = accuracy_score(y, np.argmax(y_pred, axis=1))
            scores['accuracy'].append(accuracy)

            # Calculate F1 score
            f1 = f1_score(y, np.argmax(y_pred, axis=1), average='macro')
            scores['f1'].append(f1)

        return scores

# Generate toy data for classification
X, y = make_classification(n_samples=1000, n_features=20, n_classes=2, n_clusters_per_class=1, random_state=42)

# Split the data into training and testing sets
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Example usage:
# Create a CustomRandomForestClassifier with a specified number of trees for training
rf = CustomRandomForestClassifier(n_estimators=100, n_trees=50, random_state=42)

# Fit the classifier to your training data
rf.fit(X_train, y_train)

# Get scores using the number of trees used during training for ROC AUC, accuracy, and F1
scores = rf.custom_scores(X_test, y_test)

# Print scores for each tree
for tree_num, tree_scores in enumerate(zip(*scores.values()), start=1):
    print(f"Tree {tree_num}:")
    for metric, score in zip(scores.keys(), tree_scores):
        print(f"   {metric}: {score:.4f}")

    # Print the probability array and predicted class for a specific sample (e.g., sample 1)
    sample_index = 0  # Change this index to print information for a different sample
    y_prob_sample = np.array([y_pred[sample_index] for y_pred in rf.custom_predict(X_test, return_proba=True)])
    y_pred_sample = np.array([y_pred[sample_index] for y_pred in rf.custom_predict(X_test, return_proba=False)])

    print(f"   Sample {sample_index + 1} - Probability Array: {y_prob_sample}")
    print(f"   Sample {sample_index + 1} - Predicted Class: {y_pred_sample}")
    print("_"*100,'\n')


Tree 1:
   roc_auc: 0.9005
   accuracy: 0.9000
   f1: 0.9000
   Sample 1 - Probability Array: [[1. 0.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]]
   Sample 1 - Predicted Class: [0 1 1 0 1 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 1 0 0
 0 0 0 0 0 0 0 0 1 0 0 0 0]
____________________________________________________________________________________________________ 

Tree 2:
   roc_auc: 0.8859
   accuracy: 0.8850
   f1: 0.8850
   Sample 1 - Probability Array: [[1. 0.]
 [0. 1.]
 [0. 1.]
 [1. 0.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [0. 1.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.]
 [1. 0.

In [48]:
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_auc_score, accuracy_score, f1_score
import numpy as np

class CustomRandomForestClassifier(RandomForestClassifier):
    def __init__(self, n_trees=None, default_scoring='roc_auc', **kwargs):
        self.n_trees = n_trees  # Store the number of trees used during training
        self.default_scoring = default_scoring
        super().__init__(**kwargs)  # Call the parent constructor with any additional arguments

    def custom_predict(self, X, return_proba=True):
        """
        Predict the class probabilities or classes for each sample in X using the number of trees used during training.

        Parameters:
        - X: Input features for prediction.
        - return_proba: If True, return class probabilities. If False, return predicted classes.

        Returns:
        - y_prob_or_pred: List of predicted class probabilities or predicted classes for each tree.
        """
        if self.n_trees is None:
            raise ValueError("Number of trees used during training is not available.")

        y_prob_or_pred = []

        for estimator in self.estimators_[:self.n_trees]:
            # Predict class probabilities for each tree
            y_prob_tree = estimator.predict_proba(X)
            y_prob_or_pred.append(y_prob_tree)

        if not return_proba:
            # Compute predicted classes based on the probabilities
            y_pred_or_proba = [np.argmax(y_prob, axis=1) for y_prob in y_prob_or_pred]
        else:
            y_pred_or_proba = y_prob_or_pred

        return y_pred_or_proba

    def custom_scores(self, X, y, scoring=None):
        """
        Score the classifier using the number of trees used during training for various scoring metrics.

        Parameters:
        - X: Input features for scoring.
        - y: Ground truth labels.
        - scoring: Scoring functions to use as a list (default is None for the default scoring metric).

        Returns:
        - scores: Dictionary containing scores for each scoring metric and each tree.
        """
        if scoring is None:
            scoring = [self.default_scoring]

        scores = {metric: [] for metric in scoring}
        y_preds = self.custom_predict(X, return_proba=True)  # Use class probabilities

        if isinstance(y_preds[0], list):
            # If y_preds is a list of lists, flatten it to a 2D array
            y_preds = [np.concatenate(y_pred_list, axis=1) for y_pred_list in y_preds]

        for y_pred in y_preds:
            for metric in scoring:
                if metric == 'roc_auc':
                    roc_auc = roc_auc_score(y, y_pred, multi_class='ovr', average='macro')
                    scores[metric].append(roc_auc)
                elif metric == 'accuracy':
                    accuracy = accuracy_score(y, np.argmax(y_pred, axis=1))
                    scores[metric].append(accuracy)
                elif metric == 'f1':
                    f1 = f1_score(y, np.argmax(y_pred, axis=1), average='macro')
                    scores[metric].append(f1)
                else:
                    raise ValueError("Unsupported scoring function. Choose from 'roc_auc', 'accuracy', 'f1', etc.")

        return scores


# Generate toy data for multi-class classification
X, y = make_classification(n_samples=1000, n_features=20, n_classes=3, n_clusters_per_class=1, random_state=42)

# Split the data into training and testing sets
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Example usage:
# Create a CustomRandomForestClassifier with a specified number of trees for training
rf = CustomRandomForestClassifier(n_estimators=100, n_trees=50, default_scoring='roc_auc', random_state=42)

# Fit the classifier to your training data
rf.fit(X_train, y_train)

# Define scoring metrics (default is ROC AUC)
scoring_metrics = ['roc_auc', 'accuracy', 'f1']

# Get scores using the number of trees used during training for different scoring metrics
scores = rf.custom_scores(X_test, y_test, scoring=scoring_metrics)

# Print scores for each tree and each scoring metric
for tree_num, tree_scores in enumerate(zip(*scores.values()), start=1):
    print(f"Tree {tree_num}:")
    for metric, score in zip(scores.keys(), tree_scores):
        print(f"   {metric}: {score:.4f}")


Tree 1:
   roc_auc: 0.8767
   accuracy: 0.8350
   f1: 0.8357
Tree 2:
   roc_auc: 0.8607
   accuracy: 0.8150
   f1: 0.8131
Tree 3:
   roc_auc: 0.8713
   accuracy: 0.8300
   f1: 0.8286
Tree 4:
   roc_auc: 0.8677
   accuracy: 0.8250
   f1: 0.8235
Tree 5:
   roc_auc: 0.7799
   accuracy: 0.7050
   f1: 0.7106
Tree 6:
   roc_auc: 0.8608
   accuracy: 0.8150
   f1: 0.8150
Tree 7:
   roc_auc: 0.8705
   accuracy: 0.8300
   f1: 0.8262
Tree 8:
   roc_auc: 0.8325
   accuracy: 0.7800
   f1: 0.7752
Tree 9:
   roc_auc: 0.8811
   accuracy: 0.8450
   f1: 0.8406
Tree 10:
   roc_auc: 0.8564
   accuracy: 0.8100
   f1: 0.8090
Tree 11:
   roc_auc: 0.8331
   accuracy: 0.7750
   f1: 0.7807
Tree 12:
   roc_auc: 0.8903
   accuracy: 0.8550
   f1: 0.8534
Tree 13:
   roc_auc: 0.8429
   accuracy: 0.7900
   f1: 0.7913
Tree 14:
   roc_auc: 0.8836
   accuracy: 0.8450
   f1: 0.8449
Tree 15:
   roc_auc: 0.8753
   accuracy: 0.8300
   f1: 0.8320
Tree 16:
   roc_auc: 0.8366
   accuracy: 0.7800
   f1: 0.7859
Tree 17:
   roc_a

In [42]:
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_auc_score, accuracy_score, f1_score
import numpy as np

class CustomRandomForestClassifier(RandomForestClassifier):
    def __init__(self, n_trees=None, **kwargs):
        self.n_trees = n_trees  # Store the number of trees used during training
        super().__init__(**kwargs)  # Call the parent constructor with any additional arguments

    def custom_predict(self, X, return_proba=True):
        """
        Predict the class probabilities or classes for each sample in X using the number of trees used during training.

        Parameters:
        - X: Input features for prediction.
        - return_proba: If True, return class probabilities. If False, return predicted classes.

        Returns:
        - y_prob_or_pred: List of predicted class probabilities or predicted classes for each tree.
        """
        if self.n_trees is None:
            raise ValueError("Number of trees used during training is not available.")

        y_prob_or_pred = []

        for estimator in self.estimators_[:self.n_trees]:
            # Predict class probabilities for each tree
            y_prob_tree = estimator.predict_proba(X)
            y_prob_or_pred.append(y_prob_tree)

        if not return_proba:
            # Compute predicted classes based on the probabilities
            y_pred_or_proba = [np.argmax(y_prob, axis=1) for y_prob in y_prob_or_pred]
        else:
            y_pred_or_proba = y_prob_or_pred

        return y_pred_or_proba

    def custom_score(self, X, y, scoring='roc_auc'):
        """
        Score the classifier using the number of trees used during training.

        Parameters:
        - X: Input features for scoring.
        - y: Ground truth labels.
        - scoring: Scoring function to use ('roc_auc', 'accuracy', 'f1', etc.).

        Returns:
        - score: The score based on the chosen scoring function.
        """
        y_preds = self.custom_predict(X, return_proba=True)  # Use class probabilities

        if isinstance(y_preds[0], list):
            # If y_preds is a list of lists, flatten it to a 2D array
            y_preds = [np.concatenate(y_pred_list, axis=1) for y_pred_list in y_preds]

        y_pred_avg = np.mean(y_preds, axis=0)  # Average the predicted probabilities across trees

        if scoring == 'roc_auc':
            if len(np.unique(y)) == 2:
                score = roc_auc_score(y, y_pred_avg, multi_class='ovr', average='macro')
            else:
                score = roc_auc_score(y, y_pred_avg, multi_class='ovr')  # Use 'ovr' for multi-class
        elif scoring == 'accuracy':
            score = accuracy_score(y, np.argmax(y_pred_avg, axis=1))
        elif scoring == 'f1':
            # Calculate macro F1 score
            score = f1_score(y, np.argmax(y_pred_avg, axis=1), average='macro')
        else:
            raise ValueError("Unsupported scoring function. Choose from 'roc_auc', 'accuracy', 'f1', etc.")

        return score


# Generate toy data for multi-class classification
X, y = make_classification(n_samples=1000, n_features=20, n_classes=3, n_clusters_per_class=1, random_state=42)

# Split the data into training and testing sets
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Example usage:
# Create a CustomRandomForestClassifier with a specified number of trees for training
rf = CustomRandomForestClassifier(n_estimators=100, n_trees=50, random_state=42)

# Fit the classifier to your training data
rf.fit(X_train, y_train)

# ... (previous code)

# Score using the number of trees used during training with different scoring functions
scoring_functions = ['roc_auc', 'accuracy', 'f1']

for scoring_function in scoring_functions:
    score = rf.custom_score(X_test, y_test, scoring=scoring_function)
    print(f"Scoring Function: {scoring_function}")
    print(f"Score = {score:.4f}")

    # Print the probability array and predicted class for a specific sample (e.g., sample 1)
    sample_index = 0  # Change this index to print information for a different sample
    y_prob_sample = np.array([y_pred[sample_index] for y_pred in rf.custom_predict(X_test, return_proba=True)])
    y_pred_sample = np.array([y_pred[sample_index] for y_pred in rf.custom_predict(X_test, return_proba=False)])

    print(f"   Sample {sample_index + 1} - Probability Array: {y_prob_sample}")
    print(f"   Sample {sample_index + 1} - Predicted Class: {y_pred_sample}")

Scoring Function: roc_auc
Score = 0.9762
   Sample 1 - Probability Array: [[0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 1. 0.]
 [0. 0. 1.]
 [1. 0. 0.]
 [0. 0. 1.]
 [0. 1. 0.]
 [0. 0. 1.]
 [0. 1. 0.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 1. 0.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [1. 0. 0.]
 [0. 0. 1.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]]
   Sample 1 - Predicted Class: [2 2 2 1 1 1 1 2 2 2 2 2 2 1 1 2 2 2 1 1 2 2 2 2 2 2 1 2 0 2 1 2 1 2 2 1 2
 2 2 2 2 2 0 2 1 1 2 2 2 2]
Scoring Function: accuracy
Score = 0.9100
   Sample 1 - Probability Array: [[0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 1. 0.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 0. 

In [39]:
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score
import numpy as np

class CustomRandomForestClassifier(RandomForestClassifier):
    def custom_predict(self, X, return_proba=True):
        """
        Predict the class probabilities for each sample in X using the number of trees used during training.

        Parameters:
        - X: Input features for prediction.
        - return_proba: If True, return class probabilities. If False, return predicted classes.

        Returns:
        - y_prob_or_pred: List of predicted class probabilities or predicted classes for each tree.
        """
        y_prob_or_pred = []

        for estimator in self.estimators_:
            # Predict class probabilities for each tree
            y_prob_tree = estimator.predict_proba(X)
            y_prob_or_pred.append(y_prob_tree)

        if not return_proba:
            # Compute predicted classes based on the probabilities
            y_pred_or_proba = [np.argmax(y_prob, axis=1) for y_prob in y_prob_or_pred]
        else:
            y_pred_or_proba = y_prob_or_pred

        return y_pred_or_proba

    def custom_score(self, X, y, scoring='roc_auc'):
        """
        Score the classifier using the number of trees used during training.

        Parameters:
        - X: Input features for scoring.
        - y: Ground truth labels.
        - scoring: Scoring function to use ('roc_auc', 'accuracy', 'f1', etc.).

        Returns:
        - scores: List of scores for each tree.
        """
        y_preds = self.custom_predict(X, return_proba=False)

        if scoring == 'roc_auc':
            if len(np.unique(y)) == 2:
                scores = [roc_auc_score(y, y_pred) for y_pred in y_preds]
            else:
                scores = [roc_auc_score(y, y_pred, multi_class='ovr', average='macro') for y_pred in y_preds]
        elif scoring == 'accuracy':
            scores = [accuracy_score(y, y_pred) for y_pred in y_preds]
        elif scoring == 'f1':
            scores = [f1_score(y, y_pred, average='macro') for y_pred in y_preds]
        else:
            raise ValueError("Unsupported scoring function. Choose from 'roc_auc', 'accuracy', 'f1', etc.")

        return scores

# Generate toy data for multi-class classification
X, y = make_classification(n_samples=1000, n_features=20, n_classes=3, n_clusters_per_class=1, random_state=42)

# Split the data into training and testing sets
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Example usage:
# Create a CustomRandomForestClassifier with a specified number of trees for training
rf = CustomRandomForestClassifier(n_estimators=100, random_state=42)

# Fit the classifier to your training data
rf.fit(X_train, y_train)

# Get class probabilities for each tree
y_prob_list = rf.custom_predict(X_test, return_proba=True)

# Get predicted classes for each tree
y_pred_list = rf.custom_predict(X_test, return_proba=False)

# Define scoring functions
scoring_functions = ['roc_auc', 'accuracy', 'f1']

for scoring_function in scoring_functions:
    scores = rf.custom_score(X_test, y_test, scoring=scoring_function)
    print(f"Scoring Function: {scoring_function}")

    # Print scores for each tree
    for tree_num, score in enumerate(scores, start=1):
        print(f"   Tree {tree_num}: {scoring_function} = {score:.4f}")
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score
import numpy as np

class CustomRandomForestClassifier(RandomForestClassifier):
    def custom_predict(self, X, return_proba=True):
        """
        Predict the class probabilities for each sample in X using the number of trees used during training.

        Parameters:
        - X: Input features for prediction.
        - return_proba: If True, return class probabilities. If False, return predicted classes.

        Returns:
        - y_prob_or_pred: List of predicted class probabilities or predicted classes for each tree.
        """
        y_prob_or_pred = []

        for estimator in self.estimators_:
            # Predict class probabilities for each tree
            y_prob_tree = estimator.predict_proba(X)
            y_prob_or_pred.append(y_prob_tree)

        if not return_proba:
            # Compute predicted classes based on the probabilities
            y_pred_or_proba = [np.argmax(y_prob, axis=1) for y_prob in y_prob_or_pred]
        else:
            y_pred_or_proba = y_prob_or_pred

        return y_pred_or_proba
    def custom_score(self, X, y, scoring='roc_auc'):
        """
        Score the classifier using the number of trees used during training.

        Parameters:
        - X: Input features for scoring.
        - y: Ground truth labels.
        - scoring: Scoring function to use ('roc_auc', 'accuracy', 'f1', etc.).

        Returns:
        - scores: List of scores for each tree.
        """
        y_preds = self.custom_predict(X, return_proba=False)

        if scoring == 'roc_auc':
            if len(np.unique(y)) == 2:
                scores = [roc_auc_score(y, y_pred) for y_pred in y_preds]
            else:
                # Calculate ROC AUC score for each class and average them
                scores = [roc_auc_score(y, y_pred, multi_class='ovr') for y_pred in y_preds]
        elif scoring == 'accuracy':
            # Calculate accuracy score for each tree
            scores = [accuracy_score(y, y_pred) for y_pred in y_preds]
        elif scoring == 'f1':
            # Calculate F1 score for each tree
            scores = [f1_score(y, y_pred, average='macro') for y_pred in y_preds]
        else:
            raise ValueError("Unsupported scoring function. Choose from 'roc_auc', 'accuracy', 'f1', etc.")

        return scores


# Generate toy data for multi-class classification
X, y = make_classification(n_samples=1000, n_features=20, n_classes=3, n_clusters_per_class=1, random_state=42)

# Split the data into training and testing sets
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Example usage:
# Create a CustomRandomForestClassifier with a specified number of trees for training
rf = CustomRandomForestClassifier(n_estimators=100, random_state=42)

# Fit the classifier to your training data
rf.fit(X_train, y_train)

# Get class probabilities for each tree
y_prob_list = rf.custom_predict(X_test, return_proba=True)

# Get predicted classes for each tree
y_pred_list = rf.custom_predict(X_test, return_proba=False)

# Define scoring functions
scoring_functions = ['roc_auc', 'accuracy', 'f1']

for scoring_function in scoring_functions:
    scores = rf.custom_score(X_test, y_test, scoring=scoring_function)
    print(f"Scoring Function: {scoring_function}")

    # Print scores for each tree
    for tree_num, score in enumerate(scores, start=1):
        print(f"   Tree {tree_num}: {scoring_function} = {score:.4f}")


AxisError: ignored