In [None]:
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, f1_score
import numpy as np

class CustomRandomForestClassifier(RandomForestClassifier):
    def __init__(self, n_trees=None, **kwargs):
        self.n_trees = n_trees  # Store the number of trees used during training
        super().__init__(**kwargs)  # Call the parent constructor with any additional arguments

    def custom_predict(self, X, return_proba=False):
        """
        Predict the class probabilities or classes using the specified number of trees
        and aggregate the predictions like a traditional Random Forest.

        Parameters:
        - X: Input features for prediction.
        - return_proba: If True, return class probabilities. If False, return predicted classes.

        Returns:
        - y_prob_or_pred: Predicted class probabilities or predicted classes aggregated across trees.
        """
        if self.n_trees is None:
            raise ValueError("Number of trees used during training is not available.")

        y_prob_or_pred = []

        for estimator in self.estimators_[:self.n_trees]:
            # Predict class probabilities for each tree
            if return_proba:
                y_prob_tree = estimator.predict_proba(X)
            else:
                y_prob_tree = estimator.predict(X)
            y_prob_or_pred.append(y_prob_tree)

        if not return_proba:
            # Compute predicted classes based on majority voting
            y_pred_or_proba = np.mean(y_prob_or_pred, axis=0)
            y_pred_or_proba = np.round(y_pred_or_proba)
        else:
            # Aggregate class probabilities by averaging
            y_pred_or_proba = np.mean(y_prob_or_pred, axis=0)

        return y_pred_or_proba

# Generate toy data for binary classification
X, y = make_classification(n_samples=500, n_features=30, n_classes=2, n_clusters_per_class=1, random_state=42)

# Split the data into training and testing sets
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Example usage:
# Create a CustomRandomForestClassifier with a larger number of trees and matching hyperparameters
rf = CustomRandomForestClassifier(n_estimators=150, n_trees=150, random_state=42, max_depth=10, min_samples_split=2, min_samples_leaf=1)

# Fit the classifier to your training data
rf.fit(X_train, y_train)

# Initialize lists to store accuracy and F1 scores for each tree
tree_accuracies = []
tree_f1_scores = []

# Get predictions and calculate accuracy and F1 score for each tree
for estimator in rf.estimators_[:rf.n_trees]:
    y_pred_tree = estimator.predict(X_test)
    accuracy_tree = accuracy_score(y_test, y_pred_tree)
    f1_tree = f1_score(y_test, y_pred_tree, average = 'micro')
    tree_accuracies.append(accuracy_tree)
    tree_f1_scores.append(f1_tree)

# Print accuracy and F1 score for each tree
for tree_num, (accuracy, f1) in enumerate(zip(tree_accuracies, tree_f1_scores), start=1):
    print(f"Tree {tree_num} - Accuracy: {accuracy:.4f}, F1 Score: {f1:.4f}")

Tree 1 - Accuracy: 0.7000, F1 Score: 0.7000
Tree 2 - Accuracy: 0.8000, F1 Score: 0.8000
Tree 3 - Accuracy: 0.8500, F1 Score: 0.8500
Tree 4 - Accuracy: 0.8000, F1 Score: 0.8000
Tree 5 - Accuracy: 0.7700, F1 Score: 0.7700
Tree 6 - Accuracy: 0.8600, F1 Score: 0.8600
Tree 7 - Accuracy: 0.9200, F1 Score: 0.9200
Tree 8 - Accuracy: 0.7900, F1 Score: 0.7900
Tree 9 - Accuracy: 0.8000, F1 Score: 0.8000
Tree 10 - Accuracy: 0.7600, F1 Score: 0.7600
Tree 11 - Accuracy: 0.7900, F1 Score: 0.7900
Tree 12 - Accuracy: 0.8200, F1 Score: 0.8200
Tree 13 - Accuracy: 0.8800, F1 Score: 0.8800
Tree 14 - Accuracy: 0.8300, F1 Score: 0.8300
Tree 15 - Accuracy: 0.7100, F1 Score: 0.7100
Tree 16 - Accuracy: 0.7700, F1 Score: 0.7700
Tree 17 - Accuracy: 0.8500, F1 Score: 0.8500
Tree 18 - Accuracy: 0.8300, F1 Score: 0.8300
Tree 19 - Accuracy: 0.7800, F1 Score: 0.7800
Tree 20 - Accuracy: 0.8400, F1 Score: 0.8400
Tree 21 - Accuracy: 0.8700, F1 Score: 0.8700
Tree 22 - Accuracy: 0.7900, F1 Score: 0.7900
Tree 23 - Accuracy:

In [None]:
from sklearn.metrics import roc_auc_score

# Create a CustomRandomForestClassifier with a larger number of trees and matching hyperparameters
rf = CustomRandomForestClassifier(n_estimators=150, n_trees=150, random_state=42, max_depth=10, min_samples_split=2, min_samples_leaf=1)

# Fit the classifier to your training data
rf.fit(X_train, y_train)

# Get the final predictions on your test data
final_predictions = rf.custom_predict(X_test)

# Calculate the ROC AUC score
roc_auc = roc_auc_score(y_test, final_predictions)

# You can also calculate the accuracy and F1 score if needed
final_accuracy = accuracy_score(y_test, final_predictions)
final_f1_score = f1_score(y_test, final_predictions)

print(f"Final Accuracy: {final_accuracy:.4f}")
print(f"Final F1 Score: {final_f1_score:.4f}")
print(f"ROC AUC Score: {roc_auc:.4f}")

Final Accuracy: 0.8900
Final F1 Score: 0.8764
ROC AUC Score: 0.9042


In [None]:
RF = RandomForestClassifier(n_estimators=150,random_state=42, max_depth=10, min_samples_split=2, min_samples_leaf=1)
RF.fit(X_train, y_train)

predicted = RF.predict(X_test)

print(accuracy_score(predicted, y_test))
print(roc_auc_score(y_test, final_predictions))
print(f1_score(y_test, final_predictions))

0.89
0.9041666666666668
0.8764044943820225


#Cumulative Across the Tree

In [None]:
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score
import numpy as np

class CustomRandomForestClassifier(RandomForestClassifier):
    def __init__(self, n_trees=None, **kwargs):
        self.n_trees = n_trees  # Store the number of trees used during training
        super().__init__(**kwargs)  # Call the parent constructor with any additional arguments

    def custom_predict(self, X):
        """
        Predict using the specified number of trees and accumulate the scores for each tree.

        Parameters:
        - X: Input features for prediction.

        Returns:
        - y_preds: List of predicted classes for each tree.
        """
        if self.n_trees is None:
            raise ValueError("Number of trees used during training is not available.")

        y_preds = []

        for estimator in self.estimators_[:self.n_trees]:
            y_pred_tree = estimator.predict(X)
            y_preds.append(y_pred_tree)

        return y_preds

# Generate toy data for binary classification
X, y = make_classification(n_samples=500, n_features=10, n_classes=2, n_clusters_per_class=1, random_state=42)

# Split the data into training and testing sets
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Example usage:
# Create a CustomRandomForestClassifier with a larger number of trees and matching hyperparameters
rf = CustomRandomForestClassifier(n_estimators=150, n_trees=150, random_state=42, max_depth=10, min_samples_split=2, min_samples_leaf=1)

# Fit the classifier to your training data
rf.fit(X_train, y_train)

# Get predictions for each tree
tree_predictions = rf.custom_predict(X_test)

# Initialize an array to store cumulative predictions as floats
cumulative_predictions = np.zeros_like(y_test, dtype=float)

# Initialize arrays to store cumulative accuracy, F1, and ROC AUC scores
cumulative_accuracy_scores = []
cumulative_f1_scores = []
cumulative_roc_auc_scores = []

# Calculate scores and accumulate predictions
for i, y_pred_tree in enumerate(tree_predictions, start=1):
    cumulative_predictions += y_pred_tree

    # Calculate cumulative accuracy
    cumulative_accuracy = accuracy_score(y_test, np.round(cumulative_predictions / i))
    cumulative_accuracy_scores.append(cumulative_accuracy)

    # Calculate cumulative F1 score
    cumulative_f1 = f1_score(y_test, np.round(cumulative_predictions / i))
    cumulative_f1_scores.append(cumulative_f1)

    # Calculate cumulative ROC AUC score only when there are more than one tree
    if i > 1:
        cumulative_roc_auc = roc_auc_score(y_test, cumulative_predictions / i)
        cumulative_roc_auc_scores.append(cumulative_roc_auc)

    print(f"Tree {i} - Cumulative Accuracy: {cumulative_accuracy:.4f}, Cumulative F1 Score: {cumulative_f1:.4f}", end="")

    # Print cumulative ROC AUC score only when there are more than one tree
    if i > 1:
        print(f", Cumulative ROC AUC Score: {cumulative_roc_auc:.4f}")
    else:
        print("")  # Just print a newline for the first tree

# Print the cumulative scores
print("\nCumulative Scores:")
for i, (accuracy_score, f1_score, roc_auc_score) in enumerate(zip(cumulative_accuracy_scores, cumulative_f1_scores, cumulative_roc_auc_scores), start=1):
    print(f"For {i} trees - Cumulative Accuracy: {accuracy_score:.4f}, Cumulative F1 Score: {f1_score:.4f}", end="")

    # Print cumulative ROC AUC score only when there are more than one tree
    if i > 1:
        print(f", Cumulative ROC AUC Score: {roc_auc_score:.4f}")
    else:
        print("")  # Just print a newline for the first tree


Tree 1 - Cumulative Accuracy: 0.8300, Cumulative F1 Score: 0.8440
Tree 2 - Cumulative Accuracy: 0.8300, Cumulative F1 Score: 0.8317, Cumulative ROC AUC Score: 0.9188
Tree 3 - Cumulative Accuracy: 0.9000, Cumulative F1 Score: 0.9107, Cumulative ROC AUC Score: 0.9319
Tree 4 - Cumulative Accuracy: 0.8500, Cumulative F1 Score: 0.8571, Cumulative ROC AUC Score: 0.9388
Tree 5 - Cumulative Accuracy: 0.8800, Cumulative F1 Score: 0.8929, Cumulative ROC AUC Score: 0.9368
Tree 6 - Cumulative Accuracy: 0.9000, Cumulative F1 Score: 0.9091, Cumulative ROC AUC Score: 0.9396
Tree 7 - Cumulative Accuracy: 0.8900, Cumulative F1 Score: 0.9027, Cumulative ROC AUC Score: 0.9412
Tree 8 - Cumulative Accuracy: 0.9200, Cumulative F1 Score: 0.9273, Cumulative ROC AUC Score: 0.9527
Tree 9 - Cumulative Accuracy: 0.9100, Cumulative F1 Score: 0.9204, Cumulative ROC AUC Score: 0.9504
Tree 10 - Cumulative Accuracy: 0.9200, Cumulative F1 Score: 0.9286, Cumulative ROC AUC Score: 0.9529
Tree 11 - Cumulative Accuracy: 0.

In [None]:
from sklearn.metrics import accuracy_score, f1_score

# Create a CustomRandomForestClassifier with a larger number of trees and matching hyperparameters
rf = CustomRandomForestClassifier(n_estimators=150, n_trees=150, random_state=42, max_depth=10, min_samples_split=2, min_samples_leaf=1)

# Fit the classifier to your training data
rf.fit(X_train, y_train)

# Get the predictions for each tree
tree_predictions = rf.custom_predict(X_test)

# Initialize an array to store cumulative predictions as floats
cumulative_predictions = np.zeros_like(y_test, dtype=float)

# Aggregate predictions from all trees
for y_pred_tree in tree_predictions:
    cumulative_predictions += y_pred_tree

# Calculate the final accuracy and F1 score
final_predictions = np.round(cumulative_predictions / len(tree_predictions))  # Average the predictions
final_accuracy = accuracy_score(y_test, final_predictions)
final_f1_score = f1_score(y_test, final_predictions)

print(f"Final Accuracy: {final_accuracy:.4f}")
print(f"Final F1 Score: {final_f1_score:.4f}")

Final Accuracy: 0.9200
Final F1 Score: 0.9310


In [None]:
RF = RandomForestClassifier(n_estimators=150,random_state=42, max_depth=10, min_samples_split=2, min_samples_leaf=1)
RF.fit(X_train, y_train)

predicted = RF.predict(X_test)

print(accuracy_score(predicted, y_test))
print(f1_score(y_test, final_predictions))

0.92
0.9310344827586206


##

In [1]:
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, f1_score
import numpy as np

class CustomRandomForestClassifier(RandomForestClassifier):
    def __init__(self, n_trees=None, **kwargs):
        self.n_trees = n_trees  # Store the number of trees used during training
        super().__init__(**kwargs)  # Call the parent constructor with any additional arguments

    def custom_predict(self, X):
        """
        Predict using the specified number of trees and accumulate the scores for each tree.

        Parameters:
        - X: Input features for prediction.

        Returns:
        - y_preds: List of predicted classes for each tree.
        """
        if self.n_trees is None:
            raise ValueError("Number of trees used during training is not available.")

        y_preds = []

        for estimator in self.estimators_[:self.n_trees]:
            y_pred_tree = estimator.predict(X)
            y_preds.append(y_pred_tree)

        return y_preds

# Generate toy data for binary classification
X, y = make_classification(n_samples=500, n_features=10, n_classes=3, n_clusters_per_class=1, random_state=42)

# Split the data into training and testing sets
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Example usage:
# Create a CustomRandomForestClassifier with a larger number of trees and matching hyperparameters
rf = CustomRandomForestClassifier(n_estimators=150, n_trees=150, random_state=42, max_depth=10, min_samples_split=2, min_samples_leaf=1)

# Fit the classifier to your training data
rf.fit(X_train, y_train)

# Get predictions for each tree
tree_predictions = rf.custom_predict(X_test)

# Initialize an array to store cumulative predictions as floats
cumulative_predictions = np.zeros_like(y_test, dtype=float)

# Initialize arrays to store cumulative accuracy and F1 scores
cumulative_accuracy_scores = []
cumulative_f1_scores = []

# Calculate scores and accumulate predictions
for i, y_pred_tree in enumerate(tree_predictions, start=1):
    cumulative_predictions += y_pred_tree

    # Calculate cumulative accuracy
    cumulative_accuracy = accuracy_score(y_test, np.round(cumulative_predictions / i))
    cumulative_accuracy_scores.append(cumulative_accuracy)

    # Calculate cumulative F1 score
    cumulative_f1 = f1_score(y_test, np.round(cumulative_predictions / i), average = 'micro')
    cumulative_f1_scores.append(cumulative_f1)

    print(f"Tree {i} - Cumulative Accuracy: {cumulative_accuracy:.4f}, Cumulative F1 Score: {cumulative_f1:.4f}")

# Print the cumulative scores
print("\nCumulative Scores:")
for i, (accuracy_score, f1_score) in enumerate(zip(cumulative_accuracy_scores, cumulative_f1_scores), start=1):
    print(f"For {i} trees - Cumulative Accuracy: {accuracy_score:.4f}, Cumulative F1 Score: {f1_score:.4f}")

Tree 1 - Cumulative Accuracy: 0.9100, Cumulative F1 Score: 0.9100
Tree 2 - Cumulative Accuracy: 0.8200, Cumulative F1 Score: 0.8200
Tree 3 - Cumulative Accuracy: 0.8500, Cumulative F1 Score: 0.8500
Tree 4 - Cumulative Accuracy: 0.8800, Cumulative F1 Score: 0.8800
Tree 5 - Cumulative Accuracy: 0.8800, Cumulative F1 Score: 0.8800
Tree 6 - Cumulative Accuracy: 0.9000, Cumulative F1 Score: 0.9000
Tree 7 - Cumulative Accuracy: 0.8800, Cumulative F1 Score: 0.8800
Tree 8 - Cumulative Accuracy: 0.8900, Cumulative F1 Score: 0.8900
Tree 9 - Cumulative Accuracy: 0.9000, Cumulative F1 Score: 0.9000
Tree 10 - Cumulative Accuracy: 0.8800, Cumulative F1 Score: 0.8800
Tree 11 - Cumulative Accuracy: 0.8700, Cumulative F1 Score: 0.8700
Tree 12 - Cumulative Accuracy: 0.9000, Cumulative F1 Score: 0.9000
Tree 13 - Cumulative Accuracy: 0.8800, Cumulative F1 Score: 0.8800
Tree 14 - Cumulative Accuracy: 0.9100, Cumulative F1 Score: 0.9100
Tree 15 - Cumulative Accuracy: 0.8800, Cumulative F1 Score: 0.8800
Tree