## Importing Libraries

In [1]:
import numpy as np
from sklearn.tree import DecisionTreeClassifier

In [2]:
class RandomForestClassifier:
    """Small random-forest classifier assembled from scikit-learn decision trees.

    Every tree is trained on a bootstrap sample of the rows and on its own
    random subset of ``max_features`` columns. Predictions are combined by
    majority vote over the trees.

    Parameters
    ----------
    n_estimators : int, default=100
        Number of trees to train.
    max_depth : int or None, default=None
        Forwarded unchanged to each ``DecisionTreeClassifier``.
    max_features : int or None, default=None
        Number of columns drawn (without replacement) for each tree.
        ``None`` (or ``0``) means ``int(sqrt(n_features))``, but at least 1.
    random_state : int, numpy.random.RandomState or None, default=None
        Seed (or generator) for the column/row sampling and for the trees.
        With ``None`` the global ``np.random`` stream is used, so
        ``np.random.seed(...)`` still controls the result.

    Attributes
    ----------
    estimators_ : list of (tree, feature_indices) tuples
        The fitted trees together with the column indices each one was
        trained on. Replaced on every call to ``fit``.
    """

    def __init__(self, n_estimators=100, max_depth=None, max_features=None, random_state=None):
        self.n_estimators = n_estimators
        self.max_depth = max_depth
        self.max_features = max_features
        self.random_state = random_state
        self.estimators_ = []

    def fit(self, X, y):
        """Train ``n_estimators`` trees on bootstrap samples of ``(X, y)``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
        y : array-like of shape (n_samples,)

        Raises
        ------
        ValueError
            If the effective ``max_features`` is not in ``[1, n_features]``.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        n_samples, n_features = X.shape

        max_features = self.max_features or max(1, int(np.sqrt(n_features)))
        if not 1 <= max_features <= n_features:
            raise ValueError(
                f"max_features must be between 1 and n_features={n_features}, "
                f"got {max_features}"
            )

        seed = self.random_state
        if seed is None:
            # Fall back to the module-level stream: same draws as calling
            # np.random.choice directly, so a global seed keeps working.
            rng = np.random
            tree_random_state = None
        elif isinstance(seed, np.random.RandomState):
            rng = tree_random_state = seed
        else:
            rng = tree_random_state = np.random.RandomState(seed)

        # Build into a fresh list so refitting replaces the forest instead of
        # appending new trees to the ones from a previous fit.
        estimators = []
        for _ in range(self.n_estimators):
            # randomly select subset of features
            feature_indices = rng.choice(n_features, max_features, replace=False)
            X_subset = X[:, feature_indices]

            # randomly sample data with replacement (bootstrap)
            sample_indices = rng.choice(n_samples, n_samples, replace=True)
            X_sampled = X_subset[sample_indices]
            y_sampled = y[sample_indices]

            # X_sampled already has exactly max_features columns, so the
            # tree's own max_features lets it consider all of them per split.
            tree = DecisionTreeClassifier(
                max_depth=self.max_depth,
                max_features=max_features,
                random_state=tree_random_state,
            )
            tree.fit(X_sampled, y_sampled)
            estimators.append((tree, feature_indices))

        self.estimators_ = estimators

    def predict(self, X):
        """Return the majority-vote class label for every row of ``X``.

        Labels are returned in the form the trees predict them (not as
        booleans), so any label set works, not only ``{0, 1}``. Ties are
        broken in favour of the largest label, which for 0/1 labels matches
        a ``mean >= 0.5`` threshold.

        Raises
        ------
        ValueError
            If the model has no fitted trees.
        """
        if not self.estimators_:
            raise ValueError("This RandomForestClassifier is not fitted yet; call fit() first.")

        X = np.asarray(X)
        n_samples = X.shape[0]

        # Column j holds tree j's prediction for every sample.
        per_tree = np.stack(
            [
                np.asarray(tree.predict(X[:, feature_indices]))
                for tree, feature_indices in self.estimators_
            ],
            axis=1,
        )

        # Map labels to integer codes so votes can be counted for any dtype.
        labels, codes = np.unique(per_tree, return_inverse=True)
        codes = codes.reshape(per_tree.shape)

        votes = np.zeros((n_samples, labels.size), dtype=np.intp)
        np.add.at(votes, (np.arange(n_samples)[:, None], codes), 1)

        # argmax on the reversed columns picks the LAST maximum, i.e. the
        # largest label among tied classes.
        winner = labels.size - 1 - np.argmax(votes[:, ::-1], axis=1)
        return labels[winner]

## Testing on a dummy dataset

In [3]:
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [4]:
# Synthetic two-class problem: 1000 samples with 10 features each.
X, y = make_classification(
    n_samples=1000,
    n_features=10,
    n_classes=2,
    random_state=42,
)

# Hold out 20% of the rows for evaluation; the split itself is seeded.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [5]:
# Forest under test: 100 trees, each at most 10 levels deep and trained on
# 5 randomly chosen feature columns.
clf = RandomForestClassifier(
    n_estimators=100,
    max_depth=10,
    max_features=5,
)

In [6]:
# The forest draws its feature subsets and bootstrap rows from NumPy's global
# random stream, so seed it right before training; otherwise the accuracy
# reported below changes on every run.
np.random.seed(42)
clf.fit(X_train, y_train)

In [7]:
# Run the trained forest on the held-out test split.
y_pred = clf.predict(X_test)

In [8]:
# Compare the predictions against the true test labels.
accuracy = accuracy_score(y_test, y_pred)

In [9]:
# Report the score rounded to two decimal places.
print(f'Accuracy: {accuracy:.2f}')

Accuracy: 0.88


In [13]:
# Display the generated feature matrix (NumPy abbreviates large arrays with "...").
X

array([[ 0.96479937, -0.06644898,  0.98676805, ..., -1.2101605 ,
        -0.62807677,  1.22727382],
       [-0.91651053, -0.56639459, -1.00861409, ..., -0.98453405,
         0.36389642,  0.20947008],
       [-0.10948373, -0.43277388, -0.4576493 , ..., -0.2463834 ,
        -1.05814521, -0.29737608],
       ...,
       [ 1.67463306,  1.75493307,  1.58615382, ...,  0.69272276,
        -1.50384972,  0.22526412],
       [-0.77860873, -0.83568901, -0.19484228, ..., -0.49735437,
         2.47213818,  0.86718741],
       [ 0.24845351, -1.0034389 ,  0.36046013, ...,  0.77323999,
         0.1857344 ,  1.41641179]])