# **Random Forest:** *Basic Implementation*

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score, root_mean_squared_error, balanced_accuracy_score
from mlxtend.plotting import plot_decision_regions

In [2]:
try:
    df = pd.read_csv(r"/workspaces/ML-Journey/Datasets/Dataset for weight prediction with gender.csv")
except:
    df = pd.read_csv(r"C:\\Users\\Archit\\Documents\\GitHub\\ML-Journey\\Datasets\\Dataset for weight prediction with gender.csv")

In [3]:
X = df.drop(columns=['weight_kg'])
y = df[['weight_kg']]

In [4]:
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.2)

In [5]:
scaler = StandardScaler()
X_train_trf = scaler.fit_transform(X_train)
X_test_trf = scaler.transform(X_test)

### Model with undscalised data

In [6]:
rf = RandomForestRegressor()
rf.fit(X_train, y_train)
y_pred = rf.predict(X_test)
r2_score(y_test, y_pred)

  return fit_method(estimator, *args, **kwargs)


0.8708962078941651

### Model with scalised data

In [7]:
rf1 = RandomForestRegressor()
rf1.fit(X_train_trf, y_train)
y_pred = rf1.predict(X_test_trf)
r2_score(y_test, y_pred)

  return fit_method(estimator, *args, **kwargs)


0.8710248342754277

### Manual hyperparameter tunning

In [8]:
rf2 = RandomForestRegressor(n_estimators=50, max_features=2, oob_score=True , ccp_alpha=0.09)
rf2.fit(X_train_trf, y_train)
y_pred = rf2.predict(X_test_trf)
r2_score(y_test, y_pred)

  return fit_method(estimator, *args, **kwargs)


0.8780955336777201

### Hyperparameter Tunning with grid search cv

In [9]:
param_grid = {
    "n_estimators": [10,30,50],
    "max_depth": [2,5,7],
    "ccp_alpha": [00000.1, 0.1, 1]
}

grid = GridSearchCV(estimator=RandomForestRegressor(),
                    cv = 5,
                    param_grid = param_grid,
                    scoring = 'neg_mean_squared_error')

In [10]:

grid.fit(X_train_trf, y_train.to_numpy().ravel())

In [11]:
best = grid.best_estimator_

In [12]:

y_pred = best.predict(X_test_trf)
r2_score(y_test, y_pred)

0.8762260136862893

### Exploring random forest

In [19]:
rf.estimators_

[DecisionTreeRegressor(max_features=1.0, random_state=1672049516),
 DecisionTreeRegressor(max_features=1.0, random_state=228990243),
 DecisionTreeRegressor(max_features=1.0, random_state=1261089399),
 DecisionTreeRegressor(max_features=1.0, random_state=1276383056),
 DecisionTreeRegressor(max_features=1.0, random_state=384384193),
 DecisionTreeRegressor(max_features=1.0, random_state=902648654),
 DecisionTreeRegressor(max_features=1.0, random_state=134044915),
 DecisionTreeRegressor(max_features=1.0, random_state=596806913),
 DecisionTreeRegressor(max_features=1.0, random_state=152455659),
 DecisionTreeRegressor(max_features=1.0, random_state=602755181),
 DecisionTreeRegressor(max_features=1.0, random_state=1414497188),
 DecisionTreeRegressor(max_features=1.0, random_state=550792755),
 DecisionTreeRegressor(max_features=1.0, random_state=1242447622),
 DecisionTreeRegressor(max_features=1.0, random_state=2052148686),
 DecisionTreeRegressor(max_features=1.0, random_state=1242749777),
 De

In [21]:
rf.estimator_

In [22]:
rf.feature_importances_

array([0.18711594, 0.50207602, 0.17596548, 0.06813937, 0.05162697,
       0.01507622])

In [25]:
rf.feature_names_in_

array(['height_cm', 'gender', 'bmi_class_Normal', 'bmi_class_Obesity',
       'bmi_class_Overweight', 'bmi_class_Underweight'], dtype=object)

In [26]:
rf.get_params()

{'bootstrap': True,
 'ccp_alpha': 0.0,
 'criterion': 'squared_error',
 'max_depth': None,
 'max_features': 1.0,
 'max_leaf_nodes': None,
 'max_samples': None,
 'min_impurity_decrease': 0.0,
 'min_samples_leaf': 1,
 'min_samples_split': 2,
 'min_weight_fraction_leaf': 0.0,
 'monotonic_cst': None,
 'n_estimators': 100,
 'n_jobs': None,
 'oob_score': False,
 'random_state': None,
 'verbose': 0,
 'warm_start': False}

In [27]:
rf.n_features_in_

6

In [28]:
rf.n_outputs_

1

In [31]:
rf._estimator_type

'regressor'

In [35]:
rf.n_estimators

100