Todays Agenda
    Implement Random Forest, AdaBoost, and Gradient Boosting on the same dataset.
    Compare their performance using:
        Accuracy
        F1-score
        Cross-validation scores
    Experiment with hyperparameters:
        Number of estimators(n_estimators)
        Learning rate (for boosting)
        Maximum depth (for base learners)(max_depth)
        Visualize feature importance for Random Forest and Gradient Boosting.

In [6]:
#importing the required modules
import pandas as pd 
from sklearn.datasets import load_iris,make_classification
from sklearn.metrics import accuracy_score,f1_score,recall_score,precision_score
from sklearn.model_selection import train_test_split
from sklearn.ensemble import  RandomForestClassifier,AdaBoostClassifier,GradientBoostingClassifier


In [7]:
#loading the data
data = load_iris()
#splitting the data
X,y = data.data,data.target
x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=.5)


In [8]:
# Creating RandomForestClassifier model 
rf_model = RandomForestClassifier(n_estimators=100,random_state=42)
rf_model.fit(x_train,y_train)
y_pred = rf_model.predict(x_test)
#calculating performance metrices
print(f'Accuracy Score:{accuracy_score(y_pred,y_test)}')
print(f'F1 Score:{f1_score(y_pred,y_test,average="weighted")}')
print(f'Precision Score:{precision_score(y_pred,y_test,average="weighted")}')
print(f'Recall Score:{recall_score(y_pred,y_test,average="weighted")}')

Accuracy Score:0.8933333333333333
F1 Score:0.8928592592592591
Precision Score:0.9083636363636364
Recall Score:0.8933333333333333


In [9]:
# Creating AdaBoostClassifier model 
ab_model = AdaBoostClassifier(n_estimators=100,random_state=42)
ab_model.fit(x_train,y_train)
y_pred = ab_model.predict(x_test)
#calculating performance metrices
print(f'Accuracy Score:{accuracy_score(y_pred,y_test)}')
print(f'F1 Score:{f1_score(y_pred,y_test,average="weighted")}')
print(f'Precision Score:{precision_score(y_pred,y_test,average="weighted")}')
print(f'Recall Score:{recall_score(y_pred,y_test,average="weighted")}')

Accuracy Score:0.8933333333333333
F1 Score:0.8928592592592591
Precision Score:0.9083636363636364
Recall Score:0.8933333333333333


In [10]:
# Creating GradientBoostingClassifier model 
gb_model = GradientBoostingClassifier(n_estimators=100,random_state=42)
gb_model.fit(x_train,y_train)
y_pred = gb_model.predict(x_test)
#calculating performance metrices
print(f'Accuracy Score:{accuracy_score(y_pred,y_test)}')
print(f'F1 Score:{f1_score(y_pred,y_test,average="weighted")}')
print(f'Precision Score:{precision_score(y_pred,y_test,average="weighted")}')
print(f'Recall Score:{recall_score(y_pred,y_test,average="weighted")}')

Accuracy Score:0.9066666666666666
F1 Score:0.906147186147186
Precision Score:0.9169696969696971
Recall Score:0.9066666666666666


In [11]:
# creating my own imbalanaced dataset to evaluate the performance of each model because above model show
# too similar performance metrices of each model
X,y = make_classification(
    n_samples=10000,       # Total samples
    n_features=20,        # Number of features
    n_informative=2,      # Number of informative features
    n_redundant=10,       # Number of redundant features
    n_clusters_per_class=1,
    weights=[0.9, 0.1],   # Class imbalance (90% class 0, 10% class 1)
    flip_y=0,             # No label noise
    random_state=42
)

x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=.2)


In [12]:
# Creating RandomForestClassifier model 
rf_model = RandomForestClassifier(n_estimators=100,random_state=42)
rf_model.fit(x_train,y_train)
y_pred = rf_model.predict(x_test)
#calculating performance metrices
print(f'Accuracy Score:{accuracy_score(y_pred,y_test)}')
print(f'F1 Score:{f1_score(y_pred,y_test,average="weighted")}')
print(f'Precision Score:{precision_score(y_pred,y_test,average="weighted")}')
print(f'Recall Score:{recall_score(y_pred,y_test,average="weighted")}')

Accuracy Score:0.996
F1 Score:0.9960089397570864
Precision Score:0.9960233333333333
Recall Score:0.996


In [13]:
# Creating AdaBoostClassifier model 
ab_model = AdaBoostClassifier(n_estimators=100,random_state=42)
ab_model.fit(x_train,y_train)
y_pred = ab_model.predict(x_test)
#calculating performance metrices
print(f'Accuracy Score:{accuracy_score(y_pred,y_test)}')
print(f'F1 Score:{f1_score(y_pred,y_test,average="weighted")}')
print(f'Precision Score:{precision_score(y_pred,y_test,average="weighted")}')
print(f'Recall Score:{recall_score(y_pred,y_test,average="weighted")}')

Accuracy Score:0.994
F1 Score:0.9940545726050952
Precision Score:0.9941955555555555
Recall Score:0.994


In [14]:
# Creating GradientBoostingClassifier model 
gb_model = GradientBoostingClassifier(n_estimators=100,random_state=42)
gb_model.fit(x_train,y_train)
y_pred = gb_model.predict(x_test)
#calculating performance metrices
print(f'Accuracy Score:{accuracy_score(y_pred,y_test)}')
print(f'F1 Score:{f1_score(y_pred,y_test,average="weighted")}')
print(f'Precision Score:{precision_score(y_pred,y_test,average="weighted")}')
print(f'Recall Score:{recall_score(y_pred,y_test,average="weighted")}')

Accuracy Score:0.995
F1 Score:0.9950454771709127
Precision Score:0.9951777777777778
Recall Score:0.995
