# Adaboost Implementation


In [1]:
import pandas as pd 
import numpy as np
import seaborn as sns
import matplotlib as plt

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


***Data Set by make_classification***

In [2]:
from sklearn.datasets import make_classification
X , y  = make_classification(n_samples = 1000 , n_features = 20 , n_classes = 2 , random_state = 1)

In [3]:
X , y

(array([[-2.04582165, -0.13791624, -0.08071423, ...,  2.48194524,
          0.74236675,  0.23154789],
        [-0.98726024,  1.30120189,  2.37734888, ...,  0.55445754,
         -0.21892143, -0.37608578],
        [ 0.57335921,  0.09375582,  0.4662521 , ..., -0.6088508 ,
          0.79903499, -0.17121177],
        ...,
        [-0.70737159,  1.07650943,  0.58510456, ..., -1.51337602,
          0.90239871, -0.69230951],
        [-0.20706849,  1.17319848, -1.94478665, ..., -0.32820676,
          1.5711921 ,  1.14877729],
        [-2.16769231, -2.54871672,  2.89359255, ...,  0.71535366,
          0.34329241,  1.07350284]]),
 array([0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1,
        1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0,
        0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1,
        0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1,
        1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0,
   

***train_test_split***

In [4]:
from sklearn.model_selection import train_test_split

X_train , X_test , y_train , y_test = train_test_split(X , y , test_size = 0.30 , random_state = 1)

# AdaBoostClassifier

***from sklearn.ensemble import AdaBoostClassifier***

In [5]:
from sklearn.ensemble import AdaBoostClassifier

AdaBoost = AdaBoostClassifier()

In [6]:
AdaBoost.fit(X_train, y_train)



In [7]:
y_pred = AdaBoost.predict(X_test)

In [8]:
from sklearn.metrics import accuracy_score , confusion_matrix , classification_report

# accuracy_score

In [9]:
accuracy_score(y_test , y_pred)*100

83.33333333333334

# confusion_matrix

In [10]:
print(confusion_matrix(y_test , y_pred))

[[117  22]
 [ 28 133]]


# classification_report

In [11]:
print(classification_report(y_test , y_pred))

              precision    recall  f1-score   support

           0       0.81      0.84      0.82       139
           1       0.86      0.83      0.84       161

    accuracy                           0.83       300
   macro avg       0.83      0.83      0.83       300
weighted avg       0.83      0.83      0.83       300



# Hyperparameter tuning

***Hyperparameter tuning on AdaBoostClassifier***

In [12]:
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import AdaBoostClassifier

param_grid = {
    'n_estimators': [50, 100, 200],
    'learning_rate': [0.001, 0.01, 0.1, 0.5, 1.0],
    'algorithm': ['SAMME', 'SAMME.R']
}

model_name = AdaBoostClassifier()

In [13]:
grid_search = GridSearchCV(estimator=model_name  , param_grid=param_grid, cv=5, scoring='accuracy')
grid_search

In [14]:
grid_search.fit(X_train, y_train)



In [15]:
grid_search.best_params_

{'algorithm': 'SAMME', 'learning_rate': 0.5, 'n_estimators': 50}

In [18]:
best_model = grid_search.best_estimator_
best_model

In [22]:
best_pred = best_model.predict(X_test)
best_pred

array([0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1,
       0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1,
       0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1,
       0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0,
       1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1,
       1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0,
       1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0,
       1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
       1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0,
       0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0,
       1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1,
       0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1,
       1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1])

In [26]:
from sklearn.metrics import accuracy_score , confusion_matrix , classification_report

print(accuracy_score(y_test , best_pred)*100)

print(confusion_matrix(y_test, best_pred))

print(classification_report(y_test, best_pred))

85.33333333333334
[[120  19]
 [ 25 136]]
              precision    recall  f1-score   support

           0       0.83      0.86      0.85       139
           1       0.88      0.84      0.86       161

    accuracy                           0.85       300
   macro avg       0.85      0.85      0.85       300
weighted avg       0.85      0.85      0.85       300



In [35]:
from sklearn.datasets import make_regression
X , y  = make_regression(n_samples = 1000 , n_features = 2 , noise=10 , random_state = 1)

In [36]:
from sklearn.model_selection import train_test_split

X_train , X_test , y_train , y_test = train_test_split(X , y , test_size = 0.30 , random_state = 1)

In [37]:
from sklearn.ensemble import AdaBoostRegressor
model = AdaBoostRegressor()

In [38]:
model.fit(X_train , y_train)

In [39]:
y_pred = model.predict(X_test)
y_pred

array([ -52.6639341 ,  -93.90522648,  169.35875035,    5.03037635,
         73.04406343,   20.72417465,  -18.15414281,  126.40030013,
       -161.07391777,  114.93964574,   56.93234904,   29.79266501,
        -93.90522648,   92.45899768,   30.21578349, -169.35154977,
        147.81426865,   39.21483681,  -19.87516716,   -9.6798913 ,
       -169.35154977,   20.72417465,   29.79266501,   51.78658501,
         11.19432454,   34.39709296,  -68.28706029,  -88.62712218,
        144.57170354,  201.33739211,  194.58696519,   37.41835335,
         97.5465956 ,   30.21578349,   57.78955718,   34.31297319,
        -73.72095333,   95.94991179, -171.16618516,  -19.31489427,
        144.57170354,  126.40030013,  -52.19161953, -172.12746101,
        -19.87516716,   23.60111038,  -19.87516716,   73.04406343,
        -88.62712218,  -52.58654165,   36.91894131,   24.24241108,
         11.19432454,  -52.19161953,   13.3478175 ,  -32.85553294,
        139.41965024,   56.28450696, -106.64318841,  139.38518

In [40]:
from sklearn.metrics import r2_score
r2_score(y_test , y_pred)*100

95.55231171784268

# Hyperparameter tuning

***Hyperparameter tuning on AdaBoostRegressor***

In [41]:
model_name = AdaBoostRegressor()

# Define the hyperparameter grid
param_grid = {
    'n_estimators': [50, 100, 200],
    'learning_rate': [0.001, 0.01],
    'loss': ['linear', 'square', 'exponential']
}

In [43]:
from sklearn.model_selection import GridSearchCV

GridSearch = GridSearchCV(estimator=model_name , param_grid=param_grid , cv=5)

In [44]:
GridSearch.fit(X_train , y_train)

In [45]:
GridSearch.best_params_

{'learning_rate': 0.01, 'loss': 'square', 'n_estimators': 200}

In [47]:
best_model = GridSearch.best_estimator_

In [49]:
best_pred = best_model.predict(X_test)

In [52]:
from sklearn.metrics import r2_score
r2_score(y_test , best_pred)*100

93.4906597500278