# Adaboost Classifier

In [1]:
from sklearn.datasets import make_classification 
from sklearn.model_selection import train_test_split
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [2]:
X, y = make_classification(n_samples=1000, n_features=20, n_classes=2, random_state = 1)

In [3]:
X

array([[-2.04582165, -0.13791624, -0.08071423, ...,  2.48194524,
         0.74236675,  0.23154789],
       [-0.98726024,  1.30120189,  2.37734888, ...,  0.55445754,
        -0.21892143, -0.37608578],
       [ 0.57335921,  0.09375582,  0.4662521 , ..., -0.6088508 ,
         0.79903499, -0.17121177],
       ...,
       [-0.70737159,  1.07650943,  0.58510456, ..., -1.51337602,
         0.90239871, -0.69230951],
       [-0.20706849,  1.17319848, -1.94478665, ..., -0.32820676,
         1.5711921 ,  1.14877729],
       [-2.16769231, -2.54871672,  2.89359255, ...,  0.71535366,
         0.34329241,  1.07350284]])

In [4]:
y

array([0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1,
       1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0,
       0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1,
       0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1,
       1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0,
       0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0,
       1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1,
       0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0,
       0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0,
       0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1,
       0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0,
       1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0,
       0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0,
       0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

# Split Dataset

In [5]:
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.33, random_state=1)

In [6]:
classifier = AdaBoostClassifier()
classifier.fit(X_train, y_train)

# Make Predictions 

In [7]:
y_pred = classifier.predict(X_test)

# Evaluate the model
print('Current model performance:')
print(f'Accuracy: {accuracy_score(y_test, y_pred)}')
print('Classification Report:')
print(classification_report(y_test, y_pred))
print('Confusion Matrix:')
print(confusion_matrix(y_test, y_pred))

Current model performance:
Accuracy: 0.8238805970149253
Classification Report:
              precision    recall  f1-score   support

           0       0.84      0.81      0.82       336
           1       0.81      0.84      0.83       334

    accuracy                           0.82       670
   macro avg       0.82      0.82      0.82       670
weighted avg       0.82      0.82      0.82       670

Confusion Matrix:
[[271  65]
 [ 53 281]]


# Hyperparameter Tunning

In [8]:
from sklearn.ensemble import AdaBoostClassifier
from sklearn.model_selection import GridSearchCV

# Define the parameter grid
param_grid = {
    'n_estimators': [50, 100, 200],
    'learning_rate': [0.01, 0.1, 1.0, 1.5, 2.0],
    'algorithm': ['SAMME', 'SAMME.R']
}

# Initialize the AdaBoostClassifier
ada = AdaBoostClassifier()

# Set up GridSearchCV
clf = GridSearchCV(estimator=ada, param_grid=param_grid, cv=5, verbose=3, n_jobs=-1)

# Assuming X_train and y_train are defined
clf.fit(X_train, y_train)

# Get the best parameters and best score
print(f"Best parameters: {clf.best_params_}")
print(f"Best score: {clf.best_score_}")


Fitting 5 folds for each of 30 candidates, totalling 150 fits
Best parameters: {'algorithm': 'SAMME', 'learning_rate': 0.1, 'n_estimators': 200}
Best score: 0.8272727272727272


In [9]:
clf.fit(X_train, y_train)

Fitting 5 folds for each of 30 candidates, totalling 150 fits


In [10]:
clf.best_params_

{'algorithm': 'SAMME', 'learning_rate': 0.1, 'n_estimators': 200}

In [11]:
clf.best_estimator_

In [12]:
best_model = clf.best_estimator_

In [13]:
best_model

In [14]:
y_pred_tuned = best_model.predict(X_test)

In [15]:
# Evaluate the model
print('Tunned model performance:')
print(f'Accuracy: {accuracy_score(y_test, y_pred_tuned)}')
print('Classification Report:')
print(classification_report(y_test, y_pred_tuned))
print('Confusion Matrix:')
print(confusion_matrix(y_test, y_pred_tuned))

Tunned model performance:
Accuracy: 0.8656716417910447
Classification Report:
              precision    recall  f1-score   support

           0       0.88      0.85      0.86       336
           1       0.86      0.88      0.87       334

    accuracy                           0.87       670
   macro avg       0.87      0.87      0.87       670
weighted avg       0.87      0.87      0.87       670

Confusion Matrix:
[[287  49]
 [ 41 293]]


# AdaBoost Regressor 

In [16]:
from sklearn.datasets import make_regression 
from sklearn.model_selection import train_test_split
from sklearn.ensemble import AdaBoostRegressor
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error

In [17]:
X,y = make_regression(n_samples=1000, n_features=2, noise=10, random_state=1)

In [18]:
X

array([[-0.91542437, -1.94504696],
       [-2.30490794, -0.59246129],
       [-0.43750898, -0.78191168],
       ...,
       [-0.46867382,  0.02186284],
       [-0.44265541,  0.55753264],
       [-0.13971173,  0.85328219]])

In [19]:
y

array([-1.98367142e+02, -7.18777056e+01, -8.69703239e+01, -6.96492401e+01,
        1.82745760e+01, -3.77114686e+01,  3.92726728e+01, -9.78831863e+01,
       -9.79418533e+01, -1.18326213e+02,  7.37176065e+01,  1.15947984e+01,
       -1.14639116e+02,  1.18554758e+02, -1.09479268e+02,  4.55822523e+01,
        1.22163385e+01, -7.55247901e+01, -1.07584299e+02, -1.52546032e+02,
        6.76376929e+00,  9.63814831e+00, -2.16279129e+01,  7.04567440e+01,
        1.07700199e+02,  1.09477518e+01,  1.17165036e+02,  2.78449353e+01,
        3.24248766e+01, -1.55808165e+01,  9.64321593e+01, -2.68980403e+02,
        3.04398725e+01, -5.03731238e+01,  1.83865434e+01, -4.20020243e+01,
        2.60722898e+01, -4.24419694e+01, -2.84245422e+01,  7.87732991e+01,
        6.38527251e+01,  1.05600811e+02,  2.54784577e+01, -4.37001825e+01,
       -4.56639834e+01,  2.25475298e+01, -2.44159979e+01,  2.14189935e+00,
       -7.55772986e+01,  9.71903066e+01, -1.01696566e+02,  1.10063672e+02,
        3.74493102e+01,  

# Split Dataset

In [20]:
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.33, random_state=1)

In [21]:
regressor = AdaBoostRegressor()
regressor.fit(X_train, y_train)

# Predict the Model

In [22]:
y_pred=regressor.predict(X_test)

In [23]:
# Evaluate the model
print('Current model performance:')
print(f'R2 score: {r2_score(y_test, y_pred)}')
print(f'Mean_Absolute_Error: {mean_absolute_error(y_test, y_pred)}')
print(f'Mean_Squared_Error: {mean_squared_error(y_test, y_pred)}')

Current model performance:
R2 score: 0.9561291350417511
Mean_Absolute_Error: 14.222869947305865
Mean_Squared_Error: 366.4121917586186


# Hyperparameter Tunning 

In [24]:
from sklearn.model_selection import GridSearchCV

# Define the parameter grid
param_grid = {
    'n_estimators': [50, 100, 200],
    'learning_rate': [0.01, 0.1, 1.0, 1.5, 2.0],
    'loss': ['linear', 'square', 'exponential']
}

# Set up GridSearchCV
grid_search =GridSearchCV(estimator=regressor, param_grid=param_grid, cv=5, verbose=3, n_jobs=-1)



In [25]:
grid_search

In [26]:
grid_search.fit(X_train, y_train)

Fitting 5 folds for each of 45 candidates, totalling 225 fits


In [27]:
grid_search.best_params_

{'learning_rate': 2.0, 'loss': 'exponential', 'n_estimators': 200}