In [1]:
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.ensemble import AdaBoostClassifier, AdaBoostRegressor
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, r2_score



import warnings
warnings.filterwarnings('ignore')

In [3]:
# Synthetic two-class dataset: 1000 samples with 20 features, seeded so the
# same data is generated on every run.
x, y = make_classification(
    n_samples=1000,
    n_features=20,
    n_classes=2,
    random_state=1,
)

In [5]:
# Hold out 20% of the rows as a test set; the seed keeps the split stable.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.20, random_state=1
)

In [9]:
x_train.shape,x_test.shape

((800, 20), (200, 20))

In [11]:
# Baseline AdaBoost classifier with library-default hyperparameters.
# random_state is pinned to the same seed used for data generation and the
# train/test split, so that re-running the notebook rebuilds the same ensemble.
classifier = AdaBoostClassifier(random_state=1)

In [13]:
classifier

In [19]:
# Train the baseline model on the training split.
#   n_estimators  -> number of decision trees (DT) in the ensemble
#   learning_rate -> 1.0
# Neither value was set when the classifier was constructed.
classifier.fit(x_train, y_train)

In [21]:
# Predicted class labels for the held-out test rows, from the baseline model.
y_pred = classifier.predict(x_test)

In [23]:
y_pred

array([0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1,
       0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1,
       1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1,
       0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1,
       1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1,
       1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1,
       1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0,
       0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0,
       1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0,
       1, 1])

In [25]:
# Evaluate the predictions against the held-out labels.
# The classification report and the confusion matrix span several lines, so
# each is printed on the line after its label (sep="\n"); printing them on the
# label's own line pushes the report header and the first matrix row out of
# alignment with the rows beneath them.
print("acc", accuracy_score(y_test, y_pred))
print("clf_report", classification_report(y_test, y_pred), sep="\n")
print("con_mat", confusion_matrix(y_test, y_pred), sep="\n")

acc 0.85
clf_report               precision    recall  f1-score   support

           0       0.84      0.82      0.83        90
           1       0.86      0.87      0.86       110

    accuracy                           0.85       200
   macro avg       0.85      0.85      0.85       200
weighted avg       0.85      0.85      0.85       200

con_mat [[74 16]
 [14 96]]


In [27]:
#Hyperparameter tuning
from sklearn.model_selection import GridSearchCV

In [29]:
# Hyperparameter grid for the search: 3 x 6 x 2 = 36 combinations.
# NOTE(review): the "SAMME.R" option may not be accepted by newer scikit-learn
# releases - confirm against the installed version.
params = dict(
    n_estimators=[50, 100, 200],
    learning_rate=[0.001, 0.1, 1, 1.5, 2, 2.5],
    algorithm=["SAMME.R", "SAMME"],
)

In [31]:
params

{'n_estimators': [50, 100, 200],
 'learning_rate': [0.001, 0.1, 1, 1.5, 2, 2.5],
 'algorithm': ['SAMME.R', 'SAMME']}

In [33]:
# Exhaustive grid search over `params`, scored with 5-fold cross-validation;
# verbose=3 logs the score and timing of every individual fit.
# The estimator's random_state is pinned (same seed as the data generation and
# split) so every candidate is trained reproducibly and the selected
# parameters do not vary between runs of the notebook.
ada = AdaBoostClassifier(random_state=1)
clf = GridSearchCV(estimator=ada, param_grid=params, cv=5, verbose=3)

In [35]:
clf

In [37]:
# Run the cross-validated search on the training split only; the test split
# stays untouched until the final evaluation.
clf.fit(x_train, y_train)

Fitting 5 folds for each of 36 candidates, totalling 180 fits
[CV 1/5] END algorithm=SAMME.R, learning_rate=0.001, n_estimators=50;, score=0.831 total time=   0.4s
[CV 2/5] END algorithm=SAMME.R, learning_rate=0.001, n_estimators=50;, score=0.856 total time=   0.3s
[CV 3/5] END algorithm=SAMME.R, learning_rate=0.001, n_estimators=50;, score=0.881 total time=   0.3s
[CV 4/5] END algorithm=SAMME.R, learning_rate=0.001, n_estimators=50;, score=0.819 total time=   0.3s
[CV 5/5] END algorithm=SAMME.R, learning_rate=0.001, n_estimators=50;, score=0.838 total time=   0.3s
[CV 1/5] END algorithm=SAMME.R, learning_rate=0.001, n_estimators=100;, score=0.831 total time=   0.7s
[CV 2/5] END algorithm=SAMME.R, learning_rate=0.001, n_estimators=100;, score=0.856 total time=   0.7s
[CV 3/5] END algorithm=SAMME.R, learning_rate=0.001, n_estimators=100;, score=0.881 total time=   0.8s
[CV 4/5] END algorithm=SAMME.R, learning_rate=0.001, n_estimators=100;, score=0.819 total time=   0.9s
[CV 5/5] END alg

In [41]:
# best_estimator_ is the winning configuration of the grid search. The search
# log reports 36 candidates, each scored over 5 folds (180 fits in total), so
# this is the best of 36 parameter settings rather than of 180 models.
best_model = clf.best_estimator_ #best candidate found by the grid search
# Re-predict the test split with the tuned model; this rebinds y_pred, replacing
# the predictions made earlier by the un-tuned classifier.
y_pred = best_model.predict(x_test)

In [43]:
y_pred

array([0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1,
       0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1,
       1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1,
       0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1,
       1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0,
       1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0,
       1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
       1, 1])

In [45]:
clf.best_params_

{'algorithm': 'SAMME.R', 'learning_rate': 0.1, 'n_estimators': 200}

In [47]:
clf.best_score_

0.8637499999999999

In [49]:
accuracy_score(y_test, y_pred)

0.88

In [97]:
#ada boost regressor
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.ensemble import AdaBoostRegressor
from sklearn.metrics import r2_score


In [99]:
# Synthetic regression data: 1000 samples, 2 features, noise level 10, seeded.
# Note that x, y and the four split variables are rebound here, so the
# classification data defined earlier in the notebook is no longer reachable
# through these names.
x, y = make_regression(
    n_samples=1000,
    n_features=2,
    noise=10,
    random_state=1,
)
# Hold out 33% of the rows for testing.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.33, random_state=1
)

In [101]:
x_train.shape,x_test.shape

((670, 2), (330, 2))

NameError: name 'reg' is not defined