In [1]:
import pandas as pd

In [2]:
from sklearn.datasets import make_classification

In [3]:
# Synthetic classification data: 2000 samples with 10 features, of which
# 8 are informative and 2 are redundant. Seeded so the data is repeatable.
X, y = make_classification(
    n_samples=2000,
    n_features=10,
    n_informative=8,
    n_redundant=2,
    random_state=11,
)

In [4]:
# Display the generated feature matrix (10 feature columns per sample).
X

array([[-1.7938728 ,  1.37312582, -2.50427584, ..., -2.49061889,
         2.80534037,  0.39954423],
       [ 0.61902962,  0.44953056,  0.56743221, ..., -2.90247028,
         0.19213509, -0.75610232],
       [-0.78283253, -0.53410387,  3.17302481, ..., -2.37792204,
        -0.82675592,  0.97933202],
       ...,
       [ 2.14186277,  2.29528437, -5.51786062, ...,  3.70808129,
        -1.17004925, -3.05076628],
       [-2.15573699,  1.27625064, -0.92487689, ...,  2.08672623,
         0.4311174 ,  0.40586409],
       [ 0.71056324,  0.06807135, -0.82336149, ...,  1.30743311,
        -0.22427231, -0.83444747]], shape=(2000, 10))

In [5]:
# Display the generated class labels, one per sample.
y

array([1, 1, 0, ..., 1, 1, 1], shape=(2000,))

In [6]:
from sklearn.model_selection import train_test_split

In [7]:
# Hold out 20% of the samples for evaluation; the split is seeded so the
# same rows land in train and test on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.2,
    random_state=11,
)

In [8]:
from sklearn.ensemble import AdaBoostClassifier

In [9]:
# Baseline AdaBoost model with default hyperparameters.
# The seed matches the one used for data generation and the train/test split,
# so a Restart & Run All reproduces the same fitted ensemble and scores.
# NOTE(review): `abc` is also the name of a standard-library module; the name
# is kept because later cells reference it.
abc = AdaBoostClassifier(random_state=11)

In [10]:
# Train the baseline ensemble on the training split only.
abc.fit(X=X_train, y=y_train)

In [11]:
# Predict on the held-out split so the predictions (y_pred) can be compared
# against the true labels (y_test) in the evaluation cells.
y_pred = abc.predict(X_test)

In [12]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [13]:
# Test-set accuracy of the baseline AdaBoost model.
print(accuracy_score(y_true=y_test, y_pred=y_pred))

0.815


In [14]:
# Confusion matrix for the baseline model on the test split.
print(confusion_matrix(y_true=y_test, y_pred=y_pred))

[[175  30]
 [ 44 151]]


In [15]:
# Per-class precision / recall / F1 for the baseline model on the test split.
print(classification_report(y_true=y_test, y_pred=y_pred))

              precision    recall  f1-score   support

           0       0.80      0.85      0.83       205
           1       0.83      0.77      0.80       195

    accuracy                           0.81       400
   macro avg       0.82      0.81      0.81       400
weighted avg       0.82      0.81      0.81       400



In [16]:
#Logistic regression

from sklearn.linear_model import LogisticRegression

In [17]:
# AdaBoost with logistic regression as the base learner instead of the default.
abclog = AdaBoostClassifier(
    estimator=LogisticRegression(),
)

In [18]:
# Train the logistic-regression-based ensemble on the training split.
abclog.fit(X=X_train, y=y_train)

In [19]:
# Test-split predictions from the logistic-regression-based ensemble.
y_pred2 = abclog.predict(X=X_test)

In [20]:
# Test-set accuracy of the logistic-regression-based ensemble.
print(accuracy_score(y_true=y_test, y_pred=y_pred2))

0.785


In [21]:
#Support vector machine

from sklearn.svm import SVC

In [22]:
# Linear-kernel SVM that serves as the AdaBoost base learner below.
# NOTE(review): probability=True adds fitting cost; confirm that the installed
# scikit-learn's AdaBoost still needs class probabilities from its base learner.
svc = SVC(
    kernel='linear',
    probability=True,
)

In [23]:
# AdaBoost over the linear SVM: 25 boosting rounds with a 0.1 learning rate.
# The seed (same value as the data/split seed) is pinned because AdaBoost
# passes it on to base estimators that expose random_state, and the SVC's
# probability estimates are otherwise produced from a randomly shuffled split,
# which would make reruns non-reproducible.
abcsvm = AdaBoostClassifier(
    estimator=svc,
    n_estimators=25,
    learning_rate=0.1,
    random_state=11,
)

In [24]:
# Train the SVM-based ensemble on the training split.
abcsvm.fit(X=X_train, y=y_train)

In [25]:
# Test-split predictions from the SVM-based ensemble.
y_pred3 = abcsvm.predict(X=X_test)

In [26]:
# Test-set accuracy of the SVM-based ensemble.
print(accuracy_score(y_true=y_test, y_pred=y_pred3))

0.79


In [30]:
#Hyperparameter tuning

# Search space for the AdaBoost grid search: every n_estimators value is
# crossed with every learning_rate value (5 x 5 = 25 combinations).
param_grid = {
    'n_estimators': [1, 10, 20, 100, 650],
    'learning_rate': [1e-8, 1e-4, 1e-3, 0.1, 1],
}

In [29]:
from sklearn.model_selection import GridSearchCV

In [32]:
# Exhaustive search over param_grid for the baseline AdaBoost estimator,
# scored with cv=3 cross-validation; n_jobs=-1 requests parallel execution.
abc_grid = GridSearchCV(
    estimator=abc,
    param_grid=param_grid,
    cv=3,
    n_jobs=-1,
)

In [33]:
# Run the grid search on the training split only; the test split stays unseen.
abc_grid.fit(X=X_train, y=y_train)

In [34]:
# Show the parameter combination the grid search selected as best.
abc_grid.best_params_

{'learning_rate': 0.1, 'n_estimators': 650}

In [35]:
# Tuned AdaBoost model built directly from the grid search's winning
# parameters instead of values copied by hand from a previous output, so this
# cell stays correct if the search result changes on a rerun.
# The seed matches the data/split seed to keep the fitted model reproducible.
abc2 = AdaBoostClassifier(random_state=11, **abc_grid.best_params_)

In [36]:
# Train the tuned ensemble on the training split.
abc2.fit(X=X_train, y=y_train)

In [37]:
# Test-split predictions from the tuned ensemble.
y_pred4 = abc2.predict(X=X_test)

In [38]:
# Test-set accuracy of the tuned ensemble.
print(accuracy_score(y_true=y_test, y_pred=y_pred4))

0.8275


In [40]:
# Test accuracies side by side, one per line, in this order:
# baseline AdaBoost, logistic-regression base, SVM base, tuned AdaBoost.
print(
    *(accuracy_score(y_test, preds) for preds in (y_pred, y_pred2, y_pred3, y_pred4)),
    sep='\n',
)

0.815
0.785
0.79
0.8275
