## Import required packages and files

In [1]:
import pandas as pd

from sklearn.base import clone
from sklearn.ensemble import AdaBoostClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report, confusion_matrix, recall_score, precision_score, accuracy_score, f1_score

import joblib

In [2]:
# Load the pre-split feature tables and label tables from the working
# directory. Read order is unchanged: train/test features, then train/test labels.
xtrain, xtest = (pd.read_csv(csv_name) for csv_name in ('xtrain.csv', 'xtest.csv'))
ytrain, ytest = (pd.read_csv(csv_name) for csv_name in ('ytrain.csv', 'ytest.csv'))

In [3]:
# Convert the label DataFrames to NumPy arrays (DataFrame.to_numpy() yields
# a 2-D array; it is flattened in the next cell).
yt, yte = ytrain.to_numpy(), ytest.to_numpy()

In [4]:
# Flatten the label arrays to 1-D before handing them to the estimators.
y_train, y_test = yt.ravel(), yte.ravel()

In [5]:
# Print the shape of every split (test features, train features, test labels,
# train labels) so row counts can be compared between features and labels.
for split in (xtest, xtrain, y_test, y_train):
    print(split.shape)

(179, 10)
(712, 10)
(179,)
(712,)


In [6]:
# Show the feature columns of each split so they can be compared by eye.
for features in (xtest, xtrain):
    print(features.columns)

Index(['Pclass', 'Age', 'SibSp', 'Parch', 'Fare', 'female', 'male', 'C', 'Q',
       'S'],
      dtype='object')
Index(['Pclass', 'Age', 'SibSp', 'Parch', 'Fare', 'female', 'male', 'C', 'Q',
       'S'],
      dtype='object')


## Model building

In [7]:
# Baseline AdaBoost model with library-default hyperparameters. The seed is
# pinned so that a fresh "Restart & Run All" reproduces the same fitted
# ensemble and therefore the same scores.
abc = AdaBoostClassifier(random_state=42)

In [8]:
# Fit the baseline model on the training split (1-D label array).
abc.fit(X=xtrain, y=y_train)

AdaBoostClassifier()

### Predict

In [9]:
# Baseline predictions for both splits: training first, then held-out test.
train_predict, test_predict = (abc.predict(split) for split in (xtrain, xtest))

### Evaluate the model

In [10]:
def evaluate_model(act, pred):
    """Print the confusion matrix and headline classification scores.

    Parameters
    ----------
    act : array-like
        Ground-truth labels.
    pred : array-like
        Predicted labels, aligned element-wise with ``act``.

    Returns
    -------
    None
        All results are written to stdout; nothing is returned.

    Notes
    -----
    Every metric is called with its scikit-learn default arguments.
    """
    # (label, metric) pairs in the exact order they are reported.
    report_rows = (
        ("Confusion Matrix \n", confusion_matrix),
        ("Accuracy : ", accuracy_score),
        ("Recall   : ", recall_score),
        ("Precision: ", precision_score),
        ("F1_score : ", f1_score),
    )
    for label, metric in report_rows:
        print(label, metric(act, pred))

In [11]:
# Score the baseline model on the data it was fitted on. The flattened 1-D
# label arrays (y_train / y_test) are used so the metrics see the same
# targets that were passed to fit(), rather than single-column DataFrames.
print('----------------train predictions---------------')
evaluate_model(y_train, train_predict)

# Score the baseline model on the held-out test split.
print('---------------------test predictions----------')
evaluate_model(y_test, test_predict)

----------------train predictions---------------
Confusion Matrix 
 [[385  51]
 [ 67 209]]
Accuracy :  0.8342696629213483
Recall   :  0.7572463768115942
Precision:  0.8038461538461539
F1_score :  0.7798507462686567
---------------------test predictions----------
Confusion Matrix 
 [[96 17]
 [20 46]]
Accuracy :  0.7932960893854749
Recall   :  0.696969696969697
Precision:  0.7301587301587301
F1_score :  0.7131782945736433


### Model selection using GridSearchCV

In [12]:
# Hyperparameter search space: every combination of ensemble size and
# learning rate below is tried by the grid search.
grid = dict(
    n_estimators=[50, 100, 150],
    learning_rate=[0.5, 1, 1.5],
)

In [13]:
# Exhaustive search over `grid`, using the baseline estimator as the template;
# cross-validation and scoring settings are left at the library defaults.
model_grid = GridSearchCV(estimator=abc, param_grid=grid)

In [14]:
# Run the grid search on the training split only; the test split stays unseen.
model_grid.fit(X=xtrain, y=y_train)

GridSearchCV(estimator=AdaBoostClassifier(),
             param_grid={'learning_rate': [0.5, 1, 1.5],
                         'n_estimators': [50, 100, 150]})

### Best parameters found by the grid search

In [15]:
# Display the hyperparameter combination selected by the fitted grid search.
model_grid.best_params_

{'learning_rate': 0.5, 'n_estimators': 100}

### Predict

In [16]:
# Predictions from the fitted search object for both splits: training first,
# then held-out test.
train_pred, test_pred = (model_grid.predict(split) for split in (xtrain, xtest))

### Evaluate model

In [17]:
# Score the tuned model on the training split. The flattened 1-D label
# arrays (y_train / y_test) are used so the metrics see the same targets
# that were passed to fit(), rather than single-column DataFrames.
print('----------------train predictions---------------')
evaluate_model(y_train, train_pred)

# Score the tuned model on the held-out test split.
print('---------------------test predictions----------')
evaluate_model(y_test, test_pred)

----------------train predictions---------------
Confusion Matrix 
 [[384  52]
 [ 69 207]]
Accuracy :  0.8300561797752809
Recall   :  0.75
Precision:  0.7992277992277992
F1_score :  0.7738317757009345
---------------------test predictions----------
Confusion Matrix 
 [[97 16]
 [21 45]]
Accuracy :  0.7932960893854749
Recall   :  0.6818181818181818
Precision:  0.7377049180327869
F1_score :  0.7086614173228346


### Export best model

In [18]:
# Build the model to export from the grid-search winner instead of retyping
# its hyperparameters by hand, so this cell cannot drift out of sync when the
# grid or the data changes. clone() returns a fresh, unfitted estimator that
# carries the same parameters; it is fitted in the next cell.
ab = clone(model_grid.best_estimator_)

In [19]:
# Fit the export model on the full training split.
ab.fit(X=xtrain, y=y_train)

AdaBoostClassifier(learning_rate=0.5, n_estimators=100)

In [20]:
# Persist the fitted model to disk for later reuse.
# NOTE(review): the filename is spelled "cassifier"; it is kept unchanged
# because other code may load the artifact by this exact name - confirm
# before renaming.
joblib.dump(value=ab, filename='Adaboostcassifier.joblib')

['Adaboostcassifier.joblib']