## Import all packages and files required

In [1]:
import pandas as pd

from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report,confusion_matrix, recall_score,precision_score,accuracy_score,f1_score

import joblib

In [2]:
xtrain = pd.read_csv('xtrain.csv')
xtest = pd.read_csv('xtest.csv')
ytrain = pd.read_csv('ytrain.csv')
ytest= pd.read_csv('ytest.csv')

In [3]:
yt = ytrain.to_numpy()
yte = ytest.to_numpy()

In [4]:
y_train = yt.ravel()
y_test = yte.ravel()

In [5]:
print(xtest.shape);
print(xtrain.shape);
print(y_test.shape);
print(y_train.shape)

(179, 10)
(712, 10)
(179,)
(712,)


In [6]:
print(xtest.columns);
print(xtrain.columns);

Index(['Pclass', 'Age', 'SibSp', 'Parch', 'Fare', 'female', 'male', 'C', 'Q',
       'S'],
      dtype='object')
Index(['Pclass', 'Age', 'SibSp', 'Parch', 'Fare', 'female', 'male', 'C', 'Q',
       'S'],
      dtype='object')


### Model building

In [7]:
gbc = GradientBoostingClassifier()

In [8]:
gbc.fit(xtrain,y_train)

GradientBoostingClassifier()

### predict

In [9]:
train_predict = gbc.predict(xtrain)
test_predict = gbc.predict(xtest)

### Evaluate the model

In [10]:
def evaluate_model(act, pred):
    print("Confusion Matrix \n", confusion_matrix(act, pred))
    print("Accuracy : ", accuracy_score(act, pred))
    print("Recall   : ", recall_score(act, pred))
    print("Precision: ", precision_score(act, pred))
    print("F1_score : ", f1_score(act, pred))

In [11]:
### Train data accuracy
print('----------------train predictions---------------')
evaluate_model(ytrain, train_predict)

### Test data accuracy
print('---------------------test predictions----------')
evaluate_model(ytest, test_predict)

----------------train predictions---------------
Confusion Matrix 
 [[421  15]
 [ 58 218]]
Accuracy :  0.8974719101123596
Recall   :  0.7898550724637681
Precision:  0.9356223175965666
F1_score :  0.8565815324165029
---------------------test predictions----------
Confusion Matrix 
 [[103  10]
 [ 17  49]]
Accuracy :  0.8491620111731844
Recall   :  0.7424242424242424
Precision:  0.8305084745762712
F1_score :  0.784


### Model selection using GridSearchCV

In [12]:
grid = {'n_estimators':[100,50],
        'learning_rate':[0.3,0.2],
       'max_depth':[3,1],
       'min_samples_leaf':[5,3]}

In [13]:
gbc_grid = GridSearchCV(gbc, grid)

In [14]:
gbc_grid.fit(xtrain, y_train)

GridSearchCV(estimator=GradientBoostingClassifier(),
             param_grid={'learning_rate': [0.3, 0.2], 'max_depth': [3, 1],
                         'min_samples_leaf': [5, 3],
                         'n_estimators': [100, 50]})

In [15]:
gbc_grid.best_params_

{'learning_rate': 0.2,
 'max_depth': 3,
 'min_samples_leaf': 3,
 'n_estimators': 100}

### Predict

In [16]:
train_pred = gbc_grid.predict(xtrain)
test_pred = gbc_grid.predict(xtest)

### Evaluate model

In [17]:
### Train data accuracy
print('----------------train predictions---------------')
evaluate_model(ytrain, train_pred)

### Test data accuracy
print('---------------------test predictions----------')
evaluate_model(ytest, test_pred)

----------------train predictions---------------
Confusion Matrix 
 [[427   9]
 [ 44 232]]
Accuracy :  0.925561797752809
Recall   :  0.8405797101449275
Precision:  0.9626556016597511
F1_score :  0.8974854932301741
---------------------test predictions----------
Confusion Matrix 
 [[103  10]
 [ 17  49]]
Accuracy :  0.8491620111731844
Recall   :  0.7424242424242424
Precision:  0.8305084745762712
F1_score :  0.784


### Export best model

In [18]:
gb = GradientBoostingClassifier(learning_rate = 0.2, n_estimators = 100,max_depth = 3,
 min_samples_leaf = 3)

In [19]:
gb.fit(xtrain, y_train)

GradientBoostingClassifier(learning_rate=0.2, min_samples_leaf=3)

In [20]:
joblib.dump(gbc_grid,'Gradientboostcassifier.joblib')

['Gradientboostcassifier.joblib']