In [35]:
import numpy as np
import pandas as pd
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_breast_cancer

from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier


import warnings
warnings.filterwarnings('ignore')

In [4]:
%config Completer.use_jedi=False

In [18]:
load_breast_cancer().data.shape

(569, 30)

In [19]:
dt = load_breast_cancer()

In [20]:
X = pd.DataFrame(dt.data,columns=dt.feature_names)

In [21]:
y = pd.Series(dt.target)

In [26]:
dt.feature_names.shape

(30,)

In [28]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42)

## comparing the models with XGB

In [47]:
models = []
models.append(('XGB',XGBClassifier()))
models.append(('LogisticRegression',LogisticRegression()))
models.append(('DecisionTree', DecisionTreeClassifier()))
models.append(('RandomForest', RandomForestClassifier()))
models.append(('SVC',SVC()))
models.append(('KNN', KNeighborsClassifier()))


import time 


results = []
names = []
scoring = 'accuracy'


for name, model in models:
    model.fit(X_train,y_train)
    
    y_pred = model.predict(X_test)
    
    accuracy = accuracy_score(y_test, y_pred)
    
    print ( f'Accuracy score for model {name} is : {accuracy}')
    

Accuracy score for model XGB is : 0.956140350877193
Accuracy score for model LogisticRegression is : 0.9473684210526315
Accuracy score for model DecisionTree is : 0.9385964912280702
Accuracy score for model RandomForest is : 0.9649122807017544
Accuracy score for model SVC is : 0.9473684210526315
Accuracy score for model KNN is : 0.956140350877193


## Lets try with hyperparameter

In [41]:
from sklearn.model_selection import GridSearchCV

In [43]:
pr = {'eta':[0.15,0.1,0.01,0.05],'gamma':[10,15,20,25]}

In [44]:
tuning = GridSearchCV(estimator=XGBClassifier(),
                    param_grid = pr,
                     scoring = 'r2')

tuning.fit(X_train,y_train)
tuning.best_params_, tuning.best_score_

({'eta': 0.15, 'gamma': 10}, 0.7837686138636283)

In [45]:
accuracy_score(y_test,tuning.predict(X_test))

0.956140350877193