In [5]:
import pandas as pd 
import numpy as np
from xgboost import XGBClassifier
import pickle
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [2]:
!pip install xgboost

Collecting xgboost
  Downloading xgboost-1.5.2-py3-none-win_amd64.whl (106.6 MB)
Installing collected packages: xgboost
Successfully installed xgboost-1.5.2


In [65]:
data = pd.read_csv('pima-indians-diabetes.csv')

In [66]:
data.head()

Unnamed: 0,Number of times pregnant,Plasma glucose concentration,Diastolic blood pressure (mm Hg),Triceps skinfold thickness (mm),2-Hour serum insulin (mu U/ml),Body mass index (weight in kg/(height in m)^2),Diabetes pedigree function,Age,Is Diabetic
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [67]:
data.columns

Index(['Number of times pregnant', 'Plasma glucose concentration',
       'Diastolic blood pressure (mm Hg)', 'Triceps skinfold thickness (mm)',
       '2-Hour serum insulin (mu U/ml)',
       'Body mass index (weight in kg/(height in m)^2)',
       'Diabetes pedigree function', 'Age', 'Is Diabetic'],
      dtype='object')

In [68]:
cols = ['Plasma glucose concentration',
       'Diastolic blood pressure (mm Hg)', 'Triceps skinfold thickness (mm)',
       '2-Hour serum insulin (mu U/ml)',
       'Body mass index (weight in kg/(height in m)^2)',
       'Diabetes pedigree function', 'Age']

In [69]:
for col in cols:
    data[col] = data[col].replace(0, np.nan)

In [70]:
data.isna().sum()

Number of times pregnant                            0
Plasma glucose concentration                        5
Diastolic blood pressure (mm Hg)                   35
Triceps skinfold thickness (mm)                   227
2-Hour serum insulin (mu U/ml)                    374
Body mass index (weight in kg/(height in m)^2)     11
Diabetes pedigree function                          0
Age                                                 0
Is Diabetic                                         0
dtype: int64

In [71]:
data['Plasma glucose concentration']=data['Plasma glucose concentration'].fillna(data['Plasma glucose concentration'].mode()[0])
data['Diastolic blood pressure (mm Hg)']=data['Diastolic blood pressure (mm Hg)'].fillna(data['Diastolic blood pressure (mm Hg)'].mode()[0])
data['Triceps skinfold thickness (mm)']=data['Triceps skinfold thickness (mm)'].fillna(data['Triceps skinfold thickness (mm)'].mean())
data['2-Hour serum insulin (mu U/ml)']=data['2-Hour serum insulin (mu U/ml)'].fillna(data['2-Hour serum insulin (mu U/ml)'].mean())
data['Body mass index (weight in kg/(height in m)^2)']=data['Body mass index (weight in kg/(height in m)^2)'].fillna(data['Body mass index (weight in kg/(height in m)^2)'].mean())

In [72]:
data.isna().sum()

Number of times pregnant                          0
Plasma glucose concentration                      0
Diastolic blood pressure (mm Hg)                  0
Triceps skinfold thickness (mm)                   0
2-Hour serum insulin (mu U/ml)                    0
Body mass index (weight in kg/(height in m)^2)    0
Diabetes pedigree function                        0
Age                                               0
Is Diabetic                                       0
dtype: int64

In [73]:
x = data.drop(columns='Is Diabetic',axis=1)
y = data['Is Diabetic']

In [74]:
x.head()

Unnamed: 0,Number of times pregnant,Plasma glucose concentration,Diastolic blood pressure (mm Hg),Triceps skinfold thickness (mm),2-Hour serum insulin (mu U/ml),Body mass index (weight in kg/(height in m)^2),Diabetes pedigree function,Age
0,6,148.0,72.0,35.0,155.548223,33.6,0.627,50
1,1,85.0,66.0,29.0,155.548223,26.6,0.351,31
2,8,183.0,64.0,29.15342,155.548223,23.3,0.672,32
3,1,89.0,66.0,23.0,94.0,28.1,0.167,21
4,0,137.0,40.0,35.0,168.0,43.1,2.288,33


In [49]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
scaled_data = scaler.fit_transform(x)

In [50]:
train_x, test_x, train_y, test_y = train_test_split(scaled_data, y, test_size=0.3, random_state=42)

In [51]:
model = XGBClassifier(objective='binary:logistic')
model.fit(train_x,train_y)





XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
              gamma=0, gpu_id=-1, importance_type=None,
              interaction_constraints='', learning_rate=0.300000012,
              max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
              monotone_constraints='()', n_estimators=100, n_jobs=8,
              num_parallel_tree=1, predictor='auto', random_state=0,
              reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
              tree_method='exact', validate_parameters=1, verbosity=None)

In [52]:
y_pred = model.predict(train_x)
predictions = [round(value) for value in y_pred]
accuracy = accuracy_score(train_y, predictions)
accuracy

1.0

In [53]:
y_pred = model.predict(test_x)
predictions = [round(value) for value in y_pred]
accuracy = accuracy_score(test_y,predictions)
accuracy

0.7272727272727273

In [54]:
from sklearn.model_selection import GridSearchCV

In [55]:
param_grid={
   
    ' learning_rate':[1,0.5,0.1,0.01,0.001],
    'max_depth': [3,5,10,20],
    'n_estimators':[10,50,100,200]
    
}

In [56]:
grid= GridSearchCV(XGBClassifier(objective='binary:logistic'),param_grid, verbose=3)
grid.fit(train_x,train_y)

Fitting 5 folds for each of 80 candidates, totalling 400 fits
[CV]  learning_rate=1, max_depth=3, n_estimators=10 ..................
[CV]   learning_rate=1, max_depth=3, n_estimators=10, score=nan, total=   0.0s
[CV]  learning_rate=1, max_depth=3, n_estimators=10 ..................
[CV]   learning_rate=1, max_depth=3, n_estimators=10, score=nan, total=   0.0s
[CV]  learning_rate=1, max_depth=3, n_estimators=10 ..................
[CV]   learning_rate=1, max_depth=3, n_estimators=10, score=nan, total=   0.0s
[CV]  learning_rate=1, max_depth=3, n_estimators=10 ..................
[CV]   learning_rate=1, max_depth=3, n_estimators=10, score=nan, total=   0.0s
[CV]  learning_rate=1, max_depth=3, n_estimators=10 ..................
[CV]   learning_rate=1, max_depth=3, n_estimators=10, score=nan, total=   0.0s
[CV]  learning_rate=1, max_depth=3, n_estimators=50 ..................
[CV]   learning_rate=1, max_depth=3, n_estimators=50, score=nan, total=   0.0s
[CV]  learning_rate=1, max_depth=3, n_

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
Traceback (most recent call last):
  File "C:\Users\deepak.thakur\Anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\deepak.thakur\Anaconda3\lib\site-packages\xgboost\core.py", line 506, in inner_f
    return f(**kwargs)
  File "C:\Users\deepak.thakur\Anaconda3\lib\site-packages\xgboost\sklearn.py", line 1250, in fit
    self._Booster = train(
  File "C:\Users\deepak.thakur\Anaconda3\lib\site-packages\xgboost\training.py", line 188, in train
    bst = _train_internal(params, dtrain,
  File "C:\Users\deepak.thakur\Anaconda3\lib\site-packages\xgboost\training.py", line 81, in _train_internal
    bst.update(dtrain, i, obj)
  File "C:\Users\deepak.thakur\Anaconda3\lib\site-packages\xgboost\core.py", line 1680, in update
    _check_call(_LIB.XGBoosterUpdateOneIter(self.handle,
  File "C:\Users\deepak


[CV]  learning_rate=1, max_depth=3, n_estimators=100 .................
[CV]   learning_rate=1, max_depth=3, n_estimators=100, score=nan, total=   0.0s
[CV]  learning_rate=1, max_depth=3, n_estimators=200 .................
[CV]   learning_rate=1, max_depth=3, n_estimators=200, score=nan, total=   0.0s
[CV]  learning_rate=1, max_depth=3, n_estimators=200 .................
[CV]   learning_rate=1, max_depth=3, n_estimators=200, score=nan, total=   0.0s
[CV]  learning_rate=1, max_depth=3, n_estimators=200 .................
[CV]   learning_rate=1, max_depth=3, n_estimators=200, score=nan, total=   0.0s
[CV]  learning_rate=1, max_depth=3, n_estimators=200 .................
[CV]   learning_rate=1, max_depth=3, n_estimators=200, score=nan, total=   0.0s
[CV]  learning_rate=1, max_depth=3, n_estimators=200 .................
[CV]   learning_rate=1, max_depth=3, n_estimators=200, score=nan, total=   0.0s
[CV]  learning_rate=1, max_depth=5, n_estimators=10 ..................
[CV]   learning_rate=1

Traceback (most recent call last):
  File "C:\Users\deepak.thakur\Anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\deepak.thakur\Anaconda3\lib\site-packages\xgboost\core.py", line 506, in inner_f
    return f(**kwargs)
  File "C:\Users\deepak.thakur\Anaconda3\lib\site-packages\xgboost\sklearn.py", line 1250, in fit
    self._Booster = train(
  File "C:\Users\deepak.thakur\Anaconda3\lib\site-packages\xgboost\training.py", line 188, in train
    bst = _train_internal(params, dtrain,
  File "C:\Users\deepak.thakur\Anaconda3\lib\site-packages\xgboost\training.py", line 81, in _train_internal
    bst.update(dtrain, i, obj)
  File "C:\Users\deepak.thakur\Anaconda3\lib\site-packages\xgboost\core.py", line 1680, in update
    _check_call(_LIB.XGBoosterUpdateOneIter(self.handle,
  File "C:\Users\deepak.thakur\Anaconda3\lib\site-packages\xgboost\core.py", line 218, in _check_call
  


[CV]  learning_rate=1, max_depth=20, n_estimators=100 ................
[CV]   learning_rate=1, max_depth=20, n_estimators=100, score=nan, total=   0.0s
[CV]  learning_rate=1, max_depth=20, n_estimators=100 ................
[CV]   learning_rate=1, max_depth=20, n_estimators=100, score=nan, total=   0.0s
[CV]  learning_rate=1, max_depth=20, n_estimators=100 ................
[CV]   learning_rate=1, max_depth=20, n_estimators=100, score=nan, total=   0.0s
[CV]  learning_rate=1, max_depth=20, n_estimators=100 ................
[CV]   learning_rate=1, max_depth=20, n_estimators=100, score=nan, total=   0.0s
[CV]  learning_rate=1, max_depth=20, n_estimators=100 ................
[CV]   learning_rate=1, max_depth=20, n_estimators=100, score=nan, total=   0.0s
[CV]  learning_rate=1, max_depth=20, n_estimators=200 ................
[CV]   learning_rate=1, max_depth=20, n_estimators=200, score=nan, total=   0.0s
[CV]  learning_rate=1, max_depth=20, n_estimators=200 ................
[CV]   learning_

Traceback (most recent call last):
  File "C:\Users\deepak.thakur\Anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\deepak.thakur\Anaconda3\lib\site-packages\xgboost\core.py", line 506, in inner_f
    return f(**kwargs)
  File "C:\Users\deepak.thakur\Anaconda3\lib\site-packages\xgboost\sklearn.py", line 1250, in fit
    self._Booster = train(
  File "C:\Users\deepak.thakur\Anaconda3\lib\site-packages\xgboost\training.py", line 188, in train
    bst = _train_internal(params, dtrain,
  File "C:\Users\deepak.thakur\Anaconda3\lib\site-packages\xgboost\training.py", line 81, in _train_internal
    bst.update(dtrain, i, obj)
  File "C:\Users\deepak.thakur\Anaconda3\lib\site-packages\xgboost\core.py", line 1680, in update
    _check_call(_LIB.XGBoosterUpdateOneIter(self.handle,
  File "C:\Users\deepak.thakur\Anaconda3\lib\site-packages\xgboost\core.py", line 218, in _check_call
  


[CV]  learning_rate=0.5, max_depth=10, n_estimators=50 ...............
[CV]   learning_rate=0.5, max_depth=10, n_estimators=50, score=nan, total=   0.0s
[CV]  learning_rate=0.5, max_depth=10, n_estimators=50 ...............
[CV]   learning_rate=0.5, max_depth=10, n_estimators=50, score=nan, total=   0.0s
[CV]  learning_rate=0.5, max_depth=10, n_estimators=100 ..............
[CV]   learning_rate=0.5, max_depth=10, n_estimators=100, score=nan, total=   0.0s
[CV]  learning_rate=0.5, max_depth=10, n_estimators=100 ..............
[CV]   learning_rate=0.5, max_depth=10, n_estimators=100, score=nan, total=   0.0s
[CV]  learning_rate=0.5, max_depth=10, n_estimators=100 ..............
[CV]   learning_rate=0.5, max_depth=10, n_estimators=100, score=nan, total=   0.0s
[CV]  learning_rate=0.5, max_depth=10, n_estimators=100 ..............
[CV]   learning_rate=0.5, max_depth=10, n_estimators=100, score=nan, total=   0.0s
[CV]  learning_rate=0.5, max_depth=10, n_estimators=100 ..............
[CV]  

Traceback (most recent call last):
  File "C:\Users\deepak.thakur\Anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\deepak.thakur\Anaconda3\lib\site-packages\xgboost\core.py", line 506, in inner_f
    return f(**kwargs)
  File "C:\Users\deepak.thakur\Anaconda3\lib\site-packages\xgboost\sklearn.py", line 1250, in fit
    self._Booster = train(
  File "C:\Users\deepak.thakur\Anaconda3\lib\site-packages\xgboost\training.py", line 188, in train
    bst = _train_internal(params, dtrain,
  File "C:\Users\deepak.thakur\Anaconda3\lib\site-packages\xgboost\training.py", line 81, in _train_internal
    bst.update(dtrain, i, obj)
  File "C:\Users\deepak.thakur\Anaconda3\lib\site-packages\xgboost\core.py", line 1680, in update
    _check_call(_LIB.XGBoosterUpdateOneIter(self.handle,
  File "C:\Users\deepak.thakur\Anaconda3\lib\site-packages\xgboost\core.py", line 218, in _check_call
  


[CV]  learning_rate=0.1, max_depth=3, n_estimators=200 ...............
[CV]   learning_rate=0.1, max_depth=3, n_estimators=200, score=nan, total=   0.0s
[CV]  learning_rate=0.1, max_depth=5, n_estimators=10 ................
[CV]   learning_rate=0.1, max_depth=5, n_estimators=10, score=nan, total=   0.0s
[CV]  learning_rate=0.1, max_depth=5, n_estimators=10 ................
[CV]   learning_rate=0.1, max_depth=5, n_estimators=10, score=nan, total=   0.0s
[CV]  learning_rate=0.1, max_depth=5, n_estimators=10 ................
[CV]   learning_rate=0.1, max_depth=5, n_estimators=10, score=nan, total=   0.0s
[CV]  learning_rate=0.1, max_depth=5, n_estimators=10 ................
[CV]   learning_rate=0.1, max_depth=5, n_estimators=10, score=nan, total=   0.0s
[CV]  learning_rate=0.1, max_depth=5, n_estimators=10 ................
[CV]   learning_rate=0.1, max_depth=5, n_estimators=10, score=nan, total=   0.0s
[CV]  learning_rate=0.1, max_depth=5, n_estimators=50 ................
[CV]   learning

Traceback (most recent call last):
  File "C:\Users\deepak.thakur\Anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\deepak.thakur\Anaconda3\lib\site-packages\xgboost\core.py", line 506, in inner_f
    return f(**kwargs)
  File "C:\Users\deepak.thakur\Anaconda3\lib\site-packages\xgboost\sklearn.py", line 1250, in fit
    self._Booster = train(
  File "C:\Users\deepak.thakur\Anaconda3\lib\site-packages\xgboost\training.py", line 188, in train
    bst = _train_internal(params, dtrain,
  File "C:\Users\deepak.thakur\Anaconda3\lib\site-packages\xgboost\training.py", line 81, in _train_internal
    bst.update(dtrain, i, obj)
  File "C:\Users\deepak.thakur\Anaconda3\lib\site-packages\xgboost\core.py", line 1680, in update
    _check_call(_LIB.XGBoosterUpdateOneIter(self.handle,
  File "C:\Users\deepak.thakur\Anaconda3\lib\site-packages\xgboost\core.py", line 218, in _check_call
  


[CV]  learning_rate=0.1, max_depth=20, n_estimators=100 ..............
[CV]   learning_rate=0.1, max_depth=20, n_estimators=100, score=nan, total=   0.0s
[CV]  learning_rate=0.1, max_depth=20, n_estimators=100 ..............
[CV]   learning_rate=0.1, max_depth=20, n_estimators=100, score=nan, total=   0.0s
[CV]  learning_rate=0.1, max_depth=20, n_estimators=100 ..............
[CV]   learning_rate=0.1, max_depth=20, n_estimators=100, score=nan, total=   0.0s
[CV]  learning_rate=0.1, max_depth=20, n_estimators=200 ..............
[CV]   learning_rate=0.1, max_depth=20, n_estimators=200, score=nan, total=   0.0s
[CV]  learning_rate=0.1, max_depth=20, n_estimators=200 ..............
[CV]   learning_rate=0.1, max_depth=20, n_estimators=200, score=nan, total=   0.0s
[CV]  learning_rate=0.1, max_depth=20, n_estimators=200 ..............
[CV]   learning_rate=0.1, max_depth=20, n_estimators=200, score=nan, total=   0.0s
[CV]  learning_rate=0.1, max_depth=20, n_estimators=200 ..............
[CV]

Traceback (most recent call last):
  File "C:\Users\deepak.thakur\Anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\deepak.thakur\Anaconda3\lib\site-packages\xgboost\core.py", line 506, in inner_f
    return f(**kwargs)
  File "C:\Users\deepak.thakur\Anaconda3\lib\site-packages\xgboost\sklearn.py", line 1250, in fit
    self._Booster = train(
  File "C:\Users\deepak.thakur\Anaconda3\lib\site-packages\xgboost\training.py", line 188, in train
    bst = _train_internal(params, dtrain,
  File "C:\Users\deepak.thakur\Anaconda3\lib\site-packages\xgboost\training.py", line 81, in _train_internal
    bst.update(dtrain, i, obj)
  File "C:\Users\deepak.thakur\Anaconda3\lib\site-packages\xgboost\core.py", line 1680, in update
    _check_call(_LIB.XGBoosterUpdateOneIter(self.handle,
  File "C:\Users\deepak.thakur\Anaconda3\lib\site-packages\xgboost\core.py", line 218, in _check_call
  


[CV]  learning_rate=0.01, max_depth=10, n_estimators=10 ..............
[CV]   learning_rate=0.01, max_depth=10, n_estimators=10, score=nan, total=   0.0s
[CV]  learning_rate=0.01, max_depth=10, n_estimators=10 ..............
[CV]   learning_rate=0.01, max_depth=10, n_estimators=10, score=nan, total=   0.0s
[CV]  learning_rate=0.01, max_depth=10, n_estimators=10 ..............
[CV]   learning_rate=0.01, max_depth=10, n_estimators=10, score=nan, total=   0.0s
[CV]  learning_rate=0.01, max_depth=10, n_estimators=50 ..............
[CV]   learning_rate=0.01, max_depth=10, n_estimators=50, score=nan, total=   0.0s
[CV]  learning_rate=0.01, max_depth=10, n_estimators=50 ..............
[CV]   learning_rate=0.01, max_depth=10, n_estimators=50, score=nan, total=   0.0s
[CV]  learning_rate=0.01, max_depth=10, n_estimators=50 ..............
[CV]   learning_rate=0.01, max_depth=10, n_estimators=50, score=nan, total=   0.0s
[CV]  learning_rate=0.01, max_depth=10, n_estimators=50 ..............
[CV]

Traceback (most recent call last):
  File "C:\Users\deepak.thakur\Anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\deepak.thakur\Anaconda3\lib\site-packages\xgboost\core.py", line 506, in inner_f
    return f(**kwargs)
  File "C:\Users\deepak.thakur\Anaconda3\lib\site-packages\xgboost\sklearn.py", line 1250, in fit
    self._Booster = train(
  File "C:\Users\deepak.thakur\Anaconda3\lib\site-packages\xgboost\training.py", line 188, in train
    bst = _train_internal(params, dtrain,
  File "C:\Users\deepak.thakur\Anaconda3\lib\site-packages\xgboost\training.py", line 81, in _train_internal
    bst.update(dtrain, i, obj)
  File "C:\Users\deepak.thakur\Anaconda3\lib\site-packages\xgboost\core.py", line 1680, in update
    _check_call(_LIB.XGBoosterUpdateOneIter(self.handle,
  File "C:\Users\deepak.thakur\Anaconda3\lib\site-packages\xgboost\core.py", line 218, in _check_call
  

[CV]   learning_rate=0.001, max_depth=3, n_estimators=100, score=nan, total=   0.0s
[CV]  learning_rate=0.001, max_depth=3, n_estimators=100 .............
[CV]   learning_rate=0.001, max_depth=3, n_estimators=100, score=nan, total=   0.0s
[CV]  learning_rate=0.001, max_depth=3, n_estimators=100 .............
[CV]   learning_rate=0.001, max_depth=3, n_estimators=100, score=nan, total=   0.0s
[CV]  learning_rate=0.001, max_depth=3, n_estimators=200 .............
[CV]   learning_rate=0.001, max_depth=3, n_estimators=200, score=nan, total=   0.0s
[CV]  learning_rate=0.001, max_depth=3, n_estimators=200 .............
[CV]   learning_rate=0.001, max_depth=3, n_estimators=200, score=nan, total=   0.0s
[CV]  learning_rate=0.001, max_depth=3, n_estimators=200 .............
[CV]   learning_rate=0.001, max_depth=3, n_estimators=200, score=nan, total=   0.0s
[CV]  learning_rate=0.001, max_depth=3, n_estimators=200 .............
[CV]   learning_rate=0.001, max_depth=3, n_estimators=200, score=nan, 

Traceback (most recent call last):
  File "C:\Users\deepak.thakur\Anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\deepak.thakur\Anaconda3\lib\site-packages\xgboost\core.py", line 506, in inner_f
    return f(**kwargs)
  File "C:\Users\deepak.thakur\Anaconda3\lib\site-packages\xgboost\sklearn.py", line 1250, in fit
    self._Booster = train(
  File "C:\Users\deepak.thakur\Anaconda3\lib\site-packages\xgboost\training.py", line 188, in train
    bst = _train_internal(params, dtrain,
  File "C:\Users\deepak.thakur\Anaconda3\lib\site-packages\xgboost\training.py", line 81, in _train_internal
    bst.update(dtrain, i, obj)
  File "C:\Users\deepak.thakur\Anaconda3\lib\site-packages\xgboost\core.py", line 1680, in update
    _check_call(_LIB.XGBoosterUpdateOneIter(self.handle,
  File "C:\Users\deepak.thakur\Anaconda3\lib\site-packages\xgboost\core.py", line 218, in _check_call
  

[CV]   learning_rate=0.001, max_depth=20, n_estimators=50, score=nan, total=   0.0s
[CV]  learning_rate=0.001, max_depth=20, n_estimators=50 .............
[CV]   learning_rate=0.001, max_depth=20, n_estimators=50, score=nan, total=   0.0s
[CV]  learning_rate=0.001, max_depth=20, n_estimators=50 .............
[CV]   learning_rate=0.001, max_depth=20, n_estimators=50, score=nan, total=   0.0s
[CV]  learning_rate=0.001, max_depth=20, n_estimators=50 .............
[CV]   learning_rate=0.001, max_depth=20, n_estimators=50, score=nan, total=   0.0s
[CV]  learning_rate=0.001, max_depth=20, n_estimators=100 ............
[CV]   learning_rate=0.001, max_depth=20, n_estimators=100, score=nan, total=   0.0s
[CV]  learning_rate=0.001, max_depth=20, n_estimators=100 ............
[CV]   learning_rate=0.001, max_depth=20, n_estimators=100, score=nan, total=   0.0s
[CV]  learning_rate=0.001, max_depth=20, n_estimators=100 ............
[CV]   learning_rate=0.001, max_depth=20, n_estimators=100, score=na

[Parallel(n_jobs=1)]: Done 400 out of 400 | elapsed:    7.3s finished


XGBoostError: [18:30:47] C:/Users/Administrator/workspace/xgboost-win64_release_1.5.1/src/learner.cc:553: Invalid parameter " learning_rate" contains whitespace.

In [57]:
grid.best_params_

{' learning_rate': 1, 'max_depth': 3, 'n_estimators': 10}

In [59]:
new_model=XGBClassifier(learning_rate= 1, max_depth= 3, n_estimators= 10)
new_model.fit(train_x, train_y)



XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
              gamma=0, gpu_id=-1, importance_type=None,
              interaction_constraints='', learning_rate=1, max_delta_step=0,
              max_depth=3, min_child_weight=1, missing=nan,
              monotone_constraints='()', n_estimators=10, n_jobs=8,
              num_parallel_tree=1, predictor='auto', random_state=0,
              reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
              tree_method='exact', validate_parameters=1, verbosity=None)

In [60]:
y_pred_new = new_model.predict(test_x)
predictions_new = [round(value) for value in y_pred_new]
accuracy_new = accuracy_score(test_y,predictions_new)
accuracy_new

0.7619047619047619

In [78]:
from numpy import loadtxt
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [79]:
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC

In [80]:
mydata = pd.read_csv('pima-indians-diabetes.csv')

In [81]:
data.shape

(768, 9)

In [82]:
X_train, X_test, y_train, y_test = train_test_split(x,y,test_size=0.20, random_state=1)

In [109]:
models=[]
models.append(('LogisticRegression', LogisticRegression()))
models.append(('KNN',KNeighborsClassifier()))
models.append(('SVM', SVC()))
models.append(('XGB', XGBClassifier(eta=.01, gamma=5)))

import time

#results = []
#names = []
#scoring = 'accuracy'
for name, model in models:
    start_time = time.time()
    model.fit(X_train, y_train)
    
    y_pred = model.predict(X_test)
    predictions = [round(value) for value in y_pred]
    
    accuracy = accuracy_score(y_test, predictions)
    print("Accuracy: %.2f%%" % (accuracy * 100.0), name)
    print("--- %s seconds ---" %(time.time()- start_time))

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Accuracy: 76.62% LogisticRegression
--- 0.07111644744873047 seconds ---
Accuracy: 75.32% KNN
--- 0.019684314727783203 seconds ---
Accuracy: 76.62% SVM
--- 0.0344696044921875 seconds ---
Accuracy: 81.17% XGB
--- 0.3447422981262207 seconds ---
