In [102]:
import zipfile
import pandas as pd
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

# import packages for hyperparameters tuning
from hyperopt import STATUS_OK, Trials, fmin, hp, tpe

In [103]:
zf = zipfile.ZipFile("playground-series-s4e1.zip")
zf.namelist()

['sample_submission.csv', 'test.csv', 'train.csv']

In [104]:
df_train = pd.read_csv(zf.open("train.csv"))
df_test = pd.read_csv(zf.open("test.csv"))

In [105]:
#Checking to see if any rows/columns are missing any data
df_train.head(5)

Unnamed: 0,id,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,0,15674932,Okwudilichukwu,668,France,Male,33.0,3,0.0,2,1.0,0.0,181449.97,0
1,1,15749177,Okwudiliolisa,627,France,Male,33.0,1,0.0,2,1.0,1.0,49503.5,0
2,2,15694510,Hsueh,678,France,Male,40.0,10,0.0,2,1.0,0.0,184866.69,0
3,3,15741417,Kao,581,France,Male,34.0,2,148882.54,1,1.0,1.0,84560.88,0
4,4,15766172,Chiemenam,716,Spain,Male,33.0,5,0.0,2,1.0,1.0,15068.83,0


In [106]:
#Checking if any columns have words that need to be converted to integers/floats for prediction calculations

df_train.drop(['Surname', 'id', 'CustomerId'], axis =1, inplace=True)
df_train.head(5)

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,668,France,Male,33.0,3,0.0,2,1.0,0.0,181449.97,0
1,627,France,Male,33.0,1,0.0,2,1.0,1.0,49503.5,0
2,678,France,Male,40.0,10,0.0,2,1.0,0.0,184866.69,0
3,581,France,Male,34.0,2,148882.54,1,1.0,1.0,84560.88,0
4,716,Spain,Male,33.0,5,0.0,2,1.0,1.0,15068.83,0


In [107]:
df_dummy_train = pd.get_dummies(df_train, drop_first= False, dtype=float)

df_dummy_train

Unnamed: 0,CreditScore,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,Geography_France,Geography_Germany,Geography_Spain,Gender_Female,Gender_Male
0,668,33.0,3,0.00,2,1.0,0.0,181449.97,0,1.0,0.0,0.0,0.0,1.0
1,627,33.0,1,0.00,2,1.0,1.0,49503.50,0,1.0,0.0,0.0,0.0,1.0
2,678,40.0,10,0.00,2,1.0,0.0,184866.69,0,1.0,0.0,0.0,0.0,1.0
3,581,34.0,2,148882.54,1,1.0,1.0,84560.88,0,1.0,0.0,0.0,0.0,1.0
4,716,33.0,5,0.00,2,1.0,1.0,15068.83,0,0.0,0.0,1.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
165029,667,33.0,2,0.00,1,1.0,1.0,131834.75,0,0.0,0.0,1.0,1.0,0.0
165030,792,35.0,3,0.00,1,0.0,0.0,131834.45,0,1.0,0.0,0.0,0.0,1.0
165031,565,31.0,5,0.00,1,1.0,1.0,127429.56,0,1.0,0.0,0.0,0.0,1.0
165032,554,30.0,7,161533.00,1,0.0,1.0,71173.03,0,0.0,0.0,1.0,1.0,0.0


In [108]:
y = df_dummy_train['Exited']
x = df_dummy_train.drop(['Exited'], axis =1)

In [109]:
x_train, x_test, y_train, y_test = train_test_split(x,y, stratify=y , random_state=42)

In [110]:
space={'max_depth': hp.quniform("max_depth", 3, 18, 1),
        'gamma': hp.uniform ('gamma', 1,9),
        'reg_alpha' : hp.quniform('reg_alpha', 40,180,1),
        'reg_lambda' : hp.uniform('reg_lambda', 0,1),
        'colsample_bytree' : hp.uniform('colsample_bytree', 0.5,1),
        'min_child_weight' : hp.quniform('min_child_weight', 0, 10, 1),
        'n_estimators': 180,
        'seed': 0
    }

In [111]:
def objective(space):
    clf=xgb.XGBClassifier(
                    n_estimators =space['n_estimators'], max_depth = int(space['max_depth']), gamma = space['gamma'],
                    reg_alpha = int(space['reg_alpha']),min_child_weight=int(space['min_child_weight']),
                    colsample_bytree=int(space['colsample_bytree']))
    
    evaluation = [( x_train, y_train), ( x_test, y_test)]
    
    clf.fit(x_train, y_train,
            eval_set=evaluation, eval_metric="auc",
            early_stopping_rounds=10,verbose=False)
    

    pred = clf.predict(x_test)
    accuracy = accuracy_score(y_test, pred>0.5)
    print ("SCORE:", accuracy)
    return {'loss': -accuracy, 'status': STATUS_OK }

In [112]:
trials = Trials()

best_hyperparams = fmin(fn = objective,
                        space = space,
                        algo = tpe.suggest,
                        max_evals = 100,
                        trials = trials)

  0%|          | 0/100 [00:00<?, ?trial/s, best loss=?]





SCORE:                                                 
0.8490995903923992                                     
  1%|          | 1/100 [00:01<02:49,  1.71s/trial, best loss: -0.8490995903923992]





SCORE:                                                                            
0.8508446641944788                                                                
  2%|▏         | 2/100 [00:03<02:48,  1.72s/trial, best loss: -0.8508446641944788]





SCORE:                                                                            
0.8505295814246588                                                                
  3%|▎         | 3/100 [00:05<02:45,  1.70s/trial, best loss: -0.8508446641944788]





SCORE:                                                                            
0.8482755277636395                                                                
  4%|▍         | 4/100 [00:06<02:47,  1.74s/trial, best loss: -0.8508446641944788]





SCORE:                                                                            
0.8510870355558787                                                                
  5%|▌         | 5/100 [00:08<02:46,  1.75s/trial, best loss: -0.8510870355558787]





SCORE:                                                                            
0.8487845076225793                                                                
  6%|▌         | 6/100 [00:10<02:44,  1.75s/trial, best loss: -0.8510870355558787]





SCORE:                                                                            
0.8490026418478392                                                                
  7%|▋         | 7/100 [00:12<02:38,  1.71s/trial, best loss: -0.8510870355558787]





SCORE:                                                                            
0.8493177246176592                                                                
  8%|▊         | 8/100 [00:13<02:35,  1.69s/trial, best loss: -0.8510870355558787]





SCORE:                                                                            
0.8490511161201193                                                                
  9%|▉         | 9/100 [00:15<02:32,  1.68s/trial, best loss: -0.8510870355558787]





SCORE:                                                                            
0.8505780556969389                                                                
 10%|█         | 10/100 [00:17<02:31,  1.68s/trial, best loss: -0.8510870355558787]





SCORE:                                                                             
0.8490511161201193                                                                 
 11%|█         | 11/100 [00:18<02:31,  1.70s/trial, best loss: -0.8510870355558787]





SCORE:                                                                             
0.8488814561671393                                                                 
 12%|█▏        | 12/100 [00:20<02:28,  1.69s/trial, best loss: -0.8510870355558787]





SCORE:                                                                             
0.8489541675755593                                                                 
 13%|█▎        | 13/100 [00:22<02:26,  1.68s/trial, best loss: -0.8510870355558787]





SCORE:                                                                             
0.8511112726920187                                                                 
 14%|█▍        | 14/100 [00:23<02:25,  1.70s/trial, best loss: -0.8511112726920187]





SCORE:                                                                             
0.8488814561671393                                                                 
 15%|█▌        | 15/100 [00:25<02:24,  1.70s/trial, best loss: -0.8511112726920187]





SCORE:                                                                             
0.8494631474344991                                                                 
 16%|█▌        | 16/100 [00:27<02:23,  1.71s/trial, best loss: -0.8511112726920187]





SCORE:                                                                             
0.8508446641944788                                                                 
 17%|█▋        | 17/100 [00:29<02:22,  1.71s/trial, best loss: -0.8511112726920187]





SCORE:                                                                             
0.8300249642502242                                                                 
 18%|█▊        | 18/100 [00:29<02:01,  1.48s/trial, best loss: -0.8511112726920187]





SCORE:                                                                             
0.8496085702513391                                                                 
 19%|█▉        | 19/100 [00:31<02:05,  1.55s/trial, best loss: -0.8511112726920187]





SCORE:                                                                             
0.8297098814804043                                                                 
 20%|██        | 20/100 [00:32<01:50,  1.38s/trial, best loss: -0.8511112726920187]





SCORE:                                                                             
0.8518141496400785                                                                 
 21%|██        | 21/100 [00:34<01:57,  1.49s/trial, best loss: -0.8518141496400785]





SCORE:                                                                             
0.8517414382316586                                                                 
 22%|██▏       | 22/100 [00:36<02:02,  1.57s/trial, best loss: -0.8518141496400785]





SCORE:                                                                             
0.8517656753677986                                                                 
 23%|██▎       | 23/100 [00:37<02:04,  1.61s/trial, best loss: -0.8518141496400785]





SCORE:                                                                             
0.8516687268232386                                                                 
 24%|██▍       | 24/100 [00:39<02:05,  1.65s/trial, best loss: -0.8518141496400785]





SCORE:                                                                             
0.8505295814246588                                                                 
 25%|██▌       | 25/100 [00:41<02:04,  1.67s/trial, best loss: -0.8518141496400785]





SCORE:                                                                             
0.8515233040063986                                                                 
 26%|██▌       | 26/100 [00:43<02:05,  1.69s/trial, best loss: -0.8518141496400785]





SCORE:                                                                             
0.8505780556969389                                                                 
 27%|██▋       | 27/100 [00:44<02:03,  1.70s/trial, best loss: -0.8518141496400785]





SCORE:                                                                             
0.8509658498751788                                                                 
 28%|██▊       | 28/100 [00:46<02:02,  1.70s/trial, best loss: -0.8518141496400785]





SCORE:                                                                             
0.8507234785137788                                                                 
 29%|██▉       | 29/100 [00:48<02:00,  1.70s/trial, best loss: -0.8518141496400785]





SCORE:                                                                             
0.8514505925979786                                                                 
 30%|███       | 30/100 [00:49<02:00,  1.72s/trial, best loss: -0.8518141496400785]





SCORE:                                                                             
0.8502872100632589                                                                 
 31%|███       | 31/100 [00:51<01:58,  1.72s/trial, best loss: -0.8518141496400785]





SCORE:                                                                             
0.850335684335539                                                                  
 32%|███▏      | 32/100 [00:53<01:56,  1.71s/trial, best loss: -0.8518141496400785]





SCORE:                                                                             
0.850044838701859                                                                  
 33%|███▎      | 33/100 [00:55<01:55,  1.72s/trial, best loss: -0.8518141496400785]





SCORE:                                                                             
0.8507961899221987                                                                 
 34%|███▍      | 34/100 [00:56<01:53,  1.72s/trial, best loss: -0.8518141496400785]





SCORE:                                                                             
0.849972127293439                                                                  
 35%|███▌      | 35/100 [00:58<01:52,  1.73s/trial, best loss: -0.8518141496400785]





SCORE:                                                                             
0.8514748297341186                                                                 
 36%|███▌      | 36/100 [01:00<01:53,  1.77s/trial, best loss: -0.8518141496400785]





SCORE:                                                                             
0.8509658498751788                                                                 
 37%|███▋      | 37/100 [01:02<01:51,  1.77s/trial, best loss: -0.8518141496400785]





SCORE:                                                                             
0.8506265299692188                                                                 
 38%|███▊      | 38/100 [01:03<01:48,  1.76s/trial, best loss: -0.8518141496400785]





SCORE:                                                                             
0.8507719527860588                                                                 
 39%|███▉      | 39/100 [01:05<01:46,  1.74s/trial, best loss: -0.8518141496400785]





SCORE:                                                                             
0.8488814561671393                                                                 
 40%|████      | 40/100 [01:07<01:43,  1.73s/trial, best loss: -0.8518141496400785]





SCORE:                                                                             
0.8496328073874791                                                                 
 41%|████      | 41/100 [01:08<01:41,  1.71s/trial, best loss: -0.8518141496400785]





SCORE:                                                                             
0.8514021183256987                                                                 
 42%|████▏     | 42/100 [01:10<01:40,  1.73s/trial, best loss: -0.8518141496400785]





SCORE:                                                                             
0.8496328073874791                                                                 
 43%|████▎     | 43/100 [01:12<01:37,  1.72s/trial, best loss: -0.8518141496400785]





SCORE:                                                                             
0.850335684335539                                                                  
 44%|████▍     | 44/100 [01:14<01:36,  1.72s/trial, best loss: -0.8518141496400785]





SCORE:                                                                             
0.8483967134443394                                                                 
 45%|████▌     | 45/100 [01:15<01:32,  1.69s/trial, best loss: -0.8518141496400785]





SCORE:                                                                             
0.8492450132092392                                                                 
 46%|████▌     | 46/100 [01:17<01:31,  1.70s/trial, best loss: -0.8518141496400785]





SCORE:                                                                             
0.8503841586078189                                                                 
 47%|████▋     | 47/100 [01:19<01:29,  1.69s/trial, best loss: -0.8518141496400785]





SCORE:                                                                             
0.8514990668702586                                                                 
 48%|████▊     | 48/100 [01:21<01:29,  1.73s/trial, best loss: -0.8518141496400785]





SCORE:                                                                             
0.8509900870113187                                                                 
 49%|████▉     | 49/100 [01:22<01:29,  1.75s/trial, best loss: -0.8518141496400785]





SCORE:                                                                             
0.8493177246176592                                                                 
 50%|█████     | 50/100 [01:24<01:26,  1.73s/trial, best loss: -0.8518141496400785]





SCORE:                                                                             
0.8495600959790591                                                                 
 51%|█████     | 51/100 [01:26<01:23,  1.70s/trial, best loss: -0.8518141496400785]





SCORE:                                                                             
0.8506265299692188                                                                 
 52%|█████▏    | 52/100 [01:27<01:22,  1.72s/trial, best loss: -0.8518141496400785]





SCORE:                                                                             
0.8509416127390388                                                                 
 53%|█████▎    | 53/100 [01:29<01:20,  1.72s/trial, best loss: -0.8518141496400785]





SCORE:                                                                             
0.8506992413776389                                                                 
 54%|█████▍    | 54/100 [01:31<01:19,  1.72s/trial, best loss: -0.8518141496400785]





SCORE:                                                                             
0.8514263554618386                                                                 
 55%|█████▌    | 55/100 [01:33<01:18,  1.74s/trial, best loss: -0.8518141496400785]





SCORE:                                                                             
0.849850941612739                                                                  
 56%|█████▌    | 56/100 [01:34<01:16,  1.74s/trial, best loss: -0.8518141496400785]





SCORE:                                                                             
0.850166024382559                                                                  
 57%|█████▋    | 57/100 [01:36<01:14,  1.73s/trial, best loss: -0.8518141496400785]





SCORE:                                                                             
0.8506265299692188                                                                 
 58%|█████▊    | 58/100 [01:38<01:12,  1.73s/trial, best loss: -0.8518141496400785]





SCORE:                                                                             
0.8517414382316586                                                                 
 59%|█████▉    | 59/100 [01:40<01:11,  1.75s/trial, best loss: -0.8518141496400785]





SCORE:                                                                             
0.8507234785137788                                                                 
 60%|██████    | 60/100 [01:41<01:10,  1.76s/trial, best loss: -0.8518141496400785]





SCORE:                                                                             
0.8492207760730992                                                                 
 61%|██████    | 61/100 [01:43<01:07,  1.74s/trial, best loss: -0.8518141496400785]





SCORE:                                                                             
0.8489056933032793                                                                 
 62%|██████▏   | 62/100 [01:45<01:04,  1.70s/trial, best loss: -0.8518141496400785]





SCORE:                                                                             
0.8490753532562593                                                                 
 63%|██████▎   | 63/100 [01:46<01:02,  1.70s/trial, best loss: -0.8518141496400785]





SCORE:                                                                             
0.8514263554618386                                                                 
 64%|██████▍   | 64/100 [01:48<01:02,  1.74s/trial, best loss: -0.8518141496400785]





SCORE:                                                                             
0.8295159843912844                                                                 
 65%|██████▌   | 65/100 [01:49<00:52,  1.51s/trial, best loss: -0.8518141496400785]





SCORE:                                                                             
0.8515233040063986                                                                 
 66%|██████▌   | 66/100 [01:51<00:54,  1.60s/trial, best loss: -0.8518141496400785]





SCORE:                                                                             
0.8516929639593785                                                                 
 67%|██████▋   | 67/100 [01:53<00:54,  1.65s/trial, best loss: -0.8518141496400785]





SCORE:                                                                             
0.8509900870113187                                                                 
 68%|██████▊   | 68/100 [01:55<00:54,  1.69s/trial, best loss: -0.8518141496400785]





SCORE:                                                                             
0.8509658498751788                                                                 
 69%|██████▉   | 69/100 [01:56<00:53,  1.74s/trial, best loss: -0.8518141496400785]





SCORE:                                                                             
0.8517414382316586                                                                 
 70%|███████   | 70/100 [01:58<00:52,  1.76s/trial, best loss: -0.8518141496400785]





SCORE:                                                                             
0.8509416127390388                                                                 
 71%|███████   | 71/100 [02:00<00:51,  1.78s/trial, best loss: -0.8518141496400785]





SCORE:                                                                             
0.8517414382316586                                                                 
 72%|███████▏  | 72/100 [02:02<00:49,  1.78s/trial, best loss: -0.8518141496400785]





SCORE:                                                                             
0.8508446641944788                                                                 
 73%|███████▎  | 73/100 [02:04<00:48,  1.79s/trial, best loss: -0.8518141496400785]





SCORE:                                                                             
0.8509416127390388                                                                 
 74%|███████▍  | 74/100 [02:05<00:46,  1.78s/trial, best loss: -0.8518141496400785]





SCORE:                                                                             
0.8517172010955185                                                                 
 75%|███████▌  | 75/100 [02:07<00:44,  1.78s/trial, best loss: -0.8518141496400785]





SCORE:                                                                             
0.8514990668702586                                                                 
 76%|███████▌  | 76/100 [02:09<00:42,  1.78s/trial, best loss: -0.8518141496400785]





SCORE:                                                                             
0.8508446641944788                                                                 
 77%|███████▋  | 77/100 [02:11<00:40,  1.77s/trial, best loss: -0.8518141496400785]





SCORE:                                                                             
0.8509900870113187                                                                 
 78%|███████▊  | 78/100 [02:13<00:39,  1.80s/trial, best loss: -0.8518141496400785]





SCORE:                                                                             
0.8506992413776389                                                                 
 79%|███████▉  | 79/100 [02:14<00:37,  1.79s/trial, best loss: -0.8518141496400785]





SCORE:                                                                             
0.8517414382316586                                                                 
 80%|████████  | 80/100 [02:16<00:35,  1.78s/trial, best loss: -0.8518141496400785]





SCORE:                                                                             
0.8517414382316586                                                                 
 81%|████████  | 81/100 [02:18<00:33,  1.77s/trial, best loss: -0.8518141496400785]





SCORE:                                                                             
0.8302188613393442                                                                 
 82%|████████▏ | 82/100 [02:19<00:27,  1.54s/trial, best loss: -0.8518141496400785]





SCORE:                                                                             
0.8515960154148186                                                                 
 83%|████████▎ | 83/100 [02:21<00:27,  1.60s/trial, best loss: -0.8518141496400785]





SCORE:                                                                             
0.8505780556969389                                                                 
 84%|████████▍ | 84/100 [02:22<00:26,  1.64s/trial, best loss: -0.8518141496400785]





SCORE:                                                                             
0.8517656753677986                                                                 
 85%|████████▌ | 85/100 [02:24<00:25,  1.67s/trial, best loss: -0.8518141496400785]





SCORE:                                                                             
0.8489541675755593                                                                 
 86%|████████▌ | 86/100 [02:26<00:23,  1.68s/trial, best loss: -0.8518141496400785]





SCORE:                                                                             
0.8501902615186989                                                                 
 87%|████████▋ | 87/100 [02:27<00:21,  1.69s/trial, best loss: -0.8518141496400785]





SCORE:                                                                             
0.8516687268232386                                                                 
 88%|████████▊ | 88/100 [02:29<00:20,  1.70s/trial, best loss: -0.8518141496400785]





SCORE:                                                                             
0.850335684335539                                                                  
 89%|████████▉ | 89/100 [02:31<00:18,  1.71s/trial, best loss: -0.8518141496400785]





SCORE:                                                                             
0.850141787246419                                                                  
 90%|█████████ | 90/100 [02:33<00:17,  1.71s/trial, best loss: -0.8518141496400785]





SCORE:                                                                             
0.8506750042414988                                                                 
 91%|█████████ | 91/100 [02:34<00:15,  1.71s/trial, best loss: -0.8518141496400785]





SCORE:                                                                             
0.8505780556969389                                                                 
 92%|█████████▏| 92/100 [02:36<00:13,  1.72s/trial, best loss: -0.8518141496400785]





SCORE:                                                                             
0.8508446641944788                                                                 
 93%|█████████▎| 93/100 [02:38<00:12,  1.73s/trial, best loss: -0.8518141496400785]





SCORE:                                                                             
0.8300492013863642                                                                 
 94%|█████████▍| 94/100 [02:39<00:09,  1.51s/trial, best loss: -0.8518141496400785]





SCORE:                                                                             
0.8506265299692188                                                                 
 95%|█████████▌| 95/100 [02:41<00:07,  1.58s/trial, best loss: -0.8518141496400785]





SCORE:                                                                             
0.8496085702513391                                                                 
 96%|█████████▌| 96/100 [02:42<00:06,  1.60s/trial, best loss: -0.8518141496400785]





SCORE:                                                                             
0.8517172010955185                                                                 
 97%|█████████▋| 97/100 [02:44<00:05,  1.67s/trial, best loss: -0.8518141496400785]





SCORE:                                                                             
0.8518626239123586                                                                 
 98%|█████████▊| 98/100 [02:46<00:03,  1.69s/trial, best loss: -0.8518626239123586]





SCORE:                                                                             
0.8509900870113187                                                                 
 99%|█████████▉| 99/100 [02:48<00:01,  1.72s/trial, best loss: -0.8518626239123586]





SCORE:                                                                             
0.8506265299692188                                                                 
100%|██████████| 100/100 [02:49<00:00,  1.70s/trial, best loss: -0.8518626239123586]


In [113]:
print("The best hyperparameters are : ","\n")
print(best_hyperparams)

The best hyperparameters are :  

{'colsample_bytree': 0.890109925715352, 'gamma': 6.280880383614678, 'max_depth': 12.0, 'min_child_weight': 10.0, 'reg_alpha': 49.0, 'reg_lambda': 0.45724982713787765}


In [128]:
model = xgb.XGBClassifier(seed = 42,
                          objective='binary:logistic',
                          gamma = 6.280880383614678,
                          colsample_bytree =0.890109925715352,
                          max_depth = 12,
                          min_child_weight = 10.0,
                          reg_alpha = 49.0,
                          reg_lambda = 0.45724982713787765)


In [129]:
model.fit(x_train, 
          y_train, 
          verbose=True,
          eval_metric='auc',
          eval_set=[(x_test, y_test)])

[0]	validation_0-auc:0.87214
[1]	validation_0-auc:0.88018
[2]	validation_0-auc:0.88141
[3]	validation_0-auc:0.88320




[4]	validation_0-auc:0.88407
[5]	validation_0-auc:0.88451
[6]	validation_0-auc:0.88482
[7]	validation_0-auc:0.88590
[8]	validation_0-auc:0.88612
[9]	validation_0-auc:0.88645
[10]	validation_0-auc:0.88691
[11]	validation_0-auc:0.88730
[12]	validation_0-auc:0.88730
[13]	validation_0-auc:0.88749
[14]	validation_0-auc:0.88759
[15]	validation_0-auc:0.88768
[16]	validation_0-auc:0.88781
[17]	validation_0-auc:0.88780
[18]	validation_0-auc:0.88780
[19]	validation_0-auc:0.88783
[20]	validation_0-auc:0.88790
[21]	validation_0-auc:0.88791
[22]	validation_0-auc:0.88799
[23]	validation_0-auc:0.88798
[24]	validation_0-auc:0.88805
[25]	validation_0-auc:0.88809
[26]	validation_0-auc:0.88809
[27]	validation_0-auc:0.88809
[28]	validation_0-auc:0.88809
[29]	validation_0-auc:0.88809
[30]	validation_0-auc:0.88809
[31]	validation_0-auc:0.88809
[32]	validation_0-auc:0.88809
[33]	validation_0-auc:0.88809
[34]	validation_0-auc:0.88809
[35]	validation_0-auc:0.88809
[36]	validation_0-auc:0.88809
[37]	validation_

In [130]:
y_pred = model.predict(x_test)

In [131]:
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.2f}')

print('\nClassification Report:')
print(classification_report(y_test, y_pred))

Accuracy: 0.87

Classification Report:
              precision    recall  f1-score   support

           0       0.89      0.95      0.92     32529
           1       0.75      0.55      0.63      8730

    accuracy                           0.87     41259
   macro avg       0.82      0.75      0.78     41259
weighted avg       0.86      0.87      0.86     41259



In [132]:

df_test.drop(['id', 'CustomerId', 'Surname'], axis = 1, inplace = True)

df_dummy_test = pd.get_dummies(df_test, drop_first= False, dtype=float)
df_dummy_test

KeyError: "['id', 'CustomerId', 'Surname'] not found in axis"

In [133]:
y_test_pred = model.predict(df_dummy_test)
y_test_prob = model.predict_proba(df_dummy_test)[:, 1]

y_test_pred
y_test_prob = y_test_prob.round(1)




In [134]:
index = pd.read_csv(zf.open("test.csv"))

In [135]:
df_submission = pd.DataFrame({
    'id': index['id'],
    'Exited': y_test_prob
})
df_submission

Unnamed: 0,id,Exited
0,165034,0.0
1,165035,0.8
2,165036,0.0
3,165037,0.2
4,165038,0.4
...,...,...
110018,275052,0.0
110019,275053,0.1
110020,275054,0.0
110021,275055,0.1


In [136]:
df_submission.to_csv('XGBoost_Submission_hypertuned_2.csv', index=False)