In [17]:
import zipfile
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

# import packages for hyperparameters tuning
from hyperopt import STATUS_OK, Trials, fmin, hp, tpe

In [18]:
zf = zipfile.ZipFile("playground-series-s4e6.zip")
df_train = pd.read_csv(zf.open("train.csv"))
df_test = pd.read_csv(zf.open("test.csv"))

In [19]:
df_train.head(5)

Unnamed: 0,id,Marital status,Application mode,Application order,Course,Daytime/evening attendance,Previous qualification,Previous qualification (grade),Nacionality,Mother's qualification,...,Curricular units 2nd sem (credited),Curricular units 2nd sem (enrolled),Curricular units 2nd sem (evaluations),Curricular units 2nd sem (approved),Curricular units 2nd sem (grade),Curricular units 2nd sem (without evaluations),Unemployment rate,Inflation rate,GDP,Target
0,0,1,1,1,9238,1,1,126.0,1,1,...,0,6,7,6,12.428571,0,11.1,0.6,2.02,Graduate
1,1,1,17,1,9238,1,1,125.0,1,19,...,0,6,9,0,0.0,0,11.1,0.6,2.02,Dropout
2,2,1,17,2,9254,1,1,137.0,1,3,...,0,6,0,0,0.0,0,16.2,0.3,-0.92,Dropout
3,3,1,1,3,9500,1,1,131.0,1,19,...,0,8,11,7,12.82,0,11.1,0.6,2.02,Enrolled
4,4,1,1,2,9500,1,1,132.0,1,19,...,0,7,12,6,12.933333,0,7.6,2.6,0.32,Graduate


In [60]:
#One liner to code the final column that is the outcome of our model
y_coded_column = pd.DataFrame({"Target": np.argmax((pd.get_dummies(df_train["Target"], dtype= float)), axis=1)})

#Setting up variables for train/test split
y = y_coded_column
x = df_train.drop(["id", "Target"], axis=1)

print (y)


       Target
0           2
1           0
2           0
3           1
4           2
...       ...
76513       2
76514       2
76515       1
76516       0
76517       2

[76518 rows x 1 columns]


In [21]:
x_train, x_test, y_train, y_test = train_test_split(x,y, random_state=42)

In [55]:
#Testing for best parameters
space={'learning_rate': hp.quniform("learning_rate", 0.01, 20,0.01),
        'max_depth': hp.quniform("max_depth", 3, 18, 1),
        'gamma': hp.uniform ('gamma', 1,9),
        'reg_alpha' : hp.quniform('reg_alpha', 40,180,1),
        'reg_lambda' : hp.uniform('reg_lambda', 0,1),
        'colsample_bytree' : hp.uniform('colsample_bytree', 0.5,1),
        'min_child_weight' : hp.quniform('min_child_weight', 0, 10, 1),
        'n_estimators': 180,
        'seed': 0,
        'scale_pos_weight':hp.quniform('scale_pos_weight',1,100,1)
    }
def objective(space):
    clf=xgb.XGBClassifier(
                    n_estimators =space['n_estimators'], max_depth = int(space['max_depth']), gamma = space['gamma'],
                    reg_alpha = int(space['reg_alpha']),min_child_weight=int(space['min_child_weight']),
                    colsample_bytree=int(space['colsample_bytree']))
    
    evaluation = [( x_train, y_train), ( x_test, y_test)]
    
    clf.fit(x_train, y_train,
            eval_set=evaluation, eval_metric="auc",
            early_stopping_rounds=10,verbose=False)
    

    pred = clf.predict(x_test)
    accuracy = accuracy_score(y_test, pred>0.5)
    print ("SCORE:", accuracy)
    return {'loss': -accuracy, 'status': STATUS_OK }

trials = Trials()

best_hyperparams = fmin(fn = objective,
                        space = space,
                        algo = tpe.suggest,
                        max_evals = 100,
                        trials = trials)

print("The best hyperparameters are : ","\n")
print(best_hyperparams)

  0%|          | 0/100 [00:00<?, ?trial/s, best loss=?]





SCORE:                                                 
0.4446941975953999                                     
  1%|          | 1/100 [00:03<05:59,  3.63s/trial, best loss: -0.4446941975953999]





SCORE:                                                                            
0.4445896497647674                                                                
  2%|▏         | 2/100 [00:07<05:57,  3.65s/trial, best loss: -0.4446941975953999]





SCORE:                                                                            
0.443282801881861                                                                 
  3%|▎         | 3/100 [00:10<05:42,  3.53s/trial, best loss: -0.4446941975953999]





SCORE:                                                                            
0.443282801881861                                                                 
  4%|▍         | 4/100 [00:14<05:34,  3.49s/trial, best loss: -0.4446941975953999]





SCORE:                                                                            
0.443282801881861                                                                 
  5%|▌         | 5/100 [00:17<05:26,  3.44s/trial, best loss: -0.4446941975953999]





SCORE:                                                                            
0.44453737584945113                                                               
  6%|▌         | 6/100 [00:21<05:27,  3.48s/trial, best loss: -0.4446941975953999]





SCORE:                                                                            
0.4445896497647674                                                                
  7%|▋         | 7/100 [00:24<05:26,  3.51s/trial, best loss: -0.4446941975953999]





SCORE:                                                                            
0.4442237323575536                                                                
  8%|▊         | 8/100 [00:28<05:24,  3.52s/trial, best loss: -0.4446941975953999]





SCORE:                                                                            
0.4430214323052797                                                                
  9%|▉         | 9/100 [00:31<05:15,  3.47s/trial, best loss: -0.4446941975953999]





SCORE:                                                                            
0.44385781495033977                                                               
 10%|█         | 10/100 [00:35<05:16,  3.52s/trial, best loss: -0.4446941975953999]





SCORE:                                                                             
0.4422373235755358                                                                 
 11%|█         | 11/100 [00:38<05:08,  3.46s/trial, best loss: -0.4446941975953999]





SCORE:                                                                             
0.4435441714584422                                                                 
 12%|█▏        | 12/100 [00:41<05:02,  3.44s/trial, best loss: -0.4446941975953999]





SCORE:                                                                             
0.44438055410350236                                                                
 13%|█▎        | 13/100 [00:45<05:04,  3.50s/trial, best loss: -0.4446941975953999]





SCORE:                                                                             
0.44385781495033977                                                                
 14%|█▍        | 14/100 [00:48<04:57,  3.45s/trial, best loss: -0.4446941975953999]





SCORE:                                                                             
0.44411918452692106                                                                
 15%|█▌        | 15/100 [00:52<04:56,  3.48s/trial, best loss: -0.4446941975953999]





SCORE:                                                                             
0.442080501829587                                                                  
 16%|█▌        | 16/100 [00:55<04:50,  3.46s/trial, best loss: -0.4446941975953999]





SCORE:                                                                             
0.4432305279665447                                                                 
 17%|█▋        | 17/100 [00:59<04:44,  3.43s/trial, best loss: -0.4446941975953999]





SCORE:                                                                             
0.4427600627286984                                                                 
 18%|█▊        | 18/100 [01:02<04:39,  3.41s/trial, best loss: -0.4446941975953999]





SCORE:                                                                             
0.4442237323575536                                                                 
 19%|█▉        | 19/100 [01:05<04:34,  3.39s/trial, best loss: -0.4446941975953999]





SCORE:                                                                             
0.44349189754312596                                                                
 20%|██        | 20/100 [01:09<04:35,  3.44s/trial, best loss: -0.4446941975953999]





SCORE:                                                                             
0.4447987454260324                                                                 
 21%|██        | 21/100 [01:13<04:36,  3.50s/trial, best loss: -0.4447987454260324]





SCORE:                                                                             
0.44474647151071617                                                                
 22%|██▏       | 22/100 [01:16<04:38,  3.57s/trial, best loss: -0.4447987454260324]





SCORE:                                                                             
0.4444851019341349                                                                 
 23%|██▎       | 23/100 [01:20<04:36,  3.60s/trial, best loss: -0.4447987454260324]





SCORE:                                                                             
0.4438055410350235                                                                 
 24%|██▍       | 24/100 [01:24<04:35,  3.62s/trial, best loss: -0.4447987454260324]





SCORE:                                                                             
0.44464192368008365                                                                
 25%|██▌       | 25/100 [01:27<04:31,  3.62s/trial, best loss: -0.4447987454260324]





SCORE:                                                                             
0.4450601150026137                                                                 
 26%|██▌       | 26/100 [01:31<04:29,  3.64s/trial, best loss: -0.4450601150026137]





SCORE:                                                                             
0.44500784108729746                                                                
 27%|██▋       | 27/100 [01:35<04:28,  3.68s/trial, best loss: -0.4450601150026137]





SCORE:                                                                             
0.44511238891793                                                                   
 28%|██▊       | 28/100 [01:38<04:24,  3.67s/trial, best loss: -0.44511238891793]  





SCORE:                                                                           
0.4447987454260324                                                               
 29%|██▉       | 29/100 [01:42<04:19,  3.65s/trial, best loss: -0.44511238891793]





SCORE:                                                                           
0.4451646628332462                                                               
 30%|███       | 30/100 [01:46<04:15,  3.65s/trial, best loss: -0.4451646628332462]





SCORE:                                                                             
0.4446941975953999                                                                 
 31%|███       | 31/100 [01:49<04:11,  3.64s/trial, best loss: -0.4451646628332462]





SCORE:                                                                             
0.44401463669628854                                                                
 32%|███▏      | 32/100 [01:53<04:06,  3.63s/trial, best loss: -0.4451646628332462]





SCORE:                                                                             
0.4448510193413487                                                                 
 33%|███▎      | 33/100 [01:57<04:04,  3.65s/trial, best loss: -0.4451646628332462]





SCORE:                                                                             
0.443910088865656                                                                  
 34%|███▍      | 34/100 [02:00<04:00,  3.64s/trial, best loss: -0.4451646628332462]





SCORE:                                                                             
0.44270778881338213                                                                
 35%|███▌      | 35/100 [02:04<03:51,  3.56s/trial, best loss: -0.4451646628332462]





SCORE:                                                                             
0.4438055410350235                                                                 
 36%|███▌      | 36/100 [02:07<03:48,  3.57s/trial, best loss: -0.4451646628332462]





SCORE:                                                                             
0.4444851019341349                                                                 
 37%|███▋      | 37/100 [02:11<03:45,  3.58s/trial, best loss: -0.4451646628332462]





SCORE:                                                                             
0.4445896497647674                                                                 
 38%|███▊      | 38/100 [02:14<03:43,  3.60s/trial, best loss: -0.4451646628332462]





SCORE:                                                                             
0.4446941975953999                                                                 
 39%|███▉      | 39/100 [02:18<03:41,  3.63s/trial, best loss: -0.4451646628332462]





SCORE:                                                                             
0.44474647151071617                                                                
 40%|████      | 40/100 [02:22<03:36,  3.61s/trial, best loss: -0.4451646628332462]





SCORE:                                                                             
0.4430214323052797                                                                 
 41%|████      | 41/100 [02:25<03:33,  3.61s/trial, best loss: -0.4451646628332462]





SCORE:                                                                             
0.4439623627809723                                                                 
 42%|████▏     | 42/100 [02:29<03:29,  3.61s/trial, best loss: -0.4451646628332462]





SCORE:                                                                             
0.4440669106116048                                                                 
 43%|████▎     | 43/100 [02:32<03:24,  3.59s/trial, best loss: -0.4451646628332462]





SCORE:                                                                             
0.44438055410350236                                                                
 44%|████▍     | 44/100 [02:36<03:23,  3.63s/trial, best loss: -0.4451646628332462]





SCORE:                                                                             
0.4438055410350235                                                                 
 45%|████▌     | 45/100 [02:40<03:19,  3.62s/trial, best loss: -0.4451646628332462]





SCORE:                                                                             
0.44375326711970725                                                                
 46%|████▌     | 46/100 [02:43<03:14,  3.61s/trial, best loss: -0.4451646628332462]





SCORE:                                                                             
0.4444851019341349                                                                 
 47%|████▋     | 47/100 [02:47<03:11,  3.61s/trial, best loss: -0.4451646628332462]





SCORE:                                                                             
0.4452692106638787                                                                 
 48%|████▊     | 48/100 [02:51<03:08,  3.63s/trial, best loss: -0.4452692106638787]





SCORE:                                                                             
0.4440669106116048                                                                 
 49%|████▉     | 49/100 [02:54<03:04,  3.63s/trial, best loss: -0.4452692106638787]





SCORE:                                                                             
0.44385781495033977                                                                
 50%|█████     | 50/100 [02:58<02:59,  3.58s/trial, best loss: -0.4452692106638787]





SCORE:                                                                             
0.4445896497647674                                                                 
 51%|█████     | 51/100 [03:01<02:56,  3.61s/trial, best loss: -0.4452692106638787]





SCORE:                                                                             
0.4446941975953999                                                                 
 52%|█████▏    | 52/100 [03:05<02:54,  3.64s/trial, best loss: -0.4452692106638787]





SCORE:                                                                             
0.4444328280188186                                                                 
 53%|█████▎    | 53/100 [03:09<02:51,  3.64s/trial, best loss: -0.4452692106638787]





SCORE:                                                                             
0.4445896497647674                                                                 
 54%|█████▍    | 54/100 [03:12<02:47,  3.65s/trial, best loss: -0.4452692106638787]





SCORE:                                                                             
0.4442237323575536                                                                 
 55%|█████▌    | 55/100 [03:16<02:43,  3.63s/trial, best loss: -0.4452692106638787]





SCORE:                                                                             
0.44427600627286984                                                                
 56%|█████▌    | 56/100 [03:19<02:37,  3.58s/trial, best loss: -0.4452692106638787]





SCORE:                                                                             
0.44385781495033977                                                                
 57%|█████▋    | 57/100 [03:23<02:34,  3.60s/trial, best loss: -0.4452692106638787]





SCORE:                                                                             
0.443910088865656                                                                  
 58%|█████▊    | 58/100 [03:26<02:29,  3.55s/trial, best loss: -0.4452692106638787]





SCORE:                                                                             
0.44427600627286984                                                                
 59%|█████▉    | 59/100 [03:30<02:26,  3.58s/trial, best loss: -0.4452692106638787]





SCORE:                                                                             
0.44281233664401465                                                                
 60%|██████    | 60/100 [03:34<02:21,  3.53s/trial, best loss: -0.4452692106638787]





SCORE:                                                                             
0.44255096706743335                                                                
 61%|██████    | 61/100 [03:37<02:16,  3.50s/trial, best loss: -0.4452692106638787]





SCORE:                                                                             
0.44500784108729746                                                                
 62%|██████▏   | 62/100 [03:41<02:14,  3.54s/trial, best loss: -0.4452692106638787]





SCORE:                                                                             
0.44385781495033977                                                                
 63%|██████▎   | 63/100 [03:44<02:10,  3.54s/trial, best loss: -0.4452692106638787]





SCORE:                                                                             
0.44474647151071617                                                                
 64%|██████▍   | 64/100 [03:48<02:08,  3.58s/trial, best loss: -0.4452692106638787]





SCORE:                                                                             
0.4448510193413487                                                                 
 65%|██████▌   | 65/100 [03:51<02:05,  3.59s/trial, best loss: -0.4452692106638787]





SCORE:                                                                             
0.4450601150026137                                                                 
 66%|██████▌   | 66/100 [03:55<02:03,  3.62s/trial, best loss: -0.4452692106638787]





SCORE:                                                                             
0.44532148457919496                                                                
 67%|██████▋   | 67/100 [03:59<02:00,  3.64s/trial, best loss: -0.44532148457919496]





SCORE:                                                                              
0.44511238891793                                                                    
 68%|██████▊   | 68/100 [04:03<01:58,  3.71s/trial, best loss: -0.44532148457919496]





SCORE:                                                                              
0.4446941975953999                                                                  
 69%|██████▉   | 69/100 [04:06<01:54,  3.69s/trial, best loss: -0.44532148457919496]





SCORE:                                                                              
0.44474647151071617                                                                 
 70%|███████   | 70/100 [04:10<01:50,  3.69s/trial, best loss: -0.44532148457919496]





SCORE:                                                                              
0.44464192368008365                                                                 
 71%|███████   | 71/100 [04:14<01:46,  3.68s/trial, best loss: -0.44532148457919496]





SCORE:                                                                              
0.4441714584422373                                                                  
 72%|███████▏  | 72/100 [04:17<01:43,  3.68s/trial, best loss: -0.44532148457919496]





SCORE:                                                                              
0.44464192368008365                                                                 
 73%|███████▎  | 73/100 [04:21<01:39,  3.69s/trial, best loss: -0.44532148457919496]





SCORE:                                                                              
0.4449555671719812                                                                  
 74%|███████▍  | 74/100 [04:25<01:35,  3.69s/trial, best loss: -0.44532148457919496]





SCORE:                                                                              
0.44500784108729746                                                                 
 75%|███████▌  | 75/100 [04:28<01:32,  3.69s/trial, best loss: -0.44532148457919496]





SCORE:                                                                              
0.44490329325666494                                                                 
 76%|███████▌  | 76/100 [04:32<01:28,  3.69s/trial, best loss: -0.44532148457919496]





SCORE:                                                                              
0.4450601150026137                                                                  
 77%|███████▋  | 77/100 [04:36<01:24,  3.69s/trial, best loss: -0.44532148457919496]





SCORE:                                                                              
0.4446941975953999                                                                  
 78%|███████▊  | 78/100 [04:39<01:20,  3.68s/trial, best loss: -0.44532148457919496]





SCORE:                                                                              
0.4447987454260324                                                                  
 79%|███████▉  | 79/100 [04:43<01:17,  3.69s/trial, best loss: -0.44532148457919496]





SCORE:                                                                              
0.44521693674856244                                                                 
 80%|████████  | 80/100 [04:47<01:14,  3.71s/trial, best loss: -0.44532148457919496]





SCORE:                                                                              
0.4446941975953999                                                                  
 81%|████████  | 81/100 [04:51<01:10,  3.70s/trial, best loss: -0.44532148457919496]





SCORE:                                                                              
0.4451646628332462                                                                  
 82%|████████▏ | 82/100 [04:54<01:06,  3.70s/trial, best loss: -0.44532148457919496]





SCORE:                                                                              
0.44307370622059594                                                                 
 83%|████████▎ | 83/100 [04:58<01:01,  3.61s/trial, best loss: -0.44532148457919496]





SCORE:                                                                              
0.44438055410350236                                                                 
 84%|████████▍ | 84/100 [05:01<00:58,  3.65s/trial, best loss: -0.44532148457919496]





SCORE:                                                                              
0.44474647151071617                                                                 
 85%|████████▌ | 85/100 [05:05<00:54,  3.65s/trial, best loss: -0.44532148457919496]





SCORE:                                                                              
0.4442237323575536                                                                  
 86%|████████▌ | 86/100 [05:09<00:51,  3.65s/trial, best loss: -0.44532148457919496]





SCORE:                                                                              
0.44464192368008365                                                                 
 87%|████████▋ | 87/100 [05:12<00:47,  3.66s/trial, best loss: -0.44532148457919496]





SCORE:                                                                              
0.4440669106116048                                                                  
 88%|████████▊ | 88/100 [05:16<00:44,  3.67s/trial, best loss: -0.44532148457919496]





SCORE:                                                                              
0.4441714584422373                                                                  
 89%|████████▉ | 89/100 [05:20<00:40,  3.71s/trial, best loss: -0.44532148457919496]





SCORE:                                                                              
0.4440669106116048                                                                  
 90%|█████████ | 90/100 [05:24<00:38,  3.81s/trial, best loss: -0.44532148457919496]





SCORE:                                                                              
0.4442237323575536                                                                  
 91%|█████████ | 91/100 [05:28<00:34,  3.81s/trial, best loss: -0.44532148457919496]





SCORE:                                                                              
0.44490329325666494                                                                 
 92%|█████████▏| 92/100 [05:32<00:30,  3.86s/trial, best loss: -0.44532148457919496]





SCORE:                                                                              
0.4442237323575536                                                                  
 93%|█████████▎| 93/100 [05:36<00:27,  3.88s/trial, best loss: -0.44532148457919496]





SCORE:                                                                              
0.44281233664401465                                                                 
 94%|█████████▍| 94/100 [05:40<00:23,  3.92s/trial, best loss: -0.44532148457919496]





SCORE:                                                                              
0.4449555671719812                                                                  
 95%|█████████▌| 95/100 [05:44<00:19,  3.88s/trial, best loss: -0.44532148457919496]





SCORE:                                                                              
0.4442237323575536                                                                  
 96%|█████████▌| 96/100 [05:47<00:15,  3.84s/trial, best loss: -0.44532148457919496]





SCORE:                                                                              
0.44411918452692106                                                                 
 97%|█████████▋| 97/100 [05:51<00:11,  3.83s/trial, best loss: -0.44532148457919496]





SCORE:                                                                              
0.4435441714584422                                                                  
 98%|█████████▊| 98/100 [05:55<00:07,  3.81s/trial, best loss: -0.44532148457919496]





SCORE:                                                                              
0.44427600627286984                                                                 
 99%|█████████▉| 99/100 [05:59<00:03,  3.83s/trial, best loss: -0.44532148457919496]





SCORE:                                                                              
0.4434396236278097                                                                  
100%|██████████| 100/100 [06:02<00:00,  3.63s/trial, best loss: -0.44532148457919496]
The best hyperparameters are :  

{'colsample_bytree': 0.5651153103317633, 'gamma': 1.1175786832078596, 'learning_rate': 0.07, 'max_depth': 14.0, 'min_child_weight': 9.0, 'reg_alpha': 40.0, 'reg_lambda': 0.3567502256248526, 'scale_pos_weight': 23.0}


In [56]:
model = xgb.XGBClassifier(learning_rate =0.07,
    colsample_bytree=0.56,
    n_estimators=1000,
    early_stopping_rounds=50,
    max_depth=14,
    min_child_weight=9,
    gamma=1.11,
    subsample=0.65,
    reg_alpha=40,
    reg_lambda=0.3567,
    objective= 'multi:softprob',
    nthread=-1,
    scale_pos_weight=23,
    random_state=42)

In [57]:
model.fit(x_train, 
          y_train, 
          verbose=True, 
          eval_set=[(x_test, y_test)])

[0]	validation_0-mlogloss:1.04687
[1]	validation_0-mlogloss:0.99868
[2]	validation_0-mlogloss:0.95302
[3]	validation_0-mlogloss:0.91410
[4]	validation_0-mlogloss:0.88025
[5]	validation_0-mlogloss:0.84687
[6]	validation_0-mlogloss:0.81586
[7]	validation_0-mlogloss:0.78913
[8]	validation_0-mlogloss:0.76493
[9]	validation_0-mlogloss:0.74174
[10]	validation_0-mlogloss:0.72073
[11]	validation_0-mlogloss:0.70257
[12]	validation_0-mlogloss:0.68591
[13]	validation_0-mlogloss:0.67047
[14]	validation_0-mlogloss:0.65571
[15]	validation_0-mlogloss:0.64153
[16]	validation_0-mlogloss:0.62940
[17]	validation_0-mlogloss:0.61905
[18]	validation_0-mlogloss:0.60838
[19]	validation_0-mlogloss:0.59815
[20]	validation_0-mlogloss:0.58930
[21]	validation_0-mlogloss:0.58083
[22]	validation_0-mlogloss:0.57245
[23]	validation_0-mlogloss:0.56580
[24]	validation_0-mlogloss:0.55914
[25]	validation_0-mlogloss:0.55294
[26]	validation_0-mlogloss:0.54705
[27]	validation_0-mlogloss:0.54111
[28]	validation_0-mlogloss:0.5

In [58]:
#Check feature Importance
importances = model.feature_importances_
columns = x.columns
i = 0

while i< len(columns):
    print (f" The importance of feature '{columns[i]}' is {round(importances[i]*100, 2)}%.")
    i += 1

 The importance of feature 'Marital status' is 0.32%.
 The importance of feature 'Application mode' is 0.69%.
 The importance of feature 'Application order' is 0.3%.
 The importance of feature 'Course' is 0.7%.
 The importance of feature 'Daytime/evening attendance' is 0.43%.
 The importance of feature 'Previous qualification' is 0.32%.
 The importance of feature 'Previous qualification (grade)' is 0.33%.
 The importance of feature 'Nacionality' is 0.22%.
 The importance of feature 'Mother's qualification' is 0.47%.
 The importance of feature 'Father's qualification' is 0.32%.
 The importance of feature 'Mother's occupation' is 0.44%.
 The importance of feature 'Father's occupation' is 0.38%.
 The importance of feature 'Admission grade' is 0.39%.
 The importance of feature 'Displaced' is 0.31%.
 The importance of feature 'Educational special needs' is 0.34%.
 The importance of feature 'Debtor' is 1.44%.
 The importance of feature 'Tuition fees up to date' is 6.83%.
 The importance of f

In [59]:
y_pred = model.predict(x_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.2f}')

print('\nClassification Report:')
print(classification_report(y_test, y_pred))


Accuracy: 0.83

Classification Report:
              precision    recall  f1-score   support

           0       0.90      0.83      0.87      6287
           1       0.65      0.61      0.63      3746
           2       0.86      0.92      0.89      9097

    accuracy                           0.83     19130
   macro avg       0.80      0.79      0.80     19130
weighted avg       0.83      0.83      0.83     19130



In [61]:
x_test = df_test.drop(['id'], axis=1)

predictions = model.predict(x_test)

columns = ['Dropout', 'Enrolled', 'Graduate']

# Mapping dictionary
label_mapping = {i: column for i, column in enumerate(columns)}

# Map the numeric labels to column titles
predictions_mapped = np.array([label_mapping[label] for label in predictions])

predictions_mapped


array(['Dropout', 'Graduate', 'Graduate', ..., 'Dropout', 'Dropout',
       'Dropout'], dtype='<U8')

In [62]:
df_submission = pd.DataFrame({
    'id': df_test['id'],
    'NObeyesdad': predictions_mapped
})
df_submission

Unnamed: 0,id,NObeyesdad
0,76518,Dropout
1,76519,Graduate
2,76520,Graduate
3,76521,Graduate
4,76522,Enrolled
...,...,...
51007,127525,Dropout
51008,127526,Dropout
51009,127527,Dropout
51010,127528,Dropout


In [63]:
df_submission.to_csv('XGBoost_Submission_Hypertuned.csv', index=False)