In [111]:
#Import libraries
import pandas as pd
import numpy as np
from bayes_opt import BayesianOptimization
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score


In [116]:
from xgboost import XGBClassifier


In [95]:
#Bayesian optimization
def bayesian_optimization(dataset, function, parameters):
   X_train, y_train, X_test, y_test = dataset
   n_iterations = 5
   gp_params = {"alpha": 1e-4}

   BO = BayesianOptimization(function, parameters)
   BO.maximize(n_iter=n_iterations, **gp_params)

   return BO.max

In [88]:
rfBO = BayesianOptimization(rfc_optimization,{'n_estimators': (30,1000), 'max_depth' :(1,150), 'min_samples_split':(2,10)},verbose=True)

In [89]:
rfBO.maximize(init_points= 32, n_iter=40)

|   iter    |  target   | max_depth | min_sa... | n_esti... |
-------------------------------------------------------------
| [95m 2       [0m | [95m 0.9771  [0m | [95m 79.3    [0m | [95m 5.541   [0m | [95m 949.3   [0m |
| [95m 4       [0m | [95m 0.978   [0m | [95m 65.78   [0m | [95m 3.099   [0m | [95m 865.6   [0m |
| [95m 13      [0m | [95m 0.978   [0m | [95m 34.23   [0m | [95m 3.664   [0m | [95m 871.7   [0m |
| [95m 28      [0m | [95m 0.978   [0m | [95m 22.93   [0m | [95m 3.181   [0m | [95m 993.9   [0m |
| [95m 33      [0m | [95m 0.9782  [0m | [95m 148.7   [0m | [95m 2.025   [0m | [95m 999.0   [0m |
| [95m 44      [0m | [95m 0.9783  [0m | [95m 75.09   [0m | [95m 2.0     [0m | [95m 519.2   [0m |


In [130]:
def xgb_optimization(eta, gamma, max_depth):
    return cross_val_score(
        XGBClassifier(
            objective="binary:logistic",
                       learning_rate=max(eta, 0),
                       gamma=max(gamma, 0),
                       max_depth=int(max_depth),                                               
                       seed=42,
                       nthread=-1,
                       scale_pos_weight = len(y_train[y_train == 'Healthy'])/
                                          len(y_train[y_train == 'Diabetic'])),  
                   X=X_train, 
                   y=y_train, 
                   cv=10,
                   scoring="roc_auc",
                   fit_params={
                        "early_stopping_rounds": 10, 
                        "eval_metric": "auc", 
                        "eval_set": [(X_train,y_train),(X_test,y_test)]},
                   n_jobs=-1).mean()

    parameters = {"eta": eta,
                  "gamma": gamma,
                  "max_depth": max_depth}
    
    return function, parameters

In [131]:
xgbBO = BayesianOptimization(xgb_optimization,{'eta': (.001,0.4), 'gamma' :(0,20), 'max_depth':(1,2000)},verbose=True)

In [132]:
xgbBO.maximize(init_points= 32, n_iter=40)

|   iter    |  target   |    eta    |   gamma   | max_depth |
-------------------------------------------------------------
| [95m 2       [0m | [95m 0.9278  [0m | [95m 0.022   [0m | [95m 5.114   [0m | [95m 1.927e+0[0m |
| [95m 3       [0m | [95m 0.9577  [0m | [95m 0.1936  [0m | [95m 0.8304  [0m | [95m 1.158e+0[0m |
| [95m 5       [0m | [95m 0.9585  [0m | [95m 0.3766  [0m | [95m 3.324   [0m | [95m 1.088e+0[0m |
| [95m 10      [0m | [95m 0.9669  [0m | [95m 0.3499  [0m | [95m 2.684   [0m | [95m 1.832e+0[0m |
| [95m 46      [0m | [95m 0.9678  [0m | [95m 0.369   [0m | [95m 0.05048 [0m | [95m 1.342e+0[0m |
| [95m 66      [0m | [95m 0.9687  [0m | [95m 0.3683  [0m | [95m 0.03444 [0m | [95m 1.526e+0[0m |


In [121]:
 len(y_train[y_train == 1])

0

In [122]:
y_train

0        Healthy
1        Healthy
2        Healthy
3        Healthy
4        Healthy
5        Healthy
6        Healthy
7        Healthy
8        Healthy
9        Healthy
10       Healthy
11       Healthy
12       Healthy
13       Healthy
14       Healthy
15       Healthy
16       Healthy
17       Healthy
18       Healthy
19       Healthy
20       Healthy
21       Healthy
22       Healthy
23       Healthy
24       Healthy
25       Healthy
26       Healthy
27       Healthy
28       Healthy
29       Healthy
          ...   
2408    Diabetic
2409    Diabetic
2410    Diabetic
2411    Diabetic
2412    Diabetic
2413    Diabetic
2414    Diabetic
2415    Diabetic
2416    Diabetic
2417    Diabetic
2418    Diabetic
2419    Diabetic
2420    Diabetic
2421    Diabetic
2422    Diabetic
2423    Diabetic
2424    Diabetic
2425    Diabetic
2426    Diabetic
2427    Diabetic
2428    Diabetic
2429    Diabetic
2430    Diabetic
2431    Diabetic
2432    Diabetic
2433    Diabetic
2434    Diabetic
2435    Diabet

In [83]:
def rfc_optimization(n_estimators, max_depth, min_samples_split):
    return cross_val_score(
               RandomForestClassifier(
                   n_estimators=int(max(n_estimators,0)),                                                               
                   max_depth=int(max(max_depth,1)),
                   min_samples_split=int(max(min_samples_split,2)), 
                   n_jobs=-1, 
                   random_state=42,   
                   class_weight="balanced"),  
               X=X_train, 
               y=y_train, 
               cv=10,
               scoring="roc_auc",
               n_jobs=-1).mean()

    parameters = {"n_estimators":n_estimators,
                  "max_depth":max_depth,
                  "min_samples_split":min_samples_split}
    
    return function, parameters

In [65]:
#Train model
def train(X_train, y_train, X_test, y_test, function, parameters):
    dataset = (X_train, y_train, X_test, y_test)
    cv_splits = 4
    
    best_solution = bayesian_optimization(dataset, function, parameters)      
    params = best_solution["params"]

    model = RandomForestClassifier(
             n_estimators=int(),
             max_depth=int()),
             min_samples_split=int()), 
             n_jobs=-1, 
             random_state=42,   
             class_weight="balanced")

    model.fit(X_train, y_train)
    
    return model

In [90]:
y_pred = model.predict(X_test)
print(metrics.accuracy_score(y_test, y_pred))

NameError: name 'model' is not defined

In [76]:
train(X_train,y_train,X_test,y_test,rfc_optimization(10))

|   iter    |  target   | max_fe... | min_sa... | n_esti... |
-------------------------------------------------------------


TypeError: 'tuple' object is not callable

In [5]:
train =pd.read_csv('C:/Users/Rick/Desktop/18July_feature_nhanes/train.csv')

In [20]:
test = pd.read_csv('C:/Users/Rick/Desktop/18July_feature_nhanes/test.csv')

In [21]:
test = test.drop('Unnamed: 0', axis = 1)

In [22]:
test.head()

Unnamed: 0,ridageyr.y,dmdhrage,whd140,WaistCircumference,whq150,whd050,BPSystolic,BMI,Weight,paq655,...,drxtkcal_avg,drxtmfat_avg,drxtchol_avg,drxtphos_avg,drxtmagn_avg,drxtm181_avg,drxts160_avg,drxtchl_avg,drxts180_avg,Status
0,78,79,240.0,116.5,46,212.0,132,28.8,83.4,0,...,2205.0,29.409,416.0,1437.5,387.0,27.282,16.3785,514.8,8.2005,Healthy
1,24,24,130.0,79.5,24,122.0,108,25.3,61.8,0,...,1720.5,25.754,411.0,1349.0,265.5,24.392,10.613,415.45,4.06,Healthy
2,66,66,235.0,109.5,65,235.0,162,34.0,102.9,0,...,3104.5,47.438,466.0,1526.0,275.5,44.3705,27.0455,561.3,10.8185,Healthy
3,24,24,220.0,89.2,22,200.0,124,26.9,89.2,6,...,2585.0,30.903,337.0,1237.0,299.0,29.129,16.029,312.8,8.19,Healthy
4,20,20,130.0,78.4,20,126.0,116,22.2,60.4,0,...,833.0,8.282,124.0,394.0,98.0,7.584,4.503,133.6,2.579,Healthy


In [23]:
X_test = test.iloc[:,0:22]

In [25]:
X_test.head()

Unnamed: 0,ridageyr.y,dmdhrage,whd140,WaistCircumference,whq150,whd050,BPSystolic,BMI,Weight,paq655,...,drxtalco_avg,drxtkcal_avg,drxtmfat_avg,drxtchol_avg,drxtphos_avg,drxtmagn_avg,drxtm181_avg,drxts160_avg,drxtchl_avg,drxts180_avg
0,78,79,240.0,116.5,46,212.0,132,28.8,83.4,0,...,0.0,2205.0,29.409,416.0,1437.5,387.0,27.282,16.3785,514.8,8.2005
1,24,24,130.0,79.5,24,122.0,108,25.3,61.8,0,...,0.0,1720.5,25.754,411.0,1349.0,265.5,24.392,10.613,415.45,4.06
2,66,66,235.0,109.5,65,235.0,162,34.0,102.9,0,...,7.75,3104.5,47.438,466.0,1526.0,275.5,44.3705,27.0455,561.3,10.8185
3,24,24,220.0,89.2,22,200.0,124,26.9,89.2,6,...,0.0,2585.0,30.903,337.0,1237.0,299.0,29.129,16.029,312.8,8.19
4,20,20,130.0,78.4,20,126.0,116,22.2,60.4,0,...,0.0,833.0,8.282,124.0,394.0,98.0,7.584,4.503,133.6,2.579


In [26]:
y_test = test['Status']

In [27]:
y_test

0       Healthy
1       Healthy
2       Healthy
3       Healthy
4       Healthy
5       Healthy
6       Healthy
7       Healthy
8      Diabetic
9       Healthy
10      Healthy
11     Diabetic
12     Diabetic
13     Diabetic
14      Healthy
15      Healthy
16      Healthy
17      Healthy
18      Healthy
19     Diabetic
20     Diabetic
21     Diabetic
22      Healthy
23      Healthy
24      Healthy
25      Healthy
26      Healthy
27      Healthy
28      Healthy
29      Healthy
         ...   
519     Healthy
520     Healthy
521     Healthy
522     Healthy
523     Healthy
524     Healthy
525     Healthy
526     Healthy
527     Healthy
528     Healthy
529     Healthy
530     Healthy
531    Diabetic
532     Healthy
533     Healthy
534     Healthy
535     Healthy
536    Diabetic
537     Healthy
538     Healthy
539     Healthy
540     Healthy
541     Healthy
542     Healthy
543    Diabetic
544    Diabetic
545     Healthy
546     Healthy
547     Healthy
548     Healthy
Name: Status, Length: 54

In [7]:
train = train.drop('Unnamed: 0', axis = 1)


In [8]:
train.head()

Unnamed: 0,ridageyr.y,dmdhrage,whd140,WaistCircumference,whq150,whd050,BPSystolic,BMI,Weight,paq655,...,drxtkcal_avg,drxtmfat_avg,drxtchol_avg,drxtphos_avg,drxtmagn_avg,drxtm181_avg,drxts160_avg,drxtchl_avg,drxts180_avg,Status
0,58.0,60.0,185.0,93.0,58.0,180.0,140.0,26.6,83.9,1.0,...,2438.5,32.057,381.0,1270.0,281.0,29.412,15.5475,490.25,8.482,Healthy
1,47.0,45.0,175.0,90.0,45.0,165.0,128.0,24.8,77.9,0.0,...,1660.0,16.7055,209.5,891.5,210.0,15.383,7.606,294.2,3.549,Healthy
2,68.0,68.0,230.0,101.5,40.0,220.0,144.0,27.0,92.3,0.0,...,1219.0,32.027,207.0,980.0,108.0,29.038,14.884,199.8,8.532,Healthy
3,29.0,29.0,165.0,90.5,29.0,150.0,110.0,23.8,69.2,5.0,...,2857.0,24.272,357.0,1393.0,415.0,22.316,11.655,706.3,4.992,Healthy
4,28.0,28.0,105.0,71.0,18.0,100.0,108.0,18.4,45.9,3.0,...,1884.0,25.616,409.0,1517.0,316.0,23.655,11.437,442.6,5.793,Healthy


In [16]:
X_train = train.iloc[:,0:22]

In [17]:
X_train.head()

Unnamed: 0,ridageyr.y,dmdhrage,whd140,WaistCircumference,whq150,whd050,BPSystolic,BMI,Weight,paq655,...,drxtalco_avg,drxtkcal_avg,drxtmfat_avg,drxtchol_avg,drxtphos_avg,drxtmagn_avg,drxtm181_avg,drxts160_avg,drxtchl_avg,drxts180_avg
0,58.0,60.0,185.0,93.0,58.0,180.0,140.0,26.6,83.9,1.0,...,0.0,2438.5,32.057,381.0,1270.0,281.0,29.412,15.5475,490.25,8.482
1,47.0,45.0,175.0,90.0,45.0,165.0,128.0,24.8,77.9,0.0,...,44.65,1660.0,16.7055,209.5,891.5,210.0,15.383,7.606,294.2,3.549
2,68.0,68.0,230.0,101.5,40.0,220.0,144.0,27.0,92.3,0.0,...,0.0,1219.0,32.027,207.0,980.0,108.0,29.038,14.884,199.8,8.532
3,29.0,29.0,165.0,90.5,29.0,150.0,110.0,23.8,69.2,5.0,...,126.3,2857.0,24.272,357.0,1393.0,415.0,22.316,11.655,706.3,4.992
4,28.0,28.0,105.0,71.0,18.0,100.0,108.0,18.4,45.9,3.0,...,0.0,1884.0,25.616,409.0,1517.0,316.0,23.655,11.437,442.6,5.793


In [15]:
y_train.head()

Unnamed: 0,ridageyr.y,dmdhrage,whd140,WaistCircumference,whq150,whd050,BPSystolic,BMI,Weight,paq655,...,drxtalco_avg,drxtkcal_avg,drxtmfat_avg,drxtchol_avg,drxtphos_avg,drxtmagn_avg,drxtm181_avg,drxts160_avg,drxtchl_avg,drxts180_avg
0,58.0,60.0,185.0,93.0,58.0,180.0,140.0,26.6,83.9,1.0,...,0.0,2438.5,32.057,381.0,1270.0,281.0,29.412,15.5475,490.25,8.482
1,47.0,45.0,175.0,90.0,45.0,165.0,128.0,24.8,77.9,0.0,...,44.65,1660.0,16.7055,209.5,891.5,210.0,15.383,7.606,294.2,3.549
2,68.0,68.0,230.0,101.5,40.0,220.0,144.0,27.0,92.3,0.0,...,0.0,1219.0,32.027,207.0,980.0,108.0,29.038,14.884,199.8,8.532
3,29.0,29.0,165.0,90.5,29.0,150.0,110.0,23.8,69.2,5.0,...,126.3,2857.0,24.272,357.0,1393.0,415.0,22.316,11.655,706.3,4.992
4,28.0,28.0,105.0,71.0,18.0,100.0,108.0,18.4,45.9,3.0,...,0.0,1884.0,25.616,409.0,1517.0,316.0,23.655,11.437,442.6,5.793


In [18]:
y_train = train['Status']

In [19]:
y_train

0        Healthy
1        Healthy
2        Healthy
3        Healthy
4        Healthy
5        Healthy
6        Healthy
7        Healthy
8        Healthy
9        Healthy
10       Healthy
11       Healthy
12       Healthy
13       Healthy
14       Healthy
15       Healthy
16       Healthy
17       Healthy
18       Healthy
19       Healthy
20       Healthy
21       Healthy
22       Healthy
23       Healthy
24       Healthy
25       Healthy
26       Healthy
27       Healthy
28       Healthy
29       Healthy
          ...   
2408    Diabetic
2409    Diabetic
2410    Diabetic
2411    Diabetic
2412    Diabetic
2413    Diabetic
2414    Diabetic
2415    Diabetic
2416    Diabetic
2417    Diabetic
2418    Diabetic
2419    Diabetic
2420    Diabetic
2421    Diabetic
2422    Diabetic
2423    Diabetic
2424    Diabetic
2425    Diabetic
2426    Diabetic
2427    Diabetic
2428    Diabetic
2429    Diabetic
2430    Diabetic
2431    Diabetic
2432    Diabetic
2433    Diabetic
2434    Diabetic
2435    Diabet