In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier 
from sklearn.model_selection import GridSearchCV , train_test_split , RepeatedKFold , RandomizedSearchCV , KFold , cross_val_score
from sklearn.preprocessing import StandardScaler , MinMaxScaler
from sklearn.metrics import accuracy_score, plot_confusion_matrix , classification_report , roc_auc_score , confusion_matrix, roc_curve , recall_score , precision_score 
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import KFold
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import BaggingClassifier , VotingClassifier
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.inspection import permutation_importance

## Function for Report

In [2]:
# link: https://stackoverflow.com/questions/21986194/how-to-pass-dictionary-items-as-function-arguments-in-python
  
def report(score=None, clf=None, n=None):
    a = dict(score.iloc[0,:n])
    model = clf(**a)
    model.fit(x_train, y_train)
    
    print(classification_report(y_test , model.predict(x_test)))
    print('Training Accuracy : ', accuracy_score(y_train, model.predict(x_train))*100)
    print('Test/Validation Accuracy : ', accuracy_score(y_test , model.predict(x_test))*100)

In [3]:
def results(model):
    fp = np.sum((model.predict(x_test) == 1) & (y_test == 0))
    fn = np.sum((model.predict(x_test) == 0) & (y_test == 1))
    tn = np.sum((model.predict(x_test) == 0) & (y_test == 0))
    tp = np.sum((model.predict(x_test) == 1) & (y_test == 1))
    FPR = fp/(fp+tn)
    FNR = fn/(fn+tp)
    print('specifity',recall_score(np.logical_not(y_test) , np.logical_not(model.predict(x_test))))
    print('AUC : ',roc_auc_score(y_test , model.predict(x_test)))
    print('FPR :' , FPR )
    print('FNR :' , FNR )

## Hypert Parameters

In [4]:
param_grid_RF = {'criterion':['gini','entropy'],
                'n_estimators':np.arange(10,150,20),
                'max_depth':np.arange(2,11,2),
                'max_features':['sqrt', 'log2'],
                'random_state':[101,42,1,356]}

param_grid_DT = {'criterion':['gini','entropy'],
                 'splitter':['best', 'random'],
                 'max_depth':np.arange(2,15,1),
                 'max_features':['auto','sqrt', 'log2'],
                 'random_state':[101,42,1,356]}

param_grid_SVM = {'C':[0.5,1,1.5,2,2.5,3],
                 'kernel':['linear','poly', 'rbf', 'sigmoid'],
                 'degree':range(1,10),
                 'gamma':['scale','auto']}


param_grid_MLP = {'activation':['identity', 'logistic', 'tanh', 'relu'],
                  'solver':['lbfgs','sgd','adam'],
                  'hidden_layer_sizes':[50,100,150,200],
                  'learning_rate':['constant', 'invscaling', 'invscaling'],
                  'random_state':[101,42,1,356]}

param_grid_GP = {
    'kernel': [1.0 * RBF(length_scale) for length_scale in [0.1, 0.5, 1.0]],
    'optimizer': ['fmin_l_bfgs_b'],
    'random_state':[1,101,42,365]
}

In [5]:
df = pd.read_csv('finalData_.csv')

df.drop(['Trimester','Unnamed: 0','anx in 1st T','anx in 2st T','anx in 3st T','dep in 1st T','dep in 2st T',
        'BMI 0','BMI 1'] , axis = 1 , inplace = True)

df['delivery type'] = df['delivery type'].replace(['NVD','Cection'],[0,1])

data = pd.read_csv('data.csv')
df['anti TPO'] = data['anti TPO.1']

df['anti TPO'] = df['anti TPO'].replace(['Negative','posetive'],[1,0])

In [6]:
#Unfortunately, records for seven patients should be omitted the reason is that they are having an abortion. !
lst =  [1,13,46,47,57,60]
df = df.drop(lst, axis = 0)

df = df.drop('miscarriage' , axis = 1)

In [7]:
df['dep score in 2rd T'] = data['dep score in 2st T']

df['anx score in 2rd T'] = data['anxiety in the 2st T']

df.to_csv('data_final_NICU.csv')

In [8]:
demographic_factors = df[['Age', "Father's education", "Mother's education",
                          "Mother's job",
'Monthly income',"Father's job","live child", "Gravid" ,"parity", 
                          "Menstrual History","Recent miscarriage"]]


pregnancy_factors = df[['preterm labor','Preeclampsia','fetal distress'
                       ,'Common complaint Pregnancy',
                       'anti TPO','BMI 2','dep score in 2rd T',
                        'anx score in 2rd T'
                       ]]
neonatal_factors = df[['Birth weight','Sex','fetal distress'
                       ,'preterm labor' ,'Delivery Age','delivery type']]


delivery_factors = df[['preterm labor' , 'Delivery Age','delivery type']]

# Demographic```

In [42]:
x = demographic_factors
y = df['NICU']


oversample = SMOTE(k_neighbors=2 , random_state = 101)
x, y = oversample.fit_resample(x, y)

x_train,x_test,y_train,y_test = train_test_split(x,y,test_size = 0.3,random_state=101)

scaler = StandardScaler()

x_train = scaler.fit_transform(x_train)
x_test = scaler.fit_transform(x_test)

### Bagging

In [26]:
l_acc_bag_DT = []
for ib in range (1,20):
    for jb in [101,42,0,1,356]:
        for i in param_grid_DT['criterion']:
            for j in param_grid_DT['splitter']:
                for k in param_grid_DT['max_depth']:
                    for l in param_grid_DT['max_features']:
                        for m in param_grid_DT['random_state']: 
                            
                            bagging = BaggingClassifier(base_estimator=DecisionTreeClassifier(criterion= i , splitter= j , max_depth= k , max_features= l , random_state= m),n_estimators=ib, random_state=jb).fit(x_train, y_train)
                            l_acc_bag_DT.append([i,j,k,l,m,ib,jb,accuracy_score(y_test , bagging.predict(x_test))])
l_acc_bag_DT = pd.DataFrame(l_acc_bag_DT)
l_acc_bag_DT.sort_values(by = 7 , ascending= False)

Unnamed: 0,0,1,2,3,4,5,6,7
27057,gini,random,7,log2,42,9,1,0.869565
55209,gini,random,13,log2,42,18,1,0.869565
55211,gini,random,13,log2,356,18,1,0.869565
55212,gini,random,14,auto,101,18,1,0.869565
55213,gini,random,14,auto,42,18,1,0.869565
...,...,...,...,...,...,...,...,...
1509,gini,random,10,log2,42,1,0,0.391304
1510,gini,random,10,log2,1,1,0,0.391304
1511,gini,random,10,log2,356,1,0,0.391304
1512,gini,random,11,auto,101,1,0,0.391304


In [77]:
bagging_demo = BaggingClassifier(base_estimator=DecisionTreeClassifier(criterion= 'gini' , splitter= 'random' , max_depth= 7 , max_features= 'log2' , random_state= 42),n_estimators=9, random_state=1).fit(x_train, y_train)
print(classification_report(y_test , bagging_demo.predict(x_test)))

              precision    recall  f1-score   support

           0       0.91      0.83      0.87        12
           1       0.83      0.91      0.87        11

    accuracy                           0.87        23
   macro avg       0.87      0.87      0.87        23
weighted avg       0.87      0.87      0.87        23



In [195]:
results(bagging_demo)

specifity 0.8333333333333334
AUC :  0.8712121212121213
FPR : 0.16666666666666666
FNR : 0.09090909090909091


### DT

In [161]:
score_DT = []
for i in param_grid_DT['criterion']:
    for j in param_grid_DT['splitter']:
        for k in param_grid_DT['max_depth']:
            for l in param_grid_DT['max_features']:
                for m in param_grid_DT['random_state']: 
                    DT = DecisionTreeClassifier(criterion= i , splitter= j , max_depth= k , max_features= l , random_state= m)
                    DT.fit(x_train ,y_train)
                    score_DT.append([i,j,k,l,m, accuracy_score(y_test , DT.predict(x_test))])

score_DT = pd.DataFrame(score_DT)
score_DT = score_DT.sort_values(by=[5], ascending=[False])
score_DT.columns=['criterion','splitter','max_depth','max_features','random_state','accuracy']      

score_DT                  











Unnamed: 0,criterion,splitter,max_depth,max_features,random_state,accuracy
529,entropy,random,7,auto,42,0.782609
537,entropy,random,7,log2,42,0.782609
536,entropy,random,7,log2,101,0.782609
533,entropy,random,7,sqrt,42,0.782609
216,gini,random,7,auto,101,0.782609
...,...,...,...,...,...,...
551,entropy,random,8,log2,356,0.521739
359,entropy,best,5,log2,356,0.521739
547,entropy,random,8,sqrt,356,0.521739
355,entropy,best,5,sqrt,356,0.521739


In [11]:
report(score=score_DT, clf=DecisionTreeClassifier, n=5)

              precision    recall  f1-score   support

           0       0.82      0.75      0.78        12
           1       0.75      0.82      0.78        11

    accuracy                           0.78        23
   macro avg       0.78      0.78      0.78        23
weighted avg       0.79      0.78      0.78        23

Training Accuracy :  92.15686274509804
Test/Validation Accuracy :  78.26086956521739




In [201]:
results(dt_demo)

specifity 0.75
AUC :  0.7840909090909092
FPR : 0.25
FNR : 0.18181818181818182


### SVM

In [12]:
score_SVM = []
for i in param_grid_SVM['C']:
    for j in param_grid_SVM['kernel']:
        for k in param_grid_SVM['degree']:
            for l in param_grid_SVM['gamma']:
                SVM = SVC(C=i, kernel=j, degree=k, gamma=l, probability = True)
                SVM.fit(x_train, y_train)
                score_SVM.append([i,j,k,l, accuracy_score(y_test , SVM.predict(x_test)) ])

score_SVM = pd.DataFrame(score_SVM)
score_SVM = score_SVM.sort_values(by=[4], ascending=[False])
score_SVM.columns=['C','kernel','degree','gamma', 'accuracy']      

score_SVM

Unnamed: 0,C,kernel,degree,gamma,accuracy
0,0.5,linear,1,scale,0.739130
13,0.5,linear,7,auto,0.739130
1,0.5,linear,1,auto,0.739130
91,1.0,poly,1,auto,0.739130
90,1.0,poly,1,scale,0.739130
...,...,...,...,...,...
103,1.0,poly,7,auto,0.478261
102,1.0,poly,7,scale,0.478261
101,1.0,poly,6,auto,0.478261
100,1.0,poly,6,scale,0.478261


In [13]:
report(score=score_SVM, clf=SVC, n=4)

              precision    recall  f1-score   support

           0       0.75      0.75      0.75        12
           1       0.73      0.73      0.73        11

    accuracy                           0.74        23
   macro avg       0.74      0.74      0.74        23
weighted avg       0.74      0.74      0.74        23

Training Accuracy :  88.23529411764706
Test/Validation Accuracy :  73.91304347826086


In [202]:
results(svm_demo)

specifity 0.75
AUC :  0.7386363636363636
FPR : 0.25
FNR : 0.2727272727272727


### RF

In [14]:
score_RF=[]
for i in param_grid_RF['criterion']:
    for j in param_grid_RF['n_estimators']:
        for k in param_grid_RF['max_depth']:
            for l in param_grid_RF['max_features']:
                for m in param_grid_RF['random_state']:
                    RF = RandomForestClassifier(criterion = i , n_estimators=j , max_depth=k, max_features=l , random_state=m )
                    RF.fit(x_train, y_train)
                    score_RF.append([i,j,k,l,m, accuracy_score(y_test , RF.predict(x_test)) ])

score_RF = pd.DataFrame(score_RF)
score_RF = score_RF.sort_values(by=[5], ascending=[False])
score_RF.columns=['criterion','n_estimators','max_depth','max_features','random_state','accuracy']      

score_RF

Unnamed: 0,criterion,n_estimators,max_depth,max_features,random_state,accuracy
59,gini,30,6,sqrt,356,0.826087
63,gini,30,6,log2,356,0.826087
23,gini,10,6,log2,356,0.826087
19,gini,10,6,sqrt,356,0.826087
181,gini,90,6,log2,42,0.782609
...,...,...,...,...,...,...
443,entropy,90,2,sqrt,356,0.565217
447,entropy,90,2,log2,356,0.565217
523,entropy,130,2,sqrt,356,0.565217
483,entropy,110,2,sqrt,356,0.565217


In [15]:
report(score=score_RF, clf=RandomForestClassifier, n=5)

              precision    recall  f1-score   support

           0       0.79      0.92      0.85        12
           1       0.89      0.73      0.80        11

    accuracy                           0.83        23
   macro avg       0.84      0.82      0.82        23
weighted avg       0.84      0.83      0.82        23

Training Accuracy :  98.0392156862745
Test/Validation Accuracy :  82.6086956521739


In [204]:
results(rf_demo)

specifity 0.9166666666666666
AUC :  0.8219696969696969
FPR : 0.08333333333333333
FNR : 0.2727272727272727


### MLP

In [16]:
score_MLP=[]
for i in param_grid_MLP['activation']:
    for j in param_grid_MLP['solver']:
        for k in param_grid_MLP['hidden_layer_sizes']:
            for l in param_grid_MLP['learning_rate']:
                for m in param_grid_MLP['random_state']:
                    MLP = MLPClassifier(activation=i, solver=j, hidden_layer_sizes=k, learning_rate=l, random_state = m)
                    MLP.fit(x_train, y_train)
                    score_MLP.append([i,j,k,l,m, accuracy_score(y_test , MLP.predict(x_test)) ])

score_MLP = pd.DataFrame(score_MLP)
score_MLP = score_MLP.sort_values(by=[5], ascending=[False])
score_MLP.columns=['activation','solver','hidden_layer_sizes','learning_rate','random_state', 'accuracy']      

score_MLP

















Unnamed: 0,activation,solver,hidden_layer_sizes,learning_rate,random_state,accuracy
50,identity,sgd,50,constant,1,0.782609
92,identity,sgd,200,invscaling,101,0.739130
380,tanh,sgd,200,invscaling,101,0.739130
379,tanh,sgd,200,invscaling,356,0.739130
351,tanh,sgd,100,constant,356,0.739130
...,...,...,...,...,...,...
525,relu,sgd,200,invscaling,42,0.391304
69,identity,sgd,100,invscaling,42,0.304348
357,tanh,sgd,100,invscaling,42,0.304348
65,identity,sgd,100,invscaling,42,0.304348


In [17]:
report(score=score_MLP, clf=MLPClassifier, n=5)

              precision    recall  f1-score   support

           0       0.82      0.75      0.78        12
           1       0.75      0.82      0.78        11

    accuracy                           0.78        23
   macro avg       0.78      0.78      0.78        23
weighted avg       0.79      0.78      0.78        23

Training Accuracy :  84.31372549019608
Test/Validation Accuracy :  78.26086956521739




In [205]:
results(mlp_demo)

specifity 0.75
AUC :  0.7840909090909092
FPR : 0.25
FNR : 0.18181818181818182


### GP

In [16]:
score_GP=[]
for i in param_grid_GP['kernel']:
    for j in param_grid_GP['optimizer']:
        for k in param_grid_GP['random_state']:

            GP = GaussianProcessClassifier(kernel = i ,optimizer = j,random_state = k )
            GP.fit(x_train, y_train)
            score_GP.append([i,j,k, accuracy_score(y_test , GP.predict(x_test)) ])

score_GP = pd.DataFrame(score_GP)
score_GP = score_GP.sort_values(by=[3], ascending=[False])
score_GP.columns=['activation','solver','random_state','accuracy']      

score_GP

Unnamed: 0,activation,solver,random_state,accuracy
0,1**2 * RBF(length_scale=0.1),fmin_l_bfgs_b,1,0.695652
1,1**2 * RBF(length_scale=0.1),fmin_l_bfgs_b,101,0.695652
2,1**2 * RBF(length_scale=0.1),fmin_l_bfgs_b,42,0.695652
3,1**2 * RBF(length_scale=0.1),fmin_l_bfgs_b,365,0.695652
4,1**2 * RBF(length_scale=0.5),fmin_l_bfgs_b,1,0.695652
5,1**2 * RBF(length_scale=0.5),fmin_l_bfgs_b,101,0.695652
6,1**2 * RBF(length_scale=0.5),fmin_l_bfgs_b,42,0.695652
7,1**2 * RBF(length_scale=0.5),fmin_l_bfgs_b,365,0.695652
8,1**2 * RBF(length_scale=1),fmin_l_bfgs_b,1,0.695652
9,1**2 * RBF(length_scale=1),fmin_l_bfgs_b,101,0.695652


In [17]:
GP_demo = GaussianProcessClassifier(kernel = 1**2 * RBF(length_scale=0.1) ,optimizer = 'fmin_l_bfgs_b',
                                           random_state = 1).fit(x_train,y_train)
print(classification_report(y_test , GP_demo.predict(x_test)))

              precision    recall  f1-score   support

           0       0.73      0.67      0.70        12
           1       0.67      0.73      0.70        11

    accuracy                           0.70        23
   macro avg       0.70      0.70      0.70        23
weighted avg       0.70      0.70      0.70        23



In [18]:
results(GP_demo)

specifity 0.6666666666666666
AUC :  0.696969696969697
FPR : 0.3333333333333333
FNR : 0.2727272727272727


### MV

In [197]:
dt_demo = DecisionTreeClassifier(criterion= 'entropy' , splitter= 'random' , max_depth= 7, max_features= 'auto' , random_state= 42)
svm_demo = SVC(C= 0.5, kernel='linear', degree=1, gamma='scale', probability = True)
rf_demo = RandomForestClassifier(criterion = 'gini' , n_estimators=30 , max_depth=6, max_features='sqrt' , random_state=356 )
mlp_demo = MLPClassifier(activation= 'identity', solver= 'sgd', hidden_layer_sizes= 50, learning_rate='constant', random_state = 1)

In [170]:
MV_demo = VotingClassifier(estimators=[('bagging',bagging_demo), ('dt',dt_demo),('rf', rf_demo),('mlp',mlp_demo), ('svm', svm_demo),
                                      ('GP',GP_demo)], voting='hard')
MV_demo.fit(x_train,y_train)



In [30]:
print(classification_report(y_test , MV_demo.predict(x_test)))

              precision    recall  f1-score   support

           0       0.82      0.75      0.78        12
           1       0.75      0.82      0.78        11

    accuracy                           0.78        23
   macro avg       0.78      0.78      0.78        23
weighted avg       0.79      0.78      0.78        23



In [171]:
results(MV_demo)

specifity 0.75
AUC :  0.7840909090909092
FPR : 0.25
FNR : 0.18181818181818182


# Feature Importance For Demographic Factors

In [None]:
dt_demo = DecisionTreeClassifier(criterion= 'entropy' , splitter= 'random' , max_depth= 7, max_features= 'auto' , random_state= 42).fit(x_train , y_train)
svm_demo = SVC(C= 0.5, kernel='linear', degree=1, gamma='scale', probability = True).fit(x_train , y_train)
rf_demo = RandomForestClassifier(criterion = 'gini' , n_estimators=30 , max_depth=6, max_features='sqrt' , random_state=356 ).fit(x_train , y_train)
mlp_demo = MLPClassifier(activation= 'identity', solver= 'sgd', hidden_layer_sizes= 50, learning_rate='constant', random_state = 1).fit(x_train , y_train)

## SVM

In [33]:
result = permutation_importance(svm_demo, x_test, y_test, n_repeats=10, random_state=42)

# Get feature importances
importances = result.importances_mean

a=pd.DataFrame(importances)
a.index = demographic_factors.columns
round(a[0],2)

Age                   0.13
Father's education    0.07
Mother's education    0.04
Mother's job          0.03
Monthly income       -0.04
Father's job          0.07
live child            0.08
Gravid                0.02
parity                0.11
Menstrual History     0.12
Recent miscarriage    0.05
Name: 0, dtype: float64

## DT

In [35]:
importances = dt_demo.feature_importances_

# Print feature importances
a=pd.DataFrame(importances)
a.index = demographic_factors.columns
round(a[0],2)

Age                   0.10
Father's education    0.26
Mother's education    0.10
Mother's job          0.02
Monthly income        0.07
Father's job          0.00
live child            0.00
Gravid                0.01
parity                0.07
Menstrual History     0.35
Recent miscarriage    0.03
Name: 0, dtype: float64

## RF

In [36]:
importances = rf_demo.feature_importances_

# Print feature importances
a=pd.DataFrame(importances)
a.index = demographic_factors.columns
round(a[0],2)

Age                   0.14
Father's education    0.06
Mother's education    0.04
Mother's job          0.10
Monthly income        0.15
Father's job          0.02
live child            0.09
Gravid                0.03
parity                0.12
Menstrual History     0.18
Recent miscarriage    0.06
Name: 0, dtype: float64

## Bagging

In [37]:
base_estimator=DecisionTreeClassifier(criterion= 'gini' , splitter= 'random' , max_depth= 7 , max_features= 'log2' , random_state= 42).fit(x_train , y_train)
bagging_demo = BaggingClassifier(base_estimator=base_estimator,
                                 n_estimators=9, random_state=1).fit(x_train, y_train)

importances = bagging_demo.base_estimator.feature_importances_

# Print feature importances
a=pd.DataFrame(importances)
a.index = demographic_factors.columns
round(a[0],2)

Age                   0.05
Father's education    0.20
Mother's education    0.08
Mother's job          0.01
Monthly income        0.19
Father's job          0.00
live child            0.00
Gravid                0.00
parity                0.05
Menstrual History     0.41
Recent miscarriage    0.02
Name: 0, dtype: float64

## MLP

In [38]:
result = permutation_importance(mlp_demo, x_test, y_test, n_repeats=10, random_state=42)

# Get feature importances
importances = result.importances_mean

# Print feature importances
a=pd.DataFrame(importances)
a.index = demographic_factors.columns
round(a[0],2)

Age                   0.13
Father's education    0.11
Mother's education    0.00
Mother's job          0.07
Monthly income        0.01
Father's job          0.12
live child            0.07
Gravid                0.05
parity                0.13
Menstrual History     0.11
Recent miscarriage    0.09
Name: 0, dtype: float64

# pregnancy Factors

In [48]:
x = pregnancy_factors
y = df['NICU']


oversample = SMOTE(k_neighbors=2 , random_state = 101)
x, y = oversample.fit_resample(x, y)

x_train,x_test,y_train,y_test = train_test_split(x,y,test_size = 0.3,random_state=101)

scaler = StandardScaler()

x_train = scaler.fit_transform(x_train)
x_test = scaler.fit_transform(x_test)

### Bagging

In [55]:
l_acc_bag_DT = []
for ib in range (1,20):
    for jb in [101,42,0,1,356]:
        for i in param_grid_DT['criterion']:
            for j in param_grid_DT['splitter']:
                for k in param_grid_DT['max_depth']:
                    for l in param_grid_DT['max_features']:
                        for m in param_grid_DT['random_state']: 
                            
                            bagging = BaggingClassifier(base_estimator=DecisionTreeClassifier(criterion= i , splitter= j , max_depth= k , max_features= l , random_state= m),n_estimators=ib, random_state=jb).fit(x_train, y_train)
                            l_acc_bag_DT.append([i,j,k,l,m,ib,jb,accuracy_score(y_test , bagging.predict(x_test))])
l_acc_bag_DT = pd.DataFrame(l_acc_bag_DT)
l_acc_bag_DT.sort_values(by = 7 , ascending= False)

Unnamed: 0,0,1,2,3,4,5,6,7
31132,entropy,random,9,sqrt,101,10,356,0.956522
20821,gini,random,8,auto,42,7,1,0.956522
40495,entropy,random,9,sqrt,356,13,356,0.956522
40494,entropy,random,9,sqrt,1,13,356,0.956522
40493,entropy,random,9,sqrt,42,13,356,0.956522
...,...,...,...,...,...,...,...,...
1882,gini,best,2,log2,1,1,1,0.391304
1883,gini,best,2,log2,356,1,1,0.391304
2193,entropy,best,2,log2,42,1,1,0.391304
2194,entropy,best,2,log2,1,1,1,0.391304


In [56]:
bagging_preg = BaggingClassifier(base_estimator=DecisionTreeClassifier(criterion= 'entropy' , splitter= 'random' , max_depth= 9 , max_features= 'sqrt' , random_state= 101),n_estimators=10, random_state=356).fit(x_train, y_train)
print(classification_report(y_test , bagging_preg.predict(x_test)))

              precision    recall  f1-score   support

           0       0.92      1.00      0.96        12
           1       1.00      0.91      0.95        11

    accuracy                           0.96        23
   macro avg       0.96      0.95      0.96        23
weighted avg       0.96      0.96      0.96        23



In [57]:
results(bagging_preg)

specifity 1.0
AUC :  0.9545454545454546
FPR : 0.0
FNR : 0.09090909090909091


### DT

In [41]:
score_DT = []
for i in param_grid_DT['criterion']:
    for j in param_grid_DT['splitter']:
        for k in param_grid_DT['max_depth']:
            for l in param_grid_DT['max_features']:
                for m in param_grid_DT['random_state']: 
                    DT = DecisionTreeClassifier(criterion= i , splitter= j , max_depth= k , max_features= l , random_state= m)
                    DT.fit(x_train ,y_train)
                    score_DT.append([i,j,k,l,m, accuracy_score(y_test , DT.predict(x_test))])

score_DT = pd.DataFrame(score_DT)
score_DT = score_DT.sort_values(by=[5], ascending=[False])
score_DT.columns=['criterion','splitter','max_depth','max_features','random_state','accuracy']      

score_DT                  







Unnamed: 0,criterion,splitter,max_depth,max_features,random_state,accuracy
405,entropy,best,9,log2,42,0.913043
73,gini,best,8,auto,42,0.913043
81,gini,best,8,log2,42,0.913043
137,gini,best,13,sqrt,42,0.913043
429,entropy,best,11,log2,42,0.913043
...,...,...,...,...,...,...
179,gini,random,3,log2,356,0.521739
22,gini,best,3,log2,1,0.521739
227,gini,random,7,log2,356,0.478261
539,entropy,random,7,log2,356,0.478261


In [46]:
report(score=score_DT, clf=DecisionTreeClassifier, n=5)

              precision    recall  f1-score   support

           0       0.92      0.92      0.92        12
           1       0.91      0.91      0.91        11

    accuracy                           0.91        23
   macro avg       0.91      0.91      0.91        23
weighted avg       0.91      0.91      0.91        23

Training Accuracy :  100.0
Test/Validation Accuracy :  91.30434782608695


In [47]:
results(dt_preg)

specifity 0.9166666666666666
AUC :  0.9128787878787878
FPR : 0.08333333333333333
FNR : 0.09090909090909091


### SVM

In [48]:
score_SVM = []
for i in param_grid_SVM['C']:
    for j in param_grid_SVM['kernel']:
        for k in param_grid_SVM['degree']:
            for l in param_grid_SVM['gamma']:
                SVM = SVC(C=i, kernel=j, degree=k, gamma=l, probability = True)
                SVM.fit(x_train, y_train)
                score_SVM.append([i,j,k,l, accuracy_score(y_test , SVM.predict(x_test)) ])

score_SVM = pd.DataFrame(score_SVM)
score_SVM = score_SVM.sort_values(by=[4], ascending=[False])
score_SVM.columns=['C','kernel','degree','gamma', 'accuracy']      

score_SVM

Unnamed: 0,C,kernel,degree,gamma,accuracy
0,0.5,linear,1,scale,0.826087
252,2.0,rbf,1,scale,0.826087
258,2.0,rbf,4,scale,0.826087
257,2.0,rbf,3,auto,0.826087
256,2.0,rbf,3,scale,0.826087
...,...,...,...,...,...
418,3.0,sigmoid,3,scale,0.652174
417,3.0,sigmoid,2,auto,0.652174
415,3.0,sigmoid,1,auto,0.652174
414,3.0,sigmoid,1,scale,0.652174


In [53]:
report(score=score_SVM, clf=SVC, n=4)

              precision    recall  f1-score   support

           0       0.75      1.00      0.86        12
           1       1.00      0.64      0.78        11

    accuracy                           0.83        23
   macro avg       0.88      0.82      0.82        23
weighted avg       0.87      0.83      0.82        23

Training Accuracy :  80.3921568627451
Test/Validation Accuracy :  82.6086956521739


In [54]:
results(svm_preg)

specifity 1.0
AUC :  0.8181818181818181
FPR : 0.0
FNR : 0.36363636363636365


### RF

In [59]:
score_RF=[]
for i in param_grid_RF['criterion']:
    for j in param_grid_RF['n_estimators']:
        for k in param_grid_RF['max_depth']:
            for l in param_grid_RF['max_features']:
                for m in param_grid_RF['random_state']:
                    RF = RandomForestClassifier(criterion = i , n_estimators=j , max_depth=k, max_features=l , random_state=m )
                    RF.fit(x_train, y_train)
                    score_RF.append([i,j,k,l,m, accuracy_score(y_test , RF.predict(x_test)) ])

score_RF = pd.DataFrame(score_RF)
score_RF = score_RF.sort_values(by=[5], ascending=[False])
score_RF.columns=['criterion','n_estimators','max_depth','max_features','random_state','accuracy']      

score_RF

Unnamed: 0,criterion,n_estimators,max_depth,max_features,random_state,accuracy
139,gini,70,6,sqrt,356,0.913043
339,entropy,30,6,sqrt,356,0.869565
354,entropy,30,10,sqrt,1,0.869565
72,gini,30,10,sqrt,101,0.869565
67,gini,30,8,sqrt,356,0.869565
...,...,...,...,...,...,...
13,gini,10,4,log2,42,0.652174
9,gini,10,4,sqrt,42,0.652174
6,gini,10,2,log2,1,0.652174
285,entropy,10,2,log2,42,0.652174


In [60]:
report(score=score_RF, clf=RandomForestClassifier, n=5)

              precision    recall  f1-score   support

           0       0.86      1.00      0.92        12
           1       1.00      0.82      0.90        11

    accuracy                           0.91        23
   macro avg       0.93      0.91      0.91        23
weighted avg       0.93      0.91      0.91        23

Training Accuracy :  100.0
Test/Validation Accuracy :  91.30434782608695


In [61]:
results(rf_preg)

specifity 0.9166666666666666
AUC :  0.8219696969696969
FPR : 0.08333333333333333
FNR : 0.2727272727272727


### MLP

In [64]:
score_MLP=[]
for i in param_grid_MLP['activation']:
    for j in param_grid_MLP['solver']:
        for k in param_grid_MLP['hidden_layer_sizes']:
            for l in param_grid_MLP['learning_rate']:
                for m in param_grid_MLP['random_state']:
                    MLP = MLPClassifier(activation=i, solver=j, hidden_layer_sizes=k, learning_rate=l, random_state = m)
                    MLP.fit(x_train, y_train)
                    score_MLP.append([i,j,k,l,m, accuracy_score(y_test , MLP.predict(x_test)) ])

score_MLP = pd.DataFrame(score_MLP)
score_MLP = score_MLP.sort_values(by=[5], ascending=[False])
score_MLP.columns=['activation','solver','hidden_layer_sizes','learning_rate','random_state', 'accuracy']      

score_MLP















Unnamed: 0,activation,solver,hidden_layer_sizes,learning_rate,random_state,accuracy
290,tanh,lbfgs,50,constant,1,0.956522
463,relu,lbfgs,150,invscaling,356,0.956522
459,relu,lbfgs,150,constant,356,0.956522
475,relu,lbfgs,200,invscaling,356,0.956522
467,relu,lbfgs,150,invscaling,356,0.956522
...,...,...,...,...,...,...
503,relu,sgd,100,invscaling,356,0.260870
65,identity,sgd,100,invscaling,42,0.217391
69,identity,sgd,100,invscaling,42,0.217391
357,tanh,sgd,100,invscaling,42,0.217391


In [66]:
report(score=score_MLP, clf=MLPClassifier, n=5)

              precision    recall  f1-score   support

           0       0.92      1.00      0.96        12
           1       1.00      0.91      0.95        11

    accuracy                           0.96        23
   macro avg       0.96      0.95      0.96        23
weighted avg       0.96      0.96      0.96        23

Training Accuracy :  100.0
Test/Validation Accuracy :  95.65217391304348


In [67]:
results(mlp_preg)

specifity 0.9166666666666666
AUC :  0.9128787878787878
FPR : 0.08333333333333333
FNR : 0.09090909090909091


### GP

In [70]:
score_GP=[]
for i in param_grid_GP['kernel']:
    for j in param_grid_GP['optimizer']:
        for k in param_grid_GP['random_state']:

            GP = GaussianProcessClassifier(kernel = i ,optimizer = j,random_state = k )
            GP.fit(x_train, y_train)
            score_GP.append([i,j,k, accuracy_score(y_test , GP.predict(x_test)) ])

score_GP = pd.DataFrame(score_GP)
score_GP = score_GP.sort_values(by=[3], ascending=[False])
score_GP.columns=['activation','solver','random_state','accuracy']      

score_GP

Unnamed: 0,activation,solver,random_state,accuracy
0,1**2 * RBF(length_scale=0.1),fmin_l_bfgs_b,1,0.826087
1,1**2 * RBF(length_scale=0.1),fmin_l_bfgs_b,101,0.826087
2,1**2 * RBF(length_scale=0.1),fmin_l_bfgs_b,42,0.826087
3,1**2 * RBF(length_scale=0.1),fmin_l_bfgs_b,365,0.826087
4,1**2 * RBF(length_scale=0.5),fmin_l_bfgs_b,1,0.826087
5,1**2 * RBF(length_scale=0.5),fmin_l_bfgs_b,101,0.826087
6,1**2 * RBF(length_scale=0.5),fmin_l_bfgs_b,42,0.826087
7,1**2 * RBF(length_scale=0.5),fmin_l_bfgs_b,365,0.826087
8,1**2 * RBF(length_scale=1),fmin_l_bfgs_b,1,0.826087
9,1**2 * RBF(length_scale=1),fmin_l_bfgs_b,101,0.826087


In [71]:
GP_preg = GaussianProcessClassifier(kernel = 1**2 * RBF(length_scale=0.1) ,optimizer = 'fmin_l_bfgs_b',
                                           random_state = 1).fit(x_train,y_train)
print(classification_report(y_test , GP_preg.predict(x_test)))

              precision    recall  f1-score   support

           0       0.75      1.00      0.86        12
           1       1.00      0.64      0.78        11

    accuracy                           0.83        23
   macro avg       0.88      0.82      0.82        23
weighted avg       0.87      0.83      0.82        23



In [72]:
results(dt_preg)

specifity 0.9166666666666666
AUC :  0.9128787878787878
FPR : 0.08333333333333333
FNR : 0.09090909090909091


### MV

In [78]:
dt_preg = DecisionTreeClassifier(criterion= 'entropy' , splitter= 'best' , max_depth= 9, max_features= 'log2' , random_state= 42)
svm_preg = SVC(C= 0.5, kernel='linear', degree=1, gamma='scale', probability = True)
rf_preg = RandomForestClassifier(criterion = 'gini' , n_estimators=70 , max_depth=6, max_features='sqrt' , random_state=356 )
mlp_preg = MLPClassifier(activation= 'tanh', solver= 'lbfgs', hidden_layer_sizes= 50, learning_rate='constant', random_state = 1)
GP_preg = GaussianProcessClassifier(kernel = 1**2 * RBF(length_scale=0.1) ,optimizer = 'fmin_l_bfgs_b',
                                          random_state = 1)
bagging_preg = BaggingClassifier(base_estimator=DecisionTreeClassifier(criterion= 'entropy' , splitter= 'random' , max_depth= 9 , max_features= 'sqrt' , random_state= 101),n_estimators=10, random_state=356)


In [79]:
MV_preg = VotingClassifier(estimators=[('dt',dt_preg),('rf', rf_preg),('mlp',mlp_preg), ('svm', svm_preg),
                                      ('bagging',bagging_preg),('GP',GP_preg)], voting='hard')
MV_preg.fit(x_train,y_train)

In [75]:
print(classification_report(y_test , MV_preg.predict(x_test)))

              precision    recall  f1-score   support

           0       0.86      1.00      0.92        12
           1       1.00      0.82      0.90        11

    accuracy                           0.91        23
   macro avg       0.93      0.91      0.91        23
weighted avg       0.93      0.91      0.91        23



In [76]:
results(MV_preg)

specifity 1.0
AUC :  0.9090909090909092
FPR : 0.0
FNR : 0.18181818181818182


# Feature Importance For Pregnancy Factors

In [44]:
dt_preg = DecisionTreeClassifier(criterion= 'entropy' , splitter= 'best' , max_depth= 9, max_features= 'log2' , random_state= 42).fit(x_train , y_train)
svm_preg = SVC(C= 0.5, kernel='linear', degree=1, gamma='scale', probability = True).fit(x_train , y_train)
rf_preg = RandomForestClassifier(criterion = 'gini' , n_estimators=70 , max_depth=6, max_features='sqrt' , random_state=356 ).fit(x_train , y_train)
mlp_preg = MLPClassifier(activation= 'tanh', solver= 'lbfgs', hidden_layer_sizes= 50, learning_rate='constant', random_state = 1).fit(x_train , y_train)

## SVM

In [49]:
result = permutation_importance(svm_preg, x_test, y_test, n_repeats=10, random_state=42)
# Get feature importances
importances = result.importances_mean
a=pd.DataFrame(importances)
a.index = pregnancy_factors.columns
round(a[0],2)

preterm labor                 0.24
Preeclampsia                  0.04
fetal distress                0.00
Common complaint Pregnancy    0.00
anti TPO                      0.00
BMI 2                         0.00
dep score in 2rd T            0.00
anx score in 2rd T            0.00
Name: 0, dtype: float64

## DT

In [50]:
importances = dt_preg.feature_importances_
# Print feature importances
a=pd.DataFrame(importances)
a.index = pregnancy_factors.columns
round(a[0],2)

preterm labor                 0.28
Preeclampsia                  0.09
fetal distress                0.00
Common complaint Pregnancy    0.01
anti TPO                      0.19
BMI 2                         0.20
dep score in 2rd T            0.12
anx score in 2rd T            0.10
Name: 0, dtype: float64

## RF

In [51]:
importances = rf_preg.feature_importances_
# Print feature importances
a=pd.DataFrame(importances)
a.index = pregnancy_factors.columns
round(a[0],2)

preterm labor                 0.16
Preeclampsia                  0.14
fetal distress                0.03
Common complaint Pregnancy    0.03
anti TPO                      0.06
BMI 2                         0.21
dep score in 2rd T            0.20
anx score in 2rd T            0.16
Name: 0, dtype: float64

## Bagging

In [52]:
base_estimator = DecisionTreeClassifier(criterion= 'entropy' , splitter= 'random' , max_depth= 9 , max_features= 'sqrt' , random_state= 101).fit(x_train , y_train)
bagging_preg = BaggingClassifier(base_estimator=base_estimator,n_estimators=10, random_state=356).fit(x_train, y_train)
importances = bagging_preg.base_estimator.feature_importances_
a=pd.DataFrame(importances)
a.index = pregnancy_factors.columns
round(a[0],2)

preterm labor                 0.28
Preeclampsia                  0.03
fetal distress                0.08
Common complaint Pregnancy    0.07
anti TPO                      0.08
BMI 2                         0.09
dep score in 2rd T            0.23
anx score in 2rd T            0.13
Name: 0, dtype: float64

## MLP

In [53]:
result = permutation_importance(mlp_preg, x_test, y_test, n_repeats=10, random_state=42)
# Get feature importances
importances = result.importances_mean
# Print feature importances
a=pd.DataFrame(importances)
a.index = pregnancy_factors.columns
round(a[0],2)

preterm labor                 0.30
Preeclampsia                  0.04
fetal distress                0.02
Common complaint Pregnancy    0.08
anti TPO                      0.10
BMI 2                         0.15
dep score in 2rd T            0.09
anx score in 2rd T            0.12
Name: 0, dtype: float64

# Nenatal Factors

In [49]:
x = neonatal_factors
y = df['NICU']

oversample = SMOTE(k_neighbors=2 , random_state = 101)
x, y = oversample.fit_resample(x, y)

x_train,x_test,y_train,y_test = train_test_split(x,y,test_size = 0.3,random_state=101)

scaler = StandardScaler()

x_train = scaler.fit_transform(x_train)
x_test = scaler.fit_transform(x_test)

### Bagging

In [102]:
l_acc_bag_DT = []
for ib in range (1,20):
    for jb in [101,42,0,1,356]:
        for i in param_grid_DT['criterion']:
            for j in param_grid_DT['splitter']:
                for k in param_grid_DT['max_depth']:
                    for l in param_grid_DT['max_features']:
                        for m in param_grid_DT['random_state']: 
                            
                            bagging = BaggingClassifier(base_estimator=DecisionTreeClassifier(criterion= i , splitter= j , max_depth= k , max_features= l , random_state= m),n_estimators=ib, random_state=jb).fit(x_train, y_train)
                            l_acc_bag_DT.append([i,j,k,l,m,ib,jb,accuracy_score(y_test , bagging.predict(x_test))])
l_acc_bag_DT = pd.DataFrame(l_acc_bag_DT)
l_acc_bag_DT.sort_values(by = 7 , ascending= False)

Unnamed: 0,0,1,2,3,4,5,6,7
1452,gini,random,6,auto,101,1,0,0.956522
1453,gini,random,6,auto,42,1,0,0.956522
1454,gini,random,6,auto,1,1,0,0.956522
1455,gini,random,6,auto,356,1,0,0.956522
1456,gini,random,6,sqrt,101,1,0,0.956522
...,...,...,...,...,...,...,...,...
59119,entropy,best,14,sqrt,356,19,356,0.956522
59120,entropy,best,14,log2,101,19,356,0.956522
59121,entropy,best,14,log2,42,19,356,0.956522
59122,entropy,best,14,log2,1,19,356,0.956522


In [223]:
bagging_neo = BaggingClassifier(base_estimator=DecisionTreeClassifier(criterion= 'gini' , splitter= 'random' , max_depth= 6 , max_features= 'auto' , random_state= 101),n_estimators=1, random_state=0).fit(x_train, y_train)
print(classification_report(y_test , bagging_neo.predict(x_test)))

              precision    recall  f1-score   support

           0       1.00      0.92      0.96        12
           1       0.92      1.00      0.96        11

    accuracy                           0.96        23
   macro avg       0.96      0.96      0.96        23
weighted avg       0.96      0.96      0.96        23



In [224]:
results(bagging_neo)

specifity 0.9166666666666666
AUC :  0.9583333333333333
FPR : 0.08333333333333333
FNR : 0.0


### DT

In [87]:
score_DT = []
for i in param_grid_DT['criterion']:
    for j in param_grid_DT['splitter']:
        for k in param_grid_DT['max_depth']:
            for l in param_grid_DT['max_features']:
                for m in param_grid_DT['random_state']: 
                    DT = DecisionTreeClassifier(criterion= i , splitter= j , max_depth= k , max_features= l , random_state= m)
                    DT.fit(x_train ,y_train)
                    score_DT.append([i,j,k,l,m, accuracy_score(y_test , DT.predict(x_test))])

score_DT = pd.DataFrame(score_DT)
score_DT = score_DT.sort_values(by=[5], ascending=[False])
score_DT.columns=['criterion','splitter','max_depth','max_features','random_state','accuracy']      

score_DT                  







Unnamed: 0,criterion,splitter,max_depth,max_features,random_state,accuracy
149,gini,best,14,sqrt,42,0.956522
373,entropy,best,7,auto,42,0.956522
65,gini,best,7,sqrt,42,0.956522
421,entropy,best,11,auto,42,0.956522
377,entropy,best,7,sqrt,42,0.956522
...,...,...,...,...,...,...
314,entropy,best,2,auto,1,0.608696
10,gini,best,2,log2,1,0.608696
474,entropy,random,2,sqrt,1,0.608696
166,gini,random,2,log2,1,0.608696


In [88]:
report(score=score_DT, clf=DecisionTreeClassifier, n=5)

              precision    recall  f1-score   support

           0       1.00      0.92      0.96        12
           1       0.92      1.00      0.96        11

    accuracy                           0.96        23
   macro avg       0.96      0.96      0.96        23
weighted avg       0.96      0.96      0.96        23

Training Accuracy :  98.0392156862745
Test/Validation Accuracy :  95.65217391304348


In [226]:
results(dt_neo)

specifity 0.9166666666666666
AUC :  0.9583333333333333
FPR : 0.08333333333333333
FNR : 0.0


### SVM

In [89]:
score_SVM = []
for i in param_grid_SVM['C']:
    for j in param_grid_SVM['kernel']:
        for k in param_grid_SVM['degree']:
            for l in param_grid_SVM['gamma']:
                SVM = SVC(C=i, kernel=j, degree=k, gamma=l, probability = True)
                SVM.fit(x_train, y_train)
                score_SVM.append([i,j,k,l, accuracy_score(y_test , SVM.predict(x_test)) ])

score_SVM = pd.DataFrame(score_SVM)
score_SVM = score_SVM.sort_values(by=[4], ascending=[False])
score_SVM.columns=['C','kernel','degree','gamma', 'accuracy']      

score_SVM

Unnamed: 0,C,kernel,degree,gamma,accuracy
0,0.5,linear,1,scale,0.956522
223,2.0,linear,4,auto,0.956522
163,1.5,poly,1,auto,0.956522
1,0.5,linear,1,auto,0.956522
217,2.0,linear,1,auto,0.956522
...,...,...,...,...,...
20,0.5,poly,2,scale,0.739130
164,1.5,poly,2,scale,0.739130
21,0.5,poly,2,auto,0.739130
236,2.0,poly,2,scale,0.695652


In [90]:
report(score=score_SVM, clf=SVC, n=4)

              precision    recall  f1-score   support

           0       1.00      0.92      0.96        12
           1       0.92      1.00      0.96        11

    accuracy                           0.96        23
   macro avg       0.96      0.96      0.96        23
weighted avg       0.96      0.96      0.96        23

Training Accuracy :  90.19607843137256
Test/Validation Accuracy :  95.65217391304348


In [227]:
results(svm_neo)

specifity 0.9166666666666666
AUC :  0.9583333333333333
FPR : 0.08333333333333333
FNR : 0.0


### RF

In [91]:
score_RF=[]
for i in param_grid_RF['criterion']:
    for j in param_grid_RF['n_estimators']:
        for k in param_grid_RF['max_depth']:
            for l in param_grid_RF['max_features']:
                for m in param_grid_RF['random_state']:
                    RF = RandomForestClassifier(criterion = i , n_estimators=j , max_depth=k, max_features=l , random_state=m )
                    RF.fit(x_train, y_train)
                    score_RF.append([i,j,k,l,m, accuracy_score(y_test , RF.predict(x_test)) ])

score_RF = pd.DataFrame(score_RF)
score_RF = score_RF.sort_values(by=[5], ascending=[False])
score_RF.columns=['criterion','n_estimators','max_depth','max_features','random_state','accuracy']      

score_RF

Unnamed: 0,criterion,n_estimators,max_depth,max_features,random_state,accuracy
559,entropy,130,10,log2,356,0.956522
351,entropy,30,8,log2,356,0.956522
539,entropy,130,6,sqrt,356,0.956522
540,entropy,130,6,log2,101,0.956522
364,entropy,50,2,log2,101,0.956522
...,...,...,...,...,...,...
444,entropy,90,2,log2,101,0.869565
445,entropy,90,2,log2,42,0.869565
480,entropy,110,2,sqrt,101,0.869565
484,entropy,110,2,log2,101,0.869565


In [92]:
report(score=score_RF, clf=RandomForestClassifier, n=5)

              precision    recall  f1-score   support

           0       1.00      0.92      0.96        12
           1       0.92      1.00      0.96        11

    accuracy                           0.96        23
   macro avg       0.96      0.96      0.96        23
weighted avg       0.96      0.96      0.96        23

Training Accuracy :  98.0392156862745
Test/Validation Accuracy :  95.65217391304348


In [228]:
results(rf_neo)

specifity 0.9166666666666666
AUC :  0.9583333333333333
FPR : 0.08333333333333333
FNR : 0.0


### MLP

In [93]:
score_MLP=[]
for i in param_grid_MLP['activation']:
    for j in param_grid_MLP['solver']:
        for k in param_grid_MLP['hidden_layer_sizes']:
            for l in param_grid_MLP['learning_rate']:
                for m in param_grid_MLP['random_state']:
                    MLP = MLPClassifier(activation=i, solver=j, hidden_layer_sizes=k, learning_rate=l, random_state = m)
                    MLP.fit(x_train, y_train)
                    score_MLP.append([i,j,k,l,m, accuracy_score(y_test , MLP.predict(x_test)) ])

score_MLP = pd.DataFrame(score_MLP)
score_MLP = score_MLP.sort_values(by=[5], ascending=[False])
score_MLP.columns=['activation','solver','hidden_layer_sizes','learning_rate','random_state', 'accuracy']      

score_MLP





STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("









Unnamed: 0,activation,solver,hidden_layer_sizes,learning_rate,random_state,accuracy
0,identity,lbfgs,50,constant,101,0.956522
309,tanh,lbfgs,100,invscaling,42,0.956522
272,logistic,adam,150,invscaling,101,0.956522
274,logistic,adam,150,invscaling,1,0.956522
276,logistic,adam,200,constant,101,0.956522
...,...,...,...,...,...,...
512,relu,sgd,150,invscaling,101,0.173913
354,tanh,sgd,100,invscaling,1,0.130435
70,identity,sgd,100,invscaling,1,0.130435
66,identity,sgd,100,invscaling,1,0.130435


In [94]:
report(score=score_MLP, clf=MLPClassifier, n=5)

              precision    recall  f1-score   support

           0       1.00      0.92      0.96        12
           1       0.92      1.00      0.96        11

    accuracy                           0.96        23
   macro avg       0.96      0.96      0.96        23
weighted avg       0.96      0.96      0.96        23

Training Accuracy :  90.19607843137256
Test/Validation Accuracy :  95.65217391304348


In [229]:
results(mlp_neo)

specifity 0.9166666666666666
AUC :  0.9583333333333333
FPR : 0.08333333333333333
FNR : 0.0


### GP

In [95]:
score_GP=[]
for i in param_grid_GP['kernel']:
    for j in param_grid_GP['optimizer']:
        for k in param_grid_GP['random_state']:

            GP = GaussianProcessClassifier(kernel = i ,optimizer = j,random_state = k )
            GP.fit(x_train, y_train)
            score_GP.append([i,j,k, accuracy_score(y_test , GP.predict(x_test)) ])

score_GP = pd.DataFrame(score_GP)
score_GP = score_GP.sort_values(by=[3], ascending=[False])
score_GP.columns=['activation','solver','random_state','accuracy']      

score_GP

Unnamed: 0,activation,solver,random_state,accuracy
0,1**2 * RBF(length_scale=0.1),fmin_l_bfgs_b,1,0.913043
1,1**2 * RBF(length_scale=0.1),fmin_l_bfgs_b,101,0.913043
2,1**2 * RBF(length_scale=0.1),fmin_l_bfgs_b,42,0.913043
3,1**2 * RBF(length_scale=0.1),fmin_l_bfgs_b,365,0.913043
4,1**2 * RBF(length_scale=0.5),fmin_l_bfgs_b,1,0.913043
5,1**2 * RBF(length_scale=0.5),fmin_l_bfgs_b,101,0.913043
6,1**2 * RBF(length_scale=0.5),fmin_l_bfgs_b,42,0.913043
7,1**2 * RBF(length_scale=0.5),fmin_l_bfgs_b,365,0.913043
8,1**2 * RBF(length_scale=1),fmin_l_bfgs_b,1,0.913043
9,1**2 * RBF(length_scale=1),fmin_l_bfgs_b,101,0.913043


In [230]:
GP_neo = GaussianProcessClassifier(kernel = 1**2 * RBF(length_scale=0.1) ,optimizer = 'fmin_l_bfgs_b',
                                           random_state = 1).fit(x_train,y_train)
print(classification_report(y_test , GP_neo.predict(x_test)))

              precision    recall  f1-score   support

           0       0.92      0.92      0.92        12
           1       0.91      0.91      0.91        11

    accuracy                           0.91        23
   macro avg       0.91      0.91      0.91        23
weighted avg       0.91      0.91      0.91        23



In [231]:
results(GP_neo)

specifity 0.9166666666666666
AUC :  0.9128787878787878
FPR : 0.08333333333333333
FNR : 0.09090909090909091


### MV

In [179]:
dt_neo = DecisionTreeClassifier(criterion= 'gini' , splitter= 'best' , max_depth= 14, max_features= 'sqrt' , random_state= 42)
svm_neo = SVC(C= 0.5, kernel='linear', degree=1, gamma='scale', probability = True)
rf_neo = RandomForestClassifier(criterion = 'entropy' , n_estimators=130 , max_depth=10, max_features='log2' , random_state=356 )
mlp_neo = MLPClassifier(activation= 'identity', solver= 'lbfgs', hidden_layer_sizes= 50, learning_rate='constant', random_state = 101)

In [180]:
MV_neo = VotingClassifier(estimators=[('dt',dt_neo),('rf', rf_neo),('mlp',mlp_neo), ('svm', svm_neo),
                                     ('GP',GP_neo),('bagging',bagging_neo)], voting='hard')
MV_neo.fit(x_train,y_train)

In [115]:
print(classification_report(y_test , MV_neo.predict(x_test)))

              precision    recall  f1-score   support

           0       1.00      0.92      0.96        12
           1       0.92      1.00      0.96        11

    accuracy                           0.96        23
   macro avg       0.96      0.96      0.96        23
weighted avg       0.96      0.96      0.96        23



In [183]:
results(MV_neo)

specifity 0.9166666666666666
AUC :  0.9583333333333333
FPR : 0.08333333333333333
FNR : 0.0


# Feature Importance For Neonatal Factors

In [57]:
dt_neo = DecisionTreeClassifier(criterion= 'gini' , splitter= 'best' , max_depth= 14, max_features= 'sqrt' , random_state= 42).fit(x_train , y_train)
svm_neo = SVC(C= 0.5, kernel='linear', degree=1, gamma='scale', probability = True).fit(x_train , y_train)
rf_neo = RandomForestClassifier(criterion = 'entropy' , n_estimators=130 , max_depth=10, max_features='log2' , random_state=356 ).fit(x_train , y_train)
mlp_neo = MLPClassifier(activation= 'identity', solver= 'lbfgs', hidden_layer_sizes= 50, learning_rate='constant', random_state = 101).fit(x_train , y_train)

## SVM

In [59]:
result = permutation_importance(svm_neo, x_test, y_test, n_repeats=10, random_state=42)
# Get feature importances
importances = result.importances_mean
a=pd.DataFrame(importances)
a.index = neonatal_factors.columns
round(a[0],2)

Birth weight      0.20
Sex               0.07
fetal distress    0.00
preterm labor     0.12
Delivery Age      0.20
delivery type     0.04
Name: 0, dtype: float64

## DT

In [60]:
importances = dt_neo.feature_importances_
# Print feature importances
a=pd.DataFrame(importances)
a.index = neonatal_factors.columns
round(a[0],2)

Birth weight      0.24
Sex               0.12
fetal distress    0.00
preterm labor     0.00
Delivery Age      0.59
delivery type     0.05
Name: 0, dtype: float64

## RF

In [61]:
importances = rf_neo.feature_importances_
# Print feature importances
a=pd.DataFrame(importances)
a.index = neonatal_factors.columns
round(a[0],2)

Birth weight      0.39
Sex               0.08
fetal distress    0.02
preterm labor     0.10
Delivery Age      0.34
delivery type     0.07
Name: 0, dtype: float64

## Bagging

In [62]:
base_estimator=DecisionTreeClassifier(criterion= 'gini' , splitter= 'random' , max_depth= 6 , max_features= 'auto' , random_state= 101).fit(x_train, y_train)
bagging_neo = BaggingClassifier(base_estimator = base_estimator ,n_estimators=1, random_state=0).fit(x_train, y_train)
importances = bagging_neo.base_estimator.feature_importances_
# Print feature importances
a=pd.DataFrame(importances)
a.index = neonatal_factors.columns
round(a[0],2)



Birth weight      0.10
Sex               0.16
fetal distress    0.19
preterm labor     0.00
Delivery Age      0.50
delivery type     0.05
Name: 0, dtype: float64

## MLP

In [63]:
result = permutation_importance(mlp_neo, x_test, y_test, n_repeats=10, random_state=42)
# Get feature importances
importances = result.importances_mean
# Print feature importances
a=pd.DataFrame(importances)
a.index = neonatal_factors.columns
round(a[0],2)

Birth weight      0.27
Sex               0.07
fetal distress    0.00
preterm labor     0.24
Delivery Age      0.02
delivery type     0.06
Name: 0, dtype: float64

# Delivery Factors

In [50]:
x = delivery_factors
y = df['NICU']

oversample = SMOTE(k_neighbors=2 , random_state = 101)
x, y = oversample.fit_resample(x, y)

x_train,x_test,y_train,y_test = train_test_split(x,y,test_size = 0.3,random_state=101)

scaler = StandardScaler()

x_train = scaler.fit_transform(x_train)
x_test = scaler.fit_transform(x_test)

### DT

In [118]:
score_DT = []
for i in param_grid_DT['criterion']:
    for j in param_grid_DT['splitter']:
        for k in param_grid_DT['max_depth']:
            for l in param_grid_DT['max_features']:
                for m in param_grid_DT['random_state']: 
                    DT = DecisionTreeClassifier(criterion= i , splitter= j , max_depth= k , max_features= l , random_state= m)
                    DT.fit(x_train ,y_train)
                    score_DT.append([i,j,k,l,m, accuracy_score(y_test , DT.predict(x_test))])

score_DT = pd.DataFrame(score_DT)
score_DT = score_DT.sort_values(by=[5], ascending=[False])
score_DT.columns=['criterion','splitter','max_depth','max_features','random_state','accuracy']      

score_DT                  







Unnamed: 0,criterion,splitter,max_depth,max_features,random_state,accuracy
0,gini,best,2,auto,101,0.913043
317,entropy,best,2,sqrt,42,0.913043
235,gini,random,8,sqrt,356,0.913043
239,gini,random,8,log2,356,0.913043
243,gini,random,9,auto,356,0.913043
...,...,...,...,...,...,...
470,entropy,random,2,auto,1,0.565217
2,gini,best,2,auto,1,0.565217
162,gini,random,2,sqrt,1,0.565217
166,gini,random,2,log2,1,0.565217


In [119]:
report(score=score_DT, clf=DecisionTreeClassifier, n=3)

              precision    recall  f1-score   support

           0       0.86      1.00      0.92        12
           1       1.00      0.82      0.90        11

    accuracy                           0.91        23
   macro avg       0.93      0.91      0.91        23
weighted avg       0.93      0.91      0.91        23

Training Accuracy :  86.27450980392157
Test/Validation Accuracy :  91.30434782608695


In [235]:
results(dt_del)

specifity 1.0
AUC :  0.9090909090909092
FPR : 0.0
FNR : 0.18181818181818182


### SVM

In [120]:
score_SVM = []
for i in param_grid_SVM['C']:
    for j in param_grid_SVM['kernel']:
        for k in param_grid_SVM['degree']:
            for l in param_grid_SVM['gamma']:
                SVM = SVC(C=i, kernel=j, degree=k, gamma=l, probability = True)
                SVM.fit(x_train, y_train)
                score_SVM.append([i,j,k,l, accuracy_score(y_test , SVM.predict(x_test)) ])

score_SVM = pd.DataFrame(score_SVM)
score_SVM = score_SVM.sort_values(by=[4], ascending=[False])
score_SVM.columns=['C','kernel','degree','gamma', 'accuracy']      

score_SVM

Unnamed: 0,C,kernel,degree,gamma,accuracy
216,2.0,linear,1,scale,0.913043
64,0.5,sigmoid,6,scale,0.913043
305,2.5,linear,9,auto,0.913043
148,1.5,linear,3,scale,0.913043
149,1.5,linear,3,auto,0.913043
...,...,...,...,...,...
392,3.0,poly,8,scale,0.695652
384,3.0,poly,4,scale,0.695652
385,3.0,poly,4,auto,0.695652
389,3.0,poly,6,auto,0.695652


In [121]:
report(score=score_SVM, clf=SVC, n=3)

              precision    recall  f1-score   support

           0       0.86      1.00      0.92        12
           1       1.00      0.82      0.90        11

    accuracy                           0.91        23
   macro avg       0.93      0.91      0.91        23
weighted avg       0.93      0.91      0.91        23

Training Accuracy :  88.23529411764706
Test/Validation Accuracy :  91.30434782608695


In [236]:
results(svm_del)

specifity 1.0
AUC :  0.9090909090909092
FPR : 0.0
FNR : 0.18181818181818182


### RF

In [122]:
score_RF=[]
for i in param_grid_RF['criterion']:
    for j in param_grid_RF['n_estimators']:
        for k in param_grid_RF['max_depth']:
            for l in param_grid_RF['max_features']:
                for m in param_grid_RF['random_state']:
                    RF = RandomForestClassifier(criterion = i , n_estimators=j , max_depth=k, max_features=l , random_state=m )
                    RF.fit(x_train, y_train)
                    score_RF.append([i,j,k,l,m, accuracy_score(y_test , RF.predict(x_test)) ])

score_RF = pd.DataFrame(score_RF)
score_RF = score_RF.sort_values(by=[5], ascending=[False])
score_RF.columns=['criterion','n_estimators','max_depth','max_features','random_state','accuracy']      

score_RF

Unnamed: 0,criterion,n_estimators,max_depth,max_features,random_state,accuracy
244,gini,130,2,log2,101,0.913043
445,entropy,90,2,log2,42,0.913043
84,gini,50,2,log2,101,0.913043
83,gini,50,2,sqrt,356,0.913043
82,gini,50,2,sqrt,1,0.913043
...,...,...,...,...,...,...
559,entropy,130,10,log2,356,0.869565
283,entropy,10,2,sqrt,356,0.826087
287,entropy,10,2,log2,356,0.826087
7,gini,10,2,log2,356,0.826087


In [123]:
report(score=score_RF, clf=RandomForestClassifier, n=5)

              precision    recall  f1-score   support

           0       0.86      1.00      0.92        12
           1       1.00      0.82      0.90        11

    accuracy                           0.91        23
   macro avg       0.93      0.91      0.91        23
weighted avg       0.93      0.91      0.91        23

Training Accuracy :  86.27450980392157
Test/Validation Accuracy :  91.30434782608695


In [237]:
results(rf_del)

specifity 1.0
AUC :  0.9090909090909092
FPR : 0.0
FNR : 0.18181818181818182


### MLP

In [124]:
score_MLP=[]
for i in param_grid_MLP['activation']:
    for j in param_grid_MLP['solver']:
        for k in param_grid_MLP['hidden_layer_sizes']:
            for l in param_grid_MLP['learning_rate']:
                for m in param_grid_MLP['random_state']:
                    MLP = MLPClassifier(activation=i, solver=j, hidden_layer_sizes=k, learning_rate=l, random_state = m)
                    MLP.fit(x_train, y_train)
                    score_MLP.append([i,j,k,l,m, accuracy_score(y_test , MLP.predict(x_test)) ])

score_MLP = pd.DataFrame(score_MLP)
score_MLP = score_MLP.sort_values(by=[5], ascending=[False])
score_MLP.columns=['activation','solver','hidden_layer_sizes','learning_rate','random_state', 'accuracy']      

score_MLP



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("





Unnamed: 0,activation,solver,hidden_layer_sizes,learning_rate,random_state,accuracy
288,tanh,lbfgs,50,constant,101,0.913043
157,logistic,lbfgs,100,constant,42,0.913043
446,relu,lbfgs,100,constant,1,0.913043
447,relu,lbfgs,100,constant,356,0.913043
449,relu,lbfgs,100,invscaling,42,0.913043
...,...,...,...,...,...,...
487,relu,sgd,50,invscaling,356,0.217391
346,tanh,sgd,50,invscaling,1,0.173913
342,tanh,sgd,50,invscaling,1,0.173913
58,identity,sgd,50,invscaling,1,0.130435


In [125]:
report(score=score_MLP, clf=MLPClassifier, n=5)

              precision    recall  f1-score   support

           0       0.86      1.00      0.92        12
           1       1.00      0.82      0.90        11

    accuracy                           0.91        23
   macro avg       0.93      0.91      0.91        23
weighted avg       0.93      0.91      0.91        23

Training Accuracy :  90.19607843137256
Test/Validation Accuracy :  91.30434782608695


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


In [239]:
results(mlp_del)

specifity 1.0
AUC :  0.9090909090909092
FPR : 0.0
FNR : 0.18181818181818182


# GB

In [127]:
score_GP=[]
for i in param_grid_GP['kernel']:
    for j in param_grid_GP['optimizer']:
        for k in param_grid_GP['random_state']:

            GP = GaussianProcessClassifier(kernel = i ,optimizer = j,random_state = k )
            GP.fit(x_train, y_train)
            score_GP.append([i,j,k, accuracy_score(y_test , GP.predict(x_test)) ])

score_GP = pd.DataFrame(score_GP)
score_GP = score_GP.sort_values(by=[3], ascending=[False])
score_GP.columns=['activation','solver','random_state','accuracy']      

score_GP

Unnamed: 0,activation,solver,random_state,accuracy
0,1**2 * RBF(length_scale=0.1),fmin_l_bfgs_b,1,0.782609
1,1**2 * RBF(length_scale=0.1),fmin_l_bfgs_b,101,0.782609
2,1**2 * RBF(length_scale=0.1),fmin_l_bfgs_b,42,0.782609
3,1**2 * RBF(length_scale=0.1),fmin_l_bfgs_b,365,0.782609
4,1**2 * RBF(length_scale=0.5),fmin_l_bfgs_b,1,0.782609
5,1**2 * RBF(length_scale=0.5),fmin_l_bfgs_b,101,0.782609
6,1**2 * RBF(length_scale=0.5),fmin_l_bfgs_b,42,0.782609
7,1**2 * RBF(length_scale=0.5),fmin_l_bfgs_b,365,0.782609
8,1**2 * RBF(length_scale=1),fmin_l_bfgs_b,1,0.782609
9,1**2 * RBF(length_scale=1),fmin_l_bfgs_b,101,0.782609


In [240]:
GP_del = GaussianProcessClassifier(kernel = 1**2 * RBF(length_scale=0.1) ,optimizer = 'fmin_l_bfgs_b',
                                           random_state = 1).fit(x_train,y_train)
print(classification_report(y_test , GP_del.predict(x_test)))

              precision    recall  f1-score   support

           0       0.82      0.75      0.78        12
           1       0.75      0.82      0.78        11

    accuracy                           0.78        23
   macro avg       0.78      0.78      0.78        23
weighted avg       0.79      0.78      0.78        23



In [242]:
results(GP_del)

specifity 0.75
AUC :  0.7840909090909092
FPR : 0.25
FNR : 0.18181818181818182


# Bagging

In [129]:
l_acc_bag_DT = []
for ib in range (1,20):
    for jb in [101,42,0,1,356]:
        for i in param_grid_DT['criterion']:
            for j in param_grid_DT['splitter']:
                for k in param_grid_DT['max_depth']:
                    for l in param_grid_DT['max_features']:
                        for m in param_grid_DT['random_state']: 
                            
                            bagging = BaggingClassifier(base_estimator=DecisionTreeClassifier(criterion= i , splitter= j , max_depth= k , max_features= l , random_state= m),n_estimators=ib, random_state=jb).fit(x_train, y_train)
                            l_acc_bag_DT.append([i,j,k,l,m,ib,jb,accuracy_score(y_test , bagging.predict(x_test))])
l_acc_bag_DT = pd.DataFrame(l_acc_bag_DT)
l_acc_bag_DT.sort_values(by = 7 , ascending= False)

Unnamed: 0,0,1,2,3,4,5,6,7
29640,entropy,best,2,auto,101,10,0,0.913043
2812,entropy,best,2,sqrt,101,1,356,0.913043
2814,entropy,best,2,sqrt,1,1,356,0.913043
2815,entropy,best,2,sqrt,356,1,356,0.913043
2816,entropy,best,2,log2,101,1,356,0.913043
...,...,...,...,...,...,...,...,...
1407,gini,random,2,auto,356,1,0,0.478261
1408,gini,random,2,sqrt,101,1,0,0.478261
1409,gini,random,2,sqrt,42,1,0,0.478261
1410,gini,random,2,sqrt,1,1,0,0.478261


In [243]:
bagging_del = BaggingClassifier(base_estimator=DecisionTreeClassifier(criterion= 'entropy' , splitter= 'best' , max_depth= 2 , max_features= 'auto' , random_state= 101),n_estimators=10, random_state=0).fit(x_train, y_train)
print(classification_report(y_test , bagging_del.predict(x_test)))

              precision    recall  f1-score   support

           0       0.86      1.00      0.92        12
           1       1.00      0.82      0.90        11

    accuracy                           0.91        23
   macro avg       0.93      0.91      0.91        23
weighted avg       0.93      0.91      0.91        23



In [244]:
results(bagging_del)

specifity 1.0
AUC :  0.9090909090909092
FPR : 0.0
FNR : 0.18181818181818182


### MV

In [185]:
dt_del = DecisionTreeClassifier(criterion= 'gini' , splitter= 'best' , max_depth= 2, max_features= 'auto' , random_state= 101)
svm_del = SVC(C= 2, kernel='linear', degree=1, gamma='scale', probability = True)
rf_del = RandomForestClassifier(criterion = 'gini' , n_estimators=130 , max_depth=2, max_features='log2' , random_state=101 )
mlp_del= MLPClassifier(activation= 'tanh', solver= 'lbfgs', hidden_layer_sizes= 50, learning_rate='constant', random_state = 101)

In [186]:
MV_del = VotingClassifier(estimators=[('dt',dt_del),('rf', rf_del),('mlp',mlp_del), ('svm', svm_del),
                                     ('GP',GP_del),('bagging',bagging_del)], voting='hard')
MV_del.fit(x_train,y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


In [187]:
print(classification_report(y_test , MV_del.predict(x_test)))

              precision    recall  f1-score   support

           0       0.86      1.00      0.92        12
           1       1.00      0.82      0.90        11

    accuracy                           0.91        23
   macro avg       0.93      0.91      0.91        23
weighted avg       0.93      0.91      0.91        23



In [189]:
results(MV_del)

specifity 1.0
AUC :  0.9090909090909092
FPR : 0.0
FNR : 0.18181818181818182


# Feature Importance For Delivery Factors

In [66]:
dt_del = DecisionTreeClassifier(criterion= 'gini' , splitter= 'best' , max_depth= 2, max_features= 'auto' , random_state= 101).fit(x_train , y_train)
svm_del = SVC(C= 2, kernel='linear', degree=1, gamma='scale', probability = True).fit(x_train , y_train)
rf_del = RandomForestClassifier(criterion = 'gini' , n_estimators=130 , max_depth=2, max_features='log2' , random_state=101 ).fit(x_train , y_train)
mlp_del= MLPClassifier(activation= 'tanh', solver= 'lbfgs', hidden_layer_sizes= 50, learning_rate='constant', random_state = 101).fit(x_train , y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


## SVM

In [67]:
result = permutation_importance(svm_del, x_test, y_test, n_repeats=10, random_state=42)
# Get feature importances
importances = result.importances_mean
a=pd.DataFrame(importances)
a.index = delivery_factors.columns
round(a[0],2)

preterm labor    0.00
Delivery Age     0.37
delivery type    0.02
Name: 0, dtype: float64

## DT

In [68]:
importances = dt_del.feature_importances_
# Print feature importances
a=pd.DataFrame(importances)
a.index = delivery_factors.columns
round(a[0],2)

preterm labor    0.00
Delivery Age     0.92
delivery type    0.08
Name: 0, dtype: float64

## RF

In [69]:
importances = rf_del.feature_importances_
# Print feature importances
a=pd.DataFrame(importances)
a.index = delivery_factors.columns
round(a[0],2)

preterm labor    0.35
Delivery Age     0.55
delivery type    0.10
Name: 0, dtype: float64

## Bagging

In [70]:
base_estimator = DecisionTreeClassifier(criterion= 'entropy' , splitter= 'best' , max_depth= 2 , max_features= 'auto' , random_state= 101).fit(x_train, y_train)
bagging_del = BaggingClassifier(base_estimator=base_estimator,n_estimators=10, random_state=0).fit(x_train, y_train)
importances = bagging_del.base_estimator.feature_importances_
# Print feature importances
a=pd.DataFrame(importances)
a.index = delivery_factors.columns
round(a[0],2)



preterm labor    0.00
Delivery Age     0.89
delivery type    0.11
Name: 0, dtype: float64

## MLP

In [72]:
result = permutation_importance(mlp_del, x_test, y_test, n_repeats=10, random_state=42)
# Get feature importances
importances = result.importances_mean
# Print feature importances
a=pd.DataFrame(importances)
a.index = delivery_factors.columns
round(a[0],2)

preterm labor    0.15
Delivery Age     0.22
delivery type    0.00
Name: 0, dtype: float64

In [46]:
x = df.drop(['NICU','BMI 3','dep in 3st T'],axis = 1)
x

Unnamed: 0,Birth weight,Sex,Delivery Age,delivery type,preterm labor,Preeclampsia,fetal distress,Recent miscarriage,Age,Gravid,...,Mother's education,Mother's job,BMI 2,parity,live child,Common complaint Pregnancy,Menstrual History,Father's job,dep score in 2rd T,anx score in 2rd T
0,3.15,0,40,1,0,0,1,0,26,0,...,2,1,27.734375,0,0,1,1,0,5,0
2,2.45,1,34,0,1,0,0,1,33,1,...,0,0,27.734375,0,0,1,1,0,0,0
3,2.95,1,36,0,0,0,0,0,32,1,...,1,0,27.636054,1,1,1,1,1,12,5
4,3.45,0,40,0,0,0,0,0,45,1,...,2,0,31.934969,1,1,1,1,1,4,3
5,2.05,0,37,1,0,1,0,0,38,0,...,1,0,29.615806,0,0,1,0,1,4,1
6,2.85,0,37,1,1,0,1,0,38,0,...,1,0,32.388355,0,0,1,0,0,3,1
7,2.95,1,38,1,0,0,0,0,26,2,...,0,0,28.959,2,2,1,1,0,4,4
8,3.45,0,39,1,0,0,0,0,27,0,...,2,1,28.440955,0,0,1,0,1,1,3
9,1.9,1,36,1,1,0,1,0,22,0,...,1,0,32.007316,0,0,1,0,1,29,15
10,2.85,0,39,1,0,0,0,0,21,1,...,2,1,28.668892,1,1,1,1,1,5,3


In [11]:
whole_features = df[['Birth weight', 'Sex', 'Delivery Age', 'delivery type', 'preterm labor',
       'Preeclampsia', 'fetal distress', 'Recent miscarriage', 'Age', 'Gravid',
       'anti TPO', 'Monthly income', "Father's education",
       "Mother's education", "Mother's job", 'BMI 2', 'parity', 'live child',
       'Common complaint Pregnancy', 'Menstrual History', "Father's job",
       'dep score in 2rd T', 'anx score in 2rd T']]

# overall

In [12]:
x = whole_features
y = df['NICU']


oversample = SMOTE(k_neighbors=2, random_state=101)
x, y = oversample.fit_resample(x, y)

x_train,x_test,y_train,y_test = train_test_split(x,y,test_size = 0.3,random_state=101)

scaler = StandardScaler()

x_train = scaler.fit_transform(x_train)
x_test = scaler.fit_transform(x_test)

## DT

In [21]:
score_DT = []
for i in param_grid_DT['criterion']:
    for j in param_grid_DT['splitter']:
        for k in param_grid_DT['max_depth']:
            for l in param_grid_DT['max_features']:
                for m in param_grid_DT['random_state']: 
                    DT = DecisionTreeClassifier(criterion= i , splitter= j , max_depth= k , max_features= l , random_state= m)
                    DT.fit(x_train ,y_train)
                    score_DT.append([i,j,k,l,m, accuracy_score(y_test , DT.predict(x_test))])

score_DT = pd.DataFrame(score_DT)
score_DT = score_DT.sort_values(by=[5], ascending=[False])
score_DT.columns=['criterion','splitter','max_depth','max_features','random_state','accuracy']      

score_DT                  







Unnamed: 0,criterion,splitter,max_depth,max_features,random_state,accuracy
390,entropy,best,8,sqrt,1,0.956522
38,gini,best,5,auto,1,0.956522
338,entropy,best,4,auto,1,0.956522
438,entropy,best,12,sqrt,1,0.956522
374,entropy,best,7,auto,1,0.956522
...,...,...,...,...,...,...
260,gini,random,10,log2,101,0.521739
532,entropy,random,7,sqrt,101,0.521739
192,gini,random,5,auto,101,0.521739
528,entropy,random,7,auto,101,0.521739


In [22]:
report(score=score_DT, clf=DecisionTreeClassifier, n=5)

              precision    recall  f1-score   support

           0       1.00      0.92      0.96        12
           1       0.92      1.00      0.96        11

    accuracy                           0.96        23
   macro avg       0.96      0.96      0.96        23
weighted avg       0.96      0.96      0.96        23

Training Accuracy :  100.0
Test/Validation Accuracy :  95.65217391304348


In [23]:
results(DecisionTreeClassifier(criterion= 'entropy' , splitter= 'best' , max_depth= 8, max_features= 'sqrt' , random_state= 1).fit(x_train,y_train))

specifity 0.9166666666666666
AUC :  0.9583333333333333
FPR : 0.08333333333333333
FNR : 0.0


In [24]:
importances = DecisionTreeClassifier(criterion= 'entropy' , splitter= 'best' , max_depth= 8, max_features= 'sqrt' , random_state= 1).fit(x_train,y_train).feature_importances_

# Print feature importances
a=pd.DataFrame(importances)
a.index = x.columns
round(a[0],2)

Birth weight                  0.07
Sex                           0.00
Delivery Age                  0.62
delivery type                 0.00
preterm labor                 0.02
Preeclampsia                  0.00
fetal distress                0.00
Recent miscarriage            0.00
Age                           0.00
Gravid                        0.15
anti TPO                      0.00
Monthly income                0.00
Father's education            0.03
Mother's education            0.04
Mother's job                  0.00
BMI 2                         0.00
parity                        0.00
live child                    0.00
Common complaint Pregnancy    0.00
Menstrual History             0.00
Father's job                  0.00
dep score in 2rd T            0.00
anx score in 2rd T            0.08
Name: 0, dtype: float64

## SVM

In [25]:
score_SVM = []
for i in param_grid_SVM['C']:
    for j in param_grid_SVM['kernel']:
        for k in param_grid_SVM['degree']:
            for l in param_grid_SVM['gamma']:
                SVM = SVC(C=i, kernel=j, degree=k, gamma=l, probability = True)
                SVM.fit(x_train, y_train)
                score_SVM.append([i,j,k,l, accuracy_score(y_test , SVM.predict(x_test)) ])

score_SVM = pd.DataFrame(score_SVM)
score_SVM = score_SVM.sort_values(by=[4], ascending=[False])
score_SVM.columns=['C','kernel','degree','gamma', 'accuracy']      

score_SVM

Unnamed: 0,C,kernel,degree,gamma,accuracy
39,0.5,rbf,2,auto,0.956522
45,0.5,rbf,5,auto,0.956522
110,1.0,rbf,2,scale,0.956522
109,1.0,rbf,1,auto,0.956522
36,0.5,rbf,1,scale,0.956522
...,...,...,...,...,...
30,0.5,poly,7,scale,0.521739
31,0.5,poly,7,auto,0.521739
106,1.0,poly,9,scale,0.521739
177,1.5,poly,8,auto,0.478261


In [26]:
report(score=score_SVM, clf=SVC, n=4)

              precision    recall  f1-score   support

           0       0.92      1.00      0.96        12
           1       1.00      0.91      0.95        11

    accuracy                           0.96        23
   macro avg       0.96      0.95      0.96        23
weighted avg       0.96      0.96      0.96        23

Training Accuracy :  100.0
Test/Validation Accuracy :  95.65217391304348


In [27]:
results(SVC(C= 0.5, kernel='rbf', degree=2, gamma='auto', probability = True).fit(x_train,y_train))

specifity 1.0
AUC :  0.9545454545454546
FPR : 0.0
FNR : 0.09090909090909091


In [28]:
result = permutation_importance(SVC(C= 0.5, kernel='rbf', degree=2, gamma='auto', probability = True).fit(x_train,y_train), x_test, y_test, n_repeats=10, random_state=42)
# Get feature importances
importances = result.importances_mean
a=pd.DataFrame(importances)
a.index = x.columns
round(a[0],2)

Birth weight                  0.11
Sex                           0.00
Delivery Age                  0.17
delivery type                 0.04
preterm labor                 0.10
Preeclampsia                  0.00
fetal distress                0.00
Recent miscarriage            0.01
Age                           0.06
Gravid                        0.01
anti TPO                      0.04
Monthly income                0.00
Father's education            0.00
Mother's education            0.01
Mother's job                  0.07
BMI 2                         0.01
parity                        0.00
live child                    0.00
Common complaint Pregnancy    0.02
Menstrual History             0.05
Father's job                  0.02
dep score in 2rd T            0.02
anx score in 2rd T            0.05
Name: 0, dtype: float64

## RF

In [32]:
score_RF=[]
for i in param_grid_RF['criterion']:
    for j in param_grid_RF['n_estimators']:
        for k in param_grid_RF['max_depth']:
            for l in param_grid_RF['max_features']:
                for m in param_grid_RF['random_state']:
                    RF = RandomForestClassifier(criterion = i , n_estimators=j , max_depth=k, max_features=l , random_state=m )
                    RF.fit(x_train, y_train)
                    score_RF.append([i,j,k,l,m, accuracy_score(y_test , RF.predict(x_test)) ])

score_RF = pd.DataFrame(score_RF)
score_RF = score_RF.sort_values(by=[5], ascending=[False])
score_RF.columns=['criterion','n_estimators','max_depth','max_features','random_state','accuracy']      

score_RF

Unnamed: 0,criterion,n_estimators,max_depth,max_features,random_state,accuracy
391,entropy,50,8,log2,356,0.956522
467,entropy,90,8,sqrt,356,0.956522
466,entropy,90,8,sqrt,1,0.956522
271,gini,130,8,log2,356,0.956522
273,gini,130,10,sqrt,42,0.956522
...,...,...,...,...,...,...
294,entropy,10,4,log2,1,0.782609
282,entropy,10,2,sqrt,1,0.782609
286,entropy,10,2,log2,1,0.782609
290,entropy,10,4,sqrt,1,0.782609


In [33]:
rf = RandomForestClassifier(criterion = 'entropy' , n_estimators=50 , max_depth=8, max_features='log2' , random_state=356).fit(x_train,y_train)
print(classification_report(y_test,rf.predict(x_test)))

              precision    recall  f1-score   support

           0       0.92      1.00      0.96        12
           1       1.00      0.91      0.95        11

    accuracy                           0.96        23
   macro avg       0.96      0.95      0.96        23
weighted avg       0.96      0.96      0.96        23



In [34]:
results(rf)

specifity 1.0
AUC :  0.9545454545454546
FPR : 0.0
FNR : 0.09090909090909091


In [36]:
importances = rf.feature_importances_
# Print feature importances
a=pd.DataFrame(importances)
a.index = x.columns
round(a[0],2)

Birth weight                  0.16
Sex                           0.02
Delivery Age                  0.19
delivery type                 0.01
preterm labor                 0.05
Preeclampsia                  0.01
fetal distress                0.01
Recent miscarriage            0.03
Age                           0.04
Gravid                        0.03
anti TPO                      0.02
Monthly income                0.02
Father's education            0.02
Mother's education            0.02
Mother's job                  0.06
BMI 2                         0.05
parity                        0.04
live child                    0.05
Common complaint Pregnancy    0.01
Menstrual History             0.07
Father's job                  0.01
dep score in 2rd T            0.04
anx score in 2rd T            0.04
Name: 0, dtype: float64

## MLP

In [40]:
score_MLP=[]
for i in param_grid_MLP['activation']:
    for j in param_grid_MLP['solver']:
        for k in param_grid_MLP['hidden_layer_sizes']:
            for l in param_grid_MLP['learning_rate']:
                for m in param_grid_MLP['random_state']:
                    MLP = MLPClassifier(activation=i, solver=j, hidden_layer_sizes=k, learning_rate=l, random_state = m)
                    MLP.fit(x_train, y_train)
                    score_MLP.append([i,j,k,l,m, accuracy_score(y_test , MLP.predict(x_test)) ])

score_MLP = pd.DataFrame(score_MLP)
score_MLP = score_MLP.sort_values(by=[5], ascending=[False])
score_MLP.columns=['activation','solver','hidden_layer_sizes','learning_rate','random_state', 'accuracy']      

score_MLP

Unnamed: 0,activation,solver,hidden_layer_sizes,learning_rate,random_state,accuracy
441,relu,lbfgs,50,invscaling,42,0.956522
61,identity,sgd,100,constant,42,0.956522
539,relu,adam,50,invscaling,356,0.956522
433,relu,lbfgs,50,constant,42,0.956522
456,relu,lbfgs,150,constant,101,0.956522
...,...,...,...,...,...,...
342,tanh,sgd,50,invscaling,1,0.391304
235,logistic,sgd,200,invscaling,356,0.391304
239,logistic,sgd,200,invscaling,356,0.391304
54,identity,sgd,50,invscaling,1,0.347826


In [41]:
MLP = MLPClassifier(activation='relu', solver='lbfgs', hidden_layer_sizes=50, learning_rate='invscaling', random_state = 42).fit(x_train,y_train)
print(classification_report(y_test,MLP.predict(x_test)))

              precision    recall  f1-score   support

           0       1.00      0.92      0.96        12
           1       0.92      1.00      0.96        11

    accuracy                           0.96        23
   macro avg       0.96      0.96      0.96        23
weighted avg       0.96      0.96      0.96        23



In [42]:
results(MLP)

specifity 0.9166666666666666
AUC :  0.9583333333333333
FPR : 0.08333333333333333
FNR : 0.0


In [44]:
result = permutation_importance(MLP, x_test, y_test, n_repeats=10, random_state=42)
# Get feature importances
importances = result.importances_mean
# Print feature importances
a=pd.DataFrame(importances)
a.index = x.columns
round(a[0],2)

Birth weight                  0.13
Sex                           0.01
Delivery Age                  0.27
delivery type                 0.00
preterm labor                 0.11
Preeclampsia                  0.04
fetal distress                0.00
Recent miscarriage            0.02
Age                           0.08
Gravid                        0.01
anti TPO                      0.00
Monthly income                0.03
Father's education            0.00
Mother's education            0.02
Mother's job                  0.07
BMI 2                         0.03
parity                        0.00
live child                   -0.00
Common complaint Pregnancy    0.02
Menstrual History             0.05
Father's job                  0.02
dep score in 2rd T            0.04
anx score in 2rd T            0.07
Name: 0, dtype: float64

## GB

In [45]:
score_GP=[]
for i in param_grid_GP['kernel']:
    for j in param_grid_GP['optimizer']:
        for k in param_grid_GP['random_state']:

            GP = GaussianProcessClassifier(kernel = i ,optimizer = j,random_state = k )
            GP.fit(x_train, y_train)
            score_GP.append([i,j,k, accuracy_score(y_test , GP.predict(x_test)) ])

score_GP = pd.DataFrame(score_GP)
score_GP = score_GP.sort_values(by=[3], ascending=[False])
score_GP.columns=['activation','solver','random_state','accuracy']      

score_GP

Unnamed: 0,activation,solver,random_state,accuracy
4,1**2 * RBF(length_scale=0.5),fmin_l_bfgs_b,1,0.956522
5,1**2 * RBF(length_scale=0.5),fmin_l_bfgs_b,101,0.956522
6,1**2 * RBF(length_scale=0.5),fmin_l_bfgs_b,42,0.956522
7,1**2 * RBF(length_scale=0.5),fmin_l_bfgs_b,365,0.956522
8,1**2 * RBF(length_scale=1),fmin_l_bfgs_b,1,0.956522
9,1**2 * RBF(length_scale=1),fmin_l_bfgs_b,101,0.956522
10,1**2 * RBF(length_scale=1),fmin_l_bfgs_b,42,0.956522
11,1**2 * RBF(length_scale=1),fmin_l_bfgs_b,365,0.956522
0,1**2 * RBF(length_scale=0.1),fmin_l_bfgs_b,1,0.73913
1,1**2 * RBF(length_scale=0.1),fmin_l_bfgs_b,101,0.73913


In [47]:
GP= GaussianProcessClassifier(kernel = 1**2 * RBF(length_scale=0.5) ,optimizer = 'fmin_l_bfgs_b',
                                           random_state = 1).fit(x_train,y_train)
print(classification_report(y_test , GP.predict(x_test)))

              precision    recall  f1-score   support

           0       1.00      0.92      0.96        12
           1       0.92      1.00      0.96        11

    accuracy                           0.96        23
   macro avg       0.96      0.96      0.96        23
weighted avg       0.96      0.96      0.96        23



In [48]:
results(GP)

specifity 0.9166666666666666
AUC :  0.9583333333333333
FPR : 0.08333333333333333
FNR : 0.0


## Bagging

In [52]:
l_acc_bag_DT = []
for ib in range (1,20):
    for jb in [101,42,0,1,356]:
        for i in param_grid_DT['criterion']:
            for j in param_grid_DT['splitter']:
                for k in param_grid_DT['max_depth']:
                    for l in param_grid_DT['max_features']:
                        for m in param_grid_DT['random_state']: 
                            
                            bagging = BaggingClassifier(base_estimator=DecisionTreeClassifier(criterion= i , splitter= j , max_depth= k , max_features= l , random_state= m),n_estimators=ib, random_state=jb).fit(x_train, y_train)
                            l_acc_bag_DT.append([i,j,k,l,m,ib,jb,accuracy_score(y_test , bagging.predict(x_test))])
l_acc_bag_DT = pd.DataFrame(l_acc_bag_DT)
l_acc_bag_DT.sort_values(by = 7 , ascending= False)

Unnamed: 0,0,1,2,3,4,5,6,7
26224,gini,best,3,sqrt,101,9,0,0.956522
26225,gini,best,3,sqrt,42,9,0,0.956522
30382,entropy,best,11,log2,1,10,1,0.956522
26226,gini,best,3,sqrt,1,9,0,0.956522
30377,entropy,best,11,sqrt,42,10,1,0.956522
...,...,...,...,...,...,...,...,...
6085,entropy,random,2,auto,42,2,356,0.478261
6084,entropy,random,2,auto,101,2,356,0.478261
5783,gini,random,2,log2,356,2,356,0.478261
5782,gini,random,2,log2,1,2,356,0.478261


In [53]:
bagging = BaggingClassifier(base_estimator=DecisionTreeClassifier(criterion= 'gini' , splitter= 'best' , max_depth= 3 , max_features= 'sqrt' , random_state= 101),n_estimators=9, random_state=0).fit(x_train, y_train)
print(classification_report(y_test , bagging.predict(x_test)))

              precision    recall  f1-score   support

           0       0.92      1.00      0.96        12
           1       1.00      0.91      0.95        11

    accuracy                           0.96        23
   macro avg       0.96      0.95      0.96        23
weighted avg       0.96      0.96      0.96        23



In [54]:
results(bagging)

specifity 1.0
AUC :  0.9545454545454546
FPR : 0.0
FNR : 0.09090909090909091


In [55]:
base_estimator = DecisionTreeClassifier(criterion= 'gini' , splitter= 'best' , max_depth= 3 , max_features= 'sqrt' , random_state= 101).fit(x_train, y_train)
bagging_del = BaggingClassifier(base_estimator=base_estimator,n_estimators=9, random_state=0).fit(x_train, y_train)
importances = bagging_del.base_estimator.feature_importances_
# Print feature importances
a=pd.DataFrame(importances)
a.index = x.columns
round(a[0],2)

Birth weight                  0.15
Sex                           0.00
Delivery Age                  0.64
delivery type                 0.00
preterm labor                 0.00
Preeclampsia                  0.00
fetal distress                0.00
Recent miscarriage            0.00
Age                           0.00
Gravid                        0.00
anti TPO                      0.00
Monthly income                0.00
Father's education            0.00
Mother's education            0.00
Mother's job                  0.00
BMI 2                         0.07
parity                        0.00
live child                    0.00
Common complaint Pregnancy    0.00
Menstrual History             0.04
Father's job                  0.00
dep score in 2rd T            0.00
anx score in 2rd T            0.11
Name: 0, dtype: float64

## MV

In [21]:
dt = DecisionTreeClassifier(criterion= 'entropy' , splitter= 'best' , max_depth= 8, max_features= 'sqrt' , random_state= 1)
svm = SVC(C= 0.5, kernel='rbf', degree=2, gamma='auto', probability = True)
MLP = MLPClassifier(activation='relu', solver='lbfgs', hidden_layer_sizes=50, learning_rate='invscaling', random_state = 42)
rf = RandomForestClassifier(criterion = 'entropy' , n_estimators=50 , max_depth=8, max_features='log2' , random_state=356)
bagging = BaggingClassifier(base_estimator=DecisionTreeClassifier(criterion= 'gini' , splitter= 'best' , max_depth= 3 , max_features= 'sqrt' , random_state= 101),n_estimators=9, random_state=0)
GP= GaussianProcessClassifier(kernel = 1**2 * RBF(length_scale=0.5) ,optimizer = 'fmin_l_bfgs_b',
                                           random_state = 1)

In [40]:
MV = VotingClassifier(estimators=[('dt',dt),('rf', rf),('mlp',MLP), ('svm', svm),
                                     ('GP',GP),('bagging',bagging)], voting='hard')
MV.fit(x_train,y_train)

In [38]:
print(classification_report(y_test , MV.predict(x_test)))

              precision    recall  f1-score   support

           0       1.00      0.92      0.96        12
           1       0.92      1.00      0.96        11

    accuracy                           0.96        23
   macro avg       0.96      0.96      0.96        23
weighted avg       0.96      0.96      0.96        23



In [39]:
results(MV)

specifity 0.9166666666666666
AUC :  0.9583333333333333
FPR : 0.08333333333333333
FNR : 0.0
