In [1]:
# NOTE(review): star import. setup, compare_models, save_model, load_model,
# plot_model and predict_model are called in the cells below without any other
# import, so they presumably come from here; consider importing them by name.
from pycaret.classification import *
import pandas as pd
from sklearn.metrics import confusion_matrix, roc_curve, auc
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelBinarizer
import os
import shutil
import numpy as np
import warnings

# Use Arial as matplotlib's default font family for every figure.
plt.rcParams['font.family'] = 'Arial'
# Silence all Python warnings from this point on.
# NOTE(review): this also hides deprecation/convergence warnings from the
# modelling libraries -- confirm that is intended.
warnings.filterwarnings('ignore')

# Train model

Automatically extracted data

In [2]:
# Hearing-loss type classification on the automatically extracted features.

# Training data: 'severity' is dropped so that 'hearing_loss' is the only
# label column left in the frame.
data_path = 'data/train_extract.csv'
data = pd.read_csv(data_path, index_col=0).drop(columns=['severity'])

# Encode every class name as its position in this list (CHL -> 0, ...,
# Normal hearing -> 3). A name that is not in the list raises ValueError.
label = ['CHL','SNHL', 'Mixed HL', 'Normal hearing']
data['hearing_loss'] = data['hearing_loss'].map(label.index)

# Initialise the PyCaret experiment and keep the five top-ranked models.
experiment_name = 'extract_hearing_loss'
clf = setup(
    data=data,
    target='hearing_loss',
    use_gpu=True,
    session_id=123,
    experiment_name=experiment_name,
)
best_auto = compare_models(n_select=5)

# Persist the top-ranked model under models/.
os.makedirs('models', exist_ok=True)
save_path = os.path.join('models', f'{experiment_name}_model')
save_model(best_auto[0], save_path)

# Render the evaluation plots for the top-ranked model. With save=True the
# PNG files land in the current working directory.
eval_dir = os.path.join('evaluate', experiment_name)
os.makedirs(eval_dir, exist_ok=True)
cf = plot_model(best_auto[0], plot='confusion_matrix', save=True)
roc = plot_model(best_auto[0], plot='auc', save=True)  # 'auc' is the default plot
feature_importance = plot_model(best_auto[0], plot='feature', save=True)

# Relocate the saved PNGs into this experiment's evaluation folder.
shutil.move('Confusion Matrix.png', os.path.join(eval_dir, 'confusion_matrix.png'))
shutil.move('AUC.png', os.path.join(eval_dir, 'roc.png'))
shutil.move('Feature Importance.png', os.path.join(eval_dir, 'feature_importance.png'))

# Held-out test set, prepared exactly like the training frame.
data_test_path = 'data/test_extract.csv'
data_test = pd.read_csv(data_test_path, index_col=0).drop(columns=['severity'])
data_test['hearing_loss'] = data_test['hearing_loss'].map(label.index)

# Reload the model from disk and score the test set; the resulting frame is
# the cell's displayed output.
model = load_model(save_path)
predict_model(model, data=data_test)

Unnamed: 0,Description,Value
0,Session id,123
1,Target,hearing_loss
2,Target type,Multiclass
3,Original data shape,"(2810, 17)"
4,Transformed data shape,"(2810, 17)"
5,Transformed train set shape,"(1966, 17)"
6,Transformed test set shape,"(844, 17)"
7,Numeric features,16
8,Preprocess,True
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
gbc,Gradient Boosting Classifier,0.8448,0.9462,0.8448,0.8432,0.8413,0.7177,0.7198,3.123
lightgbm,Light Gradient Boosting Machine,0.8433,0.9466,0.8433,0.8418,0.8409,0.7186,0.72,2.532
rf,Random Forest Classifier,0.8357,0.9427,0.8357,0.8361,0.8301,0.6946,0.6997,0.251
et,Extra Trees Classifier,0.8326,0.9416,0.8326,0.8335,0.8234,0.6837,0.6926,0.2
dt,Decision Tree Classifier,0.7919,0.8118,0.7919,0.7932,0.7906,0.6272,0.6286,0.032
lr,Logistic Regression,0.7838,0.9022,0.7838,0.7767,0.7761,0.6027,0.606,0.32
ridge,Ridge Classifier,0.7747,0.0,0.7747,0.7546,0.7595,0.579,0.5839,0.018
lda,Linear Discriminant Analysis,0.7736,0.896,0.7736,0.7728,0.7704,0.5951,0.5967,0.022
knn,K Neighbors Classifier,0.7324,0.8334,0.7324,0.7252,0.7137,0.4791,0.4929,0.059
ada,Ada Boost Classifier,0.6902,0.7601,0.6902,0.7287,0.694,0.4721,0.4856,0.257


Transformation Pipeline and Model Successfully Saved


Transformation Pipeline and Model Successfully Loaded


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,Gradient Boosting Classifier,0.8431,0.9501,0.8431,0.843,0.842,0.7111,0.7118


Unnamed: 0,pta_ac,pta_bc,sl,srt,pb,250_ac,500_ac,1000_ac,2000_ac,4000_ac,6000_ac,8000_ac,500_bc,1000_bc,2000_bc,4000_bc,hearing_loss,prediction_label,prediction_score
0,63.591644,64.071594,95.0,60.0,68.0,40.196838,54.898613,65.444397,64.824577,75.627983,86.033691,95.068665,54.550320,60.974411,65.577675,71.095963,1,1,0.9725
1,65.624359,50.412266,20.0,45.0,1.0,61.966286,51.229630,46.049465,30.854788,55.671494,66.460533,57.419689,51.462425,46.110741,70.737244,54.896503,1,1,0.8914
2,18.780752,17.239866,20.0,20.0,7.0,83.625534,60.467449,61.188725,62.374153,46.100647,51.843330,71.768677,17.725185,22.247004,31.614569,22.422012,3,3,0.4825
3,20.938475,20.690275,30.0,20.0,0.0,16.068274,21.318129,35.746330,51.314316,70.878555,79.959961,75.858986,22.282335,35.368431,45.790504,71.243340,1,1,0.8666
4,28.033876,25.640808,30.0,30.0,2.0,17.661442,22.450958,25.015066,25.196245,45.040977,31.812569,22.838676,21.910337,21.336765,25.935600,45.537395,1,1,0.9528
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
696,41.159649,27.472963,3.0,40.0,96.0,21.179117,16.434290,10.912289,20.798878,25.594488,25.648846,40.940872,15.324203,9.126860,21.081354,26.238901,2,2,0.8659
697,15.030734,15.166432,25.0,15.0,100.0,46.046581,41.542656,36.338326,40.677994,59.986267,49.410858,45.970680,21.830103,11.442636,17.679497,26.203426,3,3,0.7456
698,14.949298,13.343937,30.0,60.0,16.0,21.777626,21.495274,17.106976,21.407763,40.408695,61.193295,55.400532,20.939615,12.351708,20.610508,40.582226,1,1,0.5843
699,24.182043,23.148726,30.0,29.0,96.0,45.949978,25.913429,30.344566,16.912123,35.910408,26.038538,45.192055,24.936766,26.368303,17.177185,35.311920,1,1,0.7768


In [3]:
# Severity classification on the automatically extracted features.

# Training data: 'hearing_loss' is dropped so that 'severity' is the only
# label column left in the frame.
data_path = 'data/train_extract.csv'
data = pd.read_csv(data_path, index_col=0).drop(columns=['hearing_loss'])

# Encode every severity grade as its position in this list (Normal -> 0, ...,
# Profound -> 5). A grade that is not in the list raises ValueError.
label = ['Normal','Mild', 'Moderate', 'Moderately severe',  'Severe', 'Profound']
data['severity'] = data['severity'].map(label.index)

# Initialise the PyCaret experiment and keep the five top-ranked models.
experiment_name = 'extract_severity'
clf = setup(
    data=data,
    target='severity',
    use_gpu=True,
    session_id=123,
    experiment_name=experiment_name,
)
best_auto = compare_models(n_select=5)

# Persist the top-ranked model under models/.
os.makedirs('models', exist_ok=True)
save_path = os.path.join('models', f'{experiment_name}_model')
save_model(best_auto[0], save_path)

# Render the evaluation plots for the top-ranked model. With save=True the
# PNG files land in the current working directory.
eval_dir = os.path.join('evaluate', experiment_name)
os.makedirs(eval_dir, exist_ok=True)
cf = plot_model(best_auto[0], plot='confusion_matrix', save=True)
roc = plot_model(best_auto[0], plot='auc', save=True)  # 'auc' is the default plot
feature_importance = plot_model(best_auto[0], plot='feature', save=True)

# Relocate the saved PNGs into this experiment's evaluation folder.
shutil.move('Confusion Matrix.png', os.path.join(eval_dir, 'confusion_matrix.png'))
shutil.move('AUC.png', os.path.join(eval_dir, 'roc.png'))
shutil.move('Feature Importance.png', os.path.join(eval_dir, 'feature_importance.png'))

# Held-out test set, prepared exactly like the training frame.
data_test_path = 'data/test_extract.csv'
data_test = pd.read_csv(data_test_path, index_col=0).drop(columns=['hearing_loss'])
data_test['severity'] = data_test['severity'].map(label.index)

# Reload the model from disk and score the test set; the resulting frame is
# the cell's displayed output.
model = load_model(save_path)
predict_model(model, data=data_test)

Unnamed: 0,Description,Value
0,Session id,123
1,Target,severity
2,Target type,Multiclass
3,Original data shape,"(2810, 17)"
4,Transformed data shape,"(2810, 17)"
5,Transformed train set shape,"(1966, 17)"
6,Transformed test set shape,"(844, 17)"
7,Numeric features,16
8,Preprocess,True
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lightgbm,Light Gradient Boosting Machine,0.9552,0.991,0.9552,0.9561,0.9548,0.9388,0.9391,3.507
gbc,Gradient Boosting Classifier,0.9527,0.9901,0.9527,0.9539,0.9524,0.9354,0.9356,4.696
rf,Random Forest Classifier,0.9461,0.9907,0.9461,0.9472,0.9452,0.9263,0.9266,0.253
dt,Decision Tree Classifier,0.9217,0.9496,0.9217,0.9243,0.9217,0.8931,0.8935,0.035
et,Extra Trees Classifier,0.9125,0.9891,0.9125,0.9134,0.9108,0.8799,0.8805,0.208
lda,Linear Discriminant Analysis,0.8718,0.9797,0.8718,0.8721,0.8689,0.8227,0.8242,0.025
ada,Ada Boost Classifier,0.8525,0.9412,0.8525,0.8489,0.8396,0.7983,0.8045,0.271
qda,Quadratic Discriminant Analysis,0.8036,0.9515,0.8036,0.8174,0.8048,0.735,0.7379,0.02
nb,Naive Bayes,0.7492,0.9364,0.7492,0.7707,0.7491,0.6626,0.6666,0.02
knn,K Neighbors Classifier,0.6851,0.9017,0.6851,0.6814,0.6759,0.5617,0.5637,0.054


Transformation Pipeline and Model Successfully Saved


Transformation Pipeline and Model Successfully Loaded


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,Light Gradient Boosting Machine,0.9415,0.9884,0.9415,0.9421,0.9414,0.9188,0.9189


Unnamed: 0,pta_ac,pta_bc,sl,srt,pb,250_ac,500_ac,1000_ac,2000_ac,4000_ac,6000_ac,8000_ac,500_bc,1000_bc,2000_bc,4000_bc,severity,prediction_label,prediction_score
0,63.591644,64.071594,95.0,60.0,68.0,40.196838,54.898613,65.444397,64.824577,75.627983,86.033691,95.068665,54.550320,60.974411,65.577675,71.095963,3,3,1.0000
1,65.624359,50.412266,20.0,45.0,1.0,61.966286,51.229630,46.049465,30.854788,55.671494,66.460533,57.419689,51.462425,46.110741,70.737244,54.896503,3,3,0.9997
2,18.780752,17.239866,20.0,20.0,7.0,83.625534,60.467449,61.188725,62.374153,46.100647,51.843330,71.768677,17.725185,22.247004,31.614569,22.422012,0,0,0.9999
3,20.938475,20.690275,30.0,20.0,0.0,16.068274,21.318129,35.746330,51.314316,70.878555,79.959961,75.858986,22.282335,35.368431,45.790504,71.243340,0,0,1.0000
4,28.033876,25.640808,30.0,30.0,2.0,17.661442,22.450958,25.015066,25.196245,45.040977,31.812569,22.838676,21.910337,21.336765,25.935600,45.537395,1,1,1.0000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
696,41.159649,27.472963,3.0,40.0,96.0,21.179117,16.434290,10.912289,20.798878,25.594488,25.648846,40.940872,15.324203,9.126860,21.081354,26.238901,1,1,0.9974
697,15.030734,15.166432,25.0,15.0,100.0,46.046581,41.542656,36.338326,40.677994,59.986267,49.410858,45.970680,21.830103,11.442636,17.679497,26.203426,0,0,1.0000
698,14.949298,13.343937,30.0,60.0,16.0,21.777626,21.495274,17.106976,21.407763,40.408695,61.193295,55.400532,20.939615,12.351708,20.610508,40.582226,0,0,1.0000
699,24.182043,23.148726,30.0,29.0,96.0,45.949978,25.913429,30.344566,16.912123,35.910408,26.038538,45.192055,24.936766,26.368303,17.177185,35.311920,0,0,1.0000


Manually extracted data

In [4]:
# Hearing-loss type classification on the manually extracted features.

# Training data: 'severity' is dropped so that 'hearing_loss' is the only
# label column left in the frame.
data_path = 'data/train_true.csv'
data = pd.read_csv(data_path, index_col=0).drop(columns=['severity'])

# Encode every class name as its position in this list (CHL -> 0, ...,
# Normal hearing -> 3). A name that is not in the list raises ValueError.
label = ['CHL','SNHL', 'Mixed HL', 'Normal hearing']
data['hearing_loss'] = data['hearing_loss'].map(label.index)

# Initialise the PyCaret experiment and keep the five top-ranked models.
experiment_name = 'manual_hearing_loss'
clf = setup(
    data=data,
    target='hearing_loss',
    use_gpu=True,
    session_id=123,
    experiment_name=experiment_name,
)
best_auto = compare_models(n_select=5)

# Persist the top-ranked model under models/.
os.makedirs('models', exist_ok=True)
save_path = os.path.join('models', f'{experiment_name}_model')
save_model(best_auto[0], save_path)

# Render the evaluation plots for the top-ranked model. With save=True the
# PNG files land in the current working directory.
eval_dir = os.path.join('evaluate', experiment_name)
os.makedirs(eval_dir, exist_ok=True)
cf = plot_model(best_auto[0], plot='confusion_matrix', save=True)
roc = plot_model(best_auto[0], plot='auc', save=True)  # 'auc' is the default plot
feature_importance = plot_model(best_auto[0], plot='feature', save=True)

# Relocate the saved PNGs into this experiment's evaluation folder.
shutil.move('Confusion Matrix.png', os.path.join(eval_dir, 'confusion_matrix.png'))
shutil.move('AUC.png', os.path.join(eval_dir, 'roc.png'))
shutil.move('Feature Importance.png', os.path.join(eval_dir, 'feature_importance.png'))

# Held-out test set, prepared exactly like the training frame.
data_test_path = 'data/test_true.csv'
data_test = pd.read_csv(data_test_path, index_col=0).drop(columns=['severity'])
data_test['hearing_loss'] = data_test['hearing_loss'].map(label.index)

# Reload the model from disk and score the test set; the resulting frame is
# the cell's displayed output.
model = load_model(save_path)
predict_model(model, data=data_test)

Unnamed: 0,Description,Value
0,Session id,123
1,Target,hearing_loss
2,Target type,Multiclass
3,Original data shape,"(2810, 17)"
4,Transformed data shape,"(2810, 17)"
5,Transformed train set shape,"(1966, 17)"
6,Transformed test set shape,"(844, 17)"
7,Numeric features,16
8,Preprocess,True
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
rf,Random Forest Classifier,0.9456,0.9881,0.9456,0.9481,0.9447,0.9012,0.9028,0.237
lightgbm,Light Gradient Boosting Machine,0.944,0.9874,0.944,0.9466,0.9438,0.8996,0.9006,2.503
et,Extra Trees Classifier,0.9425,0.9871,0.9425,0.9445,0.9418,0.8962,0.8974,0.201
gbc,Gradient Boosting Classifier,0.94,0.9836,0.94,0.9432,0.9395,0.892,0.8932,1.062
knn,K Neighbors Classifier,0.9181,0.9576,0.9181,0.9213,0.9157,0.8514,0.8538,0.059
dt,Decision Tree Classifier,0.9064,0.9184,0.9064,0.9079,0.9057,0.833,0.8339,0.023
lr,Logistic Regression,0.8804,0.9552,0.8804,0.8833,0.879,0.7869,0.7885,0.344
ridge,Ridge Classifier,0.851,0.0,0.851,0.8371,0.8396,0.7291,0.7328,0.018
lda,Linear Discriminant Analysis,0.8494,0.9413,0.8494,0.8528,0.8477,0.7351,0.7375,0.021
qda,Quadratic Discriminant Analysis,0.8423,0.9446,0.8423,0.8582,0.8464,0.7316,0.7359,0.019


Transformation Pipeline and Model Successfully Saved


Transformation Pipeline and Model Successfully Loaded


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,Random Forest Classifier,0.9458,0.9888,0.9458,0.9465,0.9445,0.9002,0.9012


Unnamed: 0,pta_ac,pta_bc,sl,srt,pb,250_ac,500_ac,1000_ac,2000_ac,4000_ac,6000_ac,8000_ac,500_bc,1000_bc,2000_bc,4000_bc,hearing_loss,prediction_label,prediction_score
0,63.0,62.0,25.0,60.0,68.0,50.0,60.0,65.0,65.0,80.0,95.0,95.0,60.0,65.0,65.0,70.0,1,1,1.00
1,65.0,50.0,30.0,45.0,72.0,35.0,35.0,45.0,60.0,120.0,110.0,100.0,35.0,45.0,70.0,70.0,1,1,0.81
2,17.0,15.0,30.0,20.0,96.0,10.0,10.0,15.0,25.0,5.0,10.0,10.0,10.0,15.0,20.0,5.0,3,3,0.97
3,20.0,20.0,30.0,20.0,84.0,20.0,15.0,20.0,25.0,55.0,50.0,55.0,15.0,20.0,25.0,50.0,1,1,1.00
4,27.0,25.0,30.0,30.0,92.0,25.0,20.0,30.0,30.0,40.0,40.0,45.0,20.0,25.0,30.0,40.0,1,1,0.98
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
696,40.0,27.0,30.0,40.0,96.0,30.0,40.0,40.0,40.0,60.0,70.0,85.0,30.0,20.0,30.0,40.0,2,2,0.85
697,13.0,13.0,35.0,15.0,100.0,25.0,15.0,15.0,10.0,10.0,10.0,15.0,15.0,15.0,10.0,10.0,3,3,1.00
698,13.0,12.0,30.0,10.0,96.0,20.0,15.0,10.0,15.0,25.0,30.0,35.0,15.0,5.0,15.0,25.0,1,1,0.72
699,23.0,22.0,30.0,25.0,96.0,25.0,25.0,30.0,20.0,40.0,50.0,45.0,20.0,25.0,20.0,35.0,1,1,0.94


In [5]:
# Severity classification on the manually extracted features.

# Training data: 'hearing_loss' is dropped so that 'severity' is the only
# label column left in the frame.
data_path = 'data/train_true.csv'
data = pd.read_csv(data_path, index_col=0).drop(columns=['hearing_loss'])

# Encode every severity grade as its position in this list (Normal -> 0, ...,
# Profound -> 5). A grade that is not in the list raises ValueError.
label = ['Normal','Mild', 'Moderate', 'Moderately severe',  'Severe', 'Profound']
data['severity'] = data['severity'].map(label.index)

# Initialise the PyCaret experiment and keep the five top-ranked models.
experiment_name = 'manual_severity'
clf = setup(
    data=data,
    target='severity',
    use_gpu=True,
    session_id=123,
    experiment_name=experiment_name,
)
best_auto = compare_models(n_select=5)

# Persist the top-ranked model under models/.
os.makedirs('models', exist_ok=True)
save_path = os.path.join('models', f'{experiment_name}_model')
save_model(best_auto[0], save_path)

# Render the evaluation plots for the top-ranked model. With save=True the
# PNG files land in the current working directory.
eval_dir = os.path.join('evaluate', experiment_name)
os.makedirs(eval_dir, exist_ok=True)
cf = plot_model(best_auto[0], plot='confusion_matrix', save=True)
roc = plot_model(best_auto[0], plot='auc', save=True)  # 'auc' is the default plot
feature_importance = plot_model(best_auto[0], plot='feature', save=True)

# Relocate the saved PNGs into this experiment's evaluation folder.
shutil.move('Confusion Matrix.png', os.path.join(eval_dir, 'confusion_matrix.png'))
shutil.move('AUC.png', os.path.join(eval_dir, 'roc.png'))
shutil.move('Feature Importance.png', os.path.join(eval_dir, 'feature_importance.png'))

# Held-out test set, prepared exactly like the training frame.
data_test_path = 'data/test_true.csv'
data_test = pd.read_csv(data_test_path, index_col=0).drop(columns=['hearing_loss'])
data_test['severity'] = data_test['severity'].map(label.index)

# Reload the model from disk and score the test set; the resulting frame is
# the cell's displayed output.
model = load_model(save_path)
predict_model(model, data=data_test)

Unnamed: 0,Description,Value
0,Session id,123
1,Target,severity
2,Target type,Multiclass
3,Original data shape,"(2810, 17)"
4,Transformed data shape,"(2810, 17)"
5,Transformed train set shape,"(1966, 17)"
6,Transformed test set shape,"(844, 17)"
7,Numeric features,16
8,Preprocess,True
9,Imputation type,simple


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
rf,Random Forest Classifier,0.9674,0.9886,0.9674,0.9685,0.9673,0.9556,0.9558,0.238
gbc,Gradient Boosting Classifier,0.9654,0.9879,0.9654,0.9664,0.9653,0.9528,0.953,1.602
lightgbm,Light Gradient Boosting Machine,0.9654,0.9888,0.9654,0.9664,0.9653,0.9527,0.9529,3.331
et,Extra Trees Classifier,0.9527,0.9884,0.9527,0.9549,0.9523,0.9353,0.9357,0.191
dt,Decision Tree Classifier,0.9298,0.9538,0.9298,0.9319,0.9298,0.9043,0.9048,0.024
knn,K Neighbors Classifier,0.8916,0.9772,0.8916,0.8933,0.8903,0.8513,0.8519,0.057
nb,Naive Bayes,0.883,0.9786,0.883,0.8919,0.8843,0.8421,0.8439,0.021
lda,Linear Discriminant Analysis,0.883,0.9803,0.883,0.8872,0.8801,0.8391,0.8403,0.023
ada,Ada Boost Classifier,0.8571,0.891,0.8571,0.8232,0.8304,0.8039,0.8112,0.156
lr,Logistic Regression,0.8474,0.9689,0.8474,0.8488,0.8451,0.7908,0.7917,0.41


Transformation Pipeline and Model Successfully Saved


Transformation Pipeline and Model Successfully Loaded


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,Random Forest Classifier,0.9643,0.9865,0.9643,0.9645,0.9643,0.9506,0.9506


Unnamed: 0,pta_ac,pta_bc,sl,srt,pb,250_ac,500_ac,1000_ac,2000_ac,4000_ac,6000_ac,8000_ac,500_bc,1000_bc,2000_bc,4000_bc,severity,prediction_label,prediction_score
0,63.0,62.0,25.0,60.0,68.0,50.0,60.0,65.0,65.0,80.0,95.0,95.0,60.0,65.0,65.0,70.0,3,3,1.00
1,65.0,50.0,30.0,45.0,72.0,35.0,35.0,45.0,60.0,120.0,110.0,100.0,35.0,45.0,70.0,70.0,3,3,0.57
2,17.0,15.0,30.0,20.0,96.0,10.0,10.0,15.0,25.0,5.0,10.0,10.0,10.0,15.0,20.0,5.0,0,0,0.98
3,20.0,20.0,30.0,20.0,84.0,20.0,15.0,20.0,25.0,55.0,50.0,55.0,15.0,20.0,25.0,50.0,0,0,1.00
4,27.0,25.0,30.0,30.0,92.0,25.0,20.0,30.0,30.0,40.0,40.0,45.0,20.0,25.0,30.0,40.0,1,1,0.82
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
696,40.0,27.0,30.0,40.0,96.0,30.0,40.0,40.0,40.0,60.0,70.0,85.0,30.0,20.0,30.0,40.0,1,1,0.93
697,13.0,13.0,35.0,15.0,100.0,25.0,15.0,15.0,10.0,10.0,10.0,15.0,15.0,15.0,10.0,10.0,0,0,1.00
698,13.0,12.0,30.0,10.0,96.0,20.0,15.0,10.0,15.0,25.0,30.0,35.0,15.0,5.0,15.0,25.0,0,0,1.00
699,23.0,22.0,30.0,25.0,96.0,25.0,25.0,30.0,20.0,40.0,50.0,45.0,20.0,25.0,20.0,35.0,0,0,0.91


# Test model

In [6]:
# # test model
# test_extract_path = 'data/test_extract.csv'

# y_true_extract = pd.read_csv(test_extract_path, index_col=0).drop(['hearing_loss'], axis=1)
# severity_label = ['Normal','Mild', 'Moderate', 'Moderately severe',  'Severe', 'Profound']
# y_true_extract['severity'] = y_true_extract['severity'].map(lambda x: severity_label.index(x))

# extract_severity_model = load_model('models/extract_severity_model')
# predict_model(extract_severity_model, data=y_true_extract)

In [7]:
# # test model
# test_extract_path = 'data/test_extract.csv'

# y_true_extract = pd.read_csv(test_extract_path, index_col=0).drop(['severity'], axis=1)
# hearing_loss_label = ['CHL','SNHL', 'Mixed HL', 'Normal hearing']
# y_true_extract['hearing_loss'] = y_true_extract['hearing_loss'].map(lambda x: hearing_loss_label.index(x))

# extract_hearing_loss_model = load_model('models/extract_hearing_loss_model')
# predict_model(extract_hearing_loss_model, data=y_true_extract)