In [None]:
import numpy as np
import pandas as pd
import xgboost as xgb
from sklearn.metrics import recall_score,accuracy_score,roc_auc_score,confusion_matrix,roc_curve
from sklearn.model_selection import train_test_split,cross_val_score,cross_validate






In [None]:
# Load the first worksheet of the workbook.
df_base = pd.read_excel('dataset_780.xlsx', sheet_name=0)

# Rows without a Diagnosis value cannot serve as labelled examples.
df_base = df_base.dropna(subset=['Diagnosis'])

# Text answer -> numeric code. The mapping is applied to every column, so a
# given string gets the same code wherever it appears.
# NOTE(review): 'diarrhea' and 'constipation' both collapse to 1 -- confirm
# that merging them is intentional.
mapping = {
    'no': 0,
    'yes': 1,
    'male': 1,
    'female': 0,
    'normal': 0,
    'diarrhea': 1,
    'constipation': 1,
    '+': 1,
    '++': 2,
    '+++': 3,
    'local': 1,
    'generalized': 2,
    'appendicitis': 1,
    'no appendicitis': 0,
    'uncomplicated': 0,
    'complicated': 1,
}

# A single frame-wide replace does the same value mapping as looping over
# the columns one by one.
df_base = df_base.replace(mapping)

feat_all = ['Peritonitis', 'Ketones_in_Urine', 'Nausea', 'Free_Fluids', 'Loss_of_Appetite', 'Appendix_Diameter', 'CRP', 'WBC_Count', 'Neutrophil_Percentage', 'Diagnosis']

# Do not rely on replace() silently downcasting object columns to numbers
# (deprecated in recent pandas): convert the selected columns explicitly.
# pd.to_numeric raises if any unmapped text is still present, which surfaces
# encoding gaps here instead of at model-fitting time.
df = df_base[feat_all].apply(pd.to_numeric)
print(df)






In [None]:

# Every column except the last is a feature; the last column is the target.
x = df.iloc[:, :-1]
y = df.iloc[:, -1]

# Two-stage split sharing one seed: hold out 20% as the test set, then take
# 25% of the remaining rows as the validation set (roughly 60/20/20 overall).
x_temp, x_test, y_temp, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=88,
)
x_train, x_val, y_train, y_val = train_test_split(
    x_temp,
    y_temp,
    test_size=0.25,
    random_state=88,
)

In [None]:
# Hyper-parameters gathered in one mapping so they can be read and tuned in
# a single place, then unpacked into the classifier constructor.
xgb_params = {
    'n_estimators': 222,
    'max_depth': 3,
    'learning_rate': 0.1,
    'subsample': 0.8,
    'colsample_bytree': 0.8,
    'random_state': 17,
    'eval_metric': 'logloss',
    'reg_alpha': 0.1,
    'reg_lambda': 0.1,
}
model_xgb = xgb.XGBClassifier(**xgb_params)

In [None]:
# Fit the model on the training split; the threshold-analysis cell calls
# predict_proba on this fitted instance.
model_xgb.fit(x_train, y_train)

# Score all three metrics in one cross-validation pass. The previous three
# cross_val_score calls each re-trained the model on the same 10 folds
# (30 fits); cross_validate evaluates every scorer on each fold's model
# (10 fits). The estimator is cloned per fold, so the fit above is untouched.
cv_results = cross_validate(
    model_xgb,
    x_train,
    y_train,
    cv=10,
    scoring=['accuracy', 'roc_auc', 'recall'],
    n_jobs=2,
)

# Same per-fold score arrays, under the names this cell has always exported.
scores = cv_results['test_accuracy']
scores_auc = cv_results['test_roc_auc']
scores_recall = cv_results['test_recall']

print("Mean cross_val accuracy Score:", scores.mean())
print("Standard Deviation of cross_val accuracy Scores:", scores.std())

print("Mean cross_val auc-roc score:", scores_auc.mean())
print("Standard Deviation of auc-roc Scores:", scores_auc.std())

print("Mean cross_val recall score:", scores_recall.mean())
print("Standard Deviation of recall Scores:", scores_recall.std())

In [None]:


def _ratio(numerator, denominator):
    """Return numerator / denominator, or NaN when the denominator is zero."""
    return numerator / denominator if denominator else float('nan')


# Positive-class probabilities on the validation split. Computed once and
# reused; both names are kept because this cell has always exported them.
y_val_prob = model_xgb.predict_proba(x_val)[:, 1]
y_pred_prob = y_val_prob

fpr, tpr, thresholds = roc_curve(y_val, y_val_prob)

# Youden's J: the ROC point that maximises TPR - FPR.
youden_index = tpr - fpr
optimal_idx = np.argmax(youden_index)
optimal_threshold = thresholds[optimal_idx]

# Closest-to-corner: the ROC point nearest to (FPR=0, TPR=1).
distances = np.sqrt((1 - tpr)**2 + fpr**2)
optimal_idx_eu = np.argmin(distances)
optimal_threshold_eu = thresholds[optimal_idx_eu]

print(f"Optimal Threshold Youden: {optimal_threshold} and Optimal Euclidean Distance :{optimal_threshold_eu}")

threshold_y = optimal_threshold
threshold_eu = optimal_threshold_eu

# AUC does not depend on the cut-off, so compute it once.
auc_score = roc_auc_score(y_val, y_val_prob)

# NOTE(review): both cut-offs are chosen on the validation split and then
# scored on that same split, so the figures below are optimistic. The
# held-out test split is the place for an unbiased estimate.
#
# One loop body replaces the two copy-pasted evaluation sections. Because
# the loop runs at notebook level, the names it assigns (y_thres,
# val_conf_matrix, TNv, ..., val_acu) end up holding the Euclidean-threshold
# results, exactly as they did before.
for cutoff_title, cutoff in (
    ("For Youden Index", threshold_y),
    ("For Eucledean distance", threshold_eu),
):
    y_thres = (y_val_prob >= cutoff).astype(int)
    print("Val AUC:", auc_score)

    # labels=[0, 1] pins the matrix layout to (negative, positive) so the
    # four-way unpack below always means TN, FP, FN, TP.
    val_conf_matrix = confusion_matrix(y_val, y_thres, labels=[0, 1])
    TNv, FPv, FNv, TPv = val_conf_matrix.ravel()

    # _ratio yields NaN instead of a 0/0 RuntimeWarning when a cut-off
    # produces no predicted (or no actual) members of a class.
    val_sensitivity = _ratio(TPv, TPv + FNv)
    val_specificity = _ratio(TNv, TNv + FPv)
    val_ppv = _ratio(TPv, TPv + FPv)
    val_npv = _ratio(TNv, TNv + FNv)

    print(cutoff_title)

    print(f"Sensitivity: {val_sensitivity:.3f} ({val_sensitivity * 100:.2f}%)")
    print(f"Specificity: {val_specificity:.3f} ({val_specificity * 100:.2f}%)")

    print (f"Positive predictive value : {val_ppv:.3f}({val_ppv*100:.2f}%)")
    print (f"Negative predictive value : {val_npv:.3f}({val_npv*100:.2f}%)")

    print("Val Confusion Matrix:\n", val_conf_matrix)
    val_acu = accuracy_score(y_val, y_thres)
    print("Val accuracy score:", val_acu)
