In [37]:
# first make sure the input data are good 
import numpy as np 
import os 
import sys
from scipy.stats import zscore
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import Perceptron
from xgboost import XGBClassifier
from catboost import CatBoostClassifier
from sklearn.metrics import accuracy_score, recall_score, make_scorer, f1_score
from sklearn.metrics import balanced_accuracy_score, confusion_matrix, roc_auc_score
from sklearn.model_selection import cross_validate, LeaveOneOut, StratifiedKFold
from sklearn.model_selection import KFold, cross_val_predict
from sklearn.metrics import roc_curve, auc
import matplotlib.pyplot as plt
import shap
from scipy import stats
# Resolve the repository root for the machine this notebook runs on.
# Both paths are developer-specific checkouts. Note that sys.platform is
# 'linux' on every Linux distribution, not only Ubuntu.
if sys.platform == 'darwin':
    print("Current system is macOS")
    main_fold_path = '/Users/shanxiafeng/Documents/Project/Research/fnirs-prognosis/code/fnirs-treatment-response-prediction'
elif sys.platform == 'linux':
    print("Current system is Ubuntu")
    main_fold_path = '/home/jy/Documents/fnirs/treatment_response/fnirs-depression-deeplearning'
else:
    print("Current system is neither macOS nor Ubuntu")
    # No checkout location is known for this platform. Fail here with a clear
    # message instead of a NameError on `main_fold_path` a few lines below.
    raise RuntimeError(
        f"No project path is configured for platform {sys.platform!r}; "
        "define main_fold_path for this machine before running the notebook."
    )

# Make the project's `utils` and `scripts` packages importable. The guard keeps
# sys.path free of duplicate entries when this cell is re-run in the same kernel.
if main_fold_path not in sys.path:
    sys.path.append(main_fold_path)
# The relative data paths used later ('allData/...') resolve against this directory.
os.chdir(main_fold_path)
from utils.hyperopt_utils import get_best_hyperparameters, get_best_hyperparameters_skf_inside_loocv_monitoring_recall_bacc
from utils.fnirs_utils import print_md_table_val_test_AUC

from scripts.fusion_model.fusion_model_utils import derive_average_MMDR_score
from scripts.fusion_model.fusion_model_utils import replace_nan_with_mean
from scripts.fusion_model.fusion_model_utils import impute_nan_data
from scripts.fusion_model.fusion_model_utils import process_with_nan_using_imputation_zscore
from scripts.fusion_model.fusion_model_utils import read_base_T2_SDS_CGI 
from scripts.fusion_model.fusion_model_utils import read_pychiatry
from scripts.fusion_model.fusion_model_utils import read_HAMD_score
from scripts.fusion_model.fusion_model_utils import read_demographic
from scripts.fusion_model.fusion_model_utils import plot_avg_auc
from scripts.fusion_model.fusion_model_utils import train_xgboost_shuffle_feature 
from scripts.fusion_model.fusion_model_utils import save_shap
from scripts.fusion_model.fusion_model_utils import read_dose_information
import time


# NOTE(review): `time` is already imported at the end of the import block above;
# this second import is redundant but harmless.
import time
# Timestamp (seconds since the epoch) taken when this cell starts running.
start_time = time.time()

# Not used in this cell; presumably the number of cross-validation folds used
# by later cells — TODO confirm.
K_FOLD = 5
# Data locations, relative to the working directory set by os.chdir(main_fold_path).
fold_path = 'allData/prognosis_mix_hb/pretreatment_response'
# Not used in this cell.
# NOTE(review): the variable is spelled "MMDR" but the directory and file name
# are spelled "MDDR" — confirm which spelling is intended.
MMDR_path = 'allData/prognosis_mix_hb/pretreatment_response/MDDR/MDDR_derived_from_load_evaluate.npy'

# Load each input table through the project helpers in
# scripts.fusion_model.fusion_model_utils. Their return formats are not visible
# here; the next cell slices `demographic`, `pyschiatry`, `dose` and
# `hamd_timeline` as 2-D arrays and concatenates them column-wise.
base_T2_SDS_CGI = read_base_T2_SDS_CGI(fold_path)
# NOTE(review): "pyschiatry" is a misspelling of "psychiatry", but the name is
# referenced by later cells, so it is kept as is.
pyschiatry = read_pychiatry(fold_path)
HAMD_score = read_HAMD_score(fold_path)
demographic = read_demographic(fold_path)
dose = read_dose_information(fold_path)
# allow_pickle=True lets np.load unpickle object arrays, which can execute
# arbitrary code; only use it on data files from a trusted source.
hamd_timeline = np.load("allData/prognosis_mix_hb/pretreatment_response/hamd_timeline.npy", allow_pickle=True)



Current system is Ubuntu


In [48]:
import math
def count_nan(data):
    """Count the entries of ``data`` that are floating-point NaN.

    Parameters
    ----------
    data : iterable
        Values to inspect, e.g. one column of a (possibly object-dtype) array.
        Entries may be a mix of numbers, strings and other objects.

    Returns
    -------
    int
        Number of entries for which ``math.isnan`` is true.

    Notes
    -----
    Strings and integers can never be NaN and are skipped up front. The
    ``isinstance`` check also covers subclasses such as ``numpy.str_`` and
    ``bool``. Any other entry that cannot be interpreted as a real number
    (``None``, arbitrary objects) is treated as "not NaN" rather than raising
    ``TypeError``.
    """
    count = 0
    for value in data:
        if isinstance(value, (str, int)):
            continue
        try:
            if math.isnan(value):
                count += 1
        except (TypeError, ValueError):
            # Not convertible to float, so it cannot be NaN.
            continue
    return count
# Column labels for each feature group. The groups are concatenated below in
# the order: demographics, psychiatric history, dose, HAMD timeline.
demographic_name = [
    'Age',
    'Gender',
    'Ethnicity',
    'Handedness',
    'Education years',
    'Education level',
    'Perceived social support',
]
psychiatric_history_name = [
    'Past trauma',
    'Current psychiatric comorbidities - binary',
    'Current psychiatric comorbidities - coded',
    'Family history of psychiatric illness',
    'Age of depression onset (years)',
    'Duration of depression (years)',
    'Duration of untreated depression (months)',
    'Past EmD visit(s) because of depression',
    'Type of episode',
]
dose_name = ['Fluoxetine equivalent dose (mg/day)']
hamd_timeline_name = [f'HAMD score at T_{Ti}' for Ti in range(1, 9)]
all_names = demographic_name + psychiatric_history_name + dose_name + hamd_timeline_name

# Stack the feature groups column-wise: every demographic column, the first
# nine psychiatric columns, the second dose column (kept 2-D by the 1:2 slice)
# and the HAMD timeline.
all_values = np.concatenate(
    [demographic, pyschiatry[:, :9], dose[:, 1:2], hamd_timeline],
    axis=1,
)

# Map each label to the number of NaN entries in its column.
name_to_value = {
    label: count_nan(all_values[:, column])
    for column, label in enumerate(all_names)
}

TOTAL_SUBJECT = len(all_values)
print(TOTAL_SUBJECT)


def _as_table_row(name, value):
    """Format one Markdown row: label, NaN count, and its share of all subjects."""
    percentage = value / TOTAL_SUBJECT
    return f"| {name} | {value} | {percentage:.2%} |"


# Assemble the Markdown table: header first, then one row per feature.
header = "| Name       | Value | Percentage |\n|------------|-------|------------|"
rows = [_as_table_row(label, n_missing) for label, n_missing in name_to_value.items()]
table = "\n".join([header, "\n".join(rows)])

# Emit the finished Markdown table
print(table)
    
# print(pyschiatry[:, :9])
# print('psychiatric_history_name', len(psychiatric_history_name))
# print('demographic_name', len(demographic_name))
# print('dose', len(dose_name))
# print('hamd_timeline_name', len(hamd_timeline_name))



64
| Name       | Value | Percentage |
|------------|-------|------------|
| Age | 0 | 0.00% |
| Gender | 0 | 0.00% |
| Ethnicity | 0 | 0.00% |
| Handedness | 1 | 1.56% |
| Education years | 0 | 0.00% |
| Education level | 0 | 0.00% |
| Perceived social support | 0 | 0.00% |
| Past trauma | 0 | 0.00% |
| Current psychiatric comorbidities - binary | 0 | 0.00% |
| Current psychiatric comorbidities - coded | 0 | 0.00% |
| Family history of psychiatric illness | 0 | 0.00% |
| Age of depression onset (years) | 0 | 0.00% |
| Duration of depression (years) | 0 | 0.00% |
| Duration of untreated depression (months) | 1 | 1.56% |
| Past EmD visit(s) because of depression | 0 | 0.00% |
| Type of episode | 0 | 0.00% |
| Fluoxetine equivalent dose (mg/day) | 1 | 1.56% |
| HAMD score at T_1 | 0 | 0.00% |
| HAMD score at T_2 | 1 | 1.56% |
| HAMD score at T_3 | 2 | 3.12% |
| HAMD score at T_4 | 1 | 1.56% |
| HAMD score at T_5 | 2 | 3.12% |
| HAMD score at T_6 | 4 | 6.25% |
| HAMD score at T_7 | 2 | 3.

(64, 7)

In [None]:
# 1. Demographics
# 2. Clinical
# 3. HAMD timeline