Aim: Preprocess the data using NMF, and then apply a Decision Tree to see the result

In [2]:
import numpy as np
import time
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import LeaveOneOut
import os

from sklearn.metrics import confusion_matrix, roc_auc_score, f1_score
from sklearn.preprocessing import label_binarize

from sklearn.decomposition import NMF


In [3]:
# Folder expected to contain the `data.npy` / `label.npy` pair loaded below.
input_path = '/Users/shanxiafeng/Documents/Project/Research/fnirs-prognosis/code/fnirs-treatment-response-prediction/allData/prognosis/pre_treatment_hamd_reduction_50'

# Load both arrays; each filename is joined onto the folder path.
data = np.load(os.path.join(input_path, 'data.npy'))
label = np.load(os.path.join(input_path, 'label.npy'))


In [4]:

# Output directory; created up front if it is missing.
output_fold = '/Users/shanxiafeng/Documents/Project/Research/fnirs-prognosis/code/fnirs-treatment-response-prediction/results/ML_results'
# exist_ok=True makes the creation idempotent and avoids the check-then-create
# race of testing os.path.exists() first.
os.makedirs(output_fold, exist_ok=True)




def NMF_decomposition(hb, num_components=100, rand_state=0):
    """
    Reduce `hb` to `num_components` features per sample with NMF.

    `hb` is flattened to 2-D (first axis kept, all remaining axes merged),
    shifted so that its global minimum becomes 0 (NMF only accepts
    non-negative input), transposed, and factorised. The transpose of the
    fitted `components_` matrix is returned.

    Parameters
    ----------
    hb : np.ndarray
        Input array; only its first axis is preserved.
    num_components : int
        Forwarded to `NMF(n_components=...)`.
    rand_state : int
        Forwarded to `NMF(random_state=...)`.

    Returns
    -------
    np.ndarray
        Array of shape `(hb.shape[0], num_components)`.

    Notes
    -----
    `hb` itself is never modified by this function.
    """
    print("NMF decomposition")
    model = NMF(n_components=num_components, init='random', random_state=rand_state)

    flat = hb.reshape(hb.shape[0], -1)
    # Subtract out of place: reshape() usually returns a view, so an in-place
    # `-=` here would silently shift the caller's array as well.
    shifted = flat - np.min(flat)

    # Fit on (features x samples); only components_ is needed afterwards, so
    # the returned W matrix is discarded.
    model.fit_transform(shifted.T)

    return model.components_.T

def get_metrics(y_true, y_pred):
    """
    Compute accuracy, sensitivity, specificity and F1 for binary labels 0/1.

    Parameters
    ----------
    y_true, y_pred : array-like
        Ground-truth and predicted labels; both are passed unchanged to
        `confusion_matrix` and `f1_score`.

    Returns
    -------
    tuple
        `(accuracy, sensitivity, specificity, f1)`. A ratio whose denominator
        is zero (e.g. sensitivity when `y_true` has no positive sample) is
        returned as 0.0 instead of NaN.
    """
    # Passing labels explicitly guarantees a 2x2 matrix even when the data
    # contain only one of the two classes.
    cm = confusion_matrix(y_true, y_pred, labels=[0, 1])
    tn, fp, fn, tp = cm.ravel()

    def _safe_ratio(numerator, denominator):
        # With single-class data one of the denominators is 0; plain division
        # would give NaN plus a RuntimeWarning.
        return numerator / denominator if denominator > 0 else 0.0

    accuracy = _safe_ratio(tp + tn, tp + tn + fp + fn)
    sensitivity = _safe_ratio(tp, tp + fn)
    specificity = _safe_ratio(tn, tn + fp)
    f1 = f1_score(y_true, y_pred)

    return accuracy, sensitivity, specificity, f1

def train_DecisionTree(data, label):
    """
    Evaluate a decision tree on `data` / `label` with leave-one-out CV.

    `data` is flattened to 2-D (first axis kept). For every leave-one-out
    split the classifier is refit on the training rows and used to predict the
    single held-out row.

    Parameters
    ----------
    data : np.ndarray
        Feature array whose first axis indexes samples.
    label : np.ndarray
        Labels indexed with the same sample indices as `data`.

    Returns
    -------
    dict
        'accuracies'                : {feature_set: mean of the per-fold accuracies}
        'model_accuraies'           : {feature_set: list of per-fold accuracies}
        'current_time_seed'         : the seed assigned to the classifier
        'HB_TYPE_y_pred_and_y_test' : {feature_set: list of [y_pred, y_test] per fold}
        The misspelt key 'model_accuraies' is kept unchanged so existing
        consumers of this dict are not broken.
    """
    # Fixed seed (the original comment described it as timestamp-derived, but
    # it is a constant), so repeated runs are reproducible.
    current_time_seed = 1706166341

    models = {
        # "Logistic Regression": LogisticRegression(max_iter=150),
        "Decision Tree": DecisionTreeClassifier()
    }

    # Defined up front so the return below is valid even with no models; with
    # several models the last one's results are returned.
    save_result = {}
    for model in models.values():
        model.random_state = current_time_seed

        # One dict per model, created outside the feature-set loop so results
        # from every feature set are kept rather than only the last one.
        accuracies = {}
        HB_TYPE_accuraies = {}
        HB_TYPE_y_pred_and_y_test = {}

        for HB_TYPE in ['NMF']:  # ['HbO', 'HbR', 'HbO+HbR']
            fold_accuracies = []
            fold_outputs = []

            hb = data
            # For the HbO / HbR variants only one half of the last axis is used.
            if HB_TYPE == 'HbO':
                hb = hb[..., :hb.shape[-1]//2]
            elif HB_TYPE == 'HbR':
                hb = hb[..., hb.shape[-1]//2:]
            hb_2d = np.reshape(hb, (hb.shape[0], -1))

            # Leave-one-out: each sample is the test set exactly once.
            for train_index, test_index in LeaveOneOut().split(hb_2d):
                model.fit(hb_2d[train_index], label[train_index])

                y_test = label[test_index]
                y_pred = model.predict(hb_2d[test_index])

                fold_accuracies.append(accuracy_score(y_test, y_pred))
                fold_outputs.append([y_pred, y_test])

            HB_TYPE_accuraies[HB_TYPE] = fold_accuracies
            HB_TYPE_y_pred_and_y_test[HB_TYPE] = fold_outputs
            accuracies[HB_TYPE] = np.mean(fold_accuracies)

        save_result = {
            'accuracies': accuracies,
            'model_accuraies': HB_TYPE_accuraies,
            'current_time_seed': current_time_seed,
            'HB_TYPE_y_pred_and_y_test': HB_TYPE_y_pred_and_y_test,
        }
    return save_result

In [5]:
# save_result['HB_TYPE_y_pred_and_y_test']



def print_result(res_true_pred):
    """
    Print a one-row markdown table of the decision-tree metrics.

    `res_true_pred` is sliced as `[:, 0]` for the predictions and `[:, 1]`
    for the ground truth; both slices are handed to `get_metrics`, whose four
    return values fill the Accuracy / Sensitivity / Specificity / F1 columns.
    """
    def print_md_table(model_name, split_name, metric_values):
        # Build the data row first, then emit blank line, header, divider, row.
        metric_cells = ''.join(f" {metric_values[idx]:.4f} |" for idx in range(4))
        data_row = f'| {model_name} | {split_name} |' + metric_cells

        print()
        print('| Model Name |   Set   |Accuracy | Sensitivity | Specificity | F1 Score |')
        print('|------------|----------|----------|-------------|-------------|----------|')
        print(data_row)

    predictions = res_true_pred[:, 0]
    targets = res_true_pred[:, 1]
    print_md_table('Decision Tree', 'Test', get_metrics(targets, predictions))


In [10]:

import random

# Random search over NMF settings (random_state, n_components). After each
# trial the LOOCV sensitivity of the decision tree is computed, and whenever
# it beats the best value so far the configuration is written to `record_file`.
# NOTE(review): the selection metric comes from the same leave-one-out
# predictions that are printed afterwards, so the recorded "best" score is
# optimistically biased — confirm this is acceptable for the analysis.

# None = keep searching until the cell is interrupted (original behaviour);
# set an integer to bound the number of trials so the notebook can Run All.
MAX_TRIALS = None

record_file = '/Users/shanxiafeng/Documents/Project/Research/fnirs-prognosis/code/fnirs-treatment-response-prediction/scripts/ML/development/record.txt'

current_best_sen = 0
best_seed = 0
best_n_c = 0

# Loop-invariant preprocessing, done once. The shift is computed out of place
# so `data` itself is not modified (reshape() may return a view of it).
hb_2d = data.reshape(data.shape[0], -1)
hb_2d = hb_2d - np.min(hb_2d)
transposed_hb_2d = hb_2d.T

trial = 0
while MAX_TRIALS is None or trial < MAX_TRIALS:
    trial += 1

    random_state = random.randint(1, 99999)
    n_components = random.randint(5, 50)
    model = NMF(n_components=n_components, init='random', random_state=random_state)

    # Fit on (features x samples); the per-sample representation is the
    # transpose of components_.
    W = model.fit_transform(transposed_hb_2d)
    H = model.components_
    nmf_data = H.T

    save_result = train_DecisionTree(nmf_data, label)
    res_true_pred = np.array(save_result['HB_TYPE_y_pred_and_y_test']['NMF'])
    # Column 1 holds y_test, column 0 holds y_pred.
    _, sen, _, _ = get_metrics(res_true_pred[:, 1], res_true_pred[:, 0])

    if sen > current_best_sen:
        current_best_sen = sen
        best_seed = random_state
        best_n_c = n_components

        # Overwrite the record so the file always holds the current best.
        with open(record_file, 'w') as file:
            file.write(f'current_best_sen: {current_best_sen}\n')
            file.write(f'best_seed: {best_seed}\n')
            file.write(f'best_n_c: {best_n_c}\n')
        



KeyboardInterrupt: 

In [46]:
# Run LOOCV on `nmf_data` and print the resulting metric table.
# NOTE(review): in the visible notebook `nmf_data` is only assigned inside the
# random-search loop, so this evaluates whichever NMF fit that loop produced
# last — not necessarily the recorded best seed / n_components. Confirm.
# nmf_hb = NMF_decomposition(data, num_components=100, rand_state=0)
save_result = train_DecisionTree(nmf_data, label)
# Stack the per-fold [y_pred, y_test] pairs into one array for print_result.
res_true_pred = np.array(save_result['HB_TYPE_y_pred_and_y_test']['NMF'])
print_result(res_true_pred)


| Model Name |   Set   |Accuracy | Sensitivity | Specificity | F1 Score |
|------------|----------|----------|-------------|-------------|----------|
| Decision Tree | Test | 0.5385 | 0.2000 | 0.6400 | 0.1667 |
