In [1]:
import os
# The commented-out lines below are for running in a Jupyter Notebook outside PyCharm
# import sys
# path = os.path.join(os.path.dirname(os.getcwd()))
# sys.path.append(path) # 将所需要的根目录添加到路径
import warnings
warnings.filterwarnings("ignore")
import numpy as np
import pandas as pd
from IPython.display import clear_output as clear
# 导入模型
from models.MLClassifiers.MLClassifiers import GeneralMLClassifiers
# 导入小工具
from utils.common_utils import printlog
from trainTest.datasets.dataset_utils import get_fileName_weights, get_save_path, get_print_info
from trainTest.datasets.ml_dataset_utils import get_intra_ml_datasets
# 导入模型分类性能评价工具
from trainTest.train.intra_train_mls import train_test_intra_ml_model

In [2]:
### 1. File locations and subject identifiers
# Training data is read from <parent of the current working directory>/preProcessing/trainData.
file_path = os.path.join(os.path.dirname(os.getcwd()), 'preProcessing', 'trainData')
subjects_list_global = ['01', '02', '03', '04', '06', '31', '32', '33', '34', '36']

### 2. Dataset settings
total_exp_time = 5    # how many times each experiment is repeated
test_ratio = 0.2      # presumably the fraction of samples held out for testing -- confirm in get_intra_ml_datasets
feature_list = ['sub_emg_features', 'sub_angle_features']

### 3. Options handed to the train/test routine
train_test_utils = dict(
    save_model=True,
    parameter_optimization=False,
    test_metrics=['accuracy', 'precision', 'recall', 'f1', 'specificity', 'npv'],
    confusion_matrix=dict(
        get_cm=True,
        params=dict(
            show_type='all',
            plot=True,
            save_fig=True,
            save_results=True,
            cmap='YlGnBu',
        ),
    ),
)

In [None]:
# Train and test every classifier on every subject for each task, then aggregate
# the per-subject 'test_metrics.csv' files into two summary tables per classifier.

# Settings shared by every run; 'current_exp_time' is filled in for each repetition below.
settings_dict = {'total_exp_time': total_exp_time}
# Parallel lists: entry i of 'gait_or_motion' is paired with entry i of 'motion_type'.
data_params = {'gait_or_motion': ['motion', 'gait', 'gait', 'gait'],
               'motion_type': ['WAK', 'WAK', 'UPS', 'DNS']}

# Parameters to change
classifiers = ['KNN', 'LDA', 'SVM', 'RF']

# Root folder for all results written and read by this cell. Defined once so the
# training loop and the aggregation step cannot drift apart.
basic_save_path = os.path.join(os.path.dirname(os.getcwd()), 'results', 'Intra-Subject',
                               '12-Start-of-the-artMethodsComparison', 'AttenuationWeightedCE')

for gait_or_motion, motion_type in zip(data_params['gait_or_motion'], data_params['motion_type']):

    for classifier in classifiers:
        # This instance is only used to look up the model name; a fresh model is built per experiment.
        model_name = GeneralMLClassifiers(classifier).get_model_name()
        printlog(info=get_print_info(gait_or_motion, motion_type, subjects_list_global), time=False, line_break=False)

        ## Start training and testing
        for subject in subjects_list_global:
            file_name, _, encoded_label_name, raw_label_type, _ = get_fileName_weights(
                file_path, gait_or_motion, motion_type, subject, subjects_list_global)
            save_path = get_save_path(basic_save_path, gait_or_motion, motion_type, model_name, subject)
            print('保存结果的绝对路径： ', save_path['absolute_path'])
            print('保存结果的相对路径： ', save_path['relative_path'])
            # The confusion-matrix options carry the label type of the current subject/task.
            train_test_utils['confusion_matrix']['params']['label_type'] = raw_label_type

            for exp_tim in range(total_exp_time):
                clear()
                current_exp_time = exp_tim + 1
                settings_dict['current_exp_time'] = current_exp_time
                printlog(info='当前模型：%s'% model_name, time=True, line_break=False)
                printlog(info='当前受试者编号：%s' % subject, time=True, line_break=False)
                printlog(info='当前实验次数：%d / %d' % (current_exp_time, total_exp_time), time=True, line_break=False)
                x_train, y_train, x_test, y_test = get_intra_ml_datasets(
                    file_name, feature_list, encoded_label_name, total_exp_time, current_exp_time, test_ratio)

                # Fresh classifier for every repetition so no fitted state carries over.
                model = GeneralMLClassifiers(classifier)
                train_test_intra_ml_model(settings_dict, model, x_train, y_train, x_test, y_test, save_path, train_test_utils)

        # Once a model has been trained/tested on all subjects, compute and save the
        # results averaged over all subjects.
        df1_metrics = []        # every single test result of every loaded subject
        df2_metrics_mean = []   # per-subject mean row
        df2_metrics_std = []    # per-subject std row
        loaded_subjects = []    # subjects whose metrics file was actually found
        metric_names = None     # metric labels taken from the loaded CSV(s)
        printlog(info='当前模型：%s' % model_name, time=True, line_break=True)

        for subject in subjects_list_global:
            metrics_file_path = get_save_path(basic_save_path, gait_or_motion, motion_type, model_name, subject)
            metrics_file_name = os.path.join(metrics_file_path['absolute_path'], 'test_metrics.csv')
            # Skip subjects whose metrics file does not exist
            if not os.path.exists(metrics_file_name):
                print("受试者：%s 的文件: %s， 不存在！" %(subject, metrics_file_name))
                continue

            # Read the test_metrics of this subject
            print("读取受试者：%s 的test_metrics: " %subject)
            df = pd.read_csv(metrics_file_name, header=0, index_col=0)
            # After transposing, rows are presumably one per experiment followed by a
            # 'mean' row and a 'std' row -- TODO confirm against the writer of test_metrics.csv.
            transposed_values = df.T.values
            df1_metrics.extend(transposed_values[:-2, :])
            df2_metrics_mean.append(transposed_values[-2, :])
            df2_metrics_std.append(transposed_values[-1, :])
            loaded_subjects.append(subject)
            metric_names = df.index

        # Without any metrics file there is nothing to aggregate. Previously this case
        # hit an undefined (or stale, from an earlier classifier) `df` below.
        if not loaded_subjects:
            print("模型：%s 没有找到任何受试者的test_metrics文件，跳过汇总！" % model_name)
            continue

        printlog(info='当前模型：%s, 保存所有受试者所有测试指标的平均结果' % model_name, time=False, line_break=False)
        df1 = pd.DataFrame(df1_metrics, index=range(1, len(df1_metrics) + 1), columns=metric_names)
        # Compute the mean and append it to the DataFrame
        mean_row = df1.mean().to_frame().T  # convert to a DataFrame and transpose into a single row
        mean_row.index = ['mean']
        df1 = pd.concat([df1, mean_row])
        # Compute the standard deviation (excluding the 'mean' row just appended) and append it
        std_row = df1[:-1].std().to_frame().T
        std_row.index = ['std']
        df1 = pd.concat([df1, std_row]).round(3)
        # Save df1 one directory above the (last) subject's result folder
        dfs_save_path = os.path.dirname(metrics_file_path['absolute_path'])
        df1_save_name = os.path.join(dfs_save_path, 'all_metrics_averaged_results.csv')
        df1.to_csv(df1_save_name, index=True)

        printlog(info='当前模型：%s, 保存单个受试者测试指标平均的平均结果' % model_name, time=False, line_break=False)
        # Build and save df2: one "mean+std" string cell per (subject, metric)
        df2_metrics_mean, df2_metrics_std = np.round(np.array(df2_metrics_mean), 3), np.round(np.array(df2_metrics_std), 3)
        df2_metrics = np.array([str(df2_metrics_mean[i, j]) +'+'+ str(df2_metrics_std[i, j])
                                for i in range(df2_metrics_mean.shape[0])
                                for j in range(df2_metrics_mean.shape[1])])
        df2_metrics = df2_metrics.reshape(df2_metrics_mean.shape)
        # Label rows with the subjects that were actually loaded, so a missing file no
        # longer causes a row-count mismatch with subjects_list_global.
        df2 = pd.DataFrame(df2_metrics, index=['Sub'+i for i in loaded_subjects], columns=metric_names)
        # Mean and std of the per-subject means, appended as extra rows.
        # NOTE: np.std defaults to ddof=0 while pandas .std() used for df1 defaults to ddof=1.
        mean_row = np.round(np.mean(df2_metrics_mean, axis=0), 3)
        std_row = np.round(np.std(df2_metrics_mean, axis=0), 3)
        df2.loc['mean'] = mean_row
        df2.loc['std'] = std_row
        df2_save_name = os.path.join(dfs_save_path, 'alone_subject_averaged_results.csv')
        df2.to_csv(df2_save_name, index=True)