In [7]:
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
from utils.common_utils import printlog, make_dir
from utils.processing_tools.processing import emg_filtering, gait_classification_sample_segmentation, data_nomalize, delete_row_with_nan
from utils.feature_extraction_tools.feature_extraction import emg_kinematic_feature_extraction
from utils.common_utils import calculate_samples_per_class, calculate_class_weights

In [9]:
### Parameter settings
## 1. Root directory of the raw recordings
raw_file_path = 'rawData'

## 2. Subjects, motion types, and the gait-phase labels used for each motion
subjects_list_global = ['01', '02', '03', '04', '06', '31', '32', '33', '34', '36']
motions_list_global = ['WAK', 'UPS', 'DNS']
gait_list_global = {
    'WAK': [1, 2, 3, 4, 5],
    'UPS': [1, 2, 3],
    'DNS': [1, 2],
}

## 3. Column names of the EMG and joint-angle channels in the raw data files
emg_name = ['sEMG: ' + muscle for muscle in (
    'tensor fascia lata', 'rectus femoris', 'vastus medialis',
    'semimembranosus', 'upper tibialis anterior', 'lower tibialis anterior',
    'lateral gastrocnemius', 'medial gastrocnemius', 'soleus',
)]
angle_name = ['Kinematic: %s %s flexion angle' % (side, joint)
              for side in ('left', 'right')
              for joint in ('hip', 'knee', 'ankle')]

## 4. Sampling frequency and preprocessing settings (all passed to emg_filtering)
fs = 1920        # sampling frequency, Hz
lowcut = 15      # band-pass low cut-off, Hz
highcut = 450    # band-pass high cut-off, Hz
imf_band = 2     # notch-filter band setting -- presumably a bandwidth in Hz; confirm in emg_filtering
imf_freq = 50    # notch-filter frequency, Hz
# NOTE(review): the original notes mention a notch quality factor of 100 and a
# 6th-order Butterworth filter; neither is set in this notebook, so they are
# presumably defaults inside emg_filtering -- confirm.

# Normalisation applied to the filtered EMG and the joint angles.
# Supported methods per the original notes: 'min-max' (range 0..1),
# 'max-abs' (range -1..1), 'positive_negative_one' (range -1..1).
data_normalize_method = 'positive_negative_one'
# Normalisation level per the original notes:
#   'matrix' - scale every column with the max/min of the whole matrix, which
#              keeps the relative amplitudes between channels of one modality;
#   'rows'   - scale each column (channel) with its own max/min, i.e. every
#              channel is normalised independently.
data_normalize_level = 'matrix'

## 5. Overlapping-window sampling: window length and step (overlap = window - step)
window = 96
step = 72

## 6. Feature-extraction settings for the EMG and joint-angle data
emg_channels = ['TFL', 'RF', 'VM', 'SEM', 'UTA', 'LTA', 'LG', 'MG', 'SOL']
angle_channels = ['%s_%s' % (side, joint)
                  for side in ('L', 'R')
                  for joint in ('Hip', 'Knee', 'Ankle')]
emg_feature_type = ['MAV', 'RMS', 'WL', 'ZC', 'SSC', 'WAMP']
angle_feature_type = ['RMS', 'AVG', 'MAX', 'MIN', 'PP', 'STD']
fea_normalize_method = 'max-abs'
fea_normalize_level = 'rows'

In [None]:
### Process every subject/motion recording and save the resulting training data.
### For each (subject, motion) pair: load the data/label CSVs -> drop rows whose
### 'Status' label is NaN -> filter and normalise -> overlapping-window
### segmentation -> feature extraction -> label encoding -> save as .npz.
for subject_id in subjects_list_global:
    subject = 'Sub' + subject_id
    data_file_dir = os.path.join(raw_file_path, subject, 'Data')
    label_file_dir = os.path.join(raw_file_path, subject, 'Labels')
    printlog(info = ''.join(['开始处理： ', subject]), time = True, line_break = False)
    for motion in motions_list_global:
        gait_list = gait_list_global[motion]
        printlog(info = "\n"+"********"*4 +''.join([' movement: ', motion, ' '])+"********"*4, time = False, line_break = False)
        printlog(info = 'step1: 加载data和label文件...', time = False, line_break = False)
        ## 1.1 Names of the data and label files
        data_file_name = os.path.join(data_file_dir, ''.join([subject, '_', motion, '_Data.csv']))
        label_file_name = os.path.join(label_file_dir, ''.join([subject, '_', motion, '_Label.csv']))
        ## 1.2 Read data and labels
        df_data = pd.read_csv(data_file_name, header=0)
        df_label = pd.read_csv(label_file_name, header=0)
        ## 1.3 Drop the rows of both frames for which the label column 'Status' is NaN
        df_data_pre, df_label_pre = delete_row_with_nan(df_data, df_label, rows = ['Status'], nan_index = 'Status')
        emg_raw_data = df_data_pre.loc[:, emg_name]
        angle_raw_data = df_data_pre.loc[:, angle_name].values
        gait_label = df_label_pre.loc[:, ['Status']].values.astype('int')
        group_label = df_label_pre.loc[:, ['Group']].values.astype('int')

        printlog('step2: 数据预处理...', time = False, line_break = False)
        emg_values = emg_raw_data.values  # shape: datapoint * channel
        n_points, n_channels = emg_values.shape
        emg_preprocessing = emg_filtering(fs, lowcut, highcut, imf_band, imf_freq)
        ## 2.1 Notch-filter each EMG channel.
        ## The channel index is named `ch` so it cannot be confused with the
        ## subject/motion iteration of the enclosing loops.
        emg_inf_data = np.zeros((n_points, n_channels))
        for ch in range(n_channels):
            emg_inf_data[:, ch] = emg_preprocessing.Implement_Notch_Filter(emg_values[:, ch])
        ## 2.2 Butterworth band-pass filter each notch-filtered channel
        emg_bpf_data = np.zeros((n_points, n_channels))
        for ch in range(n_channels):
            emg_bpf_data[:, ch] = emg_preprocessing.butter_bandpass_filter(emg_inf_data[:, ch])
        ## 2.3 Normalise the EMG and the joint angles
        emg_data_pre = data_nomalize(emg_bpf_data, data_normalize_method, data_normalize_level)
        angle_data_pre = data_nomalize(angle_raw_data, data_normalize_method, data_normalize_level)

        printlog('step3: 重叠窗口分割...', time = False, line_break = False)
        emg_sample, angle_sample, gait_label_raw, group_label_raw = gait_classification_sample_segmentation(
            emg_data_pre, angle_data_pre, gait_label, group_label, window, step)

        printlog('step4: emg和关节角度数据的特征提取...', time = False, line_break = False)
        emg_features, emg_feature_list, angle_features, angle_feature_list = emg_kinematic_feature_extraction(
            emg_sample, angle_sample, emg_channels, angle_channels,
            emg_feature_type, angle_feature_type, fea_normalize_method, fea_normalize_level)

        printlog('step5: 合并：所有的emg和角度数据、emg和角度特征、标签...', time = False, line_break = False)
        # Swap the 2nd and 3rd axes of the sample arrays
        # (per the original notes: num * window * channels -> num * channels * window).
        sub_emg_sample = np.swapaxes(np.array(emg_sample), 1, 2)
        sub_angle_sample = np.swapaxes(np.array(angle_sample), 1, 2)
        sub_emg_features = np.array(emg_features)
        sub_angle_features = np.array(angle_features)
        sub_gait_label_raw = np.array(gait_label_raw)
        sub_group_label_raw = np.array(group_label_raw)

        printlog(info = ''.join(['       获取受试者：', subject, ' ,运动类型：', motion, ' 中所有步态相位类型的十进制标签和one-hot标签...']), time = False, line_break = False)
        ## Integer encoding: a label's code is its position in this motion's
        ## gait list, so the codes follow the configured order rather than
        ## whatever labels happen to be present in the data.
        sub_gait_label_encoded = np.array([gait_list.index(label) for label in sub_gait_label_raw])
        ## One-hot encoding with the categories fixed to every code of this
        ## motion. Left to infer them from the data, the encoder would emit a
        ## narrower matrix (with shifted columns) for any recording in which a
        ## gait phase is absent.
        onehot_encoder = OneHotEncoder(categories=[list(range(len(gait_list)))], sparse_output=False)
        sub_gait_label_onehot = onehot_encoder.fit_transform(sub_gait_label_encoded.reshape(-1, 1))
        printlog(info = ''.join(['       受试者：', subject, ' ,运动类型：', motion, ' 处理完毕！']), time = False, line_break = False)
        print('emg_sample.shape: ', sub_emg_sample.shape, ', angle_sample.shape: ', sub_angle_sample.shape)
        print('emg_features.shape: ', sub_emg_features.shape, ', angle_features.shape: ', sub_angle_features.shape)
        print('gait_label_raw.shape: ', sub_gait_label_raw.shape, ', gait_label_encoded.shape: ', sub_gait_label_encoded.shape)
        print('gait_label_onehot.shape: ', sub_gait_label_onehot.shape, ', group_label_raw: ', sub_group_label_raw.shape)

        printlog(info = ''.join(['保存受试者：', subject, ' ,运动类型：', motion, ' 的数据...']), time = True, line_break = False)
        ## Output directory and file name
        file_save_path = os.path.join(os.getcwd(), 'trainData', 'gaitClassification', motion)
        make_dir(file_save_path)
        file_save_name = os.path.join(file_save_path, ''.join([subject, '_targetTrainData.npz']))
        ## Save all arrays of this subject/motion into one .npz archive
        with open(file_save_name, 'wb') as f:
            np.savez(
                f,
                sub_emg_sample = sub_emg_sample,
                sub_angle_sample = sub_angle_sample,
                sub_emg_features = sub_emg_features,
                sub_angle_features = sub_angle_features,
                sub_gait_label_raw = sub_gait_label_raw,
                sub_gait_label_encoded = sub_gait_label_encoded,
                sub_gait_label_onehot = sub_gait_label_onehot,
                sub_group_label_raw = sub_group_label_raw,
            )
        printlog(info = ''.join(['受试者: ',subject, ' ,运动类型：', motion, ' 的数据保存完毕！']), time = False, line_break = False)
        printlog(info = file_save_name, time = False, line_break = False)

In [None]:
## 1. Subject and motion settings
# Full subject list, kept for reference:
# subjects_list_global = list(['01','02','03','04','05','06', '31','32','33','34','36', '37'])
subjects_list_global = ['01', '02', '03', '04', '06', '31', '32', '33', '34', '36']  # the Sub35 DNS data is faulty
motions_list_global = ['WAK', 'UPS', 'DNS']
## 2. Reload every saved file as a check and collect class-imbalance statistics
for motion in motions_list_global:
    motion_gaits = gait_list_global[motion]
    file_path = os.path.join(os.getcwd(), 'trainData', 'gaitClassification', motion)
    count_columns = ['总样本数'] + ['步态%s样本数'% k for k in motion_gaits]
    ratio_columns = ['步态%s不平衡率'% k for k in motion_gaits]
    column_names = count_columns + ratio_columns
    stats_rows = {}  # subject -> [total, per-class counts..., per-class weights...]
    for subject_id in subjects_list_global:
        subject = 'Sub' + subject_id
        printlog(info = ''.join(['读取文件：', motion, ', ', subject]), time = True, line_break = False)
        file_name = os.path.join(file_path, ''.join([subject, '_targetTrainData.npz']))
        # Open the archive once and read every array from it. Calling np.load
        # repeatedly on one shared file handle re-parses the archive for each
        # key and depends on where the previous read left the file position.
        with np.load(file_name) as npz:
            sub_emg_sample = npz['sub_emg_sample']
            sub_angle_sample = npz['sub_angle_sample']
            sub_emg_features = npz['sub_emg_features']
            sub_angle_features = npz['sub_angle_features']
            sub_gait_label_raw = npz['sub_gait_label_raw']
            sub_gait_label_encoded = npz['sub_gait_label_encoded']
            sub_gait_label_onehot = npz['sub_gait_label_onehot']
            sub_group_label_raw = npz['sub_group_label_raw']
        print('emg_sample.shape: ', sub_emg_sample.shape, ', angle_sample.shape: ', sub_angle_sample.shape)
        print('emg_features.shape: ', sub_emg_features.shape, ', angle_features.shape: ', sub_angle_features.shape)
        print('gait_label_raw.shape: ', sub_gait_label_raw.shape, ', gait_label_encoded.shape: ', sub_gait_label_encoded.shape)
        print('gait_label_onehot.shape: ', sub_gait_label_onehot.shape, ', group_label_raw: ', sub_group_label_raw.shape)
        element_counts = calculate_samples_per_class(sub_gait_label_encoded, verbose=True)
        class_weights = calculate_class_weights(sub_gait_label_encoded, verbose=True)
        ## Collect the imbalance statistics of this subject
        stats_row = np.concatenate(([len(sub_gait_label_raw)], element_counts, class_weights))
        # Fail fast if the statistics do not line up with the columns
        # (e.g. a gait phase is missing from this subject's data).
        if len(stats_row) != len(column_names):
            raise ValueError('%s/%s: got %d statistics for %d columns'
                             % (motion, subject, len(stats_row), len(column_names)))
        stats_rows[subject] = stats_row
    # Build the frame once from the collected rows, then cast explicitly:
    # assigning through .iloc writes into the existing columns and does not
    # reliably change their dtype, so sample counts could be exported as floats.
    df = pd.DataFrame.from_dict(stats_rows, orient='index', columns=column_names)
    df = df.astype({**{name: int for name in count_columns},
                    **{name: float for name in ratio_columns}})
    df[ratio_columns] = df[ratio_columns].round(4)
    df_save_path = os.path.join(os.getcwd(), 'ClassImbalancedInfo')
    make_dir(df_save_path)
    df_save_name = os.path.join(df_save_path, motion+'_gait_classification.csv')
    df.to_csv(df_save_name, encoding='GBK', index=True)