In [1]:
import os
import numpy as np
import pandas as pd
from utils.common_utils import printlog, make_dir, list_dir_files
from utils.processing_tools.processing import movement_classification_sample_segmentation, data_nomalize
from IPython.display import clear_output as clear
from utils.common_utils import calculate_samples_per_class, calculate_class_weights

In [2]:
### Parameter settings
## 1. Directory holding the raw recordings (relative to the current working directory)
raw_file_path = os.path.join(os.getcwd(), 'rawData')

## 2. Subjects and motion types
subjects_list_global = [
    'AB156', 'AB185', 'AB186', 'AB188', 'AB189',
    'AB190', 'AB191', 'AB192', 'AB193', 'AB194',
]
motions_list_global = ['Sitting', 'Walking', 'RA', 'RD', 'SA', 'SD', 'Standing']

## 3. Channel (column) names of the EMG, IMU and joint-angle signals used from the raw data
emg_name = [
    'Left_TA', 'Left_MG', 'Left_SOL', 'Left_BF', 'Left_ST', 'Left_VL', 'Left_RF',
    'Right_TA', 'Right_MG', 'Right_SOL', 'Right_BF', 'Right_ST', 'Right_VL', 'Right_RF',
]
imu_name = [
    'Left_Shank_Ax', 'Left_Shank_Ay', 'Left_Shank_Az',
    'Left_Thigh_Ax', 'Left_Thigh_Ay', 'Left_Thigh_Az',
    'Waist_Ax', 'Waist_Ay', 'Waist_Az',
    'Right_Thigh_Ax', 'Right_Thigh_Ay', 'Right_Thigh_Az',
    'Right_Shank_Ax', 'Right_Shank_Ay', 'Right_Shank_Az',
]
angle_name = ['Left_Ankle', 'Left_Knee', 'Right_Knee', 'Right_Ankle']
raw_label_name = ['Mode']

## 4. The "post" (already filtered) data is used, so normalization is the only extra processing.
# Author's note on the post data: EMG was high-pass filtered at 20 Hz and low-pass
# filtered at 350 Hz (both 6th-order Butterworth) and notch filtered (6th-order
# Butterworth) to attenuate motion artifacts and ambient interference. The original
# note gives the notch settings as "6, 60 and 180 Hz, 300 Hz width".
# NOTE(review): those numbers look transposed - verify against the dataset documentation.
# Goniometer and IMU signals were low-pass filtered at 10 Hz and 25 Hz respectively
# (6th-order Butterworth).
# Supported methods per the author's note: 'min-max' (range 0..1), 'max-abs' (range -1..1),
# 'positive_negative_one' (range -1..1).
data_normalize_method = 'positive_negative_one'
# Supported levels per the author's note:
#   'matrix' - the min/max of the whole matrix are applied to every column, which keeps
#              the relative amplitudes between the channels of one modality;
#   'rows'   - described by the author as column level: every column (channel) is
#              normalized on its own using that column's min/max.
data_normalize_level = 'matrix'

## 5. Overlapping-window sampling: window length and stride
window = 256
step = 64 * 3  # overlap between consecutive windows is window - step
discard_transition_sample = True  # drop transition-state samples

## 6. Other settings (file_num caps the files taken per subject; classes is passed to the segmentation call)
# modals = ['E', 'A', 'G', 'E-A', 'E-G']
file_num = 40
classes = 7

In [13]:
### Process and save data
# For every subject: load up to `file_num` raw CSV files, normalize each modality,
# cut the signals into overlapping windows, stack the windows of all files and
# write one .npz archive per subject into ./trainData.
# This cell must be run before the read-back / statistics cell that loads those archives.

# The output directory does not depend on the subject, so it is prepared once.
file_save_path = os.path.join(os.getcwd(), 'trainData')
make_dir(file_save_path)
# Union of all columns needed from a raw file, so each CSV is parsed a single time.
used_columns = emg_name + imu_name + angle_name + raw_label_name

for subject in subjects_list_global:
    data_file_dir = os.path.join(raw_file_path, subject, 'Processed')
    sub_emg_sample, sub_imu_sample, sub_angle_sample, sub_label_encoded = [], [], [], []
    printlog(info = ''.join(['开始处理： ', subject]), time = True, line_break = False)
    all_files_list = list_dir_files(data_file_dir, verbose=False)
    files_list = all_files_list[0:file_num]
    if len(files_list) == 0:
        # Without this guard the failure would only surface later as an obscure
        # axis error when the empty sample list is passed to np.swapaxes.
        raise FileNotFoundError('No raw data files found in: ' + data_file_dir)

    for file_name in files_list:
        ## 1.1 Load the signal and label columns of this file
        printlog (info = 'step1: 加载data和label文件...'+file_name, time = False, line_break = False)
        raw_frame = pd.read_csv(file_name, usecols=used_columns, header=0)
        emg_data = raw_frame.loc[:, emg_name].values.astype('float')
        imu_data = raw_frame.loc[:, imu_name].values.astype('float')
        angle_data = raw_frame.loc[:, angle_name].values.astype('float')
        raw_label = raw_frame.loc[:, raw_label_name].values.astype('int')
        print(emg_data.shape, imu_data.shape, angle_data.shape, raw_label.shape)

        printlog ('step2: 数据预处理...', time = False, line_break = False)
        ## 2.1 Normalize EMG, IMU and joint angles (each modality separately)
        emg_data_pre = data_nomalize(emg_data, data_normalize_method, data_normalize_level)
        imu_data_pre = data_nomalize(imu_data, data_normalize_method, data_normalize_level)
        angle_data_pre = data_nomalize(angle_data, data_normalize_method, data_normalize_level)

        printlog ('step3: 重叠窗口分割...', time = False, line_break = False)
        emg_sample, imu_sample, angle_sample, label_encoded = movement_classification_sample_segmentation(
            emg_data_pre, imu_data_pre, angle_data_pre, raw_label, window, step, classes,
            discard_transition_sample=discard_transition_sample, verbose=True)

        printlog ('step4: 合并：所有的emg、IMU和角度数据...', time = False, line_break = False)
        sub_emg_sample.extend(emg_sample)
        sub_imu_sample.extend(imu_sample)
        sub_angle_sample.extend(angle_sample)
        sub_label_encoded.extend(label_encoded)

    printlog(info = ''.join(['合并受试者：', subject, '所有运动类型的数据...']), time = True, line_break = False)
    # Swap axes 1 and 2 of the stacked windows; per the original note the result is
    # laid out as (num_samples, num_channels, window_length).
    sub_emg_sample = np.swapaxes(np.array(sub_emg_sample), 1, 2)
    sub_imu_sample = np.swapaxes(np.array(sub_imu_sample), 1, 2)
    sub_angle_sample = np.swapaxes(np.array(sub_angle_sample), 1, 2)
    sub_label_encoded = np.array(sub_label_encoded)
    printlog(info = ''.join(['受试者：', subject, ', ', '处理完毕！']), time = False, line_break = False)
    print('emg_sample.shape: ', sub_emg_sample.shape, ', imu_sample.shape: ', sub_imu_sample.shape)
    print('angle_sample.shape: ', sub_angle_sample.shape, ', label_encoded.shape: ', sub_label_encoded.shape)

    printlog(info = ''.join(['保存受试者：', subject, '数据...']), time = True, line_break = False)
    ## Build the archive name and save
    file_save_name = os.path.join(file_save_path, ''.join([subject, '_targetTrainData.npz']))
    with open(file_save_name, 'wb') as f:
        np.savez(f, sub_emg_sample = sub_emg_sample, sub_imu_sample = sub_imu_sample,
                 sub_angle_sample = sub_angle_sample, sub_label_encoded = sub_label_encoded)
    printlog(info = ''.join(['受试者: ',subject, '数据保存完毕！']), time = False, line_break = False)
    printlog(info = file_save_name, time = False, line_break = False)
    # Wipe this subject's log from the cell output before moving on to the next one.
    clear()

In [3]:
## 1. Subject settings
subjects_list_global = ['AB156', 'AB185', 'AB186', 'AB188', 'AB189', 'AB190', 'AB191', 'AB192', 'AB193', 'AB194']
## 2. Read the saved archives back, as a sanity check, and collect per-class statistics
file_path = os.path.join(os.getcwd(), 'trainData')
column_names1 = ['运动%s样本数'% k for k in motions_list_global]
column_names2 = ['运动%s不平衡率'% k for k in motions_list_global]
count_columns = ['总样本数'] + column_names1
column_names = count_columns + column_names2

# One statistics row per subject; the frame is built once after the loop instead of
# being enlarged row by row.
stats_rows = []
for subject in subjects_list_global:
    printlog(info = ''.join(['读取文件：', subject]), time = True, line_break = False)
    file_name = os.path.join(file_path, ''.join([subject,'_targetTrainData.npz']))
    # Open the archive once and pull every array from that single handle. Calling
    # np.load repeatedly on one open file object depends on where the previous read
    # left the file position, which is fragile.
    with np.load(file_name) as npz_file:
        sub_emg_sample = npz_file['sub_emg_sample']
        sub_imu_sample = npz_file['sub_imu_sample']
        sub_angle_sample = npz_file['sub_angle_sample']
        sub_label_encoded = npz_file['sub_label_encoded']

    print('emg_sample.shape: ', sub_emg_sample.shape, ', imu_sample.shape: ', sub_imu_sample.shape)
    print('angle_sample.shape: ', sub_angle_sample.shape, ', label_encoded.shape: ', sub_label_encoded.shape)
    element_counts = calculate_samples_per_class(sub_label_encoded, verbose=True)
    class_weights = calculate_class_weights(sub_label_encoded, verbose=True)
    ## Row layout: total sample count, per-class counts, per-class imbalance rates
    stats_rows.append(np.concatenate(([len(sub_label_encoded)], element_counts, class_weights)))

df = pd.DataFrame(stats_rows, index=list(subjects_list_global), columns=column_names)
# Cast through astype with a column mapping: assigning a converted slice back through
# .iloc keeps the existing float dtype on recent pandas versions, so the counts would
# be written as floats. Rates are rounded to 4 decimals and stay float.
df = df.astype({name: int for name in count_columns})
df = df.round({name: 4 for name in column_names2})

df_save_path = os.path.join(os.getcwd(), 'ClassImbalancedInfo')
make_dir(df_save_path)
df_save_name = os.path.join(df_save_path, 'motion_classification.csv')
df.to_csv(df_save_name, encoding='GBK', index=True)


读取文件：AB156
emg_sample.shape:  (3989, 14, 256) , imu_sample.shape:  (3989, 15, 256)
angle_sample.shape:  (3989, 4, 256) , label_encoded.shape:  (3989,)
类别 0 样本数为: 870
类别 1 样本数为: 1505
类别 2 样本数为: 378
类别 3 样本数为: 476
类别 4 样本数为: 129
类别 5 样本数为: 130
类别 6 样本数为: 501
类别 0 不平衡率为: 0.6550082101806239
类别 1 不平衡率为: 0.3786426198386331
类别 2 不平衡率为: 1.5075585789871504
类别 3 不平衡率为: 1.1971788715486193
类别 4 不平衡率为: 4.41749723145072
类别 5 不平衡率为: 4.383516483516484
类别 6 不平衡率为: 1.1374394069004847

读取文件：AB185
emg_sample.shape:  (3946, 14, 256) , imu_sample.shape:  (3946, 15, 256)
angle_sample.shape:  (3946, 4, 256) , label_encoded.shape:  (3946,)
类别 0 样本数为: 864
类别 1 样本数为: 1536
类别 2 样本数为: 356
类别 3 样本数为: 442
类别 4 样本数为: 106
类别 5 样本数为: 123
类别 6 样本数为: 519
类别 0 不平衡率为: 0.65244708994709
类别 1 不平衡率为: 0.3670014880952381
类别 2 不平衡率为: 1.5834670947030498
类别 3 不平衡率为: 1.275371687136393
类别 4 不平衡率为: 5.318059299191375
类别 5 不平衡率为: 4.583042973286876
类别 6 不平衡率为: 1.0861546930911092

读取文件：AB186
emg_sample.shape:  (4081, 14, 256) , imu_sampl