In [12]:
'''
Model using sEMG features + subject personal features, used for comparison.
Normalized separately.
'''

import joblib
import time
import pandas as pd
import numpy as np

from sklearn import preprocessing
from sklearn.svm import SVR
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.feature_selection import RFE

from utils_logger import logger_init
import logging

from utils_model import form_params_grid
from utils_iFEMG_feature import df_save_csv

In [13]:
# Prepare the dataset.
# The CSV's first row is used as the header and no column is promoted to the
# index, so any index column stored in the file is kept as an ordinary column.
data_df = pd.read_csv(
    r"E:\Data\paper2\iFEMG\iFEMG_extension_abs_normed_all.csv",
    header=0,
    index_col=None,
)

print(data_df.shape)
data_df.head()

(363, 33)


Unnamed: 0,subject_info_height,subject_info_weight,subject_info_age,subject_info_gender,subject_info_name,subject_info_label,bicps_br_initial_pressure_ave,bicps_br_FMG,bicps_br_mav,bicps_br_rms,...,tricps_br_medial_mean_power_freq,tricps_br_lateral_initial_pressure_ave,tricps_br_lateral_FMG,tricps_br_lateral_mav,tricps_br_lateral_rms,tricps_br_lateral_wave_length,tricps_br_lateral_zero_crossing,tricps_br_lateral_slope_sign_change,tricps_br_lateral_mean_freq,tricps_br_lateral_mean_power_freq
0,172,80,23,1,hpy,0.0,0.39202,0.564416,-0.783479,-0.588296,...,-2.082373,1.337618,-0.433583,-0.337397,-0.341393,-0.299173,1.594339,0.471496,0.863801,0.682046
1,172,80,23,1,hpy,0.0,0.39202,-0.398311,4.001066,4.361124,...,-1.675564,1.337618,-0.688947,3.836401,3.234571,3.946198,-3.039795,-4.013581,-2.828318,-2.948408
2,172,80,23,1,hpy,0.0,0.39202,-0.221025,-0.701112,-0.411915,...,-0.065325,1.337618,-0.513747,-0.335,-0.341471,-0.30569,0.87224,0.930294,0.641435,0.52762
3,172,80,23,1,hpy,0.0,0.39202,-0.258831,-0.811689,-0.592715,...,-0.145184,1.337618,-0.074921,-0.313549,-0.331923,-0.33471,-0.234919,-0.023807,0.120876,0.114766
4,172,80,23,1,hpy,0.0,0.39202,-0.078175,-0.295556,-0.304528,...,0.631359,1.337618,0.408739,-0.305507,-0.329471,-0.305711,-0.04322,0.020746,-0.038234,0.067408


In [14]:
# Standardise the subjects' personal information (height, weight, age) in place.
# Note: do not run this cell more than once.
# NOTE(review): the scaler is fitted on every row of data_df, i.e. also on the
# subject that is later held out as the test set — confirm this is acceptable.
subject_feature_columes = [
    'subject_info_height',
    'subject_info_weight',
    'subject_info_age',
]
data_df[subject_feature_columes] = preprocessing.StandardScaler().fit_transform(
    data_df[subject_feature_columes]
)
data_df

Unnamed: 0,subject_info_height,subject_info_weight,subject_info_age,subject_info_gender,subject_info_name,subject_info_label,bicps_br_initial_pressure_ave,bicps_br_FMG,bicps_br_mav,bicps_br_rms,...,tricps_br_medial_mean_power_freq,tricps_br_lateral_initial_pressure_ave,tricps_br_lateral_FMG,tricps_br_lateral_mav,tricps_br_lateral_rms,tricps_br_lateral_wave_length,tricps_br_lateral_zero_crossing,tricps_br_lateral_slope_sign_change,tricps_br_lateral_mean_freq,tricps_br_lateral_mean_power_freq
0,-1.52530,0.603985,0.260090,1,hpy,0.0,0.392020,0.564416,-0.783479,-0.588296,...,-2.082373,1.337618,-0.433583,-0.337397,-0.341393,-0.299173,1.594339,0.471496,0.863801,0.682046
1,-1.52530,0.603985,0.260090,1,hpy,0.0,0.392020,-0.398311,4.001066,4.361124,...,-1.675564,1.337618,-0.688947,3.836401,3.234571,3.946198,-3.039795,-4.013581,-2.828318,-2.948408
2,-1.52530,0.603985,0.260090,1,hpy,0.0,0.392020,-0.221025,-0.701112,-0.411915,...,-0.065325,1.337618,-0.513747,-0.335000,-0.341471,-0.305690,0.872240,0.930294,0.641435,0.527620
3,-1.52530,0.603985,0.260090,1,hpy,0.0,0.392020,-0.258831,-0.811689,-0.592715,...,-0.145184,1.337618,-0.074921,-0.313549,-0.331923,-0.334710,-0.234919,-0.023807,0.120876,0.114766
4,-1.52530,0.603985,0.260090,1,hpy,0.0,0.392020,-0.078175,-0.295556,-0.304528,...,0.631359,1.337618,0.408739,-0.305507,-0.329471,-0.305711,-0.043220,0.020746,-0.038234,0.067408
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
358,-0.53481,-0.457742,0.846506,1,zx,1.0,1.373585,1.356243,-1.739720,-1.746915,...,-0.142765,-1.412972,-0.700391,0.827688,0.952529,1.023123,-0.204017,-0.910203,0.507252,0.380126
359,-0.53481,-0.457742,0.846506,1,zx,1.0,1.373585,1.343570,-0.436713,-0.490570,...,2.276380,-1.412972,-0.699043,0.823401,0.899010,0.901988,-0.344486,-0.874743,0.379317,0.176260
360,-0.53481,-0.457742,0.846506,1,zx,1.0,1.373585,1.383356,0.089016,-0.048551,...,-0.876041,-1.412972,-0.510883,1.924468,1.778597,0.868482,-2.448931,-1.110668,-1.180412,-1.262641
361,-0.53481,-0.457742,0.846506,1,zx,1.0,1.373585,1.406133,1.436197,1.040483,...,0.598261,-1.412972,-0.527897,0.911058,0.859205,1.082908,-0.216781,-1.196006,0.609825,0.453691


In [4]:
# Initialise logging via the project helper, passing "SEMG model" as the log file name.
# Presumably this configures the logger that the later logging.info calls write to —
# confirm in utils_logger.
logger_init(log_file_name="SEMG model")

In [15]:
# Leave-one-subject-out evaluation of an RBF-kernel SVR:
# * each subject in turn is the test set and all other subjects form the
#   training set; hyper-parameters are chosen by cross-validated grid search
# * features: subject personal features + sEMG features (no FMG features)
# * the two sessions of each subject are merged into one, n=10

# Merge the two sessions of each subject by stripping the '-1' / '-2' markers.
# regex=False pins literal matching regardless of the pandas version's default.
data_df['subject_info_name'] = (
    data_df['subject_info_name']
    .str.replace('-1', '', regex=False)
    .str.replace('-2', '', regex=False)
)
known_subjects = set(data_df['subject_info_name'])
print(data_df.shape)
print(known_subjects)

subject_name_list = ['wcx', 'lpy', 'hpy', 'zjz', 'zk', 'lmt', 'zx', 'lmh', 'zpk', 'pym']

# Fail fast: a subject without any rows would otherwise only surface as an
# obscure error after a complete (and slow) grid search on that fold.
missing_subjects = [name for name in subject_name_list if name not in known_subjects]
if missing_subjects:
    raise ValueError(f"subjects not found in data_df: {missing_subjects}")

# Names of the columns used as model inputs.
columns_to_scale = ['subject_info_height',
                    'subject_info_weight',
                    'subject_info_age',
                    'bicps_br_mav',
                    'bicps_br_rms',
                    'bicps_br_wave_length',
                    'bicps_br_zero_crossing',
                    'bicps_br_slope_sign_change',
                    'bicps_br_mean_freq',
                    'bicps_br_mean_power_freq',
                    'tricps_br_medial_mav',
                    'tricps_br_medial_rms',
                    'tricps_br_medial_wave_length',
                    'tricps_br_medial_zero_crossing',
                    'tricps_br_medial_slope_sign_change',
                    'tricps_br_medial_mean_freq',
                    'tricps_br_medial_mean_power_freq',
                    'tricps_br_lateral_mav',
                    'tricps_br_lateral_rms',
                    'tricps_br_lateral_wave_length',
                    'tricps_br_lateral_zero_crossing',
                    'tricps_br_lateral_slope_sign_change',
                    'tricps_br_lateral_mean_freq',
                    'tricps_br_lateral_mean_power_freq']

# Per-fold results, one entry per held-out subject.
test_name_list = []
train_score_list = []
test_score_list = []
train_mse_list = []
test_mse_list = []
test_err_df_list = []   # one DataFrame per test subject: true label, predicted label, difference

# The search grid does not change between folds, so build it once.
svr_param_grid = {"C": np.logspace(-3, 3, 7), "gamma": np.logspace(-3, 3, 7)}

for subject in subject_name_list:
    logging.info("=======================================================")
    logging.info(f"test subject: {subject}")
    is_test_subject = data_df["subject_info_name"] == subject
    test_df = data_df[is_test_subject]    # test set
    train_df = data_df[~is_test_subject]  # training set
    x_test = test_df.loc[:, columns_to_scale].values
    y_test = test_df.loc[:, 'subject_info_label'].values
    x_train = train_df.loc[:, columns_to_scale].values
    y_train = train_df.loc[:, 'subject_info_label'].values
    logging.info(f"x_test: {x_test.shape}")
    logging.info(f"y_test: {y_test.shape}")
    logging.info(f"x_train: {x_train.shape}")
    logging.info(f"y_train: {y_train.shape}")

    # Grid search for the best SVR parameters, timing the whole search.
    start_time = time.perf_counter()

    # NOTE(review): no `cv` / `groups` argument is passed, so the inner folds are
    # not split by subject; consider a group-aware splitter — confirm intent.
    svr = GridSearchCV(SVR(kernel='rbf'), param_grid=svr_param_grid, n_jobs=-1)
    svr.fit(x_train, y_train)
    logging.info(f"Best params: {svr.best_params_}")

    end_time = time.perf_counter()
    logging.info(f"training time(min): {(end_time - start_time)/60}")

    score_test = svr.score(x_test, y_test)
    score_train = svr.score(x_train, y_train)
    logging.info(f"train score: {score_train}")
    logging.info(f"test score: {score_test}")

    test_pre = svr.predict(x_test)
    train_pre = svr.predict(x_train)
    # Compute each MSE once and reuse it for both logging and the result lists.
    train_mse = mean_squared_error(y_train, train_pre)
    test_mse = mean_squared_error(y_test, test_pre)
    logging.info(f"train mean squared error: {train_mse}")
    logging.info(f"test mean squared error: {test_mse}")

    temp_predict_label_df = pd.DataFrame({'y_true': y_test,
                                          'y_predicted': test_pre,
                                          'diff': y_test - test_pre})
    temp_predict_label_df['test_subject'] = subject
    logging.info(f"predicted label: \n{temp_predict_label_df}")

    test_name_list.append(subject)
    train_score_list.append(score_train)
    test_score_list.append(score_test)
    train_mse_list.append(train_mse)
    test_mse_list.append(test_mse)
    test_err_df_list.append(temp_predict_label_df)

(363, 33)
{'lpy', 'lmh', 'lmt', 'zjz', 'pym', 'zk', 'wcx', 'zpk', 'hpy', 'zx'}


In [16]:
# Generic module: gather the per-fold results into DataFrames ready for saving.

# Stack the per-subject prediction tables into one frame with a fresh index.
test_err_df = pd.concat(test_err_df_list, axis=0, ignore_index=True)

# One row per held-out subject with its train/test scores and MSEs.
model_performance_df = pd.DataFrame(
    dict(
        test_subject=test_name_list,
        train_score=train_score_list,
        test_score=test_score_list,
        train_mse=train_mse_list,
        test_mse=test_mse_list,
    )
)

In [17]:
# Save the per-subject performance summary as CSV through the project helper.
# NOTE(review): the target file name looks like a throwaway placeholder and does not
# say which experiment it belongs to — consider a descriptive name once it is
# confirmed that nothing else reads this path.
df_save_csv(model_performance_df, r"E:\Data\paper2\肌力预测模型结果\fuck.csv")

File E:\Data\paper2\肌力预测模型结果\fuck.csv saved!


In [None]:
# Save the combined true/predicted label table of all test subjects as CSV
# through the project helper.
df_save_csv(test_err_df, r"E:\Data\paper2\肌力预测模型结果\test_err_extension.csv")