# In [None]:
import pandas as pd
import numpy as np
from tqdm import tqdm
import warnings
# Seed NumPy's global random number generator with a fixed value so that any
# code relying on np.random is reproducible between runs.
np.random.seed(618)
# NOTE(review): only get_time_domain_features and get_csi_cvi_features are
# called in the visible part of this file; the other two names may be used
# elsewhere — confirm before removing.
from hrvanalysis import get_time_domain_features, get_frequency_domain_features, get_sampen, get_csi_cvi_features
# Install a global "ignore" filter: warnings raised after this point are
# suppressed (this also hides pandas/NumPy runtime warnings).
warnings.filterwarnings('ignore')

# Load the input arrays from disk, in order: training-set x, training-set y,
# and test-set-A x (the directory names mean "training set" / "test set A").
# Despite the *_df names, np.load returns whatever the .npy files contain
# (presumably plain NumPy arrays, not DataFrames — TODO confirm).
train_df, train_y, test_df = (
    np.load(npy_path)
    for npy_path in (
        "./data/训练集/train_x.npy",
        "./data/训练集/train_y.npy",
        "./data/测试集A/test_x_A.npy",
    )
)


# Lags (in down-sampled steps) used for the shift/diff columns.
_LAG_GAPS = (1, 5, 10, 15, 20, 30)

# Rolling-window lengths (in down-sampled steps) used for the window statistics.
_ROLLING_WINDOWS = (10,)

# Column-wise reducers applied to every per-record series. Each entry is both
# the name of the DataFrame method that is called and the suffix appended to
# the source column name (e.g. "bo" -> "bo_mean").
_SUMMARY_STATS = ("mean", "max", "min", "std", "skew", "median", "var", "kurt")

# For each signal, the thresholds t for which "<signal>_<t>_count" stores the
# number of down-sampled steps whose value is strictly below t.
_LOW_VALUE_THRESHOLDS = (
    ("bo", (95, 94, 93, 92)),
    ("hr", (70, 65, 60, 55)),
)

# Subset of the time-domain HRV keys that is kept for the "bo" signal.
_BO_TIME_DOMAIN_KEYS = ("cvsd", "sdsd", "std_hr", "sdnn", "cvnni")


def _build_series_frame(record, sampling_rate):
    """Return the down-sampled bo/hr series plus lag, rolling and combined columns.

    ``record[0]`` and ``record[1]`` are reshaped to ``(-1, sampling_rate)`` and
    averaged along the second axis, so every output row is the mean of
    ``sampling_rate`` consecutive raw samples.
    """
    raw_df = pd.DataFrame({
        "bo": record[0].reshape(-1, sampling_rate).mean(axis=1),
        "hr": record[1].reshape(-1, sampling_rate).mean(axis=1),
    })

    # Lag features (author's note: 0.665 — presumably a validation score).
    for gap in _LAG_GAPS:
        for col in ("bo", "hr"):
            raw_df[f"{col}_shift_{gap}"] = raw_df[col].shift(gap)
            raw_df[f"{col}_diff_{gap}"] = raw_df[col].diff(gap)

    # Rolling-window statistics (author's note: 0.652).
    for col in ("bo", "hr"):
        for win in _ROLLING_WINDOWS:
            rolling = raw_df[col].rolling(window=win, min_periods=1)
            raw_df[f"{col}_win{win}_mean"] = rolling.mean().values
            raw_df[f"{col}_win{win}_std"] = rolling.std().values
            raw_df[f"{col}_win{win}_skew"] = rolling.skew().values

    # Combined features (author's note: 0.633). "bo_hr_sub" is a ratio, not a
    # subtraction; the name is kept because it ends up in the output columns.
    raw_df["bo_hr_sub"] = raw_df["bo"] / raw_df["hr"]
    raw_df["bo_hr_diff"] = raw_df["bo"] - raw_df["hr"]
    return raw_df


def _record_features(record, sampling_rate):
    """Return a one-row DataFrame holding every feature of a single record."""
    raw_df = _build_series_frame(record, sampling_rate)
    parts = []

    # Descriptive features: one reducer per pass over all columns of raw_df.
    for stat in _SUMMARY_STATS:
        reduced = getattr(raw_df, stat)(axis=0)
        parts.append(pd.DataFrame(reduced.values.reshape(1, -1),
                                  columns=reduced.index + f"_{stat}"))

    # Count features: number of steps strictly below each threshold.
    counts = {}
    for col, thresholds in _LOW_VALUE_THRESHOLDS:
        for threshold in thresholds:
            counts[f"{col}_{threshold}_count"] = [int((raw_df[col] < threshold).sum())]
    parts.append(pd.DataFrame(counts))

    # HRV features. The hrvanalysis helpers are applied to both series as-is;
    # only a subset of the time-domain keys is kept for "bo".
    bo_time = get_time_domain_features(list(raw_df["bo"]))
    bo_csi_cvi = get_csi_cvi_features(list(raw_df["bo"]))
    parts.append(pd.DataFrame({key + "_bo": [value] for key, value in bo_time.items()
                               if key in _BO_TIME_DOMAIN_KEYS}))
    parts.append(pd.DataFrame({key + "_bo": [value] for key, value in bo_csi_cvi.items()}))

    hr_time = get_time_domain_features(list(raw_df["hr"]))
    parts.append(pd.DataFrame({key + "_hr": [value] for key, value in hr_time.items()}))

    # A single column-wise concat instead of growing the frame piece by piece.
    return pd.concat(parts, axis=1, ignore_index=False)


def get_feats(data, sampling_rate):
    """Build one feature row per record from paired "bo" and "hr" signals.

    For every record the two signals are down-sampled by averaging blocks of
    ``sampling_rate`` consecutive samples, expanded with lag, rolling-window
    and combined columns, and then summarised with descriptive statistics,
    below-threshold counts and hrvanalysis time-domain / CSI-CVI features.

    Args:
        data: Iterable of records. ``record[0]`` is used as the "bo" signal and
            ``record[1]`` as the "hr" signal; both must support
            ``reshape(-1, sampling_rate)`` (e.g. 1-D NumPy arrays). Presumably
            "bo" is blood oxygen and "hr" is heart rate — TODO confirm.
        sampling_rate: Number of consecutive raw samples averaged into one
            down-sampled step.

    Returns:
        A DataFrame with one row per record, in input order, and a fresh
        0..n-1 index. An empty DataFrame is returned when ``data`` is empty.

    Raises:
        ValueError: If a signal's length is not a multiple of
            ``sampling_rate`` (raised by ``reshape``).
    """
    # Collect the per-record rows and concatenate once: calling pd.concat
    # inside the loop re-copies all accumulated rows on every iteration.
    rows = [_record_features(record, sampling_rate) for record in tqdm(data)]
    if not rows:
        return pd.DataFrame()
    return pd.concat(rows, axis=0, ignore_index=True)


# Build the feature tables for both splits, averaging every 3 raw samples into
# one step, and replace +/-inf with 0 (infinities can arise e.g. from the
# bo / hr ratio when hr is 0).
train_feat = get_feats(train_df, 3).replace([-np.inf, np.inf], 0)
test_feat = get_feats(test_df, 3).replace([-np.inf, np.inf], 0)

# Persist the feature tables as CSV without the row index.
train_feat.to_csv('./data/train_feat.csv', index=False)
test_feat.to_csv('./data/test_feat.csv', index=False)