# IWALQQ_AE를 위한 데이터 코드

## 목적
- IWALQQ_AE 용 데이터셋을 만들기 위한 코드

## 목표
- (N, row, column, channel) 순으로 데이터를 정리할 것
    - N: 데이터(스텝) 개수
    - row: 세로 길이. 여기서는 time normalization된 데이터를 사용하므로 101개
    - column: 총 feature의 개수. 2 type(Acc, Gyro) * 3 axis(x, y, z) * 7 sensors = 42개
    - channel: 총 channel의 개수 1개 (저장되는 배열은 (N, 101, 42)이며 channel 축은 포함되지 않으므로, 필요하면 불러온 뒤에 추가할 것)
## 비고
- 해당 코드는 IWALQQ_1st_correction 이후에 만들어짐
    - 따라서, 센서 coordinate system의 calibration이 적용된 allnew데이터 셋(***NORM_CORRECTION***)을 이용함

In [63]:
# 필요한 라이브러리 불러오기
import random
import pandas as pd 
import numpy as np
from numpy import savez_compressed
from numpy import load
from natsort import natsorted
import os

from sklearn.preprocessing import MinMaxScaler, StandardScaler ,RobustScaler
from sklearn.model_selection import KFold

from tqdm.notebook import trange, tqdm
from os.path import join

from pickle import dump, load

# seed_rand = 777 # 2nd 777 # 1st 41
# nameDataset = "IWALQQ_AE_2nd"

seed_rand = 41 # 2nd 777 # 1st 41
nameDataset = "IWALQQ_AE_1st"

In [64]:
# 필요한 함수 정의
def makeColumnsWOMAG():
    """Return the ordered column names of a step file without magnetometer channels.

    Before filtering the layout is: 54 leg IMU columns (side -> segment ->
    sensor type -> axis, axis varying fastest), 9 trunk IMU columns (sensor
    type -> axis) and 12 force-plate columns (quantity -> axis).  Every name
    containing 'MAG' is then removed.
    """
    axes = ['X', 'Y', 'Z']
    sensor_types = ['ACC', 'GYRO', 'MAG']

    # Leg IMUs of both sides
    leg_names = [
        f'{side}_{segment}_{sensor}_{axis}'
        for side in ['non', 'oa']
        for segment in ['shank', 'shoe', 'thigh']
        for sensor in sensor_types
        for axis in axes
    ]
    # Trunk IMU
    trunk_names = [
        f'trunk_{sensor}_{axis}'
        for sensor in sensor_types
        for axis in axes
    ]
    # Force-plate derived quantities
    plate_names = [
        f'{quantity}_{axis}'
        for quantity in ['GRF', 'ANGLE', 'MONM', 'MOBWHT']
        for axis in axes
    ]

    # Final columns: keep everything that is not a magnetometer channel
    kept = []
    for name in leg_names + trunk_names + plate_names:
        if 'MAG' in name:
            continue
        kept.append(name)
    return kept

def makeColumns_target():
    """Return the 42 model-input column names in target-leg-first order.

    Order: target leg (shank, shoe, thigh; ACC then GYRO; X, Y, Z), the same
    for the non-target leg, and finally the trunk sensor.
    """
    axes = ['X', 'Y', 'Z']
    sensor_types = ['ACC', 'GYRO']  # MAG is left out here

    names = []
    # Legs: 36 columns covering both sides
    for side in ['target', 'nontarget']:
        for segment in ['shank', 'shoe', 'thigh']:
            for sensor in sensor_types:
                names.extend(f'{side}_{segment}_{sensor}_{axis}' for axis in axes)
    # Trunk: 6 columns appended last
    for sensor in sensor_types:
        names.extend(f'trunk_{sensor}_{axis}' for axis in axes)
    return names
# Convert a split made over subjects back into indices of the whole data list,
# i.e. turn a subject-level K-fold split into a per-file ("subject fold") split.

def kfold2subfold(arrName,listData,train,test):
    """Expand a subject-level split into row indices of `listData`.

    Args:
        arrName: array of subject IDs; `train` and `test` index into it.
        listData: DataFrame with a 'patientID' column.
        train: positions in `arrName` of the training subjects.
        test: positions in `arrName` of the test subjects.

    Returns:
        Two lists (train rows, test rows) holding the index labels of
        `listData` whose 'patientID' belongs to the respective subjects,
        grouped subject by subject in the order the subjects are given.
    """
    def rows_of(subject_positions):
        collected = []
        for subject in arrName[subject_positions]:
            belongs = listData['patientID'] == subject
            collected += listData.index[belongs].to_list()
        return collected

    return rows_of(train), rows_of(test)

In [65]:
# 필요한 함수 및 class 설정 
def make_dir(file_path):
    """Create the directory `file_path`, including missing parents.

    Calling it for a directory that already exists is a no-op.  Using
    `exist_ok=True` avoids the check-then-create race of testing
    `os.path.exists` first.

    Raises:
        FileExistsError: if `file_path` exists but is not a directory.
    """
    os.makedirs(file_path, exist_ok=True)
        
# Scaling is done per sensor type: one scaler instance is fitted for all ACC
# columns and another for all GYRO columns, so every channel of a sensor type
# shares the same scaling and its physical proportions are preserved.
class MinMaxScalerSensor(MinMaxScaler):
    """MinMaxScaler that pools the first two axes of X before scaling.

    X is reshaped to ``(X.shape[0] * X.shape[1], -1)`` before being handed to
    the parent class, and results are reshaped back to ``X.shape``.  For a
    2-D input this means every value is treated as one single feature, so a
    single min/max pair is learned for the whole array.
    """

    @staticmethod
    def _pool(X):
        """Merge the first two axes of `X` into one."""
        # Positional shape: the `newshape` keyword is deprecated in recent NumPy.
        return np.reshape(X, (X.shape[0] * X.shape[1], -1))

    def fit(self, X, y=None):
        """Fit on the pooled view of `X` and return `self`.

        Returning `self` follows the scikit-learn estimator contract, which
        the inherited `fit_transform` and chained calls rely on.
        """
        super().fit(self._pool(X), y=y)
        return self

    def transform(self, X):
        """Scale `X` and return an array with the same shape as `X`."""
        return np.reshape(super().transform(self._pool(X)), X.shape)

    def inverse_transform(self, X):
        """Undo the scaling of `X` and return an array with the same shape as `X`."""
        return np.reshape(super().inverse_transform(self._pool(X)), X.shape)


### local 설정
- 현재는 없음
- 본 코드는 scc내에서만 구동됨

### SCC 설정

In [66]:
# Directory settings
dataDir =     r'.'
normalizedDir = join(dataDir, r'NORM_CORRECTION')
#######################################################
# Settings
# This run uses the time-normalized data (every recording brought to a length
# of 101, whatever its original length)
TargetDir = normalizedDir
#######################################################
# Collect the file list
# List everything in the folder and keep only the names ending with dataExt
dataExt = r".csv"
listFromFolder = natsorted([_ for _ in os.listdir(TargetDir) if _.endswith(dataExt)])
# Load the spreadsheet that catalogues the files
listfileName  = r'list_dataset_correction.xlsx'
listFromxlsx = pd.read_excel(join(dataDir,listfileName))
# Extract the unique subject IDs
arrName = listFromxlsx.patientID.unique()
# NOTE: only the two counts are compared below; the order is not verified,
# although later cells pair spreadsheet rows and files by position.
print("항상 listfileName의 수와 dataList는 순서와 개수가 일치해야한다")
print(f"\nNum_listFromxlsx: {len(listFromxlsx)} | Num_listFromFolder: {len(listFromFolder)}")
print(f'Is same size: {len(listFromxlsx)==len(listFromFolder)}')
listFromxlsx.head(), listFromFolder[:5]

항상 listfileName의 수와 dataList는 순서와 개수가 일치해야한다

Num_listFromxlsx: 877 | Num_listFromFolder: 877
Is same size: True


(  patientID  dateVisit speed  numtrial    side  numStep
 0      P002      31220     w         7  nonleg        1
 1      P002      31220     w         7  nonleg        2
 2      P002      31220     w         7   oaleg        1
 3      P002      31220     w         8  nonleg        1
 4      P002      31220     w         8   oaleg        1,
 ['N_F_P002_031220_w_0007_nonleg_imu_knee_angle_moment_R_1_Step.csv',
  'N_F_P002_031220_w_0007_nonleg_imu_knee_angle_moment_R_2_Step.csv',
  'N_F_P002_031220_w_0007_oaleg_imu_knee_angle_moment_R_1_Step.csv',
  'N_F_P002_031220_w_0008_nonleg_imu_knee_angle_moment_R_1_Step.csv',
  'N_F_P002_031220_w_0008_oaleg_imu_knee_angle_moment_R_1_Step.csv'])

### Demographic 데이터 추출하기

In [68]:
# Location of the demographics spreadsheet, and loading it
dgDir = r'.'
dgName = r'demographics.xlsx'

dg_Fromxlsx = pd.read_excel(join(dgDir,dgName))

In [69]:
# Development run: build, scale and save the train/test dataset of every fold.
# Declare the K-fold splitter.
kfold = KFold(n_splits=5, random_state=seed_rand, shuffle=True)
print(f"Total subject:{len(arrName)}")
print(f"Total Data size:{len(listFromxlsx)}")

# Column layouts do not depend on the fold, so build them once.
columnsWOMAG = makeColumnsWOMAG()
X_columns = makeColumns_target()
Y_columns = ['X','Y','Z']  # joint angle and moment both have three axes
# Number of rows expected in every step file; the per-step reshape relies on it.
NUM_FRAMES = 101


def _stack_rows(frames, columns):
    """Concatenate per-file frames row-wise; an empty list gives an empty frame."""
    if not frames:
        return pd.DataFrame(columns=columns)
    return pd.concat(frames, axis=0, ignore_index=True)


def _load_fold_data(file_indices):
    """Read the step files at `file_indices` and stack them row-wise.

    `file_indices` are row labels of `listFromxlsx`; the same numbers are used
    as positions into `listFromFolder`.

    Returns:
        (df_X, df_Y_angle, df_Y_moBWHT, arr_dg): the stacked input frame with
        `X_columns`, the stacked angle and moment frames with `Y_columns`, and
        one [age, height, weight_bl] list per file.

    Raises:
        ValueError: if a file does not contain exactly NUM_FRAMES rows.
    """
    frames_X, frames_angle, frames_moBWHT, arr_dg = [], [], [], []
    for i in tqdm(file_indices):
        datum = listFromFolder[i]
        df = pd.read_csv(join(TargetDir, datum))
        if len(df) != NUM_FRAMES:
            raise ValueError(f"{datum}: expected {NUM_FRAMES} rows, got {len(df)}")
        # Drop every MAG column first.
        dfWOMAG = df.loc[:, columnsWOMAG]
        oaLegArr = dfWOMAG.loc[:, 'oa_shank_ACC_X':'oa_thigh_GYRO_Z']
        nonLegArr = dfWOMAG.loc[:, 'non_shank_ACC_X':'non_thigh_GYRO_Z']
        otherArr = dfWOMAG.loc[:, 'trunk_ACC_X':'trunk_GYRO_Z']
        # The measured leg becomes the "target" leg.  The side is looked up
        # with the file's own row label `i`; the position of the file inside
        # the fold would point at an unrelated row of the spreadsheet.
        if listFromxlsx.loc[i, 'side'] == "oaleg":
            targetLegArr, nonTargetLegArr = oaLegArr, nonLegArr
        else:
            targetLegArr, nonTargetLegArr = nonLegArr, oaLegArr
        # Always the same order: target leg, non-target leg, trunk; then
        # rename to X_columns so that all files can be stacked.
        concated = pd.concat([targetLegArr, nonTargetLegArr, otherArr], axis=1)
        concated.columns = X_columns
        frames_X.append(concated)

        # Outputs: kinematics (angle) and kinetics (moment).
        angle = dfWOMAG.loc[:, 'ANGLE_X':'ANGLE_Z'].copy()
        angle.columns = Y_columns
        frames_angle.append(angle)
        moBWHT = dfWOMAG.loc[:, 'MOBWHT_X':'MOBWHT_Z'].copy()
        moBWHT.columns = Y_columns
        frames_moBWHT.append(moBWHT)

        # Demographics of the subject, whose ID is the third '_' field of the
        # file name.
        patientID = datum.split('_')[2]
        dg_infos = dg_Fromxlsx.loc[dg_Fromxlsx['ID']==patientID][['age','height','weight_bl']].reset_index(drop=True)
        arr_dg.append(list(dg_infos.loc[0]))

    # One concat per output instead of one per file (avoids quadratic copying).
    return (_stack_rows(frames_X, X_columns),
            _stack_rows(frames_angle, Y_columns),
            _stack_rows(frames_moBWHT, Y_columns),
            arr_dg)


def _save_scaler(scaler, path):
    """Pickle `scaler` to `path` and close the file afterwards."""
    with open(path, 'wb') as handle:
        dump(scaler, handle)


# Always pass the subject roster to kfold.split() so the split is per subject.
for countfold, (train, test) in enumerate(kfold.split(arrName)):
    # Subjects used in this fold
    print("+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+")
    print(f"Num. of fold: {countfold}\n")
    print(f'{arrName[train]}\nNum for train:{len(arrName[train])}\n')
    print(f'{arrName[test]}\nNum for test:{len(arrName[test])}\n')
    print(f'Num total:{len(arrName[train])+len(arrName[test])}')

    idx4train,idx4test = kfold2subfold(arrName,listFromxlsx,train,test)
    print(f"\n{countfold}_fold\nidx4train:{len(idx4train)} | idx4test:{len(idx4test)} | total:{len(idx4train)+len(idx4test)}/{len(listFromxlsx)}")

    ###########################################################################
    # Train set
    (df_trainData_X, df_trainData_Y_angle,
     df_trainData_Y_moBWHT, arr_dg_train) = _load_fold_data(idx4train)

    # Every scaler is fitted on the TRAIN data only.
    scaler4X_acc = MinMaxScalerSensor()
    acc_df_trainData_X = np.array(df_trainData_X[[col for col in df_trainData_X.columns if 'ACC' in col]])
    scaler4X_acc.fit(acc_df_trainData_X)
    scaled_acc_df_trainData_X = scaler4X_acc.transform(acc_df_trainData_X)

    scaler4X_gyro = MinMaxScalerSensor()
    gyro_df_trainData_X = np.array(df_trainData_X[[col for col in df_trainData_X.columns if 'GYRO' in col]])
    scaler4X_gyro.fit(gyro_df_trainData_X)
    scaled_gyro_df_trainData_X = scaler4X_gyro.transform(gyro_df_trainData_X)
    # Final feature order: all ACC columns first, then all GYRO columns.
    scaled_X_train = np.concatenate((scaled_acc_df_trainData_X,scaled_gyro_df_trainData_X), axis=1)

    # Angle targets
    scaler4Y_angle = MinMaxScalerSensor()
    narr_trainData_Y_angle = np.array(df_trainData_Y_angle)
    scaler4Y_angle.fit(narr_trainData_Y_angle)
    scaled_Y_angle_train = scaler4Y_angle.transform(narr_trainData_Y_angle)
    # Moment targets
    scaler4Y_moBWHT = MinMaxScalerSensor()
    narr_trainData_Y_moBWHT = np.array(df_trainData_Y_moBWHT)
    scaler4Y_moBWHT.fit(narr_trainData_Y_moBWHT)
    scaled_Y_moBWHT_train = scaler4Y_moBWHT.transform(narr_trainData_Y_moBWHT)

    # Demographics use a standard (z-score) scaler.
    scaler4Demographic = StandardScaler()
    narr_dg_train = np.array(arr_dg_train)
    scaler4Demographic.fit(narr_dg_train)
    scaled_DG_train = scaler4Demographic.transform(narr_dg_train)

    # Save the fitted scalers of this fold.
    scalerDir = join(dataDir, r'SAVE_dataSet',nameDataset)
    make_dir(scalerDir)
    _save_scaler(scaler4X_acc, join(scalerDir,f"{countfold}_fold_scaler4X_acc.pkl"))
    _save_scaler(scaler4X_gyro, join(scalerDir,f"{countfold}_fold_scaler4X_gyro.pkl"))
    _save_scaler(scaler4Y_angle, join(scalerDir,f'{countfold}_fold_scaler4Y_angle.pkl'))
    _save_scaler(scaler4Y_moBWHT, join(scalerDir,f'{countfold}_fold_scaler4Y_moBWHT.pkl'))
    _save_scaler(scaler4Demographic, join(scalerDir,f'{countfold}_fold_scaler4Demographic.pkl'))

    # Cut the stacked rows back into one block per step:
    # X -> (N, 101, 42), Y -> (N, 101, 3), where N is the number of steps.
    final_X_train = scaled_X_train.reshape(len(idx4train), NUM_FRAMES, scaled_X_train.shape[1])
    final_Y_angle_train = scaled_Y_angle_train.reshape(len(idx4train), NUM_FRAMES, scaled_Y_angle_train.shape[1])
    final_Y_moBWHT_train = scaled_Y_moBWHT_train.reshape(len(idx4train), NUM_FRAMES, scaled_Y_moBWHT_train.shape[1])
    final_DG_train = scaled_DG_train
    # Check the resulting shapes
    print(f'TRAIN data  :  {len(idx4train)}')
    print(f'Final shape: X:{final_X_train.shape}, Y_angle:{final_Y_angle_train.shape}, Y_moBWHT:{final_Y_moBWHT_train.shape}')
    print(f'Demographic shape: {final_DG_train.shape}')
    # Save the train set
    setDir = join(dataDir, r'SAVE_dataSet',nameDataset)
    make_dir(setDir)
    savez_compressed(join(setDir,f"{countfold}_fold_final_train.npz"),
                     final_X_train=final_X_train,
                     final_Y_angle_train=final_Y_angle_train,
                     final_Y_moBWHT_train=final_Y_moBWHT_train,
                     final_DG_train=final_DG_train)

    ###########################################################################
    # Test set: same processing, reusing the scalers fitted on the train set.
    (df_testData_X, df_testData_Y_angle,
     df_testData_Y_moBWHT, arr_dg_test) = _load_fold_data(idx4test)

    acc_df_testData_X = np.array(df_testData_X[[col for col in df_testData_X.columns if 'ACC' in col]])
    scaled_acc_df_testData_X = scaler4X_acc.transform(acc_df_testData_X)

    gyro_df_testData_X = np.array(df_testData_X[[col for col in df_testData_X.columns if 'GYRO' in col]])
    scaled_gyro_df_testData_X = scaler4X_gyro.transform(gyro_df_testData_X)
    # Final feature order: all ACC columns first, then all GYRO columns.
    scaled_X_test = np.concatenate((scaled_acc_df_testData_X,scaled_gyro_df_testData_X), axis=1)

    narr_testData_Y_angle = np.array(df_testData_Y_angle)
    scaled_Y_angle_test = scaler4Y_angle.transform(narr_testData_Y_angle)

    narr_testData_Y_moBWHT = np.array(df_testData_Y_moBWHT)
    scaled_Y_moBWHT_test = scaler4Y_moBWHT.transform(narr_testData_Y_moBWHT)

    narr_dg_test = np.array(arr_dg_test)
    scaled_DG_test = scaler4Demographic.transform(narr_dg_test)

    # Cut the stacked rows back into one block per step, as for the train set.
    final_X_test = scaled_X_test.reshape(len(idx4test), NUM_FRAMES, scaled_X_test.shape[1])
    final_Y_angle_test = scaled_Y_angle_test.reshape(len(idx4test), NUM_FRAMES, scaled_Y_angle_test.shape[1])
    final_Y_moBWHT_test = scaled_Y_moBWHT_test.reshape(len(idx4test), NUM_FRAMES, scaled_Y_moBWHT_test.shape[1])
    final_DG_test = scaled_DG_test
    # Check the resulting shapes
    print(f'test data  :  {len(idx4test)}')
    print(f'Final shape: X:{final_X_test.shape}, Y_angle:{final_Y_angle_test.shape}, Y_moBWHT:{final_Y_moBWHT_test.shape}')
    print(f'Demographic shape: {final_DG_test.shape}')
    # Save the test set
    setDir = join(dataDir, r'SAVE_dataSet',nameDataset)
    make_dir(setDir)
    savez_compressed(join(setDir,f"{countfold}_fold_final_test.npz"),
                     final_X_test=final_X_test,
                     final_Y_angle_test=final_Y_angle_test,
                     final_Y_moBWHT_test=final_Y_moBWHT_test,
                     final_DG_test=final_DG_test)

Total subject:44
Total Data size:877
+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+
Num. of fold: 0

['P002' 'P007' 'P017' 'P029' 'P050' 'P065' 'P104' 'P106' 'P115' 'P119'
 'P134' 'P135' 'P136' 'P142' 'P147' 'P149' 'P155' 'P168' 'P169' 'P172'
 'P196' 'P203' 'P222' 'P225' 'P226' 'P243' 'P245' 'P258' 'P263' 'P266'
 'P270' 'P272' 'P273' 'P277' 'P290']
Num for train:35

['P061' 'P066' 'P069' 'P105' 'P121' 'P132' 'P205' 'P229' 'P297']
Num for test:9

Num total:44

0_fold
idx4train:722 | idx4test:155 | total:877/877


  0%|          | 0/722 [00:00<?, ?it/s]

TRAIN data  :  722
Final shape: X:(722, 101, 42), Y_angle:(722, 101, 3), Y_moBWHT:(722, 101, 3)
Demographic shape: (722, 3)


  0%|          | 0/155 [00:00<?, ?it/s]

test data  :  155
Final shape: X:(155, 101, 42), Y_angle:(155, 101, 3), Y_moBWHT:(155, 101, 3)
Demographic shape: (155, 3)
+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+
Num. of fold: 1

['P002' 'P007' 'P017' 'P029' 'P061' 'P065' 'P066' 'P069' 'P104' 'P105'
 'P106' 'P115' 'P119' 'P121' 'P132' 'P134' 'P135' 'P142' 'P147' 'P149'
 'P155' 'P168' 'P169' 'P172' 'P196' 'P205' 'P222' 'P225' 'P229' 'P245'
 'P258' 'P263' 'P272' 'P290' 'P297']
Num for train:35

['P050' 'P136' 'P203' 'P226' 'P243' 'P266' 'P270' 'P273' 'P277']
Num for test:9

Num total:44

1_fold
idx4train:668 | idx4test:209 | total:877/877


  0%|          | 0/668 [00:00<?, ?it/s]

TRAIN data  :  668
Final shape: X:(668, 101, 42), Y_angle:(668, 101, 3), Y_moBWHT:(668, 101, 3)
Demographic shape: (668, 3)


  0%|          | 0/209 [00:00<?, ?it/s]

test data  :  209
Final shape: X:(209, 101, 42), Y_angle:(209, 101, 3), Y_moBWHT:(209, 101, 3)
Demographic shape: (209, 3)
+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+
Num. of fold: 2

['P002' 'P007' 'P029' 'P050' 'P061' 'P065' 'P066' 'P069' 'P104' 'P105'
 'P115' 'P121' 'P132' 'P134' 'P136' 'P142' 'P147' 'P149' 'P155' 'P168'
 'P172' 'P196' 'P203' 'P205' 'P226' 'P229' 'P243' 'P245' 'P258' 'P266'
 'P270' 'P272' 'P273' 'P277' 'P297']
Num for train:35

['P017' 'P106' 'P119' 'P135' 'P169' 'P222' 'P225' 'P263' 'P290']
Num for test:9

Num total:44

2_fold
idx4train:723 | idx4test:154 | total:877/877


  0%|          | 0/723 [00:00<?, ?it/s]

TRAIN data  :  723
Final shape: X:(723, 101, 42), Y_angle:(723, 101, 3), Y_moBWHT:(723, 101, 3)
Demographic shape: (723, 3)


  0%|          | 0/154 [00:00<?, ?it/s]

test data  :  154
Final shape: X:(154, 101, 42), Y_angle:(154, 101, 3), Y_moBWHT:(154, 101, 3)
Demographic shape: (154, 3)
+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+
Num. of fold: 3

['P002' 'P007' 'P017' 'P050' 'P061' 'P066' 'P069' 'P105' 'P106' 'P115'
 'P119' 'P121' 'P132' 'P134' 'P135' 'P136' 'P168' 'P169' 'P172' 'P203'
 'P205' 'P222' 'P225' 'P226' 'P229' 'P243' 'P245' 'P258' 'P263' 'P266'
 'P270' 'P273' 'P277' 'P290' 'P297']
Num for train:35

['P029' 'P065' 'P104' 'P142' 'P147' 'P149' 'P155' 'P196' 'P272']
Num for test:9

Num total:44

3_fold
idx4train:671 | idx4test:206 | total:877/877


  0%|          | 0/671 [00:00<?, ?it/s]

TRAIN data  :  671
Final shape: X:(671, 101, 42), Y_angle:(671, 101, 3), Y_moBWHT:(671, 101, 3)
Demographic shape: (671, 3)


  0%|          | 0/206 [00:00<?, ?it/s]

test data  :  206
Final shape: X:(206, 101, 42), Y_angle:(206, 101, 3), Y_moBWHT:(206, 101, 3)
Demographic shape: (206, 3)
+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+
Num. of fold: 4

['P017' 'P029' 'P050' 'P061' 'P065' 'P066' 'P069' 'P104' 'P105' 'P106'
 'P119' 'P121' 'P132' 'P135' 'P136' 'P142' 'P147' 'P149' 'P155' 'P169'
 'P196' 'P203' 'P205' 'P222' 'P225' 'P226' 'P229' 'P243' 'P263' 'P266'
 'P270' 'P272' 'P273' 'P277' 'P290' 'P297']
Num for train:36

['P002' 'P007' 'P115' 'P134' 'P168' 'P172' 'P245' 'P258']
Num for test:8

Num total:44

4_fold
idx4train:724 | idx4test:153 | total:877/877


  0%|          | 0/724 [00:00<?, ?it/s]

TRAIN data  :  724
Final shape: X:(724, 101, 42), Y_angle:(724, 101, 3), Y_moBWHT:(724, 101, 3)
Demographic shape: (724, 3)


  0%|          | 0/153 [00:00<?, ?it/s]

test data  :  153
Final shape: X:(153, 101, 42), Y_angle:(153, 101, 3), Y_moBWHT:(153, 101, 3)
Demographic shape: (153, 3)


In [22]:
# Inspect the offset (`min_`) learned by the ACC scaler.
scaler4X_acc.min_

array([0.54681489])

# 새로 배운 것

In [186]:
# Sanity check of inverse_transform: undo the moment scaling on the first
# training sample.
tmp  = final_Y_moBWHT_train[0]
scaler4Y_moBWHT.inverse_transform(tmp)

array([[-2.41567832e+00, -2.10234841e-02, -9.25387317e-02],
       [-2.51075271e+00, -7.23300439e-02, -9.97491628e-02],
       [-2.73093486e+00, -1.25147835e-01, -1.08115894e-01],
       [-3.00607260e+00, -1.60816150e-01, -1.15027882e-01],
       [-3.22541115e+00, -1.74435763e-01, -1.17654086e-01],
       [-3.28680206e+00, -1.69955687e-01, -1.13794444e-01],
       [-3.16697862e+00, -1.67686141e-01, -1.03404123e-01],
       [-2.91149806e+00, -1.91591927e-01, -8.92064787e-02],
       [-2.58951564e+00, -2.67537857e-01, -7.41286807e-02],
       [-2.23772789e+00, -4.19574688e-01, -5.83174052e-02],
       [-1.86470372e+00, -6.52384292e-01, -3.96422824e-02],
       [-1.46291752e+00, -9.58473331e-01, -1.35135931e-02],
       [-1.04444354e+00, -1.31586574e+00,  2.30539194e-02],
       [-6.12435634e-01, -1.70745894e+00,  7.25045427e-02],
       [-1.78486231e-01, -2.11612233e+00,  1.33218901e-01],
       [ 2.43189310e-01, -2.54305152e+00,  2.02129582e-01],
       [ 6.51489640e-01, -3.00614817e+00

In [184]:
# Unscaled moment targets: first 101 rows of the stacked training array.
narr_trainData_Y_moBWHT[:101,:]

array([[-2.415678321823174, -0.0210234840976431, -0.0925387317037507],
       [-2.51075270514186, -0.0723300438527851, -0.0997491628081989],
       [-2.7309348590316778, -0.1251478348323766, -0.1081158936584269],
       [-3.0060726039852104, -0.1608161501111852, -0.1150278817657105],
       [-3.2254111451342258, -0.1744357634103454, -0.117654085667523],
       [-3.28680206213427, -0.1699556870492113, -0.1137944436687997],
       [-3.1669786235224877, -0.1676861409639301, -0.1034041232735306],
       [-2.911498061487384, -0.1915919267663104, -0.0892064787478356],
       [-2.589515642886767, -0.26753785669753, -0.0741286807098905],
       [-2.2377278904292144, -0.4195746882256952, -0.05831740523152],
       [-1.8647037232394807, -0.6523842921047722, -0.0396422824200176],
       [-1.4629175229835394, -0.9584733307282268, -0.0135135931356871],
       [-1.0444435389829425, -1.315865736117481, 0.0230539193560824],
       [-0.6124356343565387, -1.707458937987593, 0.0725045427231502],
       [

In [16]:
# List the columns of the test input frame whose name contains 'ACC'.
[col for col in df_testData_X.columns if 'ACC' in col ]

['target_shank_ACC_X',
 'target_shank_ACC_Y',
 'target_shank_ACC_Z',
 'target_shoe_ACC_X',
 'target_shoe_ACC_Y',
 'target_shoe_ACC_Z',
 'target_thigh_ACC_X',
 'target_thigh_ACC_Y',
 'target_thigh_ACC_Z',
 'nontarget_shank_ACC_X',
 'nontarget_shank_ACC_Y',
 'nontarget_shank_ACC_Z',
 'nontarget_shoe_ACC_X',
 'nontarget_shoe_ACC_Y',
 'nontarget_shoe_ACC_Z',
 'nontarget_thigh_ACC_X',
 'nontarget_thigh_ACC_Y',
 'nontarget_thigh_ACC_Z',
 'trunk_ACC_X',
 'trunk_ACC_Y',
 'trunk_ACC_Z']

In [182]:
# Shape of the stacked, unscaled moment targets of the training set.
narr_trainData_Y_moBWHT.shape

(70094, 3)

In [171]:
# Look up the demographic values (age, height, weight_bl) of one subject.
patientID = 'P002'
dg_infos = dg_Fromxlsx.loc[dg_Fromxlsx['ID']==patientID][['age','height','weight_bl']] # units are kg & meter
# NOTE(review): `.loc[0]` selects the row labelled 0, so this works only when
# the subject's row carries that label; the index is not reset here.
A = list(dg_infos.loc[0])

In [172]:
# Shape of the scaled moment targets of the training set.
scaled_Y_moBWHT_train.shape

(70094, 3)

# 배운 것

In [48]:
# Load one saved training archive and list the arrays stored in it.
dataSetDir = r'./SAVE_dataSet/IWALQQ_AE_2nd'
numFold = 0
load_train = np.load(join(dataSetDir,f"{numFold}_fold_final_train.npz"))
list(load_train.keys())

['final_X_train',
 'final_Y_angle_train',
 'final_Y_moBWHT_train',
 'final_DG_train']

In [62]:
# Standard deviation of the third column of the saved demographic array.
load_train["final_DG_train"][:,2].std()

0.9999999999999998

In [54]:
# Print the saved demographic array.
print(load_train["final_DG_train"])

[[ 1.54122627 -0.01055326  0.00757153]
 [ 1.54122627 -0.01055326  0.00757153]
 [ 1.54122627 -0.01055326  0.00757153]
 ...
 [ 2.1250763   0.33671487 -0.40171557]
 [ 2.1250763   0.33671487 -0.40171557]
 [ 2.1250763   0.33671487 -0.40171557]]


In [151]:
# First row of the scaled ACC training matrix (one value per ACC column).
scaled_acc_df_trainData_X[0]

array([0.3230613 , 0.42753102, 0.42861958, 0.30121612, 0.16047492,
       0.24595254, 0.59406985, 0.40963409, 0.42955488, 0.45234759,
       0.38345966, 0.44269578, 0.46638882, 0.41718013, 0.52734556,
       0.4386134 , 0.4051468 , 0.46834086, 0.48050352, 0.40983974,
       0.47541456])

In [158]:
# Split the combined data again: the ACC block is concatenated before the GYRO
# block, so the first 21 feature columns are the ACC channels.  final_X_train
# is (N, 101, 42), so slice the last axis and flatten the steps back to rows.
sep_scaled_acc_df_trainData_X = final_X_train[:, :, :21].reshape(-1, 21)
# The fitted ACC scaler is named scaler4X_acc in this notebook.
rescaled_sep_scaled_acc_df_trainData_X = scaler4X_acc.inverse_transform(sep_scaled_acc_df_trainData_X)
# First 101 rows, second ACC column.
rescaled_sep_scaled_acc_df_trainData_X[:101,1]

array([-33.61053209, -47.0113584 , -60.05875628, -70.39615999,
       -77.89855086, -81.72189821, -81.77630101, -78.84886503,
       -74.76309178, -71.22439372, -69.0646494 , -68.30789749,
       -68.62482825, -69.76134749, -71.61298748, -73.93296161,
       -76.34707444, -78.36676841, -79.54349408, -79.68261848,
       -78.60107702, -76.16291149, -72.28527582, -67.11740913,
       -60.97544396, -54.21878518, -47.37868773, -40.93673914,
       -35.36739764, -30.89029369, -27.49457336, -25.08321788,
       -23.39636299, -22.23151503, -21.30929815, -20.44818303,
       -19.50523539, -18.39119907, -17.12164262, -15.70357713,
       -14.20455377, -12.65694758, -11.1132361 ,  -9.61827669,
        -8.1989963 ,  -6.90071925,  -5.74817927,  -4.79353562,
        -4.04557619,  -3.49065127,  -3.09305105,  -2.81882949,
        -2.66076003,  -2.61982218,  -2.71709838,  -2.96592163,
        -3.38506231,  -3.96745981,  -4.70025764,  -5.54911313,
        -6.50533562,  -7.56853218,  -8.7309051 ,  -9.98

In [56]:
# Build the model-input column names and count them.
A = makeColumns_target()
len(A)

42

In [54]:
# Display the column names held in A.
A

['target_shank_ACC_X',
 'target_shank_ACC_Y',
 'target_shank_ACC_Z',
 'target_shank_GYRO_X',
 'target_shank_GYRO_Y',
 'target_shank_GYRO_Z',
 'target_shoe_ACC_X',
 'target_shoe_ACC_Y',
 'target_shoe_ACC_Z',
 'target_shoe_GYRO_X',
 'target_shoe_GYRO_Y',
 'target_shoe_GYRO_Z',
 'target_thigh_ACC_X',
 'target_thigh_ACC_Y',
 'target_thigh_ACC_Z',
 'target_thigh_GYRO_X',
 'target_thigh_GYRO_Y',
 'target_thigh_GYRO_Z',
 'nontarget_shank_ACC_X',
 'nontarget_shank_ACC_Y',
 'nontarget_shank_ACC_Z',
 'nontarget_shank_GYRO_X',
 'nontarget_shank_GYRO_Y',
 'nontarget_shank_GYRO_Z',
 'nontarget_shoe_ACC_X',
 'nontarget_shoe_ACC_Y',
 'nontarget_shoe_ACC_Z',
 'nontarget_shoe_GYRO_X',
 'nontarget_shoe_GYRO_Y',
 'nontarget_shoe_GYRO_Z',
 'nontarget_thigh_ACC_X',
 'nontarget_thigh_ACC_Y',
 'nontarget_thigh_ACC_Z',
 'nontarget_thigh_GYRO_X',
 'nontarget_thigh_GYRO_Y',
 'nontarget_thigh_GYRO_Z']

In [5]:
# Reload the saved target scaler of one fold.
relativeDir = '../preperation/SAVE_dataSet'
dataSetDir = join(relativeDir,nameDataset)
dataType = 'moBWHT'
numFold = 0
# `load` is pickle's here: the `from pickle import dump, load` line rebinds the
# name imported earlier from numpy.  Only unpickle files written by this
# project, because unpickling can execute arbitrary code.
with open(join(dataSetDir,f"{numFold}_fold_scaler4Y_{dataType}.pkl"), 'rb') as scaler_file:
    load_scaler4Y = load(scaler_file)

In [8]:
# NOTE(review): leftover scratch input; `dddd` is not defined anywhere in this file.
dddd

IndexError: index 3 is out of bounds for axis 0 with size 1