In [101]:
import numpy as np
import pandas as pd
##_____________________________

# make_data_set: loads a CSV file as a NumPy array and also returns the CSV's column names
def make_data_set(num_features, act, trial, sub_id):
    """Load one recording CSV as a NumPy array and save a copy beside it.

    Reads ``A_DeviceMotion_data/<act>_<trial>/sub_<sub_id>.csv``, drops its
    ``Unnamed: 0`` column, and writes the remaining values to
    ``sub_<sub_id>_origin.csv`` in the same directory, using the remaining
    column names as the header and a fresh row index as the first column.

    Args:
        num_features: Number of columns expected once ``Unnamed: 0`` has been
            dropped.
        act: Activity code used in the directory name.
        trial: Trial number used in the directory name.
        sub_id: Subject id used in the file name.

    Returns:
        A ``(data_set, columns)`` tuple: the CSV values as a 2-D array and
        the list of column names (without ``Unnamed: 0``).

    Raises:
        ValueError: If the CSV does not have ``num_features`` columns after
            the drop (raised by the concatenation below).
        KeyError: If the CSV has no ``Unnamed: 0`` column.
    """
    # Stacking onto an empty float64 (0, num_features) array enforces the
    # expected column count and applies NumPy's promotion with float64.
    empty_rows = np.zeros((0, num_features))

    path_stem = 'A_DeviceMotion_data/' + act + '_' + str(trial) + '/sub_' + str(sub_id)
    features = pd.read_csv(path_stem + '.csv').drop(columns=['Unnamed: 0'])

    data_set = np.concatenate((empty_rows, features.values), axis=0)

    out_frame = pd.DataFrame(data_set)
    out_frame.to_csv(path_stem + '_origin.csv', header=features.columns, index=True)
    return data_set, list(features.columns)
#________________________________

# gorilla_comp function
def gorilla_comp(ds):
    """Apply the XOR-delta stage of a Gorilla-style compression along axis 0.

    The first row is copied unchanged; every later row ``j`` is replaced by
    ``ds[j - 1] ^ ds[j]``, computed independently for each column. Identical
    consecutive values therefore become 0.

    Args:
        ds: Array of an integer (or bool) dtype whose rows are consecutive
            samples. Floating-point data must be reinterpreted first, e.g.
            with ``arr.view(np.uint64)``.

    Returns:
        A new array with the same shape and dtype as ``ds``. ``ds`` itself
        is not modified. The original values can be recovered with
        ``np.bitwise_xor.accumulate(result, axis=0)``.

    Raises:
        TypeError: If the dtype of ``ds`` does not support bitwise XOR
            (for example float64).
    """
    ds_xor = np.empty_like(ds)

    # Slice with [:1] rather than index [0] so an input with zero rows is
    # handled without raising.
    ds_xor[:1] = ds[:1]

    # XOR every row with its predecessor in a single vectorized call instead
    # of looping over individual elements in Python.
    np.bitwise_xor(ds[:-1], ds[1:], out=ds_xor[1:])

    # TODO: the second stage (encoding each XOR result by its leading and
    # trailing zero-bit counts plus the meaningful bits) is not implemented.

    return ds_xor
#________________________________

print("--> Start...")
# Select which CSV file to load (these values determine its directory and file name)
num_features = 12 # attitude(roll, pitch, yaw); gravity(x, y, z); rotationRate(x, y, z); userAcceleration(x,y,z)
act = "dws" # dws, ups, wlk, jog, sit, std
trial = 1 # 0 ~ 16
sub_id = 1 # 1 ~ 24

# Load the CSV file into `ds` (and its column names into `columns_list`) via make_data_set
print("--> CSV to Numpy data set")
ds, columns_list = make_data_set(num_features, act, trial, sub_id)
print("--> Numpy data set shape:", ds.shape)
print(ds)
print("--> Numpy data set shape to HEX:", ds.shape)
ds_uint64 = ds.view(np.uint64) # reinterpret the float64 bit patterns as uint64 (no numeric conversion)
print(np.vectorize(hex)(ds_uint64)) # show every element as a hexadecimal string



--> Start...
--> CSV to Numpy data set
--> Numpy data set shape: (1751, 12)
[[ 1.528132 -0.733896  0.696372 ...  0.294894 -0.184493  0.377542]
 [ 1.527992 -0.716987  0.677762 ...  0.219405  0.035846  0.114866]
 [ 1.527765 -0.706999  0.670951 ...  0.010714  0.134701 -0.167808]
 ...
 [ 1.830821 -0.578367  2.447967 ... -0.08559  -0.030209 -0.08774 ]
 [ 1.849557 -0.586962  2.439458 ... -0.048105  0.029555  0.060441]
 [ 1.869375 -0.596783  2.433775 ... -0.065011 -0.042575  0.046052]]
--> Numpy data set shape to HEX: (1751, 12)
[['0x3ff8733a8a3f8983' '0xbfe77c1376d54973' '0x3fe648adeebb341e' ...
  '0x3fd2df8b1572580c' '0xbfc79d7774aba387' '0x3fd829a5ebb7739f']
 ['0x3ff872a7bd48cb4b' '0xbfe6f18eb8950764' '0x3fe5b039ef0f16f4' ...
  '0x3fcc157689ca18bd' '0x3fa25a682b62844b' '0x3fbd67dbb16c1e36']
 ['0x3ff871b9b66f9336' '0xbfe69fbc5de9c022' '0x3fe5786e3b46fdec' ...
  '0x3f85f138bcdfefbf' '0x3fc13de1e2de8709' '0xbfc57abb8800eade']
 ...
 ['0x3ffd4b0af5fd47bf' '0xbfe281fb82c2bd7f' '0x4003956fb8f57f7

In [102]:
# Gorilla compression (XOR each sample with the previous one, per column)
print("--> Gorilla Compression Start")
ds_gorilla = gorilla_comp(ds_uint64)
print("--> Numpy data set shape:", ds_gorilla.shape)
# Build the hexadecimal string representation once; it is both printed and saved.
ds_gorilla_hex = np.vectorize(hex)(ds_gorilla)
print(ds_gorilla_hex)

# Save the compressed array (as hex strings) to a CSV file next to the input
print("--> Numpy to CSV")
pd.DataFrame(ds_gorilla_hex).to_csv('A_DeviceMotion_data/' + act + '_' + str(trial) + '/sub_' + str(sub_id)+'_gorilla_comp.csv', header=columns_list, index=True)

--> BDI Compression Start
--> Numpy data set shape: (1751, 12)
[['0x3ff8733a8a3f8983' '0xbfe77c1376d54973' '0x3fe648adeebb341e' ...
  '0x3fd2df8b1572580c' '0xbfc79d7774aba387' '0x3fd829a5ebb7739f']
 ['0x19d377742c8' '0x18d9dce404e17' '0x3f89401b422ea' ...
  '0x1ecafd9cb840b1' '0x8065c71f5fc927cc' '0x654e7e5adb6da9']
 ['0x31e0b27587d' '0x6e32e57cc746' '0xc857d449eb18' ...
  '0x49e44e3515f702' '0x636789c9bc0342' '0x80781d60396cf4e8']
 ...
 ['0x4c56bf7ef66e' '0xbad8d5dfa038' '0xd8b008ea383' ... '0xfd0fcf776bb7d'
  '0x31e08687d8a1e7' '0x7b1e2ee8de10ad']
 ['0xdcc3e14c2053' '0x499f0afd687d' '0x116d36bac60f' ...
  '0x1d480e19510d3b' '0x8000acb1d5965d76' '0x8018843f8221c183']
 ['0x7f3cd63e3bc5' '0x1d0bc2146346f' '0xfc5d88b92d3a' ...
  '0x1805b8656d0c42' '0x803b8fc988fa8d9d' '0x9663fdf787f42']]
--> Numpy to CSV
