In [1]:
import pandas as pd
import numpy as np
import os
import copy
import math

## To Label the Data with 3rd class (Pre-FOG)

Before the occurence of every FOG event, the previous w*f_s timesteps are labelled as a third class 'preFOG' which can be trained in order to predict FOG before it's onset.



In [2]:
def label_prefog(dataset, window_length=1):
    dataset.drop(index=list(dataset[dataset['Action'] == 0].index), inplace=True)
    window_length = 64 * window_length

    fog_index = []
    for i in dataset.index:
        if dataset.loc[i, 'Action'] == 2:
            fog_index.append(i)

    start_indices = []
    for i in fog_index:
        if dataset.loc[i-1, 'Action'] != dataset.loc[i, 'Action']:
            start_indices.append(i)

    prefog = []
    for start in start_indices:
        prefog_start = [x for x in range(start-window_length, start)]
        prefog.append(prefog_start)

    prefog = [item for sublist in prefog for item in sublist]

    for i in prefog:
        dataset.loc[i, 'Action'] = 3
    dataset['Action'] = dataset['Action'] - 1
    return dataset

In [3]:
data_path = "dataset/"

people = []
for person in os.listdir(data_path):
    if '.txt' in person:
        people.append(person)

for window_length in range(1, 5):
    dataset = pd.DataFrame()  # 매 루프마다 dataset을 초기화
    for person in people:
        name = person.split('R')[0]
        print(name)
        file = data_path + person
        temp = pd.read_csv(file, delimiter=" ", header=None)
        print(person, ' is read', end='\t')
        if 2 in temp[max(temp.columns)].unique():
            print('Adding {} to dataset'.format(person), end='\t')
            temp.columns = ['time', 'A_F', 'A_V', 'A_L', 'L_F', 'L_V', 'L_L', 'T_F', 'T_V', 'T_L', 'Action']
            temp = label_prefog(temp, window_length).reset_index(drop=True)
            temp['name'] = name
            print('{} is labelled'.format(person))
            dataset = pd.concat([dataset, temp], axis=0)

        print('')
    dataset.reset_index(drop=True, inplace=True)
    to_path = data_path + "raw_labelled"
    os.makedirs(to_path, exist_ok=True)  # 디렉토리가 없으면 생성합니다.
    to_name = to_path + f"/win_{window_length}.csv"
    dataset.to_csv(to_name, index=False)

S01
S01R01.txt  is read	Adding S01R01.txt to dataset	S01R01.txt is labelled

S01
S01R02.txt  is read	Adding S01R02.txt to dataset	S01R02.txt is labelled

S02
S02R01.txt  is read	Adding S02R01.txt to dataset	S02R01.txt is labelled

S03
S03R01.txt  is read	Adding S03R01.txt to dataset	S03R01.txt is labelled

S02
S02R02.txt  is read	Adding S02R02.txt to dataset	S02R02.txt is labelled

S03
S03R02.txt  is read	Adding S03R02.txt to dataset	S03R02.txt is labelled

S05
S05R01.txt  is read	Adding S05R01.txt to dataset	S05R01.txt is labelled

S03
S03R03.txt  is read	
S04
S04R01.txt  is read	
S05
S05R02.txt  is read	Adding S05R02.txt to dataset	S05R02.txt is labelled

S07
S07R02.txt  is read	Adding S07R02.txt to dataset	S07R02.txt is labelled

S06
S06R01.txt  is read	Adding S06R01.txt to dataset	S06R01.txt is labelled

S07
S07R01.txt  is read	Adding S07R01.txt to dataset	S07R01.txt is labelled

S06
S06R02.txt  is read	
S10
S10R01.txt  is read	
S09
S09R01.txt  is read	Adding S09R01.txt to dataset	

In [8]:
# 추가된 부분
# 이 부분에서 path 변수를 정의합니다.
path = os.getcwd() + "/dataset"

col = ['A_F', 'A_V', 'A_L', 'L_F', 'L_V', 'L_L', 'T_F', 'T_V', 'T_L']
stat1 = pd.DataFrame(columns=col)  # 빈 데이터프레임을 초기화할 때 컬럼을 설정합니다.
features_path = path + "/features"
os.makedirs(features_path, exist_ok=True)  # 디렉토리가 없으면 생성합니다.
feature_name = features_path + f"/time_{window_length}.csv"
stat1.to_csv(feature_name, index=False)

print(dataset.head())

     time  A_F   A_V  A_L  L_F  L_V  L_L  T_F   T_V  T_L  Action name
0  750000  -30   990  326  -45  972  181  -38  1000   29       0  S01
1  750015  -30  1000  356  -18  981  212  -48  1028   29       0  S01
2  750031  -20   990  336   18  981  222  -38  1038    9       0  S01
3  750046  -20  1000  316   36  990  222  -19  1038    9       0  S01
4  750062    0   990  316   36  990  212  -29  1038   29       0  S01


## To Extract Non-Overlapping windows of length w *f_s from the continously logged accelerometer data from the dataset.

In [9]:
def create_window(act,window_length,dataframe):
    
  indices = list(dataframe[dataframe.Action == act].index)
  groups = []
  temp = []
  group_count = 0
  for i in range(len(indices)):
    if i == len(indices)-1:
      temp.append(indices[i])
      groups.append(temp)
      temp = []
      break
    temp.append(indices[i])
    if indices[i]+1 != indices[i+1]: 
      group_count+=1
      groups.append(temp)
      temp = []

  fs = 64
  window_length = 1
  # window_length = window_length*fs

  final_dataframe = pd.DataFrame()
  for i in groups: 
    required = math.floor(len(i)/(window_length*fs))
    
    req_index = i[0:(required*fs)]
    
    final_dataframe = pd.concat([final_dataframe,dataframe.iloc[req_index,:]],axis = 0)
  return final_dataframe


In [10]:
for window_length in range(1,5):
  
  path = os.getcwd()+"/dataset"
  name = path + f"/raw_labelled/win_{window_length}.csv"
  dataframe = pd.read_csv(name)

  activities = []
  for act in range(3):
    activities.append(create_window(act,window_length,dataframe))
  to_write = pd.concat(activities, axis=0)

  windows_path = path + "/windows"
  os.makedirs(windows_path, exist_ok=True)  # 디렉토리가 없으면 생성합니다.
  to_path = windows_path + f"/windowed_{window_length}.csv"
  to_write.to_csv(to_path, index=False)   

## Extracting Features

The following feature are extracted in the time domain

1. Mean
2. std
3. var
4. Mav
5. rms

The following feature are extracted in the frequency domain

1. Freeze Index
2. Power
3. Energy
4. Entropy
5. Peak Frequency

In [11]:
#read file 
window_length = 1
fs = 64
# for window_length in range(1,5):
w = window_length*fs
FE_path = path + "/windows/windowed_"
name = FE_path + str(window_length) + ".csv"
dataframe = pd.read_csv(name)

df = dataframe.drop(columns=['time','Action','name'])
stat = pd.DataFrame()


col= list(df.columns)
for s in col:    
  print (s)
  mn =[] 
  var = []
  std = []
  mav = []
  rms =[]
  for i in range(0,len(df),w):
      mn_  = np.mean(df[s].iloc[i:i+w])
      var_  = np.var(df[s].iloc[i:i+w])
      std_  = np.std(df[s].iloc[i:i+w])
      mav_  = np.mean(abs(df[s].iloc[i:i+w]))
      rms_  = np.sqrt(np.mean((df[s].iloc[i:i+w])**2))

      mn.append(mn_)
      var.append(var_)
      std.append(std_)
      mav.append(mav_)
      rms.append(rms_)

  stat['mean_'+s] = mn
  stat['var_'+s] = var
  stat['std_'+s] = std
  stat['rms_'+s] = rms
  stat['mav_'+s] = mav


stat.shape


import copy
stat1 = copy.copy(stat)
stat1['w'] = dataframe['Action'].iloc[[x for x in range(0,len(dataframe),w)]].to_list()
order = ['w']
order += stat1.columns.to_list()[:-1]
stat1 = stat1[order]
stat1.columns
col = stat1.columns.to_list()
col[0] = 0
stat1.columns = col
feature_name = path + "/features/time_"+str(window_length)+".csv"
stat1.to_csv(feature_name, index = False)

A_F
A_V
A_L
L_F
L_V
L_L
T_F
T_V
T_L


In [12]:
from scipy.signal import butter, lfilter

window_length = 3
fs = 64
# for window_length in range(1,5):
w = window_length*fs
FE_path = path + "/windows/windowed_"
name = FE_path + str(window_length) + ".csv"
dataframe = pd.read_csv(name)

df = dataframe.drop(columns=['time','Action','name'])

col= list(df.columns)

order=5

fi=pd.DataFrame()

power = pd.DataFrame()
bands = {'locomotor' :(0.5,3),'freeze' :(3,8)}

for s in col:
    xtemp = []
    xtemp1 = []
    for i in range(0,len(df),w):
        nyq=0.5*fs
        
        #locomotor band 0.5-3hz
        loc_low= 0.5/nyq
        loc_high=3/nyq
        
        #clipping off band from the window
        b, a = butter(order, [loc_low, loc_high], btype='band')
        y=lfilter(b,a,df[s].iloc[i:i+w])
        
        #total power in locomotor band
        e1=sum([x**2 for x in y])

        #freeze band 3-8hz
        frez_low= 3/nyq
        frez_high=8/nyq

        #clipping off band from the window
        b1, a1 = butter(order, [frez_low, frez_high], btype='band')
        y1=lfilter(b1,a1,df[s].iloc[i:i+w])
        #total power in locomotor band
        e2=sum([x**2 for x in y1])
        
        FI=e2/e1
        POW=e2+e1
        xtemp.append(FI)
        xtemp1.append(POW)
    fi['FI'+s] = xtemp
    power['P'+s] = xtemp1
print ("Freeze and power done")


w = window_length*fs
E=[]
for i in range(0,len(df),w):
  energy = np.sum((df.iloc[i:i+w,:])**2)
  E.append(energy)
E = pd.DataFrame(E)
E.columns = ["EN_" + x for x in df.columns]

#Entropy 
from scipy.signal import periodogram

peak_f = pd.DataFrame()
PSE = pd.DataFrame()
for s in col:
  peakF = []
  pse = []
  for i in range(0,len(df),w):
      f,Pxx_den = periodogram(df[s].iloc[i:i+w],fs)
      p_norm = Pxx_den/sum(Pxx_den)
      p_norm = list(filter(lambda a: a != 0, p_norm))
      pse.append(-(np.sum(p_norm*np.log(p_norm))))
      peak = (fs/w)*max(Pxx_den)
      peakF.append(peak)
  PSE['ENt_'+s] = pse
  peak_f['peak_'+s] = peakF
PSE.fillna(0,inplace = True)


freq = pd.concat([fi,power,E,PSE,peak_f],axis = 1)

feature_name = path + "/features/freq_"+str(window_length)+".csv"
freq.to_csv(feature_name, index = False)

Freeze and power done


  p_norm = Pxx_den/sum(Pxx_den)
  p_norm = Pxx_den/sum(Pxx_den)
  p_norm = Pxx_den/sum(Pxx_den)


In [13]:
df.shape

(811200, 9)

In [14]:
p_norm

[3.2475133510467163e-32,
 0.008737503700812927,
 0.057239190898979796,
 0.31419744338747646,
 0.05723153572091037,
 0.040425336591763464,
 0.0038633418321145758,
 0.04035895812284892,
 0.004548671813727431,
 0.006411827107515323,
 0.021938571733910717,
 0.020931521571531754,
 0.0060894162608230865,
 0.004171999271817693,
 0.006813729308807736,
 0.033312735814741444,
 0.042450475769469226,
 0.004005058772182705,
 0.012051711376991783,
 0.0004617421185715281,
 0.00830289452004817,
 0.0004641823826224023,
 0.022746701171254633,
 0.01582728154476885,
 0.01648019071179349,
 0.003891162537965589,
 0.001655848263382903,
 0.004423960163810015,
 0.014612890740322921,
 0.000665443246263766,
 0.0027637184722803486,
 0.0014418740542361363,
 0.002838284050597186,
 0.006221949506275692,
 0.007616654253299707,
 0.007245866275830395,
 0.0024514351938327017,
 0.0029871879218501592,
 0.0012231884121725212,
 0.0023006624143964015,
 0.003825775594873588,
 0.00044759689008329106,
 0.0016845887591069573,
 0