In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import os
import math
from tqdm import tqdm
from sklearn.preprocessing import MinMaxScaler

In [2]:
data_path='E:\XGBoost\dataset\Rawdata_sisfall'
fs = os.listdir(data_path)  
len(fs)

4500

In [3]:
data=pd.DataFrame()

In [4]:
for f in tqdm(fs):
    file_path = os.path.join(data_path, f)
    df = pd.read_csv(file_path, usecols = [0,1,2], names = ["Ax", "Ay", "Az"])
    #if ('D01' in f) or ('D02' in f) or ('D03' in f) or ('D04' in f):
    #    df = df[:10000]
    df = df.dropna()#drop all the missing value
    df = df.loc[::2] #reduce from 200Hz to 100Hz 
    
    #convert the bit into acceleration(g)
    sensor_resolution = 13
    sensor_range = 16
    g = (2*sensor_range/2**sensor_resolution)
    
    df['Ax'] = g*df['Ax']
    df['Ay'] = g*df['Ay']
    df['Az'] = g*df['Az']
    
    #feature extraction 
    #Feature 1: sum vector magnitude (SVM) - C1
    A1 = []
    for i in range (df.shape[0]): #row by row
        A1.append(np.sqrt(df.iloc[i]['Ax']**2 + df.iloc[i]['Ay']**2 + df.iloc[i]['Az']**2))
    df['SVM'] = A1
    
    #Feature 2: SVM on horizontal plane - C2
    A2 = []
    for i in range(df.shape[0]):
        A2.append(np.sqrt(df.iloc[i]['Ax']**2 + df.iloc[i]['Az']**2))
    df['SVM_Horizontal'] = A2
    
    A3 = []
    #Feature 3: Angle between z-axis and vertical (in rad) - C4
    for i in range (df.shape[0]):
        A3.append(math.atan2(np.sqrt(df.iloc[i]['Ax']**2+df.iloc[i]['Ay']**2), df.iloc[i]['Az']))
    df['Angle_z_xy'] = A3
    
    max_SVM = df['SVM'].max()
    max_SVM_index = df.index[df.SVM == max_SVM][0]
    
    min_SVM = df['SVM'].min()
    min_SVM_index = df.index[df.SVM == min_SVM][0]
    len_df = len(df)
          
    #after feature extraction, we need to select the window frame 
    #create a 3s window frame 
    #100Hz - 1 datapoint per 0.01s so 1.5s will have 150 datapoints.   
    if (max_SVM_index - 150 < 0):
        df = df[0:301]
    else:
        if (max_SVM_index + 150 > len_df):
            df = df[len_df-301:len_df]
        else:
            df = df[max_SVM_index - 150 : max_SVM_index + 150]
    
    #tabulate the features for each sample
    extracted_features = ['Ax', 'Ay', 'Az', 'SVM', 'SVM_Horizontal', 'Angle_z_xy']
    for feature in extracted_features:
        df['Mean_'+feature] = df[feature].mean()
        df['Std_'+feature] = df[feature].std()
        df['Var_'+feature] = df[feature].var()
        df['Range_'+feature] = df[feature].max() - df[feature].min()
        
    #create label: 1-Fall; 0-Non-fall
    df['label'] = f[0] #the first word of the file (D/F)
    df['label'] = df['label'].map({'D':0, 'F':1})
    
    df.drop(['Ax','Ay','Az','SVM','SVM_Horizontal','Angle_z_xy'], axis=1, inplace=True)
    df = df[0:1] #only require the first row         
    
    data=pd.concat([data,df],axis=0)

100%|████████████████████████████████████████████████████████████████████████████| 4500/4500 [1:47:01<00:00,  1.43s/it]


In [5]:
data.to_csv('E:\XGBoost\dataset\sisfall_processed_100Hz.csv',index=False)