In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.signal import butter, filtfilt

In [2]:
def low_pass_filter(data, cutoff_freq, fs):
    nyquist = 0.5 * fs
    normal_cutoff = cutoff_freq / nyquist
    b, a = butter(4, normal_cutoff, btype='low', analog=False)
    filtered_data = filtfilt(b, a, data)
    return filtered_data

In [8]:
def process_csv_files(main_folder_path, cutoff_frequency=5, sampling_rate=505):
    lst=['x_data', 'y_data', 'z_data', 'x_jerk', 'y_jerk', 'z_jerk', 'total', 'total_jerk']
    feature_columns = ['time'] + ['folder_label']
    feature = pd.DataFrame(columns=feature_columns)

    folder_labels = {'Circular_Data': 1, 'DoubleTap':2, 'Square':3}

    for folder_name in os.listdir(main_folder_path):
        folder_path = os.path.join(main_folder_path, folder_name)
        if os.path.isdir(folder_path):
            folder_label = folder_labels.get(folder_name, None)
            if folder_label is None:
                continue  # Skip if folder not labeled
            for file_name in os.listdir(folder_path):
                if file_name.endswith('.csv'):
                    file_path = os.path.join(folder_path, file_name)
                    df = pd.read_csv(file_path)

                    # Rename columns and drop 'TgF'
                    df.rename(columns={'gFx': 'x_data', 'gFy': 'y_data', 'gFz': 'z_data'}, inplace=True)
                    df.drop(columns=['TgF'], inplace=True)

                    # Apply low-pass filter
                    df['x_data'] = low_pass_filter(df['x_data'], cutoff_frequency, sampling_rate)
                    df['y_data'] = low_pass_filter(df['y_data'], cutoff_frequency, sampling_rate)
                    df['z_data'] = low_pass_filter(df['z_data'], cutoff_frequency, sampling_rate)

                    # Calculate additional features
                    df['total'] = np.sqrt(df['x_data']**2 + df['y_data']**2 + df['z_data']**2)
                    df['x_jerk'] = (df['x_data'] - df['x_data'].shift(1)) / (df['time'] - df['time'].shift(1))
                    df['y_jerk'] = (df['y_data'] - df['y_data'].shift(1)) / (df['time'] - df['time'].shift(1))
                    df['z_jerk'] = (df['z_data'] - df['z_data'].shift(1)) / (df['time'] - df['time'].shift(1))
                    df['total_jerk'] = (df['total'] - df['total'].shift(1)) / (df['time'] - df['time'].shift(1))

                    # Calculate statistics
                    feature_dct={}
                    feature_dct['time'] = df['time'].max() - df['time'].min()
                    for j in lst:
                        temp_dict = calculate_statistics(df, j)
                        feature_dct.update(temp_dict)

                    # Append folder label
                    feature_dct['folder_label'] = folder_label

                    # Append feature dictionary to feature DataFrame
                    feature = feature.append(feature_dct, ignore_index=True)

    return feature

In [9]:
def calculate_statistics(df, column_name):
    column_data = df[column_name]
    stats_dict = {}
    
    stats_dict[column_name + '_min'] = np.min(column_data)
    
    stats_dict[column_name + '_max'] = np.max(column_data)

    stats_dict[column_name + '_range'] = np.ptp(column_data)

    # Calculate average
    stats_dict[column_name + '_avg'] = np.mean(column_data)

    # Calculate standard deviation
    stats_dict[column_name + '_std'] = np.std(column_data)

    # Calculate variance
    stats_dict[column_name + '_var'] = np.var(column_data)

    # Calculate root mean square (RMS)
    stats_dict[column_name + '_rms'] = np.sqrt(np.mean(column_data**2))

    return stats_dict

In [10]:
def det_time(df, column_name):
    return np.max(df[column_name])- np.min(df[column_name])

In [11]:
folder_path = '/home/ettexham/Desktop/Thesis/Accelerometer Data/'
result = process_csv_files(folder_path)

  feature = feature.append(feature_dct, ignore_index=True)
  feature = feature.append(feature_dct, ignore_index=True)
  feature = feature.append(feature_dct, ignore_index=True)
  feature = feature.append(feature_dct, ignore_index=True)
  feature = feature.append(feature_dct, ignore_index=True)
  feature = feature.append(feature_dct, ignore_index=True)
  feature = feature.append(feature_dct, ignore_index=True)
  feature = feature.append(feature_dct, ignore_index=True)
  feature = feature.append(feature_dct, ignore_index=True)
  feature = feature.append(feature_dct, ignore_index=True)
  feature = feature.append(feature_dct, ignore_index=True)
  feature = feature.append(feature_dct, ignore_index=True)
  feature = feature.append(feature_dct, ignore_index=True)
  feature = feature.append(feature_dct, ignore_index=True)
  feature = feature.append(feature_dct, ignore_index=True)
  feature = feature.append(feature_dct, ignore_index=True)
  feature = feature.append(feature_dct, ignore_index=Tru

  feature = feature.append(feature_dct, ignore_index=True)
  feature = feature.append(feature_dct, ignore_index=True)
  feature = feature.append(feature_dct, ignore_index=True)
  feature = feature.append(feature_dct, ignore_index=True)
  feature = feature.append(feature_dct, ignore_index=True)
  feature = feature.append(feature_dct, ignore_index=True)
  feature = feature.append(feature_dct, ignore_index=True)
  feature = feature.append(feature_dct, ignore_index=True)
  feature = feature.append(feature_dct, ignore_index=True)
  feature = feature.append(feature_dct, ignore_index=True)
  feature = feature.append(feature_dct, ignore_index=True)
  feature = feature.append(feature_dct, ignore_index=True)
  feature = feature.append(feature_dct, ignore_index=True)
  feature = feature.append(feature_dct, ignore_index=True)
  feature = feature.append(feature_dct, ignore_index=True)
  feature = feature.append(feature_dct, ignore_index=True)
  feature = feature.append(feature_dct, ignore_index=Tru

  feature = feature.append(feature_dct, ignore_index=True)
  feature = feature.append(feature_dct, ignore_index=True)
  feature = feature.append(feature_dct, ignore_index=True)
  feature = feature.append(feature_dct, ignore_index=True)
  feature = feature.append(feature_dct, ignore_index=True)
  feature = feature.append(feature_dct, ignore_index=True)
  feature = feature.append(feature_dct, ignore_index=True)
  feature = feature.append(feature_dct, ignore_index=True)
  feature = feature.append(feature_dct, ignore_index=True)
  feature = feature.append(feature_dct, ignore_index=True)
  feature = feature.append(feature_dct, ignore_index=True)
  feature = feature.append(feature_dct, ignore_index=True)
  feature = feature.append(feature_dct, ignore_index=True)
  feature = feature.append(feature_dct, ignore_index=True)
  feature = feature.append(feature_dct, ignore_index=True)
  feature = feature.append(feature_dct, ignore_index=True)
  feature = feature.append(feature_dct, ignore_index=Tru

  feature = feature.append(feature_dct, ignore_index=True)
  feature = feature.append(feature_dct, ignore_index=True)
  feature = feature.append(feature_dct, ignore_index=True)
  feature = feature.append(feature_dct, ignore_index=True)
  feature = feature.append(feature_dct, ignore_index=True)
  feature = feature.append(feature_dct, ignore_index=True)
  feature = feature.append(feature_dct, ignore_index=True)
  feature = feature.append(feature_dct, ignore_index=True)
  feature = feature.append(feature_dct, ignore_index=True)
  feature = feature.append(feature_dct, ignore_index=True)
  feature = feature.append(feature_dct, ignore_index=True)
  feature = feature.append(feature_dct, ignore_index=True)
  feature = feature.append(feature_dct, ignore_index=True)
  feature = feature.append(feature_dct, ignore_index=True)
  feature = feature.append(feature_dct, ignore_index=True)
  feature = feature.append(feature_dct, ignore_index=True)
  feature = feature.append(feature_dct, ignore_index=Tru

In [12]:
result

Unnamed: 0,time,folder_label,x_data_min,x_data_max,x_data_range,x_data_avg,x_data_std,x_data_var,x_data_rms,y_data_min,...,total_std,total_var,total_rms,total_jerk_min,total_jerk_max,total_jerk_range,total_jerk_avg,total_jerk_std,total_jerk_var,total_jerk_rms
0,4.967726,3.0,-0.594113,0.740917,1.335030,0.073815,0.173074,0.029955,0.188158,-0.488705,...,0.242476,0.058795,1.034585,-80.027925,66.822884,,-0.200353,8.186344,67.016227,8.188795
1,4.961296,3.0,-0.716802,0.768120,1.484922,0.010316,0.179077,0.032069,0.179374,-0.684318,...,0.264814,0.070127,1.043018,-138.544181,286.250237,,0.509831,18.071746,326.588019,18.078937
2,4.957216,3.0,-0.798691,0.740975,1.539666,-0.032505,0.198400,0.039363,0.201045,-0.681283,...,0.234759,0.055112,1.041098,-159.859528,192.511826,,0.110863,13.157349,173.115838,13.157816
3,4.961768,3.0,-0.694075,0.937472,1.631548,0.003114,0.236152,0.055768,0.236173,-0.921178,...,0.319942,0.102363,1.073930,-239.540551,149.506588,,-0.200193,22.435540,503.353445,22.436433
4,4.965329,3.0,-0.563631,0.756368,1.320000,0.079084,0.193159,0.037310,0.208721,-0.598885,...,0.219598,0.048223,1.034210,-109.110179,76.274626,,-0.324603,10.971519,120.374224,10.976320
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
192,3.969930,2.0,-4.112396,2.451951,6.564346,-0.342144,0.927126,0.859563,0.988244,-1.195450,...,0.621980,0.386859,1.350931,-624.798792,514.818332,,-0.786785,48.105565,2314.145344,48.111998
193,3.970507,2.0,-3.941188,2.104934,6.046122,-0.248794,0.862823,0.744463,0.897976,-1.144062,...,0.567768,0.322360,1.288868,-265.431335,464.052275,,1.747448,33.374251,1113.840654,33.419968
194,3.969322,2.0,-3.553735,2.060154,5.613889,-0.113843,0.850742,0.723762,0.858325,-1.049480,...,0.558239,0.311631,1.279830,-392.623841,487.419634,,0.052389,44.353811,1967.260581,44.353842
195,3.969753,2.0,-3.801698,2.231351,6.033049,-0.331447,0.841533,0.708178,0.904453,-0.986833,...,0.544671,0.296666,1.277531,-278.567231,190.801683,,-0.920665,28.396745,806.375114,28.411666


In [14]:
result['folder_label'].value_counts()

2.0    76
1.0    61
3.0    60
Name: folder_label, dtype: int64

In [16]:
result.to_csv('feature.csv', index=False)