In [1]:
import pandas as pd
from scipy import signal
import numpy as np
import matplotlib.pyplot as plt
import seaborn
# from helper_methods import *

In [2]:
# Apply seaborn's default styling to every matplotlib figure drawn in this notebook
seaborn.set()

In [3]:
# Global parameters
# Sampling rate in Hz. -1 is only a placeholder: it is overwritten with the value returned by
# get_sampling_rate() once a recording has been loaded, and must be set before any filter/FFT helper runs.
Fs = -1

In [4]:
# Low-pass cutoff frequency read by butterworth_lowpass
cutoff = 8  # in Hz

# Prefix prepended to the CSV file name when a recording is loaded (note the trailing slash)
INPUT_DIRECTORY = "data/"
# NOTE(review): not referenced anywhere in the visible notebook - presumably reserved for batch processing; confirm or remove
INPUT_FILES = []


## Functions (may not all be needed)

In [5]:
# Estimate the sampling rate of a recording from its time column
# [IN]
#  x: Series of relative timestamps in seconds (first sample at 0, last sample = total duration)
# [OUT]
#  samples per second (Hz), i.e. number of samples divided by the last timestamp, passed through round()
def get_sampling_rate(x):
    n_samples = len(x.index)
    duration = x.iloc[-1]  # the last relative timestamp is the length of the recording
    return round(n_samples / duration)

In [6]:
# Draw the total-acceleration trace against time and show the figure
# [IN]
#  df: Dataframe providing a 'time' column (x-axis, labelled in seconds) and an 'atotal' column (y-axis)
def plot_accel(df):
    _, axes = plt.subplots(figsize=(10, 5))
    axes.plot(df['time'].values, df['atotal'].values, 'b-', linewidth=1)
    axes.set_title('Total Acceleration')
    axes.set_xlabel('Time [s]')
    axes.set_ylabel('Acceleration [m/s^2]')
    plt.show()

In [7]:
# Apply a zero-phase (forward-backward) Butterworth low-pass filter to one column of samples.
# Intended to be used column-wise, e.g. df.apply(butterworth_lowpass, axis=0): filtfilt works along the
# last axis, so passing a whole DataFrame directly would filter across the columns of each row instead.
# [IN]
#  df: 1-D sequence/Series of acceleration samples (one column when called through DataFrame.apply)
#  fs: sampling rate in Hz; defaults to the notebook-level Fs
#  cutoff_hz: cutoff frequency in Hz; defaults to the notebook-level cutoff
#  order: Butterworth filter order (3 is the value that used to be hard-coded)
# [OUT]
#  ndarray with the filtered samples, same length as the input
# [RAISES]
#  ValueError if the sampling rate is not positive or the cutoff is not strictly between 0 and Nyquist
def butterworth_lowpass(df, fs=None, cutoff_hz=None, order=3):
    # Fall back to the notebook globals only when the caller did not supply a value,
    # so existing calls such as df.apply(butterworth_lowpass, axis=0) behave as before.
    if fs is None:
        fs = Fs
    if cutoff_hz is None:
        cutoff_hz = cutoff

    if fs <= 0:
        # Fs starts out as the placeholder -1; fail with a clear message instead of a cryptic SciPy one.
        raise ValueError(
            'Sampling rate must be positive (got {!r}); set Fs from the recording before filtering'.format(fs)
        )

    nyq = 0.5 * fs
    if not 0 < cutoff_hz < nyq:
        raise ValueError(
            'Cutoff {!r} Hz must lie strictly between 0 and the Nyquist frequency {!r} Hz'.format(cutoff_hz, nyq)
        )

    # signal.butter expects the cutoff as a fraction of the Nyquist frequency
    normalized_cutoff = cutoff_hz / nyq
    b, a = signal.butter(order, normalized_cutoff, btype='lowpass')
    return signal.filtfilt(b, a, df)

In [8]:
# Shows the FFT magnitude of the total acceleration as two figures:
#  1) one-sided spectrum (strictly positive frequencies, 0 Hz excluded)
#  2) full two-sided spectrum
# Frequencies are scaled with the notebook-level sampling rate Fs.
# [IN]
#  df: Dataframe containing acceleration values, must have 'atotal'
def plot_fft(df):
    w = np.fft.fft(df['atotal'])
    magnitude = np.abs(w)
    freqs = np.fft.fftfreq(len(w)) * Fs

    # Index of the last strictly-positive frequency bin. In fftfreq's ordering the positive bins
    # occupy indices 1..(n-1)//2; for even n the bin at n//2 is the (negative) Nyquist bin and is left out.
    last_positive = (len(w) - 1) // 2

    plt.figure(figsize=(10,5))
    plt.title('One-sided Frequency Spectrum (0 Hz excluded)')
    plt.xlabel('Frequency [Hz]')
    plt.ylabel('Magnitude')
    plt.plot(freqs[1:last_positive+1], magnitude[1:last_positive+1])

    plt.figure(figsize=(10,5))
    plt.title('Frequency Spectrum')
    plt.xlabel('Frequency [Hz]')
    plt.ylabel('Magnitude')
    # FFT output is ordered [0, positive..., negative...]; plotting it as-is draws a spurious line
    # joining the highest positive bin to the most negative one. fftshift reorders both arrays so
    # the frequencies increase monotonically from -Fs/2 to +Fs/2.
    plt.plot(np.fft.fftshift(freqs), np.fft.fftshift(magnitude))

    plt.show()

In [9]:
# Build a frequency/magnitude table from the FFT of the total acceleration
# [IN]
#  df: Dataframe containing acceleration values, must have 'atotal'
# [OUT]
#  Dataframe with columns ['freq', 'value']: 'freq' is the bin frequency scaled by the notebook-level Fs
#  (positive and negative bins, in FFT order), 'value' is the magnitude of the FFT at that bin
def get_fft(df):
    atotal = df['atotal']
    spectrum = np.fft.fft(atotal)
    bin_freqs = np.fft.fftfreq(len(atotal)) * Fs
    return pd.DataFrame({'freq': bin_freqs, 'value': np.abs(spectrum)})

In [10]:
# Normalise a raw column header so every recording uses ['ax', 'ay', 'az', 'atotal']:
# anything from the first '(' onwards (e.g. a units suffix) is dropped, surrounding whitespace is
# stripped, and the 'aT' label used by Alfred's phone is mapped to 'atotal'
# [IN]
#  col_name: the column name as a string
# [OUT]
#  the normalised column name (other names pass through with only the suffix/whitespace removed)
def format_column(col_name):
    base_name, _, _ = col_name.partition('(')
    base_name = base_name.strip()
    return 'atotal' if base_name == 'aT' else base_name

In [11]:
# For a file named <id>_<pos>_<step_count>.csv, returns the step count as an integer.
# Only the final path component is parsed, so a directory prefix (with '/' or '\' separators,
# and with or without underscores of its own) does not disturb the field positions.
# [IN]
#  filename: file name or path of the input csv
# [OUT]
#  the step count (third '_'-separated field of the file name, up to the first '.') as an integer
# [RAISES]
#  IndexError if the file name has fewer than three '_'-separated fields
#  ValueError if the step-count field is not an integer
def extract_step_count(filename):
    # Strip any directory part first: splitting the whole path on '_' would shift the fields
    # as soon as a directory name contains an underscore (e.g. 'my_data/1_ankle1_100.csv').
    base_name = filename.replace('\\', '/').rsplit('/', 1)[-1]
    count_with_csv = base_name.split('_')[2]
    count = count_with_csv.split('.')[0]
    return int(count)

In [12]:
# Print a short report for one frequency estimate: the frequency itself, the steps it implies
# over 60 seconds and over the whole recording, and the real step count when one is known
# [IN]
#  pred_freq: the predicted step frequency
#  duration: the entire time taken for data recording
#  real_step_count: the actual step count from data collection; only printed when greater than 0
#                   (the default -1 means "not available")
def print_step_comparisons(pred_freq, duration, real_step_count=-1):
    steps_per_minute = pred_freq * 60
    steps_over_recording = pred_freq * duration

    print('Predicted step frequency: ', pred_freq)
    print('Predicted step frequency (assuming 60s walking): ', round(steps_per_minute))
    print('Predicted step count (using total duration): ', round(steps_over_recording))

    if not real_step_count > 0:
        return
    print('Real step count: ', real_step_count)

In [13]:
# Convert a step frequency into step counts
# [IN]
#  pred_freq: the predicted step frequency
#  duration: the entire time taken for data recording
# [OUT]
#  tuple (predicted_step_count, predicted_step_60):
#   predicted_step_count: steps over the entire duration, round(pred_freq * duration)
#   predicted_step_60: steps over 60 seconds, round(pred_freq * 60)
def get_predicted_steps(pred_freq, duration):
    steps_total, steps_per_minute = (
        round(pred_freq * seconds) for seconds in (duration, 60)
    )
    return (steps_total, steps_per_minute)

## Initialize Dataframe for final values

In [14]:
# One row per processed recording: the file name, the real step count, and for each of the three
# estimation methods the estimated step frequency (freqN) and the predicted steps (stepsN)
results = pd.DataFrame(columns=['filename', 'real_steps', 'freq1', 'steps1', 'freq2','steps2', 'freq3', 'steps3'])
results

Unnamed: 0,filename,real_steps,freq1,steps1,freq2,steps2,freq3,steps3


In [15]:
# Values for the current recording's row, appended cell by cell in the same order as the columns of `results`
new_row = []

## Read data from .csv

In [16]:
filename = INPUT_DIRECTORY + '1_ankle1_100.csv'

# Start the row afresh here (instead of appending) so re-running this cell cannot
# leave the file name in new_row twice.
new_row = [filename]

# Drop columns that are entirely empty and normalise the headers *before* any column is
# accessed by name: recordings whose headers carry a units suffix only expose 'time',
# 'ax', ... after format_column has been applied.
data = (
    pd.read_csv(filename)
    .dropna(axis=1, how='all')
    .rename(columns=format_column)
)

# Make timestamps relative to the first sample so the recording starts at t = 0
data['time'] = data['time'] - data['time'].iloc[0]
data

Unnamed: 0,time,ax,ay,az,atotal
0,0.000000,-0.0307,0.0410,0.0209,0.055
1,0.001124,0.0413,0.0191,0.0289,0.054
2,0.001571,0.1131,0.0758,0.0774,0.157
3,0.011621,0.0729,0.1319,0.1543,0.216
4,0.012436,0.0067,0.0760,0.1240,0.146
...,...,...,...,...,...
17232,85.568552,-0.1385,-0.1635,-0.3993,0.453
17233,85.574097,-0.0076,-0.0726,-1.7910,1.793
17234,85.578660,0.1535,0.0881,-1.4361,1.447
17235,85.583472,0.3926,0.1603,-0.5206,0.671


In [17]:
# Length of the recording: 'time' was shifted to start at 0, so the last timestamp is the total duration
total_duration = data['time'].iloc[-1]
total_duration

85.58783899999999

In [18]:
# Replace the placeholder global Fs with the rate estimated from this recording;
# the filter and FFT helpers read this global, so it must be set before they are called
Fs = get_sampling_rate(data['time'])
Fs

201

## Plot x, y, z acceleration and magnitude (raw data)

In [19]:
# plot_accel(data)

In [20]:
# plot_fft(data)

## Filtering data

In [21]:
# Low-pass filter every acceleration column, then rebuild the magnitude from the filtered
# x/y/z axes (this replaces the filtered 'atotal' column produced by apply) and re-attach the time axis
data_accel = data[['ax', 'ay', 'az', 'atotal']].copy()
data_accel_filtered = (
    data_accel
    .apply(butterworth_lowpass, axis=0)
    .assign(
        atotal=lambda d: np.sqrt(d['ax']**2 + d['ay']**2 + d['az']**2),
        time=data['time'].copy(),
    )
)

In [22]:
# plot_accel(data_accel_filtered)

In [23]:
# plot_fft(data_accel_filtered)

## Step Frequency Analysis

In [24]:
# The ground-truth step count is encoded in the file name (<id>_<pos>_<step_count>.csv);
# it becomes the second value of the results row
real_step_count = extract_step_count(filename)
new_row.append(real_step_count)
real_step_count

100

In [25]:
# FFT magnitude of the filtered total acceleration, one row per frequency bin (positive and negative)
accel_fft = get_fft(data_accel_filtered)
accel_fft

Unnamed: 0,freq,value
0,0.000000,80226.670639
1,0.011661,25152.215276
2,0.023322,14931.922778
3,0.034983,5960.177294
4,0.046644,2307.218481
...,...,...
17232,-0.058305,5833.179092
17233,-0.046644,2307.218481
17234,-0.034983,5960.177294
17235,-0.023322,14931.922778


In [26]:
# Keep the 10 strongest positive-frequency bins; nlargest returns them sorted by descending magnitude
# NOTE(review): `freq > 0` only removes the exact 0 Hz bin. The near-DC bins (0.0117 Hz and 0.0233 Hz in the
# recorded output) still rank among the strongest candidates and skew the estimates of the methods below -
# consider a lower bound on plausible step frequencies.
candidate_freqs = accel_fft[accel_fft.freq > 0].nlargest(10, ['value'])
# print(candidate_freqs.to_string(index=False))
candidate_freqs

Unnamed: 0,freq,value
1,0.011661,25152.215276
148,1.725822,16497.913495
75,0.874572,15658.340591
2,0.023322,14931.922778
150,1.749144,14485.543075
151,1.760805,10437.479634
152,1.772466,9510.679056
73,0.85125,8915.294738
223,2.600395,8747.288182
153,1.784127,8104.867931


In [27]:
# Method 1:
# Take the candidate frequency with largest magnitude as the step frequency
# (candidate_freqs is sorted by descending magnitude, so row 0 is the strongest bin)

estimated_freq = candidate_freqs['freq'].iloc[0]
new_row.append(estimated_freq)

print_step_comparisons(estimated_freq, total_duration, real_step_count)
# NOTE(review): get_predicted_steps returns a (whole-recording, 60 s) tuple, so the 'steps1' cell of
# `results` ends up holding a tuple rather than a single count - confirm this is intended
new_row.append(get_predicted_steps(estimated_freq, total_duration))

Predicted step frequency:  0.011660961884318618
Predicted step frequency (assuming 60s walking):  1
Predicted step count (using total duration):  1
Real step count:  100


In [28]:
# Method 2:
# Take the mean of the candidate frequencies as step frequency
# (unweighted mean over all 10 candidates, regardless of their magnitude)

estimated_freq = candidate_freqs['freq'].mean()
new_row.append(estimated_freq)

print_step_comparisons(estimated_freq, total_duration, real_step_count)
# NOTE(review): get_predicted_steps returns a (whole-recording, 60 s) tuple, so the 'steps2' cell of
# `results` ends up holding a tuple rather than a single count - confirm this is intended
new_row.append(get_predicted_steps(estimated_freq, total_duration))

Predicted step frequency:  1.31535650055114
Predicted step frequency (assuming 60s walking):  79
Predicted step count (using total duration):  113
Real step count:  100


In [29]:
# Method 3:
# Take the mean of candidate frequencies whose magnitudes are greater than half of the maximum magnitude (excluding 0 Hz)

# candidate_freqs is sorted by descending magnitude, so row 0 carries the maximum
max_value = candidate_freqs['value'].iloc[0]
best_freq = candidate_freqs[candidate_freqs['value'] > (max_value / 2)]

estimated_freq = best_freq['freq'].mean()
new_row.append(estimated_freq)

print_step_comparisons(estimated_freq, total_duration, real_step_count)
# NOTE(review): get_predicted_steps returns a (whole-recording, 60 s) tuple, so the 'steps3' cell of
# `results` ends up holding a tuple rather than a single count - confirm this is intended
new_row.append(get_predicted_steps(estimated_freq, total_duration))

Predicted step frequency:  0.87690433370076
Predicted step frequency (assuming 60s walking):  53
Predicted step count (using total duration):  75
Real step count:  100


In [30]:
# Inspect the assembled row: file name, real step count, then (frequency, predicted steps) for each method
new_row

['data/1_ankle1_100.csv',
 100,
 0.011660961884318618,
 (1, 1),
 1.31535650055114,
 (113, 79),
 0.87690433370076,
 (75, 53)]

## Append results to Dataframe

In [31]:
# Add the row under the next integer label; new_row must hold exactly one value per column of `results`.
# NOTE: re-running this cell appends the same row again.
results.loc[len(results)] = new_row

In [32]:
# Display the accumulated results table
results

Unnamed: 0,filename,real_steps,freq1,steps1,freq2,steps2,freq3,steps3
0,data/1_ankle1_100.csv,100,0.011661,"(1, 1)",1.315357,"(113, 79)",0.876904,"(75, 53)"
