In [1]:
## Should have all the same signal processing functions from the signal package in R:
## https://cran.r-project.org/web/packages/signal/signal.pdf
from scipy.signal import butter, lfilter

## https://docs.scipy.org/doc/scipy/reference/signal.html

import pandas as pd
import numpy as np
import csv
import glob
import matplotlib.pyplot as plt
import datetime
import math as math
from zipfile import ZipFile

In [2]:
def get_utc_date_time(ts):
    return pd.to_datetime(ts, unit='s', infer_datetime_format = True)

def add_fs(sample_rate, date):
    return date + datetime.timedelta(milliseconds=1.0/(sample_rate) * 1000.0)

Helper function that will generate the list of timestamps that will be added as a new column to our dataframe

In [3]:
def time_range(df_length, start_timestamp, sample_rate):
    # Generate date time range based on sample rate and starting timestamp
    time_range = []
    t_0 = get_utc_date_time(float(start_timestamp))
    time_range.append(t_0)
    
    next_date = t_0
    
    for count in range(df_length-1):
        next_date = add_fs(sample_rate, next_date)
        time_range.append(next_date)
    return time_range

Fourier transform that takes a df with X, Y, Z columns and returns one with single column of value:
    sqrt(x^2 + y^2 + z^2)

In [4]:
def fourier_transform(df):
    '''fourier transformation: sqrt(x^2 + y^2 + z^2)'''
    new_columns = { df.columns[0]: 'X', df.columns[1]: 'Y', df.columns[2]: 'Z' }
    df = df.rename(columns=new_columns)
    transformed_df = pd.DataFrame()
    transformed_df['Transformed'] = df.apply(lambda row: f_t(row), axis=1)
    print(transformed_df)
    return transformed_df

def f_t(row):
    return math.sqrt(row['X']**2 + row['Y']**2 + row['Z']**2)
    

In [5]:
base_path = r"C:\Users\Dan Cohen\Desktop\Empatica_Data\ID PR025\Empatica data" 
participant_id = "PR025"

In [6]:
list_of_zips = glob.glob(base_path + "/A*.zip")

In [None]:
hr_all = []

for zip_file in list_of_zips:
    zf = ZipFile(zip_file)
    acc_df_raw =pd.read_csv(zf.open('ACC.csv'))
    
    starting_timestamp = acc_df_raw.columns[0]
    
    acc_df_raw = acc_df_raw.drop([0, 0]) #First row is the sample rate, we don't need it for this script
    ft_df = fourier_transform(acc_df_raw)
    ft_df['ParticipantId'] = participant_id
    print(ft_df)
    

    #time_col = time_range(len(acc_df_raw), starting_timestamp, sample_rate)
    
    #acc_df['Timestamp'] = time_col
    #acc_df = acc_df.rename(columns={starting_timestamp:"ACC"})
    #acc_df['ParticipantId'] = participant_id
    
    acc_all.append(acc_df)

In [110]:
acc_df_all = pd.concat(acc_df)

In [111]:
acc_df_all

Unnamed: 0,HR,Timestamp,ParticipantId
1,49.00,2020-12-15 13:59:18,PR025
2,49.00,2020-12-15 13:59:19,PR025
3,88.00,2020-12-15 13:59:20,PR025
4,78.00,2020-12-15 13:59:21,PR025
5,74.00,2020-12-15 13:59:22,PR025
6,73.83,2020-12-15 13:59:23,PR025
7,74.86,2020-12-15 13:59:24,PR025
8,75.62,2020-12-15 13:59:25,PR025
9,76.33,2020-12-15 13:59:26,PR025
10,77.00,2020-12-15 13:59:27,PR025
