In [1]:
## Should have all the same signal processing functions from the signal package in R:
## https://cran.r-project.org/web/packages/signal/signal.pdf
from scipy.signal import butter, lfilter

## https://docs.scipy.org/doc/scipy/reference/signal.html

import pandas as pd
import numpy as np
import csv
import glob
import matplotlib.pyplot as plt
import datetime
import math as math
from zipfile import ZipFile

In [2]:
def get_utc_date_time(ts):
    '''Convert a Unix epoch time expressed in seconds to a pandas datetime.

    Parameters
    ----------
    ts : number (or anything ``pd.to_datetime`` accepts together with ``unit='s'``)
        Seconds elapsed since 1970-01-01 00:00:00.

    Returns
    -------
    The result of ``pd.to_datetime(ts, unit='s')`` -- a ``pd.Timestamp`` for a
    scalar input. ``utc=True`` is not passed, so the result carries no
    timezone information.
    '''
    # ``infer_datetime_format`` was dropped: it only concerns parsing of date
    # strings, is deprecated since pandas 2.0 (passing it triggers a warning),
    # and has no bearing on numeric epoch input converted via ``unit``.
    return pd.to_datetime(ts, unit='s')

def add_fs(sample_rate, date):
    '''Return ``date`` advanced by one sample period.

    ``sample_rate`` is the number of samples per second; the period is
    expressed in milliseconds and added to ``date`` as a ``timedelta``.
    '''
    period_ms = 1.0 / (sample_rate) * 1000.0
    one_sample = datetime.timedelta(milliseconds=period_ms)
    return date + one_sample

Helper function that generates the list of timestamps to be added as a new column of our DataFrame: one timestamp per sample, beginning at the recording's start time and spaced one sample period apart.

In [3]:
def time_range(df_length, start_timestamp, sample_rate):
    '''Generate one timestamp per sample from a start time and a sample rate.

    NOTE: a later cell of this notebook defines ``time_range`` again; the
    later definition is the one in effect once all cells have run.

    Parameters
    ----------
    df_length : int
        Number of timestamps to generate (the number of rows to label).
    start_timestamp : str or number
        Start time in Unix epoch seconds; converted with ``float()``.
    sample_rate : number
        Samples per second.

    Returns
    -------
    list of pd.Timestamp
        ``df_length`` timestamps, the i-th being the start time plus
        ``i / sample_rate`` seconds. Empty when ``df_length`` is not positive.
    '''
    # Nothing to label: return an empty list so the result always has exactly
    # ``df_length`` entries (a lone start timestamp would not fit an empty frame).
    if df_length <= 0:
        return []

    t_0 = get_utc_date_time(float(start_timestamp))

    # Compute every offset from the sample index in one vectorised step rather
    # than adding one period at a time in a Python loop. Offsets are rounded
    # to the nearest nanosecond independently, so rounding does not accumulate.
    period_ns = 1e9 / float(sample_rate)
    offsets_ns = np.rint(np.arange(df_length) * period_ns).astype('int64')
    return list(t_0 + pd.to_timedelta(offsets_ns, unit='ns'))

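Despite its name, `fourier_transform` does not compute a Fourier transform. It takes a DataFrame with X, Y, Z columns and returns a DataFrame with a single column, `Transformed`, holding the Euclidean magnitude of each sample:
    $\sqrt{x^2 + y^2 + z^2}$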

In [4]:
def fourier_transform(df):
    '''Per-row vector magnitude sqrt(x^2 + y^2 + z^2) of the X, Y, Z columns.

    Despite the name, no Fourier transform is involved.

    Parameters
    ----------
    df : pd.DataFrame
        Must contain numeric columns 'X', 'Y' and 'Z'; other columns are ignored.

    Returns
    -------
    pd.DataFrame
        A single float column 'Transformed', indexed like ``df``.
    '''
    # Promote to float64 before squaring: with narrow integer columns such as
    # int16 the sum of squares can exceed the dtype's range and overflow.
    axes = df[['X', 'Y', 'Z']].astype('float64')
    # Whole-column arithmetic replaces the per-row Python callback; explicit
    # addition (not .sum) keeps NaN propagating as in the scalar formula.
    magnitude = np.sqrt(axes['X'] ** 2 + axes['Y'] ** 2 + axes['Z'] ** 2)
    return pd.DataFrame({'Transformed': magnitude})

def f_t(row):
    '''Return sqrt(X^2 + Y^2 + Z^2) for one row.

    ``row`` is anything indexable by the keys 'X', 'Y' and 'Z' (for example a
    DataFrame row); the result is a Python float.
    '''
    # Convert to float before squaring: fixed-width integer scalars such as
    # numpy int16 can overflow once the squares are added together.
    x = float(row['X'])
    y = float(row['Y'])
    z = float(row['Z'])
    return math.sqrt(x ** 2 + y ** 2 + z ** 2)
    

In [5]:
def time_range(df_length, start_timestamp, sample_rate):
    '''Generate one timestamp per sample from a start time and a sample rate.

    NOTE: this repeats a ``time_range`` definition from an earlier cell of the
    notebook; being the later one, it is the definition in effect.

    Parameters
    ----------
    df_length : int
        Number of timestamps to generate (the number of rows to label).
    start_timestamp : str or number
        Start time in Unix epoch seconds; converted with ``float()``.
    sample_rate : number
        Samples per second.

    Returns
    -------
    list of pd.Timestamp
        ``df_length`` timestamps, the i-th being the start time plus
        ``i / sample_rate`` seconds. Empty when ``df_length`` is not positive.
    '''
    # Nothing to label: return an empty list so the result always has exactly
    # ``df_length`` entries (a lone start timestamp would not fit an empty frame).
    if df_length <= 0:
        return []

    t_0 = get_utc_date_time(float(start_timestamp))

    # Compute every offset from the sample index in one vectorised step rather
    # than adding one period at a time in a Python loop. Offsets are rounded
    # to the nearest nanosecond independently, so rounding does not accumulate.
    period_ns = 1e9 / float(sample_rate)
    offsets_ns = np.rint(np.arange(df_length) * period_ns).astype('int64')
    return list(t_0 + pd.to_timedelta(offsets_ns, unit='ns'))

In [6]:
# Folder that is searched for this participant's Empatica export archives (A*.zip).
# NOTE(review): hard-coded absolute path on one user's machine -- the notebook
# cannot run elsewhere without editing this line; consider a configurable data
# directory. The participant ID also appears inside the path, so the two
# values below must be changed together.
base_path = r"C:\Users\Dan Cohen\Desktop\Empatica_Data\ID PR025\Empatica data" 
# Label written to the ParticipantId column of every loaded row.
participant_id = "PR025"

In [7]:
# glob gives no ordering guarantee (it depends on the file system); sort the
# matches so the archives are processed -- and later concatenated -- in the
# same order on every run.
list_of_zips = sorted(glob.glob(base_path + "/A*.zip"))

In [None]:
# One DataFrame per archive, in the order of list_of_zips.
acc_all = []

for zip_file in list_of_zips:
    # Context managers close both the archive and the member stream, even if
    # parsing fails part-way; previously every ZipFile handle was left open.
    with ZipFile(zip_file) as zf, zf.open('ACC.csv') as acc_file:
        acc_df_raw = pd.read_csv(acc_file, dtype='int16')

    # The header row is parsed as column names; the first name is used as the
    # recording's start time (epoch seconds, converted later by time_range).
    starting_timestamp = acc_df_raw.columns[0]

    # The first data row carries the sample rate. Read it from the file instead
    # of hard-coding 32, then drop that row so only samples remain.
    sample_rate = float(acc_df_raw.iloc[0, 0])
    acc_df_raw = acc_df_raw.drop(index=0)

    new_columns = { acc_df_raw.columns[0]: 'X', acc_df_raw.columns[1]: 'Y', acc_df_raw.columns[2]: 'Z' }
    acc_df_raw = acc_df_raw.rename(columns=new_columns)
    acc_df_raw['ParticipantId'] = participant_id

    # One timestamp per remaining sample, spaced by the file's own sample rate.
    time_col = time_range(len(acc_df_raw), starting_timestamp, sample_rate)
    acc_df_raw['Timestamp'] = time_col

    acc_all.append(acc_df_raw)

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1466928 entries, 1 to 1466928
Data columns (total 3 columns):
X    1466928 non-null int16
Y    1466928 non-null int16
Z    1466928 non-null int16
dtypes: int16(3)
memory usage: 19.6 MB
None
<class 'pandas.core.frame.DataFrame'>
Int64Index: 1356756 entries, 1 to 1356756
Data columns (total 3 columns):
X    1356756 non-null int16
Y    1356756 non-null int16
Z    1356756 non-null int16
dtypes: int16(3)
memory usage: 18.1 MB
None
<class 'pandas.core.frame.DataFrame'>
Int64Index: 1273974 entries, 1 to 1273974
Data columns (total 3 columns):
X    1273974 non-null int16
Y    1273974 non-null int16
Z    1273974 non-null int16
dtypes: int16(3)
memory usage: 17.0 MB
None
<class 'pandas.core.frame.DataFrame'>
Int64Index: 1377114 entries, 1 to 1377114
Data columns (total 3 columns):
X    1377114 non-null int16
Y    1377114 non-null int16
Z    1377114 non-null int16
dtypes: int16(3)
memory usage: 18.4 MB
None


In [11]:
# Stack the per-archive frames into a single DataFrame. Each frame keeps its
# own row labels (pd.concat is called without ignore_index). The former
# placeholder assignment `acc_df_all = []` was a dead store and is removed.
acc_df_all = pd.concat(acc_all)

In [12]:
# Show a bounded preview rather than rendering the whole multi-million-row frame.
acc_df_all.head(10)

Unnamed: 0,X,Y,Z,ParticipantId,Timestamp
1,-18,-6,61,PR025,2020-12-15 13:59:08.000000
2,-18,-6,61,PR025,2020-12-15 13:59:08.031250
3,-18,-7,61,PR025,2020-12-15 13:59:08.062500
4,-18,-7,61,PR025,2020-12-15 13:59:08.093750
5,-18,-8,62,PR025,2020-12-15 13:59:08.125000
6,-19,-8,61,PR025,2020-12-15 13:59:08.156250
7,-18,-7,61,PR025,2020-12-15 13:59:08.187500
8,-18,-7,62,PR025,2020-12-15 13:59:08.218750
9,-17,-7,61,PR025,2020-12-15 13:59:08.250000
10,-18,-6,61,PR025,2020-12-15 13:59:08.281250
