In [1]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
@author:    gkweston
github:     https://github.com/gkweston

This script takes raw biometric and accelerometer data files for each participant and merges
them while retaining datetime accuracy and sorting stress moments into the output file. Many of
these processes are necessary to retain proper formatting w/o corrupting time information.

NOTE: String slices are used to manage I/O naming
"""

import glob as gb
import os

import numpy as np
import pandas as pd

In [2]:
# Root locations used by every later cell (absolute paths on the author's machine).
masterDir = r"/Users/gkweston/git/ptsd-continuous-monitoring/data"
outputDir = r"/Users/gkweston/git/ptsd-continuous-monitoring/data/prep_output/"
monitorDir = r"/Users/gkweston/Desktop/workingFiles/acelab/proj2/data_monitor/"

# Input CSV paths, each list ordered by path so the three lists can be walked in step.
bioFiles = sorted(gb.glob(masterDir + r"/k_imputed/" + "*.csv"))
rawBioFiles = sorted(gb.glob(masterDir + r"/raw_bio/" + "*.csv"))
motionFiles = sorted(gb.glob(masterDir + r"/raw_motion/" + "*.csv"))

# Directories that receive the pre-merge processed files.
bioFix = masterDir + r"/bio_prep/"
motionFix = masterDir + r"/motion_prep/"

In [3]:
"""
This process reformats motion files by changing the column hierarchy
"""
# For every raw motion file: keep only its 'ACCELERATION' column and write it to the
# motion_prep directory as motion_<NN>.csv, where <NN> is the two-character participant id.
for file in motionFiles:
    # Take the names from the last path component rather than from fixed character
    # offsets into the absolute path, so the cell survives a moved/renamed data directory.
    fileName = os.path.basename(file)                 # e.g. "motiondata_04.csv"
    prefix_len = len("motiondata_")
    fileNum = fileName[prefix_len:prefix_len + 2]     # e.g. "04"
    print("Premerge processing: " + fileName)

    # read_csv already returns a DataFrame; no extra wrapping is needed.
    prim_df = pd.read_csv(file, low_memory=False)
    sec_df = prim_df['ACCELERATION']

    # The default of `header` for Series.to_csv differs between pandas versions
    # (older releases wrote no header row and warned that this would change).
    # NOTE(review): the warning captured in this cell's recorded output appears to come
    # from this call, i.e. the recorded run produced header-less files; header=False
    # pins that layout regardless of the installed pandas version - confirm.
    sec_df.to_csv(motionFix + "motion_" + fileNum + ".csv", header=False)

Premerge processing: motiondata_04.csv


  # This is added back by InteractiveShellApp.init_path()


Premerge processing: motiondata_13.csv
Premerge processing: motiondata_14.csv
Premerge processing: motiondata_15.csv
Premerge processing: motiondata_25.csv


In [4]:
"""  
This process merges stress moments in dataframe by time, but drops all other appended info.
"""
# Left in place so the session-wide pandas option state is unchanged for any other cell;
# the explicit .copy() below means this cell itself no longer depends on it.
pd.options.mode.chained_assignment = None

# For every imputed biometric file: take the stress-moment rows from the matching raw
# file, flag them with smoment=1, append them to the imputed readings, sort everything
# by startDate and write the result to the bio_prep directory as biodata_<NN>i.csv.
for i in range(len(bioFiles)):
    file = bioFiles[i]
    rFile = rawBioFiles[i]  # paired with `file` purely by position in the sorted lists

    # Take the names from the last path component rather than from fixed character
    # offsets into the absolute path, so the cell survives a moved/renamed data directory.
    fileName = os.path.basename(file)                 # e.g. "biodata_04i.csv"
    prefix_len = len("biodata_")
    fileNum = fileName[prefix_len:prefix_len + 2]     # e.g. "04"
    print("Premerge processing: " + fileName)

    # read_csv already returns a DataFrame; no extra wrapping is needed.
    bio_df = pd.read_csv(file, low_memory=False)
    rBio_df = pd.read_csv(rFile, low_memory=False)

    # kalman imputation drops index of stress moments, raw files must be used
    stress_rows = rBio_df.loc[rBio_df['startDate'] == 'STRESSES'].index
    assess_rows = rBio_df.loc[rBio_df['startDate'] == 'ASSESSMENTS'].index
    if len(stress_rows) == 0 or len(assess_rows) == 0:
        # Fail with the offending file named instead of an opaque IndexError.
        raise ValueError(
            "Missing 'STRESSES' and/or 'ASSESSMENTS' marker row in startDate column of "
            + rFile
        )
    stressLoc = stress_rows[0]
    assessLoc = assess_rows[0]

    # Rows strictly between the two marker rows are the stress moments. Copy the slice
    # so adding a column modifies an independent frame, not a view of rBio_df.
    moment_df = rBio_df[stressLoc + 1:assessLoc].copy()
    moment_df['smoment'] = 1
    moment_df = moment_df.set_index('startDate')

    # Keep the imputed rows that precede the raw file's STRESSES marker position.
    # NOTE(review): this assumes the imputed file has the same row layout as the raw
    # file up to that marker - confirm.
    bio_drop = bio_df[0:stressLoc].set_index('startDate')

    bio_out = pd.concat([bio_drop, moment_df], sort=False).sort_index(ascending=True)

    # "Unnamed: 0" is the unnamed first CSV column carried in from the input file.
    bio_out = bio_out.drop("Unnamed: 0", axis=1)
    bio_out.to_csv(bioFix + "biodata_" + fileNum + "i.csv")

print("\nPremerge processing completed")

Premerge processing: biodata_04i.csv
Premerge processing: biodata_13i.csv
Premerge processing: biodata_14i.csv
Premerge processing: biodata_15i.csv
Premerge processing: biodata_25i.csv

Premerge processing completed


In [5]:
# Gather the prepared motion and bio CSVs as path lists, each ordered by path.
motionFilesFin = sorted(gb.glob(motionFix + "/*.csv"))
bioFilesFin = sorted(gb.glob(bioFix + "/*.csv"))

In [6]:
# Merge each prepared bio file with the prepared motion file at the same list position
# and write the combined table to both the output and the monitor directory.
if len(motionFilesFin) < len(bioFilesFin):
    # Fail before writing anything rather than part-way through the loop.
    raise ValueError(
        f"Found {len(bioFilesFin)} bio files but only {len(motionFilesFin)} motion files"
    )

for i in range(len(bioFilesFin)):
    # Take the names from the last path component rather than from fixed character
    # offsets into the absolute path, so the cell survives a moved/renamed data directory.
    bFileName = os.path.basename(bioFilesFin[i])      # e.g. "biodata_04i.csv"
    mFileName = os.path.basename(motionFilesFin[i])   # e.g. "motion_04.csv"
    bio_prefix_len = len("biodata_")
    motion_prefix_len = len("motion_")
    fileNum = bFileName[bio_prefix_len:bio_prefix_len + 2]
    motionNum = mFileName[motion_prefix_len:motion_prefix_len + 2]

    # The two lists are paired by position only; refuse to merge files whose
    # participant ids differ (e.g. because one participant's motion file is missing).
    if motionNum != fileNum:
        raise ValueError(
            f"Participant mismatch at position {i}: {bFileName} vs {mFileName}"
        )

    print(f"Merging: {bFileName}, {mFileName}")

    # read_csv already returns a DataFrame; no extra wrapping is needed.
    bio_df = pd.read_csv(bioFilesFin[i], low_memory=False)
    bio_df.columns = ['date', 'hr', 'smoment']

    motion_df = pd.read_csv(motionFilesFin[i], low_memory=False)

    # No `on` is given, so pandas joins on the column names the two frames share.
    # `sort` is a boolean flag; the previous string value only worked because any
    # non-empty string is truthy.
    merge_df = bio_df.merge(motion_df, how='outer', sort=True)

    merge_df['participant'] = fileNum
    merge_df.to_csv(outputDir + r"participant_" + fileNum + r".csv")
    merge_df.to_csv(monitorDir + r"participant_" + fileNum + r".csv")

print("\nAll processes completed successfully.")

Merging: biodata_04i.csv, motion_04.csv
Merging: biodata_13i.csv, motion_13.csv
Merging: biodata_14i.csv, motion_14.csv
Merging: biodata_15i.csv, motion_15.csv
Merging: biodata_25i.csv, motion_25.csv

All processes completed successfully.
