In [159]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
@author: gkweston
This script uses string slices to manage directory/file names in I/O,
so the naming convention must be aligned with the slices for it to execute.
"""

import glob as gb
import os

import numpy as np
import pandas as pd

DEBUG = False  # debug switch; NOTE(review): not referenced by any cell visible in this notebook

In [160]:
# Root folders for the raw inputs and the final merged output.
masterDir = "/Users/gkweston/Desktop/workingFiles/acelab/proj2/data_raw"
outputDir = "/Users/gkweston/Desktop/workingFiles/acelab/proj2/final_output/"

# Folders (under the raw-data root) that receive the pre-processed "fixed" files.
motionFix = f"{masterDir}/motion_fix/"
bioFix = f"{masterDir}/bio_fix/"

In [161]:
# Collect the raw bio and motion CSV paths, each list in sorted order
bioFiles = sorted(gb.glob(masterDir + r"/bio/" + "*.csv"))
motionFiles = sorted(gb.glob(masterDir + r"/motion/" + "*.csv"))

# Filled by the bio pre-merge loop with the files that are actually kept
selectBioFiles, selectMotFiles = [], []

"""  
This process inserts stress moments into the dataframe by time, but drops all other appended info.
It automatically skips users without stress moments.
"""
# Silence SettingWithCopyWarning notebook-wide. This loop no longer assigns into
# a slice, but the option is kept so the notebook's warning behaviour is unchanged.
pd.options.mode.chained_assignment = None

# Bio pre-merge.
# Each raw bio file holds the time series first, then a 'STRESSES' marker row,
# the stress-moment rows, and an 'ASSESSMENTS' marker row (markers live in the
# 'startDate' column). Rows between the two markers are tagged smoment=1 and
# merged back into the rows above 'STRESSES'; the marker rows and everything from
# 'ASSESSMENTS' onward are dropped. Files without stress moments are skipped.
for file in bioFiles:
    bio_df = pd.read_csv(file, low_memory=False)

    # Derive names from the file name itself rather than from fixed character
    # offsets, so the cell keeps working when masterDir changes.
    bio_name = os.path.basename(file)    # e.g. "bio_04.csv"
    file_id = bio_name[len("bio_"):]     # e.g. "04.csv"

    stress_rows = bio_df.index[bio_df['startDate'] == 'STRESSES']
    assess_rows = bio_df.index[bio_df['startDate'] == 'ASSESSMENTS']
    if len(stress_rows) == 0 or len(assess_rows) == 0:
        # Previously this raised IndexError; a file without the section markers
        # has no usable stress moments, so skip it like any other such user.
        print(f"Skipping {bio_name}: missing STRESSES/ASSESSMENTS marker row")
        continue
    stressLoc = stress_rows[0]
    assessLoc = assess_rows[0]

    # .assign returns a new frame, so nothing is written into a slice of bio_df.
    moment_df = bio_df[stressLoc + 1:assessLoc].assign(smoment=1)
    if len(moment_df) == 0:
        continue  # no stress moments recorded for this participant

    selectBioFiles.append(file)
    selectMotFiles.append(masterDir + "/motion/motion_" + file_id)
    print(f"Premerge processing file: {bio_name}")

    # The outer merge adds the flagged moment rows to the rows above the marker.
    # 'startDate' is written as the CSV index column; it must NOT be reset here,
    # because the merge cell expects exactly three columns in the fixed file.
    bio_out = (
        bio_df[0:stressLoc]
        .merge(moment_df, how='outer')
        .set_index('startDate')
        .sort_values(ascending=True, by='startDate')
    )
    bio_out.to_csv(bioFix + "bio_f_" + file_id)

# Only participants with stress moments are processed from here on.
bioFiles = selectBioFiles
motionFiles = selectMotFiles
         
"""
This process reformats motion files by changing the column hierarchy.
"""
# Motion pre-merge: write only the 'ACCELERATION' column (plus the frame's index)
# of each selected motion file to the motion_fix folder.
for file in motionFiles:
    # Use the file name itself instead of fixed character offsets, so the loop
    # does not depend on the length of masterDir.
    motion_name = os.path.basename(file)       # e.g. "motion_04.csv"
    file_id = motion_name[len("motion_"):]     # e.g. "04.csv"

    print(f"Premerge processing file: {motion_name}")
    prim_df = pd.read_csv(file, low_memory=False)
    sec_df = prim_df['ACCELERATION']
    sec_df.to_csv(motionFix + "motion_f_" + file_id)

# Pick up the pre-processed ("fixed") files; both lists are sorted because the
# merge step pairs bio and motion files by list position.
motionFiles = sorted(gb.glob(motionFix + "/*.csv"))
bioFiles = sorted(gb.glob(bioFix + "/*.csv"))

print("Completed premerge")

Premerge processing file: bio_04.csv
Premerge processing file: bio_13.csv
Premerge processing file: bio_14.csv
Premerge processing file: motion_04.csv
Premerge processing file: motion_13.csv
Premerge processing file: motion_14.csv
Completed premerge


In [162]:
# Merge each fixed bio file with the motion file at the same list position, then
# cut a window of rows around every stress moment and write one CSV per participant.
half_window = 60  # rows kept before / after each stress-moment row

for i in range(len(bioFiles)):
    # Names come from the paths themselves; the previous version indexed the
    # undefined lists fixed_b_files / fixed_m_files (NameError on a fresh kernel)
    # and relied on fixed character offsets into the absolute path.
    bio_name = os.path.basename(bioFiles[i])         # e.g. "bio_f_04.csv"
    motion_name = os.path.basename(motionFiles[i])   # e.g. "motion_f_04.csv"
    out_name = bio_name[len("bio_f_"):]              # e.g. "04.csv"
    participant = out_name[:2]                       # first two characters, as before

    print("Merging:")
    print(f"Bio file: {bio_name}")
    print(f"Motion file: {motion_name}")

    bio_df = pd.read_csv(bioFiles[i], low_memory=False)
    bio_df.columns = ['date', 'hr', 'smoment']
    motion_df = pd.read_csv(motionFiles[i], low_memory=False)

    # `sort` is a boolean flag; the old 'ascending' string only worked by being truthy.
    merge_df = bio_df.merge(motion_df, how='outer', sort=True)

    # Row positions of the flagged stress moments in the merged frame.
    momentLis = merge_df.index[merge_df['smoment'] == 1].tolist()

    windows = []
    for momentCount, moment in enumerate(momentLis, start=1):  # moments indexed from 1
        # Clamp at 0: a negative slice start would count from the END of the
        # frame and return the wrong (usually empty) window for early moments.
        start = max(moment - half_window, 0)
        window_df = merge_df[start:moment + half_window].copy()
        window_df['smoment'] = momentCount
        window_df['participant'] = participant
        windows.append(window_df)

    # DataFrame.append was removed in pandas 2.0; concatenate once instead.
    # pd.concat rejects an empty list, so fall back to an empty frame.
    out_df = pd.concat(windows) if windows else pd.DataFrame()

    print(f"Writing pre_impute_{out_name}")
    out_df.to_csv(outputDir + "pre_impute_" + out_name)

Merging:
Bio file: bio_f_04.csv
Motion file: motion_f_04.csv

Writing pre_impute_04.csv
Merging:
Bio file: bio_f_13.csv
Motion file: motion_f_13.csv

Writing pre_impute_13.csv
Merging:
Bio file: bio_f_14.csv
Motion file: motion_f_14.csv

Writing pre_impute_14.csv


In [163]:
# Final status line; printed unconditionally whenever this cell is run.
print("All processes completed successfully.")

All processes completed successfully.
