# Import Libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import math

from os import listdir, walk
from os.path import isfile, join
from sklearn.preprocessing import MinMaxScaler

# Load Raw Data

In [2]:
# Execute the shared loader notebook inside this kernel.
# NOTE(review): names used further down but never defined in this notebook
# (load_timer, load_acc, load_hr, merge_acc_and_hr, calc_sec, calc_ts, mypath,
# cleaned_data_path) presumably come from it -- TODO confirm.
%run ../ddc/load_dataset.ipynb

In [3]:
# Subject IDs to load: 2001 plus 3001..3005 (np.arange excludes its upper bound).
subj_range = np.concatenate([np.arange(2001, 2002), np.arange(3001, 3006)])

# Same IDs as strings, which is the form the loading loop iterates over.
all_patients = list(map(str, subj_range))

In [4]:
datapath = '../../DDC_Data/'
basepath = '../../'

# Accelerometer axis columns that are rescaled for every subject.
cols = ['x','y','z']

# Collect one frame per subject and concatenate once after the loop.
# DataFrame.append was removed in pandas 2.0, and calling it inside a loop
# re-copies everything accumulated so far on every iteration.
subject_frames = []

for subject_id in all_patients:
    print("Loading {0}'s data".format(subject_id))

    # Recording window for this subject, then the accelerometer and heart-rate
    # data restricted to that window (helpers are defined outside this notebook).
    df_timer, rec_date, start_time, end_time = load_timer(subject_id)
    df_filt = load_acc(subject_id, rec_date, start_time, end_time)
    df_hr = load_hr(subject_id, rec_date, start_time, end_time)

    df1 = merge_acc_and_hr(df_filt, df_hr)

    # Min-max scale the three axes using this subject's own min/max (a new
    # scaler is fitted per subject). The scaled array is assigned back
    # positionally, so the result does not depend on df1 having a default
    # 0..n-1 index; going through pd.Series would align on index and could
    # silently introduce NaNs.
    df1[cols] = MinMaxScaler().fit_transform(df1[cols].values)

    subject_frames.append(df1)

# Each subject keeps its own row index here; pd.concat raises on an empty list,
# so fall back to an empty frame when there were no subjects.
df_all_p = pd.concat(subject_frames, sort=False) if subject_frames else pd.DataFrame()

print('Finished Loading')

Loading 2001's data
Loading 3001's data
Loading 3002's data
Loading 3003's data
Loading 3004's data
Loading 3005's data
Finished Loading


In [5]:
# The per-subject frames were combined without renumbering, so row labels repeat
# across subjects; replace them with a single 0..n-1 index and discard the old labels.
df_all_p = df_all_p.reset_index(drop=True)

# Copy Data (repeat the loaded recordings to fill one synthetic 24-hour day)

In [45]:
# Empty placeholder for the one-day frame (df_day) that the following cells build.
df_day = pd.DataFrame()

In [46]:
# Sampling period between consecutive rows (presumably seconds -- TODO confirm).
T = 0.16
# Sampling rate: number of rows per unit of T.
freq = 1.0 / T
# Number of seconds in 24 hours.
oneday = 60 * 60 * 24

In [47]:
# An empty source frame can never fill a day; fail loudly instead of looping forever.
if df_all_p.shape[0] == 0:
    raise ValueError('df_all_p is empty; cannot fill one day of samples')

# Smallest number of whole copies of df_all_p whose total row count is strictly
# greater than one day of samples -- the same stopping point as appending copies
# while shape[0] <= int(oneday*freq), starting from an empty frame.
n_copies = int(oneday*freq) // df_all_p.shape[0] + 1

# Build the frame with a single concat: DataFrame.append was removed in pandas 2.0
# and re-copies the accumulated rows on every call. Rebuilding from df_all_p also
# makes this cell idempotent (re-running it no longer keeps growing df_day).
df_day = pd.concat([df_all_p] * n_copies, sort=False)

df_day.shape

(547074, 6)

In [48]:
# Keep exactly one day of samples (the first int(oneday*freq) rows) and renumber rows from 0.
df_day = df_day.iloc[:int(oneday * freq)].reset_index(drop=True)

# Label every row with the ID '9999'.
df_day['ID'] = pd.Series(['9999'] * df_day.shape[0])

In [49]:
date = '2019-03-28'
midnight = '00:00:00.000'

# One timestamp string per sample: the fixed date, a space, then the time of day
# obtained by offsetting midnight by T * sample index. calc_sec / calc_ts are
# helpers defined outside this notebook (presumably time-string <-> seconds
# conversions -- TODO confirm).
time_list = np.array([
    '{0} {1}'.format(date, calc_ts(calc_sec(midnight) + (T * sample_idx)))
    for sample_idx in range(int(oneday * freq))
])

In [50]:
# Sanity check: there should be one timestamp per sample, i.e. int(oneday*freq) entries.
time_list.shape

(540000,)

In [51]:
# Attach the generated timestamps. pd.Series(time_list) carries a default 0..n-1
# index and column assignment aligns on index, so this assumes df_day still has a
# default 0..n-1 index of the same length; mismatched labels would become NaN
# instead of raising.
df_day['timestamp'] = pd.Series(time_list)

In [52]:
# Preview only the first rows; the full frame holds one row per sample for a whole day.
df_day.head(10)

Unnamed: 0,ID,timestamp,x,y,z,HR
0,9999,2019-03-28 0:0:0.0,0.246041,0.475436,0.576183,68.205284
1,9999,2019-03-28 0:0:0.16,0.246431,0.473881,0.576429,68.205284
2,9999,2019-03-28 0:0:0.32,0.246236,0.474464,0.573722,68.205284
3,9999,2019-03-28 0:0:0.48,0.246041,0.473881,0.574460,67.365480
4,9999,2019-03-28 0:0:0.64,0.246821,0.472521,0.575444,67.365480
5,9999,2019-03-28 0:0:0.8,0.247016,0.474659,0.572246,67.365480
6,9999,2019-03-28 0:0:0.96,0.247211,0.477574,0.573722,67.365480
7,9999,2019-03-28 0:0:1.12,0.245262,0.469994,0.575198,67.365480
8,9999,2019-03-28 0:0:1.28,0.246041,0.475047,0.571754,67.365480
9,9999,2019-03-28 0:0:1.44,0.246236,0.471937,0.570277,67.365480


# Store Cleaned Data in CSV

In [53]:
# Write the one-day frame to CSV. The row index is written as well, because
# `index` is left at its default.
# NOTE(review): cleaned_data_path is not defined in this notebook -- presumably it is
# set by the %run'd loader notebook; confirm before relying on a fresh-kernel run.
df_day.to_csv(cleaned_data_path)