In [29]:
from glob import glob
import pandas as pd

# Reading data from our CSV files that are stored inside this repository (single files)

try:
    # Load one accelerometer and one gyroscope recording as a quick sanity
    # check. Both reads share this try block, so the first missing file
    # also skips whatever read would have followed it.
    single_file_accelerometer = pd.read_csv(
        '../../data/raw/MetaMotion/A-bench-heavy2-rpe8_MetaWear_2019-01-11T16.10.08.270_C42732BE255C_Accelerometer_12.500Hz_1.4.4.csv'
    )
    single_file_gyroscope = pd.read_csv(
        '../../data/raw/MetaMotion/A-bench-heavy2-rpe8_MetaWear_2019-01-11T16.10.08.270_C42732BE255C_Gyroscope_25.000Hz_1.4.4.csv'
    )
except FileNotFoundError:
    # Only a notice is printed; a frame whose read failed stays undefined.
    print("Files that you try to use are not available.")

# Reading all CSV files from data/raw/MetaMotion; the resulting list is used later.

# Only files with the .csv extension are matched; any other files found in this
# directory are ignored.

# Directory that holds the raw MetaMotion recordings.
data_path = '../../data/raw/MetaMotion/'

# Every CSV recording found in that directory.
files = glob('../../data/raw/MetaMotion/*.csv')

# The file name itself carries metadata, e.g.
# A-bench-heavy2-rpe8_MetaWear_2019-01-11T16.10.08.270_C42732BE255C_Gyroscope_25.000Hz_1.4.4.csv
# Pieces of it are extracted and appended to the data frame as extra columns.

# First match, kept for inspection; raises IndexError when nothing was found.
first_file = files[0]

def _parse_recording_name(file):
    """Extract the labels encoded in a MetaMotion recording's file name.

    File names look like
    ``A-bench-heavy2-rpe8_MetaWear_2019-01-11T16.10.08.270_C42732BE255C_Accelerometer_12.500Hz_1.4.4.csv``:
    the first three ``-``-separated tokens are the participant, the exercise
    and the category (optionally followed by a set number and the device
    suffix).

    Args:
        file: Path (or bare name) of one recording.

    Returns:
        A ``(base_name, participant, exercise, category)`` tuple of strings.

    Raises:
        IndexError: If the base name has fewer than three ``-``-separated
            tokens.
    """
    import os  # local import so this block does not rely on file-level imports

    # Work on the base name only, so the labels do not depend on the directory
    # the file lives in, on '-' characters inside that directory, or on the
    # path separator glob() produced (e.g. '\\' on Windows).
    base_name = os.path.basename(file)
    tokens = base_name.split('-')

    participant = tokens[0]
    exercise = tokens[1]
    # Cut the device suffix off as a whole substring first, then drop the
    # trailing set number. str.rstrip() takes a *set of characters*, so
    # rstrip('_MetaWear_2019') is not a suffix removal, and stripping the
    # digits while the suffix was still attached left names such as
    # 'heavy3_MetaWear_2019' as 'heavy3'.
    category = tokens[2].partition('_MetaWear')[0].rstrip('123')

    return base_name, participant, exercise, category


def _build_sensor_frame(frames):
    """Stack the per-file frames of one sensor and index them by timestamp.

    Returns an empty data frame when ``frames`` is empty, because there is no
    ``epoch (ms)`` column to build an index from in that case.
    """
    if not frames:
        return pd.DataFrame()

    combined = pd.concat(frames)
    # 'epoch (ms)' is milliseconds since 1970-01-01 UTC, so it is independent
    # of the time zone the recording was made in.
    combined.index = pd.to_datetime(combined['epoch (ms)'], unit='ms')
    # The index now carries the timestamp; the remaining time columns only
    # duplicate that information.
    return combined.drop(columns=['epoch (ms)', 'time (01:00)', 'elapsed (s)'])


def get_data_from_files(files):
    """Read MetaMotion CSV recordings into one frame per sensor.

    Each file is read with ``pd.read_csv`` and labelled with ``participant``,
    ``exercise`` and ``category`` columns parsed from its file name, plus a
    ``set`` column that numbers the files of the same sensor 1, 2, 3, ... in
    the order they appear in ``files``. Labels are taken from the base name of
    each path, so the module-level ``data_path`` is not consulted.

    Args:
        files: Iterable of CSV paths. A file is treated as an accelerometer
            recording when its name contains ``'Accelerometer'`` and as a
            gyroscope recording when it contains ``'Gyroscope'``; other files
            are read but not included in either result.

    Returns:
        A tuple ``(accelerometer_data_frame, gyroscope_data_frame)``. Both are
        indexed by the timestamp derived from ``epoch (ms)`` and no longer
        contain the ``epoch (ms)``, ``time (01:00)`` and ``elapsed (s)``
        columns. A sensor without any matching file yields an empty frame.

    Raises:
        IndexError: If a file name has fewer than three ``-``-separated parts.
        KeyError: If a matching file lacks one of the expected time columns.
    """
    # Collect the frames and concatenate once per sensor: calling pd.concat
    # inside the loop copies all previously read rows on every iteration.
    accelerometer_frames = []
    gyroscope_frames = []

    for file in files:
        base_name, participant, exercise, category = _parse_recording_name(file)

        data_frame = pd.read_csv(file)
        data_frame['participant'] = participant
        data_frame['exercise'] = exercise
        data_frame['category'] = category

        # assign() returns a new frame, so a name matching both sensors cannot
        # end up sharing (and overwriting) one 'set' column.
        if 'Accelerometer' in base_name:
            accelerometer_frames.append(
                data_frame.assign(set=len(accelerometer_frames) + 1)
            )

        if 'Gyroscope' in base_name:
            gyroscope_frames.append(
                data_frame.assign(set=len(gyroscope_frames) + 1)
            )

    return (
        _build_sensor_frame(accelerometer_frames),
        _build_sensor_frame(gyroscope_frames),
    )

# Build the combined frame of each sensor from every recording in `files`,
# then show the accelerometer frame as a quick visual check.
(
    accelerometer_data_frame,
    gyroscope_data_frame,
) = get_data_from_files(files)
print(accelerometer_data_frame)

# --------------------------------------------------------------
# Merging datasets
# --------------------------------------------------------------


# --------------------------------------------------------------
# Resample data (frequency conversion)
# --------------------------------------------------------------

# Accelerometer:    12.500HZ
# Gyroscope:        25.000Hz


# --------------------------------------------------------------
# Export dataset
# --------------------------------------------------------------


                         x-axis (g)  y-axis (g)  z-axis (g) participant  \
epoch (ms)                                                                
2019-01-11 15:42:43.566      -0.136       0.986      -0.053           B   
2019-01-11 15:42:43.646      -0.143       0.977      -0.053           B   
2019-01-11 15:42:43.726      -0.187       0.935       0.039           B   
2019-01-11 15:42:43.806      -0.152       0.958      -0.075           B   
2019-01-11 15:42:43.886      -0.143       0.944       0.010           B   
...                             ...         ...         ...         ...   
2019-01-14 13:50:00.195      -0.085       0.911      -0.057           A   
2019-01-14 13:50:00.275      -0.070       0.926      -0.102           A   
2019-01-14 13:50:00.355      -0.082       0.955      -0.128           A   
2019-01-14 13:50:00.435      -0.091       0.985      -0.119           A   
2019-01-14 13:50:00.515      -0.084       0.988      -0.121           A   

                        