In [1]:
import pandas as pd

In [2]:
# Root folder of the dataset, kept in one place so the location can be changed with a single edit
DATA_DIR = '/Users/Lisa/Sports_Activity_Capstone/sports_activity_data'

# Path of a single sample file (folder a01_sitting/p1, file s01.txt) used for the trial import
file = f'{DATA_DIR}/a01_sitting/p1/s01.txt'

# Structure of the Data

- In each text file, there are 5 units x 9 sensors = 45 columns
- Each column contains the 125 samples of data acquired from one of the sensors of one of the units over a period of 5 sec.

9 sensors on each unit:
- (3 sensor types x 3 axes) x 5 body parts = 45 columns of data
- x,y,z accelerometers
- x,y,z gyroscopes
- x,y,z magnetometers

Columns 1-45 correspond to (T = torso, RA = right arm, LA = left arm, RL = right leg, LL = left leg):
- T_xacc, T_yacc, T_zacc, T_xgyro, ..., T_ymag, T_zmag, 
- RA_xacc, RA_yacc, RA_zacc, RA_xgyro, ..., RA_ymag, RA_zmag, 
- LA_xacc, LA_yacc, LA_zacc, LA_xgyro, ..., LA_ymag, LA_zmag, 
- RL_xacc, RL_yacc, RL_zacc, RL_xgyro, ..., RL_ymag, RL_zmag, 
- LL_xacc, LL_yacc, LL_zacc, LL_xgyro, ..., LL_ymag, LL_zmag. 

# Create a list of column names for pandas import

In [3]:
def _unit_columns(prefix):
    '''
    Build the 9 column names of one unit.

    parameter:
    prefix = string identifying the unit (e.g. 'T', 'RA')

    returns:
    a list of 9 strings ordered as x,y,z accelerometer, then x,y,z gyroscope,
    then x,y,z magnetometer, e.g. 'T_xacc', ..., 'T_zmag'
    '''
    # Sensor type varies slowest and axis fastest, matching the column order of the text files
    return [f'{prefix}_{axis}{sensor}'
            for sensor in ('acc', 'gyro', 'mag')
            for axis in ('x', 'y', 'z')]


# Torso list
t = _unit_columns('T')
# Right Arm list
ra = _unit_columns('RA')
# Left Arm list
la = _unit_columns('LA')
# Right Leg list
rl = _unit_columns('RL')
# Left Leg list
ll = _unit_columns('LL')

In [4]:
# Complete column list (torso, right arm, left arm, right leg, left leg) used for dataframe imports
columns = [*t, *ra, *la, *rl, *ll]

# Test import using columns

In [5]:
# Trial import of the single sample file; the supplied names become the column labels
a01_p1 = pd.read_csv(file, header=None, names=columns)
# Works just fine!

# Functions to aggregate the distributed data

In [6]:
# extract links for data import
def make_lst(a,
             base_dir="/Users/Lisa/Sports_Activity_Capstone/sports_activity_data",
             n_files=60,
             n_folders=8):
    '''
    Build the file paths of every text file belonging to one activity folder.

    Each path has the form {base_dir}/{a}/p{y}/s{x}.txt, where y is the number of
    the "p" sub-folder (1..n_folders) and x is the file number (1..n_files),
    zero-padded to two digits (s01 ... s60).

    parameters:
    a = string with the activity folder's name (e.g. 'a01_sitting')
    base_dir = root folder that holds the activity folders (defaults to the original location)
    n_files = number of s-files in each p-folder (default 60)
    n_folders = number of p-folders in each activity folder (default 8)

    returns:
    a list of path strings (n_files * n_folders entries, 480 with the defaults),
    ordered by file number first and p-folder second
    '''
    # x is the outer loop so that s01 of every p-folder comes before any s02
    return [
        f"{base_dir}/{a}/p{y}/s{x:02d}.txt"
        for x in range(1, n_files + 1)
        for y in range(1, n_folders + 1)
    ]

In [7]:
# Try the path builder on the first activity folder
a01 = make_lst(a='a01_sitting')
# Gives the list of all 480 path strings needed to import that activity

In [8]:
# Names of the 19 activity folders; together they yield the complete list of 9,120 imports
# NOTE(review): "a06_ descending_stairs" has a space after the underscore - confirm it
# matches the folder name on disk before changing it
fold_strings = [
    "a01_sitting",
    "a02_standing",
    "a03_laying_on_back",
    "a04_laying_on_right_side",
    "a05_ascending_stairs",
    "a06_ descending_stairs",
    "a07_still_in_elevator",
    "a08_moving_in_elevator",
    "a09_parking_lot_walking",
    "a10_walking_flat_treadmill",
    "a11_walking_incline_treadmill",
    "a12_running_treadmill",
    "a13_stepper",
    "a14_cross_trainer",
    "a15_horizontal_exercise_bike",
    "a16_vertical_exercise_bike",
    "a17_rowing",
    "a18_jumping",
    "a19_basketball",
]

In [9]:
# Create a complete list with all of the file paths for data ingestion
def make_final_lst(n):
    '''
    Build the list of file paths for every folder name given.

    parameter:
    n = a list of folder-name strings

    returns:
    a list of lists: one entry per folder name, in the same order, each entry
    being the list of paths that make_lst returns for that folder
    '''
    return [make_lst(folder) for folder in n]

In [10]:
# Final list variable: one inner list per activity folder (19 in total), 480 path strings in each
fin_lst = make_final_lst(n=fold_strings)
# The list of lists works: 19 x 480 = 9,120 file paths overall

In [11]:
# Now attempt to turn an activity into a dataframe
def make_dataframes(lst):
    '''
    Read every file in a list of paths and stack the rows into one dataframe.

    parameters:
    lst = a list of strings (file paths)

    returns:
    a dataframe holding the rows of all files, in list order, labelled with the
    module-level `columns` names and a fresh 0..n-1 index; an empty dataframe
    with those columns when lst is empty
    '''
    # Read everything first and concatenate once: DataFrame.append was removed in
    # pandas 2.0, and appending inside the loop re-copied the growing frame each time
    frames = [pd.read_csv(path, names=columns) for path in lst]
    if not frames:
        # pd.concat raises ValueError on an empty sequence; keep the empty-frame result
        return pd.DataFrame(columns=columns)
    return pd.concat(frames, ignore_index=True)

In [12]:
# Test function
# sitting_df = make_dataframes(fin_lst[0])
# Works! returns a dataframe for an activity with the appropriate column names

In [13]:
# Create a FINAL dataframe of ALL the sports activities
def make_dataframes_2(lst):
    '''
    Read every file named in a list of path lists and stack the rows into one dataframe.

    parameters:
    lst = a list of lists containing the file path strings for data ingestion

    returns:
    a dataframe holding the rows of all files, in the order the paths appear,
    labelled with the module-level `columns` names and a fresh 0..n-1 index;
    an empty dataframe with those columns when there are no paths at all
    '''
    # Flatten the nested lists, read each file once, then concatenate in a single call:
    # DataFrame.append was removed in pandas 2.0, and appending inside the loop
    # re-copied the growing frame for every one of the files
    frames = [pd.read_csv(path, names=columns) for group in lst for path in group]
    if not frames:
        # pd.concat raises ValueError on an empty sequence; keep the empty-frame result
        return pd.DataFrame(columns=columns)
    return pd.concat(frames, ignore_index=True)

In [14]:
# Test out making the dataframe 
# df = make_dataframes_2(fin_lst)
# Works! Commenting out since it takes a minute to run

In [15]:
# Create a CSV to add on sports activity data later
# df.to_csv('complete_dataset.csv')
# Works! Commented out not to keep creating csv's