In [1]:
import pandas as pd

In [2]:
# Path of one sample segment file (activity a01_sitting, person p1, segment s01)
file = (
    '/Users/Lisa/_Sports_Activity_Capstone/sports_activity_data'
    '/a01_sitting/p1/s01.txt'
)

# Structure of the Data

- In each text file, there are 5 units x 9 sensors = 45 columns
- Each column contains the 125 samples of data acquired from one of the sensors of one of the units over a period of 5 sec.

9 sensors on each unit:
- (3 x 3 sensors) x 5 body parts = 45 columns of data
- x,y,z accelerometers
- x,y,z gyroscopes
- x,y,z magnetometers

Columns 1-45 correspond to: 
- T_xacc, T_yacc, T_zacc, T_xgyro, ..., T_ymag, T_zmag, 
- RA_xacc, RA_yacc, RA_zacc, RA_xgyro, ..., RA_ymag, RA_zmag, 
- LA_xacc, LA_yacc, LA_zacc, LA_xgyro, ..., LA_ymag, LA_zmag, 
- RL_xacc, RL_yacc, RL_zacc, RL_xgyro, ..., RL_ymag, RL_zmag, 
- LL_xacc, LL_yacc, LL_zacc, LL_xgyro, ..., LL_ymag, LL_zmag. 

# Create a list of column names for pandas import

In [3]:
# Every unit contributes nine names: the acc, gyro and mag sensors in that order,
# each with x, y, z axes, prefixed by the unit's abbreviation.
# Torso list
t = [f'T_{axis}{sensor}' for sensor in ('acc', 'gyro', 'mag') for axis in 'xyz']
# Right Arm list
ra = [f'RA_{axis}{sensor}' for sensor in ('acc', 'gyro', 'mag') for axis in 'xyz']
# Left Arm list
la = [f'LA_{axis}{sensor}' for sensor in ('acc', 'gyro', 'mag') for axis in 'xyz']
# Right Leg list
rl = [f'RL_{axis}{sensor}' for sensor in ('acc', 'gyro', 'mag') for axis in 'xyz']
# Left Leg list
ll = [f'LL_{axis}{sensor}' for sensor in ('acc', 'gyro', 'mag') for axis in 'xyz']

In [4]:
# Complete column list (torso, right arm, left arm, right leg, left leg) used for dataframe imports
columns = [*t, *ra, *la, *rl, *ll]

# Test import using columns

In [5]:
# Load the sample file, treating every row as data and labelling the columns with the sensor names
a01_p1 = pd.read_csv(file, header=None, names=columns)
# Works just fine!

# Functions to aggregate the distributed data

In [6]:
# Activity folder names; together with the people and segment numbers below
# they make up the complete list for 9,120 imports
fold_strings = [
    "a01_sitting",
    "a02_standing",
    "a03_laying_on_back",
    "a04_laying_on_right_side",
    "a05_ascending_stairs",
    # NOTE(review): the space after "a06_" is kept exactly as written - confirm it matches the folder on disk
    "a06_ descending_stairs",
    "a07_still_in_elevator",
    "a08_moving_in_elevator",
    "a09_parking_lot_walking",
    "a10_walking_flat_treadmill",
    "a11_walking_incline_treadmill",
    "a12_running_treadmill",
    "a13_stepper",
    "a14_cross_trainer",
    "a15_horizontal_exercise_bike",
    "a16_vertical_exercise_bike",
    "a17_rowing",
    "a18_jumping",
    "a19_basketball",
]

# Person sub-folders p1 through p8
people = [f"p{num}" for num in range(1, 9)]

In [7]:
# extract links for data import per person/activity
def make_lst(fold_strings, people,
             base_dir="/Users/Lisa/_Sports_Activity_Capstone/sports_activity_data",
             n_segments=60):
    '''
    Build the path of every segment file for the given activities and people.

    Each path has the form "{base_dir}/{activity}/{person}/sNN.txt", where NN is
    the segment number zero-padded to two digits (s01, s02, ..., s60).

    parameters:
    fold_strings = list of activity folder names, e.g. "a01_sitting"
    people = list of person folder names, e.g. "p1" - "p8"
    base_dir = root folder that contains the activity folders
               (defaults to the original local dataset location)
    n_segments = number of segment files per person folder (default 60)

    returns:
    a list of path strings ordered by activity, then person, then segment number;
    an empty list if either fold_strings or people is empty
    '''
    return [
        # :02d zero-pads single-digit segment numbers, so no separate x < 10 branch is needed
        f"{base_dir}/{name}/{peeps}/s{x:02d}.txt"
        for name in fold_strings
        for peeps in people
        for x in range(1, n_segments + 1)
    ]

In [8]:
# Test the make_lst function on the full set of activity folders and people
fin_lst = make_lst(fold_strings=fold_strings, people=people)
len(fin_lst)
# Returns a list of ALL 9,120 file imports

9120

In [15]:
# Displays the complete list of all 9,120 file paths IN ORDER:
# grouped by activity, then by person, then segment files 1-60
# (i.e. sitting data: p1 files 1-60, then p2 files 1-60, etc.).
fin_lst

['/Users/Lisa/_Sports_Activity_Capstone/sports_activity_data/a01_sitting/p1/s01.txt',
 '/Users/Lisa/_Sports_Activity_Capstone/sports_activity_data/a01_sitting/p1/s02.txt',
 '/Users/Lisa/_Sports_Activity_Capstone/sports_activity_data/a01_sitting/p1/s03.txt',
 '/Users/Lisa/_Sports_Activity_Capstone/sports_activity_data/a01_sitting/p1/s04.txt',
 '/Users/Lisa/_Sports_Activity_Capstone/sports_activity_data/a01_sitting/p1/s05.txt',
 '/Users/Lisa/_Sports_Activity_Capstone/sports_activity_data/a01_sitting/p1/s06.txt',
 '/Users/Lisa/_Sports_Activity_Capstone/sports_activity_data/a01_sitting/p1/s07.txt',
 '/Users/Lisa/_Sports_Activity_Capstone/sports_activity_data/a01_sitting/p1/s08.txt',
 '/Users/Lisa/_Sports_Activity_Capstone/sports_activity_data/a01_sitting/p1/s09.txt',
 '/Users/Lisa/_Sports_Activity_Capstone/sports_activity_data/a01_sitting/p1/s10.txt',
 '/Users/Lisa/_Sports_Activity_Capstone/sports_activity_data/a01_sitting/p1/s11.txt',
 '/Users/Lisa/_Sports_Activity_Capstone/sports_activit

In [9]:
# Now attempt to turn an activity into a dataframe
def make_dataframe(lst, names=None):
    '''
    Read every file in a list of paths and stack their rows into one dataframe.

    parameters:
    lst = a list of strings (file paths), read in the order given
    names = optional list of column names to apply to every file;
            when omitted, the notebook-level `columns` list is used

    returns:
    a dataframe holding the rows of every file in order, with the given
    column names and a fresh 0..n-1 index; an empty dataframe with those
    columns if lst is empty
    '''
    if names is None:
        names = columns
    # Read everything first and concatenate once: DataFrame.append was removed in
    # pandas 2.0, and growing the frame inside the loop re-copies all earlier rows
    # on every iteration.
    frames = [pd.read_csv(path, names=names) for path in lst]
    if not frames:
        # pd.concat raises on an empty list, so keep returning an empty, labelled frame
        return pd.DataFrame(columns=names)
    return pd.concat(frames, ignore_index=True)

In [10]:
# Test make_dataframe function
# df = make_dataframe(fin_lst)
# Works! Returns a dataframe with the appropriate column names; shape was noted as 1,114,000 x 45
# (TODO confirm: 9,120 files x 125 rows each would give 1,140,000 rows)
# Commenting out since it took 45 minutes to run...

In [16]:
# Create a CSV to add on sports activity data later
# df.to_csv('complete_dataset.csv')
# Works! Commented out not to keep creating csv's