In [1]:
import os
import re
import warnings

import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
warnings.filterwarnings('ignore')

## Create Initial Dataframe

Walks the folder and turns each text file (one workout) into a row of the dataframe. The workout's date is parsed from the timestamp embedded in the file name, converted into a pandas datetime, and shifted to local (US/Pacific) time. The workouts are then sorted chronologically.

The data cleaning cell removes some non-workout files that I used to plan out my splits and notes for non-weightlifting workouts.

In [8]:
# Root folder of the exported workout notes; the loading cell walks it recursively.
# NOTE(review): machine-specific absolute path — point it at your own export before running.
path = r"C:\Users\blake\OneDrive\Desktop\WorkoutData\iCloud Notes\Workouts"

In [9]:
# One record (dict) per workout note found on disk
data = []

# Walk the export folder recursively, keeping only .txt notes
for folder, _subfolders, names in os.walk(path):
    for name in names:
        if not name.endswith('.txt'):
            continue  # anything that is not a text note is ignored

        # Keep the note as a list of lines (newline characters preserved)
        with open(os.path.join(folder, name), 'r', encoding="utf8") as handle:
            lines = handle.readlines()

        data.append({
            'Folder': os.path.basename(folder),  # name of the containing folder
            'File': name,
            'Content': lines,
        })
df = pd.DataFrame(data)

In [10]:
# Extract dates: the slice [-24:-5] of each file name is parsed as a UTC
# timestamp, with '_' standing in for ':'
df['Extract'] = df['File'].str[-24:-5].str.replace('_', ':', regex=False)
utc_dates = pd.to_datetime(df['Extract'], utc=True)

# Convert to local (US/Pacific) wall-clock time, then drop the timezone info
df['Date'] = utc_dates.dt.tz_convert('US/Pacific').dt.tz_localize(None)

# Add ordinal date for numerical calculations
df["Date_ordinal"] = df['Date'].apply(lambda stamp: stamp.toordinal())

# Sort by date
df = df.sort_values('Date')

In [11]:
### DATA CLEANING

# Index labels of rows that are not usable workouts
planning_notes = [380, 276, 295, 48]   # split planning notes
erroneous_days = [203, 81, 112, 201]   # erroneous days

df = (
    df.iloc[3:]                                     # remove first three entries
      .drop(index=planning_notes + erroneous_days)  # remove the rows listed above
      .drop(columns=["File", "Extract"])            # remove useless columns
)

## Extract the One Rep Maximums

In [12]:
### Lifts examined
exercises = ["Squat","Bench","Deadlift","Overhead press"]

# Start every "<lift>_ORM" column at 0; the fill loop further down overwrites it per workout
for lift_name in exercises:
    df[f"{lift_name}_ORM"] = 0

In [13]:
def get_clean(lift,content):
    """Collect the lines logged under a lift's heading in one workout.

    lift    -- text to look for (str); any line containing it counts as the heading.
    content -- the workout's lines in order, each still ending in a newline
               (the notebook passes the list produced by readlines()).

    Returns the stripped lines between the heading and the blank line that
    closes the section (or the end of the workout if no such blank line is
    found). Returns None when no line contains `lift`.
    """
    header_at = None
    stop_at = None
    for pos, text in enumerate(content):
        if lift in text:
            # every matching line (re)starts the section
            header_at = pos
            continue

        # Only a blank line after the heading can close the section, and a
        # blank line directly below the heading never does.
        if header_at is None or text != "\n" or pos == header_at + 1:
            continue

        # A blank line closes the section only when one of the two lines above
        # it holds a "<digit>x<digit>" set; otherwise it is just the break
        # after a comment at the start of the lift.
        if re.search(r'\dx\d',content[pos-1]+content[pos-2]):
            stop_at = pos
            break

    # lift not present in this workout
    if header_at is None:
        return None

    # drop leading/trailing whitespace from every kept line
    return [entry.strip() for entry in content[header_at+1:stop_at]]

In [14]:
import re
"""Takes a single line (str) of a workout textfile and extracts the weight (int) and repetitions (int). """
def _round_number(token):
    """Return `token` rounded to the nearest int, or None if it is not a number.

    The digit runs captured by extract_weight_reps may carry punctuation from
    the surrounding note (e.g. the "5," in "135x5, 135x4"), which float()
    rejects. The token is tried as-is first, then with commas/periods stripped
    from both ends.
    """
    for candidate in (token, token.strip(",.")):
        try:
            return round(float(candidate))
        except ValueError:
            continue
    return None


def extract_weight_reps(line):
    '''
    Takes a single line (str) of a workout textfile and extracts the weight (int)
    and repetitions (int) of the set written as "<weight>x<reps>".

    Only the text around the first 'x' is examined, so drop sets and
    "last time" notes later on the line are ignored. Fractional values are
    rounded with round(). Returns (0, 0) when the line holds no parsable set:
    no 'x', no number directly on either side of it, or a malformed number.

    TEST CASES:

    >>> extract_weight_reps("135x5")
    (135, 5)

    >>> extract_weight_reps("135 x 5")
    (135, 5)

    >>> extract_weight_reps("135x5.75")
    (135, 6)

    >>> extract_weight_reps("44.75x5")
    (45, 5)

    >>> extract_weight_reps("175x7 (last time 165x8)")
    (175, 7)

    >>> extract_weight_reps("185x2 F on 3")
    (185, 2)

    >>> extract_weight_reps("SS 25x7")
    (25, 7)

    >>> extract_weight_reps("70x2+ 45x2 Yikes")
    (70, 2)

    >>> extract_weight_reps("120x5 - 90x4")
    (120, 5)

    >>> extract_weight_reps("120x5 - 90x4 - 30x5")
    (120, 5)

    >>> extract_weight_reps("135x2.5 (not full 3rd)")
    (135, 2)

    >>> extract_weight_reps("200x1 (fail on 2)")
    (200, 1)

    >>> extract_weight_reps("5 set 95")
    (0, 0)


    >>> extract_weight_reps("205 x F")
    (0, 0)

    '''
    pieces = line.split('x')

    # no 'x' case
    if len(pieces) == 1:
        return 0, 0

    # Weight must end the text before the first 'x'; reps must start the text after it.
    before = pieces[0].strip()
    after = pieces[1].strip()
    w = re.findall(r"[\d,.]{1,}$", before)
    r = re.findall(r"^[\d,.]{1,}", after)

    if len(w) == 0 or len(r) == 0:
        return 0, 0

    weight = _round_number(w[0])
    reps = _round_number(r[0])

    # Punctuation-only or malformed captures (".", "1.2.3") count as "no set"
    # instead of raising and aborting the whole ORM pass.
    if weight is None or reps is None:
        return 0, 0
    return weight, reps

In [15]:
### TESTING
# Runs the doctest examples found in this notebook's docstrings (the
# "TEST CASES" of extract_weight_reps). With verbose=False only failures
# are reported; the returned TestResults summarises the run.
import doctest
doctest.testmod(verbose=False)

TestResults(failed=0, attempted=14)

In [18]:
def find_ORM(lift, content):
    """Estimate the best one-rep max (ORM) for `lift` within a single workout.

    lift    -- lift name (str) handed to get_clean().
    content -- the workout's content, handed to get_clean() unchanged.

    Returns None when get_clean() finds no section for the lift. Otherwise
    returns the largest estimate over the section's lines, or 0 when no line
    yields a positive estimate.
    """
    set_lines = get_clean(lift,content)

    # lift does not appear in this workout
    if set_lines is None:
        return None

    # the leading 0 is the floor used when nothing better is found
    estimates = [0]
    for entry in set_lines:
        weight, reps = extract_weight_reps(entry)

        # sets above 15 reps are left out of the estimate
        if reps <= 15:
            ## formula: weight × (36 / (37 - reps))
            estimates.append(weight * (36/(37 - reps)))
    return max(estimates)

In [19]:
### fill the ORMs

# Visit every workout once and store the estimate for each tracked lift
for index, workout_lines in df['Content'].items():
    for lift_name in exercises:
        df.at[index, lift_name + '_ORM'] = find_ORM(lift_name, workout_lines)

## Add Splits

Labels each row (workout) as a particular workout split. This will be useful for later analysis.

In [21]:
#### Define the date ranges for the different splits manually
# (split name, range start, range end) — dates written month/day/year
_split_periods = [
    ("fullbody1", '1/27/2022', '2/8/2023'),
    ("PPL1", '2/9/2023', '4/6/2023'),
    ("BALC", '4/7/2023', '3/17/2024'),
    ("PPL2", '3/18/2024', '8/25/2024'),
    ("Arnold", '8/26/2024', '9/15/2024'),
]

# name -> (start Timestamp, end Timestamp), in the order listed above
splits = {
    name: (pd.to_datetime(start), pd.to_datetime(end))
    for name, start, end in _split_periods
}

In [22]:
### Add the splits to each row of the dataset
# The bounds in `splits` are midnight timestamps on consecutive calendar days,
# while df['Date'] also carries a time of day. With the default right-closed
# intervals, a workout logged during the final day of a split lies after that
# split's midnight end bound and before the next split's start, so it would be
# labelled 'Out of Range'. Matching on the calendar day against intervals
# closed on both ends assigns the first and last day of each range to its split.
splits_index = pd.IntervalIndex.from_tuples(list(splits.values()), closed='both')
split_names = list(splits.keys())
workout_days = df['Date'].dt.normalize()  # time of day set to 00:00
df['Split'] = splits_index.get_indexer(workout_days)
# get_indexer yields -1 for dates that fall in none of the intervals
df['Split'] = df['Split'].map(lambda i: split_names[i] if i != -1 else 'Out of Range')

## Export

In [27]:
# Save the processed frame as a pickle; the relative path resolves against
# the current working directory.
df.to_pickle("ORMs_Sept2024.pkl")