### Import data

In [1]:
import pandas as pd
import numpy as np
from pathlib import Path
import matplotlib as map

## Import HRV data

In [2]:
from pathlib import Path

# Path.cwd() gives you the *current working directory* (where the notebook is).
# Because the notebook lives in "notebooks/", we go one level up with .parent
# that lands us in the project root folder: hrv-readiness-study/
ROOT = Path.cwd().parent  

# Build the path to the "data/raw" folder inside the project root.
# This is equivalent to writing "../data/raw" if you're in notebooks/,
# but much clearer and more stable.
RAW = ROOT / "data" / "raw"  

# Read the CSV file stored in data/raw. 
# RAW / "wellness.csv" joins the folder path with the filename safely,
# so you don’t have to worry about slashes on different OSes.
hrv = pd.read_csv("https://docs.google.com/spreadsheets/d/e/2PACX-1vQdQNqbZ2h-NgflUUbKUbLiCXJkcCne-qIp-JWaQE_2XZhFdljkkANmcHM4obQTcbNI8y4_U5cc3xz_/pub?gid=1912488080&single=true&output=csv")

# freeze a copy of the csv file to RAW folder
from datetime import date 
today = date.today()
# Turn it into a string like "2025-09-26"
today_str = today.strftime("%Y-%m-%d")
# Build the filename using that string
filename = f"wellness_{today}.csv"
# Join it with your RAW folder path
path = RAW / filename
# Save the DataFrame snapshot
hrv.to_csv(path, index=False)
# Quick check
print("Saved snapshot to:", path)
hrv


HTTPError: HTTP Error 400: Bad Request

In [None]:
# begin cleaning the data

#rename columns
hrv_clean = hrv.rename(columns={"Timestamp": "timestamp", "Today's Date": "date",
    "How did you feel today? (1 worst, 10 best)": "feeling_score",
    "Whoop recovery status": "recovery_status",
    "Whoop recovery score%": "recovery_score",
    "Whoop sleep score%": "sleep_score",
    "RHR": "rhr",
    "HRV": "hrv",
    "Calories": "calories",
    "Whoop sleep score %": "sleep_score",
    "Whoop sleep hours": "sleep_time",
    "Training day or rest day?": "train_rest",
    "Did you train or did you rest?": "trained_yn",
    "Training performance?": "training_score",
    "Any notes?": "notes",})

hrv_clean

Unnamed: 0,timestamp,date,feeling_score,recovery_status,recovery_score,sleep_score,sleep_time,hrv,rhr,calories,train_rest,trained_yn,training_score,notes
0,9/26/2025 11:10:28,9/26/2025,3,Green,91,85,6:15,36,62,Deficit,Training day,Trained,Below average,Felt somewhat sick all day. Very drained. Low ...
1,9/27/2025 6:44:05,9/27/2025,4,Yellow,61,85,7:03,28,62,Deficit,Rest day,Rested,,"Tired, lethargic, sleepy."
2,9/28/2025 9:55:31,9/28/2025,5,Yellow,37,82,6:35,23,66,Maintenance,Training day,Trained,Average,Joint pain. Fatigue.


In [None]:
# check dtypes
hrv_clean.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 14 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   timestamp        3 non-null      object
 1   date             3 non-null      object
 2   feeling_score    3 non-null      int64 
 3   recovery_status  3 non-null      object
 4   recovery_score   3 non-null      int64 
 5   sleep_score      3 non-null      int64 
 6   sleep_time       3 non-null      object
 7   hrv              3 non-null      int64 
 8   rhr              3 non-null      int64 
 9   calories         3 non-null      object
 10  train_rest       3 non-null      object
 11  trained_yn       3 non-null      object
 12  training_score   2 non-null      object
 13  notes            3 non-null      object
dtypes: int64(5), object(9)
memory usage: 468.0+ bytes


In [None]:
# turn date into datetime 
hrv_clean["date"] = pd.to_datetime(hrv_clean["date"], errors="raise")

# check output 
hrv_clean["date"].dtype


dtype('<M8[ns]')

In [None]:
# extract year and date
hrv_clean["year"] = hrv_clean["date"].dt.year
hrv_clean["month"] = hrv_clean["date"].dt.month
hrv_clean["day"] = hrv_clean["date"].dt.day

# filter by ranges
hrv_clean[hrv_clean["date"] == "26-09-25"]


Unnamed: 0,timestamp,date,feeling_score,recovery_status,recovery_score,sleep_score,sleep_time,hrv,rhr,calories,train_rest,trained_yn,training_score,notes,year,month,day
0,9/26/2025 11:10:28,2025-09-26,3,Green,91,85,6:15,36,62,Deficit,Training day,Trained,Below average,Felt somewhat sick all day. Very drained. Low ...,2025,9,26


In [23]:
def clean_time(h_to_m):
    parts = h_to_m.split(":")  # Step 1: Split the string
    hours = int(parts[0])       # Step 2: Get hours (first part)
    minutes = int(parts[1])     # Step 3: Get minutes (second part)
    min_time = (hours * 60) + minutes
    decimal_time = min_time / 60
    return decimal_time
            

total_time = "6:15" 
clean_time(total_time)

6.25

In [24]:
hrv_clean["sleep_hours_decimal"] = hrv_clean["sleep_time"].apply(clean_time)

NameError: name 'hrv_clean' is not defined

In [None]:
hrv_clean.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 17 columns):
 #   Column           Non-Null Count  Dtype         
---  ------           --------------  -----         
 0   timestamp        3 non-null      object        
 1   date             3 non-null      datetime64[ns]
 2   feeling_score    3 non-null      int64         
 3   recovery_status  3 non-null      object        
 4   recovery_score   3 non-null      int64         
 5   sleep_score      3 non-null      int64         
 6   sleep_time       3 non-null      object        
 7   hrv              3 non-null      int64         
 8   rhr              3 non-null      int64         
 9   calories         3 non-null      object        
 10  train_rest       3 non-null      object        
 11  trained_yn       3 non-null      object        
 12  training_score   2 non-null      object        
 13  notes            3 non-null      object        
 14  year             3 non-null      int32        

In [None]:
# turn numerical objects into intergers
hrv_clean["feeling_score"] = pd.to_numeric(hrv_clean["feeling_score"], errors="coerce") #feeling score
hrv_clean["rhr"] = pd.to_numeric(hrv_clean["rhr"], errors="coerce") #rhr
hrv_clean["sleep_score"] = pd.to_numeric(hrv_clean["sleep_score"], errors="coerce")
hrv_clean["sleep_hhmm"] = pd.to_numeric(hrv_clean["rhr"], errors="coerce") #rhr
hrv_clean["sleep_score"] = pd.to_numeric(hrv_clean["sleep_score"], errors="coerce")



In [None]:
# use matplot lib to have plots in ipynb
# plotly: allows interactive charts, spins up small web server like streamlit 

## Import workout logs

In [None]:
with open("../data/raw/workouts/sept_28_25.txt") as f:
    lines = f.readlines()

print(lines[:20])


['Upper MP B\n', 'Sun, Sep 28, 2025\n', '\n', 'Bench Press (Barbell)\n', 'Set 1 : 40 kg  x 9\n', 'Set 2 : 45 kg  x 10\n', 'Set 3 : 55 kg  x 7\n', '\n', 'Chest Supported Row (Machine)\n', 'Set 1 : 35 kg  x 7\n', 'Set 2 : 35 kg  x 8\n', 'Set 3 : 35 kg  x 10\n', '\n', 'Lat Pulldown\n', 'Set 1 : 50 kg  x 7\n', 'Set 2 : 55 kg  x 9\n', 'Set 3 : 60 kg  x 8\n', '\n', 'Lateral Raise (Dumbbell)\n', 'Set 1 : 5 kg  x 8\n']


In [None]:
for line in lines:
    cleaned = line.strip("\n")
    print(cleaned)
     

Upper MP B
Sun, Sep 28, 2025

Bench Press (Barbell)
Set 1 : 40 kg  x 9
Set 2 : 45 kg  x 10
Set 3 : 55 kg  x 7

Chest Supported Row (Machine)
Set 1 : 35 kg  x 7
Set 2 : 35 kg  x 8
Set 3 : 35 kg  x 10

Lat Pulldown
Set 1 : 50 kg  x 7
Set 2 : 55 kg  x 9
Set 3 : 60 kg  x 8

Lateral Raise (Dumbbell)
Set 1 : 5 kg  x 8
Set 2 : 5 kg  x 8
Set 3 : 5 kg  x 11
Set 4 : 5 kg  x 13

Reverse Pec Deck
Set 1 : 25 kg  x 10
Set 2 : 35 kg  x 9
Set 3 : 45 kg  x 7

Overhead Tricep Extension (Cable)
Set 1 : 25 kg  x 10
Set 2 : 35 kg  x 6



In [None]:
from pathlib import Path

# Path.cwd() gives you the *current working directory* (where the notebook is).
# Because the notebook lives in "notebooks/", we go one level up with .parent
# that lands us in the project root folder: hrv-readiness-study/
ROOT = Path.cwd().parent  

# Build the path to the "data/raw" folder inside the project root.
# This is equivalent to writing "../data/raw" if you're in notebooks/,
# but much clearer and more stable.
WORKOUTS = ROOT / "data" / "raw" / "workouts"  

WORKOUTS.mkdir(parents=True, exist_ok=True) # mkdir -> “make this folder.”
# parents=True -> if any parent folders don’t exist yet (like data/ or data/raw/), create them too.
# exist_ok=True -> don’t crash if the folder already exists; just carry on.

list(WORKOUTS.glob("*.txt")) # gives you Path objects. .glob is the method on the Path object

print([p.name for p in WORKOUTS.glob("*.txt")]) # .glob("*.txt") → all .txt files in that folder.
# ("*.csv") → all .csv files.
# glob("2025-09-*.txt") → all text files that start with that date pattern.



['09_28_25.txt', '09_26_25.txt']


In [None]:
f = list(WORKOUTS.glob("*.txt"))[0] # grab first file in workouts
lines = f.read_text(encoding="utf-8").splitlines() # Return a list of the lines in the string, breaking at line boundaries.

print("File:", f.name)
print("First 15 lines:")
for line in lines[:15]:
    print(repr(line)) # repr: shows the representation of the object. For strings, that means wrapping them in quotes and escaping things like \n.

File: 09_28_25.txt
First 15 lines:
'Upper MP B'
'Sun, Sep 28, 2025'
''
'Bench Press (Barbell)'
'Set 1 : 40 kg  x 9'
'Set 2 : 45 kg  x 10'
'Set 3 : 55 kg  x 7'
''
'Chest Supported Row (Machine)'
'Set 1 : 35 kg  x 7'
'Set 2 : 35 kg  x 8'
'Set 3 : 35 kg  x 10'
''
'Lat Pulldown'
'Set 1 : 50 kg  x 7'
