### Import data

In [1]:
import pandas as pd
import numpy as np
from pathlib import Path
import matplotlib as map

In [6]:
from datetime import date
from pathlib import Path

# Path.cwd() gives the *current working directory* (where the notebook runs).
# Because the notebook lives in "notebooks/", .parent goes one level up and
# lands in the project root folder: hrv-readiness-study/
ROOT = Path.cwd().parent

# Folder for untouched snapshots of the source data: <project root>/data/raw.
# Joining with "/" on Path objects is equivalent to writing "../data/raw"
# from notebooks/, but there are no hand-written slashes to get wrong on
# different OSes.
RAW = ROOT / "data" / "raw"

# The live data is a Google Sheet published as CSV; it is downloaded from
# this URL, not read from a local file.
SHEET_CSV_URL = "https://docs.google.com/spreadsheets/d/e/2PACX-1vS4FdE6I5uAi2KpwzDw5dtUT_b8W7QdBokKBkDwZESSPnfLye4oRWrpXtZdvrcAMUe6y3l0veXbdg-y/pub?gid=1737013422&single=true&output=csv"

# Download the current contents of the sheet into a DataFrame.
hrv = pd.read_csv(SHEET_CSV_URL)

# Freeze a dated copy of the download in the RAW folder.
today = date.today()
# Turn the date into a string like "2025-09-26" ...
today_str = today.strftime("%Y-%m-%d")
# ... and use that string in the snapshot filename.
filename = f"wellness_{today_str}.csv"
# Join it with the RAW folder path.
path = RAW / filename

# Create data/raw if it does not exist yet (e.g. on a fresh checkout);
# without this, to_csv fails when the folder is missing.
RAW.mkdir(parents=True, exist_ok=True)

# Save the DataFrame snapshot (index=False: do not write the row index).
hrv.to_csv(path, index=False)

# Quick check
print("Saved snapshot to:", path)
hrv.head(2)


Saved snapshot to: /Users/chandlershortlidge/Desktop/hrv-readiness-study/data/raw/wellness_2025-09-27.csv


Unnamed: 0,Timestamp,Today's Date,How did you feel today?,Whoop recovery score,RHR,Calories,Whoop sleep score %,Whoop sleep hours,Training day or rest day?,Any notes?,HRV,Did you train or did you rest?
0,9/26/2025 11:10:28,9/26/2025,2,Green,62,Deficit,85,6:15,Training day,Felt somewhat sick all day. Very drained. Low ...,36,Trained
1,9/27/2025 6:44:05,9/27/2025,3,Yellow,62,Deficit,85,7:03,Rest day,"Tired, lethargic, sleepy.",28,Rested


In [24]:
# Begin cleaning the data.

# Map the verbose survey headers to short snake_case names.
# DataFrame.rename ignores keys that are not present in the frame, so a
# header that changes in the sheet is silently left un-renamed. The map
# therefore lists both the headers currently exported by the sheet and the
# older ones, so earlier snapshots rename the same way.
# NOTE(review): "day_type" and "hrv" are names picked during review for
# headers the original map did not cover; adjust if you prefer others.
COLUMN_NAMES = {
    "Timestamp": "timestamp",
    "Today's Date": "date",
    "How did you feel today?": "feeling",
    "Whoop recovery score": "whoop_status",
    "RHR": "rhr",
    "Resting RHR": "rhr",  # older header
    "HRV": "hrv",
    "Calories": "calories",
    "Whoop sleep score %": "sleep_score",
    "Whoop sleep hours": "sleep_time",
    "Training day or rest day?": "day_type",
    "Did you train or did you rest?": "trained",
    "Trained?": "trained",  # older header
    "Any notes?": "notes",
}

hrv_clean = hrv.rename(columns=COLUMN_NAMES)

# Preview the result instead of dumping the whole frame.
hrv_clean.head()

Unnamed: 0,timestamp,date,feeling,whoop_status,rhr,calories,sleep_score,sleep_time,trained,notes
0,9/26/2025 11:10:28,9/26/2025,2,Green,62,Deficit,85,6:15,Yes,Felt somewhat sick all day. Very drained. Low ...


In [25]:
# Check each column's dtype and non-null count to see what still needs converting.
hrv_clean.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1 entries, 0 to 0
Data columns (total 10 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   timestamp     1 non-null      object
 1   date          1 non-null      object
 2   feeling       1 non-null      int64 
 3   whoop_status  1 non-null      object
 4   rhr           1 non-null      int64 
 5   calories      1 non-null      object
 6   sleep_score   1 non-null      int64 
 7   sleep_time    1 non-null      object
 8   trained       1 non-null      object
 9   notes         1 non-null      object
dtypes: int64(3), object(7)
memory usage: 212.0+ bytes


In [26]:
# Turn the "date" column from text into datetime.
# The sheet exports dates month-first (e.g. "9/26/2025"), so the format is
# spelled out instead of being inferred. errors="raise" makes any value that
# does not match stop the notebook rather than slip through.
hrv_clean["date"] = pd.to_datetime(
    hrv_clean["date"], format="%m/%d/%Y", errors="raise"
)

# Check the output: should be a datetime64 dtype.
hrv_clean["date"].dtype


dtype('<M8[ns]')

In [27]:
# Extract year, month and day into their own columns.
hrv_clean["year"] = hrv_clean["date"].dt.year
hrv_clean["month"] = hrv_clean["date"].dt.month
hrv_clean["day"] = hrv_clean["date"].dt.day

# Filter to a single day. The date is given as an unambiguous ISO timestamp;
# the earlier literal "26-09-25" only matched 2025-09-26 because the parser
# guessed day-month-year, and could equally be read as 2026-09-25.
hrv_clean[hrv_clean["date"] == pd.Timestamp("2025-09-26")]


Unnamed: 0,timestamp,date,feeling,whoop_status,rhr,calories,sleep_score,sleep_time,trained,notes,year,month,day
0,9/26/2025 11:10:28,2025-09-26,2,Green,62,Deficit,85,6:15,Yes,Felt somewhat sick all day. Very drained. Low ...,2025,9,26


In [None]:
# Turn numeric columns stored as objects into numbers.
# errors="coerce" turns entries that cannot be parsed into NaN instead of raising.
hrv_clean["rhr"] = pd.to_numeric(hrv_clean["rhr"], errors="coerce")
hrv_clean["sleep_score"] = pd.to_numeric(hrv_clean["sleep_score"], errors="coerce")

# sleep_time is text such as "6:15" (hours:minutes), which pd.to_numeric
# cannot parse. Appending ":00" gives "H:MM:SS", which pd.to_timedelta reads
# as a duration; anything unparseable becomes NaT.
# NOTE(review): this column was previously filled from "rhr", which looks
# like a copy-paste slip — confirm a duration is the intended content.
hrv_clean["sleep_hhmm"] = pd.to_timedelta(
    hrv_clean["sleep_time"].astype(str).str.strip() + ":00", errors="coerce"
)

# Decimal hours (6:15 -> 6.25) for plotting and statistics.
hrv_clean["sleep_hours"] = hrv_clean["sleep_hhmm"].dt.total_seconds() / 3600



In [None]:
# Use matplotlib to render plots inline in the notebook (.ipynb).
# Plotly: allows interactive charts; spins up a small web server, like Streamlit.