In [65]:
import re
import pandas as pd
from zoneinfo import ZoneInfo

In [66]:
# CSV file that is read into `notion_export` further down.
input_path = "estrogen_injections_08292025.csv"
# Presumably the destination for the cleaned table; no cell visible here writes to it -- TODO confirm.
output_path = "injections.csv"

In [86]:
DEFAULT_TZ = ZoneInfo("America/New_York")
DEFAULT_TIME = "12:00"

# Zone in which a trailing "(ABBR)" suffix is interpreted. Abbreviations that are
# not listed fall back to DEFAULT_TZ; add entries here to honour other zones.
TZ_BY_ABBREVIATION = {
    "EST": DEFAULT_TZ,
    "EDT": DEFAULT_TZ,
}

def parse_date(s: str): # -> pd.Timestamp | pd.NaT
    """Parse one date cell into a timezone-aware timestamp in ``DEFAULT_TZ``.

    Accepts strings such as ``"November 17, 2024"``,
    ``"December 1, 2024 13:00"`` or ``"December 1, 2024 1:00 PM (EST)"``.

    Parameters
    ----------
    s : str
        Raw cell value. Missing values (``None`` / ``NaN``) are allowed.

    Returns
    -------
    pd.Timestamp or pd.NaT
        ``pd.NaT`` when the value is missing or cannot be parsed. Otherwise a
        timestamp expressed in ``DEFAULT_TZ``; a value without a time of day
        is placed at ``DEFAULT_TIME``.
    """
    if pd.isna(s):  # Missing cell
        return pd.NaT

    raw = str(s).strip()

    # Pull out a trailing "(ABBR)" timezone marker if present.
    tz_match = re.search(r"\(([^)]+)\)\s*$", raw)
    tz_abbr = tz_match.group(1).strip().upper() if tz_match else None

    # Everything below works on the text without the marker; the general
    # parser rejects the parenthesised suffix.
    cleaned = re.sub(r"\s*\([^)]*\)\s*$", "", raw)

    has_time = bool(re.search(r"\d{1,2}:\d{1,2}", cleaned))

    dt = pd.to_datetime(cleaned, errors="coerce", utc=False)

    if pd.isna(dt):
        # Explicit fallbacks for the long-month formats.
        for fmt in ("%B %d, %Y %H:%M", "%B %d, %Y"):
            try:
                dt = pd.to_datetime(cleaned, format=fmt, errors="raise", utc=False)
                break
            except (ValueError, TypeError):
                continue

    # Still couldn't parse, exit out
    if pd.isna(dt):
        return pd.NaT

    if not has_time:
        # Date-only values get the default time of day.
        dt = pd.Timestamp.combine(dt.date(), pd.to_datetime(DEFAULT_TIME).time())

    if dt.tz is None:
        # Naive wall-clock time: attach the zone named by the marker
        # (or the default when there is no marker / it is unknown).
        dt = dt.replace(tzinfo=TZ_BY_ABBREVIATION.get(tz_abbr, DEFAULT_TZ))

    # An input that already carried an offset keeps its instant; every result
    # is expressed in DEFAULT_TZ so the resulting column has a single timezone.
    return dt.tz_convert(DEFAULT_TZ)

In [87]:
def parse_dosage(input: str, *, concentration_mg_per_ml: float = 10, ml_threshold: float = 1.0):
    """Normalise a recorded dose to milligrams.

    Early entries were recorded in mg and later ones in ml, so any value below
    ``ml_threshold`` is treated as a volume in ml and multiplied by the
    solution concentration (e.g. 0.2 ml of a 10 mg/ml solution is 2 mg of
    estradiol). Values at or above the threshold are returned unchanged.

    Parameters
    ----------
    input : str
        Raw dose value; anything ``pd.to_numeric`` accepts.
    concentration_mg_per_ml : float, default 10
        Concentration used to convert ml to mg.
    ml_threshold : float, default 1.0
        Values strictly below this are interpreted as ml.

    Returns
    -------
    The numeric dose in mg. A missing value (NaN) is returned as NaN.

    Raises
    ------
    ValueError
        If ``input`` cannot be converted to a number.
    """
    numeric_val = pd.to_numeric(input, errors="raise")

    # NaN compares False here, so missing doses pass through untouched.
    if numeric_val < ml_threshold:
        numeric_val = numeric_val * concentration_mg_per_ml

    return numeric_val

In [88]:
# Load the raw export.
notion_export = pd.read_csv(input_path)


def _compose_notes(note, leg):
    """Join the free-text note and the leg into one string, skipping missing parts."""
    parts = []
    if not pd.isna(note):
        parts.append(str(note))
    if not pd.isna(leg):
        parts.append(f"leg={leg}")
    # Rows with neither value stay missing rather than becoming an empty string.
    return " ".join(parts) if parts else float("nan")


# Concatenating the two columns with `+` propagates NaN, so a row that has only
# a note or only a leg would lose the value it does have. Compose per row instead.
combined_notes = pd.Series(
    [_compose_notes(note, leg) for note, leg in zip(notion_export["Notes"], notion_export["Leg"])],
    index=notion_export.index,
    dtype=object,
)

injections = pd.DataFrame({
    "date": notion_export["Date"].apply(parse_date),
    "dose_mg": notion_export["Amount (mg)"].apply(parse_dosage),
    "route": "im",  # the same route is recorded for every row
    "notes": combined_notes,
})

In [89]:
# Render the cleaned table to eyeball the parsed dates, doses and notes.
display(injections)

Unnamed: 0,date,dose_mg,route,notes
0,2024-11-17 12:00:00-05:00,2.0,im,
1,2024-11-24 12:00:00-05:00,2.0,im,
2,2024-12-01 13:00:00-05:00,2.0,im,
3,2024-12-03 12:00:00-05:00,2.0,im,
4,2024-12-10 12:00:00-05:00,2.0,im,
5,2024-12-24 12:00:00-05:00,2.0,im,
6,2024-12-31 12:00:00-05:00,2.0,im,
7,2025-02-04 12:00:00-05:00,2.0,im,Right leg leg=Right Leg
8,2025-02-11 12:00:00-05:00,2.0,im,Left leg leg=Left Leg
9,2025-02-18 12:00:00-05:00,2.0,im,Right leg leg=Right Leg
