In [22]:
import pandas as pd
import numpy as np

from sqlalchemy import create_engine, text
from dotenv import load_dotenv
import os
load_dotenv()

True

In [23]:
from urllib.parse import quote

# Connection settings are read from the process environment (load_dotenv()
# has been called before this cell runs).
user = os.getenv("user")
password = os.getenv("password")
host = os.getenv("host")
port = os.getenv("port")
dbname = os.getenv("dbname")

# Fail early with a readable message: an unset variable would otherwise be
# formatted into the URL as the literal text "None".
missing = [
    name
    for name, value in (
        ("user", user),
        ("password", password),
        ("host", host),
        ("port", port),
        ("dbname", dbname),
    )
    if value is None
]
if missing:
    raise RuntimeError(
        "Missing database settings in the environment: " + ", ".join(missing)
    )

# Percent-encode the credentials so characters such as "@", ":" or "/" in the
# user name or password cannot be mistaken for URL delimiters.
DATABASE_URL = (
    f"postgresql://{quote(user, safe='')}:{quote(password, safe='')}"
    f"@{host}:{port}/{dbname}"
)
engine = create_engine(DATABASE_URL)

# Smoke test: open one connection and print the server version string.
# A failure is reported but does not stop the notebook.
try:
    with engine.connect() as conn:
        result = conn.execute(text("SELECT version();"))
        for row in result:
            print("✅ Connected to:", row[0])
except Exception as e:
    print("❌ Connection failed:", e)

✅ Connected to: PostgreSQL 17.4 on aarch64-unknown-linux-gnu, compiled by gcc (GCC) 13.2.0, 64-bit


In [24]:
import glob

# Sort the matches: glob returns files in filesystem order, which would make
# the row order of the combined frame vary between machines and runs.
file_paths = sorted(glob.glob("data/*.csv"))
if not file_paths:
    # pd.concat on an empty list fails with an unhelpful message; say what is
    # actually wrong instead.
    raise FileNotFoundError("No CSV files matched 'data/*.csv'")

frames = []
for path in file_paths:
    # Each file is read as two columns (metric label, value) with no header row.
    df_sleep = pd.read_csv(path, header=None, names=["Metric", "Value"])
    # Pivot to a single row ("Value") whose columns are the metric labels.
    df_sleep = df_sleep.set_index("Metric").T
    # Keep only the first occurrence of each label. This must happen before
    # "Date" is read: a repeated label would make .loc return a Series
    # instead of a single value.
    df_sleep = df_sleep.loc[:, ~df_sleep.columns.duplicated()]
    date = df_sleep.loc["Value", "Date"]
    # The file's date becomes the index of its single row.
    df_sleep.index = pd.to_datetime([date])

    # Remove labels that are not loaded; errors='ignore' tolerates files in
    # which some of them are absent.
    df_sleep = df_sleep.drop(
        columns=[
            'Date',
            'Sleep Score 1 Day',
            'Sleep Score Factors',
            'Sleep Timeline Metrics',
            'Total Sleep Duration',
            'Sleep 1 Day'], errors='ignore'
        )

    frames.append(df_sleep)

# Stack the per-file rows and expose the date index as a regular column.
df = pd.concat(frames, axis=0).reset_index()
df = df.rename(columns={'index': 'WorkoutDate'})

df.dtypes

Metric
WorkoutDate             datetime64[ns]
Sleep Duration                  object
Sleep Score                     object
Quality                         object
Stress Avg                      object
Deep Sleep Duration             object
Light Sleep Duration            object
REM Duration                    object
Awake Time                      object
Restless Moments                object
Resting Heart Rate              object
Body Battery Change             object
Avg SpO₂                        object
Lowest SpO2                     object
Avg Respiration                 object
Lowest Respiration              object
Avg Overnight HRV               object
7d Avg HRV                      object
dtype: object

In [25]:
import re

def parse_duration(s):
    """Convert a duration label into a ``pd.Timedelta``.

    “7h 55m” → Timedelta('0 days 07:55:00'), “13m” → Timedelta('0 days 00:13:00')

    Parameters
    ----------
    s : value to parse; converted with ``str()`` before matching.

    Returns
    -------
    ``pd.Timedelta`` built from the "<n>h" and/or "<n>m" parts that are
    present, or ``pd.NaT`` when ``s`` is missing or contains neither part
    (so a placeholder is not recorded as a zero-length duration).
    """
    if pd.isna(s):
        return pd.NaT
    label = str(s)
    hrs = re.search(r'(\d+)\s*h', label)
    mins = re.search(r'(\d+)\s*m', label)
    if hrs is None and mins is None:
        # Nothing recognisable as a duration: report it as missing.
        return pd.NaT
    h = int(hrs.group(1)) if hrs else 0
    m = int(mins.group(1)) if mins else 0
    return pd.Timedelta(hours=h, minutes=m)

def parse_int(s):
    """Extract the first signed number from a label such as “50 bpm” or “+79”.

    Thousands separators (",") are removed and the Unicode minus / en dash
    are treated as "-" before matching. A decimal part is kept, so
    “14.5 brpm” yields 14.5 rather than 145.

    Parameters
    ----------
    s : value to parse; converted with ``str()`` before matching.

    Returns
    -------
    ``pd.NA`` when ``s`` is missing, NaN when no number is found, otherwise
    the numeric value produced by ``pd.to_numeric`` (integer or float).
    """
    if pd.isna(s):
        return pd.NA
    cleaned = (
        str(s)
        .replace(',', '')
        .replace('\u2212', '-')  # Unicode minus sign
        .replace('\u2013', '-')  # en dash
    )
    found = re.search(r'[+-]?\d+(?:\.\d+)?', cleaned)
    if found is None:
        return float('nan')
    return pd.to_numeric(found.group(0), errors='coerce')

def parse_pct(s):
    """Convert a percentage label to a number: “95%” → 95.

    Surrounding whitespace and whitespace before the "%" sign are ignored,
    so “95 %” and “95% ” parse the same as “95%”.

    Parameters
    ----------
    s : value to parse; converted with ``str()`` first.

    Returns
    -------
    ``pd.NA`` when ``s`` is missing, otherwise the result of
    ``pd.to_numeric(..., errors='coerce')`` (NaN when the text is not numeric).
    """
    if pd.isna(s):
        return pd.NA
    digits = str(s).strip().rstrip('%').strip()
    return pd.to_numeric(digits, errors='coerce')

# Convert the raw text columns of `df`, one group of columns per parser.

# Durations: parse to Timedelta, then keep the text form with anything after
# the first "." removed; missing values become None.
duration_cols = [
    'Sleep Duration',
    'Deep Sleep Duration',
    'Light Sleep Duration',
    'REM Duration',
    'Awake Time',
]
for col in duration_cols:
    parsed = df[col].apply(parse_duration)
    df[col] = parsed.apply(
        lambda td: None if pd.isnull(td) else str(td).split(".")[0]
    )

# Numeric labels, all handled by parse_int: plain scores/counts first, then
# values that carry a unit or sign.
numeric_cols = [
    'Sleep Score',
    'Stress Avg',
    'Restless Moments',
    'Resting Heart Rate',      # e.g. “50 bpm”
    'Avg Respiration',         # e.g. “14 brpm”
    'Lowest Respiration',      # e.g. “12 brpm”
    'Avg Overnight HRV',       # e.g. “82 ms”
    'Body Battery Change',     # e.g. “+79”
]
for col in numeric_cols:
    df[col] = df[col].apply(parse_int)

# Percentages (note the two columns spell "SpO2" differently).
df['Avg SpO₂'] = df['Avg SpO₂'].apply(parse_pct)
df['Lowest SpO2'] = df['Lowest SpO2'].apply(parse_pct)

# Left as labels, stored with the category dtype.
df['Quality'] = df['Quality'].astype('category')
df['7d Avg HRV'] = df['7d Avg HRV'].astype('category')

# Show the resulting column types.
print(df.dtypes)


Metric
WorkoutDate             datetime64[ns]
Sleep Duration                  object
Sleep Score                      int64
Quality                       category
Stress Avg                     float64
Deep Sleep Duration             object
Light Sleep Duration            object
REM Duration                    object
Awake Time                      object
Restless Moments               float64
Resting Heart Rate               int64
Body Battery Change            float64
Avg SpO₂                       float64
Lowest SpO2                    float64
Avg Respiration                  int64
Lowest Respiration               int64
Avg Overnight HRV              float64
7d Avg HRV                    category
dtype: object


In [26]:
# Display label -> snake_case column name.
# NOTE(review): presumably these match the columns of the target table; confirm.
snake_case_names = {
    'WorkoutDate': 'workout_date',
    'Sleep Duration': 'sleep_duration',
    'Sleep Score': 'sleep_score',
    'Quality': 'quality',
    'Stress Avg': 'stress_avg',
    'Deep Sleep Duration': 'deep_sleep_duration',
    'Light Sleep Duration': 'light_sleep_duration',
    'REM Duration': 'rem_duration',
    'Awake Time': 'awake_time',
    'Restless Moments': 'restless_moments',
    'Resting Heart Rate': 'resting_heart_rate',
    'Body Battery Change': 'body_battery_change',
    'Avg SpO₂': 'avg_spo2',
    'Lowest SpO2': 'lowest_spo2',
    'Avg Respiration': 'avg_respiration',
    'Lowest Respiration': 'lowest_respiration',
    'Avg Overnight HRV': 'avg_overnight_hrv',
    '7d Avg HRV': 'avg_hrv_7d',
}
df = df.rename(columns=snake_case_names)

In [27]:
# Append the rows of `df` to the "sleep_data" table; the DataFrame index is
# not written. Re-running this cell appends the same rows again.
df.to_sql(
    name="sleep_data",
    con=engine,
    if_exists="append",
    index=False,
)
print("✅ Data uploaded successfully.")

✅ Data uploaded successfully.
