In [None]:
# 03_build_cycle_dataset.ipynb  (or .py)
import pandas as pd, numpy as np, pathlib, joblib
from xgboost import XGBClassifier

# Build a demo training set from the cleaned sleep data, derive a rule-based
# "flare tomorrow" label, train an XGBoost classifier on it and pickle the model.
DATA = pathlib.Path("data")
MODELS = pathlib.Path("models")
DATA.mkdir(exist_ok=True)
MODELS.mkdir(exist_ok=True)

# 1. load cleaned sleep data (reading parquet needs pyarrow)
# NOTE(review): if pyarrow was installed while this kernel was already running,
# pandas can fail here with "ArrowKeyError: A type extension with name
# pandas.period already defined" — presumably a kernel restart clears it; confirm.
sleep = pd.read_parquet(DATA / "sleep_clean.parquet")

# 2. synthesize extra columns
# The generator is seeded and the draws happen in a fixed order, so the
# synthetic columns are reproducible from run to run.
n = len(sleep)
rng = np.random.default_rng(42)
sleep["cycle_day"] = rng.integers(1, 31, n)        # 1..30 (upper bound exclusive)
sleep["processed_sugar"] = rng.integers(0, 2, n)   # 0/1 flag
sleep["caffeine_evening"] = rng.integers(0, 2, n)  # 0/1 flag
sleep["pain_today"] = rng.integers(0, 11, n)       # 0..10

# 3. create label flare_tomorrow (simple rule for demo)
cond = (
    (sleep["duration_h"] < 6)
    | (sleep["cycle_day"] > 26)
    | (sleep["processed_sugar"] == 1)
)
# Row i is labelled with the rule evaluated on row i + 1. The last row has no
# successor and is labelled 0. Passing fill_value keeps the shifted mask
# boolean; shift() followed by fillna(0) would go through an object-dtype
# Series holding NaN and rely on fillna downcasting, which pandas deprecates.
sleep["flare_tomorrow"] = cond.shift(-1, fill_value=False).astype(int)

# 4. save training parquet (optional)
sleep.to_parquet(DATA / "train_cycle.parquet", index=False)

# 5. train model
feature_cols = [
    "duration_h",
    "quality_pct",
    "cycle_day",
    "processed_sugar",
    "caffeine_evening",
    "pain_today",
]
X = sleep[feature_cols]
y = sleep["flare_tomorrow"]
model = XGBClassifier(max_depth=4, n_estimators=150, random_state=42)
model.fit(X, y)

# 6. dump model
joblib.dump(model, MODELS / "flare_cycle_xgb.pkl")
print("✔ model saved to models/flare_cycle_xgb.pkl")


ArrowKeyError: A type extension with name pandas.period already defined

In [None]:
from xgboost import XGBClassifier
import joblib, pandas as pd, numpy as np
# Retrain the flare classifier from the saved training parquet and overwrite
# the pickled model on disk.
df = pd.read_parquet('data/train_cycle.parquet')
feature_cols = [
    'duration_h', 'quality_pct', 'cycle_day',
    'processed_sugar', 'caffeine_evening', 'pain_today',
]
X = df[feature_cols]
y = df['flare_tomorrow']
# random_state is pinned so this cell uses the same hyperparameters as the
# first training cell, which writes the same model file with random_state=42.
model = XGBClassifier(max_depth=4, n_estimators=150, random_state=42).fit(X, y)
joblib.dump(model, 'models/flare_cycle_xgb.pkl')
