
# 05 — Summary Report

This notebook consolidates key results from:
1. **01_period_analysis.ipynb**
2. **02_weight_analysis.ipynb**
3. **03_weight_cycle_analysis.ipynb**
4. **04_weight_cycle_kmeans.ipynb**

It aggregates summary statistics, shows final figures (if present), and exports clean tables for your GitHub README.


In [None]:

# ===== Config =====
import os

FIG_DIR = "figures"      # directory the figure cells look in for saved PNGs
EXPORT_DIR = "exports"   # directory that receives the exported tables and snippets
SAVE_TABLES = True  # set False if you don't want CSVs saved

# Create both directories up front so later cells can read from and write to them.
for _directory in (FIG_DIR, EXPORT_DIR):
    os.makedirs(_directory, exist_ok=True)


In [None]:

import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from pathlib import Path

# Widen pandas' display limits so the summary tables are shown with all columns.
for _option, _value in (("display.max_columns", 100), ("display.width", 120)):
    pd.set_option(_option, _value)


## Load cleaned data

In [None]:

# Try pickle first (faster), then CSV
def load_periods():
    """Load the cleaned period table from the working directory.

    ``cleaned_periods.pkl`` is preferred; ``cleaned_periods.csv`` is the
    fallback. For the CSV, every known date column that is actually present
    in the file is parsed as a datetime. Columns that are absent are skipped
    instead of making ``read_csv`` fail, and ``next_begin`` is parsed too so
    that date arithmetic against ``begin`` works on CSV-loaded data.

    Returns:
        pandas.DataFrame: the period table.

    Raises:
        FileNotFoundError: if neither file exists.
    """
    pkl_path = Path("cleaned_periods.pkl")
    csv_path = Path("cleaned_periods.csv")

    if pkl_path.exists():
        # NOTE: unpickling executes code stored in the file; only load
        # pickles produced by your own notebooks.
        return pd.read_pickle(pkl_path)

    if csv_path.exists():
        # Read just the header first to learn which date columns exist.
        header = pd.read_csv(csv_path, nrows=0).columns
        known_date_columns = ("begin", "end", "ovulation_date", "next_begin")
        date_columns = [col for col in known_date_columns if col in header]
        return pd.read_csv(csv_path, parse_dates=date_columns)

    raise FileNotFoundError("cleaned_periods.csv/.pkl not found")

def load_weights():
    """Load the cleaned weight table from the working directory.

    Tries ``cleaned_weights.pkl`` first and falls back to
    ``cleaned_weights.csv`` (with the ``Date`` column parsed as datetimes).

    Returns:
        pandas.DataFrame: the weight table.

    Raises:
        FileNotFoundError: if neither file exists.
    """
    # (file name, reader, extra reader options), in order of preference.
    candidates = (
        ("cleaned_weights.pkl", pd.read_pickle, {}),
        ("cleaned_weights.csv", pd.read_csv, {"parse_dates": ["Date"]}),
    )
    for filename, reader, options in candidates:
        if Path(filename).exists():
            return reader(filename, **options)
    raise FileNotFoundError("cleaned_weights.csv/.pkl not found")

# Take explicit copies of the loaded frames before adding columns to them.
df_periods = load_periods().copy()
df_weight = load_weights().copy()

# Derive the optional columns only when they are missing and their inputs exist.
_period_columns = set(df_periods.columns)

if "next_begin" in _period_columns and "cycle_length" not in _period_columns:
    # Days from this period's start to the next period's start.
    _cycle_gap = df_periods["next_begin"] - df_periods["begin"]
    df_periods["cycle_length"] = _cycle_gap.dt.days

if "end" in _period_columns and "duration_days" not in _period_columns:
    # Days between the period's start and end dates.
    _period_span = df_periods["end"] - df_periods["begin"]
    df_periods["duration_days"] = _period_span.dt.days

# Put both tables in chronological order.
df_periods = df_periods.sort_values("begin")
df_weight = df_weight.sort_values("Date")

# Last expression of the cell: preview the first rows.
df_periods.head()


## Key metrics

In [None]:

# Cycle-level metrics: mean and standard deviation for each column that exists.
metrics = {}

for _column, _prefix in (("cycle_length", "cycle_length"), ("duration_days", "period_duration")):
    if _column not in df_periods.columns:
        continue
    _values = df_periods[_column]
    metrics[f"{_prefix}_mean"] = float(_values.mean())
    metrics[f"{_prefix}_sd"] = float(_values.std())

# Calendar year of each period's start date, used for the per-year counts.
df_periods["year"] = df_periods["begin"].dt.year

def get_season(dt):
    """Return the season name for a date, based on its calendar month.

    December-February map to "Winter", March-May to "Spring", June-August
    to "Summer" and September-November to "Fall".

    Args:
        dt: any object with a ``month`` attribute (e.g. ``datetime.date``,
            ``pandas.Timestamp``), or a missing value (``None`` / ``NaT``).

    Returns:
        str | None: the season name, or ``None`` for a missing date.
        Previously a missing date fell through to the final branch and was
        silently counted as "Fall".
    """
    if pd.isna(dt):
        return None
    # month % 12 puts December at 0, so each run of three consecutive values
    # (0-2, 3-5, 6-8, 9-11) is one season.
    return ("Winter", "Spring", "Summer", "Fall")[(int(dt.month) % 12) // 3]

# Tag each period with the season of its start date.
df_periods["season"] = df_periods["begin"].apply(get_season)

# Periods per calendar year, in ascending year order.
by_year = df_periods["year"].value_counts().sort_index()

# Periods per season in a fixed Winter-to-Fall order; absent seasons count as 0.
_season_order = ["Winter", "Spring", "Summer", "Fall"]
_season_counts = df_periods["season"].value_counts()
by_season = _season_counts.reindex(_season_order).fillna(0).astype(int)

print("=== Summary metrics ===")
for _name, _value in metrics.items():
    print(f"{_name}: {_value:.2f}")

for _title, _counts in (("\nBy year:", by_year), ("\nBy season:", by_season)):
    print(_title)
    display(_counts.to_frame("count"))

# Persist the metrics and both count tables for the README helper cell.
if SAVE_TABLES:
    pd.Series(metrics).to_json(os.path.join(EXPORT_DIR, "summary_metrics.json"), indent=2)
    for _filename, _counts in (("counts_by_year.csv", by_year), ("counts_by_season.csv", by_season)):
        _counts.to_csv(os.path.join(EXPORT_DIR, _filename))


## Cluster results (if available)

In [None]:

# Cluster assignments are optional: they are read from 'clustered_meta.csv'
# when that file exists. Otherwise clustered_meta stays None and a short
# notice is printed, so the cluster summary cell can skip itself.
cluster_path = Path("clustered_meta.csv")
if not cluster_path.exists():
    clustered_meta = None
    print("clustered_meta.csv not found. Skipping cluster summaries.")
else:
    clustered_meta = pd.read_csv(cluster_path, parse_dates=["cycle_start"])
    print("Loaded clustered_meta.csv")

# Last expression of the cell: preview the table when it was loaded.
None if clustered_meta is None else clustered_meta.head()


In [None]:

if clustered_meta is not None:
    # Keep only the periods whose start date matches a clustered cycle start.
    df_joined = df_periods.merge(
        clustered_meta, how="inner", left_on="begin", right_on="cycle_start"
    )
    cluster_counts = df_joined["cluster"].value_counts().sort_index()

    # Per-cluster mean/SD for whichever of the two measures are available.
    agg_cols = {}
    for _column, _label in (("cycle_length", "cycle_length"), ("duration_days", "duration")):
        if _column in df_joined.columns:
            agg_cols[f"mean_{_label}"] = (_column, "mean")
            agg_cols[f"sd_{_label}"] = (_column, "std")

    if agg_cols:
        cluster_summary = df_joined.groupby("cluster").agg(**agg_cols)
    else:
        cluster_summary = pd.DataFrame()

    display(cluster_counts.to_frame("count"))
    display(cluster_summary.round(2))

    # Cluster-by-season and cluster-by-year contingency tables.
    _season_order = ["Winter", "Spring", "Summer", "Fall"]
    season_dist = pd.crosstab(df_joined["cluster"], df_joined["season"])
    season_dist = season_dist.reindex(columns=_season_order, fill_value=0)
    year_dist = pd.crosstab(df_joined["cluster"], df_joined["year"]).sort_index(axis=1)

    # Row-wise percentages: each cluster's row sums to (about) 100.
    season_pct = (season_dist.div(season_dist.sum(axis=1), axis=0) * 100).round(1)
    year_pct = (year_dist.div(year_dist.sum(axis=1), axis=0) * 100).round(1)

    for _title, _table in (
        ("\nSeason counts by cluster:", season_dist),
        ("\nSeason % by cluster:", season_pct),
        ("\nYear counts by cluster:", year_dist),
        ("\nYear % by cluster:", year_pct),
    ):
        print(_title)
        display(_table)

    if SAVE_TABLES:
        cluster_counts.to_csv(os.path.join(EXPORT_DIR, "cluster_counts.csv"))
        # An empty summary (no measure columns available) is not exported.
        if not cluster_summary.empty:
            cluster_summary.round(2).to_csv(os.path.join(EXPORT_DIR, "cluster_summary.csv"))
        for _filename, _table in (
            ("cluster_season_counts.csv", season_dist),
            ("cluster_season_percent.csv", season_pct),
            ("cluster_year_counts.csv", year_dist),
            ("cluster_year_percent.csv", year_pct),
        ):
            _table.to_csv(os.path.join(EXPORT_DIR, _filename))


## Figures (if present)

In [None]:

from IPython.display import display, Image

def show_if_exists(path, caption=None, width=900):
    """Display the image at *path* inline, or report that it is missing.

    Args:
        path: location of the image file.
        caption: optional text printed below the image when it is shown.
        width: display width passed to ``IPython.display.Image``.

    Returns:
        None. A missing file prints ``[Missing] <path>`` and raises nothing.
    """
    if not Path(path).exists():
        print(f"[Missing] {path}")
        return

    display(Image(filename=path, width=width))
    if caption:
        print(caption)

# Weight × Cycle analysis figures: (file name inside FIG_DIR, caption).
# A file that is absent is reported as "[Missing] ..." by show_if_exists.
_analysis_figures = (
    ("01_weight_timeline.png", "Weight timeline with cycle shading"),
    ("02_align_period_start.png", "Aligned around Period Start"),
    ("03_align_ovulation.png", "Aligned around Ovulation"),
    ("04_compare_period_vs_ovulation.png", "Period vs Ovulation (mean ± 95% CI)"),
)
for _filename, _caption in _analysis_figures:
    show_if_exists(os.path.join(FIG_DIR, _filename), _caption)

# Cluster plots saved manually (cluster_0.png ... cluster_9.png) are optional,
# so absent ones are skipped silently instead of being reported as missing.
for i in range(10):
    p = os.path.join(FIG_DIR, f"cluster_{i}.png")
    if not Path(p).exists():
        continue
    show_if_exists(p, f"Cluster {i} pattern")


## README helper: export quick markdown snippets

In [None]:

# Build a markdown snippet for the README from the files in EXPORT_DIR.
# Note: the snippet reflects whatever export files are on disk, which may be
# from an earlier run if SAVE_TABLES is currently False.
md_lines = []


def _as_markdown_table(table):
    """Render *table* as a markdown table, with a plain-text fallback.

    ``DataFrame.to_markdown`` relies on the optional ``tabulate`` package and
    raises ImportError without it. In that case the table is emitted as a
    fenced plain-text block so the snippet export still succeeds.
    """
    try:
        return table.to_markdown()
    except ImportError:
        fence = "`" * 3
        return f"{fence}\n{table.to_string()}\n{fence}"


# Metrics snippet: only emitted when exported metrics exist, so that the
# "run the cells above" placeholder below can actually be reached.
metrics_path = os.path.join(EXPORT_DIR, "summary_metrics.json")
if Path(metrics_path).exists():
    saved_metrics = pd.read_json(metrics_path, typ="series").to_dict()
else:
    saved_metrics = {}

if saved_metrics:
    md_lines.append("### Key Metrics\n")
    for k, v in saved_metrics.items():
        md_lines.append(f"- **{k.replace('_',' ').title()}**: {v:.2f}")
    md_lines.append("\n")

# Table snippets (cluster counts, season % by cluster), each only if exported.
for _heading, _filename in (
    ("### Cluster Counts\n", "cluster_counts.csv"),
    ("### Seasonal Distribution by Cluster (%)\n", "cluster_season_percent.csv"),
):
    _table_path = os.path.join(EXPORT_DIR, _filename)
    if Path(_table_path).exists():
        md_lines.append(_heading)
        md_lines.append(_as_markdown_table(pd.read_csv(_table_path, index_col=0)))
        md_lines.append("\n")

snippet = "\n".join(md_lines) if md_lines else "_Run the cells above first to generate snippets._"
print(snippet)

# Save a copy next to the other exports; explicit UTF-8 keeps the file
# identical across platforms with different default encodings.
snippet_path = os.path.join(EXPORT_DIR, "readme_snippet.md")
with open(snippet_path, "w", encoding="utf-8") as f:
    f.write(snippet)
print(f"\nSaved: {snippet_path}")
