In [1]:
import numpy as np
import pandas as pd
import wisc_ecephys_tools as wet
from ecephys import wne
from scipy import stats

from findlay2025a import core, hypnograms
from findlay2025a.constants import Experiments as Exps

In [2]:
# Lookup table from experiment identifier to the label that replaces it in the
# `experiment` column of the result tables.
experiment_display_names = dict(
    (
        (Exps.NOD, "Novelty"),
        (Exps.COW, "Locomotion"),
        (Exps.CTN, "Dual"),
    )
)

In [3]:
def get_bout_duration_ecdfs(hg: pd.DataFrame):
    g = hg.groupby("state")
    return {state: stats.ecdf(g.get_group(state)["duration"]) for state in g.groups}


def bout_duration_ecdfs_to_df(
    ecdfs, duration=np.linspace(0.5, 600, 1200, endpoint=True)
) -> pd.DataFrame:
    return pd.concat(
        [
            pd.DataFrame(
                {
                    "duration": duration,
                    "Proportion": ecdfs[state].cdf.evaluate(duration),
                    "state": state,
                }
            )
            for state in ecdfs
        ],
        axis=0,
        ignore_index=True,
    )

In [4]:
s3 = core.get_project("shared")

# Per-subject pieces are gathered in lists and concatenated once after the
# loop; concatenating onto a growing DataFrame inside the loop re-copies every
# previously accumulated row on each iteration.
frac_parts = []
hg_parts = []
ecdf_parts = []


def _tag(df, subject, experiment, day):
    """Add `subject`, `experiment` and `day` columns to `df` in place; return `df`."""
    df["subject"] = subject
    df["experiment"] = experiment
    df["day"] = day
    return df


def _concat_parts(parts):
    """Stack `parts` row-wise with a fresh index; no parts gives an empty DataFrame."""
    if not parts:
        return pd.DataFrame()
    return pd.concat(parts, axis=0, ignore_index=True)


for sglx_subject, experiment in core.yield_sglx_subject_experiment_pairs(
    [Exps.NOD, Exps.COW]
):
    ### Step 1: Get hypnograms
    lib_hg = wne.sglx.utils.load_reconciled_float_hypnogram(
        s3,
        experiment,
        sglx_subject,
        probes=[],
        sources=[],
        reconcile_ephyviewer_edits=True,
        simplify=True,
    )
    d1lp_hg = wet.shared.get_day1_light_period_hypnogram(
        lib_hg, experiment, sglx_subject
    )

    ewk_hg = hypnograms.get_extended_wake_hypnogram(lib_hg, experiment, sglx_subject)
    pdd2lp_hg = hypnograms.get_post_deprivation_day2_light_period_hypnogram(
        lib_hg,
        experiment,
        sglx_subject,
        sleep_deprivation_end=ewk_hg["end_time"].max(),
    )  # Post-deprivation day 2 light period hypnogram

    ### Step 2: Derive the three tables for each day
    # The day-1 hypnogram is labelled "Baseline" and the post-deprivation
    # day-2 hypnogram "Recovery".
    for day, day_hg in (("Baseline", d1lp_hg), ("Recovery", pdd2lp_hg)):
        # Fractional occupancy is taken before the label columns are added to
        # the hypnogram itself.
        day_frac = day_hg.fractional_occupancy().to_frame().reset_index()
        frac_parts.append(_tag(day_frac, sglx_subject.name, experiment, day))

        hg_parts.append(_tag(day_hg, sglx_subject.name, experiment, day))

        # The ECDF helpers only read the `state` and `duration` columns.
        day_ecdf = bout_duration_ecdfs_to_df(get_bout_duration_ecdfs(day_hg))
        ecdf_parts.append(_tag(day_ecdf, sglx_subject.name, experiment, day))

frac = _concat_parts(frac_parts)
hg = _concat_parts(hg_parts)
ecdf = _concat_parts(ecdf_parts)

In [5]:
# Replace experiment identifiers with their display names. Values that are not
# keys of `experiment_display_names` pass through unchanged, so running this
# cell a second time (when the column already holds display names) does not
# turn the column into NaN, as a plain `Series.map(dict)` would.
for df in [ecdf, hg, frac]:
    df["experiment"] = df["experiment"].map(
        lambda exp: experiment_display_names.get(exp, exp)
    )

In [6]:
# CNPIX6-Eugene is left out of all three tables: too much data is missing from
# the periods covered here, and it is too unevenly distributed, to include.
excluded_subject = "CNPIX6-Eugene"
frac = frac.loc[frac["subject"].ne(excluded_subject)]
hg = hg.loc[hg["subject"].ne(excluded_subject)]
ecdf = ecdf.loc[ecdf["subject"].ne(excluded_subject)]

In [7]:
KEEP_STATES = ["NREM", "IS", "REM", "Wake", "MA"]


def _keep_states(df):
    """Return the rows of `df` whose `state` is one of `KEEP_STATES`."""
    return df[df["state"].isin(KEEP_STATES)]


def _subjects_in_both_experiments(df):
    """Return the rows of `df` whose subject lists both NOD and COW in `core.MANIFEST`.

    Membership is tested once per unique subject instead of once per row, and
    the `isin` mask is boolean even when `df` has no rows.
    """
    paired_subjects = [
        subject
        for subject in df["subject"].unique()
        if (Exps.NOD in core.MANIFEST[subject])
        and (Exps.COW in core.MANIFEST[subject])
    ]
    return df[df["subject"].isin(paired_subjects)]


# `rename` returns a new frame here rather than modifying the filtered slice
# in place.
frac = _keep_states(frac).rename(columns={"duration": "fractional_occupancy"})
fracc = _subjects_in_both_experiments(frac)

hg = _keep_states(hg)
hgc = _subjects_in_both_experiments(hg)

ecdf = _keep_states(ecdf)
ecdfc = _subjects_in_both_experiments(ecdf)

In [8]:
# Write each result table to its parquet file in the "seahorse" project.
nb = core.get_project("seahorse")
tables_by_filename = {
    "sleep_period_fractional_occupancy.pqt": frac,
    "sleep_period_bouts.pqt": hg,
    "sleep_period_bout_duration_ecdfs.pqt": ecdf,
}
for filename, table in tables_by_filename.items():
    table.to_parquet(nb.get_project_file(filename))