## Imports

In [None]:
import os
import sys
from pathlib import Path

def _find_repo_root(start: Path) -> Path:
    """Locate the project root by searching ``start`` and then its parents.

    The search runs nearest-first and in two passes:

    1. the first directory that contains ``code/utils``, ``sourcedata`` and
       ``derivatives`` all at once;
    2. otherwise, the first directory that contains ``code/utils`` alone.

    Args:
        start: Directory where the search begins.

    Returns:
        The matching directory, or ``start`` itself when nothing matches.
    """
    search_path = (start, *start.parents)

    def _has_utils(folder: Path) -> bool:
        return (folder / "code" / "utils").exists()

    # Pass 1: a directory carrying every marker of the project layout.
    strict_match = next(
        (
            folder
            for folder in search_path
            if _has_utils(folder)
            and (folder / "sourcedata").exists()
            and (folder / "derivatives").exists()
        ),
        None,
    )
    if strict_match is not None:
        return strict_match

    # Pass 2: settle for the code folder alone; fall back to the start dir.
    return next((folder for folder in search_path if _has_utils(folder)), start)


# Anchor the notebook on the project root detected from the working directory.
repo_root = _find_repo_root(Path.cwd().resolve())
code_root = repo_root / "code"

# Prepend the root and its code folder to sys.path. Every insert goes to
# index 0, so the directory handled last (code_root) ends up first.
for p in (repo_root, code_root):
    p_str = str(p)
    if not p.exists() or p_str in sys.path:
        continue
    sys.path.insert(0, p_str)

# Folder whose `utils` subfolder should be importable: code/ when code/utils
# is present, otherwise the project root itself.
if (code_root / "utils").exists():
    project_code_root = code_root
else:
    project_code_root = repo_root
if str(project_code_root) not in sys.path:
    sys.path.insert(0, str(project_code_root))

print(f"Resolved repo root: {repo_root}")

In [None]:
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
from IPython import get_ipython
from IPython.display import display
from matplotlib import animation
from matplotlib.colors import TwoSlopeNorm
from matplotlib.patches import Rectangle


from utils import autocorrelation as acf
from utils import helper_functions as hf

---
### Load subjects 

In [None]:
# Subjects processed by the cells below.
subjects = ["secundo", "gus"]

# Input and output roots, anchored on the detected project root.
repo_root = _find_repo_root(Path.cwd().resolve())
source_root = repo_root / "sourcedata"
deriv_root = repo_root / "derivatives" / "preprocessing"

# Fail early with a clear message if either folder is missing.
if not source_root.exists():
    raise FileNotFoundError(f"sourcedata not found at {source_root}")
if not deriv_root.exists():
    raise FileNotFoundError(f"preprocessing derivatives not found at {deriv_root}")

# DATA_MODE selects which saved baseline folder the loading cell reads.
# It is checked against ANALYSIS_MODES so that a typo raises here instead of
# silently pointing the loader at a folder that does not exist.
ANALYSIS_MODES = ["raw", "mean_divide", "zscore"]
DATA_MODE = "zscore"
if DATA_MODE not in ANALYSIS_MODES:
    raise ValueError(f"DATA_MODE={DATA_MODE!r} must be one of {ANALYSIS_MODES}")

# Values forwarded to `acf.configure` by `_ensure_acf_configured`.
EDA_ROOT_NAME = "eda"
ACF_QC_FILENAME = "acf_qc_summary.csv"
FDIFF_ROBUST_PCTL = (2.0, 98.0)
SPATIAL_EPS = 1e-8
SPATIAL_MIN_VALID_SAMPLES = 8


def _ensure_acf_configured():
    """Hand this notebook's output root and constants to ``acf.configure``.

    The module-level values are read at call time, so calling this again
    after editing them re-applies the new settings. What ``acf.configure``
    does with them is defined in ``utils.autocorrelation``, not here.
    """
    settings = {
        "deriv_root_path": deriv_root,
        "eda_root_name": EDA_ROOT_NAME,
        "acf_qc_filename": ACF_QC_FILENAME,
        "frame_diff_robust_pctl": FDIFF_ROBUST_PCTL,
        "spatial_eps": SPATIAL_EPS,
        "spatial_min_valid_samples": SPATIAL_MIN_VALID_SAMPLES,
    }
    acf.configure(**settings)


# Apply the configuration as soon as this cell runs.
_ensure_acf_configured()


---
### Load Baseline Data

Load baseline sessions for model training.

In [None]:
# Load the saved baseline sessions of every subject and print a short summary.
for subject in subjects:
    data_directory = source_root / subject
    data_output_dir = deriv_root / subject  # subject-specific output folder
    data_output_dir.mkdir(parents=True, exist_ok=True)

    # "raw" frames sit in baseline_only/; every other mode in
    # baseline_only_normalized/<mode>/. The path is built with pathlib so the
    # separators are right on every platform, then passed on as a str as before.
    if DATA_MODE == "raw":
        baseline_output_dir = str(data_output_dir / "baseline_only")
    else:
        baseline_output_dir = str(
            data_output_dir / "baseline_only_normalized" / DATA_MODE
        )

    # Load all baseline sessions
    baseline_sessions = hf.load_saved_baseline_sessions(baseline_output_dir)

    # Say so explicitly when nothing was loaded instead of printing nothing.
    if len(baseline_sessions) == 0:
        print(
            f"\nNo baseline sessions found for {subject} ({DATA_MODE}) "
            f"in {baseline_output_dir}"
        )
        continue

    # Frame count per session (first axis of each "frames" array), computed
    # once and reused for the total and the distribution line.
    frame_counts = [s["frames"].shape[0] for s in baseline_sessions]

    # Print summary
    print(f"\nBaseline Data Summary:{subject} ({DATA_MODE})")
    print(f"  Total sessions: {len(baseline_sessions)}")
    total_frames = sum(frame_counts)
    print(f"  Total baseline frames: {total_frames:,}")

    spatial_shapes = [s["frames"].shape[1:] for s in baseline_sessions]
    unique_shapes = set(spatial_shapes)
    print(f"  Spatial dimensions: {unique_shapes}")

    # Show frame count distribution
    print(
        f"  Frames per session: min={min(frame_counts)}, max={max(frame_counts)}, "
        f"mean={np.mean(frame_counts):.0f}, std={np.std(frame_counts):.0f}"
    )

    # first session (the list is known to be non-empty at this point)
    first_session = baseline_sessions[0]
    print(f"\n  First session ({first_session['session_id']}):")
    print(f"    Frames: {first_session['frames'].shape[0]}")
    print(f"    Shape: {first_session['frames'].shape}")
    print(f"    Dtype: {first_session['frames'].dtype}")
    print(
        f"Value range: [{first_session['frames'].min():.2f}, {first_session['frames'].max():.2f}]"
    )

---
## Exploratory data analysis

---

### 1. Visualize individual pixels
 The first cell displays a sample frame; move your mouse over it to read off the pixel coordinates.
 You can then plug those coordinates into the second cell to display the time course of that individual pixel (mean-subtracted baseline).

 #### section setup

In [None]:
# NOTE(review): USE_WIDGET is not read anywhere in the visible part of this
# notebook; kept in case a later cell uses it.
USE_WIDGET = False
ipy = get_ipython()
backend_mode = "inline"

# Ask IPython for the inline backend. get_ipython() returns None outside
# IPython, in which case nothing is switched.
if ipy is not None:
    try:
        ipy.run_line_magic("matplotlib", "inline")
    except Exception as exc:
        # Still best effort: keep whatever backend is active, but report the
        # failure instead of swallowing it.
        print(f"Could not switch matplotlib to the inline backend: {exc!r}")

print(f"Matplotlib backend: {matplotlib.get_backend()} | mode={backend_mode}")

# Sample selection used by the cells of this section.
subjects = ["secundo", "gus"]
sample_subject = subjects[0]  # change if needed
sample_session_idx = 2  # change if needed
sample_frame_idx = 0  # change if needed

# NOTE(review): this uses hf.load_all_baseline while the baseline summary cell
# uses hf.load_saved_baseline_sessions - confirm both helpers are intended.
baseline_output_dir = deriv_root / sample_subject / "baseline_only"
baseline_sessions = hf.load_all_baseline(str(baseline_output_dir))



#### load normalized baseline frames

In [None]:
# Pick the sample session and load its "mean_divide" frames.
if len(baseline_sessions) == 0:
    raise RuntimeError(f"No baseline sessions found for {sample_subject}")

# Validate the session index the same way the frame index is validated below.
# Negative indices stay allowed and the exception type is still IndexError,
# as plain list indexing would give; only the message is more helpful.
n_sessions = len(baseline_sessions)
if not (-n_sessions <= sample_session_idx < n_sessions):
    raise IndexError(
        f"sample_session_idx={sample_session_idx} out of range for "
        f"{n_sessions} baseline session(s) of {sample_subject}"
    )

sample_session = baseline_sessions[sample_session_idx]
raw_frames = sample_session["frames"].astype(np.float32, copy=False)
# presumably returns the saved mean_divide-normalized frames of this session,
# indexed by frame along the first axis - TODO confirm against helper_functions
mean_frames = hf.load_saved_session_frames(
    deriv_root,
    sample_subject,
    sample_session["session_id"],
    "mean_divide",
)

if not (0 <= sample_frame_idx < mean_frames.shape[0]):
    raise ValueError(
        f"sample_frame_idx={sample_frame_idx} out of range [0, {mean_frames.shape[0] - 1}]"
    )

# Frame shown by the display cells of this section.
frame = mean_frames[sample_frame_idx]

#### Sample frame
##### display a sample frame 

In [None]:
# symmetric limits around 0 for diverging display, taken from the 99th
# percentile of |value|. NaNs are ignored and a non-finite or near-zero limit
# falls back to 1.0, matching the guard used by the patch cells further down.
abs_lim = float(np.nanpercentile(np.abs(frame), 99.0))
if not np.isfinite(abs_lim) or abs_lim <= 1e-8:
    abs_lim = 1.0
norm = TwoSlopeNorm(vmin=-abs_lim, vcenter=0.0, vmax=abs_lim)

fig, ax = plt.subplots(figsize=(6, 6))
im = ax.imshow(frame, cmap="seismic", norm=norm)
cbar = plt.colorbar(im, ax=ax, fraction=0.046, pad=0.04)
cbar.set_label("mean_divide value", rotation=90)
ax.set_title(
    f"{sample_subject} | session {sample_session['session_id']} | mean_divide frame {sample_frame_idx} | move mouse for (y,x)"
)
ax.set_xlabel("x")
ax.set_ylabel("y")

# Text overlay in the top-left corner that on_move fills with the coordinates.
coord_text = ax.text(
    0.02,
    0.98,
    "(y, x) = (-, -)",
    transform=ax.transAxes,
    ha="left",
    va="top",
    fontsize=11,
    color="lime",
    bbox=dict(
        facecolor="black", alpha=0.6, edgecolor="none", boxstyle="round,pad=0.25"
    ),
)


def on_move(event):
    """Write the pixel under the cursor into ``coord_text``.

    Events outside ``ax``, without data coordinates, or rounding to a pixel
    outside ``frame`` are ignored.
    """
    if event.inaxes != ax or event.xdata is None or event.ydata is None:
        return
    x = int(round(event.xdata))
    y = int(round(event.ydata))
    h, w = frame.shape
    if 0 <= x < w and 0 <= y < h:
        coord_text.set_text(f"(y, x) = ({y}, {x})")
        fig.canvas.draw_idle()


# show coordinates in status/toolbar when supported
ax.format_coord = lambda x, y: (
    f"x={int(round(x))}, y={int(round(y))}" if (x is not None and y is not None) else ""
)

_ = fig.canvas.mpl_connect("motion_notify_event", on_move)
plt.tight_layout()
explorer_fig = fig

# inline-only display (avoid ipywidgets frontend requirements in VS Code)
# NOTE(review): display(fig) followed by plt.show() may render the figure
# twice under the inline backend - confirm in the target frontend.
display(fig)
plt.show()


##### create and save whole sample frame timecourse with red/blue colour map

In [None]:
# Colour limits: symmetric around 0, from the 99th percentile of |value| in
# the reference frame, shared by every frame of the video. NaNs are ignored
# and a non-finite or near-zero limit falls back to 1.0.
ref_frame = mean_frames[sample_frame_idx]
abs_lim = float(np.nanpercentile(np.abs(ref_frame), 99.0))
if not np.isfinite(abs_lim) or abs_lim <= 1e-8:
    abs_lim = 1.0
norm = TwoSlopeNorm(vmin=-abs_lim, vcenter=0.0, vmax=abs_lim)

fig, ax = plt.subplots(figsize=(6, 6))
im = ax.imshow(mean_frames[0], cmap="seismic", norm=norm)
cbar = fig.colorbar(im, ax=ax, fraction=0.046, pad=0.04)
cbar.set_label("mean_divide value", rotation=90)

title = ax.set_title(
    f"{sample_subject} | session {sample_session['session_id']} | mean_divide frame 0"
)
ax.set_xlabel("x")
ax.set_ylabel("y")

T = mean_frames.shape[0]


def update(i):
    """Show frame ``i`` of ``mean_frames`` and put its index in the title."""
    im.set_data(mean_frames[i])
    title.set_text(
        f"{sample_subject} | session {sample_session['session_id']} | mean_divide frame {i}"
    )
    return (im,)


ani = animation.FuncAnimation(
    fig, update, frames=T, interval=100, blit=False, repeat=False
)

# Anchor the output on repo_root, like every other path in this notebook, so
# the video lands in the project even when the notebook runs from a subfolder.
out_dir = repo_root / "derivatives" / "videos"
out_dir.mkdir(parents=True, exist_ok=True)
base = f"{sample_subject}_session-{sample_session['session_id']}_mean_divide"

mp4_path = out_dir / f"{base}.mp4"
gif_path = out_dir / f"{base}.gif"

saved = None
try:
    try:
        writer = animation.FFMpegWriter(fps=10, bitrate=1800)
        ani.save(str(mp4_path), writer=writer, dpi=150)
        saved = mp4_path
    except FileNotFoundError:
        # raised when the ffmpeg executable cannot be launched
        print("ffmpeg not found; saving GIF instead...")
        writer = animation.PillowWriter(fps=10)
        ani.save(str(gif_path), writer=writer, dpi=120)
        saved = gif_path
finally:
    # release the figure even when both writers fail
    plt.close(fig)
print(f"Saved: {saved}")

##### display timecourse of a given pixel in a sample frame

In [None]:
# plot one pixel over time (mean_divide normalization) for a sample baseline acquisition
# with color-mapped values + colorbar
# (TwoSlopeNorm comes from the notebook's import cell; no re-import needed.)

# Reload the sample session so this cell also works after the selection in
# the section setup cell has been changed.
baseline_output_dir = deriv_root / sample_subject / "baseline_only"
baseline_sessions = hf.load_all_baseline(str(baseline_output_dir))

if len(baseline_sessions) == 0:
    raise RuntimeError(f"No baseline sessions found for {sample_subject}")

sample_session = baseline_sessions[sample_session_idx]
raw_frames = sample_session["frames"].astype(np.float32, copy=False)  # (T,H,W)
mean_frames = hf.load_saved_session_frames(
    deriv_root,
    sample_subject,
    sample_session["session_id"],
    "mean_divide",
)

T, H, W = mean_frames.shape
y = 49
x = 21  # slightly left of center

# Reject coordinates outside the frame: a too-large index would raise a bare
# IndexError and a negative one would silently wrap to the opposite edge.
if not (0 <= y < H and 0 <= x < W):
    raise ValueError(f"pixel (y={y}, x={x}) is outside the frame of shape ({H}, {W})")

pixel_ts = mean_frames[:, y, x]
t = np.arange(T)

# symmetric color limits around 0 from robust percentile of this pixel trace;
# NaNs are ignored and a non-finite or near-zero limit falls back to 1.0
abs_lim = float(np.nanpercentile(np.abs(pixel_ts), 99.0))
if not np.isfinite(abs_lim) or abs_lim <= 1e-8:
    abs_lim = 1.0
norm = TwoSlopeNorm(vmin=-abs_lim, vcenter=0.0, vmax=abs_lim)

fig, ax = plt.subplots(figsize=(11, 4.8))
ax.plot(t, pixel_ts, color="black", linewidth=1.0, alpha=0.45, zorder=1)
sc = ax.scatter(t, pixel_ts, c=pixel_ts, cmap="seismic", norm=norm, s=18, zorder=2)

# Dashed vertical markers at selected frames (given 1-based); markers beyond
# the end of the trace are skipped.
marker_frames_1based = [1, 5, 10, 20, 40, 80]
marker_colors = [
    "tab:red",
    "tab:orange",
    "tab:green",
    "tab:blue",
    "tab:purple",
    "tab:brown",
]

for f, c in zip(marker_frames_1based, marker_colors, strict=False):
    idxf = f - 1  # convert to 0-based index for plotting
    if 0 <= idxf < T:
        ax.axvline(
            idxf, linestyle="--", linewidth=1.2, color=c, alpha=0.9, label=f"frame {f}"
        )

cbar = fig.colorbar(sc, ax=ax, fraction=0.04, pad=0.02)
cbar.set_label("mean_divide value (pixel color mapping)", rotation=90)

ax.set_title(
    f"{sample_subject} | session {sample_session['session_id']} | mean_divide pixel trace @ (y={y}, x={x})"
)
ax.set_xlabel("Frame index (0-based)")
ax.set_ylabel("Normalized value")
ax.grid(alpha=0.25)
ax.legend(ncol=4, fontsize=9)
fig.tight_layout()
pixel_trace_fig = fig
plt.show()

#### Smaller sample patch
#####  setup section

In [None]:
# Reload the sample session, then list the pixels of a small square patch
# around a centre pixel and give each one its own trace colour.
from matplotlib.colors import TwoSlopeNorm

baseline_output_dir = deriv_root / sample_subject / "baseline_only"
baseline_sessions = hf.load_all_baseline(str(baseline_output_dir))

if len(baseline_sessions) == 0:
    raise RuntimeError(f"No baseline sessions found for {sample_subject}")

sample_session = baseline_sessions[sample_session_idx]
raw_frames = sample_session["frames"].astype(np.float32, copy=False)  # (T,H,W)
mean_frames = hf.load_saved_session_frames(
    deriv_root, sample_subject, sample_session["session_id"], "mean_divide"
)

T, H, W = mean_frames.shape
pixel = (49, 21)
center_pixel = pixel
radius = 1

# Patch bounds clipped to the frame (y1 / x1 are exclusive) ...
y0, x0 = max(0, pixel[0] - radius), max(0, pixel[1] - radius)
y1, x1 = min(H, pixel[0] + radius + 1), min(W, pixel[1] + radius + 1)
# ... expressed as offsets relative to the centre pixel.
y_range = range(y0 - pixel[0], y1 - pixel[0])
x_range = range(x0 - pixel[1], x1 - pixel[1])

# Every patch pixel except the centre, as absolute (y, x) coordinates.
surrounding_pixels = []
for dy in y_range:
    for dx in x_range:
        if dy == 0 and dx == 0:
            continue
        surrounding_pixels.append((pixel[0] + dy, pixel[1] + dx))

# Centre included; sorting gives row-major order.
pixel_coordinates = sorted([*surrounding_pixels, pixel])
print(f"Plotting {len(pixel_coordinates)} pixels around (y={pixel[0]}, x={pixel[1]})")

# One matplotlib colour-cycle entry ("C0", "C1", ...) per pixel, looked up by
# coordinate in the cells below.
pixel_trace_colours = [f"C{i}" for i in range(len(pixel_coordinates))]
coord_to_color = dict(zip(pixel_coordinates, pixel_trace_colours))

#####  display timecourse of a given pixel within a smaller patch in a sample frame

In [None]:
# plot multiple pixels over time (mean_divide normalization) for a sample baseline acquisition

# Shared colour limits: symmetric around 0, from the 99th percentile of
# |value| over every trace of the patch.
all_patch_values = np.concatenate(
    [mean_frames[:, py, px] for (py, px) in pixel_coordinates]
)
abs_lim = float(np.nanpercentile(np.abs(all_patch_values), 99.0))
if not np.isfinite(abs_lim) or abs_lim <= 1e-8:
    abs_lim = 1.0
norm = TwoSlopeNorm(vmin=-abs_lim, vcenter=0.0, vmax=abs_lim)

fig, ax = plt.subplots(figsize=(11, 4.8))
t = np.arange(T)  # frame axis, identical for every trace
# Dedicated loop names so the notebook-level `pixel`, `y` and `x` set by
# earlier cells are not overwritten with the last patch coordinate.
for coord in pixel_coordinates:
    py, px = coord
    pixel_ts = mean_frames[:, py, px]

    trace_color = coord_to_color[coord]
    ax.plot(t, pixel_ts, color=trace_color, linewidth=1.2, alpha=0.95, zorder=2)
    sc = ax.scatter(
        t, pixel_ts, c=pixel_ts, cmap="seismic", norm=norm, s=12, alpha=0.70, zorder=3
    )

# Dashed vertical markers at selected frames (given 1-based); markers beyond
# the end of the traces are skipped.
marker_frames_1based = [1, 5, 10, 20, 40, 80]
marker_colors = [
    "tab:red",
    "tab:orange",
    "tab:green",
    "tab:blue",
    "tab:purple",
    "tab:brown",
]

for f, c in zip(marker_frames_1based, marker_colors, strict=False):
    idxf = f - 1  # convert to 0-based index for plotting
    if 0 <= idxf < T:
        ax.axvline(
            idxf, linestyle="--", linewidth=1.2, color=c, alpha=0.9, label=f"frame {f}"
        )

# Every scatter shares `norm`, so the last one can drive the colorbar.
cbar = fig.colorbar(sc, ax=ax, fraction=0.04, pad=0.02)
cbar.set_label("mean_divide value (dot color mapping)", rotation=90)

ax.set_title(
    f"{sample_subject} | session {sample_session['session_id']} | mean_divide traces for patch around (y={center_pixel[0]}, x={center_pixel[1]})"
)
ax.set_xlabel("Frame index (0-based)")
ax.set_ylabel("Normalized value")
ax.grid(alpha=0.25)
ax.legend(ncol=4, fontsize=9)
fig.tight_layout()
# pixel_trace_fig = fig
plt.show()

#####  display a smaller patch of a sample frame 

In [None]:
# Centre pixel (y, x), patch radius and frame index all come from the setup
# cells instead of being hard-coded again here. That keeps this image in step
# with `coord_to_color` (built for exactly that neighbourhood) and stops this
# cell from silently resetting `sample_frame_idx`.
cy, cx = center_pixel
r = radius  # radius 1 -> 3x3 patch
frame = mean_frames[sample_frame_idx]
H, W = frame.shape

# patch bounds, clipped to the frame (y1 / x1 exclusive)
y0, y1 = max(0, cy - r), min(H, cy + r + 1)
x0, x1 = max(0, cx - r), min(W, cx + r + 1)
patch = frame[y0:y1, x0:x1]
h, w = patch.shape

# Colour limits come from the whole frame, not only the patch.
abs_lim = float(np.nanpercentile(np.abs(frame), 99.0))
if not np.isfinite(abs_lim) or abs_lim <= 1e-8:
    abs_lim = 1.0
norm = TwoSlopeNorm(vmin=-abs_lim, vcenter=0.0, vmax=abs_lim)

fig, ax = plt.subplots(figsize=(6, 6))
im = ax.imshow(patch, cmap="seismic", norm=norm, interpolation="nearest")
pad = 0.08  # small axis buffer so outer borders are visible
ax.set_xlim(-0.5 - pad, w - 0.5 + pad)
ax.set_ylim(h - 0.5 + pad, -0.5 - pad)

# thicker per-pixel outlines, colored to match trace colors
for iy in range(h):
    for ix in range(w):
        global_coord = (y0 + iy, x0 + ix)  # (y, x) in full frame
        # white marks a pixel that has no trace colour assigned
        edge = coord_to_color.get(global_coord, "white")
        ax.add_patch(
            Rectangle(
                (ix - 0.5, iy - 0.5), 1, 1, fill=False, edgecolor=edge, linewidth=3.0
            )
        )

# Yellow ring on the centre pixel, in patch-local coordinates.
ax.scatter(
    cx - x0,
    cy - y0,
    s=120,
    facecolors="none",
    edgecolors="yellow",
    linewidths=2.8,
    zorder=5,
)

cbar = plt.colorbar(im, ax=ax, fraction=0.046, pad=0.04)
cbar.set_label("mean_divide value", rotation=90)

ax.set_title(
    f"{sample_subject} | frame {sample_frame_idx} | patch around (y={cy}, x={cx})"
)
ax.set_xlabel("x")
ax.set_ylabel("y")
ax.set_aspect("equal")
plt.show()

##### create and save video of patch time course with red/blue colour map


In [None]:
# Centre pixel (y, x), patch radius and frame index come from the setup cells,
# so the video shows the same patch as the trace and image cells.
cy, cx = center_pixel
r = radius  # radius 1 -> 3x3 patch

# Define the reference frame and its shape BEFORE they are used for the patch
# bounds; previously these relied on values left over from an earlier cell.
frame = mean_frames[sample_frame_idx]
H, W = frame.shape

# patch bounds, clipped to the frame (y1 / x1 exclusive)
y0, y1 = max(0, cy - r), min(H, cy + r + 1)
x0, x1 = max(0, cx - r), min(W, cx + r + 1)
patch = frame[y0:y1, x0:x1]
h, w = patch.shape

patched_frames = mean_frames[:, y0:y1, x0:x1]  # (T, h, w)

# Colour limits from the whole reference frame (not only the patch), symmetric
# around 0. NaNs are ignored and a non-finite or near-zero limit falls back to 1.0.
ref_frame = frame
abs_lim = float(np.nanpercentile(np.abs(ref_frame), 99.0))
if not np.isfinite(abs_lim) or abs_lim <= 1e-8:
    abs_lim = 1.0
norm = TwoSlopeNorm(vmin=-abs_lim, vcenter=0.0, vmax=abs_lim)


def _patch_title(i):
    """Return the title for frame ``i``; one format for every frame."""
    return (
        f"{sample_subject} | session {sample_session['session_id']} "
        f"| patch | mean_divide frame {i}"
    )


fig, ax = plt.subplots(figsize=(6, 6))
im = ax.imshow(patched_frames[0], cmap="seismic", norm=norm)
cbar = fig.colorbar(im, ax=ax, fraction=0.046, pad=0.04)
cbar.set_label("mean_divide value", rotation=90)

title = ax.set_title(_patch_title(0))
ax.set_xlabel("x")
ax.set_ylabel("y")

T = patched_frames.shape[0]


def update(i):
    """Show patch frame ``i`` and put its index in the title."""
    im.set_data(patched_frames[i])
    title.set_text(_patch_title(i))
    return (im,)


ani = animation.FuncAnimation(
    fig, update, frames=T, interval=100, blit=False, repeat=False
)

# Anchor the output on repo_root, like every other path in this notebook, so
# the video lands in the project even when the notebook runs from a subfolder.
out_dir = repo_root / "derivatives" / "videos"
out_dir.mkdir(parents=True, exist_ok=True)
base = f"{sample_subject}_session-{sample_session['session_id']}_mean_divide_patch"

mp4_path = out_dir / f"{base}.mp4"
gif_path = out_dir / f"{base}.gif"

saved = None
try:
    try:
        writer = animation.FFMpegWriter(fps=10, bitrate=1800)
        ani.save(str(mp4_path), writer=writer, dpi=150)
        saved = mp4_path
    except FileNotFoundError:
        # raised when the ffmpeg executable cannot be launched
        print("ffmpeg not found; saving GIF instead...")
        writer = animation.PillowWriter(fps=10)
        ani.save(str(gif_path), writer=writer, dpi=120)
        saved = gif_path
finally:
    # release the figure even when both writers fail
    plt.close(fig)
print(f"Saved: {saved}")