# 1818 Full Analysis Pipeline

This notebook ties together alignment (events/DLC/DA ↔ NP), LFP/CSD, spike–LFP coupling, PAC, coherence, and replay/HMM skeletons.

Fill in the marked placeholders (file paths and strobe-mapping coefficients) before running the cells below.

In [None]:
import numpy as np
import pandas as pd
from pathlib import Path
import matplotlib.pyplot as plt
from scipy.signal import welch, spectrogram, butter, filtfilt, hilbert, coherence, find_peaks
from scipy.io import loadmat
from tqdm.auto import tqdm
try:
    import cupy as cp
    have_cupy = True
except ImportError:
    have_cupy = False
try:
    from hmmlearn import hmm
    have_hmm = True
except ImportError:
    have_hmm = False

import spikeinterface.full as si
from spikeinterface.core import NumpyRecording

# ---- paths (edit) ----
# Every path below is specific to one machine/session; edit before running.
# Root SpikeGLX folder for the imec0 probe of this recording.
spikeglx_probe_folder = Path(r"Z:\Koji\Neuropixels\1818\1818_11202025_g0\1818_11202025_g0_imec0")
# Sorting outputs are read from the "kilosort4" subfolder of the probe folder.
spike_dir = spikeglx_probe_folder / "kilosort4"
spike_times_path = spike_dir / "spike_seconds_adj.npy"  # presumably spike times in seconds — confirm units/offset
spike_clusters_path = spike_dir / "spike_clusters.npy"
# Unit labels live in a sibling "kilosort4qMetrics" folder (reached via "..").
unit_labels_path = spike_dir / ".." / "kilosort4qMetrics" / "templates._bc_unit_labels.tsv"
# Optional: the loading cell only reads this file if it exists.
celltype_path = spike_dir / "unit_classification_rulebased.csv"
# Optional raw float32 CSD file: the loading cell memory-maps it if it exists.
csd_memmap_path = spike_dir / "csd_tmp.float32.bin"
spectrogram_meta = spikeglx_probe_folder / "spectrogram_fullsession_meta.npz"
spectrogram_memmap = spikeglx_probe_folder / "spectrogram_fullsession.dat"

# Behaviour event tables; the loading cell concatenates them row-wise.
event_csvs = [
    Path(r"Z:\Koji\NP_Coh3\Recording\Day27_1818_Clockwise_corner_2025-11-20T15_11_30.csv"),
    Path(r"Z:\Koji\NP_Coh3\Recording\Day27_1818_Clockwise_licking_2025-11-20T15_11_30.csv"),
]
dlc_csv = Path(r"Z:\Koji\NP_Coh3\Recording\Day27_1818_Clockwise2025-11-20T15_50_10DLC_HrnetW32_openfield_v3Sep10shuffle2_detector_170_snapshot_160.csv")
# NOTE(review): this path is dated 250919 while the NP session folder is dated
# 11202025 — confirm this is the intended DA file for this session.
da_mat = Path(r"Z:\Koji\NP_Coh3\TDT\1818\250919\1818_250919_dFF.mat")
np_strobe_npy = spike_dir / "strobe_signal.npy"  # set to actual NP strobe npy
# Camera frame rate in Hz; used to build the DLC time axis and the default
# minimum peak spacing in strobe detection.
cam_fps = 60.0

# ---- strobe mapping placeholders (fill once you run alignment) ----
# Leave as None to have the alignment cell fit them from the strobe signals.
# t_np = a_cam_np * t_cam + b_cam_np
a_cam_np = None  # e.g., 1.0
b_cam_np = None  # e.g., 0.0
# t_np = a_da_np * t_da + b_da_np
a_da_np = None
b_da_np = None

In [None]:
# Load behavior and DA strobe/DA signal
# All event CSVs are stacked into one table with a fresh 0..N-1 index.
events = pd.concat([pd.read_csv(p) for p in event_csvs], ignore_index=True)
# NOTE(review): header=None keeps any header rows in the CSV as data rows, so
# len(dlc) (and therefore dlc_time_cam) would include them — confirm the file
# has no header rows or skip them here.
dlc = pd.read_csv(dlc_csv, header=None)
# One timestamp per DLC row on the camera clock, assuming a constant cam_fps.
dlc_time_cam = np.arange(len(dlc)) / cam_fps

da = loadmat(da_mat)
da_dff = None; da_strobe = None; da_fs = None
# Pick variables out of the .mat by case-insensitive name. For the strobe any
# key *containing* "strobe" matches, and the last matching key wins.
for k in da.keys():
    kl = k.lower()
    if kl == 'dff':
        da_dff = np.asarray(da[k]).squeeze()
    if 'strobe' in kl:
        da_strobe = np.asarray(da[k]).squeeze()
    if kl == 'fs':
        da_fs = float(np.asarray(da[k]).squeeze())
# The NP strobe is optional: None when the file is missing.
np_strobe = np.load(np_strobe_npy) if np_strobe_npy.exists() else None
print("Events", events.shape, "DLC", dlc.shape, "DA dFF", None if da_dff is None else da_dff.shape)

In [None]:
# Build strobe mappings if coefficients not set
def detect_strobe_edges(sig, fs, height=None, distance=None):
    """Return the times, in seconds, of peaks found in a strobe trace.

    Parameters
    ----------
    sig : array-like
        Strobe samples; converted to a float array.
    fs : float
        Sampling rate of ``sig`` in Hz, used to convert sample indices to seconds.
    height : float, optional
        Minimum peak height passed to ``find_peaks``. Defaults to the midpoint
        between the smallest and largest sample.
    distance : int, optional
        Minimum spacing between peaks in samples. Defaults to half of one
        camera-frame period (``cam_fps``) expressed in samples, and at least 1.

    Returns
    -------
    numpy.ndarray
        Peak sample indices divided by ``fs``.
    """
    trace = np.asarray(sig).astype(float)
    threshold = 0.5 * (trace.min() + trace.max()) if height is None else height
    min_gap = max(1, int(0.5 * fs / cam_fps)) if distance is None else distance
    peak_idx = find_peaks(trace, height=threshold, distance=min_gap)[0]
    return peak_idx / fs

# Fit linear clock mappings (t_np = a * t_other + b) for any coefficients that
# were left as None. Pulses are paired purely by order of occurrence, so this
# assumes no pulse is missing on either side — TODO confirm for each session.

# Defined before both fits: the DA->NP fit below needs it even when the
# cam->NP coefficients were entered by hand and the first branch is skipped.
fs_np = 1000.0  # set NP strobe sampling rate correctly (e.g., 30000 for AP or 1000 for LF)

if np_strobe is not None and (a_cam_np is None or b_cam_np is None):
    np_edges = detect_strobe_edges(np_strobe, fs=fs_np)
    # One expected camera pulse per DLC row, spaced at 1 / cam_fps.
    cam_edges = np.arange(len(dlc_time_cam)) / cam_fps
    n_match = min(len(np_edges), len(cam_edges))
    # Require a handful of pulses so the line fit is not degenerate.
    if n_match > 5:
        a_cam_np, b_cam_np = np.polyfit(cam_edges[:n_match], np_edges[:n_match], 1)
        print(f"Fitted cam->NP: t_np = {a_cam_np:.6f}*t_cam + {b_cam_np:.6f}")
if np_strobe is not None and da_strobe is not None and da_fs is not None and (a_da_np is None or b_da_np is None):
    np_edges = detect_strobe_edges(np_strobe, fs=fs_np)
    # NOTE(review): the default minimum peak spacing is derived from cam_fps;
    # confirm it suits the DA strobe rate.
    da_edges = detect_strobe_edges(da_strobe, fs=da_fs)
    n_match = min(len(np_edges), len(da_edges))
    if n_match > 5:
        a_da_np, b_da_np = np.polyfit(da_edges[:n_match], np_edges[:n_match], 1)
        print(f"Fitted DA->NP: t_np = {a_da_np:.6f}*t_da + {b_da_np:.6f}")

def cam_to_np_time(t_cam):
    """Map camera-clock times onto the NP clock.

    Applies ``t_np = a_cam_np * t_cam + b_cam_np`` using the module-level
    coefficients.

    Raises
    ------
    RuntimeError
        If either coefficient is still None.
    """
    coeffs_ready = a_cam_np is not None and b_cam_np is not None
    if not coeffs_ready:
        raise RuntimeError("Set a_cam_np/b_cam_np")
    t = np.asarray(t_cam)
    return a_cam_np * t + b_cam_np

def da_to_np_time(t_da):
    """Map DA-clock times onto the NP clock.

    Applies ``t_np = a_da_np * t_da + b_da_np`` using the module-level
    coefficients.

    Raises
    ------
    RuntimeError
        If either coefficient is still None.
    """
    coeffs_ready = a_da_np is not None and b_da_np is not None
    if not coeffs_ready:
        raise RuntimeError("Set a_da_np/b_da_np")
    t = np.asarray(t_da)
    return a_da_np * t + b_da_np

# Time axes on the NP clock; each stays None until everything it needs exists.
dlc_time_np = cam_to_np_time(dlc_time_cam) if (a_cam_np is not None and b_cam_np is not None) else None
# The DA axis needs the signal, its sampling rate and BOTH mapping coefficients;
# checking all four avoids a TypeError (da_fs is None) or RuntimeError
# (b_da_np is None) when the DA->NP fit did not run.
da_time_np = (
    da_to_np_time(np.arange(len(da_dff)) / da_fs)
    if (da_dff is not None and da_fs is not None and a_da_np is not None and b_da_np is not None)
    else None
)

In [None]:
# Load spikes and LFP/CSD
spike_times = np.load(spike_times_path)
spike_clusters = np.load(spike_clusters_path)
# NOTE(review): loadtxt with dtype=int fails on a header row or string labels —
# confirm the label file is purely numeric.
unit_labels = np.loadtxt(unit_labels_path, delimiter="\t", dtype=int)
# Boolean mask of units labelled 1. Downstream code indexes this mask by
# cluster id, which assumes row i of the label file describes cluster i.
good_units = unit_labels == 1
celltypes = {}
if celltype_path.exists():
    df_ct = pd.read_csv(celltype_path)
    celltypes = dict(zip(df_ct["cluster_id"], df_ct["cell_type"]))
lfp_rec = si.read_spikeglx(spikeglx_probe_folder, stream_id="imec0.lf")
csd_rec = None
if csd_memmap_path.exists():
    n_frames = lfp_rec.get_num_frames()
    n_channels = lfp_rec.get_num_channels()
    fs = lfp_rec.get_sampling_frequency()
    # The file is interpreted as (n_frames, n_channels) float32. A file of any
    # other size would be reshaped into misaligned rows, so fail loudly instead.
    expected_bytes = int(n_frames) * int(n_channels) * np.dtype(np.float32).itemsize
    actual_bytes = csd_memmap_path.stat().st_size
    if actual_bytes != expected_bytes:
        raise ValueError(
            f"CSD file {csd_memmap_path} is {actual_bytes} bytes; expected {expected_bytes} "
            f"for {n_frames} frames x {n_channels} channels of float32"
        )
    csd_mm = np.memmap(csd_memmap_path, dtype=np.float32, mode="r", shape=(n_frames, n_channels))
    # Give the CSD recording the LFP channel ids at construction time.
    csd_rec = NumpyRecording([csd_mm], sampling_frequency=fs, channel_ids=lfp_rec.channel_ids)
    # set_probe is not in-place by default: it returns the recording that
    # carries the probe, so the result must be kept.
    csd_rec = csd_rec.set_probe(lfp_rec.get_probe())
print(lfp_rec)
if csd_rec is not None:
    print("CSD memmap loaded")

In [None]:
# PSD / Bandpower / Coherence / PAC / Coupling (from SpikeLFP_analysis)
def compute_bandpower(rec, band, win_s=0.5, step_s=0.1, duration_s=120):
    """Compute sliding-window band power for every column returned by ``rec``.

    Each window is read from the recording, band-pass filtered on its own with
    a zero-phase 4th-order Butterworth filter, and reduced to the mean of the
    squared filtered samples.

    Parameters
    ----------
    rec : recording-like
        Must provide ``get_sampling_frequency``, ``get_num_frames`` and
        ``get_traces(start_frame=, end_frame=)``; traces are filtered along
        axis 0 (assumed to be time — TODO confirm).
    band : tuple
        ``(low_hz, high_hz)`` pass band.
    win_s, step_s : float
        Window length and hop, in seconds.
    duration_s : float
        Only the first ``duration_s`` seconds (or the whole recording if
        shorter) are analysed.

    Returns
    -------
    times : numpy.ndarray
        Window start times in seconds.
    powers : numpy.ndarray
        One row per window, one column per trace column.
    """
    fs = rec.get_sampling_frequency()
    n_frames_total = min(int(duration_s * fs), rec.get_num_frames())
    win = int(win_s * fs)
    step = max(1, int(step_s * fs))
    nyq = fs / 2
    b, a = butter(4, [band[0] / nyq, band[1] / nyq], btype='band')

    window_starts = range(0, n_frames_total - win + 1, step)
    powers = []
    for start in tqdm(window_starts, desc=f"{band[0]}-{band[1]} Hz"):
        segment = rec.get_traces(start_frame=start, end_frame=start + win)
        filtered = filtfilt(b, a, segment, axis=0)
        powers.append(np.mean(filtered ** 2, axis=0))
    times = [start / fs for start in window_starts]
    return np.asarray(times), np.vstack(powers)

def compute_phase_locking(spike_times_s, spike_clusters, good_units_mask, lfp_rec, band=(13,30), ch=0):
    """Return the band-limited LFP phase at each spike of every good unit.

    The LFP of one channel is band-pass filtered (zero-phase 4th-order
    Butterworth) and its instantaneous phase is taken from the analytic
    signal. The LFP is read and filtered once, up to one second after the last
    spike of any selected unit, instead of once per unit.

    Parameters
    ----------
    spike_times_s : array-like
        Spike times in seconds; converted to LFP sample indices as
        ``int(t * fs)``, i.e. assumed to share the LFP's time origin.
    spike_clusters : array-like
        Cluster id of each spike.
    good_units_mask : sequence of bool
        Entry ``i`` says whether cluster id ``i`` is analysed.
    lfp_rec : recording-like
        Must provide ``channel_ids``, ``get_sampling_frequency``,
        ``get_num_frames`` and ``get_traces``.
    band : tuple
        ``(low_hz, high_hz)`` pass band.
    ch : int or channel id
        LFP channel. Used as-is when it is one of ``lfp_rec.channel_ids``;
        otherwise treated as a positional index into ``channel_ids`` (so the
        default ``0`` works when ids are strings).

    Returns
    -------
    dict
        ``{unit_id: phases}`` in radians, for units with at least one spike.
        Spikes before the LFP start or after the loaded segment are dropped.
    """
    fs = lfp_rec.get_sampling_frequency()
    nyq = fs / 2
    b, a = butter(4, [band[0] / nyq, band[1] / nyq], btype='band')

    spike_times_s = np.asarray(spike_times_s)
    spike_clusters = np.asarray(spike_clusters)

    # Collect the spike trains first so the LFP only has to be processed once.
    trains = {}
    for unit_id, good in enumerate(good_units_mask):
        if not good:
            continue
        st = spike_times_s[spike_clusters == unit_id]
        if st.size == 0:
            continue
        trains[unit_id] = st
    if not trains:
        return {}

    ids = list(lfp_rec.channel_ids)
    ch_id = ch if ch in ids else ids[ch]

    last_spike = max(st.max() for st in trains.values())
    end_f = min(lfp_rec.get_num_frames(), int((last_spike + 1) * fs))
    lfp = lfp_rec.get_traces(start_frame=0, end_frame=end_f, channel_ids=[ch_id])[:, 0]
    phase = np.angle(hilbert(filtfilt(b, a, lfp)))

    phases = {}
    for unit_id, st in trains.items():
        idx = (st * fs).astype(int)
        # Negative indices would silently wrap to the end of the array.
        idx = idx[(idx >= 0) & (idx < phase.size)]
        phases[unit_id] = phase[idx]
    return phases

def spike_field_coherence(unit_id, spike_times_s, spike_clusters, lfp_rec, ch=0, duration_s=120, nperseg=1024, noverlap=512, max_f=200):
    """Coherence between one unit's binary spike train and one LFP channel.

    Spikes in ``[0, duration_s)`` are placed into a 0/1 vector sampled at the
    LFP rate (several spikes in the same sample count once), and
    ``scipy.signal.coherence`` is evaluated against the LFP.

    Parameters
    ----------
    unit_id : int
        Cluster id whose spikes are used.
    spike_times_s, spike_clusters : array-like
        Spike times in seconds and the cluster id of each spike.
    lfp_rec : recording-like
        Must provide ``channel_ids``, ``get_sampling_frequency``,
        ``get_num_frames`` and ``get_traces``.
    ch : int or channel id
        LFP channel. Used as-is when it is one of ``lfp_rec.channel_ids``;
        otherwise treated as a positional index into ``channel_ids``.
    duration_s : float
        Length of the analysed segment, counted from the LFP start.
    nperseg, noverlap : int
        Passed through to ``scipy.signal.coherence``.
    max_f : float
        Frequencies above this value are dropped from the result.

    Returns
    -------
    f, Cxy : numpy.ndarray
        Frequencies (Hz) up to ``max_f`` and the coherence at each.
    """
    fs = lfp_rec.get_sampling_frequency()
    n_frames = min(int(duration_s * fs), lfp_rec.get_num_frames())

    ids = list(lfp_rec.channel_ids)
    ch_id = ch if ch in ids else ids[ch]

    st = np.asarray(spike_times_s)[np.asarray(spike_clusters) == unit_id]
    # Drop spikes before t=0 as well: a negative index would wrap around and
    # plant a spurious spike near the end of the train.
    st = st[(st >= 0) & (st < duration_s)]
    spikes_bin = np.zeros(n_frames)
    idx = (st * fs).astype(int)
    spikes_bin[idx[idx < n_frames]] = 1

    lfp = lfp_rec.get_traces(start_frame=0, end_frame=n_frames, channel_ids=[ch_id])[:, 0]
    f, Cxy = coherence(spikes_bin, lfp, fs=fs, nperseg=nperseg, noverlap=noverlap)
    keep = f <= max_f
    return f[keep], Cxy[keep]

def field_field_coherence(lfp_rec, ch_a=0, ch_b=1, duration_s=120, nperseg=2048, noverlap=1024, max_f=200):
    """Coherence between two LFP channels over the start of the recording.

    Parameters
    ----------
    lfp_rec : recording-like
        Must provide ``channel_ids``, ``get_sampling_frequency``,
        ``get_num_frames`` and ``get_traces``.
    ch_a, ch_b : int or channel id
        The two channels. Each is used as-is when it is one of
        ``lfp_rec.channel_ids``; otherwise it is treated as a positional index
        into ``channel_ids`` (so the integer defaults work with string ids).
    duration_s : float
        Length of the analysed segment, counted from the LFP start.
    nperseg, noverlap : int
        Passed through to ``scipy.signal.coherence``.
    max_f : float
        Frequencies above this value are dropped from the result.

    Returns
    -------
    f, Cxy : numpy.ndarray
        Frequencies (Hz) up to ``max_f`` and the coherence at each.
    """
    fs = lfp_rec.get_sampling_frequency()
    n_frames = min(int(duration_s * fs), lfp_rec.get_num_frames())

    ids = list(lfp_rec.channel_ids)
    id_a = ch_a if ch_a in ids else ids[ch_a]
    id_b = ch_b if ch_b in ids else ids[ch_b]

    # Columns are assumed to come back in the requested order — TODO confirm.
    x = lfp_rec.get_traces(start_frame=0, end_frame=n_frames, channel_ids=[id_a, id_b])
    f, Cxy = coherence(x[:, 0], x[:, 1], fs=fs, nperseg=nperseg, noverlap=noverlap)
    keep = f <= max_f
    return f[keep], Cxy[keep]

def compute_pac(lfp_rec, phase_band=(13,30), amp_band=(30,80), duration_s=60):
    """Phase-amplitude coupling modulation index for every trace column.

    For each column, the phase of the ``phase_band`` component and the
    amplitude envelope of the ``amp_band`` component are taken from analytic
    signals. The mean envelope in each of 18 phase bins is normalised to a
    distribution ``p`` and the index is ``(log N - H(p)) / log N`` (a
    normalised-entropy modulation index): 0 for a flat distribution, larger
    for stronger coupling.

    Parameters
    ----------
    lfp_rec : recording-like
        Must provide ``get_sampling_frequency``, ``get_num_frames`` and
        ``get_traces(start_frame=, end_frame=)`` returning a 2-D array that is
        processed with time along axis 0.
    phase_band, amp_band : tuple
        ``(low_hz, high_hz)`` pass bands for the phase and amplitude signals.
    duration_s : float
        Length of the analysed segment, counted from the recording start.

    Returns
    -------
    numpy.ndarray
        One modulation index per column. A column is NaN if any phase bin
        received no samples.
    """
    fs = lfp_rec.get_sampling_frequency()
    n_frames = min(int(duration_s * fs), lfp_rec.get_num_frames())
    x = lfp_rec.get_traces(start_frame=0, end_frame=n_frames)

    nyq = fs / 2
    b_p, a_p = butter(4, [phase_band[0] / nyq, phase_band[1] / nyq], btype='band')
    b_a, a_a = butter(4, [amp_band[0] / nyq, amp_band[1] / nyq], btype='band')

    # hilbert() defaults to the LAST axis; the analytic signal must be formed
    # along time (axis 0), not across channels.
    phase = np.angle(hilbert(filtfilt(b_p, a_p, x, axis=0), axis=0))
    amp = np.abs(hilbert(filtfilt(b_a, a_a, x, axis=0), axis=0))

    nbins = 18
    bins = np.linspace(-np.pi, np.pi, nbins + 1)
    mi = np.zeros(x.shape[1])
    for ch in range(x.shape[1]):
        # A phase of exactly +pi lands one past the last bin; fold it back in.
        digitized = np.clip(np.digitize(phase[:, ch], bins) - 1, 0, nbins - 1)
        mean_amp = np.array([amp[digitized == k, ch].mean() for k in range(nbins)])
        mean_amp /= mean_amp.sum()
        mi[ch] = (np.log(nbins) + np.sum(mean_amp * np.log(mean_amp + 1e-12))) / np.log(nbins)
    return mi

# Cell-completion message: printed once the helper definitions above have run.
print("Helper functions loaded. Use as needed below.")

In [None]:
# Example: bandpower and PAC
# Pass bands in Hz, as (low, high) tuples.
bands = [(4,8),(13,30),(30,80)]
# band_results maps each band tuple to (window_start_times, power_matrix).
band_results = {}
for band in bands:
    # Sliding 0.5 s windows every 0.1 s over the first 120 s of the LFP.
    times,pwr = compute_bandpower(lfp_rec, band=band, win_s=0.5, step_s=0.1, duration_s=120)
    band_results[band]=(times,pwr)
# Coupling between 13-30 Hz phase and 30-80 Hz amplitude over the first 60 s;
# compute_pac returns one value per trace column.
pac_beta_gamma = compute_pac(lfp_rec, phase_band=(13,30), amp_band=(30,80), duration_s=60)
print("Bandpower/PAC examples done")

## Replay decoding skeleton
1. Choose a position variable from DLC (e.g. `dlc_pos = dlc.iloc[:,0].values`, or another column).
2. Make sure `dlc_time_np` has been set from the strobe mapping.
3. Compute tuning curves, then decode position within candidate events.

In [None]:
def compute_tuning(pos, pos_time, spikes_s, spikes_clu, good_mask, nbins=50):
    """Estimate 1-D position tuning curves (firing rate per position bin).

    Position is split into ``nbins`` equal bins between its minimum and
    maximum. Occupancy is the number of position samples in each bin times the
    median sample interval. Each spike is assigned the position interpolated
    at its time, and the curve is ``(count + 1e-3) / (occupancy + 1e-3)``.

    Only spikes that fall inside the tracked period are used: ``np.interp``
    clamps out-of-range times to the first/last position, which would pile
    every spike fired before or after tracking into those bins.

    Parameters
    ----------
    pos : array-like
        Position samples (1-D).
    pos_time : array-like
        Time of each position sample, on the same clock as ``spikes_s``.
        Must be increasing for the interpolation to be meaningful.
    spikes_s, spikes_clu : array-like
        Spike times and the cluster id of each spike.
    good_mask : sequence of bool
        Entry ``i`` says whether cluster id ``i`` is analysed.
    nbins : int
        Number of position bins.

    Returns
    -------
    centers : numpy.ndarray
        Bin centres.
    tc : dict
        ``{unit_id: curve}`` for units with at least one spike inside the
        tracked period.
    """
    pos = np.asarray(pos)
    pos_time = np.asarray(pos_time)
    spikes_s = np.asarray(spikes_s)
    spikes_clu = np.asarray(spikes_clu)

    edges = np.linspace(pos.min(), pos.max(), nbins + 1)
    centers = 0.5 * (edges[:-1] + edges[1:])
    dt = np.median(np.diff(pos_time))
    occ, _ = np.histogram(pos, bins=edges)
    occ = occ * dt

    tracked = (spikes_s >= pos_time.min()) & (spikes_s <= pos_time.max())

    tc = {}
    for uid, good in enumerate(good_mask):
        if not good:
            continue
        st = spikes_s[tracked & (spikes_clu == uid)]
        if st.size == 0:
            continue
        pos_at_spikes = np.interp(st, pos_time, pos)
        spk_counts, _ = np.histogram(pos_at_spikes, bins=edges)
        tc[uid] = (spk_counts + 1e-3) / (occ + 1e-3)
    return centers, tc

def decode_position(tuning_curves, pos_bins, spike_counts, dt=0.02):
    """Poisson decoding of position from binned spike counts (flat prior).

    For time bin ``t`` and position bin ``p`` the log-likelihood is
    ``sum_u counts[u, t] * log(lam[u, p]) - dt * sum_u lam[u, p]``. The sum
    over units is done as a matrix product, and the maximum of each row is
    subtracted before exponentiating so that large counts cannot overflow or
    underflow the posterior.

    Parameters
    ----------
    tuning_curves : dict
        ``{unit_id: rate per position bin}``; all curves must share a length.
    pos_bins : array-like
        Position bin centres. Not used in the computation; kept for callers.
    spike_counts : array-like
        Shape ``(n_units, n_timebins)``, rows in the iteration order of
        ``tuning_curves``.
    dt : float
        Width of a time bin, in the time unit of the tuning-curve rates.

    Returns
    -------
    post : numpy.ndarray
        Shape ``(n_timebins, n_position_bins)``; each row sums to 1.
    units : list
        Unit ids in the row order expected for ``spike_counts``.

    Raises
    ------
    ValueError
        If ``spike_counts`` is not 2-D with one row per tuning curve.
    """
    units = list(tuning_curves.keys())
    lam = np.stack([tuning_curves[u] for u in units])  # (n_units, n_pos)
    counts = np.asarray(spike_counts, dtype=float)
    if counts.ndim != 2 or counts.shape[0] != lam.shape[0]:
        raise ValueError(
            f"spike_counts must have shape (n_units={lam.shape[0]}, n_timebins); got {counts.shape}"
        )
    # (n_time, n_units) @ (n_units, n_pos) -> (n_time, n_pos)
    log_l = counts.T @ np.log(lam + 1e-12) - dt * lam.sum(axis=0)
    log_l -= log_l.max(axis=1, keepdims=True)
    post = np.exp(log_l)
    post /= post.sum(axis=1, keepdims=True)
    return post, units

# Example usage (uncomment, set dlc_pos):
# dlc_pos = dlc.iloc[:,0].values
# centers, tc = compute_tuning(dlc_pos, dlc_time_np, spike_times, spike_clusters, good_units)
# Build spike_counts by binning spikes into dt bins during candidate events, then call decode_position(tc, centers, spike_counts)

## HMM on LFP bandpower (optional; requires hmmlearn)


In [None]:
def lfp_bandpower_features(lfp_rec, bands, win_s=0.5, step_s=0.1, duration_s=120):
    """Build a sliding-window band-power feature matrix over several bands.

    Every window is read from the recording and band-pass filtered on its own
    (zero-phase 4th-order Butterworth) once per band; the mean squared
    filtered value per trace column is the feature. Features of all bands are
    concatenated band after band.

    Parameters
    ----------
    lfp_rec : recording-like
        Must provide ``get_sampling_frequency``, ``get_num_frames`` and
        ``get_traces(start_frame=, end_frame=)``; traces are filtered along
        axis 0 (assumed to be time — TODO confirm).
    bands : sequence of tuple
        ``(low_hz, high_hz)`` pass bands.
    win_s, step_s : float
        Window length and hop, in seconds.
    duration_s : float
        Only the first ``duration_s`` seconds (or the whole recording if
        shorter) are analysed.

    Returns
    -------
    times : numpy.ndarray
        Window start times in seconds.
    feats : numpy.ndarray
        One row per window; columns are the per-band powers laid side by side.
    """
    fs = lfp_rec.get_sampling_frequency()
    n_frames = min(int(duration_s * fs), lfp_rec.get_num_frames())
    win = int(win_s * fs)
    step = max(1, int(step_s * fs))
    nyq = fs / 2
    filters = [butter(4, [lo_hi[0] / nyq, lo_hi[1] / nyq], btype='band') for lo_hi in bands]

    times = []
    feats = []
    for start in tqdm(range(0, n_frames - win + 1, step), desc="LFP bandpower feats"):
        window = lfp_rec.get_traces(start_frame=start, end_frame=start + win)
        per_band = [np.mean(filtfilt(b, a, window, axis=0) ** 2, axis=0) for b, a in filters]
        feats.append(np.hstack(per_band))
        times.append(start / fs)
    return np.asarray(times), np.vstack(feats)

def fit_hmm_bandpower(features, n_states=3):
    """Fit a full-covariance Gaussian HMM to a feature matrix and decode it.

    Parameters
    ----------
    features : array-like
        Observations handed to ``GaussianHMM.fit`` and ``.predict``.
    n_states : int
        Number of hidden states.

    Returns
    -------
    model
        The fitted ``hmmlearn`` model (at most 100 training iterations).
    states
        The state sequence predicted for ``features``.

    Raises
    ------
    RuntimeError
        If hmmlearn could not be imported.
    """
    if have_hmm:
        model = hmm.GaussianHMM(n_components=n_states, covariance_type='full', n_iter=100)
        model.fit(features)
        return model, model.predict(features)
    raise RuntimeError("Install hmmlearn to use HMM")

# Example (uncomment to run if hmmlearn installed):
# bands = [(4,8),(13,30),(30,80)]
# t_feat, feats = lfp_bandpower_features(lfp_rec, bands, win_s=0.5, step_s=0.1, duration_s=120)
# model, states = fit_hmm_bandpower(feats, n_states=3)
# plt.plot(t_feat, states); plt.title('LFP HMM states'); plt.show()

## Joint spike–LFP–behavior features


In [None]:
def build_joint_features(spike_times_s, spike_clusters, good_units, lfp_rec, dlc_pos, dlc_time_np, bands, bin_s=0.05, duration_s=120, ch=0):
    """Assemble a per-time-bin matrix of spike, LFP band-power and behaviour features.

    Time is cut into bins of exactly ``bin_s`` seconds starting at 0; a
    trailing partial bin is dropped. Columns are, in order: firing rate of
    each good unit, mean squared band-passed LFP for each band, interpolated
    position, and absolute position change per second (0 for the first bin).

    Parameters
    ----------
    spike_times_s, spike_clusters : array-like
        Spike times in seconds and the cluster id of each spike.
    good_units : array-like of bool
        Entry ``i`` says whether cluster id ``i`` is included.
    lfp_rec : recording-like
        Must provide ``channel_ids``, ``get_sampling_frequency``,
        ``get_num_frames`` and ``get_traces``. LFP sample ``k`` is placed at
        time ``k / fs``.
    dlc_pos, dlc_time_np : array-like
        Position samples and their times on the same clock as the spikes.
    bands : sequence of tuple
        ``(low_hz, high_hz)`` pass bands; may be empty.
    bin_s : float
        Bin width in seconds.
    duration_s : float
        Length of the analysed segment in seconds.
    ch : int or channel id
        LFP channel. Used as-is when it is one of ``lfp_rec.channel_ids``;
        otherwise treated as a positional index into ``channel_ids``.

    Returns
    -------
    t : numpy.ndarray
        Left edge of every bin.
    feats : numpy.ndarray
        Shape ``(n_bins, n_units + n_bands + 2)``.
    units : numpy.ndarray
        Cluster ids behind the spike-rate columns.

    Raises
    ------
    ValueError
        If ``duration_s`` is shorter than one bin.
    """
    fs = lfp_rec.get_sampling_frequency()
    # The small tolerance keeps e.g. 0.3 / 0.1 from truncating to 2 bins.
    nbins = int(np.floor(duration_s / bin_s + 1e-9))
    if nbins < 1:
        raise ValueError("duration_s must cover at least one bin of width bin_s")
    # Edges spaced by exactly bin_s, so dividing counts by bin_s gives a rate.
    t_bins = np.arange(nbins + 1) * bin_s

    spike_times_s = np.asarray(spike_times_s)
    spike_clusters = np.asarray(spike_clusters)
    units = np.where(good_units)[0]
    spk_mat = np.zeros((nbins, len(units)))
    for ui, u in enumerate(units):
        st = spike_times_s[spike_clusters == u]
        st = st[(st >= 0) & (st < duration_s)]
        counts, _ = np.histogram(st, bins=t_bins)
        spk_mat[:, ui] = counts / bin_s

    ids = list(lfp_rec.channel_ids)
    ch_id = ch if ch in ids else ids[ch]
    end_frame = min(int(duration_s * fs), lfp_rec.get_num_frames())
    lfp = lfp_rec.get_traces(start_frame=0, end_frame=end_frame, channel_ids=[ch_id])[:, 0]

    # Sample times and samples-per-bin are the same for every band.
    t_lfp = np.arange(lfp.size) / fs
    n_per_bin, _ = np.histogram(t_lfp, bins=t_bins)
    nyq = fs / 2
    band_cols = []
    for band in bands:
        b_f, a_f = butter(4, [band[0] / nyq, band[1] / nyq], btype='band')
        power = filtfilt(b_f, a_f, lfp) ** 2
        power_sum, _ = np.histogram(t_lfp, bins=t_bins, weights=power)
        # Bins past the end of the recording hold no samples and come out as 0.
        band_cols.append(power_sum / (n_per_bin + 1e-6))
    band_feats = np.vstack(band_cols).T if band_cols else np.empty((nbins, 0))

    bin_centers = (t_bins[:-1] + t_bins[1:]) / 2
    pos_interp = np.interp(bin_centers, dlc_time_np, dlc_pos)
    speed = np.concatenate([[0], np.abs(np.diff(pos_interp)) / bin_s])
    beh_feats = np.vstack([pos_interp, speed]).T

    feats = np.hstack([spk_mat, band_feats, beh_feats])
    return t_bins[:-1], feats, units

# Example clustering (requires dlc_time_np set):
# from sklearn.mixture import GaussianMixture
# t_feat, feats, units = build_joint_features(spike_times, spike_clusters, good_units, lfp_rec, dlc.iloc[:,0].values, dlc_time_np, bands=[(13,30),(30,80)], bin_s=0.05, duration_s=120)
# gm = GaussianMixture(n_components=5, covariance_type='full', n_init=3)
# labels = gm.fit_predict(feats)
# plt.plot(t_feat, labels); plt.title('Joint feature clusters'); plt.show()

## Positional clustering of units


In [None]:
from sklearn.mixture import GaussianMixture

def cluster_place_fields(tuning_curves, n_clusters=4):
    """Group units by the shape of their tuning curves with a Gaussian mixture.

    Each curve is z-scored across its own bins (mean removed, divided by its
    standard deviation plus 1e-6) so clustering reflects shape rather than
    overall rate, then a full-covariance mixture is fitted with 3 restarts.

    Parameters
    ----------
    tuning_curves : dict
        ``{unit_id: curve}``; all curves must share a length.
    n_clusters : int
        Number of mixture components.

    Returns
    -------
    labels : dict or None
        ``{unit_id: cluster label}``, or None when there are no units.
    gm : GaussianMixture or None
        The fitted mixture, or None when there are no units.
    """
    units = list(tuning_curves.keys())
    if not units:
        return None, None
    curves = np.stack([tuning_curves[u] for u in units])
    centered = curves - curves.mean(axis=1, keepdims=True)
    zscored = centered / (curves.std(axis=1, keepdims=True) + 1e-6)
    gm = GaussianMixture(n_components=n_clusters, covariance_type='full', n_init=3)
    labels = gm.fit_predict(zscored)
    return {unit: label for unit, label in zip(units, labels)}, gm

# Example after compute_tuning: labels, gm = cluster_place_fields(tc, n_clusters=4)

## Real-time stimulation (outline)
- Stream spikes (threshold/template) and detect simple patterns (rate/phase-lock).
- Trigger DAQ line for opto with safety limits.
- Prototype on DAT-Cre, then adapt to AnxA1-Cre.
- Implement in a real-time framework (Open Ephys plugin or C++/Python with NI-DAQ).