In [None]:
from pathlib import Path
import scipy.io as sio
import pandas as pd
import numpy as np
# NOTE(review): this binds the top-level matplotlib package to `plt`; the
# conventional target of that alias is `matplotlib.pyplot`. `plt` is not used in
# the cells visible here, so confirm the intent before plotting code relies on it.
import matplotlib as plt
import pursuit_functions as pursuit
import pyarrow as pa
import re

pd.set_option('display.max_columns', 100)  # Show more columns (default is 20)

%load_ext autoreload
%autoreload 2

In [None]:
# Load the region directory files.
# The data root is machine-specific; alternative locations are kept commented out.
#data_dir = Path("/Users/may/Projects/nitzPurusitData")
data_dir = Path(r"Z:\Data\Andy\nitzPurusitData")
#data_dir = Path("/Volumes/ASA_Lab/Data/Xiaoxiao/ppcRscEVCPoster/pursuitSessionFiles")

# The result is indexed by the keys 'slRsc' and 'slPar' in the next cell;
# presumably prefix='sl' selects the files with those names — confirm in
# pursuit.file_reader.
region_directories = pursuit.file_reader.load_region_files(data_dir, prefix='sl')

In [None]:
# Wrap each region's loaded directory data in its own DataFrame
# (one from the 'slRsc' entry, one from the 'slPar' entry).
rsc_directory = pd.DataFrame(region_directories['slRsc'])
ppc_directory = pd.DataFrame(region_directories['slPar']) 

In [None]:
# Unpack block indices with the helper from the pursuit package.
# The return values are not assigned, so this presumably modifies the frames in
# place — confirm in pursuit.df_utils.
# NOTE(review): a later cell defines a local unpack_block_indices and applies it
# to the same two frames again; one of the two passes is likely redundant.
pursuit.df_utils.unpack_block_indices(rsc_directory)
pursuit.df_utils.unpack_block_indices(ppc_directory)

In [None]:
# Display the RSC directory frame for a visual check.
rsc_directory

In [None]:
def unpack_block_indices(df):
    """Expand the ``blocks`` column into per-block start/end columns.

    For each of the three block names ("FE1", "pursuit", "FE2", in that
    positional order) two columns ``<name>_start`` and ``<name>_end`` are
    written onto ``df``. They are taken from elements 0 and 1 of the
    corresponding entry in each row's ``blocks`` value. Rows whose ``blocks``
    value is too short to hold that entry receive ``None`` for both columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``blocks`` column of indexable, sized sequences.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, modified in place.
    """
    block_names = ["FE1", "pursuit", "FE2"]

    for position, name in enumerate(block_names):
        for suffix, bound in (("start", 0), ("end", 1)):
            # Default arguments pin the current loop values inside the lambda.
            df[f"{name}_{suffix}"] = df["blocks"].apply(
                lambda entry, p=position, b=bound: entry[p][b] if len(entry) > p else None
            )

    return df


In [None]:
# Apply the notebook-local unpack_block_indices to both region directories.
# It writes the *_start / *_end columns onto the frames in place; as the last
# expression of the cell, the returned PPC frame is also displayed.
unpack_block_indices(rsc_directory)
unpack_block_indices(ppc_directory)

In [None]:
# Load the pursuit session files referenced by either region directory.
# The session folder is machine-specific; an alternative location is kept
# commented out.
#data_dir2 = Path("/Users/may/Projects/nitzPurusitData/Sessions")
data_dir2 = Path(r"Z:\Data\Andy\nitzPurusitData\Sessions")

rsc_files = rsc_directory["sessFile"].unique().tolist()
ppc_files = ppc_directory["sessFile"].unique().tolist()
# Order-preserving de-duplication: a set union has an ordering that can change
# between kernel restarts (string hash randomisation), whereas dict.fromkeys
# keeps first-seen order, so the file list is identical on every run.
include_files = list(dict.fromkeys(rsc_files + ppc_files))

session_files = pursuit.file_reader.load_session_files(data_dir2, include_files=include_files)


In [None]:
# Turn the extracted pursuit session file data into DataFrames.
def convert_sessions_to_dfs(session_files_df):
    """Build one pyarrow-backed DataFrame per session entry.

    Parameters
    ----------
    session_files_df : dict
        Mapping of session name to data accepted by ``pd.DataFrame``.

    Returns
    -------
    dict
        Same keys, each mapped to ``pd.DataFrame(data)`` after
        ``convert_dtypes(dtype_backend="pyarrow")``.
    """
    converted = {}
    for name, raw in session_files_df.items():
        frame = pd.DataFrame(raw)
        converted[name] = frame.convert_dtypes(dtype_backend="pyarrow")
    return converted

In [None]:
# Convert every loaded session into its own DataFrame, keyed like session_files.
session_dfs = convert_sessions_to_dfs(session_files)

In [None]:
# Preview the first rows of one session frame as a sanity check.
session_dfs["KB10_01_pursuitRoot.mat"].head()

In [None]:
# Filter session frames down to the spike tables of valid cells.
def filter_spkTables(region_df, session_dfs):
    """Keep only the spike-table columns belonging to a region's cells.

    For every session in ``session_dfs`` that also appears in
    ``region_df["sessFile"]``, a copy of the session frame is produced that
    retains every column not starting with ``"spkTable"`` plus those columns
    named exactly ``spkTable<idx>`` or ``spkTable_<idx>``, where ``<idx>`` is
    the string form of one of that session's ``cellIndex`` values.

    Parameters
    ----------
    region_df : pandas.DataFrame
        Region directory with ``sessFile`` and ``cellIndex`` columns.
    session_dfs : dict
        Mapping of session name to DataFrame with string column labels.

    Returns
    -------
    dict
        Session name -> filtered copy. Sessions that have no rows in
        ``region_df`` are omitted.
    """
    cells_by_session = region_df.groupby("sessFile")["cellIndex"].apply(list)

    filtered_dfs = {}
    for session, df in session_dfs.items():
        if session not in cells_by_session:
            continue

        # Literal name lookup instead of building a regex per (column, cell)
        # pair: the cell index is matched verbatim (no regex metacharacter
        # surprises) and each column costs one set lookup.
        wanted = set()
        for cell in cells_by_session[session]:
            wanted.add(f"spkTable{cell}")
            wanted.add(f"spkTable_{cell}")

        non_spk_cols = [c for c in df.columns if not c.startswith("spkTable")]
        keep_spk_cols = [c for c in df.columns if c in wanted]
        filtered_dfs[session] = df[non_spk_cols + keep_spk_cols].copy()

    return filtered_dfs

In [None]:
# Restrict each session frame to the spkTable columns of cells listed in the
# RSC directory; sessions absent from that directory are dropped.
rsc_session_dfs = filter_spkTables(rsc_directory, session_dfs)

In [None]:
# Preview the first rows of one filtered session frame.
rsc_session_dfs["KB10_09_pursuitRoot.mat"].head()

In [None]:
# Display another filtered session frame (whole frame, not just .head()).
rsc_session_dfs["KB10_01_pursuitRoot.mat"]

In [None]:
def build_region_df(region_df, session_dfs):
    """Stack per-session frames into one region-wide frame with block labels.

    Each session frame is given a uniform set of spike columns
    ``spkTable_1 .. spkTable_<max cellIndex>``: missing ones are added as NaN,
    and ``spkTable*`` columns outside that set are dropped. Three columns are
    then appended:

    * ``sessIdx``  - the row's index label within its session frame
    * ``sessFile`` - the session key
    * ``block``    - ``"FE1"``, ``"pursuit"`` or ``"FE2"`` for rows whose
      ``sessIdx`` lies in the inclusive ``[<block>_start, <block>_end]`` range
      taken from the session's first row in ``region_df``; NaN elsewhere.

    The input frames are not modified.

    Parameters
    ----------
    region_df : pandas.DataFrame
        Region directory with ``sessFile``, ``cellIndex`` and the
        ``<block>_start`` / ``<block>_end`` columns.
    session_dfs : dict
        Mapping of session name to DataFrame.

    Returns
    -------
    pandas.DataFrame
        Row-wise concatenation of all processed session frames, with each
        session appearing exactly once.

    Raises
    ------
    IndexError
        If a session in ``session_dfs`` has no row in ``region_df``.
    ValueError
        If ``session_dfs`` is empty (raised by ``pd.concat``).
    """
    max_cell = region_df["cellIndex"].max()
    print("Highest cellIndex:", max_cell)

    # int() lets range() accept a max that pandas returned as a float.
    all_spk_cols = [f"spkTable_{i}" for i in range(1, int(max_cell) + 1)]

    dfs = []

    for sessFile, session_df in session_dfs.items():
        non_spk_cols = [c for c in session_df.columns if not c.startswith("spkTable")]

        # reindex builds a new frame in the target column order and fills
        # absent spike columns with NaN in one step, instead of inserting the
        # columns one at a time.
        df = session_df.reindex(columns=non_spk_cols + all_spk_cols)

        df["sessIdx"] = df.index
        df["sessFile"] = sessFile

        session_row = region_df.loc[region_df["sessFile"] == sessFile].iloc[0]

        # NOTE(review): sessIdx comes from the frame's own index; confirm the
        # block bounds use the same index base.
        blocks = np.full(len(df), np.nan, dtype=object)
        for block_id in ("FE1", "pursuit", "FE2"):
            start_val = session_row[f"{block_id}_start"]
            end_val = session_row[f"{block_id}_end"]

            if pd.notna(start_val) and pd.notna(end_val):
                start, end = int(start_val), int(end_val)
                mask = ((df["sessIdx"] >= start) & (df["sessIdx"] <= end)).to_numpy()
                blocks[mask] = block_id

        df["block"] = blocks

        # Append once per session: a second append of the same frame would
        # duplicate every row in the concatenated result.
        dfs.append(df)

    big_df = pd.concat(dfs, axis=0)
    return big_df


In [None]:
# Stack the filtered RSC sessions into a single frame carrying
# sessIdx / sessFile / block columns.
rsc_df = build_region_df(rsc_directory, rsc_session_dfs)

In [None]:
# Display the combined RSC frame.
rsc_df

In [None]:
def count_neurons(region_df):
    """Print, per session, how many spike columns hold at least one value.

    Rows are grouped by ``sessFile``. Within each group, a column whose name
    starts with ``"spkTable"`` is counted when it contains any non-null entry.
    One line is printed per session, followed by the sum over all sessions.

    Parameters
    ----------
    region_df : pandas.DataFrame
        Frame with a ``sessFile`` column and ``spkTable*`` columns.

    Returns
    -------
    None
    """
    running_total = 0

    for session_name, session_rows in region_df.groupby("sessFile"):
        populated = sum(
            1
            for column in session_rows.columns
            if column.startswith("spkTable") and session_rows[column].notna().any()
        )
        print(f"{session_name}: {populated} spkTable columns")
        running_total = running_total + populated

    print(f"\nGrand total: {running_total} (session, column) combos")


In [None]:
# Print, per session, how many spkTable columns of the combined RSC frame hold
# any non-null value, plus the overall total.
count_neurons(rsc_df)

In [None]:
def count_spkTables(session_dfs):
    """Print how many ``spkTable*`` columns each session frame has.

    One line is printed per session in the mapping's iteration order, followed
    by the sum across all sessions. Columns are counted by name only, whether
    or not they contain data.

    Parameters
    ----------
    session_dfs : dict
        Mapping of session name to DataFrame with string column labels.

    Returns
    -------
    None
    """
    running_total = 0

    for name, frame in session_dfs.items():
        # Summing booleans counts the matching column names.
        n_spk = sum(column.startswith("spkTable") for column in frame.columns)
        print(f"{name}: {n_spk} spkTable columns")
        running_total = running_total + n_spk

    print(f"\nGrand total: {running_total} (session, column) combos")


In [None]:
# Print how many spkTable columns each filtered RSC session frame carries.
count_spkTables(rsc_session_dfs)

In [None]:
# Preview the first rows of the filtered KB10_01 session frame.
rsc_session_dfs["KB10_01_pursuitRoot.mat"].head()

In [None]:
def make_region_df(region_df, session_dfs, region: str):
    """Concatenate a region's session frames and tag them with the region name.

    Sessions are visited in the order of the unique ``sessFile`` values of
    ``region_df``; those missing from ``session_dfs`` are skipped. Each kept
    frame is copied, given a ``region`` column set to ``region``, and the
    copies are concatenated with a fresh 0..n-1 index.

    Parameters
    ----------
    region_df : pandas.DataFrame
        Region directory with a ``sessFile`` column.
    session_dfs : dict
        Mapping of session name to DataFrame.
    region : str
        Label written into the ``region`` column and used as the result key.

    Returns
    -------
    dict
        ``{region: combined_frame}``, or an empty dict when no session matched.
    """
    tagged_frames = [
        session_dfs[name].assign(region=region)  # assign returns a new frame
        for name in region_df["sessFile"].unique()
        if name in session_dfs
    ]

    if not tagged_frames:
        return {}

    return {region: pd.concat(tagged_frames, ignore_index=True)}
