## Add project root

In [None]:
import sys
from pathlib import Path
import importlib

# Make the project root importable. The root is taken to be the parent of the
# current working directory (the folder that contains the notebooks directory).
project_root = Path().resolve().parent
print(project_root)
# Guard the append so that re-running this cell does not pile up duplicate
# entries in sys.path.
if str(project_root) not in sys.path:
    sys.path.append(str(project_root))


## Connect to db

In [None]:
# Connect to DB with a CSV fallback
from Python_scripts import config
import pandas as pd
import platform
import os

# Prefer a live PostgreSQL connection obtained through the shared config module.
# On any failure, switch to CSV mode and read the exported tables from DATA_DIR.
try:
    conn = config.get_conn()
    cursor = conn.cursor()
    use_csv_fallback = False
    print("[INFO] Connected to Postgres")
except Exception as exc:
    print("[WARN] DB connection failed, falling back to CSVs:", exc)
    conn, cursor = None, None
    use_csv_fallback = True
    data_dir = config.get_data_dir()
    print(f"[INFO] Using DATA_DIR = {data_dir}")

    # Cache every dlc_table_*.csv found directly in DATA_DIR, keyed by file name,
    # so later cells can look metadata up without a database.
    dlc_tables = {}
    if not data_dir.exists():
        print(f"[WARN] DATA_DIR does not exist: {data_dir}")
    else:
        for csv_path in data_dir.glob('dlc_table_*.csv'):
            try:
                dlc_tables[csv_path.name] = pd.read_csv(csv_path)
                print(f"[INFO] Loaded {csv_path.name}")
            except Exception as load_err:
                # One unreadable file should not prevent loading the others.
                print(f"[WARN] Couldn't load {csv_path}: {load_err}")


In [None]:
# Path resolver helper: maps stored metadata paths to local files under DATA_DIR
from pathlib import Path
# The connection cell assigns data_dir only when it falls back to CSVs, so
# fetch it from the config here if it has not been defined yet.
if 'data_dir' not in globals():
    data_dir = config.get_data_dir()

def resolve_video_path(stored_path, data_dir=data_dir):
    """Resolve a video or CSV path recorded in the metadata table to a local file.

    Lookup order:
      1. ``stored_path`` exactly as given, if it exists locally.
      2. ``data_dir / stored_path``, if that exists (exact relative-path match).
      3. The first file, in sorted order, found anywhere under ``data_dir``
         whose basename equals the basename of ``stored_path``.
      4. Otherwise ``Path(stored_path)`` unchanged; it may not exist and the
         caller decides how to handle a missing file.

    Args:
        stored_path: Path (``str`` or ``Path``) recorded in the metadata, or ``None``.
        data_dir: Directory to search, as ``str`` or ``Path``. ``None`` disables
            the search. Defaults to the notebook-level ``data_dir`` as it was
            when this function was defined.

    Returns:
        ``None`` when ``stored_path`` is ``None``; otherwise a ``Path``.
    """
    if stored_path is None:
        return None
    p = Path(stored_path)
    # If the path exists as provided, use it directly.
    if p.exists():
        return p
    if data_dir is not None:
        root = Path(data_dir)  # accept plain strings as well as Path objects
        if root.exists():
            # Try the exact relative location first: it is cheap and cannot be
            # confused with an unrelated file that merely shares the basename.
            cand = root / p
            if cand.exists():
                return cand
            # Fall back to a recursive basename search. Sorting makes the pick
            # deterministic when several files share the name; an empty name
            # is skipped because it is not a valid glob pattern.
            if p.name:
                matches = sorted(root.rglob(p.name))
                if matches:
                    return matches[0]
    # Nothing found: hand back the original path (may be non-existent).
    return p

# Sanity print: report DATA_DIR and, if the CSV fallback populated dlc_tables,
# how many tables were loaded.
print(
    f"[INFO] dlc_tables has {len(dlc_tables)} entries; DATA_DIR={data_dir}"
    if 'dlc_tables' in globals()
    else f"[INFO] DATA_DIR={data_dir}; no dlc_tables loaded"
)


## Import modules

In [None]:
# Reload the fetch_id_list module before importing from it, so that edits made
# to the module after it was first imported take effect in this session.
import importlib
import Python_scripts.Data_analysis.fetch_id_list as fetch_mod
importlib.reload(fetch_mod)
# Import the name after the reload so it is bound to the freshly executed code.
from Python_scripts.Data_analysis.fetch_id_list import fetch_id_list


In [None]:
# Import the feature and plotting modules, then reload them so that edits made
# to either module after its first import take effect in this session.
import importlib
import Python_scripts.Feature_functions.angle_features
import Python_scripts.Data_analysis.plot_groupwise_bar

importlib.reload(Python_scripts.Feature_functions.angle_features)
importlib.reload(Python_scripts.Data_analysis.plot_groupwise_bar)

# Bind the working names after the reload so they refer to the fresh definitions.
from Python_scripts.Feature_functions.angle_features import (
    angle_features_for_trial, batch_angle_features
)
from Python_scripts.Data_analysis.plot_groupwise_bar import plot_groupwise_bar


## Fetch id list

In [None]:
# Label used in output file names and plot titles. Note that the query below
# passes task_name=None to fetch_id_list rather than this label.
task_name = "AllTask"
# Trial ids forwarded to fetch_id_list through its bad_ids argument.
bad_id = [549, 559, 566, 567, 570, 571, 595, 617, 621, 638, 640, 36]

saline_id, ghrelin_id, exc_id, inh_id = fetch_id_list(
    conn,
    task_name=None,  # alternative: ['FoodLight', 'ToyOnly', 'ToyLight', 'LightOnly']
    dose_mult=2,
    genotype="white",
    bad_ids=bad_id,
    csv_prefix="dlc_table",  # -> dlc_table_saline.csv, dlc_table_ghrelin.csv, ...
    min_trial_length=None,  # or 600
)

# Show each id list on its own line, followed by a blank line.
print("\n".join(
    f"{label}: {ids}\n"
    for label, ids in (
        ("saline_id", saline_id),
        ("ghrelin_id", ghrelin_id),
        ("exc_id", exc_id),
        ("inh_id", inh_id),
    )
))


## Calculate angle features

### Batch call with a likelihood-threshold sweep

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages

# --- Configure once ---
# Likelihood thresholds to sweep; this arange yields two values (about 0.65 and 0.675).
likelihood_thr = np.arange(0.65, 0.68, 0.025)
window_size = None  # forwarded to batch_angle_features as smooth_window

# Put every group you *might* use here; leave others as None or [] when not needed.
group_specs = {
    "Saline":  saline_id,
    # "Ghrelin": ghrelin_id,
    "Inhibitory": inh_id,
    "Excitatory": exc_id,
}
# Keep only defined & non-empty groups
group_specs = {label: ids for label, ids in group_specs.items() if ids not in (None, [], ())}

# One PDF for the whole sweep, one page per likelihood threshold.
outfile = f"White_Modulation_2X_{task_name}_ang_likelihood_sweep.pdf"
with PdfPages(outfile) as pdf:
    for likelihood in likelihood_thr:
        print(f"[INFO] Analyzing likelihood = {likelihood}")

        # Compute the feature for every group at this likelihood threshold
        frames = []
        for label, ids in group_specs.items():
            df = batch_angle_features(
                conn, ids, likelihood_threshold=likelihood, smooth_window=window_size,
            )
            # Keep what we need; add group label
            df = df[['trial_id', 'head_body_misalignment_p95']].copy().dropna()
            df['group'] = label
            frames.append(df)

        if not frames:
            print("[WARN] No groups provided—skipping this likelihood threshold.")
            continue

        df = pd.concat(frames, ignore_index=True)

        # Plot
        order = list(group_specs.keys())
        # The return value of plot_groupwise_bar is indexed up to [2] elsewhere in
        # this notebook, so it may carry more than (fig, ax). Star-unpacking
        # accepts both a 2-item and a longer return.
        fig, ax, *_extra = plot_groupwise_bar(
            df,
            y='head_body_misalignment_p95',
            ylabel='head_body_misalignment_p95',
            plot_type='bar',
            show_points=False,
            order=order,
            show_stats=True,
            tests_to_show=("ranksums", "ttest", "anova")  # if supported
        )
        ax.set_title(f"{task_name} | likelihood={likelihood}", pad=20)

        pdf.savefig(fig, bbox_inches='tight')
        # Close each figure once saved so open figures do not accumulate.
        plt.close(fig)

print(f"[✓] Saved {outfile}")


### Single call with fixed hyperparameters

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Fixed hyperparameters for this single run.
smooth_window = None
likelihood_threshold = 0.65

# Put every group you *might* use here; leave others as None or [] when not needed.
group_specs = {
    "Saline":  saline_id,
    "Ghrelin": ghrelin_id,
    # "Inhibitory": inh_id,
    # "Excitatory": exc_id,
}
# Keep only defined & non-empty groups
group_specs = {label: ids for label, ids in group_specs.items() if ids not in (None, [], ())}

# Compute the feature table for each group and tag its rows with the group label.
frames = []
for label, ids in group_specs.items():
    df = batch_angle_features(
        conn, ids, likelihood_threshold=likelihood_threshold, smooth_window=smooth_window
    )
    # Keep what we need; add group label
    df = df[['trial_id', 'head_body_misalignment_p95']].copy().dropna()
    df['group'] = label
    frames.append(df)

# Stop here with a clear message: pd.concat on an empty list would fail anyway,
# but with an error that does not point at the actual cause.
if not frames:
    raise ValueError("No groups provided; nothing to analyze.")

df = pd.concat(frames, ignore_index=True)

# Plot
order = list(group_specs.keys())

# NOTE: `ax` holds the whole return value of plot_groupwise_bar, not a single
# Axes. Later cells index it (ax[1], ax[2]), so the name is kept as is.
ax = plot_groupwise_bar(
    df, y='head_body_misalignment_p95',
    ylabel='Mean head_body_misalignment_p95',
    plot_type='bar', show_points=True,
    order=order, show_stats=True,
    tests_to_show=("ranksums","ttest")  # optional
)
fig = ax[0]

In [None]:
# `ax` is the full return value of plot_groupwise_bar from the previous cell;
# element 2 is presumably the statistics summary (show_stats=True) — TODO confirm.
print(ax[2])

In [None]:
# Title the plot through element 1 of the plot_groupwise_bar result, then
# export the figure as a PDF.
ax[1].set_title(
    f"{task_name} | likelihood_threshold={likelihood_threshold}",
    pad=20,
)
# Optional: pin the y-range, e.g. ax[1].set_ylim([0, 1.75])
fig.savefig(
    f"White_2X_{task_name}_ang.pdf",
    dpi=300,
    bbox_inches='tight',
)


## Save Excel file for data submission

In [None]:
# Step 1. Collect the unique trial IDs present in df (the per-trial feature table).
trial_ids = df['trial_id'].unique().tolist()

# Step 2. Fetch task/modulation metadata for those trials: from the database
# when a connection exists, otherwise from the dlc_table CSVs loaded earlier.
if conn is not None:
    query = """
    SELECT id, task, modulation, video_path
    FROM dlc_table
    WHERE id = ANY(%s);
    """
    meta_df = pd.read_sql_query(query, conn, params=(trial_ids,))
else:
    # dlc_tables exists only if the connection cell took the CSV fallback.
    csv_frames = [
        table_df
        for table_df in globals().get('dlc_tables', {}).values()
        if isinstance(table_df, pd.DataFrame)
    ]
    if csv_frames:
        meta_all = pd.concat(csv_frames, ignore_index=True)
        meta_df = meta_all[meta_all['id'].isin(trial_ids)][['id', 'task', 'modulation']].drop_duplicates()
    else:
        # No metadata available; create an empty dataframe with expected columns
        meta_df = pd.DataFrame(columns=['id', 'task', 'modulation'])

# Step 3. Attach the metadata to each feature row (left join on the trial id).
# The helper 'id' column duplicates trial_id after the merge, so drop it.
if 'id' in meta_df.columns:
    df_out = df.merge(meta_df, left_on='trial_id', right_on='id', how='left').drop(columns=['id'])
else:
    df_out = df.copy()

# Optional: resolve stored video paths to local files (column 'resolved_video_path').
# Build an id -> stored path lookup once instead of filtering meta_df for every
# row; keep='first' keeps the first entry per id, and trials without metadata
# map to None.
if 'video_path' in meta_df.columns:
    stored_paths = (
        meta_df.drop_duplicates(subset='id', keep='first')
        .set_index('id')['video_path']
        .to_dict()
    )
    df_out['resolved_video_path'] = df_out['trial_id'].map(
        lambda tid: resolve_video_path(stored_paths.get(tid))
    )

# Step 4. Save to Excel in the current working directory.
# NOTE(review): the file name says "10X" while the id query in this notebook
# uses dose_mult=2 and the PDF outputs are named "2X" — confirm the intended name.
out_path = Path.cwd() / "10X_White_Angle.xlsx"
df_out.to_excel(out_path, index=False)
print(f"[✓] Saved {out_path}")
