## Add project root

In [None]:
import sys
from pathlib import Path
import importlib

# Make the project packages importable: the project root is taken to be the
# parent of the current working directory (assumes the notebook is run from
# the DLC-Jupyter-Notebooks folder, e.g. .../GhrelinBehaviorQuantification).
project_root = Path().resolve().parents[0]
print(project_root)

# Re-running this cell must not stack duplicate entries onto sys.path.
if str(project_root) not in sys.path:
    sys.path.append(str(project_root))


## Connect to db

In [None]:
import os
import platform

import psycopg2

# Connection settings can be overridden through DLC_DB_* environment
# variables so credentials do not have to live in the notebook; the
# fallbacks are the values this notebook has always used.
default_host = "localhost" if platform.system() == "Windows" else "129.108.49.49"
host = os.environ.get("DLC_DB_HOST", default_host)

conn = psycopg2.connect(
    dbname=os.environ.get("DLC_DB_NAME", "deeplabcut_db"),
    user=os.environ.get("DLC_DB_USER", "postgres"),
    password=os.environ.get("DLC_DB_PASSWORD", "1234"),
    host=host,
    port=os.environ.get("DLC_DB_PORT", "5432"),
)
cursor = conn.cursor()


## Fetch id list

In [None]:
import pandas as pd

# Label used as the filename/title prefix by the plotting cells below.
task = 'WhiteAnimals10X_MazeOnly'

# IDs to remove from every group
bad_id = [549, 559, 566, 567, 570, 571, 595, 617, 621, 638, 640, 36]


def fetch_group_ids(query, csv_path):
    """Run ``query`` on ``conn`` and return the rows plus the usable ids.

    The complete query result (before ``bad_id`` filtering) is written to
    ``csv_path``.

    Args:
        query: SQL text selecting at least an ``id`` column from dlc_table.
        csv_path: Destination of the CSV dump of the query result.

    Returns:
        A ``(frame, ids)`` tuple: the raw result DataFrame and the list of
        its ids that are not in ``bad_id``.
    """
    frame = pd.read_sql_query(query, conn)
    frame.to_csv(csv_path, index=False)
    excluded = set(bad_id)
    return frame, [i for i in frame['id'].to_list() if i not in excluded]


saline_q = """
SELECT id, video_name, task, health, modulation, genotype, trial_length, dose_mult FROM dlc_table 
WHERE health = 'saline' AND modulation='NA' AND trial_length>599 AND genotype='white' AND dose_mult=10
AND task='MazeOnly'
ORDER BY id;
"""

ghrelin_q = """
SELECT id, video_name, task, health, genotype, modulation, trial_length, dose_mult FROM dlc_table 
WHERE health = 'ghrelin' AND modulation='NA' AND trial_length>599 AND genotype='white' AND dose_mult=10
AND task='MazeOnly'
ORDER BY id;
"""

Inh_q = """
SELECT id, video_name, task, health, genotype, modulation, trial_length FROM dlc_table 
WHERE modulation='Inhibitory'
ORDER BY id;
"""

Exc_q = """
SELECT id, video_name, task, health, genotype, modulation, trial_length FROM dlc_table 
WHERE modulation='Excitatory'
ORDER BY id;
"""

_, saline_id = fetch_group_ids(saline_q, "dlc_table_saline.csv")
# `df` keeps holding the ghrelin rows, as it did before this cell was refactored.
df, ghrelin_id = fetch_group_ids(ghrelin_q, "dlc_table_ghrelin.csv")
# The "Single call" cell further down needs Inh_id and Exc_id, so these two
# groups are fetched here as well.
_, Inh_id = fetch_group_ids(Inh_q, "dlc_table_inhibitory.csv")
_, Exc_id = fetch_group_ids(Exc_q, "dlc_table_excitatory.csv")

print(f"saline_id: {saline_id}\n")
print(f"ghrelin_id: {ghrelin_id}\n")
print(f"Exc_id: {Exc_id}\n")
print(f"Inh_id: {Inh_id}\n")


## Calculate distance

In [None]:
import importlib
import Python_scripts.Feature_functions.motion_features
import Python_scripts.Data_analysis.plot_groupwise_bar
import Python_scripts.Data_analysis.compare_distributions

# Reload the project modules so edits made on disk are picked up without
# restarting the kernel; the from-imports below must come after the reload
# to bind the refreshed functions.
for _module in (
    Python_scripts.Feature_functions.motion_features,
    Python_scripts.Data_analysis.plot_groupwise_bar,
    Python_scripts.Data_analysis.compare_distributions,
):
    importlib.reload(_module)

from Python_scripts.Feature_functions.motion_features import (
    compute_motion_features,
    batch_compute_motion_feature,
)
from Python_scripts.Data_analysis.plot_groupwise_bar import plot_groupwise_bar
from Python_scripts.Data_analysis.compare_distributions import compare_distributions


### Distribution of distance per frame

In [None]:
import numpy as np

# NOTE: saline_d and ghrelin_d are assigned by the "Batch call" cell further
# down; run that cell first. Each is concatenated into a single array per
# group before the two distributions are compared as CDFs.
fig, ax = compare_distributions(
    np.concatenate(saline_d),
    np.concatenate(ghrelin_d),
    labels=['Saline', 'Ghrelin'],
    kind='cdf'
)

ax.set_xlim(0, 0.075)
# Human-readable title in the same "{task} | ..." form as the other plots
# (previously the output filename, ".pdf" included, was used as the title).
ax.set_title(f"{task} | distance distribution")
fig.savefig(f"{task}_distance_distribution.pdf", dpi=300, bbox_inches='tight')


### Batch call with different max_time

In [None]:
import numpy as np
import os
from matplotlib.backends.backend_pdf import PdfPages
import matplotlib.pyplot as plt

# One PDF page per time limit: total Head distance per trial, Saline vs Ghrelin.
# Further groups (e.g. excitatory / inhibitory) can be added the same way.
pdf_path = f'{task}_distance_all_time.pdf'
distance_kwargs = dict(bodypart='Head', feature='distance')

with PdfPages(pdf_path) as pdf:
    for max_time in range(200, 1400, 200):
        print(f"[INFO] Analyzing max_time = {max_time} seconds")

        saline_d = batch_compute_motion_feature(
            conn, saline_id, time_limit=max_time, **distance_kwargs
        )
        ghrelin_d = batch_compute_motion_feature(
            conn, ghrelin_id, time_limit=max_time, **distance_kwargs
        )

        # Collapse each trial's distance values into a single total.
        saline_totals = [trial.sum() for trial in saline_d]
        ghrelin_totals = [trial.sum() for trial in ghrelin_d]

        # Long-format table: one row per trial, labelled with its group.
        group_labels = (
            ['Saline'] * len(saline_totals)
            + ['Ghrelin'] * len(ghrelin_totals)
        )
        df = pd.DataFrame({
            'group': group_labels,
            'total_distance': saline_totals + ghrelin_totals,
        })

        fig, ax = plot_groupwise_bar(df, y='total_distance', ylabel='Distance')
        ax.set_title(f"{task} | max_time = {max_time}s")

        pdf.savefig(fig)
        plt.close(fig)  # Prevents inline display in notebooks

print(f"[DONE] All plots saved to: {pdf_path}")


## Single call with a fixed max_time

In [None]:
import numpy as np
import os
import matplotlib.pyplot as plt

# Compares Saline with the Inhibitory and Excitatory groups at one time limit.
# Requires saline_id, Inh_id and Exc_id from the "Fetch id list" cell.
max_time = 1200
distance_kwargs = dict(bodypart='Head', feature='distance', time_limit=max_time)

saline_d = batch_compute_motion_feature(conn, saline_id, **distance_kwargs)
exc_d = batch_compute_motion_feature(conn, Exc_id, **distance_kwargs)
inh_d = batch_compute_motion_feature(conn, Inh_id, **distance_kwargs)

# Collapse each trial's distance values into a single total.
saline_totals = [trial.sum() for trial in saline_d]
exc_totals = [trial.sum() for trial in exc_d]
inh_totals = [trial.sum() for trial in inh_d]

# Long-format table: one row per trial, ordered Saline, Inhibitory, Excitatory.
group_labels = (
    ['Saline'] * len(saline_totals)
    + ['Inhibitory'] * len(inh_totals)
    + ['Excitatory'] * len(exc_totals)
)
df = pd.DataFrame({
    'group': group_labels,
    'total_distance': saline_totals + inh_totals + exc_totals,
})

fig, ax = plot_groupwise_bar(
    df,
    y='total_distance',
    ylabel='Distance',
    plot_type='bar',
    show_points=False,
)
ax.set_title(f"{task} | max_time = {max_time}s")


In [None]:
# Clamp the y-axis of the bar plot from the previous cell to 0-120, then export it.
ax.set_ylim(bottom=0, top=120)
bar_pdf_path = f"{task}_distance_bar.pdf"
fig.savefig(bar_pdf_path, dpi=300, bbox_inches='tight')


In [None]:
from scipy.stats import ttest_ind

# Compare each modulation group against the Saline group. The Excitatory
# values used to be extracted but never tested; both comparisons are now run
# and each printed line names the groups it refers to.
# NOTE: the p-values are not corrected for multiple comparisons.
saline_vals = df.loc[df['group'] == 'Saline', 'total_distance']

for treatment in ('Inhibitory', 'Excitatory'):
    treatment_vals = df.loc[df['group'] == treatment, 'total_distance']

    # Welch's t-test (no assumption of equal variances)
    stat, pval = ttest_ind(saline_vals, treatment_vals, equal_var=False)

    print(f"Welch's t-test (Saline vs {treatment}): t = {stat:.3f}, p = {pval:.5f}")
