## Normalize DeepLabCut-generated CSV files

In [None]:
import sys
from pathlib import Path
import importlib

# Make the project packages importable: add the parent of the current working
# directory to sys.path (on the author's machine this resolves to
# /Users/atanugiri/Downloads/DLC-Jupyter-Notebooks).
project_root = Path().resolve().parents[0]
# Guard against duplicate entries when this cell is re-run in the same kernel.
if str(project_root) not in sys.path:
    sys.path.append(str(project_root))

# Import the module, then reload it so edits made to the file on disk are
# picked up without restarting the kernel, and finally bind the function name.
import Python_scripts.Extract_db_columns.normalize_bodypart_by_id
importlib.reload(Python_scripts.Extract_db_columns.normalize_bodypart_by_id)
from Python_scripts.Extract_db_columns.normalize_bodypart_by_id import normalize_bodypart_by_id


In [None]:
import os
import psycopg2
import pandas as pd
import platform

# Open the PostgreSQL connection shared by the rest of the notebook.
# Host defaults to localhost on Windows and to the fixed remote address on any
# other platform; both host and password can be overridden through the
# DLC_DB_HOST / DLC_DB_PASSWORD environment variables so credentials do not
# have to live in the notebook. The fallbacks keep the previous behaviour.
host = os.environ.get(
    "DLC_DB_HOST",
    "localhost" if platform.system() == "Windows" else "129.108.49.30",
)
conn = psycopg2.connect(
    dbname="deeplabcut_db",
    user="postgres",
    password=os.environ.get("DLC_DB_PASSWORD", "1234"),
    host=host,
    port="5432",
)
cursor = conn.cursor()


## Check data distribution

In [None]:
from Python_scripts.Utility_functions.plot_column_distribution_by_id import plot_column_distribution_by_id
# Run the distribution helper on the head_x column of the row with id 56.
# The commented-out calls below are the same check for the corner4 columns;
# uncomment one to inspect that column instead.
# plot_column_distribution_by_id(conn, id=56, column_name='corner4_x')
# plot_column_distribution_by_id(conn, id=56, column_name='corner4_y')
plot_column_distribution_by_id(conn, id=56, column_name='head_x')


In [None]:
import numpy as np

# Count how many samples of the head_y trace for row id 56 are NaN.
query = "SELECT head_y FROM dlc_table WHERE id = 56;"
df = pd.read_sql_query(query, conn)

# Convert to array (handle stringified list if needed)
val = df.iloc[0]['head_y']
if isinstance(val, str):
    # NOTE(review): eval() executes whatever text is stored in the column.
    # This is only acceptable while the database content is fully trusted;
    # switch to a real parser once the stored text format is confirmed.
    val = eval(val)
# dtype=float maps None entries to NaN so np.isnan works on the result
# instead of failing on an object array.
head_y = np.array(val, dtype=float)

# Count NaNs
num_nans = np.isnan(head_y).sum()
print(f"NaNs in head_y: {num_nans} out of {len(head_y)}")


In [None]:
# Normalize the head coordinates of row id 56 and preview the result.
x_norm, y_norm = normalize_bodypart_by_id(conn, id=56, bodypart='head')

# Nothing is printed when the helper hands back None for x
# (presumably its failure signal — confirm against the helper).
if x_norm is not None:
    print("Normalized coordinates (first 5 frames):")
    for label, values in (("x:", x_norm), ("y:", y_norm)):
        print(label, values[:5])

## Insert the normalized CSV files into the database

In [None]:
from Python_scripts.Insert_to_featuretable.insert_norm_dlc_arrays import insert_norm_dlc_arrays

# Gather, in ascending order, the id of every row that has head_x data.
query = (
    "SELECT id FROM dlc_table "
    "WHERE head_x IS NOT NULL "
    "ORDER BY id;"
)
df = pd.read_sql_query(query, conn)
id_list = df.loc[:, 'id'].tolist()

# Hand the whole id list to the insert helper for the head bodypart.
insert_norm_dlc_arrays(id_list, conn, bodypart="head")


In [None]:
import pandas as pd

# Read the normalized head columns back for row id 56.
query = "SELECT head_x_norm, head_y_norm FROM dlc_table WHERE id = 56;"
df = pd.read_sql_query(query, conn)

# Show the first 5 values of each column
for column in ('head_x_norm', 'head_y_norm'):
    print(f"{column}:", df[column].iloc[0][:5])


In [None]:
import matplotlib.pyplot as plt

# Plot the normalized head trajectory held in `df`, which is expected to be
# the head_x_norm / head_y_norm row fetched for id 56.
x = df['head_x_norm'].iloc[0]
y = df['head_y_norm'].iloc[0]

plt.figure(figsize=(6, 6))
plt.plot(x, y, marker='o', linestyle='-')
# The title names id 56 to match the row that was queried (it previously said 58).
plt.title("Normalized Head Trajectory for ID 56")
plt.xlabel("x (normalized)")
plt.ylabel("y (normalized)")
# Equal axis scaling keeps the trajectory geometrically undistorted.
plt.axis("equal")
plt.grid(True)
plt.show()


In [None]:
# Fetch every column of the row with id 56 and print the resulting DataFrame.
query = """
SELECT * FROM dlc_table WHERE id = 56;
"""
df = pd.read_sql_query(query, conn)
print(df)
# Optional per-column preview (first 10 entries of each cell); left disabled.
# for col in df.columns:
#     print(f"{col}: {df[col].iloc[0][:10]}")


In [None]:
# Reduce each of the four corner traces in `df` to a single (x, y) point by
# taking the NaN-ignoring median of its coordinates.
array_like = (list, np.ndarray)
corner_pts = []
for i in range(1, 5):
    x_vals = df[f'corner{i}_x'].iloc[0]
    y_vals = df[f'corner{i}_y'].iloc[0]

    # A corner whose x or y cell is not a list/ndarray gets a NaN placeholder
    # so the output still has one row per corner.
    if not (isinstance(x_vals, array_like) and isinstance(y_vals, array_like)):
        print(f"⚠️ corner{i} is not array-like — skipping")
        corner_pts.append([np.nan, np.nan])
        continue

    corner_pts.append([np.nanmedian(x_vals), np.nanmedian(y_vals)])

corner_pts = np.array(corner_pts)
print(corner_pts)
