## Connect to database

In [None]:
import os
import psycopg2
import pandas as pd
import platform

# Connect to localhost when running on Windows, otherwise to the remote
# server address.
if platform.system() == "Windows":
    host = "localhost"
else:
    host = "129.108.49.30"

# Connection settings gathered in one place; the connection and cursor
# created here are the ones the later cells use.
db_settings = {
    "dbname": "deeplabcut_db",
    "user": "postgres",
    "password": "1234",
    "host": host,
    "port": "5432",
}
conn = psycopg2.connect(**db_settings)
cursor = conn.cursor()


## Insert video file names (Run this from the computer where the videos are located)

In [None]:
from pathlib import Path

# Project root = parent of the current working directory (the notebook is
# expected to be started from a sub-folder of the project).
project_root = Path().resolve().parents[0]

# Folder that is scanned recursively for video files
base_folder = project_root / "data" / "ToyOnly" / "SplitVideos"

# Check path
print(f"Looking in: {base_folder}")
print(f"Exists? {base_folder.exists()}")

# Collect video files; sorted so the insert order is reproducible instead of
# following the filesystem's arbitrary listing order.
video_files = sorted(
    list(base_folder.rglob("*.mp4")) + list(base_folder.rglob("*.avi"))
)

# Stored names are paths relative to base_folder, wrapped in 1-tuples because
# executemany() expects one parameter sequence per row.
video_records = [(str(vf.relative_to(base_folder)),) for vf in video_files]

insert_query = "INSERT INTO dlc_table (video_name) VALUES (%s);"

try:
    # Avoid duplicate inserts: look up the names that are already stored
    cursor.execute("SELECT video_name FROM dlc_table;")
    existing = {record[0] for record in cursor.fetchall()}

    # Keep only videos that are not in the table yet
    new_records = [vr for vr in video_records if vr[0] not in existing]

    cursor.executemany(insert_query, new_records)
    conn.commit()
except psycopg2.Error:
    # A failed statement aborts the open transaction; roll back so the shared
    # connection stays usable for the following cells, then surface the error.
    conn.rollback()
    raise


## Add "Python_scripts" to sys.path (so the packages inside it can be imported)

In [None]:
import sys
from pathlib import Path

# Python_scripts (parent folder of Extract_db_columns) lives next to the
# current working directory's parent; make it importable.
scripts_dir = Path().resolve().parents[0] / "Python_scripts"

# Re-running this cell must not pile up duplicate sys.path entries.
if str(scripts_dir) not in sys.path:
    sys.path.append(str(scripts_dir))


## Update task, date, name, health in dlc_table (parsed from video_name, matched by id)

In [None]:
import importlib
import Extract_db_columns.parse_video_name
# Reload before binding the function so edits to the helper module are picked
# up without restarting the kernel.
importlib.reload(Extract_db_columns.parse_video_name)
from Extract_db_columns.parse_video_name import parse_video_name

# Fetch the id and name of every row that has a video name.
cursor.execute("SELECT id, video_name FROM dlc_table WHERE video_name IS NOT NULL;")
rows = cursor.fetchall()

# One parameter tuple per row: the four parsed fields followed by the row id
# that the WHERE clause matches on.
updates = []
for id_, video_name in rows:
    task, date_str, name, health = parse_video_name(video_name)
    updates.append((task, date_str, name, health, id_))

# Apply all updates in one batch, then persist them.
update_query = """
UPDATE dlc_table
SET task = %s, date = %s, name = %s, health = %s
WHERE id = %s;
"""
cursor.executemany(update_query, updates)
conn.commit()


## Insert num_frames, frame_rate, video_width, video_height

In [None]:
import importlib
import Extract_db_columns.update_video_info

# reload() returns the refreshed module object, so it can be called directly.
video_info_module = importlib.reload(Extract_db_columns.update_video_info)

# Sub-directories handed to the updater
video_subdirs = ['FoodLight', 'FoodOnly', 'LightOnly', 'ToyOnly', 'ToyLight']
video_info_module.update_video_info_in_db(video_subdirs)


## Insert raw coordinates for bodyparts

### Query dlc_table to get id, video_name and frame_rate:

In [None]:
# Select id, name and frame rate of every row that has both a video name and
# a frame rate, ordered by id.
query = """
SELECT id, video_name, frame_rate
FROM dlc_table
WHERE video_name IS NOT NULL AND frame_rate IS NOT NULL ORDER BY id;
"""
# Load the result set into a DataFrame (one row per matching dlc_table row).
df = pd.read_sql_query(sql=query, con=conn)


In [None]:
import importlib
import Extract_db_columns.find_csv_for_video
import Extract_db_columns.parse_dlc_csv
import Insert_to_featuretable.insert_dlc_arrays
import Insert_to_featuretable.is_dlc_data_already_inserted

# Reload every helper module (in import order) so source edits take effect
# without restarting the kernel.
for _helper_module in (
    Extract_db_columns.find_csv_for_video,
    Extract_db_columns.parse_dlc_csv,
    Insert_to_featuretable.insert_dlc_arrays,
    Insert_to_featuretable.is_dlc_data_already_inserted,
):
    importlib.reload(_helper_module)

# Bind the functions only after reloading so the names point at the fresh code.
from Extract_db_columns.find_csv_for_video import find_csv_for_video
from Extract_db_columns.parse_dlc_csv import parse_dlc_csv
from Insert_to_featuretable.insert_dlc_arrays import insert_dlc_arrays
from Insert_to_featuretable.is_dlc_data_already_inserted import is_dlc_data_already_inserted


# Body parts whose coordinates are parsed and inserted for every video
bodyparts = ['Corner1', 'Corner2', 'Corner3', 'Corner4', 'Head', 'Neck', 'Tailbase']

import psycopg2.extensions
from tqdm import tqdm

# Set to True to skip videos for which is_dlc_data_already_inserted() reports
# existing data. Off by default, so every video is processed.
SKIP_ALREADY_INSERTED = False

for _, row in tqdm(df.iterrows(), total=len(df), desc="Processing DLC files"):
    video_id = row['id']
    video_name = row['video_name']
    frame_rate = row['frame_rate']
    print(f"{video_id}, {video_name}, {frame_rate}")

    if SKIP_ALREADY_INSERTED and is_dlc_data_already_inserted(conn, video_id):
        print(f"⏩ Skipping video_id {video_id}")
        continue

    csv_path = find_csv_for_video(video_name)

    # Guard clause: nothing to parse when no CSV was found for this video
    if not csv_path:
        print(f"❌ CSV not found for {video_name}")
        continue

    try:
        rows = parse_dlc_csv(csv_path, frame_rate, bodyparts)
        df_parsed = pd.DataFrame(rows)
        insert_dlc_arrays(conn, video_id, df_parsed, bodyparts)
    except Exception as e:
        # The loop deliberately continues after a failure. If the failure was
        # a database error, the open transaction is now aborted and psycopg2
        # rejects every further command until rollback() is called. Without
        # this, one bad video would make all following inserts fail too.
        # Only roll back when the transaction is actually in the error state,
        # so failures unrelated to the database leave pending work untouched.
        if conn.get_transaction_status() == psycopg2.extensions.TRANSACTION_STATUS_INERROR:
            conn.rollback()
        print(f"❌ Error for video_id {video_id}: {e}")
    else:
        print(f"✅ Inserted video_id {video_id}")


In [None]:
bodyparts = ['Corner1', 'Corner2', 'Corner3', 'Corner4', 'Head', 'Neck', 'Tailbase']

# Take the first queried video as the single example the following cells use.
row = df.iloc[0]
video_id, video_name, frame_rate = row['id'], row['video_name'], row['frame_rate']

print(f"Processing ID {video_id} — {video_name} @ {frame_rate} FPS")


In [None]:
# Look up the DLC CSV that belongs to the selected video and show the result.
# NOTE(review): the batch loop treats a falsy return value as "CSV not found";
# confirm what find_csv_for_video returns in that case before parsing it.
csv_path = find_csv_for_video(video_name)
print(f"CSV Path: {csv_path}")


In [None]:
# Parse the selected video's CSV for the listed body parts and load the
# result into a DataFrame.
rows = parse_dlc_csv(csv_path, frame_rate, bodyparts)
df_parsed = pd.DataFrame(rows)
# Last expression of the cell, so the notebook renders the first rows.
df_parsed.head()


In [None]:
# (number of rows, number of columns) of the parsed data
df_parsed.shape


In [None]:
import importlib
import Extract_db_columns.find_csv_for_video
import Extract_db_columns.parse_dlc_csv
import Insert_to_featuretable.insert_dlc_arrays
import Insert_to_featuretable.is_dlc_data_already_inserted

# Refresh the helper modules, in import order, before rebinding their functions.
for _reloaded in (
    Extract_db_columns.find_csv_for_video,
    Extract_db_columns.parse_dlc_csv,
    Insert_to_featuretable.insert_dlc_arrays,
    Insert_to_featuretable.is_dlc_data_already_inserted,
):
    importlib.reload(_reloaded)

from Extract_db_columns.find_csv_for_video import find_csv_for_video
from Extract_db_columns.parse_dlc_csv import parse_dlc_csv
from Insert_to_featuretable.insert_dlc_arrays import insert_dlc_arrays
from Insert_to_featuretable.is_dlc_data_already_inserted import is_dlc_data_already_inserted

# Insert the parsed data of the single selected video.
insert_dlc_arrays(conn, video_id, df_parsed, bodyparts)

In [None]:
import pandas as pd

# Build a preview frame from the parsed rows.
df_preview = pd.DataFrame(rows)

# A bare head() is only rendered when it is the last expression of a cell;
# here it was followed by print(), so its result was silently discarded.
# Print it explicitly, then the shape.
print(df_preview.head())
print(df_preview.shape)

### Completely Clear Table First (CAREFUL)

In [None]:
# Remove every row from dlc_files and make the deletion permanent.
# WARNING: once commit() has run, the rows cannot be restored from this session.
cursor.execute("DELETE FROM dlc_files")
conn.commit()
print("All entries deleted from dlc_files.")
# To undo the DELETE instead, call rollback() *before* commit(); after the
# commit it no longer has any effect.
# conn.rollback()

## Insert CSV file paths into dlc_files (coord_path column)

In [None]:
# Folder with new CSVs
csv_dir = r"C:\DeepLabCutProjects\DLC-Atanu-2024-12-25\Analyzed-videos-filtered"
# Sorted so files are inserted in a reproducible order (os.listdir order is arbitrary)
csv_files = sorted(f for f in os.listdir(csv_dir) if f.endswith('.csv'))

try:
    # Fetch the stored paths once instead of issuing one SELECT per file
    cursor.execute("SELECT coord_path FROM dlc_files;")
    known_paths = {record[0] for record in cursor.fetchall()}

    # Insert each CSV path that is not in the table yet
    for csv_file in csv_files:
        coord_path = os.path.join(csv_dir, csv_file)

        if coord_path in known_paths:
            print(f"⚠️ Already in DB: {csv_file}")
            continue

        cursor.execute("INSERT INTO dlc_files (coord_path) VALUES (%s)", (coord_path,))
        print(f"✅ Inserted: {csv_file}")

    conn.commit()
except psycopg2.Error:
    # A failed statement aborts the open transaction; roll back so the shared
    # connection stays usable for later cells, then surface the error.
    conn.rollback()
    raise

# Only coord_path is written by the INSERT above; no other column is set here.
print("🎉 All new coord_paths uploaded with video_path = NULL.")


## Fix bad entries

In [None]:
import psycopg2

import platform

# Misspelled name -> corrected name
fixes = {
    'Eli.': 'Eli',
    'Orelans': 'NewOrleans',
    'London.': 'London'
}

# Same host selection as the connection cell at the top of the notebook, so
# this cell also works when it is not run on the Windows machine.
fix_host = "localhost" if platform.system() == "Windows" else "129.108.49.30"

# Use a dedicated connection under its own names: rebinding and closing the
# notebook-wide `conn` / `cursor` here would break every cell that runs after
# this one.
fix_conn = psycopg2.connect(
    dbname="deeplabcut_db",
    user="postgres",
    password="1234",
    host=fix_host,
    port="5432"
)

try:
    fix_cursor = fix_conn.cursor()

    for old_name, corrected_name in fixes.items():
        fix_cursor.execute(
            "UPDATE dlc_files SET name = %s WHERE name = %s",
            (corrected_name, old_name)
        )
        # rowcount shows whether the misspelling was actually present
        print(f"✔ Updated '{old_name}' → '{corrected_name}' ({fix_cursor.rowcount} row(s))")

    fix_conn.commit()
finally:
    # Always release the connection; closing without a commit discards any
    # uncommitted changes if an update failed.
    fix_conn.close()

print("🎉 Name cleanup done.")

## Insert maze number

In [None]:
# Calls update_column("maze") on the update_dlc_table helper; presumably this
# fills the maze column of dlc_table. TODO confirm against the helper.
# NOTE(review): update_dlc_table is not imported in any visible cell; make
# sure it is imported before this cell runs.
update_dlc_table.update_column("maze")