In [None]:
# Convert labeled data in JARVIS to the Lightning Pose (LP) format
# Lightning Pose provides a script to convert DLC labeled data to LP labeled data
# Reference
# https://github.com/danbider/lightning-pose/blob/main/scripts/converters/dlc2lp.py



In [1]:
import glob
import os
import shutil

import numpy as np
import pandas as pd
from PIL import Image

In [2]:
jarvis_dir = r'E:\Hand_tracking\Datasets\annotation\6cam_dataset_231216'  # path to the labeled JARVIS dataset
lp_dir = r'E:\Hand_tracking\LP_projects\LP_240120'  # path to the Lightning Pose (LP) project

# Find all labeled trials in the JARVIS dataset.
# Trial names start with the four-digit year, which is used to pick them out.
prefix_year = '2023'
# Keep directories only: the following cells call os.listdir() on every entry,
# so a stray file whose name starts with the prefix would make them fail.
dirs = sorted(
    entry
    for entry in os.listdir(jarvis_dir)
    if entry.startswith(prefix_year) and os.path.isdir(os.path.join(jarvis_dir, entry))
)
# Accumulator for the per-camera label tables.
dfs = []


In [3]:
# Build one Lightning Pose label table from every JARVIS trial/camera folder.
# The accumulator is reset here so that re-running this cell does not append
# the same frames a second time (which would duplicate rows in the output).
dfs = []
for d in dirs:
    trial_dir = os.path.join(jarvis_dir, d)
    # Camera folders are sub-directories; ignore stray files in the trial folder.
    cameras = [c for c in os.listdir(trial_dir) if os.path.isdir(os.path.join(trial_dir, c))]
    for c in cameras:
        cam_dir = os.path.join(trial_dir, c)
        csv_file = os.path.join(cam_dir, "annotations.csv")
        if not os.path.isfile(csv_file):
            raise FileNotFoundError(f"did not find annotations.csv in {cam_dir}")

        # The file is read twice: once for the header rows and once for the
        # per-frame rows (everything after the first 4 lines).
        # NOTE(review): the frame rows appear to carry one more field than the
        # header rows, which is why on_bad_lines='skip' keeps only the header
        # in the first read and why the last column is dropped from the second
        # read -- confirm against the JARVIS annotation format.
        header_rows = pd.read_csv(csv_file, on_bad_lines='skip', header=None, index_col=0)
        frame_rows = pd.read_csv(csv_file, skiprows=4, header=None, index_col=0)
        # With header=None and index_col=0 the columns are labeled 1..N, so
        # the label of the last column equals the number of columns.
        frame_rows = frame_rows.drop(columns=[frame_rows.shape[1]])

        # Remove the 'entities' row so that three header rows remain.
        not_entities = [x != 'entities' for x in header_rows.index.values]
        header_rows = header_rows.iloc[not_entities]

        # Use the 'coords' row to locate and remove the 'state' columns.
        is_coords = [x == 'coords' for x in header_rows.index.values]
        is_xy = [row != 'state' for row in header_rows.iloc[is_coords].values]
        keep_cols = is_xy[0]  # boolean mask over columns, taken from the 'coords' row
        header_rows = header_rows.iloc[:, keep_cols]
        frame_rows = frame_rows.iloc[:, keep_cols]

        # Replace each image file name with its path inside the LP project.
        # The frame number is what remains after stripping the first 6
        # characters and the 4-character extension from the JARVIS file name;
        # frames are renamed img<NNNN>.png (JARVIS: .jpg, LP/DLC: .png).
        vid = d + '_' + c
        frame_numbers = [int(name[6:-4]) for name in frame_rows.index.values]
        frame_rows.index = [f"labeled-data/{vid}/img{n:04d}.png" for n in frame_numbers]

        # Round-trip through CSV so that the three header rows are parsed as a
        # three-level column header.
        out_csv = os.path.join(cam_dir, "CollectedData.csv")
        pd.concat([header_rows, frame_rows]).to_csv(out_csv, header=False)
        dfs.append(pd.read_csv(out_csv, header=[0, 1, 2], index_col=0))

if not dfs:
    raise ValueError(f"no annotated camera folders found under {jarvis_dir}")
df_all = pd.concat(dfs)

os.makedirs(lp_dir, exist_ok=True)

# save concatenated labels
df_all.to_csv(os.path.join(lp_dir, "CollectedData.csv"))
            
    
        

In [4]:
# Copy the labeled frames (converted to .png) and the source videos into the
# LP project, using "<trial>_<camera>" as the video name.
src_vid_dir = r'E:\Hand_tracking\Recordings\Videos'
os.makedirs(os.path.join(lp_dir, 'videos'), exist_ok=True)
for d in dirs:
    trial_dir = os.path.join(jarvis_dir, d)
    # Camera folders are sub-directories; ignore stray files in the trial folder.
    cameras = [c for c in os.listdir(trial_dir) if os.path.isdir(os.path.join(trial_dir, c))]
    for c in cameras:
        vid = d + '_' + c  # new video name
        cam_dir = os.path.join(trial_dir, c)
        frames_out = os.path.join(lp_dir, "labeled-data", vid)
        os.makedirs(frames_out, exist_ok=True)

        # Convert .jpg to .png and copy frames over
        for jpg_name in os.listdir(cam_dir):
            if not jpg_name.endswith('.jpg'):
                continue
            # Frame number = file name without its first 6 characters and
            # without the 4-character extension.
            new_name = 'img' + format(int(jpg_name[6:-4]), '04d') + ".png"
            # The context manager releases the image file handle promptly,
            # even if saving fails.
            with Image.open(os.path.join(cam_dir, jpg_name)) as im:
                im.save(os.path.join(frames_out, new_name))

        # copy videos over
        # NOTE(review): the first 10 characters of the trial name are used as
        # the session folder name -- presumably a date prefix; confirm.
        session = d[0:10]
        src = os.path.join(src_vid_dir, session, d, c + '.mp4')
        dst = os.path.join(lp_dir, "videos", vid + '.mp4')
        shutil.copy(src, dst)

In [5]:
# Sanity check: every frame path listed in the label table must exist
# relative to the LP project directory.
assert all(os.path.exists(os.path.join(lp_dir, rel_path)) for rel_path in df_all.index)

In [None]:
import argparse
import glob
import os
import shutil

import numpy as np
import pandas as pd

# Parse the source (DLC) and destination (LP) project directories.
# NOTE(review): argparse reads sys.argv, so this cell is written as a
# command-line script; inside a notebook kernel parse_args() will likely
# reject the kernel's own arguments -- confirm how this cell is meant to run.
# It also rebinds lp_dir, dirs and dfs used by the JARVIS cells.
parser = argparse.ArgumentParser()
# Both paths are required: without them the checks below would fail with an
# unrelated TypeError instead of a usage message.
parser.add_argument("--dlc_dir", type=str, required=True)
parser.add_argument("--lp_dir", type=str, required=True)
args = parser.parse_args()
dlc_dir = args.dlc_dir
lp_dir = args.lp_dir

print(f"Converting DLC project located at {dlc_dir} to LP project located at {lp_dir}")

# check provided DLC path exists and is a directory
if not os.path.isdir(dlc_dir):
    raise NotADirectoryError(f"did not find the directory {dlc_dir}")

# check paths are not the same; compare normalized absolute paths so that
# trailing separators, relative paths or case differences (on case-insensitive
# platforms) cannot hide the fact that both point at the same folder
if os.path.normcase(os.path.abspath(dlc_dir)) == os.path.normcase(os.path.abspath(lp_dir)):
    raise NameError("dlc_dir and lp_dir cannot be the same")

# find all labeled data in DLC project
dirs = os.listdir(os.path.join(dlc_dir, "labeled-data"))
dirs.sort()
# Accumulator for the per-video label tables.
dfs = []

# Collect the label table of every folder under <dlc_dir>/labeled-data.
# A CollectedData*.csv file is preferred; CollectedData*.h5 is the fallback.
# Folders with neither are reported and skipped.
vid_key = ("Unnamed: 1_level_0", "Unnamed: 1_level_1", "Unnamed: 1_level_2")
img_key = ("Unnamed: 2_level_0", "Unnamed: 2_level_1", "Unnamed: 2_level_2")

for d in dirs:
    print(d)
    label_dir = os.path.join(dlc_dir, "labeled-data", d)
    try:
        # [0] raises IndexError when no CSV matches, which triggers the H5 fallback
        csv_file = glob.glob(os.path.join(label_dir, "CollectedData*.csv"))[0]
        df_tmp = pd.read_csv(csv_file, header=[0, 1, 2], index_col=0)
        if len(df_tmp.index.unique()) != df_tmp.shape[0]:
            # new DLC labeling scheme that splits video/image in different cells
            vids = df_tmp.loc[:, vid_key]
            imgs = df_tmp.loc[:, img_key]
            new_col = [f"labeled-data/{v}/{i}" for v, i in zip(vids, imgs)]
            df_tmp1 = df_tmp.drop(vid_key, axis=1)
            df_tmp2 = df_tmp1.drop(img_key, axis=1)
            df_tmp2.index = new_col
            df_tmp = df_tmp2
    except IndexError:
        try:
            h5_file = glob.glob(os.path.join(label_dir, "CollectedData*.h5"))[0]
            df_tmp = pd.read_hdf(h5_file)
            if isinstance(df_tmp.index, pd.MultiIndex):
                # new DLC labeling scheme that splits video/image in different cells
                imgs = [i[2] for i in df_tmp.index]
                vids = [df_tmp.index[0][1] for _ in imgs]
                new_col = [f"labeled-data/{v}/{i}" for v, i in zip(vids, imgs)]
                df_tmp1 = df_tmp.reset_index().drop(
                    columns="level_0").drop(columns="level_1").drop(columns="level_2")
                df_tmp1.index = new_col
                df_tmp = df_tmp1
        except IndexError:
            print(f"Could not find labels for {d}; skipping")
            # Actually skip: without this, the table of the previous folder
            # would be appended a second time (or df_tmp would be undefined
            # on the first iteration).
            continue
    dfs.append(df_tmp)
df_all = pd.concat(dfs)

# Create the LP project folder (if needed) and write the merged label table.
os.makedirs(lp_dir, exist_ok=True)
df_all.to_csv(os.path.join(lp_dir, "CollectedData.csv"))

# Mirror the labeled frames into the LP project. With its default arguments
# copytree requires that the destination does not exist yet.
src = os.path.join(dlc_dir, "labeled-data")
dst = os.path.join(lp_dir, "labeled-data")
shutil.copytree(src, dst)

# Mirror the videos as well, or create an empty folder when the DLC project
# has none.
src = os.path.join(dlc_dir, "videos")
dst = os.path.join(lp_dir, "videos")
if not os.path.exists(src):
    print("DLC video directory does not exist; creating empty video directory")
    os.makedirs(dst, exist_ok=True)
else:
    print("copying video files")
    shutil.copytree(src, dst)

# Sanity check: every frame path listed in the label table must exist
# relative to the LP project directory.
assert all(os.path.exists(os.path.join(lp_dir, rel_path)) for rel_path in df_all.index)