In [None]:
# import relevant libraries
import os
import copy
import glob
import pandas as pd

In [None]:
# function to merge separated columns of data for a single deeplabcut project
def merge_video_data(path, template_path, output_path):
    """Merge per-leg label csv files into one csv per video.

    Every csv matching ``<path>/*/labeled-data/*/*.csv`` is collected and
    grouped by the name of the folder that directly contains it (the video
    folder).  For each video the template csv is loaded, the first three
    columns of the first leg file (rows 3 and below) are appended as the
    file/img name columns, and the data columns of every leg file are copied
    into the template columns whose row-1 header matches.  The result is
    written to ``<output_path>/<video>/CollectedData_BidayeLab.csv`` where
    ``<video>`` is the value found in row 3, column 1 of the merged table.

    Parameters
    ----------
    path : main folder that contains one project folder per leg
    template_path : csv whose row 1 defines the order of the joint names
        (assumed to consist of exactly the three header rows 0-2, since the
        data rows are appended with labels starting at 3)
    output_path : folder in which the per-video folders are written; a
        missing per-video folder is created

    Raises
    ------
    ValueError
        if a leg file has a row-1 header that is not present in the template
    """

    # get file paths of all per-leg csv files; sorted so that the leg order
    # (and therefore which leg provides the file/img names) is deterministic
    pattern = os.path.join(path, '*/labeled-data/*/*.csv')
    print("Collecting data from", pattern)
    csv_paths = sorted(glob.glob(pattern))

    # cluster the files by video folder name; this works for any number of
    # videos and legs and for both "/" and "\\" path separators
    files_by_video = {}
    for csv_path in csv_paths:
        video_folder = os.path.basename(os.path.dirname(csv_path))
        files_by_video.setdefault(video_folder, []).append(csv_path)

    # combine data for each list of video files and save
    for leg_paths in files_by_video.values():
        # read every leg csv of this video once
        legs = [pd.read_csv(leg_path, header=None) for leg_path in leg_paths]

        # import the template
        cam = pd.read_csv(template_path, header=None)

        # extend the template with the file/img names (first three columns)
        # taken from the first leg's data rows
        cam = pd.concat([cam, legs[0].loc[3:, 0:2]])

        # get all joint headers (row 1 is never modified below)
        joint_headers = cam.loc[1, :].to_list()

        # merge legs from a single video into the template
        for leg_path, leg in zip(leg_paths, legs):

            # data columns start at 3 and are handled in pairs (col, col+1)
            for col in range(3, len(leg.columns), 2):

                # get index of current leg's header in the template
                joint = str(leg.loc[1, col])
                try:
                    idx = joint_headers.index(joint)
                except ValueError:
                    raise ValueError(
                        "joint {!r} from {} is not in the template header row"
                        .format(joint, leg_path)
                    )

                # keep the template's header rows and fill in the leg's data
                # rows; assignment aligns on the row labels
                cam.loc[:, idx] = pd.concat([cam.loc[:2, idx], leg.loc[3:, col]])
                cam.loc[:, idx + 1] = pd.concat([cam.loc[:2, idx + 1], leg.loc[3:, col + 1]])

        # save data for a video to the corresponding labeled-data folder
        out_file = os.path.join(str(output_path), str(cam.loc[3, 1]),
                                'CollectedData_BidayeLab.csv')
        print("Saving to", out_file)
        os.makedirs(os.path.dirname(out_file), exist_ok=True)
        cam.to_csv(out_file, header=False, index=False)

In [None]:
# Inputs for the merge:
#   path          - main folder holding one project folder per leg
#   template_path - template csv with the manually-defined order of joint names
#   output_path   - labeled-data folder whose individual video folders receive
#                   the filled csvs
path = 'C:/dlc-utilities/merge-datasets/example/1-camA-Kate'
template_path = 'C:/dlc-utilities/merge-datasets/example/cam-template.csv'
output_path = 'C:/dlc-utilities/merge-datasets/example/camA_combined/labeled-data/'

# run the merge of the per-leg datasets
merge_video_data(path=path, template_path=template_path, output_path=output_path)

In [None]:
# convertcsv2h5() is a DeepLabCut function that converts the csv files in the labeled-data folders to the required h5 files
# note: it needs a complete DeepLabCut project structure to run (copy the labeled-data folder into one first)
# deeplabcut.convertcsv2h5(config_path)