In [None]:
## Script by Oscar Healy & Fabian Plum

import cv2
import pathlib
import json
import csv

import numpy as np
import pandas as pd
import matplotlib as plt

import os
from os.path import isfile, join, isdir

In [None]:
# Root folder of the DLC project; labeled frames are read from its "labeled-data" sub-folder.
dataset_location = r"../YOUR/DLC/PROJECT"
# Folder that receives the cropped images and the generated CollectedData CSV / H5 files.
target_dir = r"../YOUR/OUTPUT/FOLDER"
# Scorer name: selects the "CollectedData_<SCORER>.csv" files that are read and names the output files.
SCORER = "Fabi"

In [None]:
def fix_bounding_boxes(coords, max_val):
    """Clamp box coordinates so they do not reach beyond the image.

    Args:
        coords: sequence of coordinates; entries at index 0 and 2 are limited
            by ``max_val[0]``, all other entries by ``max_val[1]``.
        max_val: pair of upper limits.

    Returns:
        A new list with every coordinate limited to its range and converted
        to ``int``.
    """
    clamped = []
    for position, value in enumerate(coords):
        upper = max_val[0] if position in (0, 2) else max_val[1]

        if value >= upper:
            value = upper
        elif value <= 0:
            value = 0

        clamped.append(int(value))

    return clamped

def resize_box(coords, factor, maxsize, minsize = (10,10)):
    """Scale a bounding box about its centre and keep it inside the image.

    Both side lengths are multiplied by ``sqrt(1 + factor)``, i.e. the box
    area changes by the relative amount ``factor``.

    Args:
        coords: box as ``[min_x, min_y, max_x, max_y]``.
        factor: relative area change; must be >= -1.
        maxsize: ``(max_x, max_y)`` upper limits of the image.
        minsize: ``(width, height)`` of the box used instead when the scaled
            box has no extent along at least one axis.

    Returns:
        ``[min_x, min_y, max_x, max_y]`` with every value limited to
        ``[0, maxsize]``. Values may be floats.

    Raises:
        ValueError: if ``factor`` is smaller than -1 (the scale would be the
            square root of a negative number).
    """
    if factor < -1:
        raise ValueError("factor must be >= -1, got {}".format(factor))

    scale = pow(1 + factor, 0.5)
    centre_x = coords[0] + (coords[2] - coords[0]) / 2
    centre_y = coords[1] + (coords[3] - coords[1]) / 2

    half_width = (centre_x - coords[0]) * scale
    half_height = (centre_y - coords[1]) * scale

    resized_coords = [centre_x - half_width,
                      centre_y - half_height,
                      centre_x + half_width,
                      centre_y + half_height]

    width = resized_coords[2] - resized_coords[0]
    height = resized_coords[3] - resized_coords[1]

    if width == 0 or height == 0:
        # degenerate box: fall back to a minimum-size box around the centre
        # (x uses minsize[0], y uses minsize[1])
        resized_coords = [centre_x - minsize[0] / 2,
                          centre_y - minsize[1] / 2,
                          centre_x + minsize[0] / 2,
                          centre_y + minsize[1] / 2]
    elif width > maxsize[0] or height > maxsize[1]:
        # box larger than the image along either axis: use the whole image
        resized_coords = [0, 0, maxsize[0], maxsize[1]]

    # clamp every coordinate to the image so the box can be used directly
    # for slicing; even indices are x values, odd indices are y values
    return [min(max(value, 0), maxsize[index % 2])
            for index, value in enumerate(resized_coords)]

In [None]:
labeled_dir = dataset_location + "/labeled-data"
# every sub-folder of labeled-data is processed, except those whose name contains "labeled"
frame_dir_names = [f for f in os.listdir(labeled_dir) if (isdir(join(labeled_dir, f)) and "labeled" not in f)]
resize_factor = 0         # size change passed to resize_box when random_size is False
random_size = True        # draw the size change uniformly from [-0.5, 0.5) for every sample
resize_resolution = 500   # edge length (px) of the square output images; None keeps the crop size
num_samples = 2           # number of crops generated per labeled frame
all_points = []           # one row of interleaved x, y label values per entry of output_file_names
output_file_names = []

np.random.seed(0)

# cv2.imwrite cannot write into a missing folder, so make sure the target exists
os.makedirs(target_dir, exist_ok=True)

for m, frame_dir in enumerate(frame_dir_names):
    vid_dir = labeled_dir + "/" + frame_dir
    with open(vid_dir + "/CollectedData_" + SCORER + ".csv") as file:
        list_csvFile = list(csv.reader(file))

    # row 1 holds the body part names (one entry per coordinate column);
    # the first three columns of every row are index columns
    bodyparts = list_csvFile[1][3:]
    frames = list_csvFile[3:]

    for n, frame in enumerate(frames):
        img_path = vid_dir + "/" + frame[2]
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread returns None instead of raising when a file cannot be read
            print("Skipping unreadable image: " + img_path)
            continue

        max_size = img.shape[-2::-1]  # (width, height)

        # x and y values alternate, starting at column 3; empty cells become 0
        x_coords = [float(c) if c != '' else 0 for c in frame[3::2]]
        y_coords = [float(c) if c != '' else 0 for c in frame[4::2]]

        non_zero_x = [x for x in x_coords if x > 0]
        non_zero_y = [y for y in y_coords if y > 0]

        for i in range(num_samples):
            # m is the directory index, n the frame index within that directory
            img_name = "dir_{}_frame_{}_rep_{}.png".format(m, n, i)
            output_file_names.append("labeled-data/" + str(os.path.basename(target_dir)) + "/" + img_name)

            if non_zero_x == [] or non_zero_y == []:
                # no labeled point in this frame: all-zero row, removed later as NaN-only
                all_points.append([0] * len(bodyparts))
                continue

            # [minx, miny, maxx, maxy]
            bbox = [min(non_zero_x), min(non_zero_y), max(non_zero_x), max(non_zero_y)]
            bbox = fix_bounding_boxes(bbox, max_size)

            if random_size:
                bbox = resize_box(bbox, np.random.random() - 0.5, max_size)
            else:
                bbox = resize_box(bbox, resize_factor, max_size)

            # Clamp to the image and truncate to whole pixels BEFORE using the box:
            # slicing would otherwise clip the crop silently while the labels are
            # still shifted and scaled with the unclipped, fractional box.
            bbox = fix_bounding_boxes(bbox, max_size)

            if bbox[2] - bbox[0] <= 0 or bbox[3] - bbox[1] <= 0:
                all_points.append([0] * len(bodyparts))
                continue

            # label positions relative to the top-left corner of the crop
            new_x_coords = [x - bbox[0] for x in x_coords]
            new_y_coords = [y - bbox[1] for y in y_coords]
            crop = img[bbox[1]:bbox[3], bbox[0]:bbox[2]]

            if resize_resolution is not None:
                # the crop is stretched to a square, so x and y scale independently
                factor_X = resize_resolution / (bbox[2] - bbox[0])
                factor_Y = resize_resolution / (bbox[3] - bbox[1])
                out_x = [round(x * factor_X, 2) for x in new_x_coords]
                out_y = [round(y * factor_Y, 2) for y in new_y_coords]
                crop = cv2.resize(crop,
                                  (resize_resolution, resize_resolution),
                                  interpolation = cv2.INTER_CUBIC)
            else:
                out_x = [round(x, 2) for x in new_x_coords]
                out_y = [round(y, 2) for y in new_y_coords]

            # interleave as x0, y0, x1, y1, ...
            all_points.append([value for pair in zip(out_x, out_y) for value in pair])
            cv2.imwrite(target_dir + "/" + img_name, crop)

In [None]:
# build the per-column labels for the column hierarchy:
# the scorer name repeated for every column, and alternating "x" / "y" entries
scorer = [SCORER] * len(bodyparts)
individuals = []

coords = ["x", "y"] * (len(bodyparts) // 2)

In [None]:
categories = [scorer, bodyparts, coords]
categories_tuples = list(zip(*categories))
columns = pd.MultiIndex.from_tuples(
    categories_tuples,
    names=["scorer", "bodyparts", "coords"],
)

final_dataframe = pd.DataFrame(all_points, index=output_file_names, columns=columns)

# zeros, negative values, and (when the crops are resized) values exceeding
# the output resolution are all turned into NaN
invalid = final_dataframe <= 0
if resize_resolution is not None:
    invalid = invalid | (final_dataframe > resize_resolution)
final_dataframe = final_dataframe.mask(invalid)

# keep only rows that still contain at least two values
final_dataframe = final_dataframe.dropna(thresh=2)

In [None]:
# write the labels as CSV next to the cropped images
final_dataframe.to_csv(os.path.join(target_dir, "CollectedData_{}.csv".format(SCORER)))

# IF the function below fails, this is likely due to exceeding the number of columns supported by HDF5 files!
# Restrict the number of simulated animals to < 20 if the goal is to train a DLC network

# `key` is passed by keyword: positional arguments other than the path are
# deprecated in recent pandas releases
final_dataframe.to_hdf(
    os.path.join(target_dir, "CollectedData_" + SCORER + ".h5"),
    key="df_with_missing",
    format="table",
    mode="w")
    