# Module to process video DLC model

This module belongs to the manuscript "Burchardt, L., Van de Sande, Y., Kehy, M., Gamba, M., Ravignani, A., Pouw, W. A complete computational and data toolkit for the dynamic study of laryngeal air sacs in Siamang (Symphalangus syndactylus) with applications for spherical tracking in other animals".

This notebook contains a module for tracking siamang head and air sac postures. The following keypoints are tracked by a trained ResNet-101 model:
- UpperLip
- LowerLip
- Nose
- EyeBridge
- Start_outline_outer_left
- Start_outline_outer_right
- LowestPoint_outline
- MidLowleft_outline
- MidLowright_outline

Note that DeepLabCut needs to be installed (in a command prompt: "pip install -r requirements.txt"). By default, the CPU version is installed. Please see the original DeepLabCut documentation to ensure GPU compatibility if you want to speed up the tracking process. We recommend using a GPU-enabled DeepLabCut installation.

The trained ResNet-101 model needs to be downloaded from Google Drive first. Please go to the following folder, follow the download link there, and save the downloaded model into that same folder: "./AirSacTracker/Toolkit/module_process_video_DLC_model/trained_model_and_metainfo/dlc-models/iteration-0/Deep_AirSacTrackingV1Jan1-trainset95shuffle1/train/".

In [None]:
# load in all the packages needed:

# processing the videos with deeplabcut:
import deeplabcut
import os
import shutil
from os.path import isfile, join
from IPython.display import Video

# performing a circle estimation with Landau algorithm:
import glob
import pandas as pd
import numpy as np
from pprint import pprint


# plot landau circles on processed video:
from os import listdir
from os.path import isfile, join
import cv2
import shutil
import math

## Part 1: processing your video with DLC

In [4]:
# Settings file of the pre-trained DeepLabCut project.
config_path = "./DLC/trained_model_and_metainfo/config.yaml"

# Folder that receives the tracking results.
output_dir = "./DLC/output/"

# Folder that holds the videos to be processed.
videofolder = "../input/"

# Names of all regular files in the video folder (sub-folders are left out).
vids = list(
    filter(lambda name: isfile(join(videofolder, name)), os.listdir(videofolder))
)

In [36]:
# Preview the first input video inline in the notebook.
Video(f"{videofolder}{vids[0]}", width=300, height=200)

In [None]:
# Track every input video with the pre-trained DLC model, one video at a time.
for video_name in vids:
    # input folder + file name gives the full path of the video
    video_path = f"{videofolder}{video_name}"
    videos_to_track = [video_path]
    # run the pre-trained model on this video; results are also stored as CSV
    deeplabcut.analyze_videos(
        config_path,
        videos_to_track,
        save_as_csv=True,
        videotype=".mp4",
        destfolder=output_dir,
    )
    # Alternative without the CSV export: call analyze_videos with
    # save_as_csv=False instead (per the original note, the labeling step below
    # needs the .h5 output rather than the CSV).
    deeplabcut.create_labeled_video(
        config_path, videos_to_track, destfolder=output_dir
    )
# once all videos are tracked: convert the H5 files to CSV so that the data is
# available in both formats
deeplabcut.analyze_videos_converth5_to_csv(output_dir, ".mp4")

In [None]:
# Copy the labeled videos that DLC wrote into output_dir to
# ./DLC/output/labeled_videos/, giving each copy the name of its input video.
labeled_dir = "./DLC/output/labeled_videos/"
# shutil.copy does not create missing folders, so make sure the target exists
os.makedirs(labeled_dir, exist_ok=True)

# all .mp4 files in the DLC output folder (sorted so that reruns are reproducible)
stored_video_names = sorted(
    name for name in os.listdir(output_dir) if name.endswith(".mp4")
)

# Pair every input video with its labeled counterpart by NAME rather than by
# list position: os.listdir gives no ordering guarantee, so position-based
# pairing can silently attach the wrong name to a video (or fail with an
# IndexError when both folders hold a different number of files).
# NOTE(review): assumes DLC names a labeled video "<input name without
# extension>DLC_..." - confirm against the files in output_dir.
copied_from = []
copied_to = []
for vid in vids:
    stem = os.path.splitext(vid)[0]
    matches = [name for name in stored_video_names if name.startswith(stem + "DLC")]
    if not matches:
        print("No labeled video found for " + vid + ", skipping it.")
        continue
    # the labeled video lives in output_dir, not in the current working directory
    shutil.copy(join(output_dir, matches[0]), join(labeled_dir, vid))
    copied_from.append(matches[0])
    copied_to.append(vid)

renaming_df = pd.DataFrame({"DLC_name:": copied_from, "new_label": copied_to})
print(
    "You have sucessfully renamed your labeled videos. Please do double check if the new labels correspont with the videos:"
)
pprint(renaming_df)

We now have a labeled video produced by DLC.

In [None]:
# Preview the first labeled video inline in the notebook.
Video(f"./DLC/output/labeled_videos/{vids[0]}", width=300, height=200)

## Circle estimation

In [None]:
# Input and output locations for the circle estimation.

# base name of the result files written further below
savename = "example8"

# where the DLC tracking results are read from, and which files to pick up
path = "./DLC/output/"
pattern = "*.csv"

# where the results are written to
path_output = "./intermediate_output/"
timeseries_folder = "./intermediate_output/timeseries/"

# every file in `path` that matches `pattern`
list_of_files = glob.glob(f"{path}{pattern}")

print("these are the files that you selected to be processed:")
pprint(list_of_files)

In [15]:
# set parameters

# Likelihood cut-off: from_DLC_to_circle only keeps tracked points whose
# "likelihood" value is greater than this number for the circle estimation.
threshold = 0.6

In [16]:
def estimateInitialGuessCircle(XY):
    """Return a rough starting circle ``[x0, y0, r0]`` for an iterative circle fit.

    The centre guess is the centroid of the points; the radius guess is the
    mean distance between the points and that centroid.

    Parameters
    ----------
    XY : pandas.DataFrame
        Point coordinates in the columns ``"x"`` and ``"y"``.

    Returns
    -------
    list
        ``[x0, y0, r0]``: centre coordinates and radius of the initial guess.
    """
    # convert explicitly so the arithmetic also works on object-typed columns
    x = np.asarray(XY["x"], dtype=float)
    y = np.asarray(XY["y"], dtype=float)

    x0 = np.mean(x)
    y0 = np.mean(y)
    # Distance of every point to the centroid. The centroid has to be
    # SUBTRACTED before squaring; adding the squared terms (x**2 + x0**2)
    # does not measure a distance and inflates the radius for any data that is
    # not centred on the origin.
    r0 = np.mean(np.sqrt((x - x0) ** 2 + (y - y0) ** 2))
    return [x0, y0, r0]

In [17]:
def Landau(XY, ParIni=np.nan, epsilon=0.0001, IterMax=800):
    """Fit a circle to 2-D points with the iterative Landau scheme.

    The points are shifted so that their centroid is the origin, the circle
    parameters are updated repeatedly until the relative change drops below
    ``epsilon`` (or the iteration limit is hit), and the result is shifted back.

    Parameters
    ----------
    XY : pandas.DataFrame
        Point coordinates in the columns ``"x"`` and ``"y"``.
    ParIni : sequence of three floats, optional
        Starting circle ``[x, y, r]``. When left at NaN (or ``None``) the start
        is taken from ``estimateInitialGuessCircle(XY)``.
    epsilon : float
        Convergence tolerance on the relative parameter change.
    IterMax : int
        Iteration limit; the loop runs at most ``IterMax + 1`` times.

    Returns
    -------
    list
        ``[x, y, r]``: centre coordinates and radius of the fitted circle.

    Raises
    ------
    ValueError
        If ``ParIni`` is given but does not hold exactly three values.
    """
    # explicit float arrays keep the vectorised maths working on object columns
    x = np.asarray(XY["x"], dtype=float)
    y = np.asarray(XY["y"], dtype=float)

    # np.isnan on a list returns an array, which cannot be used in a plain
    # `if`; reduce it with .all() so both "no start given" (scalar NaN / None)
    # and a real [x, y, r] start are handled.
    start = np.asarray(ParIni, dtype=float)
    if np.isnan(start).all():
        start = np.asarray(estimateInitialGuessCircle(XY), dtype=float)
    if start.shape != (3,):
        raise ValueError("ParIni must hold three values: [x, y, r]")

    # centroid of the points, padded with 0 so it can be added to [x, y, r]
    centroid = np.array([np.mean(x), np.mean(y), 0.0])
    X = x - centroid[0]
    Y = y - centroid[1]

    ParNew = start - centroid

    for _ in range(IterMax + 1):
        ParOld = ParNew
        Dx = X - ParOld[0]
        Dy = Y - ParOld[1]
        # distance of every point to the current centre estimate
        D = np.sqrt(Dx * Dx + Dy * Dy)
        mean_D = np.mean(D)
        ParNew = np.array(
            [-np.mean(Dx / D) * mean_D, -np.mean(Dy / D) * mean_D, mean_D]
        )

        # relative change of the parameters; epsilon in the denominator guards
        # against a division by zero
        progress = np.linalg.norm(ParNew - ParOld) / (
            np.linalg.norm(ParOld) + epsilon
        )
        if progress < epsilon:
            break

    # as in the original implementation, the parameters the last update
    # started from are reported, shifted back to the original coordinates
    return list(ParOld + centroid)

In [18]:
def data_prep_radius_estim_DLC(data):
    """Reshape a raw DLC table into one row per air-sac outline point and frame.

    ``data`` is expected in the layout this notebook gets from ``pd.read_csv``:
    row 0 holds the body-part names, row 1 the coordinate names
    (x / y / likelihood) and the tracked values start at row 2. The first
    column is labelled ``"frames"``.

    Parameters
    ----------
    data : pandas.DataFrame
        Raw tracking table as described above. It is not modified.

    Returns
    -------
    pandas.DataFrame
        Columns ``x``, ``y``, ``likelihood`` and ``frame``, with five rows per
        frame (one per outline point, in the order of ``list_airsac_points``).
        ``frame`` is the 0-based position of the frame among the data rows, so
        the very first frame is included and every point appears exactly once.

    Raises
    ------
    KeyError
        If one of the required air-sac columns is missing from ``data``.
    """
    # columns needed for the circle estimation, three per outline point
    list_airsac_points = [
        "Start_outline_outer_left_x",
        "Start_outline_outer_left_y",
        "Start_outline_outer_left_likelihood",
        "Start_outline_outer_right_x",
        "Start_outline_outer_right_y",
        "Start_outline_outer_right_likelihood",
        "LowestPoint_outline_x",
        "LowestPoint_outline_y",
        "LowestPoint_outline_likelihood",
        "MidLowleft_outline_x",
        "MidLowleft_outline_y",
        "MidLowleft_outline_likelihood",
        "MidLowright_outline_x",
        "MidLowright_outline_y",
        "MidLowright_outline_likelihood",
    ]
    values_per_point = 3  # x, y, likelihood
    n_points = len(list_airsac_points) // values_per_point

    # build "<bodypart>_<coordinate>" names from the two header rows
    colnames = ["frames"]
    colnames.extend(
        f"{data.iloc[0, i]}_{data.iloc[1, i]}" for i in range(1, data.shape[1])
    )

    # work on a copy so the caller's table keeps its original column labels
    df_all = data.iloc[2:, :].copy()
    df_all.columns = colnames

    # a missing column would shift the x/y/likelihood triplets silently
    missing = [name for name in list_airsac_points if name not in df_all.columns]
    if missing:
        raise KeyError(f"missing air sac columns: {missing}")

    df_sub = df_all[list_airsac_points].apply(pd.to_numeric)

    # (n_frames, 15) -> (n_frames * 5, 3): every run of three columns becomes
    # one row, frame by frame. This replaces the row-by-row append, which
    # skipped the first frame (loop started at 1) and returned the whole table
    # once per frame.
    values = df_sub.to_numpy(dtype=float)
    n_frames = values.shape[0]
    circle_format_data = pd.DataFrame(
        values.reshape(n_frames * n_points, values_per_point),
        columns=["x", "y", "likelihood"],
    )
    circle_format_data["frame"] = np.repeat(np.arange(n_frames), n_points)
    return circle_format_data

In [19]:
# sub functions for normalization


def nose_eye_normalization(auto_data, min_frames=2, threshold_normalization=0.8):
    """Return the mean nose-to-eye-bridge distance as a normalization value.

    ``auto_data`` is expected in the layout this notebook gets from
    ``pd.read_csv``: row 0 holds the body-part names, row 1 the coordinate
    names (x / y / likelihood) and the tracked values start at row 2.

    Parameters
    ----------
    auto_data : pandas.DataFrame
        Raw tracking table as described above. It is not modified.
    min_frames : int
        Minimum number of usable frames needed to compute a value.
    threshold_normalization : float
        A frame is usable when the likelihood of both ``Nose`` and
        ``EyeBridge`` is at least this value.

    Returns
    -------
    float
        Mean Euclidean distance between ``EyeBridge`` and ``Nose`` over the
        usable frames, or NaN when fewer than ``min_frames`` frames are usable.
    """
    list_normalization_points = [
        "Nose_x",
        "Nose_y",
        "Nose_likelihood",
        "EyeBridge_x",
        "EyeBridge_y",
        "EyeBridge_likelihood",
    ]

    # build "<bodypart>_<coordinate>" names from the two header rows
    colnames = ["frames"]
    colnames.extend(
        f"{auto_data.iloc[0, i]}_{auto_data.iloc[1, i]}"
        for i in range(1, auto_data.shape[1])
    )

    # work on a copy so the caller's table keeps its original column labels
    df = auto_data.iloc[2:, :].copy()
    df.columns = colnames
    df_sub = df.filter(items=list_normalization_points).apply(pd.to_numeric)

    # keep only frames in which both landmarks were tracked reliably
    reliable = df_sub.loc[
        (df_sub["Nose_likelihood"] >= threshold_normalization)
        & (df_sub["EyeBridge_likelihood"] >= threshold_normalization)
    ]

    if reliable.shape[0] < min_frames:
        # returning NaN directly avoids the "mean of empty slice" warning
        return np.nan

    # one distance per usable frame, computed once for the whole column (the
    # previous version recomputed the full column once per row)
    distance = np.sqrt(
        (reliable["EyeBridge_x"] - reliable["Nose_x"]) ** 2
        + (reliable["EyeBridge_y"] - reliable["Nose_y"]) ** 2
    )
    return np.nanmean(distance)

In [20]:
def from_DLC_to_circle(path, list_of_files):
    """Estimate one air-sac circle per frame for every DLC result file.

    For each file the tracked outline points are filtered by the module-level
    ``threshold`` (likelihood must be greater), grouped per frame and fitted
    with ``Landau``. A per-file normalization value from
    ``nose_eye_normalization`` is merged in and used to scale the radius.

    Parameters
    ----------
    path : str
        Not used by the function; kept so existing calls keep working.
    list_of_files : list of str
        Paths of the DLC CSV files to process.

    Returns
    -------
    pandas.DataFrame
        Columns ``radius``, ``x``, ``y``, ``frame``, ``videofile``,
        ``normalization_value`` and ``norm_radius``. ``frame`` is the frame
        number delivered by ``data_prep_radius_estim_DLC``. Frames with fewer
        than three usable points get NaN circle values; frames without any
        usable point produce no row.
    """
    columnnames = ["radius", "x", "y", "frame", "videofile"]
    # Rows are collected in plain lists and turned into frames once at the end.
    # This avoids the quadratic row-by-row appends and the chained assignment
    # into a pre-allocated frame, which pandas may apply to a temporary copy.
    radius_rows = []
    normalization_rows = []

    for videofile in list_of_files:
        auto_data = pd.read_csv(videofile)

        data_circle_estimation = data_prep_radius_estim_DLC(data=auto_data)
        data_circle_estimation = data_circle_estimation.astype({"frame": "int"})
        reliable_points = data_circle_estimation.loc[
            data_circle_estimation["likelihood"] > threshold
        ]

        # The group key is the real frame number. A running counter would drift
        # as soon as one frame has no point above the threshold, because such a
        # frame does not form a group at all.
        for frame_number, group in reliable_points.groupby("frame", group_keys=False):
            frame_data = group.drop_duplicates()
            if frame_data.shape[0] >= 3:
                # a circle needs at least three points
                circles_LAN = Landau(
                    frame_data.iloc[:, 0:2],
                    ParIni=np.nan,
                    epsilon=1e-06,
                    IterMax=500,
                )
            else:
                circles_LAN = [np.nan, np.nan, np.nan]

            # Landau returns [x, y, r]; the output order is radius, x, y
            radius_rows.append(
                [
                    circles_LAN[2],
                    circles_LAN[0],
                    circles_LAN[1],
                    frame_number,
                    videofile,
                ]
            )

        normalization_rows.append([nose_eye_normalization(auto_data), videofile])

    radius_all = pd.DataFrame(radius_rows, columns=columnnames)
    normalization_value = pd.DataFrame(
        normalization_rows, columns=["normalization_value", "videofile"]
    )
    results = pd.merge(radius_all, normalization_value, how="left", on="videofile")

    results["norm_radius"] = pd.to_numeric(
        results["radius"] / results["normalization_value"]
    )

    return results

In [21]:
# run the circle estimation on all selected DLC result files
results = from_DLC_to_circle(path, list_of_files)

In [22]:
# to_csv does not create missing folders, so make sure both targets exist
os.makedirs(path_output, exist_ok=True)
os.makedirs(timeseries_folder, exist_ok=True)

# full result table
results.to_csv(join(path_output, savename + "_DLC_toRadii.csv"))
# same table as time series input for the plotting step, missing values as "NA"
results.to_csv(join(timeseries_folder, savename + ".csv"), na_rep="NA")

## Plot videos

In [None]:
# Draw the estimated circles onto the labeled videos and save the result.
tsfol = "./intermediate_output/timeseries/"  # this is where your timeseries are with the same name as the complementary video
vidfol = "./DLC/output/labeled_videos/"  # this is where the original videos are
outfol = "./output/"  # this is where you can collect your output
# the video writer does not create missing folders
os.makedirs(outfol, exist_ok=True)
# list all the time series files; anything that is not a .csv is ignored
toprocess = [name for name in os.listdir(tsfol) if name.endswith(".csv")]

for tt in toprocess:
    ts = pd.read_csv(tsfol + tt)  # get the time series
    idname = tt[0 : len(tt) - 4]  # remove the .csv
    vidloc = vidfol + idname + ".mp4"  # add mp4 (we assume we only process mp4s!)
    cap = cv2.VideoCapture(vidloc)  # open the video
    if not cap.isOpened():
        # without this check an empty output video would be written silently
        print("could not open video: " + vidloc + ", skipping it")
        cap.release()
        continue
    # frame size and rate of the input are reused for the new video
    frameWidth = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
    frameHeight = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
    fps = cap.get(cv2.CAP_PROP_FPS)  # fps = frames per second
    # what should we write to?
    out = cv2.VideoWriter(
        outfol + idname + "_circle.mp4",
        cv2.VideoWriter_fourcc(*"MP4V"),
        fps,
        (int(frameWidth), int(frameHeight)),
    )
    print("working on video: " + idname)
    # 0-based index of the frame being processed. The "frame" column of the
    # time series starts at 0, whereas CAP_PROP_POS_FRAMES read after
    # cap.read() already points to the next frame.
    frame_number = 0
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # slice of the time series that belongs to the current frame
            dat = ts.loc[ts["frame"] == frame_number, :]
            for index, row in dat.iterrows():
                # only draw a circle when there are no NaN's
                if not math.isnan(row["radius"]):
                    cv2.circle(
                        frame,
                        (int(row["x"]), int(row["y"])),
                        int(row["radius"]),
                        (200, 0, 0),
                        2,
                    )  # draw circle
            out.write(frame)  # save it into a new frame
            frame_number += 1
    finally:
        # cleaning up for EVERY video, also when an error occurs: releasing
        # only after the loop left all but the last video open
        out.release()  # release the output video
        cap.release()  # release the original video

print(
    "We are all done, go look into your output folder: " + str(os.path.abspath(outfol))
)