In [1]:
# Video Processing to generate training set clips


In [14]:
import numpy as np
import cv2
import pandas as pd
import os
import random
from ast import literal_eval

In [15]:
def read_temporal_annotations(annotation_file, fps = 30, skiprows = 1):
    """Read annotations as seconds for given animals in a file

    Args:
        annotation_file (str): path to annotation file
        skiprows (int, optional): number of rows to skip in annotation file. Defaults to 2.

    Returns:
        dict: list of start, stops for each animal 
    """
    via_output = pd.read_csv(annotation_file, skiprows = skiprows)
    via_output['behavior'] = via_output['metadata'].apply(lambda x: x.split('"TEMPORAL-SEGMENTS":"')[-1][:-2].title())
    via_output['frame_start'] = round(via_output['temporal_segment_start']*30).astype(int)
    via_output['frame_end'] = round(via_output['temporal_segment_end']*30).astype(int)
    
    return via_output.loc[:,['behavior', 'frame_start', 'frame_end', 'temporal_segment_start', 'temporal_segment_end']].sort_values(by = 'frame_start').reset_index(drop = True)

In [16]:
def frame_crop(x, y, buffer = 200, shape = [1080, 1920]):
    
    if (x + buffer) >  shape[1]:
        max_x = shape[1]
    else:
        max_x = x+buffer

    if (x - buffer) < 0:
        min_x = 0
    else:
        min_x = x-buffer

    if (y + buffer) >  shape[0]:
        max_y = shape[0]
    else:
        max_y = y+buffer

    if (y - buffer) < 0:
        min_y = 0
    else:
        min_y = y - buffer

    # print(f"Min y: {min_y}, Max y: {max_y}, Min x: {min_x}, Max x: {max_x}")
    cc = [round(min_y, 1), round(max_y, 1), round(min_x,1), round(max_x,1)]
    center = [round(x, 1),round(y, 1)]
    return [round(c) for c in cc], [round(c) for c in center]



def padcrop_image(frame, buffer, cc, center):

    padded_image = np.zeros((buffer*2, buffer*2, 3), dtype=np.uint8)
     
    # Paste the cropped region onto the black image
    padded_image[
        max(0, buffer - (center[1] - cc[0])):min(buffer*2, buffer + (cc[1] - center[1])),
        max(0, buffer - (center[0] - cc[2])):min(buffer*2, buffer + (cc[3] - center[0])),:] = frame[cc[0]:cc[1], cc[2]:cc[3],:]

    return padded_image


In [17]:
def generate_behavior_clips(vid, ann_table, folder, crop_center= None, buffer = None, fps = 30, frame_shape = [1080, 1920, 3], an_suffix = None):
    
    writers = {k:None for k in ann_table['behavior'].unique()}
    cap = cv2.VideoCapture(vid)
    frames = cap.get(cv2.CAP_PROP_FRAME_COUNT) 
    start = ann_table.loc[0, 'frame_start']
    

    if start > 0:
        cap.set(cv2.CAP_PROP_POS_FRAMES, int(start-1))
    frame_count = start
    reading = True

    if buffer is not None:
        out_shape = (buffer *2, buffer *2, 3)
    else:
        out_shape = frame_shape

    while reading:
        if frame_count > frames:
            reading = False
            for k,v in writers.items():
                if v is not None:
                    v.release()
        else:
            
            current_table = ann_table.loc[(frame_count < ann_table['frame_end']) & (frame_count >= ann_table['frame_start']),:]
            ret, frame = cap.read()
            if not ret:
                print(f'Could not read frame {frame_count}')
                break 
            # print(f'Current Frame: {frame_count}')
            for _, row in current_table.iterrows():
                
                if (frame_count == row['frame_start']) or (writers[row['behavior']] is None):
                    base_name, _ = os.path.splitext(os.path.basename(vid))
                    if an_suffix is not None:
                        base_name = base_name + '_'+ an_suffix
                    write_name = os.path.join(folder, row['behavior'], base_name + '_'+ str(int(row['frame_start']))+ '_'+str(int(row['frame_end'])) +'.mp4')
                    writer = cv2.VideoWriter(write_name, cv2.VideoWriter_fourcc(*'mp4v'), fps, out_shape[:2])
                    writers[row['behavior']] = writer
                else:
                    writer = writers[row['behavior']]
                

                if crop_center is not None:
                    
                    cc, center = frame_crop(crop_center[0], crop_center[1], buffer = buffer, shape = frame.shape)
                    # print(f'Crop Center: {cc}')
                    # print(f'Center: {center}')
                    pc = padcrop_image(frame, buffer, cc, center)
                    writer.write(pc)
                    # print(write_name)
                    # print(f'Wrote frame #{frame_count}')
                else:
                    writer.write(frame)
                
                if (frame_count + 1) > row['frame_end']:
                    writer.release()
                    writers[row['behavior']] = None
        

            frame_count = frame_count + 1
            if frame_count > ann_table.loc[ann_table.shape[0]-1, 'frame_end']:
                reading = False
    
    cap.release()

In [18]:
def get_params_from_vid(vid_path, animals = ['m1','m2','m3','m4','m5','m6'], dp = 0.5, minDist = 400):
    src = cv2.VideoCapture(str(vid_path))
    frames = src.get(cv2.CAP_PROP_FRAME_COUNT) 
    fps = src.get(cv2.CAP_PROP_FPS)

    ret = False
    count = 0

    while not ret:
        ret, frame = src.read() 
        count = count +1
        if count > frames:
            return None
    src.release()
     
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    circles = cv2.HoughCircles(gray, cv2.HOUGH_GRADIENT, dp, minDist)
    # print(circles)

    if len(animals) > 1:
        found = match_circles(circles[0])
        dat = {}
        for f, a in zip(found[0], animals):
            dat[a] = f
    else:
        dat = {}
        if circles is not None:
            x_dif = np.argmin(abs(circles[0][:,0] - (frame.shape[1]/2)))
            dat[animals] = circles[0][x_dif,:]
        else:
            dat[animals] = np.array([frame.shape[1]/2, frame.shape[0]/2, 350])
        # plot_circles(vid_path, np.array( [[dat['1']]]), radius = 400)
    return dat, frames, fps, frame.shape


def match_circles(circles, x_exp = np.array([350, 980, 1560]), y_exp = np.array([285, 800]), distance = 100):

    found = np.full([1,6,3], np.nan)
    x_displace = np.zeros((6))
    y_displace = np.zeros((6))
    for c in circles:
        # print(c)
        x_dis = x_exp - c[0]
        x_match = np.where(np.abs(x_dis) <= distance)[0]

        y_dis = y_exp - c[1]
        y_match = np.where(np.abs(y_exp - c[1]) <= distance)[0]

        
        if len(x_match) and len(y_match):
            id = (3* y_match)+ x_match 
            found[0,id,:] = c
            x_displace[id]= x_dis[x_match[0]]
            y_displace[id] = y_dis[y_match[0]]

    x_displace[x_displace == 0] = np.nan
    y_displace[y_displace == 0] = np.nan

    for v in np.argwhere(np.isnan(found)):
        # print(v[1])
        if v[2] == 0: # x
            start = np.mod(v[1], 3)
            dis = np.nanmean(x_displace[start::3])
            if np.isnan(dis):
                dis = np.nanmean(x_displace)
            found[v[0], v[1], v[2]] = x_exp[start] - dis
        if v[2] == 1: # y
            if v[1] < 3:
                dis = np.nanmean(y_displace[:3])
                row = 0
            else:
                dis = np.nanmean(y_displace[3:])
                row = 1
            found[v[0], v[1], v[2]] = y_exp[row] - dis

        if v[2] == 2: # r
            found[v[0], v[1], v[2]] = np.nanmean(found[0,:,2])

        
    return found

In [19]:
# vid = '/Users/rebeccakrall/Desktop/Mouse Videos/PRX02_D8FR2C20_71.mp4'
# dat, frames, fps, shape = get_params_from_vid(vid, animals = 'a')
# annotation_file = "/Users/rebeccakrall/Desktop/Mouse Videos/full_annoations_71.csv"
# ann_table = read_temporal_annotations(annotation_file)
# generate_behavior_clips(vid, ann_table, folder = "/Users/rebeccakrall/Desktop/Behavior Clips", crop_center= None, buffer = None, fps = fps, frame_shape = shape, frames = frames)

In [20]:
def load_multi_animal_annotations(annotation_file, animals, skiprows = 9):
    """Load annotations as seconds for given animals in a file

    Args:
        annotation_file (str): path to annotation file
        animals (list): list of str indicating animals to load
        skiprows (int, optional): number of rows to skip in annotation file. Defaults to 9.

    Returns:
        dict: list of start, stops for each animal 
    """
    via_output = pd.read_csv(annotation_file, skiprows = skiprows)
    tc = via_output['temporal_coordinates']
    tp = np.array([(np.array(literal_eval(a)) ) for a in tc])

    d = {}
    for k in animals:
        d[k] = tp[via_output['metadata'].str.contains(k),:]
        d[k][:,0].sort()
        d[k][:,1].sort()

    
    return d

In [21]:
# def random_off_clips(off, frame_count): ## want to improve this so that it samples better
#     off_starts = []
#     random.shuffle(frame_count)
#     for x,y  in zip(off, frame_count):
#         if (x[1] - x[0]) > y:
#             s = random.randint(x[0], x[1]- y)
#             e = s + y
#         else:
#             s = x[0]
#             e = x[1]

#         off_starts.append([s,e])

#     return off_starts

def random_off_clips(off, frame_count): ## want to improve this so that it samples better
    off_starts = []
    
    off2 = list(off[(off[:,1] - off[:,0]) > min(frame_count),:]) # choose only off epochs with sufficient 
    if len(off2) < 1:
        return None, None
    
    random.shuffle(frame_count)
    for y  in frame_count:
    
        matching = True
        while matching:
            if len(off2) < 1:
                off2 = list(off[(off[:,1] - off[:,0]) > min(frame_count),:])
            x = off2.pop()
            if (x[1] - x[0]) > y:
                s = random.randint(x[0], x[1]- y)
                e = s + y
                if (s - x[0]) > min(frame_count):
                    off2.append(np.array([x[0], s]))
                if (x[1] - e) > min(frame_count):
                    off2.append(np.array([e, x[1]]))
                matching = False

        off_starts.append([s,e])

    return off_starts, off2

def on_off_behavior_table(anns, total_frames, fps = 30, behavior_name = 'scratch', random_off = True):
    on = np.round(anns * fps).astype(int)
    all_frames = np.insert(on.flatten(), 0, 0) 
    all_frames = np.append(all_frames, total_frames)
    off = all_frames.reshape(-1,2)
    if random_off:
        frame_count = on[:,1] - on[:,0]
        off, remainder = random_off_clips(off, frame_count)
        off = np.array(off)
    label = [behavior_name] * on.shape[0] + ['not_'+behavior_name]* off.shape[0]
    frames = np.vstack([on, off])
    tbl =  pd.DataFrame({'behavior': label, 'frame_start':frames[:,0], 'frame_end': frames[:,1]})
    return tbl.sort_values('frame_start').reset_index(drop = True)


In [22]:
vid = "/Users/rebeccakrall/Data/Video Processing and Prediction/Clean Training Data/MP4 Clips & DLC/PRX_02_D8_Female_run2_comp2_m67-72clip_0.mp4"
annotation_file = "/Users/rebeccakrall/Data/Video Processing and Prediction/Clean Training Data/VIA CSV annotations/PRX_02_D8_Female_run2_comp2_m67-72clip_0_annotation.csv"


In [36]:
ann_folder = "/Users/rebeccakrall/Data/Video Processing and Prediction/Clean Training Data/VIA CSV annotations"
vid_folder = "/Users/rebeccakrall/Data/Video Processing and Prediction/Clean Training Data/MP4 Clips & DLC"

all_ann = os.listdir(ann_folder)

In [37]:
ex = all_ann.pop(-3)

In [44]:
for ex in all_ann:
    vid = os.path.join(vid_folder, ex.split('_annotation')[0]+'.mp4')
    dat, frames, fps, shape = get_params_from_vid(vid, animals = ['m1','m2','m3','m4','m5','m6'])
    d = load_multi_animal_annotations(os.path.join(ann_folder, ex), ['m1','m2','m3','m4','m5','m6'])
    for k in d.keys():
        if len(d[k]):
            tbl = on_off_behavior_table(d[k], frames, fps = fps, behavior_name = 'scratch')
            generate_behavior_clips(vid, tbl, folder = "/Users/rebeccakrall/Desktop/Scratching Clips", crop_center = dat[k], buffer = 300, fps = fps, frame_shape = shape, an_suffix = k)


  dis = np.nanmean(x_displace[start::3])


In [263]:
anns = d['m2']
on = np.round(anns * fps).astype(int)
all_frames = np.insert(on.flatten(), 0, 0) 
all_frames = np.append(all_frames, 9000)

In [265]:
frame_count = on[:,1] - on[:,0]

In [26]:
d[k]

array([], shape=(0, 2), dtype=float64)

In [267]:
off = all_frames.reshape(-1,2)

In [272]:
off = off[(off[:,1] - off[:,0]) > 15,:]

array([[   0, 4308],
       [4433, 5775],
       [5872, 6115],
       [6158, 6175],
       [6365, 6390],
       [6523, 8117],
       [8283, 8444],
       [8498, 8514],
       [8609, 9000]])

In [328]:
on = np.round(d['m1'] * fps).astype(int)
all_frames = np.insert(on.flatten(), 0, 0) 
all_frames = np.append(all_frames, 9000)
off = all_frames.reshape(-1,2)
frame_count = on[:,1] - on[:,0]
off, off2 = random_off_clips(off, frame_count)

[8813 8850] too small for 55


In [299]:
len(frame_count)

6

In [319]:
off

[[8952, 8983],
 [8667, 8691],
 [8850, 8896],
 [8905, 8935],
 [8776, 8813],
 [8707, 8762]]

In [78]:
start_dir = "/Users/rebeccakrall/Desktop/Scratching Clips/not_scratch"
left_dir = "/Users/rebeccakrall/Desktop/Behavior Clips/Left Foot Scratch"
right_dir = "/Users/rebeccakrall/Desktop/Behavior Clips/Right Foot Scratch"
end_dir = "/Users/rebeccakrall/Desktop/Behavior Clips"

In [79]:
df = pd.read_csv('video_labels2.csv')

In [76]:
for ind, row in df.iterrows():
    if row['Label'] == 'L':
        pass
        # os.rename(os.path.join(start_dir, row['Video']), os.path.join(left_dir, row['Video']))
        # print(f"{row['Video']} left")
    elif row['Label'] == 'R':
        # os.rename(os.path.join(start_dir, row['Video']), os.path.join(right_dir, row['Video']))
        # print('right')
        pass

In [80]:
for ind,row in df.iterrows():
    if pd.notnull(row['Label']):
        new_dir = os.path.join(end_dir, row['Label'])
        if os.path.isdir(new_dir):
                os.rename(os.path.join(start_dir, row['Video']), os.path.join(new_dir, row['Video']))
        else:
                print(f'{row["Label"]} directory does not exist')
    else:
        new_dir = os.path.join(end_dir, 'Not Scratch')
        if os.path.isdir(new_dir):
                os.rename(os.path.join(start_dir, row['Video']), os.path.join(new_dir, row['Video']))
        else:
            print(f'{row["Label"]} directory does not exist')

Face Groomiong directory does not exist
Face Groomoing directory does not exist
