# Instructions

This notebook can be used to convert a labels file in the format `(video_name, timestamp_start, timestamp_end, label)` into the required `(video_name, video_frame_filename, label)` format. 

The final labels file format also requires a `split` column that allocates videos to train/test/validation splits. Once your labels file is in the `(video_name, video_frame_filename, label)` format, you can add that column with the notebook `notebooks/helper_add_train_valid_test_splits_to_labels.ipynb`.

Note: the resulting labels file depends on the frames per second (FPS) at which you extracted the video frames, so set `FPS_OUTPUT` below to that value.

Note: you may have to adjust the number of leading zeros in the generated frame filenames (search for `{:06d}` in this notebook), depending on how the frames were exported and how long the videos are, so that the generated names match the actual frame files.

# Setup

In [1]:
# set desired output FPS
# This is the number of label rows generated for every labelled second of
# video. It is passed to range() further down, so it must be an integer.
FPS_OUTPUT = 2

In [5]:
import pandas as pd
import numpy as np
import cv2
import sys
import os
from datetime import datetime

In [6]:
# setup paths
# Resolve the project root: step up one level only when the notebook is run
# from inside the notebooks/ folder. (A plain str.replace would also mangle
# any parent directory whose name contains "notebooks".)
pwd = os.getcwd()
if os.path.basename(pwd) == "notebooks":
    pwd = os.path.dirname(pwd)

# Keep a trailing separator on both paths so that code which appends a
# filename by string concatenation keeps producing valid paths.
pwd = os.path.join(pwd, '')
path_data = os.path.join(pwd, 'data', '')

In [9]:
# Input file with one row per labelled time span. os.path.join gives a valid
# path whether or not pwd ends with a path separator.
path_labels_file = os.path.join(pwd, 'penguin_label_timestamps.csv')

In [10]:
# Warn (without stopping the notebook) when the input labels file is absent.
labels_file_missing = not os.path.exists(path_labels_file)
if labels_file_missing:
    print("ERROR - cannot find input timestamps label file")

# Helper functions

In [11]:
def get_seconds(minute,second):
    """Convert a minute/second offset into elapsed whole seconds.

    Parameters
    ----------
    minute : int
        Minutes elapsed. Must be >= 0; values above 59 are accepted so that
        offsets longer than one hour can be expressed as total minutes.
    second : int
        Seconds within the minute, 0..59.

    Returns
    -------
    int
        ``minute * 60 + second``.

    Raises
    ------
    ValueError
        If ``minute`` is negative or ``second`` is outside 0..59.
    """
    if minute < 0:
        raise ValueError("minute must be >= 0")
    if not 0 <= second <= 59:
        raise ValueError("second must be in 0..59")
    # plain arithmetic: no need to build datetime objects from the wall clock
    return int(minute * 60 + second)

In [12]:
def get_seconds_start(x):
    """Return the start offset of a label row in elapsed seconds.

    ``x`` is one row of the labels table. Values are read by column name
    (``start_minute``, ``start_second``) when the row is labelled, so the
    result does not depend on column order or on pandas' deprecated
    positional fallback for integer keys. Plain sequences (list, tuple,
    array) and rows without those labels fall back to positions 4 and 5.
    """
    try:
        minute, second = x['start_minute'], x['start_second']
    except (KeyError, TypeError, IndexError):
        # .iloc gives explicit positional access on a pandas Series; other
        # sequences are indexed directly
        positional = getattr(x, 'iloc', x)
        minute, second = positional[4], positional[5]
    return get_seconds(minute, second)

In [13]:
def get_seconds_end(x):
    """Return the end offset of a label row in elapsed seconds.

    ``x`` is one row of the labels table. Values are read by column name
    (``end_minute``, ``end_second``) when the row is labelled, so the result
    does not depend on column order or on pandas' deprecated positional
    fallback for integer keys. Plain sequences (list, tuple, array) and rows
    without those labels fall back to positions 6 and 7.
    """
    try:
        minute, second = x['end_minute'], x['end_second']
    except (KeyError, TypeError, IndexError):
        # .iloc gives explicit positional access on a pandas Series; other
        # sequences are indexed directly
        positional = getattr(x, 'iloc', x)
        minute, second = positional[6], positional[7]
    return get_seconds(minute, second)

# Load labels file and save new labels file with 1 row per frame at desired FPS

In [14]:
# read the timestamp-based labels table (one row per labelled time span)
df = pd.read_csv(filepath_or_buffer=path_labels_file)

In [15]:
# extract components of start and end times
# Timestamps are split on ":" into hours, minutes and seconds. Each column is
# split only once and the pieces are reused.
start_parts = df['time_start'].str.split(":")
end_parts = df['time_end'].str.split(":")

# Hours are folded into the minute columns instead of being dropped, so a
# timestamp past the first hour is not mistaken for one inside it. The minute
# columns therefore hold total minutes and may exceed 59. No extra columns are
# added, so the column layout is unchanged.
df['start_minute'] = (start_parts.str.get(0).astype(int) * 60
                      + start_parts.str.get(1).astype(int))
df['start_second'] = start_parts.str.get(2).astype(int)

df['end_minute'] = (end_parts.str.get(0).astype(int) * 60
                    + end_parts.str.get(1).astype(int))
df['end_second'] = end_parts.str.get(2).astype(int)

In [16]:
# preview the first rows, including the extracted minute/second columns
df.head(n=5)

Unnamed: 0,video,time_start,time_end,label,start_minute,start_second,end_minute,end_second
0,20160801_no9_1,00:00:00,00:06:03,search,0,0,6,3
1,20160801_no9_1,00:06:03,00:07:00,descent,6,3,7,0
2,20160801_no9_1,00:07:00,00:07:06,bottom,7,0,7,6
3,20160801_no9_1,00:07:06,00:07:14,ascent,7,6,7,14
4,20160801_no9_1,00:07:14,00:07:14,breath,7,14,7,14


In [17]:
# convert each row's start and end offsets into elapsed seconds
df['start_elapsed'] = df.apply(func=get_seconds_start, axis='columns')
df['end_elapsed'] = df.apply(func=get_seconds_end, axis='columns')

In [18]:
# remove the raw timestamps and their minute/second components; only the
# elapsed-second columns are used from here on
df.drop(
    columns=[
        'time_start',
        'time_end',
        'start_minute',
        'start_second',
        'end_minute',
        'end_second',
    ],
    inplace=True,
)

In [19]:
# preview the first rows after reducing the table to elapsed-second columns
df.head(n=5)

Unnamed: 0,video,label,start_elapsed,end_elapsed
0,20160801_no9_1,search,0,363
1,20160801_no9_1,descent,363,420
2,20160801_no9_1,bottom,420,426
3,20160801_no9_1,ascent,426,434
4,20160801_no9_1,breath,434,434


In [20]:
# distinct video names, in order of first appearance
videos = [video_name for video_name in df['video'].unique()]

In [21]:
# initialize output
out_all = []

for video in videos:
    # subset dataframe to labels for this video
    dft = df[df['video'] == video]

    out = []

    # pick the needed columns by name so that extra or reordered columns in
    # the labels file cannot shift the values read for each segment
    segments = dft[['video', 'label', 'start_elapsed', 'end_elapsed']]

    for vid, label, start, end in segments.itertuples(index=False, name=None):
        # a segment covers the seconds start .. end-1 (end is exclusive), so a
        # segment whose start equals its end contributes no frames
        for second in range(start, end):
            for j in range(FPS_OUTPUT):
                # Number the frame from its position in the video, not from
                # how many rows have been emitted so far. The two agree when
                # segments are contiguous and start at 0, but only this form
                # stays aligned with the frame files when the labels have a
                # gap or do not start at the beginning of the video.
                frame_number = second * FPS_OUTPUT + j
                out.append({"video":vid,
                            "frame": vid + "_{:06d}".format(frame_number) + '.jpg',
                            "label": label})

    out_all.extend(out)

In [22]:
# make dataframe and save
# Passing columns= fixes the column order and still yields a valid,
# header-only table when out_all is empty (selecting the columns afterwards
# would raise KeyError in that case).
df_out = pd.DataFrame(out_all, columns=['video','frame','label'])

# create the output folder if it does not exist yet so to_csv cannot fail on it
os.makedirs(path_data, exist_ok=True)
df_out.to_csv(path_data + 'labels.csv',index=False)

print("SAVED LABELS FILE to /data/labels.csv")

SAVED LABELS FILE to /data/labels.csv
