In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
cd ../src/

## Imports

In [None]:
import os
import re
import cv2
import time
import imageio
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from tqdm.notebook import tqdm
from skimage.transform import resize

In [None]:
from params import *

## Dataframe

### Load

In [None]:
# Load the raw per-box labels; missing values (NaN) are replaced by 0.
labels_csv = DATA_PATH + 'train_labels.csv'
video_labels = pd.read_csv(labels_csv).fillna(0)

### Propagate labels

In [None]:
# Frame offsets around an impact: up to 4 frames before/after, excluding the
# impact frame itself (offset 0).
EXTENDED = np.array([offset for offset in range(-4, 5) if offset != 0])

In [None]:
# Start the extended label as a copy of the impact flag; neighbouring frames
# are switched on by the propagation cell below.
video_labels['extended_impact'] = video_labels['impact'].copy()

In [None]:
# Rows that carry an impact (name kept for the following cells).
video_labels_with_impact = video_labels[video_labels['impact'] > 0]

# Propagate every impact to the same box (same video and label) on the frames
# at the offsets listed in EXTENDED. The shifted keys are built once and matched
# with a single left merge instead of re-scanning the whole frame per impact.
keys = ['video', 'label', 'frame']
impact_keys = video_labels_with_impact[keys]
neighbour_keys = pd.concat(
    [impact_keys.assign(frame=impact_keys['frame'] + offset) for offset in EXTENDED],
    ignore_index=True,
).drop_duplicates()  # unique right-hand keys => the merge cannot duplicate rows

matched = video_labels[keys].merge(neighbour_keys, on=keys, how='left', indicator=True)
assert len(matched) == len(video_labels)  # a left merge keeps the row order and count

is_neighbour = (matched['_merge'] == 'both').to_numpy()
video_labels.loc[is_neighbour, 'extended_impact'] = 1

In [None]:
# Offsets now include 0 so that the impact frame itself is flagged as well.
EXTENDED = np.array([-4, -3, -2, -1, 0, 1, 2, 3, 4])

keys = ['video', 'label', 'frame']

# Real impact types only: skip the 0 placeholder introduced by fillna(0) (and
# any NaN) explicitly, instead of assuming it is the first unique value.
impact_types = [
    impact_type
    for impact_type in video_labels['impactType'].unique()
    if not pd.isna(impact_type) and impact_type != 0
]

# One binary "extended_<type>" column per impact type: 1 for the same box
# (same video and label) on the frames at the EXTENDED offsets, 0 elsewhere.
for impact_type in impact_types:
    typed_keys = video_labels.loc[video_labels['impactType'] == impact_type, keys]
    neighbour_keys = pd.concat(
        [typed_keys.assign(frame=typed_keys['frame'] + offset) for offset in EXTENDED],
        ignore_index=True,
    ).drop_duplicates()  # unique right-hand keys => the merge cannot duplicate rows

    matched = video_labels[keys].merge(neighbour_keys, on=keys, how='left', indicator=True)
    assert len(matched) == len(video_labels)  # a left merge keeps the row order and count

    video_labels[f"extended_{impact_type}"] = (
        (matched['_merge'] == 'both').to_numpy().astype('int64')
    )

### Adapt cols

In [None]:
# File name of the frame each row belongs to:
# "<video name without .mp4>_<frame number on 4 digits>.png".
# regex=False makes '.mp4' a literal match; as a regex (the default before
# pandas 2.0) the '.' would match any character.
video_stems = video_labels['video'].str.replace('.mp4', '', regex=False)
frame_ids = video_labels['frame'].apply(lambda x: f'{x:04d}')
video_labels['image_name'] = video_stems + '_' + frame_ids + '.png'

In [None]:
# Shorten the bounding-box column names: left/top/width/height -> x/y/w/h.
coords = dict(left="x", top="y", width="w", height="h")
video_labels = video_labels.rename(columns=coords)

In [None]:
# Cast the impact flag to int and shift it up by one.
# NOTE: not idempotent - re-running this cell shifts the values again.
video_labels['impact'] = video_labels['impact'].astype(int).add(1)

### Truth : Impact visible & confident

In [None]:
# NOTE(review): this re-reads df_train.csv and replaces the in-memory frame
# built by the cells above; that file is only written by the Save cell further
# down, so a fresh run needs it to exist already - confirm this is intended.
video_labels = pd.read_csv(DATA_PATH + 'df_train.csv')

# A box is a "true" impact when it is flagged as an impact, with confidence > 1
# and visibility > 0.
# NOTE(review): an earlier cell shifts `impact` by +1 and video_to_images treats
# `impact > 1` as an impact - confirm `impact == 1` is the right test for the
# values stored in df_train.csv.
is_truth = (
    video_labels['impact'].eq(1)
    & video_labels['confidence'].gt(1)
    & video_labels['visibility'].gt(0)
).astype(int)

video_labels['truth'] = is_truth
# Starts as a copy of `truth`; extended to neighbouring frames in the next cell.
video_labels['extended_truth'] = video_labels['truth'].copy()

In [None]:
# Frame offsets around a true impact, excluding the impact frame itself.
EXTENDED = np.array([-4, -3, -2, -1, 1, 2, 3, 4])

# Rows flagged as true impacts (name kept for compatibility with other cells).
video_labels_with_impact = video_labels[video_labels['truth'] > 0]

# Propagate every true impact to the same box (same video and label) on the
# frames at the EXTENDED offsets. The shifted keys are built once and matched
# with a single left merge instead of re-scanning the whole frame per impact.
keys = ['video', 'label', 'frame']
truth_keys = video_labels_with_impact[keys]
neighbour_keys = pd.concat(
    [truth_keys.assign(frame=truth_keys['frame'] + offset) for offset in EXTENDED],
    ignore_index=True,
).drop_duplicates()  # unique right-hand keys => the merge cannot duplicate rows

matched = video_labels[keys].merge(neighbour_keys, on=keys, how='left', indicator=True)
assert len(matched) == len(video_labels)  # a left merge keeps the row order and count

is_neighbour = (matched['_merge'] == 'both').to_numpy()
video_labels.loc[is_neighbour, 'extended_truth'] = 1

In [None]:
# Quick look at the first rows of the prepared labels.
video_labels.head(5)

### Save

In [None]:
# Persist the prepared labels (without the index) for the cells below.
train_csv = DATA_PATH + 'df_train.csv'
video_labels.to_csv(train_csv, index=False)

## Images

In [None]:
# Reload the prepared labels saved in the "Save" section.
train_csv = DATA_PATH + 'df_train.csv'
video_labels = pd.read_csv(train_csv)

In [None]:
def video_to_images(video_name, video_labels, video_dir="", size=0, out_dir="", only_with_impact=False):
    """
    Extracts the labelled frames of a video and writes them to disk as PNG files.

    Images are named "<video name without its last 4 characters>_<frame on 4 digits>.png".
    Frames whose image already exists in out_dir are skipped, so an interrupted
    extraction can be resumed.

    Args:
        video_name (str): File name of the video.
        video_labels (pandas DataFrame): Labels with at least the "video" and "frame" columns,
            and "impact" when only_with_impact is set. Rows of other videos are ignored.
        video_dir (str, optional): Prefix prepended to video_name to locate the video. Defaults to "".
        size (int, optional): If non-zero, frames are resized to (size, size). Defaults to 0.
        out_dir (str, optional): Prefix prepended to the image names. Defaults to "".
        only_with_impact (bool, optional): Only save frames having a box with impact > 1. Defaults to False.

    Returns:
        numpy array or None: Last image written by this call, None if nothing was written.

    Raises:
        AssertionError: If the video file does not exist.
    """
    video_path = f"{video_dir}{video_name}"
    assert os.path.isfile(video_path), f"Video not found: {video_path}"

    # Use the labels passed as argument (not a notebook-level variable),
    # restricted to the requested video.
    video_df = video_labels[video_labels["video"] == video_name]

    if only_with_impact:
        impact_boxes = video_df[video_df["impact"] > 1.0]
        impact_frames = set(impact_boxes["frame"])
        print(video_path, impact_boxes.shape[0])
    else:
        impact_frames = None
        print(video_path)

    image = None

    # The context manager closes the reader even if a frame fails.
    with imageio.get_reader(video_path, 'ffmpeg') as vid:
        for frame in video_df['frame'].unique():
            # Cheap checks first, so that skipped frames are never decoded.
            if impact_frames is not None and frame not in impact_frames:
                continue

            img_name = f"{video_name[:-4]}_{frame:04d}.png"
            if os.path.isfile(out_dir + img_name):
                continue  # already extracted; keep going with the other frames

            try:
                # Frame numbers are shifted by one to get the reader index.
                frame_image = vid.get_data(max(0, frame - 1))
            except IndexError:
                # Skip the frame: never write a stale image under its name.
                print(f'Index error at frame {frame}')
                continue

            if size:
                frame_image = resize(frame_image, (size, size)) * 255

            # NOTE(review): imageio readers usually return RGB while cv2.imwrite
            # expects BGR - confirm the channel order expected downstream.
            cv2.imwrite(out_dir + img_name, frame_image)
            image = frame_image

    return image

In [None]:
# Extract the labelled frames of every training video into IMG_PATH_F.
video_names = video_labels["video"].unique()

for video_name in tqdm(video_names):
    # Labels of the current video only.
    video_df = video_labels.loc[video_labels["video"] == video_name]
    img = video_to_images(video_name, video_df, video_dir=TRAIN_VID_PATH, out_dir=IMG_PATH_F)