In [None]:
%%time

from functools import partial
from collections import defaultdict, namedtuple
import numpy as np
import pandas as pd
import scipy
from sklearn.model_selection import KFold
from tqdm.notebook import tqdm
from itertools import product

from video699.screen.semantic_segmentation.fastai_detector import *
from video699.screen.semantic_segmentation.common import *
from video699.screen.semantic_segmentation.postprocessing import *
from video699.screen.semantic_segmentation.evaluation import *

In [None]:
# A default-configured detector is created only so that the names of its
# training parameters and post-processing methods can be read from it.
detector = FastAIScreenDetector()
method_params = [name for name in detector.methods.keys()]
train_params = [name for name in detector.train_params.keys()]
# Training parameter names come first, followed by post-processing names.
all_params = [*train_params, *method_params]

# MODEL PARAMETERS
# Each hyper-parameter is a list of candidate values; the grid search takes the
# Cartesian product of these lists.

batch_size = [8]
resize_factor = [2]
frozen_epochs = [6]
unfrozen_epochs = [7]
frozen_lr = [1e-3]
unfrozen_lr = [slice(1e-4, 2e-4)]

# NOTE(review): the order here is presumably expected to match the key order of
# detector.train_params, since the two are zipped together later — confirm.
train_params_values = [
    batch_size,
    resize_factor,
    frozen_epochs,
    unfrozen_epochs,
    frozen_lr,
    unfrozen_lr,
]

# POST PROCESSING PARAMETERS
# As with the model parameters, every entry is a list of candidate values.
# The *_factors entries each hold a single candidate that is itself a list.

# "base" method
base = [True]
base_lower_bounds = [5, 7, 10]
base_upper_bounds = [40, 50, 60]
base_factors = [[0.1, 0.01]]

# "erode_dilate" method
erode_dilate = [True]
erode_dilate_lower_bounds = [5]
erode_dilate_upper_bounds = [40]
erode_dilate_factors = [[0.1, 0.01]]
erode_dilate_iterations = [40, 100]

# "ratio_split" method
ratio_split = [True]
ratio_split_lower_bounds = [0.7, 0.9]
ratio_split_upper_bounds = [1.5]

# The three on/off switches come first, then the per-method settings.
# NOTE(review): the order is presumably expected to match the key order of
# detector.methods, since the two are zipped together later — confirm.
methods_values = [
    base,
    erode_dilate,
    ratio_split,
    base_lower_bounds,
    base_upper_bounds,
    base_factors,
    erode_dilate_lower_bounds,
    erode_dilate_upper_bounds,
    erode_dilate_factors,
    erode_dilate_iterations,
    ratio_split_lower_bounds,
    ratio_split_upper_bounds,
]

In [None]:
# Expand both parameter grids into explicit lists of setting tuples.
train_settings = list(product(*train_params_values))
method_settings = list(product(*methods_values))

# Index the dataset: lecture file names, a flat list of all frames, and the
# frames grouped by the file name of the video they come from.
all_lectures = [lecture_video.filename for lecture_video in ALL_VIDEOS]
all_frames = [frame for lecture_video in ALL_VIDEOS for frame in lecture_video]
all_frames_grouped_by_videos = {
    lecture_video.filename: [frame for frame in lecture_video]
    for lecture_video in ALL_VIDEOS
}

# Frames of the lecture(s) designated as the test set.
# NOTE(review): these lectures are not removed from all_lectures, so they can
# still appear in the cross-validation folds built from it — confirm intended.
test_lectures = ['PB069-D2-20140305.mp4']
test_frames = [
    frame
    for lecture_name in test_lectures
    for frame in all_frames_grouped_by_videos[lecture_name]
]

# Detector used as the reference when evaluating predictions.
actual_detector = AnnotatedSampledVideoScreenDetector()

In [None]:
# Build a 5-fold cross-validation partition at lecture level (not frame level);
# the fixed random_state makes the shuffled folds reproducible.
Split = namedtuple('Split', ['train', 'valid'])
kf = KFold(n_splits=5, shuffle=True, random_state=123)
splits = {}
for j, split in enumerate(kf.split(all_lectures)):
    # Each item yielded by kf.split is a (train indices, validation indices) pair.
    train_indices, valid_indices = split
    train_lectures = [all_lectures[position] for position in train_indices]
    valid_lectures = [all_lectures[position] for position in valid_indices]
    valid_frames = [
        frame
        for lecture_name in valid_lectures
        for frame in all_frames_grouped_by_videos[lecture_name]
    ]
    # Only the lecture names are stored per fold, keyed by the fold number.
    splits[j] = Split(train_lectures, valid_lectures)

In [None]:
%%time
# Model selection
#
# For every training configuration and every cross-validation fold:
#   1. train a detector on the fold's training lectures,
#   2. run semantic segmentation once on the fold's validation frames,
#   3. apply every post-processing configuration to those predictions and
#      score the result against the reference detections.
# One row per (training setting, post-processing setting, fold) is appended to
# df_all, with the fold number stored in the 'kfold_split' column.
method_settings = list(product(*methods_values))
train_settings = list(product(*train_params_values))
df_all = pd.DataFrame(columns=all_params + ['iou', 'wrong_count', 'kfold_split'])

for train_setting in tqdm(train_settings):
    train_params_dict = dict(zip(train_params, train_setting))
    # The fold index gets its own name, so the inner loop cannot rebind it and
    # it does not depend on a variable left over from another cell.
    for split_idx, fold in splits.items():
        train_lectures = fold.train
        valid_lectures = fold.valid
        valid_frames = [frame for lecture in valid_lectures for frame in all_frames_grouped_by_videos[lecture]]

        # Both predicates receive a file name/path and test it by substring match.
        # They read train_lectures / valid_lectures when called, so they must be
        # consumed within this iteration.
        # NOTE(review): assumes the detector only calls them during construction
        # or training — confirm.
        # Keeps 'frame' files belonging to any lecture of the current fold.
        filtered_by = lambda name: any(lecture in str(name) for lecture in train_lectures + valid_lectures) \
                        and 'frame' in str(name)
        # Marks files that belong to the validation lectures.
        split_by = lambda name: any(lecture in str(name) for lecture in valid_lectures)

        detector = FastAIScreenDetector(train_params=train_params_dict, methods=None, filtered_by=filtered_by,
                                        valid_func=split_by, device='cuda')

        detector.train()

        # Reference detections and raw segmentation output are computed once per
        # fold and reused for every post-processing configuration.
        actuals = [actual_detector.detect(frame) for frame in valid_frames]
        sem_preds = detector.semantic_segmentation_batch(valid_frames)

        print(f"Iterating through {len(method_settings)} methods in split {split_idx}.")
        for method_setting in method_settings:
            preds = detector.post_processing_batch(sem_preds, valid_frames, dict(zip(method_params, method_setting)))
            # The first value returned by evaluate is a sized collection; only its
            # length is recorded.
            wrong_detections, ious, _ = evaluate(actuals, preds)

            # nanmean ignores NaN entries in the IoU values.
            iou_score = np.nanmean(ious)
            wrong_count = len(wrong_detections)
            df_all.loc[len(df_all)] = train_setting + method_setting + (iou_score, wrong_count, split_idx)

In [None]:
# Persist the collected results table as CSV in the current working directory.
df_all.to_csv(path_or_buf="model_selection.csv")