## Optimizing the `frozen_epochs`, `unfrozen_epochs`, `base_lower_bound`, `factors`, and `ratio_split_lower_bound` parameters of `FastAIScreenDetector`

In [None]:
%%time
import warnings
warnings.filterwarnings('ignore')
from functools import partial
from collections import defaultdict, namedtuple
import numpy as np
import pandas as pd
import scipy
from sklearn.model_selection import KFold
from tqdm.notebook import tqdm
from itertools import product

from video699.screen.semantic_segmentation.fastai_detector import *
from video699.screen.semantic_segmentation.common import *
from video699.screen.semantic_segmentation.postprocessing import *
from video699.screen.semantic_segmentation.evaluation import *

In [None]:
# Candidate values for the grid search. The first three lists are combined into
# the training grid (`train`); the last three are combined into the
# post-processing grid (`post_processing`).
resize_factor = [2, 8]
frozen_epochs = [2, 6, 9]
unfrozen_epochs = [3, 7, 10]
base_lower_bound = [5, 7, 10, 15]
erosion_dilation_kernel_size = [20, 50, 80, 150]
ratio_split_lower_bound = [0.3, 0.4, 0.5, 0.7, 0.8, 0.9]

In [None]:
# Single-value grid, presumably for a quick smoke run; uncomment to override the full grid above.
# resize_factor = [8]
# frozen_epochs = [1]
# unfrozen_epochs = [0]
# base_lower_bound = [5]
# erosion_dilation_kernel_size = [80]
# ratio_split_lower_bound = [0.5]

In [None]:
# Column names for the swept parameters, in the same order as the value tuples
# produced by the corresponding grid below.
train_names = ['resize_factor', 'frozen_epochs', 'unfrozen_epochs']
post_processing_names = ['base_lower_bound', 'erosion_dilation_kernel_size', 'ratio_split_lower_bound']

# Every combination of candidate values, materialized so the grids can be
# iterated more than once.
train = list(product(resize_factor, frozen_epochs, unfrozen_epochs))
post_processing = list(product(base_lower_bound, erosion_dilation_kernel_size, ratio_split_lower_bound))

# Lecture identifiers and their frames, both flat and grouped by video filename.
all_lectures = [lecture.filename for lecture in ALL_VIDEOS]
all_frames = [image for lecture in ALL_VIDEOS for image in lecture]
all_frames_grouped_by_videos = {lecture.filename: list(lecture) for lecture in ALL_VIDEOS}

detector = FastAIScreenDetector()
# Detector whose output is used as the reference ("actual") screens during evaluation.
actual_detector = AnnotatedSampledVideoScreenDetector()

In [None]:
def filtered_by(name, used):
    """Tell whether a file belongs to one of the given lectures and to frame 002000.

    Parameters
    ----------
    name : object
        File name or path; it is converted with ``str`` before matching.
    used : iterable of str
        Lecture identifiers; a file matches when any of them is a substring
        of ``str(name)``.

    Returns
    -------
    bool
        ``True`` when ``str(name)`` contains ``'frame002000'`` and at least
        one of the identifiers in `used`.
    """
    path = str(name)  # Convert once instead of once per lecture.
    # The generator lets any() stop at the first matching lecture.
    return 'frame002000' in path and any(lecture in path for lecture in used)

def split_by(name, validation):
    """Tell whether a file belongs to one of the validation lectures.

    Parameters
    ----------
    name : object
        File name or path; it is converted with ``str`` before matching.
    validation : iterable of str
        Lecture identifiers of the validation set.

    Returns
    -------
    bool
        ``True`` when any identifier in `validation` is a substring of
        ``str(name)``.
    """
    path = str(name)  # Convert once instead of once per lecture.
    # The generator lets any() stop at the first matching lecture.
    return any(lecture in path for lecture in validation)

### Cross-validation splits by lecture

In [None]:
def make_splits(all_lectures):
    """Partition the lectures into five shuffled cross-validation folds.

    The shuffle uses a fixed ``random_state``, so repeated calls with the same
    input produce the same folds.

    Parameters
    ----------
    all_lectures : sequence
        Indexable collection of lecture identifiers.

    Returns
    -------
    dict
        Maps the fold number (0 to 4) to a ``Split(train, valid)`` named tuple
        whose fields are lists of the lectures assigned to each side.
    """
    Split = namedtuple('Split', ['train', 'valid'])
    kf = KFold(n_splits=5, shuffle=True, random_state=123)
    splits = {}
    for fold, (train_indices, valid_indices) in enumerate(kf.split(all_lectures)):
        # Only lecture identifiers are stored; frames are looked up by the caller.
        splits[fold] = Split(
            train=[all_lectures[index] for index in train_indices],
            valid=[all_lectures[index] for index in valid_indices],
        )
    return splits

In [None]:
def model_selection(all_lectures, train_names, post_processing_names, default_filtered_by, default_split_by):
    """Grid-search training and post-processing settings with cross-validation.

    For every combination in the module-level ``train`` grid the values are
    written to ``CONFIGURATION``, and a new ``FastAIScreenDetector`` is trained
    for each fold returned by ``make_splits``. The trained detector is then
    scored on the validation frames once per entry of the module-level
    ``post_processing`` grid. Results are checkpointed to
    ``model_selection.csv`` after each fold.

    Parameters
    ----------
    all_lectures : sequence
        Lecture identifiers to split into folds.
    train_names : list of str
        Column names for the values of a training combination.
    post_processing_names : list of str
        Column names for the values of a post-processing combination.
    default_filtered_by : callable
        Called as ``default_filtered_by(name, used=...)``; bound to the
        lectures of the current fold and passed to the detector as
        ``filtered_by``.
    default_split_by : callable
        Called as ``default_split_by(name, validation=...)``; bound to the
        validation lectures of the current fold and passed to the detector as
        ``valid_func``.

    Returns
    -------
    pandas.DataFrame
        One row per (training combination, fold, post-processing combination)
        with the parameter values, the NaN-ignoring mean IoU, the number of
        wrong detections, and the fold number.
    """
    splits = make_splits(all_lectures)
    df_all = pd.DataFrame(columns=train_names + post_processing_names + ['iou', 'wrong_count', 'kfold_split'])

    for train_values in tqdm(train):
        resize_factor, frozen_epochs, unfrozen_epochs = train_values
        CONFIGURATION['resize_factor'] = str(resize_factor)
        CONFIGURATION['frozen_epochs'] = str(frozen_epochs)
        CONFIGURATION['unfrozen_epochs'] = str(unfrozen_epochs)

        for fold, split in splits.items():
            # Bind the fold's lectures without shadowing the module-level helpers.
            fold_filter = partial(default_filtered_by, used=split.train + split.valid)
            fold_is_valid = partial(default_split_by, validation=split.valid)

            detector = FastAIScreenDetector(filtered_by=fold_filter, valid_func=fold_is_valid)
            detector.train()

            valid_frames = [frame for frame in all_frames if fold_is_valid(frame.pathname)]
            actuals = [actual_detector.detect(frame) for frame in valid_frames]
            sem_preds = detector.semantic_segmentation_batch(valid_frames)

            for post_processing_values in post_processing:
                # NOTE(review): `post_processing_values` is only recorded in the
                # results row; nothing here passes it to the detector or to
                # CONFIGURATION, so every row of a fold may be scored with the
                # same post-processing settings. Confirm how
                # `post_processing_batch` is meant to receive these values.
                preds = detector.post_processing_batch(sem_preds, valid_frames)
                wrong, ious, _ = evaluate(actuals, preds)

                iou_score = np.nanmean(ious)
                df_all.loc[len(df_all)] = train_values + post_processing_values + (iou_score, len(wrong), fold)
            # Checkpoint once per fold; rewriting the whole file after every
            # row repeated the same work for each post-processing combination.
            df_all.to_csv("model_selection.csv")
    return df_all

In [None]:
%%time
# Keep the returned table: a later cell saves `df_all`, which is local to
# `model_selection` and would otherwise be undefined at notebook scope.
df_all = model_selection(all_lectures, train_names, post_processing_names, filtered_by, split_by)
df_all

In [None]:
# Final save of the results table, to the same path `model_selection` writes to
# while it runs. Requires `df_all` to be bound in the notebook namespace by an
# earlier cell.
df_all.to_csv("model_selection.csv")