In [None]:
import json
from pathlib import Path

import numpy as np
import pandas as pd

from tqdm import tqdm_notebook as tqdm

# Test, Val, Train Splits

In [None]:
# Root folders of the raw development and test sets.
dev_set = Path('data/raw/Memorability 2018', 'dev-set')
test_set = Path('data/raw/Memorability 2018', 'test-set')

In [None]:
# Every entry under the dev-set `sources` folder, in sorted path order,
# and the bare file name of each one.
dev_videos = sorted(dev_set.joinpath('sources').iterdir())
dev_keys = [video.name for video in dev_videos]

def choose_eval_status(train_ratio=0.5, val_ratio=0.25,
                       test_ratio=0.25, rng=None):
    """Randomly pick one of ``'train'``, ``'val'`` or ``'test'``.

    Parameters
    ----------
    train_ratio, val_ratio, test_ratio : float
        Probability of drawing the corresponding label. The three values
        must sum to 1 (checked with ``np.isclose``).
    rng : object with a ``choice(a, p=...)`` method, optional
        Random source to draw from, e.g. ``np.random.RandomState(seed)`` or
        ``np.random.default_rng(seed)``. When ``None`` (the default) the
        global ``np.random`` state is used.

    Returns
    -------
    str
        The sampled label (a NumPy string scalar, as returned by ``choice``).

    Raises
    ------
    AssertionError
        If the ratios do not sum to 1.
    """
    probs = [train_ratio, val_ratio, test_ratio]
    assert np.isclose(sum(probs), 1), 'split ratios must sum to 1'

    choices = ['train', 'val', 'test']
    # Passing an explicit generator makes the draw reproducible without
    # touching the global NumPy random state.
    sampler = np.random if rng is None else rng
    return sampler.choice(choices, p=probs)

# Seed the global NumPy RNG so the random train/val/test assignment below
# (and therefore the annotations file written from it) is identical on
# every Restart & Run All.
np.random.seed(0)

# Assign every dev-set video to exactly one split.
dev_annotations = {'train': [], 'val': [], 'test': []}
for key in dev_keys:
    dev_annotations[choose_eval_status()].append(key)

In [None]:
# Every entry under the test-set `sources` folder, in sorted path order,
# and the bare file name of each one.
test_videos = sorted(test_set.joinpath('sources').iterdir())
test_keys = [video.name for video in test_videos]

In [None]:
# Split assignment for both sets: the dev-set is divided into
# train/val/test, the test-set is used for testing only.
annotations = {'dev-set': dev_annotations,
               'test-set': {'test': test_keys}}

# Create the output folder first so this cell also runs on a fresh checkout,
# and use a context manager so the file is flushed and closed.
annotations_path = Path('data/processed/annotations.json')
annotations_path.parent.mkdir(parents=True, exist_ok=True)
with annotations_path.open('w') as annotations_file:
    json.dump(annotations, annotations_file)

# Ground Truth

The ground-truth data remains the same; no processing is applied to it here.

# Inception Features

In [None]:
# Create the per-set output folders with pathlib rather than shelling out to
# `mkdir -p`: this works without a POSIX shell and raises on failure.
for split_dir in ('data/processed/dev-set/', 'data/processed/test-set/'):
    Path(split_dir).mkdir(parents=True, exist_ok=True)

In [None]:
def parse_inception_feature(s):
    """Parse a sparse feature string into an ``{index: value}`` dict.

    Parameters
    ----------
    s : str
        Whitespace-separated ``index:value`` pairs, e.g. ``'3:0.25 17:0.5'``.
        Any run of spaces, tabs or newlines separates two pairs; an empty or
        whitespace-only string yields an empty dict.

    Returns
    -------
    dict
        Maps each ``int`` index to its ``float`` value.

    Raises
    ------
    ValueError
        If a pair does not have exactly one ``:`` or its parts are not
        parseable as ``int`` / ``float``.
    """
    # str.split() with no argument collapses repeated whitespace and returns
    # [] for blank input, so no empty "pair" ever reaches the unpacking below.
    pairs = (pair.split(':') for pair in s.split())
    return {int(k): float(v) for k, v in pairs}

def expand_inception_feature(d, size=1000):
    """Expand a sparse ``{index: value}`` dict into a dense vector.

    Parameters
    ----------
    d : dict
        Maps integer positions to the value stored at that position.
    size : int, optional
        Length of the returned vector. Defaults to 1000.

    Returns
    -------
    numpy.ndarray
        1-D float array of length ``size``: zero everywhere except at the
        positions listed in ``d``.

    Raises
    ------
    IndexError
        If a key in ``d`` is out of bounds for an array of length ``size``.
    """
    feature = np.zeros(size)
    for index, value in d.items():
        feature[index] = value
    return feature

# Combine parsing and expansion for a single feature file.
def parse_and_expand_inception_feature(path):
    """Load one feature file and return it as a flat row.

    Parameters
    ----------
    path : pathlib.Path
        Feature file whose name has the form ``<video>-<frame>.<ext>``,
        where ``<frame>`` is an integer.

    Returns
    -------
    list
        ``[video, frame, v0, v1, ...]`` where ``video`` is the video part of
        the file name with ``'.webm'`` appended, ``frame`` is an ``int`` and
        the remaining entries are the dense feature values.

    Raises
    ------
    ValueError
        If the file name contains no ``-`` or the frame part is not an integer.
    """
    # read_text() opens, reads and closes the file in one call.
    feature = parse_inception_feature(path.read_text())
    # Split on the last dash only, so a dash inside the video name does not
    # break the two-way unpacking.
    video, frame = path.name.rsplit('-', 1)
    video += '.webm'
    frame = int(frame.split('.')[0])
    return [video, frame] + list(expand_inception_feature(feature))

In [None]:
dev_inception_files = dev_set / 'features/InceptionV3/'

# One row per feature file: [video, frame, feature values...].
inception_rows = []
for file in tqdm(sorted(dev_inception_files.iterdir())):
    inception_rows.append(parse_and_expand_inception_feature(file))

# Order rows by (video, frame), then drop the frame column. Column 0 becomes
# 'video' and all remaining labels are converted to strings before writing.
inception_features_flat = (
    pd.DataFrame(inception_rows)
    .set_index([0, 1])
    .sort_index()
    .reset_index()
    .drop(columns=1)
    .reset_index(drop=True)
    .rename(columns={0: 'video'})
    .rename(columns=str)
)

inception_features_flat.to_feather('data/processed/dev-set/inception_features.feather')

In [None]:
test_inception_files = test_set / 'features/InceptionV3/'

# One row per feature file: [video, frame, feature values...].
inception_rows = []
for file in tqdm(sorted(test_inception_files.iterdir())):
    inception_rows.append(parse_and_expand_inception_feature(file))

# Order rows by (video, frame), then drop the frame column. Column 0 becomes
# 'video' and all remaining labels are converted to strings before writing.
inception_features_flat = (
    pd.DataFrame(inception_rows)
    .set_index([0, 1])
    .sort_index()
    .reset_index()
    .drop(columns=1)
    .reset_index(drop=True)
    .rename(columns={0: 'video'})
    .rename(columns=str)
)

inception_features_flat.to_feather('data/processed/test-set/inception_features.feather')