In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from glob import glob
from sklearn.externals import joblib
from scipy.stats import norm
%matplotlib inline

In [2]:
def log_progress(sequence, every=None, size=None, name='Items'):
    """Yield the items of ``sequence`` while driving a notebook progress widget.

    Parameters
    ----------
    sequence : iterable
        Items to pass through unchanged.
    every : int, optional
        Refresh the widget every ``every`` items. When the total is known and
        ``every`` is omitted, it is 1 for totals up to 200 and
        ``int(size / 200)`` otherwise. It must be given when the total is
        unknown.
    size : int, optional
        Total number of items; defaults to ``len(sequence)`` when available.
    name : str
        Prefix used in the text label.

    Yields
    ------
    Each element of ``sequence``, in order.

    Notes
    -----
    This is a generator, so the widget only appears once iteration starts.
    The bar is styled 'success' after a complete pass and 'danger' when
    anything is raised while iterating; the exception is re-raised.
    """
    from ipywidgets import IntProgress, HTML, VBox
    from IPython.display import display

    # Determine the total unless the caller supplied one.
    unknown_length = False
    if size is None:
        try:
            size = len(sequence)
        except TypeError:
            unknown_length = True

    if size is None:
        assert every is not None, 'sequence is iterator, set every'
    elif every is None:
        every = 1 if size <= 200 else int(size / 200)     # every 0.5%

    if unknown_length:
        # No total to count towards: show a full bar in the 'info' style.
        bar = IntProgress(min=0, max=1, value=1)
        bar.bar_style = 'info'
    else:
        bar = IntProgress(min=0, max=size, value=0)
    caption = HTML()
    display(VBox(children=[caption, bar]))

    index = 0
    try:
        for index, record in enumerate(sequence, 1):
            refresh = index == 1 or index % every == 0
            if refresh and unknown_length:
                caption.value = '{name}: {index} / ?'.format(name=name, index=index)
            elif refresh:
                bar.value = index
                caption.value = u'{name}: {index} / {size}'.format(
                    name=name, index=index, size=size)
            yield record
    except:
        # Deliberately bare: any exit through an exception marks the bar red
        # before the exception continues to propagate.
        bar.bar_style = 'danger'
        raise
    else:
        bar.bar_style = 'success'
        bar.value = index
        caption.value = "{name}: {index}".format(name=name, index=str(index or '?'))

---

In [3]:
import keras

Using TensorFlow backend.


In [4]:
# Load the trained end-to-end network, then build a second model that shares
# its input but stops at the 'average_1' layer, whose output is what the
# later cells save as embeddings.
model = keras.models.load_model('models/model_e2e_phase2.hf5')
embedding_output = model.get_layer('average_1').output
model_embd = keras.models.Model(model.input, embedding_output)

In [5]:
def norm(arr, mins, maxs):
    """Return a float copy of ``arr`` with every channel shifted and scaled.

    For each index ``i`` along the last axis, the slice ``[:, :, i]`` becomes
    ``(arr[:, :, i] - mins[i]) / maxs[i]``. The input array is not modified.

    NOTE(review): the divisor is ``maxs[i]`` itself, not ``maxs[i] - mins[i]``;
    presumably the stored maxima were computed after the shift - confirm.
    NOTE(review): this definition shadows ``norm`` imported from
    ``scipy.stats`` in the first cell.
    """
    scaled = arr.astype(float)
    for channel in range(scaled.shape[-1]):
        plane = scaled[:, :, channel]  # view into `scaled`, updated in place
        np.subtract(plane, mins[channel], out=plane)
        np.divide(plane, maxs[channel], out=plane)
    return scaled

In [6]:
# Per-channel offsets and divisors passed to `norm` for the audio and kinect
# streams.
mins_audio = np.load('data/audio_mins.npy')
maxs_audio = np.load('data/audio_maxs.npy')
mins_kinect = np.load('data/kinect_mins.npy')
maxs_kinect = np.load('data/kinect_maxs.npy')

In [7]:
# Feature files of the private test set, in a deterministic (sorted) order.
f_videos = sorted(glob('data/X_test_priv/*.npy'))

# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(len(f_videos))

124


In [None]:
# Run every private-test recording through the truncated network and store
# the resulting embeddings, one .npy file per input file.
EMBD_OUT_DIR = 'out_priv'
if not os.path.isdir(EMBD_OUT_DIR):
    # np.save does not create missing parent directories.
    os.makedirs(EMBD_OUT_DIR)

for f in log_progress(f_videos):
    # Each file holds an object array wrapping a mapping of per-modality
    # arrays ('audio', 'eyes', 'face_nn', 'kinect'). NumPy >= 1.16.3 refuses
    # to load object arrays unless pickling is enabled explicitly.
    # NOTE: unpickling can execute arbitrary code - only load trusted files.
    X = np.load(f, allow_pickle=True).item()

    # Audio and kinect are normalised with the saved constants; the eyes and
    # face_nn streams are fed to the model as stored.
    X_audio = norm(X['audio'], mins_audio, maxs_audio)
    X_kinect = norm(X['kinect'], mins_kinect, maxs_kinect)

    embds = model_embd.predict([X_audio, X['eyes'], X['face_nn'], X_kinect])

    np.save(os.path.join(EMBD_OUT_DIR, os.path.basename(f)), embds)

---

In [3]:
# Directory the submission cell reads the per-video `<video>.csv` files from.
# NOTE(review): absolute, machine-specific path - adjust when running elsewhere.
LABELS_TEST_PATH = '/home/datasets/merc-2017/private_test/prediction/'
# Root directory of the generated submission; the submission cell creates a
# `prediction/` subfolder inside it and aborts if this path already exists.
OUT_PATH = 'submission_priv'

In [4]:
# Load the fitted classifier whose `predict_proba` is applied to the saved
# embeddings in the submission cell.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code - only load model files from a trusted source.
# NOTE(review): `joblib` is imported from `sklearn.externals` in the first
# cell; recent scikit-learn releases no longer ship that module - confirm the
# pinned version.
predictor = joblib.load('models/predictor_phase2_e2e.pkl')

In [5]:
# Embedding files produced by the extraction loop. Only *.npy entries are
# kept so that stray files in the directory (checkpoints, OS metadata, ...)
# cannot break the submission loop, which expects `<video>.npy` names.
videos = sorted(
    name for name in os.listdir('out_priv/') if name.endswith('.npy')
)

# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(len(videos))

124


In [6]:
# Mapping indexed by `<video>.csv` in the submission cell; each value is
# converted there to an array whose columns 0 and 1 are compared against the
# CSV `Time` values.
# The file stores an object array, which NumPy >= 1.16.3 only loads when
# pickling is enabled explicitly.
# NOTE: unpickling can execute arbitrary code - only load trusted files.
X_test_tinds = np.load('data/X_test_priv_tinds.npy', allow_pickle=True).item()

# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(len(X_test_tinds))

124


In [7]:
def _pick_classes(times, tinds, scores):
    """Choose one class index for every query time.

    For each time ``t`` the rows of ``tinds`` whose interval
    ``[tinds[k, 0], tinds[k, 1]]`` contains ``t`` are selected, the matching
    rows of ``scores`` are summed with equal weight, and the index of the
    largest summed score is taken. A time covered by no interval inherits the
    most recent decision made in this call.

    Parameters
    ----------
    times : iterable of numbers
        Query times, in row order.
    tinds : 2-D array
        One row per scored window; columns 0 and 1 are the window bounds.
    scores : 2-D array
        One row of class scores per row of ``tinds``.

    Returns
    -------
    list
        One entry per query time: an ``int`` class index, or ``None`` for
        leading times that no window covers.
    """
    picked = []
    last = None  # starts empty on every call so nothing leaks between videos
    for t in times:
        covering = (tinds[:, 0] <= t) & (tinds[:, 1] >= t)
        if covering.any():
            last = int(np.argmax(scores[covering].sum(axis=0)))
        picked.append(last)
    return picked


if os.path.exists(OUT_PATH):
    print('Submission already exists! Aborting.')
else:
    os.makedirs(os.path.join(OUT_PATH, 'prediction'))

    for video in log_progress(videos):
        if not video.endswith('.npy'):
            # Not an embedding file written by the extraction loop.
            continue
        video = video[:-len('.npy')]

        embd = np.load('out_priv/' + video + '.npy')

        scores = predictor.predict_proba(embd)
        tinds = np.asarray(X_test_tinds[video + '.csv'])
        df = pd.read_csv(os.path.join(LABELS_TEST_PATH, video + '.csv'))

        # Iterate by position: the result is written back with .iloc, so the
        # row number (not the index label) is what is needed.
        classes = _pick_classes(df['Time'].values, tinds, scores)
        for row, y_pred in enumerate(classes):
            if y_pred is None:
                # No window covers this time and there is no earlier decision
                # in this video to fall back on; the row is left untouched.
                print('Warning!')
                continue

            # Class k is written to column k + 1.
            # NOTE(review): presumably column 0 is `Time` and the class
            # columns follow in classifier order - confirm against the CSVs.
            df.iloc[row, y_pred + 1] = 1.0

        df.to_csv(os.path.join(OUT_PATH, 'prediction', video + '.csv'), index=False)