In [1]:
# in this file we do what we need to do

In [39]:
import importlib
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import LSTM, Dense, Masking, Input
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [30]:
import data_processing
import pathways

importlib.reload(data_processing)
importlib.reload(pathways)
from data_processing import dataprocessing, datasetup
from pathways import pathways

In [31]:
# Define the repos we will use
scenarios_exp = pathways.home_videos_exp()
scenarios_no_touch = pathways.home_videos_no_touch()

# Collect all relevant paths to videos and corresponding jsons
packaged_data_exp = datasetup.data_packager(scenarios_exp)
packaged_data_test = datasetup.data_packager(scenarios_no_touch)

In [32]:
# Filter and prepare the data
X_all_exp, y_all_exp = [], []

for pair in packaged_data_exp:
    video_path = pair["video"]
    json_path = pair["json"]

    if not dataprocessing.qualification(json_path, min_frames=12):
        continue

    X, y, _, _ = dataprocessing.prepare_data(video_path, json_path, tail=0.1)

    if len(X) > 0 and len(y) > 0:
        X_all_exp.append(X)
        y_all_exp.append(y)

X_all_exp = np.array(X_all_exp, dtype=object)
y_all_exp = np.array(y_all_exp, dtype=object)

In [36]:
type(y_all_exp)

numpy.ndarray

In [37]:
# RNN APPROACH - LSTM

# input sequence of features
# output predict k next positions, should correspond to nb of frames that were removed
feature_size = X_all_exp[0].shape[1]

X_padded = pad_sequences(X_all_exp, dtype='float32', padding='post')
y_padded = pad_sequences(y_all_exp, dtype='float32', padding='post')

start_token = np.zeros((y_padded.shape[0], 1, feature_size), dtype="float32")
y_in = np.concatenate([start_token, y_padded[:, :-1, :]], axis=1)

y_out = y_padded



In [40]:
latent_dim = 128

encoder_inputs = Input(shape=(None, feature_size))
encoder_lstm = LSTM(latent_dim, return_state=True)
_, state_h, state_c = encoder_lstm(encoder_inputs)
encoder_states = [state_h, state_c]

decoder_inputs = Input(shape=(None, feature_size))
decoder_lstm = LSTM(latent_dim, return_sequences=True, return_state=True)
decoder_outputs, _, _ = decoder_lstm(decoder_inputs, initial_state=encoder_states)
decoder_dense = Dense(feature_size)
decoder_outputs = decoder_dense(decoder_outputs)

model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
model.compile(optimizer="adam", loss="mse")

model.summary()

In [41]:
history = model.fit(
    [X_padded, y_in],
    y_out,
    batch_size=4,
    epochs=20,
    validation_split=0.2
)

Epoch 1/20
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 27ms/step - loss: 21773.7012 - val_loss: 14427.9580
Epoch 2/20
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 23ms/step - loss: 20744.4766 - val_loss: 13926.3457
Epoch 3/20
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 24ms/step - loss: 20395.5215 - val_loss: 13479.2070
Epoch 4/20
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 23ms/step - loss: 19095.9160 - val_loss: 13079.5908
Epoch 5/20
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 24ms/step - loss: 19088.9648 - val_loss: 12702.9453
Epoch 6/20
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 22ms/step - loss: 17907.8789 - val_loss: 12367.7168
Epoch 7/20
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 23ms/step - loss: 18752.0840 - val_loss: 12052.5996
Epoch 8/20
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 24ms/step - loss: 17460.1895 - val_l