### Dependencies

In [16]:
import cv2
import os
import sys

import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import tensorflow_hub as hub

from tensorflow.python import pywrap_tensorflow

sys.path.append('..')

import video_input as vi
import model as model
import utils as utils

%matplotlib inline
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [17]:
# Read the project configuration, then point the data locations one level up,
# since the paths in config.yml are joined with '..' from this notebook.
params = utils.yaml_to_dict('../config.yml')
for path_key in ('data_dir', 'videos_folder'):
    params[path_key] = os.path.join('..', params[path_key])

In [18]:
# Show the configuration dictionary after the path adjustments made above.
params

{'batch_size': 3,
 'classes_amount': 101,
 'data_dir': '../data',
 'eval_steps': 10,
 'json_data_path': 'data/activity_net.v1-2.min.json',
 'json_metadata_path': 'data/training_meta_data.json',
 'keep_checkpoint_max': 3,
 'learning_rate': 0.0001,
 'log_step_count_steps': 20,
 'max_steps': 21000,
 'model': 'gap',
 'model_dir': '.temp/checkpoints',
 'num_epochs': 1000,
 'resize': [224, 224],
 'save_checkpoints_steps': 20,
 'save_summary_steps': 20,
 'shuffle': True,
 'skip_frames': 10,
 'start_delay_secs': 10,
 'temp_dir': '.temp',
 'throttle_secs': 10,
 'videos_folder': '../data/videos',
 'weight_factor': 8}

### Set test image

In [19]:
# Load a single test image, resize it and wrap it into a one-image batch.
img_path = os.path.join('..',params['temp_dir'], 'test_img.jpg')
# NOTE(review): these match params['resize'] ([224, 224]) in the displayed
# config — consider reading them from there instead of hard-coding.
img_width = 224
img_height = 224

test_img = cv2.imread(img_path)
# cv2.imread does not raise on a missing/unreadable file; it returns None,
# which would otherwise surface as an opaque AttributeError on `.shape`.
if test_img is None:
    raise FileNotFoundError('Could not read image: {}'.format(img_path))
print('Original Size',test_img.shape)

# cv2.resize takes the target size as (width, height).
# NOTE(review): OpenCV loads images in BGR channel order — confirm this is
# what the pretrained model expects.
img_resize = cv2.resize(test_img,(img_width,img_height))
# Prepend an axis of length 1 so the array has a leading batch dimension.
img_resize = img_resize[None, ...]

# Scale pixel values by 1/255.
img_normalized = (img_resize / 255.0)
print('Resized and normalized image', img_normalized.shape)

Original Size (2448, 3264, 3)
Resized and normalized image (1, 224, 224, 3)


### Load pretrained model

In [24]:
# Build the pretrained network via the project's `model` module.
# NOTE(review): this calls an underscore-prefixed (private) helper.
pretrain_model = model._initialize_pretrained_model()

### Make predictions with different models

In [28]:
# Point the JSON paths one directory up, like the other data paths.
# The guard makes the cell idempotent: re-running it used to prepend another
# '..' each time ('../../data/...'). Paths that already start with '..' are
# left untouched.
for _json_key in ('json_data_path', 'json_metadata_path'):
    _json_path = params[_json_key]
    if not os.path.normpath(_json_path).startswith(os.pardir + os.sep):
        params[_json_key] = os.path.join('..', _json_path)

In [53]:
# Create the video data iterator and keep the first element of the first
# item it yields as the sample used in the cells below.
video_gen = vi.all_data_videos(params)
first_item = next(video_gen)
particular_frames = first_item[0]

In [55]:
# Prepend a batch axis of length 1. Guarded so that re-running the cell does
# not keep stacking extra leading axes onto the same variable.
# assumes the sample is a rank-3 array before batching — TODO confirm
if particular_frames.ndim == 3:
    particular_frames = particular_frames[None, ...]

In [56]:
# Run the pretrained model on the batched sample to obtain its feature map.
block4_pool_features = pretrain_model.predict(particular_frames)

In [57]:
# Inspect the shape of the array returned by predict.
block4_pool_features.shape

(1, 5, 5, 1536)

In [59]:
# Build the GAP head on top of the extracted features and evaluate it once.
x = model.gap_module(block4_pool_features)

with tf.Session() as sess:
    # Initialise variables first, then lookup tables, before evaluating `x`.
    for init_op in (tf.global_variables_initializer(), tf.tables_initializer()):
        sess.run(init_op)
    elements = sess.run(x)

In [60]:
# Show the first entry (index 0 along the leading axis) of the evaluated output.
elements[0,...]

array([0.5006858 , 0.49980167, 0.49955243, 0.5017479 , 0.5001104 ,
       0.5020521 , 0.5002102 , 0.5001207 , 0.50145364, 0.49953502,
       0.50092816, 0.502103  , 0.4986282 , 0.49977246, 0.49872294,
       0.5001548 , 0.4990335 , 0.5000021 , 0.50046325, 0.49861667,
       0.50012636, 0.4997803 , 0.50031716, 0.50005025, 0.4995632 ,
       0.49796578, 0.50134337, 0.49940774, 0.49767587, 0.5003512 ,
       0.4996356 , 0.49937448, 0.49948993, 0.5000551 , 0.5019626 ,
       0.5012438 , 0.49845654, 0.4976322 , 0.50193685, 0.49563825,
       0.4990612 , 0.49995482, 0.50074804, 0.50087565, 0.4993557 ,
       0.498907  , 0.5002777 , 0.50050974, 0.50073975, 0.5006029 ,
       0.49967209, 0.50180435, 0.5005373 , 0.49922538, 0.5002415 ,
       0.50008035, 0.50154585, 0.4981932 , 0.4986977 , 0.49672168,
       0.5001936 , 0.5020308 , 0.49882108, 0.5001076 , 0.5005728 ,
       0.49986482, 0.50095636, 0.5002811 , 0.49961215, 0.49920332,
       0.5035722 , 0.50091654, 0.49852088, 0.49961802, 0.50062

In [61]:
# Build the dense head on top of the extracted features and evaluate it once.
x = model.dense_module(block4_pool_features)

with tf.Session() as sess:
    init_variables = tf.global_variables_initializer()
    init_tables = tf.tables_initializer()
    # Variables are initialised before tables, then `x` is evaluated.
    sess.run(init_variables)
    sess.run(init_tables)
    elements = sess.run(x)

In [62]:
# Show the first entry (index 0 along the leading axis) of the evaluated output.
elements[0,...]

array([ 0.6181099 ,  0.774728  , -0.62187123, -0.9923966 , -0.02499747,
        0.30077046, -0.32329673, -0.21192867,  0.76027584,  0.11780857,
       -1.2356801 , -0.26320225,  0.36232397,  0.28695464, -0.22462136,
        0.64828837, -0.6722048 ,  0.92188233,  0.47824645,  0.14497358,
       -1.1362485 , -0.42516565, -0.825943  , -0.7148176 , -0.68884337,
       -0.6185166 , -0.01969439,  0.31619963, -0.54573697,  0.12805745,
        1.3144857 , -0.5395431 ,  0.43202168, -0.7889519 ,  0.79659104,
       -0.2636819 , -0.5107892 , -0.09784085,  0.14369413,  0.16350421,
       -0.54978734, -0.46457025, -0.34689838, -0.61426395, -0.05663039,
       -0.58806545, -0.04766598, -0.00789861,  0.84846556, -0.16402294,
       -0.95529836, -0.37164813, -0.5013966 , -0.04599807, -0.15904763,
       -0.3082617 , -0.38378876, -0.9938363 ,  0.06937587,  0.48646542,
       -0.516217  ,  0.28221872, -0.82786745, -0.26739353,  0.70640063,
        0.31455636,  0.45440435, -0.33601806, -0.84215873,  0.60

## Preprocessing data and making logits

In [104]:
# Create the iterator over the video data that the loop below consumes.
# NOTE(review): if this is a one-shot generator, this cell must be re-run
# before iterating again — confirm against vi.all_data_videos.
all_data_videos = vi.all_data_videos(params)

In [None]:
# NOTE(review): this cell has no execution count and repeats the earlier
# `block4_pool_features` computation verbatim — likely a leftover; consider
# removing it.
block4_pool_features = pretrain_model.predict(particular_frames)

In [105]:
# Run the pretrained model over every item produced by the data iterator and
# keep the resulting feature map together with its label.
videos_dict = dict()

for data in all_data_videos:

    # Each item carries the frames at index 0 and the label at index 1.
    image_frames = data[0]
    image_label = data[1]

    # Single forward pass per item; the result is reused below instead of
    # calling predict a second time on the same frames.
    image_feature_map = pretrain_model.predict(image_frames)

    # NOTE(review): these keys are overwritten on every iteration, so only the
    # last item survives the loop — confirm whether results should accumulate.
    videos_dict['feature_map'] = image_feature_map
    videos_dict['label'] = image_label

    print(videos_dict['feature_map'].shape,videos_dict['label'].shape)

(3, 5, 5, 1536) (3, 101)
(3, 5, 5, 1536) (3, 101)
