In [1]:
import numpy as np
import h5py
from progressbar import ProgressBar
import random

# Input datasets. This notebook only reads from them, so they are opened
# explicitly read-only ('r'): without a mode, older h5py releases open the
# file read/write (and may create an empty file when the path is wrong).
f_video_features = h5py.File('/imatge/amontes/work/datasets/ActivityNet/v1.3/features_dataset/features_dataset.hdf5', 'r')
f_audio_features = h5py.File('./audio_descriptors.hdf5', 'r')
f_output = h5py.File('/imatge/amontes/work/datasets/ActivityNet/v1.3/output_dataset/output_dataset.hdf5', 'r')

# Group of the HDF5 files to read, and an optional cap on the number of
# videos used (None or 0 means "use all of them").
subset = 'validation'
max_nb_videos = None

# Number of parallel positions the videos are distributed over, and the
# number of consecutive instances grouped into one sequence.
batch_size = 256
timesteps = 20

# Expected width of each per-instance vector; checked with assertions when
# the data is loaded. output_size is the number of classes used for the
# one-hot encoding of the outputs.
features_size = 4096
output_size = 201
mfcc_size = 80
spec_size = 8

In [2]:
def to_categorical(y, nb_classes=None):
    '''Convert a vector of integer class labels into a binary (one-hot)
    class matrix, for use with categorical_crossentropy.

    # Arguments
        y: sequence or array of class labels, one entry per sample along
            the first axis. Labels are cast to integers, so labels stored
            as floats (e.g. 3.0) are accepted. Any trailing axes are
            flattened per sample, so shapes (n,) and (n, 1) give the same
            result and a row holding several labels sets several columns.
        nb_classes: number of columns of the result. When falsy (None or
            0) it is inferred as `max(y) + 1`.

    # Returns
        A float array of shape `(len(y), nb_classes)` where `Y[i, y[i]]`
        is 1 and every other entry is 0.

    # Raises
        IndexError: if a label does not fit in `nb_classes` columns.
    '''
    labels = np.asarray(y, dtype=np.int64)
    nb_samples = len(labels)
    if not nb_classes:
        # An empty label vector has no maximum; fall back to zero columns
        # instead of letting the reduction fail.
        nb_classes = int(labels.max()) + 1 if labels.size else 0
    Y = np.zeros((nb_samples, nb_classes))
    if labels.size:
        # Column vector of row numbers broadcast against the per-sample
        # labels: one vectorised assignment replaces the per-row loop.
        rows = np.arange(nb_samples).reshape(-1, 1)
        Y[rows, labels.reshape(nb_samples, -1)] = 1.
    return Y

In [3]:
# Candidate videos: every id that has visual features in the chosen subset.
# list(...) materialises the keys so that the filtering and the in-place
# shuffle below work whether keys() returns a list or a view object.
videos = list(f_video_features[subset].keys())

# Ids that have each kind of audio descriptor, read once into sets so every
# membership test is a hash lookup rather than a fresh listing of the group.
mfcc_ids = set(f_audio_features['mfcc'].keys())
spec_ids = set(f_audio_features['spec'].keys())

# A video is only usable when both audio descriptors exist for it.
videos_not_features = [v for v in videos if v not in mfcc_ids or v not in spec_ids]
videos = [v for v in videos if v in mfcc_ids and v in spec_ids]

# NOTE(review): the shuffle is unseeded, so the video order (and with it the
# assignment of videos to batch positions) differs on every run.
random.shuffle(videos)
if max_nb_videos and max_nb_videos < len(videos):
    videos = videos[:max_nb_videos]

nb_videos = len(videos)
print('Number of videos: {}'.format(nb_videos))

Number of videos: 4675


In [4]:
# Per batch position: the ordered video ids assigned to it, the running
# number of instances it holds, and the cumulative instance count recorded
# after each video was added.
sequence_stack = [[] for _ in range(batch_size)]
nb_clips_stack = np.zeros(batch_size, dtype=np.int64)
accumulative_clips_stack = [[] for _ in range(batch_size)]

# Greedy balancing: each video goes to whichever position currently holds
# the fewest instances.
for video_id in videos:
    min_pos = nb_clips_stack.argmin()
    sequence_stack[min_pos].append(video_id)
    nb_clips = f_video_features[subset][video_id].shape[0]
    nb_clips_stack[min_pos] = nb_clips_stack[min_pos] + nb_clips
    accumulative_clips_stack[min_pos].append(nb_clips_stack[min_pos])

# The shortest position fixes how many complete batches of `timesteps`
# instances exist for every position; the longest one fixes how much room
# is needed to hold all the data before it is trimmed.
min_sequence = nb_clips_stack.min()
max_sequence = nb_clips_stack.max()
nb_batches = min_sequence // timesteps
nb_batches_long = max_sequence // timesteps + 1
print('Number of batches: {}'.format(nb_batches))

Number of batches: 185


In [5]:
def _trim_to_sequences(data, nb_sequences, sequence_length, row_size):
    '''Keep the first `nb_sequences * sequence_length` rows of `data`, check
    that none of them is all zeros (i.e. every kept row was filled) and
    return them reshaped to `(nb_sequences, sequence_length, row_size)`.
    '''
    data = data[:nb_sequences*sequence_length, :]
    assert np.all(np.any(data, axis=1))
    return data.reshape((nb_sequences, sequence_length, row_size))


# Flat, zero-initialised buffers with one row per instance, sized for the
# longest batch position so that every video fits before trimming.
nb_rows_long = nb_batches_long*batch_size*timesteps
video_features = np.zeros((nb_rows_long, features_size))
mfcc_features = np.zeros((nb_rows_long, mfcc_size))
spec_features = np.zeros((nb_rows_long, spec_size))
output = np.zeros((nb_rows_long, output_size))
index = np.arange(nb_rows_long)

progbar = ProgressBar(max_value=batch_size)

for i in range(batch_size):
    # Rows are laid out in consecutive blocks of `timesteps` rows that cycle
    # through the batch positions; select the rows owned by position i.
    batch_index = index // timesteps % batch_size == i
    # The selected row numbers are the same for every video of this
    # position, so gather them once here rather than once per video.
    batch_rows = index[batch_index]
    progbar.update(i)

    pos = 0
    for video_id in sequence_stack[i]:
        # Video features
        vid_features = f_video_features[subset][video_id][...]
        assert vid_features.shape[1] == features_size
        nb_instances = vid_features.shape[0]
        # MFCC features
        mfcc_feat = f_audio_features['mfcc'][video_id][...]
        assert mfcc_feat.shape == (nb_instances, mfcc_size)
        # Spec features: a single row per video, repeated for each instance
        spec_feat = f_audio_features['spec'][video_id][...]
        assert spec_feat.shape == (1, spec_size), spec_feat.shape
        spec_feat = np.broadcast_to(spec_feat, (nb_instances, spec_size))
        # Output
        output_classes = f_output[subset][video_id][...]
        assert nb_instances == output_classes.shape[0]

        # Write this video right after the previous one of the same position.
        video_index = batch_rows[pos:pos+nb_instances]
        video_features[video_index,:] = vid_features
        mfcc_features[video_index,:] = mfcc_feat
        spec_features[video_index,:] = spec_feat
        output[video_index] = to_categorical(output_classes, nb_classes=output_size)

        pos += nb_instances

progbar.finish()

f_video_features.close()
f_audio_features.close()
f_output.close()

# Keep only the batches that are complete for every position (nb_batches is
# derived from the shortest position) and group rows into sequences of
# `timesteps` instances.
nb_sequences = nb_batches*batch_size
video_features = _trim_to_sequences(video_features, nb_sequences, timesteps, features_size)
mfcc_features = _trim_to_sequences(mfcc_features, nb_sequences, timesteps, mfcc_size)
spec_features = _trim_to_sequences(spec_features, nb_sequences, timesteps, spec_size)
output = _trim_to_sequences(output, nb_sequences, timesteps, output_size)

Putting data on position 0 of each batch


  0% (  1 of 256) |                        | Elapsed Time: 0:00:01 ETA:  0:08:18

Putting data on position 1 of each batch


  0% (  2 of 256) |                        | Elapsed Time: 0:00:03 ETA:  0:08:30

Putting data on position 2 of each batch


  1% (  3 of 256) |                        | Elapsed Time: 0:00:05 ETA:  0:08:08

Putting data on position 3 of each batch


  1% (  4 of 256) |                        | Elapsed Time: 0:00:07 ETA:  0:08:07

Putting data on position 4 of each batch


  1% (  5 of 256) |                        | Elapsed Time: 0:00:09 ETA:  0:08:01

Putting data on position 5 of each batch


  2% (  6 of 256) |                        | Elapsed Time: 0:00:11 ETA:  0:07:55

Putting data on position 6 of each batch


  2% (  7 of 256) |                        | Elapsed Time: 0:00:13 ETA:  0:07:51

Putting data on position 7 of each batch


  3% (  8 of 256) |                        | Elapsed Time: 0:00:15 ETA:  0:07:51

Putting data on position 8 of each batch


  3% (  9 of 256) |                        | Elapsed Time: 0:00:17 ETA:  0:07:51

Putting data on position 9 of each batch


  3% ( 10 of 256) |                        | Elapsed Time: 0:00:19 ETA:  0:07:46

Putting data on position 10 of each batch


  4% ( 11 of 256) |#                       | Elapsed Time: 0:00:20 ETA:  0:07:40

Putting data on position 11 of each batch


  4% ( 12 of 256) |#                       | Elapsed Time: 0:00:22 ETA:  0:07:38

Putting data on position 12 of each batch


  5% ( 13 of 256) |#                       | Elapsed Time: 0:00:24 ETA:  0:07:28

Putting data on position 13 of each batch


  5% ( 14 of 256) |#                       | Elapsed Time: 0:00:26 ETA:  0:07:24

Putting data on position 14 of each batch


  5% ( 15 of 256) |#                       | Elapsed Time: 0:00:28 ETA:  0:07:25

Putting data on position 15 of each batch


  6% ( 16 of 256) |#                       | Elapsed Time: 0:00:29 ETA:  0:07:22

Putting data on position 16 of each batch


  6% ( 17 of 256) |#                       | Elapsed Time: 0:00:31 ETA:  0:07:15

Putting data on position 17 of each batch


  7% ( 18 of 256) |#                       | Elapsed Time: 0:00:33 ETA:  0:07:12

Putting data on position 18 of each batch


  7% ( 19 of 256) |#                       | Elapsed Time: 0:00:35 ETA:  0:07:13

Putting data on position 19 of each batch


  7% ( 20 of 256) |#                       | Elapsed Time: 0:00:37 ETA:  0:07:13

Putting data on position 20 of each batch


  8% ( 21 of 256) |#                       | Elapsed Time: 0:00:39 ETA:  0:07:09

Putting data on position 21 of each batch


  8% ( 22 of 256) |##                      | Elapsed Time: 0:00:41 ETA:  0:07:16

Putting data on position 22 of each batch


  8% ( 23 of 256) |##                      | Elapsed Time: 0:00:43 ETA:  0:07:24

Putting data on position 23 of each batch


  9% ( 24 of 256) |##                      | Elapsed Time: 0:00:45 ETA:  0:07:22

Putting data on position 24 of each batch


  9% ( 25 of 256) |##                      | Elapsed Time: 0:00:47 ETA:  0:07:21

Putting data on position 25 of each batch


 10% ( 26 of 256) |##                      | Elapsed Time: 0:00:48 ETA:  0:07:19

Putting data on position 26 of each batch


 10% ( 27 of 256) |##                      | Elapsed Time: 0:00:50 ETA:  0:07:14

Putting data on position 27 of each batch


 10% ( 28 of 256) |##                      | Elapsed Time: 0:00:52 ETA:  0:07:12

Putting data on position 28 of each batch


 11% ( 29 of 256) |##                      | Elapsed Time: 0:00:54 ETA:  0:07:08

Putting data on position 29 of each batch


 11% ( 30 of 256) |##                      | Elapsed Time: 0:00:56 ETA:  0:07:11

Putting data on position 30 of each batch


 12% ( 31 of 256) |##                      | Elapsed Time: 0:00:58 ETA:  0:07:13

Putting data on position 31 of each batch


 12% ( 32 of 256) |###                     | Elapsed Time: 0:01:00 ETA:  0:07:00

Putting data on position 32 of each batch


 12% ( 33 of 256) |###                     | Elapsed Time: 0:01:02 ETA:  0:06:58

Putting data on position 33 of each batch


 13% ( 34 of 256) |###                     | Elapsed Time: 0:01:03 ETA:  0:06:53

Putting data on position 34 of each batch


 13% ( 35 of 256) |###                     | Elapsed Time: 0:01:05 ETA:  0:06:54

Putting data on position 35 of each batch


 14% ( 36 of 256) |###                     | Elapsed Time: 0:01:07 ETA:  0:06:52

Putting data on position 36 of each batch


 14% ( 37 of 256) |###                     | Elapsed Time: 0:01:09 ETA:  0:06:49

Putting data on position 37 of each batch


 14% ( 38 of 256) |###                     | Elapsed Time: 0:01:11 ETA:  0:06:48

Putting data on position 38 of each batch


 15% ( 39 of 256) |###                     | Elapsed Time: 0:01:13 ETA:  0:06:49

Putting data on position 39 of each batch


 15% ( 40 of 256) |###                     | Elapsed Time: 0:01:15 ETA:  0:06:36

Putting data on position 40 of each batch


 16% ( 41 of 256) |###                     | Elapsed Time: 0:01:16 ETA:  0:06:33

Putting data on position 41 of each batch


 16% ( 42 of 256) |###                     | Elapsed Time: 0:01:18 ETA:  0:06:32

Putting data on position 42 of each batch


 16% ( 43 of 256) |####                    | Elapsed Time: 0:01:20 ETA:  0:06:33

Putting data on position 43 of each batch


 17% ( 44 of 256) |####                    | Elapsed Time: 0:01:22 ETA:  0:06:32

Putting data on position 44 of each batch


 17% ( 45 of 256) |####                    | Elapsed Time: 0:01:24 ETA:  0:06:28

Putting data on position 45 of each batch


 17% ( 46 of 256) |####                    | Elapsed Time: 0:01:26 ETA:  0:06:28

Putting data on position 46 of each batch


 18% ( 47 of 256) |####                    | Elapsed Time: 0:01:27 ETA:  0:06:26

Putting data on position 47 of each batch


 18% ( 48 of 256) |####                    | Elapsed Time: 0:01:29 ETA:  0:06:20

Putting data on position 48 of each batch


 19% ( 49 of 256) |####                    | Elapsed Time: 0:01:31 ETA:  0:06:20

Putting data on position 49 of each batch


 19% ( 50 of 256) |####                    | Elapsed Time: 0:01:34 ETA:  0:06:40

Putting data on position 50 of each batch


 19% ( 51 of 256) |####                    | Elapsed Time: 0:01:36 ETA:  0:06:58

Putting data on position 51 of each batch


 20% ( 52 of 256) |####                    | Elapsed Time: 0:01:39 ETA:  0:07:18

Putting data on position 52 of each batch


 20% ( 53 of 256) |####                    | Elapsed Time: 0:01:41 ETA:  0:07:23

Putting data on position 53 of each batch


 21% ( 54 of 256) |#####                   | Elapsed Time: 0:01:43 ETA:  0:07:20

Putting data on position 54 of each batch


 21% ( 55 of 256) |#####                   | Elapsed Time: 0:01:45 ETA:  0:07:20

Putting data on position 55 of each batch


 21% ( 56 of 256) |#####                   | Elapsed Time: 0:01:47 ETA:  0:07:20

Putting data on position 56 of each batch


 22% ( 57 of 256) |#####                   | Elapsed Time: 0:01:49 ETA:  0:07:18

Putting data on position 57 of each batch


 22% ( 58 of 256) |#####                   | Elapsed Time: 0:01:50 ETA:  0:07:14

Putting data on position 58 of each batch


 23% ( 59 of 256) |#####                   | Elapsed Time: 0:01:52 ETA:  0:06:53

Putting data on position 59 of each batch


 23% ( 60 of 256) |#####                   | Elapsed Time: 0:01:54 ETA:  0:06:28

Putting data on position 60 of each batch


 23% ( 61 of 256) |#####                   | Elapsed Time: 0:01:56 ETA:  0:05:58

Putting data on position 61 of each batch


 24% ( 62 of 256) |#####                   | Elapsed Time: 0:01:57 ETA:  0:05:46

Putting data on position 62 of each batch


 24% ( 63 of 256) |#####                   | Elapsed Time: 0:01:59 ETA:  0:05:50

Putting data on position 63 of each batch


 25% ( 64 of 256) |######                  | Elapsed Time: 0:02:01 ETA:  0:05:39

Putting data on position 64 of each batch


 25% ( 65 of 256) |######                  | Elapsed Time: 0:02:03 ETA:  0:05:34

Putting data on position 65 of each batch


 25% ( 66 of 256) |######                  | Elapsed Time: 0:02:05 ETA:  0:05:33

Putting data on position 66 of each batch


 26% ( 67 of 256) |######                  | Elapsed Time: 0:02:06 ETA:  0:05:30

Putting data on position 67 of each batch


 26% ( 68 of 256) |######                  | Elapsed Time: 0:02:08 ETA:  0:05:28

Putting data on position 68 of each batch


 26% ( 69 of 256) |######                  | Elapsed Time: 0:02:10 ETA:  0:05:19

Putting data on position 69 of each batch


 27% ( 70 of 256) |######                  | Elapsed Time: 0:02:12 ETA:  0:05:26

Putting data on position 70 of each batch


 27% ( 71 of 256) |######                  | Elapsed Time: 0:02:14 ETA:  0:05:26

Putting data on position 71 of each batch


 28% ( 72 of 256) |######                  | Elapsed Time: 0:02:15 ETA:  0:05:19

Putting data on position 72 of each batch


 28% ( 73 of 256) |######                  | Elapsed Time: 0:02:17 ETA:  0:05:26

Putting data on position 73 of each batch


 28% ( 74 of 256) |######                  | Elapsed Time: 0:02:19 ETA:  0:05:20

Putting data on position 74 of each batch


 29% ( 75 of 256) |#######                 | Elapsed Time: 0:02:21 ETA:  0:05:16

Putting data on position 75 of each batch


 29% ( 76 of 256) |#######                 | Elapsed Time: 0:02:23 ETA:  0:05:15

Putting data on position 76 of each batch


 30% ( 77 of 256) |#######                 | Elapsed Time: 0:02:24 ETA:  0:05:16

Putting data on position 77 of each batch


 30% ( 78 of 256) |#######                 | Elapsed Time: 0:02:26 ETA:  0:05:20

Putting data on position 78 of each batch


 30% ( 79 of 256) |#######                 | Elapsed Time: 0:02:28 ETA:  0:05:13

Putting data on position 79 of each batch


 31% ( 80 of 256) |#######                 | Elapsed Time: 0:02:30 ETA:  0:05:07

Putting data on position 80 of each batch


 31% ( 81 of 256) |#######                 | Elapsed Time: 0:02:31 ETA:  0:05:02

Putting data on position 81 of each batch


 32% ( 82 of 256) |#######                 | Elapsed Time: 0:02:33 ETA:  0:05:00

Putting data on position 82 of each batch


 32% ( 83 of 256) |#######                 | Elapsed Time: 0:02:35 ETA:  0:05:06

Putting data on position 83 of each batch


 32% ( 84 of 256) |#######                 | Elapsed Time: 0:02:37 ETA:  0:05:05

Putting data on position 84 of each batch


 33% ( 85 of 256) |#######                 | Elapsed Time: 0:02:39 ETA:  0:05:05

Putting data on position 85 of each batch


 33% ( 86 of 256) |########                | Elapsed Time: 0:02:41 ETA:  0:05:03

Putting data on position 86 of each batch


 33% ( 87 of 256) |########                | Elapsed Time: 0:02:43 ETA:  0:05:04

Putting data on position 87 of each batch


 34% ( 88 of 256) |########                | Elapsed Time: 0:02:45 ETA:  0:05:05

Putting data on position 88 of each batch


 34% ( 89 of 256) |########                | Elapsed Time: 0:02:46 ETA:  0:05:10

Putting data on position 89 of each batch


 35% ( 90 of 256) |########                | Elapsed Time: 0:02:48 ETA:  0:05:08

Putting data on position 90 of each batch


 35% ( 91 of 256) |########                | Elapsed Time: 0:02:50 ETA:  0:05:01

Putting data on position 91 of each batch


 35% ( 92 of 256) |########                | Elapsed Time: 0:02:52 ETA:  0:04:55

Putting data on position 92 of each batch


 36% ( 93 of 256) |########                | Elapsed Time: 0:02:54 ETA:  0:04:58

Putting data on position 93 of each batch


 36% ( 94 of 256) |########                | Elapsed Time: 0:02:55 ETA:  0:04:54

Putting data on position 94 of each batch


 37% ( 95 of 256) |########                | Elapsed Time: 0:02:57 ETA:  0:04:51

Putting data on position 95 of each batch


 37% ( 96 of 256) |#########               | Elapsed Time: 0:02:59 ETA:  0:04:46

Putting data on position 96 of each batch


 37% ( 97 of 256) |#########               | Elapsed Time: 0:03:01 ETA:  0:04:42

Putting data on position 97 of each batch


 38% ( 98 of 256) |#########               | Elapsed Time: 0:03:03 ETA:  0:04:36

Putting data on position 98 of each batch


 38% ( 99 of 256) |#########               | Elapsed Time: 0:03:04 ETA:  0:04:35

Putting data on position 99 of each batch


 39% (100 of 256) |#########               | Elapsed Time: 0:03:06 ETA:  0:04:33

Putting data on position 100 of each batch


 39% (101 of 256) |#########               | Elapsed Time: 0:03:08 ETA:  0:04:32

Putting data on position 101 of each batch


 39% (102 of 256) |#########               | Elapsed Time: 0:03:10 ETA:  0:04:27

Putting data on position 102 of each batch


 40% (103 of 256) |#########               | Elapsed Time: 0:03:11 ETA:  0:04:24

Putting data on position 103 of each batch


 40% (104 of 256) |#########               | Elapsed Time: 0:03:13 ETA:  0:04:24

Putting data on position 104 of each batch


 41% (105 of 256) |#########               | Elapsed Time: 0:03:15 ETA:  0:04:19

Putting data on position 105 of each batch


 41% (106 of 256) |#########               | Elapsed Time: 0:03:17 ETA:  0:04:19

Putting data on position 106 of each batch


 41% (107 of 256) |##########              | Elapsed Time: 0:03:19 ETA:  0:04:18

Putting data on position 107 of each batch


 42% (108 of 256) |##########              | Elapsed Time: 0:03:20 ETA:  0:04:18

Putting data on position 108 of each batch


 42% (109 of 256) |##########              | Elapsed Time: 0:03:22 ETA:  0:04:16

Putting data on position 109 of each batch


 42% (110 of 256) |##########              | Elapsed Time: 0:03:24 ETA:  0:04:09

Putting data on position 110 of each batch


 43% (111 of 256) |##########              | Elapsed Time: 0:03:26 ETA:  0:04:04

Putting data on position 111 of each batch


 43% (112 of 256) |##########              | Elapsed Time: 0:03:27 ETA:  0:04:03

Putting data on position 112 of each batch


 44% (113 of 256) |##########              | Elapsed Time: 0:03:29 ETA:  0:03:59

Putting data on position 113 of each batch


 44% (114 of 256) |##########              | Elapsed Time: 0:03:31 ETA:  0:04:09

Putting data on position 114 of each batch


 44% (115 of 256) |##########              | Elapsed Time: 0:03:33 ETA:  0:04:02

Putting data on position 115 of each batch


 45% (116 of 256) |##########              | Elapsed Time: 0:03:35 ETA:  0:04:00

Putting data on position 116 of each batch


 45% (117 of 256) |##########              | Elapsed Time: 0:03:37 ETA:  0:04:02

Putting data on position 117 of each batch


 46% (118 of 256) |###########             | Elapsed Time: 0:03:38 ETA:  0:04:01

Putting data on position 118 of each batch


 46% (119 of 256) |###########             | Elapsed Time: 0:03:40 ETA:  0:04:02

Putting data on position 119 of each batch


 46% (120 of 256) |###########             | Elapsed Time: 0:03:42 ETA:  0:04:01

Putting data on position 120 of each batch


 47% (121 of 256) |###########             | Elapsed Time: 0:03:44 ETA:  0:04:00

Putting data on position 121 of each batch


 47% (122 of 256) |###########             | Elapsed Time: 0:03:45 ETA:  0:03:53

Putting data on position 122 of each batch


 48% (123 of 256) |###########             | Elapsed Time: 0:03:47 ETA:  0:03:49

Putting data on position 123 of each batch


 48% (124 of 256) |###########             | Elapsed Time: 0:03:49 ETA:  0:03:54

Putting data on position 124 of each batch


 48% (125 of 256) |###########             | Elapsed Time: 0:03:51 ETA:  0:03:52

Putting data on position 125 of each batch


 49% (126 of 256) |###########             | Elapsed Time: 0:03:53 ETA:  0:03:46

Putting data on position 126 of each batch


 49% (127 of 256) |###########             | Elapsed Time: 0:03:55 ETA:  0:03:44

Putting data on position 127 of each batch


 50% (128 of 256) |############            | Elapsed Time: 0:03:56 ETA:  0:03:41

Putting data on position 128 of each batch


 50% (129 of 256) |############            | Elapsed Time: 0:03:58 ETA:  0:03:42

Putting data on position 129 of each batch


 50% (130 of 256) |############            | Elapsed Time: 0:04:00 ETA:  0:03:40

Putting data on position 130 of each batch


 51% (131 of 256) |############            | Elapsed Time: 0:04:02 ETA:  0:03:41

Putting data on position 131 of each batch


 51% (132 of 256) |############            | Elapsed Time: 0:04:03 ETA:  0:03:33

Putting data on position 132 of each batch


 51% (133 of 256) |############            | Elapsed Time: 0:04:05 ETA:  0:03:27

Putting data on position 133 of each batch


 52% (134 of 256) |############            | Elapsed Time: 0:04:07 ETA:  0:03:25

Putting data on position 134 of each batch


 52% (135 of 256) |############            | Elapsed Time: 0:04:09 ETA:  0:03:25

Putting data on position 135 of each batch


 53% (136 of 256) |############            | Elapsed Time: 0:04:10 ETA:  0:03:22

Putting data on position 136 of each batch


 53% (137 of 256) |############            | Elapsed Time: 0:04:12 ETA:  0:03:21

Putting data on position 137 of each batch


 53% (138 of 256) |############            | Elapsed Time: 0:04:14 ETA:  0:03:18

Putting data on position 138 of each batch


 54% (139 of 256) |#############           | Elapsed Time: 0:04:16 ETA:  0:03:16

Putting data on position 139 of each batch


 54% (140 of 256) |#############           | Elapsed Time: 0:04:18 ETA:  0:03:20

Putting data on position 140 of each batch


 55% (141 of 256) |#############           | Elapsed Time: 0:04:20 ETA:  0:03:17

Putting data on position 141 of each batch


 55% (142 of 256) |#############           | Elapsed Time: 0:04:21 ETA:  0:03:19

Putting data on position 142 of each batch


 55% (143 of 256) |#############           | Elapsed Time: 0:04:23 ETA:  0:03:18

Putting data on position 143 of each batch


 56% (144 of 256) |#############           | Elapsed Time: 0:04:25 ETA:  0:03:19

Putting data on position 144 of each batch


 56% (145 of 256) |#############           | Elapsed Time: 0:04:27 ETA:  0:03:22

Putting data on position 145 of each batch


 57% (146 of 256) |#############           | Elapsed Time: 0:04:29 ETA:  0:03:24

Putting data on position 146 of each batch


 57% (147 of 256) |#############           | Elapsed Time: 0:04:31 ETA:  0:03:20

Putting data on position 147 of each batch


 57% (148 of 256) |#############           | Elapsed Time: 0:04:33 ETA:  0:03:21

Putting data on position 148 of each batch


 58% (149 of 256) |#############           | Elapsed Time: 0:04:34 ETA:  0:03:16

Putting data on position 149 of each batch


 58% (150 of 256) |##############          | Elapsed Time: 0:04:36 ETA:  0:03:19

Putting data on position 150 of each batch


 58% (151 of 256) |##############          | Elapsed Time: 0:04:38 ETA:  0:03:15

Putting data on position 151 of each batch


 59% (152 of 256) |##############          | Elapsed Time: 0:04:40 ETA:  0:03:11

Putting data on position 152 of each batch


 59% (153 of 256) |##############          | Elapsed Time: 0:04:41 ETA:  0:03:02

Putting data on position 153 of each batch


 60% (154 of 256) |##############          | Elapsed Time: 0:04:43 ETA:  0:02:59

Putting data on position 154 of each batch


 60% (155 of 256) |##############          | Elapsed Time: 0:04:45 ETA:  0:02:57

Putting data on position 155 of each batch


 60% (156 of 256) |##############          | Elapsed Time: 0:04:47 ETA:  0:02:58

Putting data on position 156 of each batch


 61% (157 of 256) |##############          | Elapsed Time: 0:04:49 ETA:  0:02:57

Putting data on position 157 of each batch


 61% (158 of 256) |##############          | Elapsed Time: 0:04:51 ETA:  0:02:56

Putting data on position 158 of each batch


 62% (159 of 256) |##############          | Elapsed Time: 0:04:53 ETA:  0:02:53

Putting data on position 159 of each batch


 62% (160 of 256) |###############         | Elapsed Time: 0:04:54 ETA:  0:02:51

Putting data on position 160 of each batch


 62% (161 of 256) |###############         | Elapsed Time: 0:04:56 ETA:  0:02:56

Putting data on position 161 of each batch


 63% (162 of 256) |###############         | Elapsed Time: 0:04:58 ETA:  0:02:59

Putting data on position 162 of each batch


 63% (163 of 256) |###############         | Elapsed Time: 0:05:00 ETA:  0:03:00

Putting data on position 163 of each batch


 64% (164 of 256) |###############         | Elapsed Time: 0:05:02 ETA:  0:02:57

Putting data on position 164 of each batch


 64% (165 of 256) |###############         | Elapsed Time: 0:05:04 ETA:  0:02:54

Putting data on position 165 of each batch


 64% (166 of 256) |###############         | Elapsed Time: 0:05:06 ETA:  0:02:52

Putting data on position 166 of each batch


 65% (167 of 256) |###############         | Elapsed Time: 0:05:08 ETA:  0:02:50

Putting data on position 167 of each batch


 65% (168 of 256) |###############         | Elapsed Time: 0:05:10 ETA:  0:02:54

Putting data on position 168 of each batch


 66% (169 of 256) |###############         | Elapsed Time: 0:05:12 ETA:  0:02:57

Putting data on position 169 of each batch


 66% (170 of 256) |###############         | Elapsed Time: 0:05:13 ETA:  0:02:50

Putting data on position 170 of each batch


 66% (171 of 256) |################        | Elapsed Time: 0:05:15 ETA:  0:02:45

Putting data on position 171 of each batch


 67% (172 of 256) |################        | Elapsed Time: 0:05:17 ETA:  0:02:43

Putting data on position 172 of each batch


 67% (173 of 256) |################        | Elapsed Time: 0:05:19 ETA:  0:02:40

Putting data on position 173 of each batch


 67% (174 of 256) |################        | Elapsed Time: 0:05:21 ETA:  0:02:38

Putting data on position 174 of each batch


 68% (175 of 256) |################        | Elapsed Time: 0:05:22 ETA:  0:02:31

Putting data on position 175 of each batch


 68% (176 of 256) |################        | Elapsed Time: 0:05:24 ETA:  0:02:25

Putting data on position 176 of each batch


 69% (177 of 256) |################        | Elapsed Time: 0:05:26 ETA:  0:02:22

Putting data on position 177 of each batch


 69% (178 of 256) |################        | Elapsed Time: 0:05:28 ETA:  0:02:20

Putting data on position 178 of each batch


 69% (179 of 256) |################        | Elapsed Time: 0:05:30 ETA:  0:02:25

Putting data on position 179 of each batch


 70% (180 of 256) |################        | Elapsed Time: 0:05:32 ETA:  0:02:22

Putting data on position 180 of each batch


 70% (181 of 256) |################        | Elapsed Time: 0:05:34 ETA:  0:02:20

Putting data on position 181 of each batch


 71% (182 of 256) |#################       | Elapsed Time: 0:05:35 ETA:  0:02:16

Putting data on position 182 of each batch


 71% (183 of 256) |#################       | Elapsed Time: 0:05:37 ETA:  0:02:10

Putting data on position 183 of each batch


 71% (184 of 256) |#################       | Elapsed Time: 0:05:39 ETA:  0:02:15

Putting data on position 184 of each batch


 72% (185 of 256) |#################       | Elapsed Time: 0:05:41 ETA:  0:02:16

Putting data on position 185 of each batch


 72% (186 of 256) |#################       | Elapsed Time: 0:05:43 ETA:  0:02:13

Putting data on position 186 of each batch


 73% (187 of 256) |#################       | Elapsed Time: 0:05:45 ETA:  0:02:11

Putting data on position 187 of each batch


 73% (188 of 256) |#################       | Elapsed Time: 0:05:47 ETA:  0:02:04

Putting data on position 188 of each batch


 73% (189 of 256) |#################       | Elapsed Time: 0:05:49 ETA:  0:02:04

Putting data on position 189 of each batch


 74% (190 of 256) |#################       | Elapsed Time: 0:05:50 ETA:  0:02:01

Putting data on position 190 of each batch


 74% (191 of 256) |#################       | Elapsed Time: 0:05:52 ETA:  0:02:00

Putting data on position 191 of each batch


 75% (192 of 256) |##################      | Elapsed Time: 0:05:54 ETA:  0:01:58

Putting data on position 192 of each batch


 75% (193 of 256) |##################      | Elapsed Time: 0:05:56 ETA:  0:01:53

Putting data on position 193 of each batch


 75% (194 of 256) |##################      | Elapsed Time: 0:05:58 ETA:  0:01:52

Putting data on position 194 of each batch


 76% (195 of 256) |##################      | Elapsed Time: 0:05:59 ETA:  0:01:47

Putting data on position 195 of each batch


 76% (196 of 256) |##################      | Elapsed Time: 0:06:01 ETA:  0:01:40

Putting data on position 196 of each batch


 76% (197 of 256) |##################      | Elapsed Time: 0:06:03 ETA:  0:01:35

Putting data on position 197 of each batch


 77% (198 of 256) |##################      | Elapsed Time: 0:06:05 ETA:  0:01:37

Putting data on position 198 of each batch


 77% (199 of 256) |##################      | Elapsed Time: 0:06:07 ETA:  0:01:33

Putting data on position 199 of each batch


 78% (200 of 256) |##################      | Elapsed Time: 0:06:08 ETA:  0:01:36

Putting data on position 200 of each batch


 78% (201 of 256) |##################      | Elapsed Time: 0:06:10 ETA:  0:01:38

Putting data on position 201 of each batch


 78% (202 of 256) |##################      | Elapsed Time: 0:06:12 ETA:  0:01:37

Putting data on position 202 of each batch


 79% (203 of 256) |###################     | Elapsed Time: 0:06:14 ETA:  0:01:36

Putting data on position 203 of each batch


 79% (204 of 256) |###################     | Elapsed Time: 0:06:16 ETA:  0:01:35

Putting data on position 204 of each batch


 80% (205 of 256) |###################     | Elapsed Time: 0:06:18 ETA:  0:01:29

Putting data on position 205 of each batch


 80% (206 of 256) |###################     | Elapsed Time: 0:06:19 ETA:  0:01:30

Putting data on position 206 of each batch


 80% (207 of 256) |###################     | Elapsed Time: 0:06:21 ETA:  0:01:25

Putting data on position 207 of each batch


 81% (208 of 256) |###################     | Elapsed Time: 0:06:23 ETA:  0:01:20

Putting data on position 208 of each batch


 81% (209 of 256) |###################     | Elapsed Time: 0:06:25 ETA:  0:01:15

Putting data on position 209 of each batch


 82% (210 of 256) |###################     | Elapsed Time: 0:06:27 ETA:  0:01:15

Putting data on position 210 of each batch


 82% (211 of 256) |###################     | Elapsed Time: 0:06:28 ETA:  0:01:10

Putting data on position 211 of each batch


 82% (212 of 256) |###################     | Elapsed Time: 0:06:30 ETA:  0:01:08

Putting data on position 212 of each batch


 83% (213 of 256) |###################     | Elapsed Time: 0:06:32 ETA:  0:01:06

Putting data on position 213 of each batch


 83% (214 of 256) |####################    | Elapsed Time: 0:06:34 ETA:  0:01:13

Putting data on position 214 of each batch


 83% (215 of 256) |####################    | Elapsed Time: 0:06:36 ETA:  0:01:14

Putting data on position 215 of each batch


 84% (216 of 256) |####################    | Elapsed Time: 0:06:38 ETA:  0:01:21

Putting data on position 216 of each batch


 84% (217 of 256) |####################    | Elapsed Time: 0:06:40 ETA:  0:01:26

Putting data on position 217 of each batch


 85% (218 of 256) |####################    | Elapsed Time: 0:06:42 ETA:  0:01:26

Putting data on position 218 of each batch


 85% (219 of 256) |####################    | Elapsed Time: 0:06:44 ETA:  0:01:20

Putting data on position 219 of each batch


 85% (220 of 256) |####################    | Elapsed Time: 0:06:45 ETA:  0:01:20

Putting data on position 220 of each batch


 86% (221 of 256) |####################    | Elapsed Time: 0:06:47 ETA:  0:01:16

Putting data on position 221 of each batch


 86% (222 of 256) |####################    | Elapsed Time: 0:06:49 ETA:  0:01:14

Putting data on position 222 of each batch


 87% (223 of 256) |####################    | Elapsed Time: 0:06:51 ETA:  0:01:05

Putting data on position 223 of each batch


 87% (224 of 256) |#####################   | Elapsed Time: 0:06:53 ETA:  0:00:57

Putting data on position 224 of each batch


 87% (225 of 256) |#####################   | Elapsed Time: 0:06:55 ETA:  0:00:51

Putting data on position 225 of each batch


 88% (226 of 256) |#####################   | Elapsed Time: 0:06:56 ETA:  0:00:51

Putting data on position 226 of each batch


 88% (227 of 256) |#####################   | Elapsed Time: 0:06:58 ETA:  0:00:49

Putting data on position 227 of each batch


 89% (228 of 256) |#####################   | Elapsed Time: 0:07:00 ETA:  0:00:46

Putting data on position 228 of each batch


 89% (229 of 256) |#####################   | Elapsed Time: 0:07:02 ETA:  0:00:44

Putting data on position 229 of each batch


 89% (230 of 256) |#####################   | Elapsed Time: 0:07:04 ETA:  0:00:48

Putting data on position 230 of each batch


 90% (231 of 256) |#####################   | Elapsed Time: 0:07:06 ETA:  0:00:48

Putting data on position 231 of each batch


 90% (232 of 256) |#####################   | Elapsed Time: 0:07:08 ETA:  0:00:52

Putting data on position 232 of each batch


 91% (233 of 256) |#####################   | Elapsed Time: 0:07:10 ETA:  0:00:52

Putting data on position 233 of each batch


 91% (234 of 256) |#####################   | Elapsed Time: 0:07:11 ETA:  0:00:47

Putting data on position 234 of each batch


 91% (235 of 256) |######################  | Elapsed Time: 0:07:13 ETA:  0:00:44

Putting data on position 235 of each batch


 92% (236 of 256) |######################  | Elapsed Time: 0:07:15 ETA:  0:00:48

Putting data on position 236 of each batch


 92% (237 of 256) |######################  | Elapsed Time: 0:07:17 ETA:  0:00:51

Putting data on position 237 of each batch


 92% (238 of 256) |######################  | Elapsed Time: 0:07:19 ETA:  0:00:45

Putting data on position 238 of each batch


 93% (239 of 256) |######################  | Elapsed Time: 0:07:21 ETA:  0:00:37

Putting data on position 239 of each batch


 93% (240 of 256) |######################  | Elapsed Time: 0:07:23 ETA:  0:00:34

Putting data on position 240 of each batch


 94% (241 of 256) |######################  | Elapsed Time: 0:07:24 ETA:  0:00:32

Putting data on position 241 of each batch


 94% (242 of 256) |######################  | Elapsed Time: 0:07:26 ETA:  0:00:30

Putting data on position 242 of each batch


 94% (243 of 256) |######################  | Elapsed Time: 0:07:28 ETA:  0:00:24

Putting data on position 243 of each batch


 95% (244 of 256) |######################  | Elapsed Time: 0:07:30 ETA:  0:00:17

Putting data on position 244 of each batch


 95% (245 of 256) |######################  | Elapsed Time: 0:07:32 ETA:  0:00:13

Putting data on position 245 of each batch


 96% (246 of 256) |####################### | Elapsed Time: 0:07:34 ETA:  0:00:10

Putting data on position 246 of each batch


 96% (247 of 256) |####################### | Elapsed Time: 0:07:35 ETA:  0:00:09

Putting data on position 247 of each batch


 96% (248 of 256) |####################### | Elapsed Time: 0:07:37 ETA:  0:00:08

Putting data on position 248 of each batch


 97% (249 of 256) |####################### | Elapsed Time: 0:07:39 ETA:  0:00:06

Putting data on position 249 of each batch


 97% (250 of 256) |####################### | Elapsed Time: 0:07:41 ETA:  0:00:06

Putting data on position 250 of each batch


 98% (251 of 256) |####################### | Elapsed Time: 0:07:43 ETA:  0:00:05

Putting data on position 251 of each batch


 98% (252 of 256) |####################### | Elapsed Time: 0:07:45 ETA:  0:00:06

Putting data on position 252 of each batch


 98% (253 of 256) |####################### | Elapsed Time: 0:07:46 ETA:  0:00:04

Putting data on position 253 of each batch


 99% (254 of 256) |####################### | Elapsed Time: 0:07:48 ETA:  0:00:00

Putting data on position 254 of each batch


 99% (255 of 256) |####################### | Elapsed Time: 0:07:50 ETA:  0:00:00

Putting data on position 255 of each batch


100% (256 of 256) |########################| Elapsed Time: 0:07:52 Time: 0:07:52


Computing sample weights

In [6]:
if subset == 'training':
    # Per-(sequence, timestep) loss weights: every position starts at 1.0 and
    # positions whose first output channel equals 1 are down-weighted.
    # NOTE(review): channel 0 is presumably the background class (per the
    # variable name) -- confirm against the cell that builds `output`.
    background_weight = 0.6
    is_background = output[:, :, 0] == 1
    sample_weights = np.where(is_background, background_weight, 1.0)

In [7]:
# Persist the arrays built above for the current `subset` into the shared
# dataset file, under one HDF5 group per subset.
#
# Mode 'a' opens the file read/write and creates it when it does not exist
# yet ('r+' raises if the file is missing). The `with` block guarantees the
# handle is closed even if one of the writes below fails.
with h5py.File('stateful_dataset_with_audio.hdf5', 'a') as f_dataset:
    # create_group raises if the group is already present, so data previously
    # stored for this subset is never silently overwritten.
    f_dataset_subset = f_dataset.create_group(subset)

    # Video features use 4-row chunks while the other datasets use 256-row
    # chunks. NOTE(review): presumably this keeps each chunk small given the
    # much larger `features_size` -- confirm before changing.
    f_dataset_subset.create_dataset(
        'vid_features', data=video_features,
        chunks=(4, timesteps, features_size), dtype='float32')
    f_dataset_subset.create_dataset(
        'mfcc_features', data=mfcc_features,
        chunks=(256, timesteps, mfcc_size), dtype='float32')
    f_dataset_subset.create_dataset(
        'spec_features', data=spec_features,
        chunks=(256, timesteps, spec_size), dtype='float32')
    f_dataset_subset.create_dataset(
        'output', data=output,
        chunks=(256, timesteps, output_size), dtype='float32')

    # `sample_weights` is only computed for the training subset.
    if subset == 'training':
        f_dataset_subset.create_dataset(
            'sample_weight', data=sample_weights,
            chunks=(256, timesteps), dtype='float32')