In [1]:
import os
import h5py
import numpy as np

from work.environment import FEATURES_DATASET_FILE
from work.dataset.activitynet import ActivityNetDataset

# Load the ActivityNet video metadata (paths are relative to this notebook).
dataset = ActivityNetDataset(
    videos_path='../../../../dataset/videos.json',
    labels_path='../../../../dataset/labels.txt'
)

# Output HDF5 file with one group per descriptor type. It is opened in 'w'
# mode (truncating any existing file) and stays open across the following
# cells, which write into `f_spec` / `f_mfcc`.
f_audio_descriptors = h5py.File('audio_descriptors.hdf5', 'w')
f_spec = f_audio_descriptors.create_group('spec')
f_mfcc = f_audio_descriptors.create_group('mfcc')

# Gather the keys of the training/validation/testing groups of the features
# file; they are compared against `video.video_id` below.
video_ids = []
with h5py.File(FEATURES_DATASET_FILE, 'r') as f_features:
    for subset in ('training', 'validation', 'testing'):
        video_ids += f_features[subset].keys()
print('Number of videos: {}'.format(len(video_ids)))

# Drop every dataset video whose id is not in the features file.
# `video_ids` stays a list (later cells iterate over it); the set is only
# used so that each membership test is O(1) instead of a full list scan.
videos = dataset.videos
known_video_ids = set(video_ids)
to_remove = [video for video in videos if video.video_id not in known_video_ids]
for v in to_remove:
    dataset.videos.remove(v)

print('Number of videos: {}'.format(len(videos)))

# Number of 16-frame clips per video, keyed by video id.
dict_nb_clips = {v.video_id: v.num_frames // 16 for v in videos}

Number of videos: 19752
Number of videos: 19752


### Spectral Descriptors

In [2]:
from progressbar import ProgressBar

# Directory whose entries are loaded as spectral descriptors, one per file.
path = '/imatge/iesquerra/work/activitynet/spec'
files = os.listdir(path)

videos_not_spec = []
videos_spec = []

progbar = ProgressBar(max_value=len(files))
count = 0
progbar.update(count)
for count, file_name in enumerate(files, 1):
    # The first 11 characters of the file name are used as the video id.
    video_id = file_name[:11]
    videos_spec.append(video_id)

    # Load the values as float32 and force a single-row 2-D layout.
    file_path = os.path.join(path, file_name)
    descriptor = np.loadtxt(file_path, dtype=np.float32)
    descriptor = descriptor.reshape(1, -1)
    assert descriptor.ndim == 2
    assert descriptor.shape[1] == 8, descriptor.shape

    # Store the descriptor under the video id in the 'spec' group.
    f_spec.create_dataset(video_id, data=descriptor, dtype='float32')
    progbar.update(count)
progbar.finish()

100% (19242 of 19242) |####################| Elapsed Time: 0:01:31 Time: 0:01:31


In [3]:
# Ids from the features file for which no spectral descriptor file was read.
# A set makes each membership test O(1) instead of scanning the whole
# `videos_spec` list for every id; the resulting list keeps the order of
# `video_ids`.
spec_ids = set(videos_spec)
videos_without_spec = [v_id for v_id in video_ids if v_id not in spec_ids]
print('Number of videos without spec: {}'.format(len(videos_without_spec)))

Number of videos without spec: 569


In [None]:
import warnings

# Directory expected to contain one '<video_id>.mfcc' text file per video.
path = '/imatge/iesquerra/work/activitynet/mfcc'
files = os.listdir(path)
# Set of file names so the per-video existence check is O(1).
available_files = set(files)

# The loop below iterates over `videos`, so the bar is sized by the number of
# videos and advanced exactly once per video (including skipped ones).
progbar = ProgressBar(max_value=len(videos))
count = 0
progbar.update(count)
videos_not_mfcc = []
with warnings.catch_warnings():
    # Promote warnings to exceptions (e.g. numpy warning about an empty input
    # file or the mean of an empty slice) so the affected video is recorded
    # in `videos_not_mfcc` instead of being stored with invalid values.
    warnings.filterwarnings('error')
    for count, video in enumerate(videos, start=1):
        try:
            file_name = video.video_id+'.mfcc'
            if file_name not in available_files:
                videos_not_mfcc.append(video.video_id)
                continue
            file_path = os.path.join(path, file_name)
            descriptor = np.loadtxt(file_path, dtype=np.float32)
            if descriptor.shape == (0,):
                videos_not_mfcc.append(video.video_id)
                continue
            assert descriptor.ndim == 2, 'descriptor.ndim: {}, descriptor.shape:{}'.format(descriptor.ndim, descriptor.shape)
            assert descriptor.shape[1] == 40, descriptor.shape

            nb_clips = video.num_frames // 16
            if nb_clips == 0:
                # Fewer than 16 frames: there is no clip to describe.
                videos_not_mfcc.append(video.video_id)
                continue

            # Split the descriptor rows into `nb_clips` equal consecutive
            # chunks (trailing rows that do not fill a chunk are ignored) and
            # store per-chunk mean in columns 0-39 and std in columns 40-79.
            mfcc_descriptor = np.zeros((nb_clips, 80))
            step = descriptor.shape[0] // nb_clips
            for i in range(nb_clips):
                s, e = i * step, (i + 1) * step
                d = descriptor[s:e,:]
                mfcc_descriptor[i,:40] = np.mean(d, axis=0)
                mfcc_descriptor[i,40:] = np.std(d, axis=0)
            f_mfcc.create_dataset(video.video_id, data=mfcc_descriptor, dtype='float32')
        except Exception:
            # Best effort: any failure for this video (failed assertion,
            # promoted warning, I/O or HDF5 error) marks it as lacking MFCC.
            # `Exception` rather than a bare `except` so that
            # KeyboardInterrupt/SystemExit can still stop this long loop.
            videos_not_mfcc.append(video.video_id)
        finally:
            # Runs on every path, including the `continue` branches.
            progbar.update(count)
    progbar.finish()

print('Number of videos without mfcc: {}'.format(len(videos_not_mfcc)))

 99% (19467 of 19527) |################### | Elapsed Time: 2:20:44 ETA:  0:00:00

Number of videos without mfcc: 365


100% (19527 of 19527) |####################| Elapsed Time: 2:20:45 Time: 2:20:45


In [None]:
# Close the HDF5 file handle that the earlier cells wrote the 'spec' and
# 'mfcc' datasets into.
f_audio_descriptors.close()