Skip to content
Permalink
Browse files

feat(encoder): add yt8m feature extractor

  • Loading branch information...
jemmyshin committed Sep 3, 2019
1 parent 93a43f5 commit 50a944b65c6037dae46ed04925fbb9af6b5523ab
@@ -42,7 +42,8 @@
'MfccEncoder': 'audio.mfcc',
'PoolingEncoder': 'numeric.pooling',
'PyTorchTransformers': 'text.transformer',
'VggishEncoder': 'audio.vggish'
'VggishEncoder': 'audio.vggish',
'YouTube8MFeatureExtractor': 'video.yt8m_feature_extractor'
}

register_all_class(_cls2file_map, 'encoder')
@@ -0,0 +1,121 @@
# Tencent is pleased to support the open source community by making GNES available.
#
# Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import List
import numpy as np
from PIL import Image
from ..base import BaseVideoEncoder
from ...helper import batching, get_first_available_gpu


class YouTube8MFeatureExtractor(BaseVideoEncoder):
    """Encode RGB frames into PCA-projected Inception-v3 features.

    Every frame is resized to ``inception_size_x`` x ``inception_size_y``
    (299 x 299), rescaled so that pixel values 0..255 map onto [-1, 1], and
    fed through the Inception-v3 graph restored from ``model_dir``. The
    activations of ``select_layer`` are then centred with the PCA mean,
    projected onto the first 1024 eigenvectors and divided by
    ``sqrt(eigenvalue + 1e-4)``.

    This class does not download anything: the checkpoint and the PCA files
    have to exist at the given locations before ``post_init`` runs.

    :param model_dir: checkpoint path passed verbatim to
        ``tf.train.Saver.restore``.
    :param pca_dir: directory holding ``mean.npy``, ``eigenvals.npy`` and
        ``eigenvecs.npy``.
    :param select_layer: key into the ``end_points`` dict returned by
        ``inception_v3``. The PCA step squeezes axes 1 and 2 and reshapes to
        2048 columns, so the selected end point must have shape
        ``(N, 1, 1, 2048)``.

    Usage example::

        from PIL import Image
        import numpy

        extractor = YouTube8MFeatureExtractor(model_dir='/path/to/checkpoint',
                                              pca_dir='/path/to/pca')
        im = numpy.array(Image.open('frame.jpg'))
        features = extractor.encode([im])

    ** Note: OpenCV reverses the order of channels (i.e. orders channels as
    BGR instead of RGB). If you are using OpenCV, then you must do::

        im = im[:, :, ::-1]  # Reverses order on last (i.e. channel) dimension.

    before calling ``extractor.encode([im])``.
    """
    # Read by the ``batching`` decorator — presumably the number of frames
    # sent through the network per ``sess.run`` call; confirm in helper.
    batch_size = 64

    def __init__(self, model_dir: str,
                 pca_dir: str,
                 select_layer: str = 'PreLogits',
                 *args, **kwargs):
        """Store the configuration; heavy resources are loaded in ``post_init``."""
        super().__init__(*args, **kwargs)

        self.model_dir = model_dir
        self.pca_dir = pca_dir
        self.select_layer = select_layer
        # Spatial size the frames are resized to and the input placeholder
        # is declared with.
        self.inception_size_x = 299
        self.inception_size_y = 299

    def post_init(self):
        """Load the PCA matrices, build the Inception-v3 graph and restore weights."""
        import tensorflow as tf
        from .yt8m_feature_extractor_cores.inception_v3 import inception_v3
        from .yt8m_feature_extractor_cores.inception_utils import inception_arg_scope
        import os
        os.environ['CUDA_VISIBLE_DEVICES'] = str(get_first_available_gpu())

        # The .npy files are stored as 2-D arrays; keep the first column of the
        # mean / eigenvalues and only the leading 1024 principal components.
        self.pca_mean = np.load(os.path.join(self.pca_dir, 'mean.npy'))[:, 0]
        self.pca_eigenvals = np.load(os.path.join(self.pca_dir, 'eigenvals.npy'))[:1024, 0]
        self.pca_eigenvecs = np.load(os.path.join(self.pca_dir, 'eigenvecs.npy')).T[:, :1024]

        g = tf.Graph()
        with g.as_default():
            arg_scope = inception_arg_scope()
            inception_v3.default_image_size = self.inception_size_x
            self.inputs = tf.placeholder(tf.float32, (None,
                                                      self.inception_size_x,
                                                      self.inception_size_y, 3))

            with tf.contrib.slim.arg_scope(arg_scope):
                self.logits, self.end_points = inception_v3(self.inputs,
                                                            num_classes=1001,
                                                            is_training=False,
                                                            dropout_keep_prob=1.0)

            config = tf.ConfigProto(log_device_placement=False)
            # ``on_gpu`` is not defined in this class; presumably provided by
            # the base encoder — confirm.
            if self.on_gpu:
                config.gpu_options.allow_growth = True
            # Session and saver are created while ``g`` is the default graph so
            # that they bind to the Inception graph built above.
            self.sess = tf.Session(config=config)
            self.saver = tf.train.Saver()
            self.saver.restore(self.sess, self.model_dir)

    def encode(self, img: List['np.ndarray'], *args, **kwargs) -> np.ndarray:
        """Return one float32 feature vector per input frame.

        :param img: list of RGB frames accepted by ``PIL.Image.fromarray``
            (presumably uint8 arrays of shape (H, W, 3) — confirm with caller).
        :return: float32 array with one row of PCA-projected features per frame.
        """
        target_size = (self.inception_size_x, self.inception_size_y)
        # Resize every frame and map pixel values 0..255 onto [-1, 1].
        img = [np.array(Image.fromarray(im).resize(target_size),
                        dtype=np.float32) * 2 / 255. - 1.
               for im in img]

        # Fetch only the tensor that is actually used. Asking the session for
        # the logits and the whole ``end_points`` dict would copy every
        # intermediate activation back to the host for each batch.
        selected = self.end_points[self.select_layer]

        def _pca(data):
            # (N, 1, 1, 2048) -> (N, 2048)
            data = np.squeeze(data, axis=(1, 2))
            data = (data - self.pca_mean).reshape((len(data), 2048))
            data = np.matmul(data, self.pca_eigenvecs)
            # Whitening; the epsilon keeps the division stable for tiny
            # eigenvalues.
            data = data / np.sqrt(self.pca_eigenvals + 1e-4)
            return data

        @batching
        def _encode(_, data):
            activations = self.sess.run(selected,
                                        feed_dict={self.inputs: data})
            return _pca(activations)

        return _encode(self, img).astype(np.float32)

No changes.
@@ -0,0 +1,82 @@
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains common code shared by all inception models.
Usage of arg scope:
with slim.arg_scope(inception_arg_scope()):
logits, end_points = inception.inception_v3(images, num_classes,
is_training=is_training)
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

# Short alias for the TF-Slim API used by inception_arg_scope.
slim = tf.contrib.slim


def inception_arg_scope(weight_decay=0.00004,
                        use_batch_norm=True,
                        batch_norm_decay=0.9997,
                        batch_norm_epsilon=0.001,
                        activation_fn=tf.nn.relu,
                        batch_norm_updates_collections=tf.GraphKeys.UPDATE_OPS,
                        batch_norm_scale=False):
    """Build the default arg scope shared by the inception models.

    Args:
      weight_decay: The weight decay to use for regularizing the model.
      use_batch_norm: If `True`, batch_norm is applied after each convolution.
      batch_norm_decay: Decay for batch norm moving average.
      batch_norm_epsilon: Small float added to variance to avoid dividing by
        zero in batch norm.
      activation_fn: Activation function for conv2d.
      batch_norm_updates_collections: Collection for the update ops for
        batch norm.
      batch_norm_scale: If True, uses an explicit `gamma` multiplier to scale
        the activations in the batch normalization layer.

    Returns:
      An `arg_scope` to use for the inception models.
    """
    bn_kwargs = dict(
        decay=batch_norm_decay,  # decay for the moving averages
        epsilon=batch_norm_epsilon,  # keeps the variance away from zero
        updates_collections=batch_norm_updates_collections,  # update-op collection
        fused=None,  # use fused batch norm if possible
        scale=batch_norm_scale,
    )

    # Without batch norm the convolutions get no normalizer at all.
    conv_normalizer = slim.batch_norm if use_batch_norm else None
    conv_normalizer_kwargs = bn_kwargs if use_batch_norm else {}

    # Weight decay applies to both conv and fully connected layers; the
    # initializer, activation and normalizer settings apply to conv only.
    l2_penalty = slim.l2_regularizer(weight_decay)
    with slim.arg_scope([slim.conv2d, slim.fully_connected],
                        weights_regularizer=l2_penalty):
        with slim.arg_scope(
                [slim.conv2d],
                weights_initializer=slim.variance_scaling_initializer(),
                activation_fn=activation_fn,
                normalizer_fn=conv_normalizer,
                normalizer_params=conv_normalizer_kwargs) as scope:
            return scope

0 comments on commit 50a944b

Please sign in to comment.
You can’t perform that action at this time.