In [1]:
# Move to the youtube8m project directory by walking up from the current
# working directory until a directory named 'youtube8m' is reached.
import os

_start_dir = os.getcwd()
while os.path.basename(os.getcwd()) != 'youtube8m':
    _previous_dir = os.getcwd()
    os.chdir('..')
    if os.getcwd() == _previous_dir:
        # chdir('..') no longer moves us, i.e. we are at the filesystem root and
        # no ancestor is named 'youtube8m'. Restore the starting directory and
        # fail loudly instead of looping forever.
        os.chdir(_start_dir)
        raise RuntimeError(
            "No 'youtube8m' directory found at or above {!r}".format(_start_dir)
        )
os.getcwd()

'/home/jupyter/ASLOpenProject/youtube8m'

In [2]:
# Cloud settings. None of these four constants is referenced again in the
# visible part of this notebook.
PROJECT = "qwiklabs-gcp-ml-3b4c41182bb8"  # Replace with your PROJECT
BUCKET = "qwiklabs-gcp-ml-3b4c41182bb8"  # Replace with your BUCKET
REGION = "us-central1"            # Choose an available region for Cloud MLE
# NOTE(review): the kernel below reports TensorFlow 1.15.0 while this is set to
# "1.14" — confirm the mismatch between local and CMLE versions is intended.
TFVERSION = "1.14"                # TF version for CMLE to use

In [3]:
import tensorflow as tf
# Show which TensorFlow version this kernel is running.
print(tf.__version__)

1.15.0


In [4]:
import shutil
from datetime import datetime
# read_dataset and CLASS_NUM come from the `data` module (not shown here).
from data import read_dataset, CLASS_NUM
# CLASS_NUM is used below as the width of the model's output layer and as the
# one-hot depth when building multi-hot vectors.
print(CLASS_NUM)

3862


In [5]:
# File patterns for the training and validation TFRecord files. They are passed
# to read_dataset via params; presumably read_dataset expands the '*' wildcard
# — confirm against the data module.
TRAIN_DATA_PATH = './data/video/train*.tfrecord'
EVAL_DATA_PATH = './data/video/validate*.tfrecord'

# Models
特徴量を受け取って、識別用の logits を出力するモデル

In [6]:
class LinearModel(tf.keras.models.Model):
    """Linear classifier: a single dense layer over the concatenated features."""

    def __init__(self, *args, **kwargs):
        """Forward all arguments to the Keras base class and build the output layer."""
        super().__init__(*args, **kwargs)
        # One output unit per class.
        self.output_dense = tf.keras.layers.Dense(units=CLASS_NUM)

    def call(self, visual_feature, audio_feature):
        """Take the two feature tensors and return the sigmoid logits.

        The features are concatenated along their last axis before being fed
        to the dense output layer.
        """
        joined_features = tf.concat(values=[visual_feature, audio_feature], axis=-1)
        logits = self.output_dense(joined_features)
        return logits


# Model Function

In [7]:
from typing import Dict
from data import read_dataset

def create_model(params):
    """Instantiate the model class selected by params['model'].

    The whole params dict is handed to the model constructor as its single
    positional argument. An unknown model name raises KeyError.
    """
    registry = {'linear': LinearModel}
    model_cls = registry[params['model']]
    return model_cls(params)

def multi_hot(indices):
    """Turn class indices into a multi-hot vector of width CLASS_NUM.

    Each index is one-hot encoded, then the one-hot vectors are summed over
    the second-to-last axis.
    """
    one_hot_vectors = tf.one_hot(indices, depth=CLASS_NUM)
    return tf.reduce_sum(one_hot_vectors, axis=-2)

def recall_topk(probabilities, labels, top_k):
    """Recall metric where the `top_k` highest-probability classes count as predicted.

    Returns whatever tf.metrics.recall returns for the given labels and the
    multi-hot encoding of the top-k class indices.
    """
    _, topk_indices = tf.math.top_k(probabilities, k=top_k)
    topk_as_multihot = multi_hot(topk_indices)
    return tf.metrics.recall(labels=labels, predictions=topk_as_multihot)

def create_metrics(probabilities, labels, params):
    """Build the evaluation metrics dict for the estimator.

    Args:
        probabilities: Per-class sigmoid probabilities produced by the model.
        labels: Ground-truth labels compared against the predictions
            (presumably multi-hot with the same shape as `probabilities`
            — TODO confirm against read_dataset).
        params: Hyper-parameter dict. Reads 'threshold' (default 0.5), the
            cut-off above which a class counts as predicted, and 'top_k'
            (default 5), the base k for the recall@k metrics.

    Returns:
        Dict mapping metric name to the value returned by the corresponding
        tf.metrics function. Besides the thresholded metrics it contains
        'recall_top{k}' for k in (top_k // 2, top_k, top_k * 2).
    """
    threshold = params.get('threshold', 0.5)
    top_k = params.get('top_k', 5)
    # 1.0 where the class probability reaches the threshold, else 0.0.
    predicted_bool = tf.cast(probabilities >= threshold, tf.float32)
    metrics = {
        'accuracy': tf.metrics.accuracy(labels=labels, predictions=predicted_bool),
        'recall': tf.metrics.recall(labels=labels, predictions=predicted_bool),
        'precision': tf.metrics.precision(labels=labels, predictions=predicted_bool),
        # AUC sweeps its own thresholds over the scores, so it must be given the
        # raw probabilities; feeding already-thresholded 0/1 values collapses
        # the curve to a single operating point.
        'auc': tf.metrics.auc(labels=labels, predictions=probabilities),
        # Average number of classes predicted per example at this threshold.
        'predicted_tag_count': tf.metrics.mean(tf.math.count_nonzero(predicted_bool, axis=-1)),
    }
    for k in [top_k // 2, top_k, top_k * 2]:
        metrics['recall_top{}'.format(k)] = recall_topk(probabilities, labels, top_k=k)
    return metrics

def model_fn(
    features: Dict[str, tf.Tensor],
    labels: tf.Tensor,
    mode: tf.estimator.ModeKeys,
    params: Dict,
) -> tf.estimator.EstimatorSpec:
    """Estimator model function: builds the graph for train, eval and predict.

    Args:
        features: Must contain the keys 'mean_rgb' and 'mean_audio'.
        labels: Targets for the sigmoid cross-entropy loss; only used in TRAIN
            and EVAL modes (presumably multi-hot with the same shape as the
            logits — TODO confirm against read_dataset).
        mode: One of tf.estimator.ModeKeys.
        params: Hyper-parameter dict. Reads 'model' (via create_model),
            'learning_rate' (default 0.001) and 'top_k' (default 5); the
            whole dict is also forwarded to create_metrics in EVAL mode.

    Returns:
        An EstimatorSpec whose predictions hold the per-class 'probabilities'
        and the indices of the 'predicted_topk' classes. `loss` is set in
        TRAIN and EVAL, `train_op` only in TRAIN, `eval_metric_ops` only in
        EVAL.
    """
    model = create_model(params)
    logits = model(features['mean_rgb'], features['mean_audio'])
    probabilities = tf.nn.sigmoid(logits)

    loss = None
    train_op = None
    eval_metric_ops = None

    if mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL):
        # Independent sigmoid cross-entropy per class, averaged over all elements.
        cross_entropy = tf.nn.sigmoid_cross_entropy_with_logits(
            labels=labels,
            logits=logits,
        )
        loss = tf.reduce_mean(cross_entropy)

    if mode == tf.estimator.ModeKeys.TRAIN:
        optimizer = tf.train.AdamOptimizer(learning_rate=params.get('learning_rate', 0.001))
        # For batch normalization: make every op registered in UPDATE_OPS run
        # before the training step.
        update_ops = tf.get_collection(key=tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(control_inputs=update_ops):
            train_op = optimizer.minimize(
                loss,
                global_step=tf.train.get_or_create_global_step()
            )
    if mode == tf.estimator.ModeKeys.EVAL:
        eval_metric_ops = create_metrics(probabilities, labels, params)

    return tf.estimator.EstimatorSpec(
        mode=mode,
        predictions={
            'probabilities': probabilities,
            'predicted_topk': tf.math.top_k(probabilities, k=params.get('top_k', 5)).indices,
        },
        loss=loss,
        train_op=train_op,
        eval_metric_ops=eval_metric_ops,
    )

# その他諸々

In [8]:
def serving_input_fn():
    """Build the serving input receiver used when exporting the model.

    Creates two float32 placeholders, 'mean_rgb' with shape [None, 1024] and
    'mean_audio' with shape [None, 128], and uses the same dict both as the
    receiver tensors and as the features passed to the model.
    """
    rgb_input = tf.placeholder(dtype=tf.float32, shape=[None, 1024], name='serving_mean_rgb')
    audio_input = tf.placeholder(dtype=tf.float32, shape=[None, 128], name='serving_mean_audio')
    serving_inputs = {
        'mean_rgb': rgb_input,
        'mean_audio': audio_input,
    }
    return tf.estimator.export.ServingInputReceiver(
        features=serving_inputs,
        receiver_tensors=serving_inputs,
    )

In [9]:
def train_and_evaluate(output_dir: str, params: Dict) -> None:
    """Train the model and evaluate it periodically with the Estimator API.

    Args:
        output_dir: Model directory for the estimator.
        params: Hyper-parameter dict, also forwarded to model_fn. Keys read
            here: 'train_data_path' and 'eval_data_path' (required),
            'eval_interval' (default 60; used both as the checkpoint interval
            and as the evaluation throttle, in seconds), 'log_interval_step'
            (default 100), 'batch_size' (default 512), 'train_steps'
            (default 10000) and 'eval_delay_sec' (default 60).
    """
    tf.summary.FileWriterCache.clear()

    eval_interval = params.get('eval_interval', 60)
    batch_size = params.get('batch_size', 512)

    def _train_input_fn():
        # The data path is looked up only when the estimator calls this function.
        return read_dataset(
            params['train_data_path'],
            tf.estimator.ModeKeys.TRAIN,
            batch_size,
        )

    def _eval_input_fn():
        return read_dataset(
            params['eval_data_path'],
            tf.estimator.ModeKeys.EVAL,
            batch_size,
        )

    run_config = tf.estimator.RunConfig(
        model_dir=output_dir,
        save_checkpoints_secs=eval_interval,
        log_step_count_steps=params.get('log_interval_step', 100),
    )
    estimator = tf.estimator.Estimator(
        model_fn=model_fn,
        config=run_config,
        params=params,
    )

    train_spec = tf.estimator.TrainSpec(
        input_fn=_train_input_fn,
        max_steps=params.get('train_steps', 10000),
    )
    latest_exporter = tf.estimator.LatestExporter(
        name='exporter',
        serving_input_receiver_fn=serving_input_fn,
    )
    eval_spec = tf.estimator.EvalSpec(
        input_fn=_eval_input_fn,
        exporters=latest_exporter,
        start_delay_secs=params.get('eval_delay_sec', 60),
        throttle_secs=eval_interval,
    )

    # Hand control to the Estimator train/eval loop.
    tf.estimator.train_and_evaluate(
        estimator=estimator,
        train_spec=train_spec,
        eval_spec=eval_spec,
    )

# 学習させる

In [10]:
# Label for this run; it becomes part of the output directory path.
model_name = 'base'
# The output directory ends with a second-resolution timestamp of the launch time.
out_dir = 'trained/video/{}/{}'.format(model_name, datetime.now().strftime('%Y%m%d_%H%M%S'))
params = {
    'train_data_path': TRAIN_DATA_PATH,
    'eval_data_path': EVAL_DATA_PATH,
    'model': 'linear',  # key looked up in create_model's model_map
    'batch_size': 256,  # passed to read_dataset for both training and evaluation
    'learning_rate': 0.001,  # Adam learning rate in model_fn
    'train_steps': 30000,  # max_steps of the TrainSpec
    'log_interval_step': 1000,  # log_step_count_steps of the RunConfig
    'eval_delay_sec': 1,  # start_delay_secs of the EvalSpec
    'eval_interval': 10,  # seconds; used for save_checkpoints_secs and throttle_secs
}

# Starts training; the log output follows this cell.
train_and_evaluate(out_dir, params)

INFO:tensorflow:Using config: {'_train_distribute': None, '_log_step_count_steps': 1000, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f3010ce7908>, '_num_ps_replicas': 0, '_master': '', '_save_checkpoints_steps': None, '_device_fn': None, '_is_chief': True, '_experimental_distribute': None, '_eval_distribute': None, '_task_id': 0, '_service': None, '_task_type': 'worker', '_protocol': None, '_save_checkpoints_secs': 10, '_keep_checkpoint_max': 5, '_evaluation_master': '', '_keep_checkpoint_every_n_hours': 10000, '_global_id_in_cluster': 0, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_tf_random_seed': None, '_save_summary_steps': 100, '_num_worker_replicas': 1, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_model_dir': 'trained/video/base/20191209_064155'}
INFO:tensorflow:Not using Distribute Coordinator.
INFO:tensorflow:Running tr