In [1]:
# Move to the youtube8m project directory
import os

# Walk up one level at a time until the current directory is named 'youtube8m'.
while os.path.basename(os.getcwd()) != 'youtube8m':
    previous_dir = os.getcwd()
    os.chdir('..')
    # At the filesystem root '..' resolves to the same directory; fail loudly
    # instead of looping forever when no ancestor is named 'youtube8m'.
    if os.getcwd() == previous_dir:
        raise FileNotFoundError("no 'youtube8m' directory found above the starting directory")
os.getcwd()

'/home/jupyter/ASLOpenProject/youtube8m'

## トレーニング

In [2]:
%%bash

# Generate the timestamp once so the job name and the output directory
# always carry the same suffix (two separate `date` calls can differ).
TIMESTAMP=$(date -u +%y%m%d_%H%M%S)
JOBNAME="youtube8m_${TIMESTAMP}"
BUCKET=asl-mixi-project-bucket
OUTDIR="gs://${BUCKET}/model/youtube8m/video/auc/${TIMESTAMP}"

# Submit the training job for the video.task module packaged from ./video.
# Flags after the bare `--` are handed to the training module itself.
# Wildcard paths are quoted so the local shell never tries to expand them.
gcloud ai-platform jobs submit training "${JOBNAME}" \
       --region=us-central1 \
       --module-name=video.task \
       --package-path="$(pwd)/video" \
       --job-dir="${OUTDIR}" \
       --staging-bucket="gs://${BUCKET}" \
       --scale-tier=BASIC_GPU \
       --runtime-version=1.14 \
       --python-version=3.5 \
       -- \
       --train_data_path="gs://${BUCKET}/data/youtube-8m/train/train00*.tfrecord" \
       --eval_data_path="gs://${BUCKET}/data/youtube-8m/valid/validate00*.tfrecord" \
       --output_dir="${OUTDIR}" \
       --model=dnn \
       --train_steps=300 \
       --layer_num=10 \
       --hidden_dim=64 \
       --dropout=0.1 \
       --kernel_regularizer=0.0001 \
       --batch_size=128 \
       --learning_rate=0.01

jobId: youtube8m_191210_053750
state: QUEUED


Job [youtube8m_191210_053750] submitted successfully.
Your job is still active. You may view the status of your job with the command

  $ gcloud ai-platform jobs describe youtube8m_191210_053750

or continue streaming the logs with the command

  $ gcloud ai-platform jobs stream-logs youtube8m_191210_053750


## モデル生成

In [9]:
# Create the AI Platform model resource named youtube8mkojo; versions are attached to it in a later cell.
!gcloud ai-platform models create youtube8mkojo

Created ml engine model [projects/qwiklabs-gcp-ml-83914069970b/models/youtube8mkojo].


To take a quick anonymous survey, run:
  $ gcloud alpha survey



## バージョンの作成 

`--origin`のパスに注意してください。 \
学習時の`output_dir`配下に作成される`export/exporter/{timestamp}`のようなディレクトリを指定します。

In [14]:
VERSION='v0'
TFVERSION=1.14

!gcloud ai-platform versions delete {VERSION} --model youtube8mkojo --quiet
!gcloud ai-platform versions create {VERSION} --model youtube8mkojo \
    --origin gs://asl-mixi-project-bucket/model/youtube8m/video/auc/191210_031819/export/exporter/1575948073/ \
    --python-version=3.5 \
    --runtime-version={TFVERSION}

Creating version (this might take a few minutes)......done.                    


これで推論APIの生成処理は完了

## 検証

とりあえずすでにfeature化が完了したベクトルを取ってくる \
ついでにボキャブラリーファイルも取ってくる

In [3]:
# Copy the precomputed feature file and the label vocabulary from the bucket into the current directory.
!gsutil cp gs://asl-mixi-project-bucket/data/youtube-8m/features/feature_child.pb .
!gsutil cp gs://asl-mixi-project-bucket/data/youtube-8m/vocabulary.csv .

Copying gs://asl-mixi-project-bucket/data/youtube-8m/features/feature_child.pb...
/ [1 files][ 73.5 KiB/ 73.5 KiB]                                                
Operation completed over 1 objects/73.5 KiB.                                     
Copying gs://asl-mixi-project-bucket/data/youtube-8m/vocabulary.csv...
/ [1 files][  3.4 MiB/  3.4 MiB]                                                
Operation completed over 1 objects/3.4 MiB.                                      


In [4]:
import tensorflow as tf
# Eager mode is needed because get_feature() converts tensors to values with .numpy().
tf.enable_eager_execution()

featureファイルから平均化されたリクエストを取得する関数

In [5]:
def get_feature(file_name):
    """Build one prediction instance from a serialized SequenceExample file.

    The file is read as raw bytes, parsed with
    ``tf.parse_single_sequence_example``, and the RGB and audio feature
    sequences are averaged along axis -2 (presumably the frame axis).

    Args:
        file_name: Path to a file containing one serialized SequenceExample.

    Returns:
        A dict with keys ``'mean_rgb'`` and ``'mean_audio'`` whose values are
        plain Python lists (via ``.numpy().tolist()``), so the result can be
        placed directly in a JSON request body.
    """
    # Read through a context manager so the file handle is always closed.
    with open(file_name, 'rb') as feature_file:
        example = feature_file.read()

    # Context features are parsed but their values are discarded below.
    # NOTE(review): "RGB/feature/flxxxxx" looks like a placeholder or typo;
    # it does not affect the returned values — confirm the intended key.
    context_features = {
        "RGB/feature/flxxxxx": tf.VarLenFeature(tf.float32),
        "RGB/feature/timestamp": tf.VarLenFeature(tf.int64),
        "RGB/feature/dimensions": tf.VarLenFeature(tf.int64),
        "RGB/feature/rate": tf.VarLenFeature(tf.float32),
        "AUDIO/feature/timestamp": tf.VarLenFeature(tf.int64),
        "AUDIO/feature/dimensions": tf.VarLenFeature(tf.int64),
        "AUDIO/feature/rate": tf.VarLenFeature(tf.float32),
    }
    sequence_features = {
        "RGB/feature/floats": tf.VarLenFeature(tf.float32),
        "AUDIO/feature/floats": tf.VarLenFeature(tf.float32),
    }
    _, seq = tf.parse_single_sequence_example(example, context_features, sequence_features)

    # VarLenFeature yields sparse tensors; densify before averaging.
    mean_audio = tf.reduce_mean(tf.sparse.to_dense(seq['AUDIO/feature/floats']), axis=-2)
    mean_rgb = tf.reduce_mean(tf.sparse.to_dense(seq['RGB/feature/floats']), axis=-2)
    return {
        'mean_rgb': mean_rgb.numpy().tolist(),
        'mean_audio': mean_audio.numpy().tolist(),
    }

ボキャブラリーファイルからラベルdictを生成する

In [6]:
import csv

# Map column 0 of vocabulary.csv (kept as a string) to column 3 of the same row.
# newline='' is what the csv module expects from the file object, so quoted
# fields that contain line breaks are parsed correctly.
with open('./vocabulary.csv', 'r', newline='') as vocab_file:
    vocab_rows = csv.reader(vocab_file)
    next(vocab_rows, None)  # skip the first row (presumably the header)
    labels = {row[0]: row[3] for row in vocab_rows}

推論APIにリクエストを投げる関数

In [7]:
from googleapiclient import discovery
from oauth2client.client import GoogleCredentials
import json

# Project, model and version targeted by the online prediction requests.
PROJECT = "qwiklabs-gcp-ml-83914069970b"
MODEL_NAME = "youtube8mkojo"
VERSION = "v0"

# Application-default credentials of the environment running this notebook.
credentials = GoogleCredentials.get_application_default()

# Client for the "ml" v1 API, built from the discovery document at the URL below.
api = discovery.build(
    "ml",
    "v1",
    credentials=credentials,
    discoveryServiceUrl="https://storage.googleapis.com/cloud-ml/discovery/ml_v1_discovery.json",
)

def show_predict(prediction):
    """Print one line per entry of ``prediction["predicted_topk"]``.

    Each line shows the 1-based rank, the class index, its label looked up in
    the module-level ``labels`` dict (keyed by the index as a string), and the
    value at that index in ``prediction['probabilities']``.

    Args:
        prediction: Mapping with ``"predicted_topk"`` (iterable of class
            indices) and ``'probabilities'`` (indexable by class index).
    """
    template = "{}th({}:{}) probality is {}"
    for rank, class_id in enumerate(prediction["predicted_topk"], start=1):
        label = labels[str(class_id)]
        print(template.format(rank, class_id, label, prediction['probabilities'][class_id]))

def predict(feature_file, model_name=MODEL_NAME, version=VERSION):
    """Send one feature file to the online prediction API and print the result.

    Args:
        feature_file: Path passed to ``get_feature`` to build the request instance.
        model_name: Name of the deployed model to query (defaults to ``MODEL_NAME``).
        version: Version of that model to query (defaults to ``VERSION``).

    Returns:
        None. The first prediction in the response is printed via ``show_predict``.
    """
    request_data = {"instances": [get_feature(feature_file)]}
    # Build the resource name from the arguments, not the module-level
    # constants, so callers can actually target another model or version.
    parent = "projects/{}/models/{}/versions/{}".format(PROJECT, model_name, version)
    response = api.projects().predict(body=request_data, name=parent).execute()
    show_predict(response['predictions'][0])
    return None

In [8]:
# Query the youtube8mkojo model (version v0) with the downloaded feature file and print the top predictions.
predict("./feature_child.pb", 'youtube8mkojo', 'v0')

1th(0:Game) probality is 0.1902475655078888
2th(1:Video game) probality is 0.1257742941379547
3th(2:Vehicle) probality is 0.09998852014541626
4th(3:Concert) probality is 0.09328439831733704
5th(4:Musician) probality is 0.06331360340118408
