In [2]:
import os
# Notebook-wide settings -- edit these three values for your own environment.
PROJECT = "qwiklabs-gcp-3f19cbba7aa3ae63"  # REPLACE WITH YOUR PROJECT ID
BUCKET = "project-sample"  # REPLACE WITH YOUR BUCKET NAME
REGION = "us-central1"  # REPLACE WITH YOUR BUCKET REGION e.g. us-central1

# Do not change these: the %%bash cells read the settings from the environment.
os.environ.update({
    "PROJECT": PROJECT,
    "BUCKET": BUCKET,
    "REGION": REGION,
    "TFVERSION": "1.13",  # Tensorflow version
})

In [3]:
%%bash
# Point the gcloud CLI at the project and region exported by the configuration cell.
gcloud config set project $PROJECT
gcloud config set compute/region $REGION

Updated property [core/project].
Updated property [compute/region].


In [4]:
from google.cloud import storage
# Cloud Storage client bound to PROJECT.
# NOTE(review): `gs` is not referenced by any other cell shown here -- confirm it is still needed.
gs = storage.Client(project=PROJECT)

In [20]:
%%writefile image_model/model.py
from tensorflow.python.keras.preprocessing import text
from tensorflow.python.keras import models
#from tensorflow.python.keras.layers import Dense
#from tensorflow.python.keras.layers import Dropout
from tensorflow.python.keras.layers import Embedding
from tensorflow.python.keras.layers import Conv1D
from tensorflow.python.keras.layers import MaxPooling1D
from tensorflow.python.keras.layers import GlobalAveragePooling1D

import tensorflow.keras

from tensorflow.keras.models import Sequential
from tensorflow.keras.applications.resnet50 import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input, decode_predictions

from tensorflow.keras.layers import Reshape, Input
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import Activation
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.regularizers import l2
from tensorflow.keras.layers import average 

import tensorflow as tf
import pandas as pd
import numpy as np
import re
import os

from google.cloud import storage

# Emit INFO-level TensorFlow log messages.
tf.logging.set_verbosity(tf.logging.INFO)

# Product categories; a category's position in this list is its integer label.
CLASSES_LIST = [
    'Bras', 'Ties', 'Tops', 'Jeans', 'Polos', 'Rings', 'Socks',
    'Skirts', 'Watches', 'Leggings', 'Sweaters', 'T-Shirts',
    'Necklaces', 'Swim Tops', 'Underwear', 'Fragrances', 'Range Hoods',
    'Basins/Sinks', 'Button-Downs', 'Slacks/Pants', 'Swim Bottoms',
    'Jackets/Coats', 'Office Chairs', 'Gloves/Mittens',
    'Semi-Brim Hats', 'Dresses & Gowns', 'Pendants & Charms',
    'Blazers/Suit Coats', 'Swim Variety Packs', 'Bracelets & Anklets',
    'One-Piece Swimsuits', 'Protective Footwear',
    'Faucets/Taps/Handles', 'Bedding Variety Packs',
    'Earrings & Ear Jewelry', 'Protective/Active Tops',
    'Cardigans/Kimonos/Wraps', 'Everyday/Dress Footwear',
    'Protective/Active Pants', 'Protective/Active Vests',
    'Tableware Variety Packs', 'Active/Athletic Footwear',
    'Protective/Active Shorts', 'Specialty Sport Footwear',
    'Hair Cleaning & Treatments', 'Business/Formal Dress Suits',
    'Sweatshirts/Fleece Pullovers', 'Clothing Sets & Variety Packs',
    'Protective/Active Button-Downs',
    'Vitamins, Minerals, & Dietary Supplements',
]
# Reverse lookup: category name -> integer label
CLASSES = {name: label for label, name in enumerate(CLASSES_LIST)}

# For image classification
HEIGHT = 224  # image height in pixels after resizing/cropping
WIDTH = 224  # image width in pixels after resizing/cropping
NUM_CHANNELS = 3  # colour channels requested from tf.image.decode_jpeg
NCLASSES = 50  # number of output classes (CLASSES_LIST has 50 entries)

# For text classification
TOP_K = 20000  # Limit on the vocabulary size used for tokenization
MAX_SEQUENCE_LENGTH = 500  # Sentences will be truncated/padded to this length
PADWORD = 'ZYXW'  # filler token for ragged word tensors; written to the vocab file with index 0


# For image classification
def read_and_preprocess_with_augment(image_bytes, bucket_name=None, product_name=None, description=None, bucket_label=None, pretrained='none'):
    """Run read_and_preprocess with random augmentation switched on.

    Accepts the same arguments as read_and_preprocess (minus `augment`) and
    returns whatever read_and_preprocess returns.
    """
    forwarded = dict(
        image_bytes=image_bytes,
        bucket_name=bucket_name,
        product_name=product_name,
        description=description,
        bucket_label=bucket_label,
        augment=True,
        pretrained=pretrained,
    )
    return read_and_preprocess(**forwarded)


# For image classification
def read_and_preprocess(image_bytes, bucket_name=None, product_name=None, description=None, bucket_label=None, augment=False, pretrained='none'):
    """
    Decode one JPEG and turn it into an (image, one-hot label) pair.
      # Arguments:
          image_bytes: scalar string tensor holding the encoded JPEG
          bucket_name, product_name, description: accepted so the function can
            be mapped over the 5-element records built by load_data; unused here
          bucket_label: integer class index, one-hot encoded into NCLASSES slots
          augment: if True, apply random crop, flip, brightness and contrast
          pretrained: 'none' scales pixels to [-1, 1]; 'res_50' applies the
            ResNet50 preprocess_input transform
      # Returns:
          (image, label): float image of shape [HEIGHT, WIDTH, NUM_CHANNELS]
            and the one-hot encoded label
      # Raises:
          ValueError: if `pretrained` is not one of the values above
    """
    image = tf.image.decode_jpeg(contents = image_bytes, channels=NUM_CHANNELS)
    # Convert uint8 -> float32 in [0, 1] *before* resizing. resize_bilinear
    # returns float32 and convert_image_dtype does not rescale float input, so
    # converting after the resize would leave the pixels in [0, 255].
    image = tf.image.convert_image_dtype(image = image, dtype = tf.float32)
    image = tf.expand_dims(input = image, axis = 0) # resize_bilinear needs batches

    if augment:
        # Resize slightly larger than the target so the random crop has room to move
        image = tf.image.resize_bilinear(images = image, size = [HEIGHT+10, WIDTH+10], align_corners = False)
        image = tf.squeeze(input = image, axis = 0) # remove batch dimension
        image = tf.random_crop(value = image, size = [HEIGHT, WIDTH, NUM_CHANNELS])
        image = tf.image.random_flip_left_right(image = image)
        # max_delta is expressed on the [0, 1] pixel scale
        image = tf.image.random_brightness(image = image, max_delta = 63.0/255.0)
        image = tf.image.random_contrast(image = image, lower = 0.2, upper = 1.8)
    else:
        image = tf.image.resize_bilinear(images = image, size = [HEIGHT, WIDTH], align_corners = False)
        image = tf.squeeze(input = image, axis = 0) #remove batch dimension

    if pretrained == 'none':
        # Pixel values are in range [0,1], convert to [-1,1]
        image = tf.subtract(x = image, y = 0.5)
        image = tf.multiply(x = image, y = 2.0)
    elif pretrained == 'res_50':
        # preprocess_input works on the 0-255 pixel scale
        image = preprocess_input(image * 255.0)
    else:
        # ValueError is a subclass of Exception, so existing handlers still match
        raise ValueError('unknown pretrained model {}'.format(pretrained))

    return image, tf.one_hot(bucket_label, NCLASSES)


# For image classification
def download_from_gcs(source, destination):
    """
    Copy the object at a gs://bucket/path URL to a local file.
    Defined at module level so other functions in this file can use it too.
      # Arguments:
          source: string of the form gs://<bucket>/<object>
          destination: local file name to write
      # Raises:
          ValueError: if `source` does not look like gs://<bucket>/<object>
    """
    search = re.search('gs://(.*?)/(.*)', source)
    if search is None:
        raise ValueError('not a gs://bucket/object path: {}'.format(source))
    bucket_name = search.group(1)
    blob_name = search.group(2)
    storage_client = storage.Client()
    bucket = storage_client.get_bucket(bucket_name)
    bucket.blob(blob_name).download_to_filename(destination)


def _make_image_dataset(df):
    """Build a dataset of (image_bytes, bucket_name, product_name, description, bucket_label) from a dataframe."""
    def read_image(product_id, bucket_name, product_name, description, bucket_label):
        # product_id is used as the image file name; swap it for the file contents
        image_bytes = tf.read_file(filename = product_id)
        return image_bytes, bucket_name, product_name, description, bucket_label

    return tf.data.Dataset.from_tensor_slices(
        (
            tf.cast(df['product_id'].values, tf.string),
            tf.cast(df['bucket_name'].values, tf.string),
            tf.cast(df['product_name'].values, tf.string),
            tf.cast(df['description'].values, tf.string),
            tf.cast(df['bucket_label'].values, tf.int32)
        )
    ).map(read_image)


def load_data(train_data_path, eval_data_path):
    """
    Load the train and eval CSV files for both the text and the image models.
      # Arguments:
          train_data_path: local path or gs:// URL of the training CSV
          eval_data_path: local path or gs:// URL of the evaluation CSV
      # Returns:
          (train, eval) where each element is the 5-tuple
          (product_names, descriptions, product_ids, labels, dataset):
          three python lists, a numpy array of integer labels and a
          tf.data.Dataset of unprocessed image records.
    """
    # GCS files are copied next to the working directory before parsing
    if train_data_path.startswith('gs://'):
        download_from_gcs(train_data_path, destination='train.csv')
        train_data_path = 'train.csv'
    if eval_data_path.startswith('gs://'):
        download_from_gcs(eval_data_path, destination='eval.csv')
        eval_data_path = 'eval.csv'

    # Parse CSV using pandas
    df_train = pd.read_csv(train_data_path)
    df_eval = pd.read_csv(eval_data_path)

    df_train['bucket_label'] = df_train['bucket_name'].map(CLASSES)
    df_eval['bucket_label'] = df_eval['bucket_name'].map(CLASSES)

    def as_split(df):
        # Each split gets a dataset built from its OWN dataframe; the eval
        # dataset was previously built from the training rows by mistake.
        return (
            list(df['product_name']),
            list(df['description']),
            list(df['product_id']),
            np.array(df['bucket_name'].map(CLASSES)),
            _make_image_dataset(df)
        )

    return (as_split(df_train), as_split(df_eval))


# Builds the preprocessed, batched image dataset for training or evaluation
def make_image_input_fn(dataset, batch_size, mode, augment = False, pretrained = False):
    """
    Preprocess, shuffle (training only), repeat and batch an image dataset.
    Note: despite its name this returns a tf.data.Dataset, not a callable.
      # Arguments:
          dataset: tf.data.Dataset of (image_bytes, bucket_name, product_name,
            description, bucket_label) records
          batch_size: int, number of records per batch
          mode: tf.estimator.ModeKeys.TRAIN or another mode key
          augment: if True, preprocess with random augmentation
          pretrained: preprocessing flavour forwarded to read_and_preprocess
            ('none' or 'res_50'); any falsy value means 'none'
      # Returns:
          tf.data.Dataset producing (image, one-hot label) batches
    """
    # The default for `pretrained` is False, but read_and_preprocess only
    # understands strings, so map every falsy value to 'none'.
    if not pretrained:
        pretrained = 'none'

    preprocess = read_and_preprocess_with_augment if augment else read_and_preprocess

    def parse_record(image_bytes, bucket_name, product_name, description, bucket_label):
        # Forward `pretrained`, which a bare dataset.map(preprocess) would drop
        return preprocess(
            image_bytes,
            bucket_name=bucket_name,
            product_name=product_name,
            description=description,
            bucket_label=bucket_label,
            pretrained=pretrained)

    dataset = dataset.map(map_func = parse_record)

    if mode == tf.estimator.ModeKeys.TRAIN:
        num_epochs = 10 # bounded number of passes over the data, not indefinite
        dataset = dataset.shuffle(buffer_size = 10 * batch_size)
    else:
        num_epochs = 1 # end-of-input after this

    dataset = dataset.repeat(count = num_epochs).batch(batch_size = batch_size)
    return dataset


def input_fn(texts, labels, batch_size, mode):
    """
    For text classification
    Create tf.estimator compatible input function
      # Arguments:
          texts: [strings], list of sentences
          labels: numpy int vector, integer labels for sentences
          batch_size: int, number of records to use for each train batch
          mode: tf.estimator.ModeKeys.TRAIN or tf.estimator.ModeKeys.EVAL
      # Returns:
          tf.data.Dataset, produces feature and label
            tensors one batch at a time
    """
    is_training = mode == tf.estimator.ModeKeys.TRAIN

    # python strings -> string tensor -> sequences of word integers
    word_ids = vectorize_sentences(tf.constant(texts))

    # One (sequence, label) record per sentence, each padded to constant length
    dataset = tf.data.Dataset.from_tensor_slices((word_ids, labels)).map(pad)

    if is_training:
        # The original author notes the input is already shuffled, making this redundant
        dataset = dataset.shuffle(buffer_size=50000)

    # None repeats indefinitely (training); evaluation makes a single pass
    repeat_count = None if is_training else 1
    return dataset.repeat(repeat_count).batch(batch_size)


def pad(feature, label):
    """
    For text classification
    Given an int tensor, remove 0s then pad to a fixed length representation.
      # Arguments:
          feature: int tensor
          label: int. not used in function, just passed through
      # Returns:
          (int tensor, int) tuple.
    """
    # Step 1: drop the zeros, which stand for out-of-vocabulary words
    is_known_word = tf.not_equal(feature, tf.zeros_like(feature))
    kept_positions = tf.where(is_known_word)
    kept_ids = tf.squeeze(tf.gather(feature, kept_positions), axis=1)

    # Step 2: prepend MAX_SEQUENCE_LENGTH zeros, then keep only the tail so the
    # result always has exactly MAX_SEQUENCE_LENGTH entries
    left_padded = tf.pad(kept_ids, [[MAX_SEQUENCE_LENGTH, 0]])
    return (left_padded[-MAX_SEQUENCE_LENGTH:], label)


def vectorize_sentences(sentences):
    """
    For text classification
    Given sentences, return an integer representation
      # Arguments:
          sentences: string tensor of shape (?,), contains sentences to vectorize
      # Returns:
          Integer representation of the sentence. Word-integer mapping is determined
            by the module-level VOCAB_FILE_PATH, which must be set before this is
            called. Words out of vocabulary will map to 0
    """
    # Punctuation becomes whitespace
    cleaned = tf.regex_replace(sentences, '[[:punct:]]', ' ')

    # Split into words; shorter rows are filled up with PADWORD
    tokens = tf.sparse_tensor_to_dense(tf.string_split(cleaned), default_value=PADWORD)

    # word -> integer lookup backed by the "word,index" vocabulary file
    vocab_table = tf.contrib.lookup.index_table_from_file(
        vocabulary_file=VOCAB_FILE_PATH,
        num_oov_buckets=0,
        vocab_size=None,
        default_value=0,  # for words not in vocabulary (OOV)
        key_column_index=0,
        value_column_index=1,
        delimiter=',')

    return vocab_table.lookup(tokens)


def keras_estimator(model_dir,
                    config,
                    learning_rate,
                    filters=64,
                    dropout_rate=0.2,
                    embedding_dim=200,
                    kernel_size=3,
                    pool_size=3,
                    embedding_path=None,
                    word_index=None):
    """
    For text classification
    Builds a CNN model using keras and converts to tf.estimator.Estimator
      # Arguments
          model_dir: string, file path where training files will be written
          config: tf.estimator.RunConfig, specifies properties of tf Estimator
          learning_rate: float, learning rate of the Adam optimizer
          filters: int, output dimension of the layers.
          kernel_size: int, length of the convolution window.
          embedding_dim: int, dimension of the embedding vectors.
          dropout_rate: float, percentage of input to drop at Dropout layers.
          pool_size: int, factor by which to downscale input at MaxPooling layer.
          embedding_path: string , file location of pre-trained embedding (if used)
            defaults to None which will cause the model to train embedding from scratch
          word_index: dictionary, mapping of vocabulary to integers. Required if
            a pre-trained embedding is provided; when omitted the embedding
            layer is sized for TOP_K words
      # Returns
          A tf.estimator.Estimator
      # Raises
          ValueError: if embedding_path is given without word_index
    """
    # word_index defaults to None, so guard len() against it
    if word_index is None:
        if embedding_path is not None:
            raise ValueError('word_index is required when embedding_path is given')
        num_features = TOP_K
    else:
        num_features = min(len(word_index) + 1, TOP_K)

    # Create model instance.
    model = models.Sequential()

    # Add embedding layer. If pre-trained embedding is used add weights to the
    # embeddings layer and set trainable to input is_embedding_trainable flag.
    if embedding_path is not None:
        embedding_matrix = get_embedding_matrix(word_index, embedding_path, embedding_dim)
        is_embedding_trainable = True  # set to False to freeze embedding weights

        model.add(Embedding(input_dim=num_features,
                            output_dim=embedding_dim,
                            input_length=MAX_SEQUENCE_LENGTH,
                            weights=[embedding_matrix],
                            trainable=is_embedding_trainable))
    else:
        model.add(Embedding(input_dim=num_features,
                            output_dim=embedding_dim,
                            input_length=MAX_SEQUENCE_LENGTH))

    model.add(Dropout(rate=dropout_rate))
    model.add(Conv1D(filters=filters,
                     kernel_size=kernel_size,
                     activation='relu',
                     bias_initializer='random_uniform',
                     padding='same'))

    model.add(MaxPooling1D(pool_size=pool_size))
    model.add(Conv1D(filters=filters * 2,
                     kernel_size=kernel_size,
                     activation='relu',
                     bias_initializer='random_uniform',
                     padding='same'))
    model.add(GlobalAveragePooling1D())
    model.add(Dropout(rate=dropout_rate))
    # One output probability per product category
    model.add(Dense(len(CLASSES), activation='softmax'))

    # Compile model with learning parameters.
    optimizer = tf.keras.optimizers.Adam(lr=learning_rate)
    model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['acc'])
    estimator = tf.keras.estimator.model_to_estimator(keras_model=model, model_dir=model_dir, config=config)

    return estimator


def serving_input_fn():
    """
    For text classification
    Defines the features to be passed to the model during inference
      Can pass in string text directly. Tokenization done in serving_input_fn
      # Arguments: none
      # Returns: tf.estimator.export.TensorServingInputReceiver
    """
    # Raw sentences come in as a 1-D batch of strings
    raw_sentences = tf.placeholder(tf.string, [None])
    return tf.estimator.export.TensorServingInputReceiver(
        vectorize_sentences(raw_sentences),
        raw_sentences)


def get_embedding_matrix(word_index, embedding_path, embedding_dim):
    """
    For text classification
    Takes embedding for generic vocabulary and extracts the embeddings
      matching the current vocabulary
      The pre-trained embedding file is obtained from https://nlp.stanford.edu/projects/glove/
      # Arguments:
          word_index: dict, {key =word in vocabulary: value= integer mapped to that word}
          embedding_path: string, location of the pre-trained embedding file
            (local path or gs:// URL)
          embedding_dim: int, dimension of the embedding space
      # Returns: numpy matrix of shape (vocabulary, embedding_dim) that contains the embedded
          representation of each word in the vocabulary.
    """
    # Copy the file locally if it lives in GCS. tf.gfile understands gs://
    # paths, so this function needs no helper defined elsewhere.
    if embedding_path.startswith('gs://'):
        tf.gfile.Copy(embedding_path, 'embedding.csv', overwrite=True)
        embedding_path = 'embedding.csv'

    # Words not found in the embedding file keep their all-zeros row
    num_words = min(len(word_index) + 1, TOP_K)
    embedding_matrix = np.zeros((num_words, embedding_dim))

    # Fill the matrix while streaming the file, keeping only the vectors of
    # words in our vocabulary instead of holding every embedding in memory.
    with open(embedding_path) as f:
        for line in f:  # Every line contains word followed by the vector value
            values = line.split()
            if not values:
                continue  # tolerate blank lines
            i = word_index.get(values[0])
            if i is None or i >= TOP_K:
                continue
            embedding_matrix[i] = np.asarray(values[1:], dtype='float32')
    return embedding_matrix


def _train_text(output_dir, hparams, field):
    """
    Main orchestrator for text classification
      # Arguments:
          output_dir: string, where the vocabulary file, checkpoints and
            exported models are written
          hparams: dict with 'train_data_path', 'eval_data_path',
            'learning_rate', 'num_epochs' and 'batch_size'; 'embedding_path'
            is optional
          field: 'product_name' trains on product names, any other value
            trains on descriptions
    """
    global VOCAB_FILE_PATH

    # Load Data
    (
        (train_product_name, train_description, _, train_labels, _),
        (test_product_name, test_description, _, test_labels, _)
    ) = load_data(hparams['train_data_path'], hparams['eval_data_path'])

    if field == 'product_name':
        train_texts = train_product_name
        test_texts = test_product_name
    else:
        train_texts = train_description
        test_texts = test_description

    # Create vocabulary from training corpus.
    tokenizer = text.Tokenizer(num_words=TOP_K)
    tokenizer.fit_on_texts(train_texts)

    # Generate vocabulary file from tokenizer object to enable
    # creating a native tensorflow lookup table later (used in vectorize_sentences())
    # MakeDirs creates missing parents and succeeds when the directory already
    # exists, so re-running into the same output_dir does not fail.
    tf.gfile.MakeDirs(output_dir)
    VOCAB_FILE_PATH = os.path.join(output_dir,'vocab.txt')
    with tf.gfile.Open(VOCAB_FILE_PATH, 'wb') as f:
        f.write("{},0\n".format(PADWORD))  # map padword to 0
        for word, index in tokenizer.word_index.items():
            if index < TOP_K: # only save mappings for TOP_K words
                f.write("{},{}\n".format(word, index))

    # Create estimator
    run_config = tf.estimator.RunConfig(save_checkpoints_steps=500)
    estimator = keras_estimator(
        model_dir=output_dir,
        config=run_config,
        learning_rate=hparams['learning_rate'],
        embedding_path=hparams.get('embedding_path'),  # optional: None trains the embedding from scratch
        word_index=tokenizer.word_index
    )

    # Create TrainSpec
    # Integer division keeps max_steps an int under Python 3; at least one step
    # is required because TrainSpec rejects max_steps <= 0.
    train_steps = max(1, hparams['num_epochs'] * len(train_texts) // hparams['batch_size'])
    train_spec = tf.estimator.TrainSpec(
        input_fn=lambda:input_fn(
            train_texts,
            train_labels,
            hparams['batch_size'],
            mode=tf.estimator.ModeKeys.TRAIN),
        max_steps=train_steps
    )

    # Create EvalSpec
    exporter = tf.estimator.LatestExporter('exporter', serving_input_fn)
    eval_spec = tf.estimator.EvalSpec(
        input_fn=lambda:input_fn(
            test_texts,
            test_labels,
            hparams['batch_size'],
            mode=tf.estimator.ModeKeys.EVAL),
        steps=None,
        exporters=exporter,
        start_delay_secs=10,
        throttle_secs=10
    )

    # Start training
    tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)


def train_product_name(output_dir, hparams):
    """
    Main orchestrator for product_name
    Trains the text classifier on the product_name field.
    """
    return _train_text(output_dir=output_dir, hparams=hparams, field='product_name')


def train_description(output_dir, hparams):
    """
    Main orchestrator for description
    Trains the text classifier on the description field.
    """
    return _train_text(output_dir=output_dir, hparams=hparams, field='description')


def train_image(output_dir, hparams):
    """
    Main orchestrator for image classification
    Puts a new classification head on a frozen ImageNet ResNet50, trains it
    with tf.estimator.train_and_evaluate and exports a SavedModel.
      # Arguments:
          output_dir: string, where checkpoints and exported models are written
          hparams: dict. Required keys: 'train_data_path', 'eval_data_path',
            'pretrained'. Optional keys (with fallback): 'batch_size' (100),
            'train_steps' (300), 'learning_rate' (optimizer default),
            'augment' (False)
    """
    batch_size = hparams.get('batch_size', 100)
    train_steps = hparams.get('train_steps', 300)
    learning_rate = hparams.get('learning_rate')
    augment = hparams.get('augment', False)
    pretrained = hparams['pretrained']

    def make_input_fn(mode):
        # TrainSpec/EvalSpec need a *callable*, and the dataset ops must be
        # created inside the graph the Estimator builds, so the data is loaded
        # and the dataset assembled only when the Estimator calls this.
        def _input_fn():
            train_split, eval_split = load_data(hparams['train_data_path'], hparams['eval_data_path'])
            is_training = mode == tf.estimator.ModeKeys.TRAIN
            split = train_split if is_training else eval_split
            return make_image_input_fn(
                split[-1],  # last element of each split is the tf.data.Dataset
                batch_size,
                mode,
                augment=bool(augment) and is_training,  # never augment evaluation data
                pretrained=pretrained)
        return _input_fn

    # Use ResNet50 as a feature extractor: drop its 1000-way ImageNet softmax
    # and average-pool the final feature map, so the new layers learn from
    # features rather than from ImageNet class probabilities.
    res_model = ResNet50(weights='imagenet',
                         include_top=False,
                         pooling='avg',
                         input_shape=(HEIGHT, WIDTH, NUM_CHANNELS))
    x = res_model.output

    # let's add a fully-connected layer
    x = Dense(1024, activation='relu',kernel_initializer='he_uniform')(x)
    # and a fully connected output layer, one probability per product category
    predictions = Dense(NCLASSES, activation='softmax', kernel_initializer='glorot_uniform')(x)

    Res50 = tf.keras.Model(inputs=res_model.input, outputs=predictions)

    # freeze ResNet during training
    for layer in res_model.layers:
        layer.trainable = False

    # Honour the requested learning rate when one is given
    if learning_rate is None:
        optimizer = 'adam'
    else:
        optimizer = tf.keras.optimizers.Adam(lr=learning_rate)
    Res50.compile(optimizer=optimizer, loss='categorical_crossentropy',metrics=['accuracy'])

    estimator = tf.keras.estimator.model_to_estimator(
        keras_model = Res50,
        model_dir = output_dir,
        config = tf.estimator.RunConfig(
              tf_random_seed = 1, # for reproducibility
              save_checkpoints_steps = 100 # checkpoint every N steps
        )
    )

    def serving_input_fn():
        # Clients send a batch of encoded JPEGs; decode and preprocess them the
        # same way as the evaluation data and feed the model's only input.
        image_bytes = tf.placeholder(dtype = tf.string, shape = [None], name = 'image_bytes')

        def decode_one(one_image):
            # bucket_label=0 is a dummy value; the label output is discarded
            image, _ = read_and_preprocess(one_image, bucket_label=0, pretrained=pretrained)
            return image

        images = tf.map_fn(decode_one, image_bytes, dtype = tf.float32, back_prop = False)
        return tf.estimator.export.ServingInputReceiver(
            features = {Res50.input_names[0]: images},
            receiver_tensors = {'image_bytes': image_bytes})

    train_spec=tf.estimator.TrainSpec(
                  input_fn = make_input_fn(tf.estimator.ModeKeys.TRAIN),
                  max_steps = train_steps)
    exporter = tf.estimator.LatestExporter('exporter', serving_input_fn)

    eval_spec=tf.estimator.EvalSpec(
                  input_fn = make_input_fn(tf.estimator.ModeKeys.EVAL),
                  steps = None,
                  start_delay_secs = 10, # wait at least N seconds before first evaluation (default 120)
                  throttle_secs = 10, # wait at least N seconds before each subsequent evaluation (default 600)
                  exporters = exporter) # export SavedModel once at the end of training

    tf.logging.set_verbosity(tf.logging.INFO) # so loss is printed during training
    tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)


Overwriting image_model/model.py


In [16]:
%%writefile image_model/task.py

import argparse
import json
import os

from . import model
import tensorflow as tf

# Command-line entry point: parse the flags, derive the output directory and
# start the image training job. Every parsed flag is handed to
# model.train_image as the `hparams` dict.
if __name__ == '__main__':
  parser = argparse.ArgumentParser()
  # Input Arguments
  parser.add_argument(
      '--batch_size',
      help='Batch size for training steps',
      type=int,
      default=100
  )
  parser.add_argument(
      '--learning_rate',
      help='Initial learning rate for training',
      type=float,
      default=0.01
  )
  parser.add_argument(
      '--train_steps',
      help="""\
      Steps to run the training job for. A step is one batch-size,\
      """,
      type=int,
      default=100
  )
  parser.add_argument(
      '--output_dir',
      help='GCS location to write checkpoints and export models',
      required=True
  )
  parser.add_argument(
      '--train_data_path',
      help='location of train file containing img URLs',
      default='gs://cloud-ml-data/img/flower_photos/train_set.csv'
  )
  parser.add_argument(
      '--eval_data_path',
      help='location of eval file containing img URLs',
      default='gs://cloud-ml-data/img/flower_photos/eval_set.csv'
  )
  parser.add_argument(
      '--job-dir',
      help='this model ignores this field, but it is required by gcloud',
      default='junk'
  )
  parser.add_argument(
      '--augment', 
      help='if specified, augment image data', 
      dest='augment', action='store_true')
  parser.add_argument(
      '--pretrained', 
      help='specify a pretrained model', 
      dest='pretrained',
      default='none')
  parser.set_defaults(augment=False)

  # optional hyperparameters used by cnn
  parser.add_argument(
      '--ksize1', 
      help='kernel size of first layer for CNN', 
      type=int, 
      default=5)
  parser.add_argument(
      '--ksize2', 
      help='kernel size of second layer for CNN', 
      type=int, 
      default=5)
  parser.add_argument(
      '--nfil1', 
      help='number of filters in first layer for CNN', 
      type=int, 
      default=10)
  parser.add_argument(
      '--nfil2', 
      help='number of filters in second layer for CNN', 
      type=int, 
      default=20)
  parser.add_argument(
      '--dprob', 
      help='dropout probability for CNN', 
      type=float, 
      default=0.25)
  parser.add_argument(
      '--batch_norm', 
      help='if specified, do batch_norm for CNN', 
      dest='batch_norm', 
      action='store_true')
  parser.set_defaults(batch_norm=False)

  args = parser.parse_args()
  hparams = vars(args)
  print(hparams)

  # output_dir is passed separately, so take it out of the hyperparameters
  output_dir = hparams.pop('output_dir')
  # Append trial_id to path for hptuning
  output_dir = os.path.join(
      output_dir,
      json.loads(
          os.environ.get('TF_CONFIG', '{}')
      ).get('task', {}).get('trial', '')
  )
  # Run the training job
  model.train_image(output_dir, hparams)


Overwriting image_model/task.py


## Run as a Python module

Let's first run it locally for a few steps to check that the code works. Note the `--pretrained` parameter.

In [37]:
%%bash
# Remove the output of any previous local run, then run the trainer module
# locally for a few steps.
rm -rf image_model.tar.gz image_trained
python3 -m image_model.task \
    --output_dir=${PWD}/image_trained \
    --train_steps=5 \
    --learning_rate=0.01 \
    --batch_size=40 \
    --pretrained='res_50' \
    --train_data_path='gs://project-sample/dataset1_data_train.csv' \
    --eval_data_path='gs://project-sample/dataset1_data_eval.csv'

Process is terminated.


Now, let's run it on ML Engine. Note the `--pretrained` parameter.

In [17]:
%%writefile config.yml
# Machine configuration for the ML Engine training job (passed with --config).
trainingInput:
  scaleTier: CUSTOM
  masterType: complex_model_l_gpu
# Alternative master type and distributed worker / parameter-server settings,
# currently disabled:
#  masterType: standard_gpu
#  workerCount: 8
#  workerType: standard_gpu
#  parameterServerCount: 3
#  parameterServerType: standard

Overwriting config.yml


In [21]:
%%bash
# Take the timestamp once so the output path and the job name always match
# (two separate `date` calls can straddle a second boundary).
STAMP=$(date -u +%y%m%d_%H%M%S)
OUTDIR=gs://${BUCKET}/imagemodel_${STAMP}
JOBNAME=imagemodel_${STAMP}
echo $OUTDIR $REGION $JOBNAME
# No `gsutil rm` is needed: the paths are freshly timestamped and cannot exist
# yet, so the removal only ever printed a CommandException.
gcloud ml-engine jobs submit training $JOBNAME \
    --region=$REGION \
    --module-name=image_model.task \
    --package-path=${PWD}/image_model \
    --job-dir=${OUTDIR}_job \
    --staging-bucket=gs://$BUCKET \
    --scale-tier=CUSTOM \
    --runtime-version=$TFVERSION \
    --config=${PWD}/config.yml \
    -- \
    --output_dir=${OUTDIR}_trained \
    --train_steps=1000 \
    --learning_rate=0.01 \
    --batch_size=40 \
    --pretrained='res_50' \
    --batch_norm \
    --train_data_path='gs://project-sample/dataset1_data_train.csv' \
    --eval_data_path='gs://project-sample/dataset1_data_eval.csv'

gs://project-sample/imagemodel_190404_194002 us-central1 imagemodel_190404_194002
jobId: imagemodel_190404_194002
state: QUEUED


CommandException: 1 files/objects could not be removed.
Job [imagemodel_190404_194002] submitted successfully.
Your job is still active. You may view the status of your job with the command

  $ gcloud ml-engine jobs describe imagemodel_190404_194002

or continue streaming the logs with the command

  $ gcloud ml-engine jobs stream-logs imagemodel_190404_194002
