# Notebook for training and serving a model on Cloud ML

In [1]:
# Enable IPython's autoreload extension; mode 2 reloads imported modules before
# each cell executes, so edits to the client packages are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [17]:
# Standard library
import datetime as dt
import os

# Client wrappers used throughout this notebook
from cloud_ml.client import CloudML
from cloud_storage.client import CloudStorage

# GCP project to operate on. It is read from the environment so that no
# project id is hard-coded in the notebook; a KeyError is raised here if
# TEST_PROJECT_ID is not set.
project_id = os.environ['TEST_PROJECT_ID']

In [18]:
# Build one client per service, both bound to the same project id.
# `ml` is used for models, versions and jobs; `cs` is used for buckets.
ml = CloudML(project_id)
cs = CloudStorage(project_id)

# Create a model and bucket for storing data

In [4]:
# Settings for the model under which versions are created later in the notebook.
model_name = 'test_model'
description = 'test description'
regions = ['europe-west1']

# If a model with this name already exists, the recorded output below shows the
# failure being logged as an error rather than raised.
ml.create_model(model_name, description, regions)

INFO:googleapiclient.discovery:URL being requested: POST https://ml.googleapis.com/v1/projects/test-project-221821/models?alt=json
ERROR:root:Field: model.name Error: A model with the same name already exists.


In [5]:
# Bucket that holds the training package, the data and the training outputs.
bucket_name = 'vuk-mnist-example'
try:
    cs.create_bucket(bucket_name, location='EU')
except Exception:
    # Best-effort creation: any failure is reported as "probably exists".
    # `Exception` is caught instead of using a bare `except:` so that
    # KeyboardInterrupt and SystemExit still propagate and the cell can be
    # interrupted.
    print('Bucket probably already exists. Either you have created you bucket already or you have to specify a different name.')

Bucket probably already exists. Either you have created you bucket already or you have to specify a different name.


# Package and upload your training application and the data

You can use `gcloud` to start a job; it will automatically package your training application. Here, however, we will package the training application ourselves. To package your trainer, navigate to the folder containing the `trainer` package and execute:
```
python setup.py sdist
```
A `dist` directory will be created where you can find your `tar.gz` file.

Use the following `gsutil` commands to upload the MNIST model training package and the data:
```
gsutil cp ${HOME}/gcp/cloud_ml/examples/mnist/dist/* gs://vuk-mnist-example/training/
gsutil -m cp -r ${HOME}/gcp/cloud_ml/examples/mnist/data gs://vuk-mnist-example/training/
```

# Train the model

In [9]:
# --- Job identity and compute environment --------------------------------
# The UTC timestamp suffix (second resolution) gives each submission its own id.
job_id = 'mnist_' + dt.datetime.utcnow().strftime('%Y%m%d_%H%M%S')
scale_tier = 'BASIC'
region = 'europe-west1'
runtime_version = '1.12'
python_version = '3.5'

# --- Training code and output location -----------------------------------
python_module = 'trainer.task'
package_uris = ['gs://{}/training/trainer-0.1.tar.gz'.format(bucket_name)]
job_dir = 'gs://{}/training/outputs/'.format(bucket_name)

# --- Command-line arguments handed to the training module ----------------
job_arguments = [
    '--train_data_folder=gs://{}/training/data/train/'.format(bucket_name),
    '--evaluation_data_folder=gs://{}/training/data/test/'.format(bucket_name),
    '--model_dir={}'.format(job_dir),
    '--nr_epochs=4',
    '--learning_rate=0.0001',
]

# Submit the job; arguments are positional, in the order the client expects.
response = ml.start_training_job(
    job_id, scale_tier, package_uris, python_module, region,
    job_dir, runtime_version, python_version, job_arguments,
)
# Show the submission response as the cell's output.
response

INFO:googleapiclient.discovery:URL being requested: POST https://ml.googleapis.com/v1/projects/test-project-221821/jobs?alt=json


{'jobId': 'mnist_20190127_183425',
 'trainingInput': {'packageUris': ['gs://vuk-mnist-example/training/trainer-0.1.tar.gz'],
  'pythonModule': 'trainer.task',
  'args': ['--train_data_folder=gs://vuk-mnist-example/training/data/train/',
   '--evaluation_data_folder=gs://vuk-mnist-example/training/data/test/',
   '--model_dir=gs://vuk-mnist-example/training/outputs/',
   '--nr_epochs=4',
   '--learning_rate=0.0001'],
  'region': 'europe-west1',
  'runtimeVersion': '1.12',
  'jobDir': 'gs://vuk-mnist-example/training/outputs/',
  'pythonVersion': '3.5'},
 'createTime': '2019-01-27T18:34:28Z',
 'state': 'QUEUED',
 'trainingOutput': {},
 'etag': 'be25fnQlTpo='}

# Get the job to inspect the status

In [16]:
# Fetch the current description of the submitted training job and report its
# state; re-run this cell to poll until the job has finished.
# Note: this rebinds `response`, replacing the submission response stored by
# the training cell.
response = ml.get_job(job_id)
print("Job status: {}".format(response['state']))

INFO:googleapiclient.discovery:URL being requested: GET https://ml.googleapis.com/v1/projects/test-project-221821/jobs/mnist_20190127_183425?alt=json


Job status: SUCCEEDED


# Create a version for the model

We have already created a model, now we have to create a version under the model that can be used for predictions.

If this is the first version that is being created, then it will automatically be set as the default version used for predictions. However, if this is not the first version of this model, then you have to promote the version to be the default version for predictions.

In [None]:
# The UTC timestamp suffix (second resolution) gives each run its own version name.
version_name = 'neural_network_' + dt.datetime.utcnow().strftime('%Y%m%d_%H%M%S')
# Same location that the training job used as its job_dir / --model_dir.
deployment_uri = 'gs://{}/training/outputs/'.format(bucket_name)

# NOTE(review): deployment_uri is computed above but never passed to
# create_model_version. Presumably it should be supplied so the version knows
# where the trained model lives -- confirm against the signature of
# CloudML.create_model_version.
ml.create_model_version(model_name, version_name)

# Clean up

In [None]:
# Remove the model created earlier in this notebook.
# NOTE(review): the underlying API presumably refuses to delete a model that
# still has versions -- confirm that delete_model handles the version created above.
ml.delete_model(model_name)

In [None]:
# Remove the bucket that held the training package, data and outputs.
# NOTE(review): presumably this fails if the bucket still contains objects,
# unless delete_bucket empties it first -- confirm its behavior.
cs.delete_bucket(bucket_name)