# Get training data and upload them to S3

In [1]:
%%bash
# Download the training dataset archive into the current directory.
# --fail makes curl exit non-zero on an HTTP error instead of silently saving
# the server's error page as num-dataset.tar.gz; -L follows redirects.
curl --fail -O -L https://github.com/vpavlin/odh-tensorflow-jobs/raw/master/training/num-dataset.tar.gz

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0100   162  100   162    0     0    247      0 --:--:-- --:--:-- --:--:--   247
 48 4959k   48 2383k    0     0  1453k      0  0:00:03  0:00:01  0:00:02 1453k100 4959k  100 4959k    0     0  2227k      0  0:00:02  0:00:02 --:--:-- 4396k


In [2]:
import os

import boto3

# Bucket that receives the training archive.
my_bucket = "ODH-TENSORFLOW-JOBS-DEMO"

# The S3 endpoint is taken from the environment rather than hard-coded.
conn = boto3.client(
    service_name='s3',
    endpoint_url=os.environ['S3_ENDPOINT_URL'],
)

# Create the bucket only when it is not already among the listed buckets.
buckets = [entry['Name'] for entry in conn.list_buckets()['Buckets']]
if my_bucket in buckets:
    print("Bucket %s exists" % my_bucket)
else:
    resp = conn.create_bucket(Bucket=my_bucket)
    status = resp['ResponseMetadata']['HTTPStatusCode']
    if status != 200:
        raise Exception("Could not create bucket:(")

    print("Bucket %s created" % my_bucket)

# Upload the archive downloaded earlier, then list it back; the listing is the
# cell's last expression, so the notebook displays it as the cell output.
key = "/input-data/num-dataset.tar.gz"
conn.upload_file(Bucket=my_bucket, Key=key, Filename="num-dataset.tar.gz")
conn.list_objects(Bucket=my_bucket, Prefix=key)

Bucket ODH-TENSORFLOW-JOBS-DEMO created


{'ResponseMetadata': {'RequestId': 'tx00000000000000041a545-005c1261a6-11ed1491-default',
  'HostId': '',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amz-request-id': 'tx00000000000000041a545-005c1261a6-11ed1491-default',
   'content-type': 'application/xml',
   'content-length': '641',
   'date': 'Thu, 13 Dec 2018 13:41:58 GMT',
   'strict-transport-security': 'max-age=31536000; includeSubDomains; preload'},
  'RetryAttempts': 0},
 'IsTruncated': False,
 'Marker': '',
 'Contents': [{'Key': '/input-data/num-dataset.tar.gz',
   'LastModified': datetime.datetime(2018, 12, 13, 13, 41, 58, 78000, tzinfo=tzlocal()),
   'ETag': '"938fb559ed093232ca5e985d1f370bf9"',
   'Size': 5078688,
   'StorageClass': 'STANDARD',
   'Owner': {'DisplayName': 'Datahub Insights Dev User',
    'ID': 'datahub-insights1-dev'}}],
 'Name': 'ODH-TENSORFLOW-JOBS-DEMO',
 'Prefix': '/input-data/num-dataset.tar.gz',
 'MaxKeys': 1000,
 'EncodingType': 'url'}

# Install OpenShift client

In [3]:
%%bash
# Abort at the first failing step so a broken download is never unpacked or
# copied into place.
set -euo pipefail

# Fetch the OpenShift Origin v3.11.0 client tools; --fail turns an HTTP error
# into a non-zero exit instead of writing the error page to oc.tar.gz.
curl --fail -o oc.tar.gz -L https://github.com/openshift/origin/releases/download/v3.11.0/openshift-origin-client-tools-v3.11.0-0cbc58b-linux-64bit.tar.gz
tar xzf oc.tar.gz
# Copy the `oc` binary into the bin directory that sits beside the home
# directory. NOTE(review): presumably this directory is on PATH, since the
# following cells invoke `oc` without a path - confirm for your image.
cp openshift-origin-client-tools-v3.11.0-0cbc58b-linux-64bit/oc ~/../bin/oc


  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0100   654    0   654    0     0   1807      0 --:--:-- --:--:-- --:--:--  1811
  0 53.8M    0 16360    0     0  16785      0  0:56:06 --:--:--  0:56:06 16785 23 53.8M   23 12.8M    0     0  6693k      0  0:00:08  0:00:01  0:00:07 12.9M 36 53.8M   36 19.7M    0     0  6838k      0  0:00:08  0:00:02  0:00:06  9.9M 43 53.8M   43 23.2M    0     0  5992k      0  0:00:09  0:00:03  0:00:06 7937k 48 53.8M   48 26.2M    0     0  5412k      0  0:00:10  0:00:04  0:00:06 6726k 57 53.8M   57 30.8M    0     0  5210k      0  0:00:10  0:00:06  0:00:04 6206k 62 53.8M   62 33.5M    0     0  4790k      0  0:00:11  0:00:07  0:00:04 4068k 63 53.8M   63 34.2M    0     0  4402k      0  0:00:12  0:00:07  0:00:05 2960k 66 53.8M   66 35.7M    0     0  4081k      0  0:0

# Check the Service Account

In [4]:
%%bash
# Print the identity the `oc` client is currently authenticated as.
oc whoami

system:serviceaccount:vpavlin-jupyterhub:jupyter-tensorflow


# List templates

You should see the templates imported from the [odh-tensorflow-jobs](https://github.com/vpavlin/odh-tensorflow-jobs/tree/master/openshift) repository.

In [5]:
%%bash
# List the templates visible to the current user in the current project.
oc get templates

NAME                                    DESCRIPTION                                                                        PARAMETERS    OBJECTS
jupyter-notebook-workspace-tensorflow   Template for deploying Tensorflow enable Jupyter Notebook images with persist...   4 (all set)   7
odh-config                              Template to configure basic components of ODH ML flow                              3 (2 blank)   1
odh-tensorflow-serving                  Template to serve models using tensorflow                                          5 (all set)   2
odh-tensorflow-training                 Template to train models using tensorflow                                          9 (all set)   1


# List parameters for the training template

The `oc process` command lets you list the configurable parameters of a given template. We'll look at the training job template first.

In [6]:
%%bash
# Show the training template's parameters (name, description, generator,
# value) without rendering the template itself.
oc process odh-tensorflow-training --parameters

NAME                    DESCRIPTION                                                                                              GENERATOR           VALUE
APP_NAME                Short name of your application (to be used in OpenShift artifact names)                                                      demo
TRAINING_STEPS          Number of training steps to perform                                                                                          10000
INPUT_DATA_LOCATION     Location of input data in form of 's3://BUCKET/path/', path is used as prefix to lookup the data in S3                       s3://MY-BUCKET/data/
OUTPUT_MODEL_LOCATION   Location where the resulting model will be stored                                                                            s3://MY-BUCKET/model-out/
MEMORY                  Memory limit to be assigned to the job                                                                                       2Gi
CPU                     Limit for numb

# Deploy the training job

Configure all necessary parameters from above and pipe the `oc process` output to `oc apply` command to submit the job to OpenShift

In [7]:
%%bash
# Without pipefail the pipeline's exit status is that of `oc apply` alone, so a
# failing `oc process` could go unnoticed; fail the cell if either side fails.
set -euo pipefail

# Render the training template with this demo's parameters and submit the
# resulting objects to OpenShift.
oc process odh-tensorflow-training \
    -p APP_NAME=from-jupyter \
    -p TRAINING_STEPS=2000 \
    -p INPUT_DATA_LOCATION="s3://ODH-TENSORFLOW-JOBS-DEMO/input-data" \
    -p OUTPUT_MODEL_LOCATION="s3://ODH-TENSORFLOW-JOBS-DEMO/output-model" \
    -p MEMORY="9Gi" \
    -p CPU="9" \
        | oc apply -f -

job.batch/odh-tensorflow-training-from-jupyter created


# View logs

You can review the job execution by (re)running the below `oc logs` command

In [11]:
%%bash
# Print the log output produced so far by the training job.
oc logs job.batch/odh-tensorflow-training-from-jupyter

Cloning into 'app_code'...
Using context dir training
Processing /opt/app-root/src/app_code/pyodh
Collecting git+https://github.com/CermakM/intect (from -r requirements.txt (line 3))
  Cloning https://github.com/CermakM/intect to /tmp/pip-m1f4ugh7-build
Collecting names (from intect==0.2.3->-r requirements.txt (line 3))
  Downloading https://files.pythonhosted.org/packages/44/4e/f9cb7ef2df0250f4ba3334fbdabaa94f9c88097089763d8e85ada8092f84/names-0.3.0.tar.gz (789kB)
Collecting pyyaml (from intect==0.2.3->-r requirements.txt (line 3))
  Downloading https://files.pythonhosted.org/packages/9e/a3/1d13970c3f36777c583f136c136f804d70f500168edc1edea6daa7200769/PyYAML-3.13.tar.gz (270kB)
Collecting matplotlib (from intect==0.2.3->-r requirements.txt (line 3))
  Downloading https://files.pythonhosted.org/packages/71/07/16d781df15be30df4acfd536c479268f1208b2dfbc91e9ca5d92c9caf673/matplotlib-3.0.2-cp36-cp36m-manylinux1_x86_64.whl (12.9MB)


In [12]:
%%bash
# Stream the training job's logs; -f (follow) keeps the cell running and
# printing new lines as they arrive instead of returning immediately.
oc logs job.batch/odh-tensorflow-training-from-jupyter -f

Cloning into 'app_code'...
Using context dir training
Processing /opt/app-root/src/app_code/pyodh
Collecting git+https://github.com/CermakM/intect (from -r requirements.txt (line 3))
  Cloning https://github.com/CermakM/intect to /tmp/pip-m1f4ugh7-build
Collecting names (from intect==0.2.3->-r requirements.txt (line 3))
  Downloading https://files.pythonhosted.org/packages/44/4e/f9cb7ef2df0250f4ba3334fbdabaa94f9c88097089763d8e85ada8092f84/names-0.3.0.tar.gz (789kB)
Collecting pyyaml (from intect==0.2.3->-r requirements.txt (line 3))
  Downloading https://files.pythonhosted.org/packages/9e/a3/1d13970c3f36777c583f136c136f804d70f500168edc1edea6daa7200769/PyYAML-3.13.tar.gz (270kB)
Collecting matplotlib (from intect==0.2.3->-r requirements.txt (line 3))
  Downloading https://files.pythonhosted.org/packages/71/07/16d781df15be30df4acfd536c479268f1208b2dfbc91e9ca5d92c9caf673/matplotlib-3.0.2-cp36-cp36m-manylinux1_x86_64.whl (12.9MB)
Collecting scipy (from intect==0.2.3->-r requirements.txt (l

# List parameters for Tensorflow serving template

Once the training job has completed, we can deploy the serving endpoint - see the parameters below.

In [13]:
%%bash
# Show the serving template's parameters (name, description, generator,
# value) without rendering the template itself.
oc process odh-tensorflow-serving --parameters

NAME                   DESCRIPTION                                                               GENERATOR           VALUE
APP_NAME               Short name of your application (to be used in OpenShift artifact names)                       demo
INPUT_MODEL_LOCATION   Location where the resulting model will be stored                                             s3://MY-BUCKET/model-out/
MODEL_NAME             Name of the model                                                                             mnist
MEMORY                 Memory limit to be assigned to the job                                                        1Gi
CPU                    Limit for number of cores to assign to the job                                                1


# Deploy Tensorflow serving endpoint

In [14]:
%%bash
# Without pipefail the pipeline's exit status is that of `oc apply` alone, so a
# failing `oc process` could go unnoticed; fail the cell if either side fails.
set -euo pipefail

# Render the serving template, pointing it at the model location the training
# job was told to write to, and submit the resulting objects to OpenShift.
oc process odh-tensorflow-serving \
    -p APP_NAME=from-jupyter \
    -p INPUT_MODEL_LOCATION="s3://ODH-TENSORFLOW-JOBS-DEMO/output-model" \
    -p MODEL_NAME="intect" \
        | oc apply -f -

deploymentconfig.apps.openshift.io/odh-tensorflow-serving-from-jupyter created
service/odh-tensorflow-serving-from-jupyter created


In [15]:
%%bash
# Print the logs of the serving deployment config to check on its rollout.
oc logs deploymentconfig.apps.openshift.io/odh-tensorflow-serving-from-jupyter

--> Scaling odh-tensorflow-serving-from-jupyter-1 to 1


# Testing the predictions

As the training job uses the [intect](https://github.com/CermakM/intect) tool, we can use `intect-client` to call our model server. Run the following cell to install the client.

In [16]:
%%bash
# Install intect straight from its GitHub repository.
# NOTE(review): the install is unpinned, so it takes whatever the repository's
# default branch holds at run time - consider pinning to a tag or commit
# (e.g. git+https://...@<ref>) for reproducibility.
pip install git+https://github.com/CermakM/intect

Collecting git+https://github.com/CermakM/intect
  Cloning https://github.com/CermakM/intect to /tmp/pip-req-build-wrtfln65


# Calling prediction endpoint

We need to configure the server name - you can see the name of the deployed service above as `service/odh-tensorflow-serving-from-jupyter` - we'll use the part after the slash. The model name needs to match the name defined in the `MODEL_NAME` parameter above.

You can use [2.png](https://github.com/vpavlin/odh-tensorflow-jobs/blob/master/2.png) to test the prediction.

In [17]:
%%bash
# Download the sample image used to test the prediction endpoint.
# --fail makes curl exit non-zero on an HTTP error instead of silently saving
# the server's error page as 2.png; -L follows redirects.
curl --fail -O -L https://github.com/vpavlin/odh-tensorflow-jobs/raw/master/2.png

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0100   140  100   140    0     0    423      0 --:--:-- --:--:-- --:--:--   424
100  1867  100  1867    0     0   3327      0 --:--:-- --:--:-- --:--:--  3327


### Downloaded image 

![2.png](2.png)

In [18]:
%%bash
# Send 2.png to the model server: --host is the name of the deployed service
# and --model_name is the MODEL_NAME value given when the serving template
# was processed.
# NOTE(review): 6006 is presumably the port exposed by the serving service -
# confirm against the serving template.
intect-client --host odh-tensorflow-serving-from-jupyter --port 6006 --model_name intect --images 2.png

outputs {
  key: "output"
  value {
    dtype: DT_STRING
    tensor_shape {
      dim {
        size: 1
      }
      dim {
        size: 1
      }
    }
    string_val: "2"
  }
}
model_spec {
  name: "intect"
  version {
    value: 1544708736
  }
  signature_name: "prediction"
}

outputs {
  key: "output"
  value {
    dtype: DT_FLOAT
    tensor_shape {
      dim {
        size: 1
      }
    }
    float_val: 0.9730215072631836
  }
}
model_spec {
  name: "intect"
  version {
    value: 1544708736
  }
  signature_name: "confidence"
}



  if issubdtype(ts, int):
  elif issubdtype(type(size), float):
