In [1]:
# Real life data

import logging
import threading
import itertools
import json
import os
import pandas as pd  
import numpy as np  
import matplotlib.pyplot as plt
from matplotlib import cm
from mpl_toolkits.mplot3d import axes3d
import seaborn as seabornInstance
from sqlalchemy import Column, Integer, String, Float, DateTime, Boolean, func
from iotfunctions import base
from iotfunctions import bif
from iotfunctions import entity
from iotfunctions import metadata
from iotfunctions.metadata import EntityType
from iotfunctions.db import Database
from iotfunctions.enginelog import EngineLogging
from iotfunctions import estimator
from iotfunctions.ui import (UISingle, UIMultiItem, UIFunctionOutSingle,
                 UISingleItem, UIFunctionOutMulti, UIMulti, UIExpression,
                 UIText, UIStatusFlag, UIParameters)
from mmfunctions.anomaly import (SaliencybasedGeneralizedAnomalyScore, SpectralAnomalyScore,
                 FFTbasedGeneralizedAnomalyScore, KMeansAnomalyScore)
import datetime as dt
from sklearn.model_selection import train_test_split 
from sklearn.linear_model import LinearRegression
from sklearn import metrics
from sklearn.metrics import roc_curve, auc, roc_auc_score, r2_score

import scipy as sp
import scipy.fftpack
import skimage as ski

from skimage import util as skiutil # for nifty windowing
import pyod as pyod
from pyod.utils.data import generate_data
from pyod.utils.data import evaluate_print
from pyod.utils.example import visualize
from pyod.models.knn import KNN
from pyod.models.iforest import IForest
%matplotlib inline
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()

EngineLogging.configure_console_logging(logging.INFO)



#### Train a 2-layered LSTM in Watson Machine Learning

 
Telemanom ([Detecting Spacecraft Anomalies Using LSTMs and Nonparametric Dynamic Thresholding](https://arxiv.org/pdf/1802.04431.pdf) - 2018


Let's find out first what ML libraries are supported by WML.


In [2]:
# make sure to downgrade to sklearn 0.22.2 (no >= 0.23)
from watson_machine_learning_client import WatsonMachineLearningAPIClient

with open('credentials_wml.json', encoding='utf-8') as F:
    wml_credentials = json.loads(F.read())
    
with open('credentials_cos.json', encoding='utf-8') as F:
    cos_credentials = json.loads(F.read())

wml_url=wml_credentials['url']
wml_instance_id=wml_credentials['instance_id']
wml_apikey=wml_credentials['apikey']

wml_data_source_type= 's3'


# don't use this endpoint
cos_endpoint = cos_credentials['endpoints']
cos_endpoint = 'https://s3.eu.cloud-object-storage.appdomain.cloud'

cos_apikey = cos_credentials['apikey']
cos_access_key = cos_credentials['cos_hmac_keys']['access_key_id']
cos_secret_key = cos_credentials['cos_hmac_keys']['secret_access_key']
# 'https://s3.eu.cloud-object-storage.appdomain.cloud'

cos_input_bucket = 'githubanalyzer-donotdelete-pr-b9xa3kxotzh5in'
cos_output_bucket = 'githubanalyzer-donotdelete-pr-b9xa3kxotzh5in'

client = WatsonMachineLearningAPIClient(wml_credentials)
#rep_list = client.runtimes.list(limit=4000)

### First step 

Apparently Keras is missing out. However, Telemanom is built on Keras, so we have to port it to either Tensorflow, Pytorch, Mxnet, Caffe or Theano.

I opted for Pytorch for skill building purposes and ported Telemanom to Pytorch.



<small>
    
```
    
class LSTM_2L(nn.Module):
    def __init__(self, n_features = 1, hidden_dims = [80,80], seq_length = 250,
                 batch_size = 64, n_predictions = 10, dropout = 0.3):
        super(LSTM_2L, self).__init__()
        print ('LSTM_2L', n_features, hidden_dims, seq_length, batch_size, n_predictions, dropout)

        self.n_features = n_features
        self.hidden_dims = hidden_dims
        self.seq_length = seq_length
        self.num_layers = len(self.hidden_dims)
        self.batch_size = batch_size

        self.lstm1 = nn.LSTM(
            input_size = self.n_features,
            hidden_size = self.hidden_dims[0],
            batch_first = True,
            dropout = dropout,
            num_layers = 2)

        self.linear = nn.Linear(self.hidden_dims[1], n_predictions)
        self.init_hidden_state()
        
    def init_hidden_state(self):

        self.hidden = (
            torch.randn(self.num_layers, self.batch_size, self.hidden_dims[0]), #.to(self.device),
            torch.randn(self.num_layers, self.batch_size, self.hidden_dims[0]), #.to(self.device),
            )

    def forward(self, sequences):

        batch_size, seq_len, n_features = sequences.size()  # batch first

        lstm1_out , (h1_n, c1_n) = self.lstm1(sequences, (self.hidden[0], self.hidden[1]))

        last_time_step = lstm1_out[:,-1,:]

        y_pred = self.linear(last_time_step)

        return y_pred
 ```
</small>

In [3]:
# part of mmfunctions
import numpy as np
import pandas as pd 

import telemanom
from telemanom.helpers import Config
from telemanom.errors import Errors
import telemanom.helpers as helpers
from telemanom.channel import Channel
from telemanom.modeling import Model

conf = Config("./telemanom/config.yaml")

conf.dictionary['l_s'] = 250
conf.dictionary['epochs'] = 80
conf.dictionary['dropout'] = 0.2
conf.batch_size = 512
conf.l_s = 250
conf.epochs = 80    # max
conf.dropout = 0.2
conf.lstm_batch_size=64

In [4]:
#
# Define structure for local data
#              telemanom supports multiple channels to reflect spacecraft sensors, we only need a single one now
#
device="Armstarknew"
chan = Channel(conf, device)
helpers.make_dirs(conf.use_id, conf, "./telemanom")
print(chan)
conf

# load data

chan.train = np.loadtxt('./telemanom/wml_train.csv')
chan.test = np.loadtxt('./telemanom/wml_test.csv')



Channel:Channel


#### The following steps replay the code in wml_telemanom.py

We jump over the next few cells unless we want to initiate a local training run

In [5]:
# producing overlapping windows of length 260 for lookback (250) and prediction (10)
chan.shape_data(chan.train, train=True)
chan.shape_data(chan.test, train=False)

2020-08-05T15:12:23.639 INFO telemanom.shape_data FFT channel: False
(129300, 2)
2020-08-05T15:12:24.059 INFO telemanom.shape_data FFT channel: False
(129195, 2)


In [6]:
# init the Python double stacked LSTM model
model = Model(conf, conf.use_id, chan, "./telemanom", False)

LSTM_2L 2 [80, 80] 250 64 10 0.2
Hidden dimensions are:  2 64 80
input shape:  (None, 2)


In [7]:
x = model.model.hidden[0].clone()

In [8]:
import torch
trainPath = './mytrainedpytorchmodel'

try:
    model.model.load_state_dict(torch.load(trainPath))
    model.model.eval()
except Exception:
    # drink a coffee - training takes roughly 30 minutes
    print('have to train')
    model.train_new(chan)
    torch.save(model.model.state_dict(), trainPath)

#model.train_new(chan)
torch.save(model.model.state_dict(), "./mytrainedpytorchmodel")

# no training run - we've already spent CPU cycles last week
#

In [9]:
(x, model.model.hidden[0])

(tensor([[[ 0.6062, -0.1722,  0.1428,  ...,  0.0610,  0.6618, -0.0901],
          [ 0.8051,  1.2393, -1.5292,  ...,  1.1181, -1.0100,  0.3673],
          [-1.1120,  0.1249,  0.6721,  ..., -1.2269, -0.6833,  0.6239],
          ...,
          [ 0.7335,  1.2442,  0.9831,  ...,  1.0030, -1.7051,  0.1122],
          [-0.6855, -1.0126,  2.3261,  ...,  0.5558,  0.0732,  0.2561],
          [ 0.0933,  0.7439,  1.6910,  ..., -0.0913, -0.0059,  1.2361]],
 
         [[ 0.5923, -1.7026,  0.9706,  ...,  0.9155, -1.0606, -1.5867],
          [-1.9774,  0.3929, -0.9255,  ...,  1.0483,  0.2315,  0.4416],
          [ 1.4346, -0.4047,  0.7843,  ..., -0.8567,  0.2618,  0.1443],
          ...,
          [-1.3002,  0.1392,  0.0236,  ..., -1.1719,  0.3881, -0.0950],
          [-0.6523,  2.1519, -0.6023,  ..., -1.0240, -0.1100, -0.1231],
          [-0.7261,  0.4100,  0.2952,  ..., -0.3738,  0.6326, -0.0222]]]),
 tensor([[[ 0.6062, -0.1722,  0.1428,  ...,  0.0610,  0.6618, -0.0901],
          [ 0.8051,  1.2393,

In [10]:
model.model.state_dict

<bound method Module.state_dict of LSTM_2L(
  (lstm1): LSTM(2, 80, num_layers=2, batch_first=True, dropout=0.2)
  (linear): Linear(in_features=80, out_features=10, bias=True)
)>

In [11]:
# attempt to export it as ONNX model

# switch off training mode
model.model.eval()

torch_in = None
torch_out = None

# switch off autograd, automatic differentiation
with torch.no_grad():
    
    # input tensor
    torch_in = torch.randn(64, 80, 2, requires_grad=True)
    
    # test dimensions
    torch_out,_ = model.model(torch_in)

    # default export
    torch.onnx.export(model.model, torch_in, 'lstm.onnx')
    
    # test model load
    import onnx
    onnx_model = onnx.load('lstm.onnx')
    # input shape [5, 3, 10]
    print(onnx_model.graph.input[0])

    onnx.checker.check_model(onnx_model)

forward| Batch size:  64  Sequence length:  80 Output length: 2
Shapes  torch.Size([64, 80, 80]) torch.Size([64, 80]) torch.Size([64, 80])
forward| Batch size:  tensor(64)  Sequence length:  tensor(80) Output length: tensor(2)
Shapes  torch.Size([64, 80, 80]) torch.Size([64, 80]) torch.Size([64, 80])
name: "input.1"
type {
  tensor_type {
    elem_type: 1
    shape {
      dim {
        dim_value: 64
      }
      dim {
        dim_value: 80
      }
      dim {
        dim_value: 2
      }
    }
  }
}



  print ('Shapes ', lstm1_out.shape, last_time_step.shape, last_time_step.shape)


#### Running onnx models

Following the descriptions found here:
- https://pytorch.org/tutorials/advanced/super_resolution_with_onnxruntime.html

In [12]:
import onnxruntime

ort_session = onnxruntime.InferenceSession('lstm.onnx')

def to_numpy(tensor):
    return tensor.detach().cpu().numpy() if tensor.requires_grad else tensor.cpu().numpy()

# compute ONNX Runtime output prediction
#   reuse torch_in and torch_out from previous model exporting step
ort_inputs = {ort_session.get_inputs()[0].name: to_numpy(torch_in)}
ort_outs = ort_session.run(None, ort_inputs)

print ('Shapes ', ort_outs[0].shape, to_numpy(torch_out).shape)

# compare ONNX Runtime and PyTorch results
np.testing.assert_allclose(to_numpy(torch_out), ort_outs[0], rtol=1e-03, atol=1e-05)


print("Exported model has been tested with ONNXRuntime, and the result looks good!")

Shapes  (64, 10) (64, 10)
Exported model has been tested with ONNXRuntime, and the result looks good!


#### Training done

In [13]:
from IPython.display import display, Markdown
Markdown('<strong>{}</strong><br/>{}'.format('Make sure you have uploaded the code in mmfunctions/telemanom as zip file to COS bucket', cos_input_bucket))

<strong>Make sure you have uploaded the code in mmfunctions/telemanom as zip file to COS bucket</strong><br/>githubanalyzer-donotdelete-pr-b9xa3kxotzh5in

In [14]:
# Zip the code in the ./telemanom subdirectory first

import subprocess
output = None
try:
    output = subprocess.check_output("ls ./telemanom/wml_model.zip", shell=True).decode('ascii')  + 'found - good'
except Exception:
    output = 'Not found - do it now and run \"zip -x \'.git*\' -9ry wml_model.zip  .\" in the telemanom directory'

Markdown('<strong>{}</strong><br/>'.format(output))

<strong>./telemanom/wml_model.zip
found - good</strong><br/>

In [15]:
# check whether we have uploaded the code
!s3cmd --access_key {cos_access_key} --secret_key {cos_secret_key} \
--access_token {cos_apikey} --host s3.eu.cloud-object-storage.appdomain.cloud --host-bucket=s3.eu.cloud-object-storage.appdomain.cloud \
ls s3://githubanalyzer-donotdelete-pr-b9xa3kxotzh5in

                          DIR  s3://githubanalyzer-donotdelete-pr-b9xa3kxotzh5in/_wml_checkpoints/
                          DIR  s3://githubanalyzer-donotdelete-pr-b9xa3kxotzh5in/b6c198e3-35b1-4ae3-85db-b542ea460ed2/
                          DIR  s3://githubanalyzer-donotdelete-pr-b9xa3kxotzh5in/d7498a25-3455-468a-bb11-e54bdeec346d/
                          DIR  s3://githubanalyzer-donotdelete-pr-b9xa3kxotzh5in/notebook/
                          DIR  s3://githubanalyzer-donotdelete-pr-b9xa3kxotzh5in/training-YAYfZiIGR/
                          DIR  s3://githubanalyzer-donotdelete-pr-b9xa3kxotzh5in/training-nFSMWiIMg/
2020-08-05 13:12      3563355  s3://githubanalyzer-donotdelete-pr-b9xa3kxotzh5in/wml_model.zip


In [84]:
# Now starting to work with WML
#
#   make sure we go with Open Neural Network Exchange (ONNX) to allow for pytorch model exporting
# 

wml_train_code='./telemanom/wml_model.zip' # where this notebook finds the code

wml_execution_command='python3 wml_telemanom.py' # command to start training

wml_framework_name='pytorch-onnx'

# we have to run on pytorch-onnx 1.2 (Open Neural Network Exchange) but it's not yet available
wml_framework_version='1.1'   # go with 1.1 until GA of CloudPak for Data 3.5 
wml_runtime = 'python'
wml_runtime_version='3.6' # and python 3.6

wml_run_definition = 'wml-pytorch-definition' # dummy name
wml_run_name = 'wml-pytorch-run' # more dummy
wml_model_name='wml-tensorflow-miregal' # even more dummy

wml_compute_name='k80'  # free tier machine type
wml_compute_nodes='1'   # free tier

wml_runtime_version_v4 = wml_framework_version + '-py' + wml_runtime_version   # sdk level
wml_compute_nodes_v4 = int(wml_compute_nodes)

model_code = wml_train_code


In [85]:
wml_train_code

'./telemanom/wml_model.zip'

In [86]:
#
#   (custom) libraries serve as meta data for model code to be installed on top of predefined container images
#
# define library meta data for our training code
#
lib_meta = {
    client.runtimes.LibraryMetaNames.NAME: wml_run_definition,
    client.runtimes.LibraryMetaNames.VERSION: wml_framework_version,
    client.runtimes.LibraryMetaNames.FILEPATH: model_code,
    client.runtimes.LibraryMetaNames.PLATFORM: {"name": wml_framework_name, "versions": [wml_framework_version]}
}

In [87]:
#
# do we have a library with that name defined ?
#   delete it first and then store the new updated library
#
library_details = client.runtimes.get_library_details()
for library_detail in library_details['resources']:
    if library_detail['entity']['name'] == wml_run_definition:
        # Delete library if exist because we cannot update model_code
        uid = client.runtimes.get_library_uid(library_detail)
        print ('delete ', library_detail)
        client.repository.delete(uid)
        break

custom_library_details = client.runtimes.store_library(lib_meta)
custom_library_uid = client.runtimes.get_library_uid(custom_library_details)


delete  {'metadata': {'guid': '99dd3825-2a0a-4a35-b271-b40e27fc7a44', 'id': '99dd3825-2a0a-4a35-b271-b40e27fc7a44', 'modified_at': '2020-08-05T13:13:05.420Z', 'created_at': '2020-08-05T13:12:48.603Z', 'href': '/v4/libraries/99dd3825-2a0a-4a35-b271-b40e27fc7a44'}, 'entity': {'space': {'id': '88740b60-6b2f-4f74-b6d8-20528d14db8b', 'href': '/v4/spaces/88740b60-6b2f-4f74-b6d8-20528d14db8b'}, 'name': 'wml-pytorch-definition', 'version': '1.1', 'platform': {'name': 'pytorch-onnx', 'versions': ['1.1']}}}


In [88]:
#
#  Pipelines define a sequence of operations
#
# define a pipeline with a single entry (node) for the training run
#  we could add more node for scaling/normalizing, imputation, feature extraction, "you name it"
#
doc = {
    "doc_type": "pipeline",
    "version": "2.0",
    "primary_pipeline": wml_framework_name,
    "pipelines": [{
        "id": wml_framework_name,
        "runtime_ref": "hybrid",
        "nodes": [{
            "id": "training",
            "type": "model_node",
            "op": "dl_train",
            "runtime_ref": wml_run_name,
            "inputs": [],
            "outputs": [],
            "parameters": {
                "name": "pytorch-telemanom",
                "description": wml_run_definition,
                "command": wml_execution_command,
                "training_lib_href": "/v4/libraries/"+custom_library_uid,
                "compute": {
                    "name": wml_compute_name,            # specify where to run it (not that I have a choice)
                    "nodes": wml_compute_nodes_v4
                }
            }
        }]
    }],
    "runtimes": [{
        "id": wml_run_name,
        "name": wml_framework_name,         # run it on a pytorch image
        "version": wml_runtime_version_v4
    }]
}

# put it in metadata object
metadata = {
    client.repository.PipelineMetaNames.NAME: wml_run_name,
    client.repository.PipelineMetaNames.DOCUMENT: doc
}

# and create the pipeline
pipeline_id = client.pipelines.get_uid(client.repository.store_pipeline(meta_props=metadata))


In [89]:
# this is my pipeline now
client.pipelines.get_details(pipeline_id)

{'metadata': {'name': 'wml-pytorch-run',
  'guid': 'fb11e3a0-b123-4551-9225-28ff94146536',
  'rev': '503e3bb5-8d87-43e4-9b13-8265b09fa190',
  'id': 'fb11e3a0-b123-4551-9225-28ff94146536',
  'modified_at': '2020-08-05T17:37:16.224Z',
  'created_at': '2020-08-05T17:37:16.159Z',
  'href': '/v4/pipelines/fb11e3a0-b123-4551-9225-28ff94146536?rev=503e3bb5-8d87-43e4-9b13-8265b09fa190'},
 'entity': {'space': {'id': '88740b60-6b2f-4f74-b6d8-20528d14db8b',
   'href': '/v4/spaces/88740b60-6b2f-4f74-b6d8-20528d14db8b'},
  'name': 'wml-pytorch-run',
  'document': {'doc_type': 'pipeline',
   'version': '2.0',
   'pipelines': [{'id': 'pytorch-onnx',
     'runtime_ref': 'hybrid',
     'nodes': [{'outputs': [],
       'id': 'training',
       'inputs': [],
       'type': 'model_node',
       'parameters': {'name': 'pytorch-telemanom',
        'description': 'wml-pytorch-definition',
        'compute': {'name': 'k80', 'nodes': 1},
        'command': 'python3 wml_telemanom.py',
        'training_lib_href

In [90]:
# 
# finally start the training run for v4
#   tell it where to load data and model code from and dump results to
#
metadata = {
    client.training.ConfigurationMetaNames.TRAINING_RESULTS_REFERENCE: {
        "name": "training-results-reference_name",
        "connection": {
            "endpoint_url": cos_endpoint,
            "access_key_id": cos_access_key,
            "secret_access_key": cos_secret_key
        },
        "location": {
            "bucket": cos_output_bucket
        },
        "type": wml_data_source_type
    },
    client.training.ConfigurationMetaNames.TRAINING_DATA_REFERENCES:[{
        "name": "training_input_data",
        "type": wml_data_source_type,
        "connection": {
            "endpoint_url": cos_endpoint,
            "access_key_id": cos_access_key,
            "secret_access_key": cos_secret_key
        },
        "location": {
            "bucket": cos_input_bucket
        }
    }],
    client.training.ConfigurationMetaNames.PIPELINE_UID: pipeline_id
}

training_id = client.training.get_uid(client.training.run(meta_props=metadata))
print("training_id", client.training.get_details(training_id))
print("get status", client.training.get_status(training_id))


Status code: 400, body: {
  "trace": "774c7e4e470ee9465a943e8c92d97d47",
  "errors": [{
    "code": "bad_request",
    "message": "Unsupported training runtime pytorch-onnx_1.2-py3.6"
  }]
}


ApiRequestFailure: Failure during training. (POST https://eu-de.ml.cloud.ibm.com/v4/trainings)
Status code: 400, body: {
  "trace": "774c7e4e470ee9465a943e8c92d97d47",
  "errors": [{
    "code": "bad_request",
    "message": "Unsupported training runtime pytorch-onnx_1.2-py3.6"
  }]
}

In [None]:
run_details = client.training.get_details(training_id)
run_uid = training_id

# print logs

client.training.monitor_logs(run_uid)
client.training.monitor_metrics(run_uid)

# should not have run after restarting the notebook 

In [83]:
#run_uid='2c002ed8-e508-4f20-bb15-b789f39a6974'
status = client.training.get_status(run_uid)
status

{'message': {'text': 'Node training: Shapes  torch.Size([64, 250, 80]) torch.Size([64, 80]) torch.Size([64, 80])\n',
  'level': 'info'},
 'running_at': '2020-08-05T13:14:37.894Z',
 'state': 'running'}

In [94]:
#client.training.cancel(run_uid)


'SUCCESS'

#### if training seems to have failed, just look at the logs in our COS output bucket

Dumb me, in the previous run I forgot to import sys.

Fortunately model training has succeeded and the model has been stored in COS. Phew.

<small>

```
...
Batch  1611
Batch  1612
After batch  1612 0.002384878075476655
[1] Training loss: 0.002384878075476655 	 Validation loss: 0.0014487287297119242 
Training complete...
Model saved in file: /mnt/results/githubanalyzer-donotdelete-pr-b9xa3kxotzh5in/training-h9VOfZVMR/model
['_submitted_code', 'learner-1', 'model', 'training-log.txt']
/mnt/results/githubanalyzer-donotdelete-pr-b9xa3kxotzh5in/training-h9VOfZVMR
Traceback (most recent call last):
  File "wml_telemanom.py", line 61, in <module>
    sys.stdout.flush()
NameError: name 'sys' is not defined
```
    
</small>
    


In [31]:
wml_runtime_version_v4

'1.1-py3.6'

In [36]:
# let's store the model

meta_props_pyt = {
    client.repository.ModelMetaNames.NAME: wml_model_name,
    client.repository.ModelMetaNames.RUNTIME_UID: wml_framework_name + '_' + wml_runtime_version_v4,
    client.repository.ModelMetaNames.TYPE: wml_framework_name + '_' + wml_framework_version
}

model_details = client.repository.store_model(run_uid, meta_props=meta_props_pyt)



In [39]:
model_details

{'metadata': {'name': 'wml-tensorflow-miregal',
  'guid': 'eeac7d0a-69e7-4f81-ac5b-35eff542a841',
  'rev': 'cea0af8b-0ebf-4e0e-88d9-69b98c78eefe',
  'id': 'eeac7d0a-69e7-4f81-ac5b-35eff542a841',
  'modified_at': '2020-07-27T14:55:59.286Z',
  'created_at': '2020-07-27T14:55:59.219Z',
  'href': '/v4/models/eeac7d0a-69e7-4f81-ac5b-35eff542a841?rev=cea0af8b-0ebf-4e0e-88d9-69b98c78eefe'},
 'entity': {'name': 'wml-tensorflow-miregal',
  'content_status': {'state': 'persisting'},
  'import': {'location': {'training': '3dd32ad4-d6bf-45be-b09f-cf2b79e75e7b',
    'pipeline_model': '3dd32ad4-d6bf-45be-b09f-cf2b79e75e7b/pipeline-model.json',
    'training_status': '3dd32ad4-d6bf-45be-b09f-cf2b79e75e7b/training-status.json',
    'pipeline': '3dd32ad4-d6bf-45be-b09f-cf2b79e75e7b/pipeline.json',
    'bucket': 'githubanalyzer-donotdelete-pr-b9xa3kxotzh5in',
    'assets_path': '3dd32ad4-d6bf-45be-b09f-cf2b79e75e7b/assets'},
   'type': 's3',
   'connection': {'access_key_id': 'cc04444c99374c9e9589b8f85e

In [41]:
#
# finally let's deploy it
#   use model name as deployment name
#
meta_props = {
        client.deployments.ConfigurationMetaNames.NAME: wml_model_name,
        client.deployments.ConfigurationMetaNames.ONLINE: {}
    }
deployment_details = client.deployments.create(model_details['metadata']['id'], meta_props)
deployment_details

{"trace":"-sulpksdfbwj8","errors":[{"code":"unsupported_framework_details","message":"Models of framework pytorch are not supported."}]}


--------------------------
Deployment creation failed
--------------------------




WMLClientError: Deployment creation failed. Error: 400. {"trace":"-sulpksdfbwj8","errors":[{"code":"unsupported_framework_details","message":"Models of framework pytorch are not supported."}]}

In [38]:
model_uid = model_details
model_details = client.repository.get_model_details(model_uid)



UnexpectedType: Unexpected type of 'model_uid', expected: '<class 'str'>', actual: '<class 'dict'>'.

# WTF ?
```
Training a PyTorch model using the Watson Machine Learning training service is supported, but deploying a trained PyTorch model in your Watson Machine Learning service is not supported.
```
https://dataplatform.cloud.ibm.com/docs/content/wsj/analyze-data/pm_service_supported_frameworks.html

it took me by surprise: 

* WML doesn't support training Keras models in the cloud, but you can upload the h5 model and treat it as a tensorflow model

Fortunately there is the wml_dev slack channel as last resort ...

#### Training parameters

```
loss_metric: 'mse'    # minimize mean square error
optimizer: 'adam'     # sort of adaptive stochastic gradient descent
validation_split: 0.2 # 20% of the data is used for validating (val_loss)
dropout: 0.3          # ditch 30% of the LSTMs results when minimizing the loss function to avoid overfitting
lstm_batch_size: 64   # number of training data batches to evaluate per optimizer run to update the model’s parameters

patience: 10          # try at least 10 times to decrease val_loss smaller by ...
min_delta: 0.0003     # ... at least min_delta, else stop, so we get at least 'patience' epochs
epochs: 35            # no more than 35 passes through the entier training dataset.

l_s: 250              # lookback: num previous timesteps provided to model to predict future values
n_predictions: 10     # number of steps ahead to predict
```

This is defined in `telemanom/config.yaml`
<br>