## Sample for Submarine Experiment SDK

This notebook shows how to use the Submarine Experiment SDK to create, get, list, wait for, fetch the logs of, and delete a Submarine experiment.

In [1]:
from __future__ import print_function
import submarine
from submarine.experiment.models.environment import Environment
from submarine.experiment.models.experiment_spec import ExperimentSpec
from submarine.experiment.models.experiment_task_spec import ExperimentTaskSpec
from submarine.experiment.models.experiment_meta import ExperimentMeta

### Create Submarine Client

In [2]:
# Build the experiment client used by every cell below. The host points at a
# Submarine server on localhost:8080; change it to match your deployment.
submarine_client = submarine.ExperimentClient(host='http://localhost:8080')

### Define TensorFlow experiment spec
Define Submarine spec
This demo creates a TensorFlow experiment with just one PS (parameter server) and one worker to run the MNIST sample.

In [3]:
# Container image for the experiment environment.
environment = Environment(image='gcr.io/kubeflow-ci/tf-dist-mnist-test:1.0')

# Experiment-level metadata: name, namespace, framework, launch command and
# environment variables.
experiment_meta = ExperimentMeta(
    name='mnist-dist',
    namespace='default',
    framework='Tensorflow',
    cmd='python /var/tf_dist_mnist/dist_mnist.py --train_steps=100',
    env_vars={'ENV1': 'ENV1'},
)

# One replica each for the PS and the worker task, with identical resources.
ps_spec = ExperimentTaskSpec(resources='cpu=1,memory=1024M', replicas=1)
worker_spec = ExperimentTaskSpec(resources='cpu=1,memory=1024M', replicas=1)

# Full spec handed to the client: metadata, environment, and the task specs
# keyed by task name.
experiment_spec = ExperimentSpec(
    meta=experiment_meta,
    environment=environment,
    spec={'Ps': ps_spec, 'Worker': worker_spec},
)


### Create experiment

In [4]:
# Submit the spec defined above. The result is kept in `experiment` because
# the next cell reads its 'experimentId' entry.
experiment = submarine_client.create_experiment(experiment_spec=experiment_spec)
# Bare last expression so the notebook displays the returned value.
experiment

{'experimentId': 'experiment_1592797815559_0001',
 'name': 'mnist-dist',
 'uid': 'cdee2adf-b43b-11ea-b5f2-025000000001',
 'status': 'Accepted',
 'acceptedTime': '2020-06-22T11:52:33.000+08:00',
 'createdTime': None,
 'runningTime': None,
 'finishedTime': None,
 'spec': {'meta': {'name': 'mnist-dist',
   'namespace': 'default',
   'framework': 'Tensorflow',
   'cmd': 'python /var/tf_dist_mnist/dist_mnist.py --train_steps=100',
   'envVars': {'ENV1': 'ENV1'}},
  'environment': {'image': 'gcr.io/kubeflow-ci/tf-dist-mnist-test:1.0'},
  'spec': {'Ps': {'replicas': 1,
    'resources': 'cpu=1,memory=1024M',
    'name': None,
    'image': None,
    'cmd': None,
    'envVars': None,
    'resourceMap': {'memory': '1024M', 'cpu': '1'}},
   'Worker': {'replicas': 1,
    'resources': 'cpu=1,memory=1024M',
    'name': None,
    'image': None,
    'cmd': None,
    'envVars': None,
    'resourceMap': {'memory': '1024M', 'cpu': '1'}}}}}

### Get the created experiment

In [5]:
# Keep the experiment id; the wait / log / delete cells below reuse it.
# NOTE(review): `id` shadows Python's builtin `id()` for the rest of the
# kernel session. Renaming it (e.g. to `experiment_id`) needs the later cells
# updated in the same change.
id = experiment['experimentId']
# Look the experiment up again by its id and display the result.
submarine_client.get_experiment(id)

{'experimentId': 'experiment_1592797815559_0001',
 'name': 'mnist-dist',
 'uid': 'cdee2adf-b43b-11ea-b5f2-025000000001',
 'status': 'Running',
 'acceptedTime': '2020-06-22T11:52:33.000+08:00',
 'createdTime': '2020-06-22T11:52:33.000+08:00',
 'runningTime': '2020-06-22T11:52:36.000+08:00',
 'finishedTime': None,
 'spec': {'meta': {'name': 'mnist-dist',
   'namespace': 'default',
   'framework': 'Tensorflow',
   'cmd': 'python /var/tf_dist_mnist/dist_mnist.py --train_steps=100',
   'envVars': {'ENV1': 'ENV1'}},
  'environment': {'image': 'gcr.io/kubeflow-ci/tf-dist-mnist-test:1.0'},
  'spec': {'Ps': {'replicas': 1,
    'resources': 'cpu=1,memory=1024M',
    'name': None,
    'image': None,
    'cmd': None,
    'envVars': None,
    'resourceMap': {'memory': '1024M', 'cpu': '1'}},
   'Worker': {'replicas': 1,
    'resources': 'cpu=1,memory=1024M',
    'name': None,
    'image': None,
    'cmd': None,
    'envVars': None,
    'resourceMap': {'memory': '1024M', 'cpu': '1'}}}}}

### List all running experiments

In [6]:
# Status value passed to list_experiments as the `status` argument.
status = 'running'
# Bare last expression so the notebook displays the returned list.
submarine_client.list_experiments(status=status)

[{'experimentId': 'experiment_1592797815559_0001',
  'name': 'mnist-dist',
  'uid': 'cdee2adf-b43b-11ea-b5f2-025000000001',
  'status': 'Running',
  'acceptedTime': '2020-06-22T11:52:33.000+08:00',
  'createdTime': '2020-06-22T11:52:33.000+08:00',
  'runningTime': '2020-06-22T11:52:36.000+08:00',
  'finishedTime': None,
  'spec': {'meta': {'name': 'mnist-dist',
    'namespace': 'default',
    'framework': 'Tensorflow',
    'cmd': 'python /var/tf_dist_mnist/dist_mnist.py --train_steps=100',
    'envVars': {'ENV1': 'ENV1'}},
   'environment': {'image': 'gcr.io/kubeflow-ci/tf-dist-mnist-test:1.0'},
   'spec': {'Ps': {'replicas': 1,
     'resources': 'cpu=1,memory=1024M',
     'name': None,
     'image': None,
     'cmd': None,
     'envVars': None,
     'resourceMap': {'memory': '1024M', 'cpu': '1'}},
    'Worker': {'replicas': 1,
     'resources': 'cpu=1,memory=1024M',
     'name': None,
     'image': None,
     'cmd': None,
     'envVars': None,
     'resourceMap': {'memory': '1024M', '

### Wait for the experiment to finish

In [7]:
# Wait on the experiment created above (same `id` as in the previous cells).
# NOTE(review): the recorded output suggests training logs are printed while
# waiting; confirm against the SDK docs.
submarine_client.wait_for_finish(id)

  from ._conv import register_converters as _register_converters
2020-06-22 03:53:04.353611: I tensorflow/core/platform/cpu_feature_guard.cc:137] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2 FMA
2020-06-22 03:53:04.355167: I tensorflow/core/distributed_runtime/rpc/grpc_channel.cc:215] Initialize GrpcChannelCache for job ps -> {0 -> localhost:2222}
2020-06-22 03:53:04.355275: I tensorflow/core/distributed_runtime/rpc/grpc_channel.cc:215] Initialize GrpcChannelCache for job worker -> {0 -> mnist-dist-worker-0.default.svc:2222}
2020-06-22 03:53:04.355746: I tensorflow/core/distributed_runtime/rpc/grpc_server_lib.cc:324] Started server with target: grpc://localhost:2222


### Get the training log of a specific experiment

In [None]:
# Request the log of the experiment identified by `id` (set in an earlier cell).
submarine_client.get_log(id)

### Delete the experiment

In [None]:
# Clean up: delete the experiment identified by `id` (set in an earlier cell).
submarine_client.delete_experiment(id)