move to tf1.15

cloud-annotations · Mar 27, 2020 · d0dd329 · d0dd329
1 parent 1292795
commit d0dd329
Show file tree

Hide file tree

Showing 13 changed files with 368 additions and 128 deletions.
diff --git a/trainer/.dockerignore b/trainer/.dockerignore
@@ -0,0 +1,8 @@
+# Ignore everything.
+**
+
+# Allow only `Dockerfile`, `/src` and `runner.sh` into the build context.
+!/Dockerfile
+!/src/**
+
+!/runner.sh
diff --git a/trainer/Dockerfile b/trainer/Dockerfile
@@ -0,0 +1,102 @@
+# Training image: TensorFlow 1.15 (Python 3) plus s3fs, which runner.sh uses
+# to mount the object-storage bucket.
+FROM tensorflow/tensorflow:1.15.2-py3
+
+# System packages: build tools, FUSE and s3fs. The apt package index is removed
+# in the same layer so it does not end up in the image.
+# NOTE(review): on Debian/Ubuntu, python-scipy is the Python 2 package while this
+# base image is the -py3 variant; confirm whether python3-scipy was intended.
+RUN apt-get update \
+    && apt-get -y install automake autotools-dev fuse g++ git libcurl4-openssl-dev libfuse-dev libssl-dev libxml2-dev make pkg-config \
+    && apt-get -y install s3fs \
+    && apt-get -y install python-scipy \
+    && rm -rf /var/lib/apt/lists/*
+
+# Pinned Python dependencies. Comment lines inside the continuation are
+# dropped by the Dockerfile parser, so those packages are not installed here.
+RUN pip install --no-cache-dir \
+    absl-py==0.7.1 \
+    alabaster==0.7.12 \
+    appdirs==1.4.3 \
+    astor==0.7.1 \
+    Babel==2.8.0 \
+    certifi==2019.9.11 \
+    chardet==3.0.4 \
+    cloudpickle==1.2.2 \
+    decorator==4.4.1 \
+    docutils==0.16 \
+    gast==0.2.2 \
+    google-pasta==0.1.6 \
+    # graphsurgeon==0.4.1 \
+    grpcio==1.16.1 \
+    h5py==2.9.0 \
+    # hpo==0.3.1 \
+    idna==2.9 \
+    imagesize==1.2.0 \
+    Jinja2==2.11.1 \
+    joblib==0.14.1 \
+    Keras==2.2.5 \
+    Keras-Applications==1.0.8 \
+    Keras-Preprocessing==1.1.0 \
+    Markdown==3.1.1 \
+    MarkupSafe==1.1.1 \
+    # mkl-fft==1.0.15 \
+    # mkl-random==1.1.0 \
+    # mkl-service==2.3.0 \
+    mock==4.0.1 \
+    nose==1.3.7 \
+    numpy==1.16.5 \
+    numpydoc==0.9.2 \
+    olefile==0.46 \
+    packaging==20.1 \
+    pandas==1.0.1 \
+    Pillow==6.2.1 \
+    ply==3.11 \
+    protobuf==3.8.0 \
+    psutil==5.5.0 \
+    Pygments==2.5.2 \
+    Pyomo==5.2 \
+    pyparsing==2.4.6 \
+    python-dateutil==2.8.1 \
+    pytz==2019.3 \
+    PyUtilib==5.7.3 \
+    PyYAML==5.1.2 \
+    # rbfopt==0.2 \
+    requests==2.23.0 \
+    scikit-learn==0.22.2.post1 \
+    # scipy==1.3.1 \
+    six==1.12.0 \
+    sklearn==0.0 \
+    snowballstemmer==2.0.0 \
+    Sphinx==2.4.3 \
+    sphinxcontrib-applehelp==1.0.2 \
+    sphinxcontrib-devhelp==1.0.2 \
+    sphinxcontrib-htmlhelp==1.0.3 \
+    sphinxcontrib-jsmath==1.0.1 \
+    sphinxcontrib-qthelp==1.0.3 \
+    sphinxcontrib-serializinghtml==1.1.4 \
+    # tensorboard==1.15.0 \
+    # tensorflow==1.15.0 \
+    # tensorflow-estimator==1.15.1 \
+    # tensorflow-probability==0.8.0 \
+    # tensorflow-serving-api==1.15.0 \
+    # tensorrt==6.0.1.5 \
+    termcolor==1.1.0 \
+    # tflms==2.0.2 \
+    # uff==0.6.5 \
+    urllib3==1.25.8 \
+    Werkzeug==0.15.4 \
+    wrapt==1.11.2
+
+# The contents of src are copied to / while start.sh and runner.sh are
+# referenced by relative path, so pin the working directory to / instead of
+# relying on the base image's default.
+WORKDIR /
+
+# COPY is sufficient for plain local files (no archive or URL handling needed).
+COPY src /
+COPY runner.sh .
+
+RUN chmod +x start.sh runner.sh
+
+ENTRYPOINT ["./runner.sh"]
diff --git a/trainer/README.md b/trainer/README.md
@@ -0,0 +1,25 @@
+Build the docker image:
+```
+docker build -t trainer .
+```
+
+Build the image and run it, passing the number of training steps (10 here):
+```
+docker build -t trainer . && docker run -a stdin -a stdout -a stderr -i -t --privileged trainer 10
+```
+
+```
+zip -r trainer.zip src
+cacli train thumbs-up-down-v2 --steps 500 --script trainer.zip --frameworkv 1.15
+```
+
+- coremltools==3.3
+- tfcoreml==1.1
+- tensorflowjs==1.4.0
+```
+python -m convert.convert --tfjs --coreml --tflite \
+  --tfjs-path=../model_web \
+  --mlmodel-path=../model_ios \
+  --tflite-path=../model_android \
+  --saved-model-path=../model
+```
diff --git a/trainer/buildandrun.sh b/trainer/buildandrun.sh
@@ -0,0 +1,12 @@
+#!/bin/bash
+# Build the trainer image and run it against an object-storage bucket.
+# Usage: ./buildandrun.sh BUCKET STEPS
+# S3_ID and S3_KEY are presumably defined by the sourced .env file.
+source .env
+
+BUCKET=$1
+STEPS=$2
+
+# Run only when the build succeeds, so a failed build never starts a stale image.
+docker build -t trainer . \
+    && docker run -a stdin -a stdout -a stderr -i -t --privileged trainer "$S3_ID" "$S3_KEY" "$BUCKET" "$STEPS"
diff --git a/trainer/runner.sh b/trainer/runner.sh
@@ -0,0 +1,32 @@
+#!/bin/bash
+# Container entrypoint: mount the bucket with s3fs as both the data and the
+# result directory, then run start.sh.
+# Usage: runner.sh S3_ID S3_KEY BUCKET STEPS
+
+# Abort on the first failing command so start.sh never runs against an
+# unmounted (empty) directory.
+set -e
+
+# Create the mount points at the absolute paths that s3fs mounts below.
+mkdir -p /data_dir /result_dir
+
+S3_ID=$1
+S3_KEY=$2
+BUCKET=$3
+STEPS=$4
+
+# s3fs credentials file; access is restricted to the owner before mounting.
+printf '%s:%s\n' "$S3_ID" "$S3_KEY" > .passwd-s3fs
+chmod 600 .passwd-s3fs
+s3fs "$BUCKET" /data_dir -o passwd_file=.passwd-s3fs -o url=https://s3.us.cloud-object-storage.appdomain.cloud/ -o use_path_request_style
+s3fs "$BUCKET" /result_dir -o passwd_file=.passwd-s3fs -o url=https://s3.us.cloud-object-storage.appdomain.cloud/ -o use_path_request_style
+
+export DATA_DIR=/data_dir
+# Each run writes to its own result directory, suffixed with 9 random characters.
+export RESULT_DIR=/result_dir/training-local-$(tr -cd '\-_a-zA-Z0-9' < /dev/urandom | head -c 9)
+
+mkdir "$RESULT_DIR"
+
+# Forward STEPS only when it was given, so start.sh receives the same argument
+# count as with the unquoted expansion.
+./start.sh ${STEPS:+"$STEPS"}
diff --git a/trainer/src/convert/convert.py b/trainer/src/convert/convert.py
@@ -9,19 +9,18 @@
 from convert.types import ModelType
 
 import tensorflow as tf
+tf.enable_eager_execution()
 
 parser = argparse.ArgumentParser()
 # export types
 parser.add_argument('--coreml', action='store_true')
 parser.add_argument('--tflite', action='store_true')
 parser.add_argument('--tfjs', action='store_true')
 
-# model params
-parser.add_argument('--input-name', type=str)
-parser.add_argument('--output-names', type=str, nargs='+')
+parser.add_argument('--model-type', type=str)
 
 # import paths
-parser.add_argument('--exported-graph-path', type=str, default='exported_graph')
+parser.add_argument('--saved-model-path', type=str)
 
 # export paths
 parser.add_argument('--mlmodel-path', type=str, default='model_ios')
@@ -30,16 +29,8 @@
 args = parser.parse_args()
 
 def infer_model_structure():
-    if args.input_name and args.output_names:
-        return {
-            'input_name': args.input_name,
-            'output_names': args.output_names,
-            'type': ModelType.NONE
-        }
-
     with tf.Session(graph=tf.Graph()) as sess:
-        saved_model_path = os.path.join(args.exported_graph_path, 'saved_model')
-        tf.saved_model.loader.load(sess, ['serve'], saved_model_path)
+        tf.saved_model.loader.load(sess, ['serve'], args.saved_model_path)
         graph = tf.get_default_graph()
         ops = [op.name for op in graph.get_operations()]
         op1 = 'Postprocessor/ExpandDims_1'
@@ -63,28 +54,53 @@ def infer_model_structure():
             }
 
 model_structure = infer_model_structure()
+print(args.model_type)
 
 try:
     if args.coreml:
+        print(' ' * 80)
+        print('_' * 80)
+        print('Converting to Core ML')
         from convert.convert_to_core_ml import convert_to_core_ml
-        convert_to_core_ml(args.exported_graph_path, model_structure, args.mlmodel_path)
+        convert_to_core_ml(args.saved_model_path, model_structure, args.mlmodel_path)
+        print('Successfully converted to Core ML')
+        print('_' * 80)
+        print(' ' * 80)
 except Exception as e:
     print(e)
     print("Unable to convert to Core ML")
+    print('_' * 80)
+    print(' ' * 80)
 
 try:
     if args.tflite:
+        print(' ' * 80)
+        print('_' * 80)
+        print('Converting to TensorFlow Lite')
         from convert.convert_to_tflite import convert_to_tflite
-        convert_to_tflite(args.exported_graph_path, model_structure, args.tflite_path)
+        convert_to_tflite(args.saved_model_path, model_structure, args.tflite_path)
+        print('Successfully converted to TensorFlow Lite')
+        print('_' * 80)
+        print(' ' * 80)
 except Exception as e:
     print(e)
     print("Unable to convert to TensorFlow Lite")
+    print('_' * 80)
+    print(' ' * 80)
 
 try:
     if args.tfjs:
+        print(' ' * 80)
+        print('_' * 80)
+        print('Converting to TensorFlow.js')
         from convert.convert_to_tfjs import convert_to_tfjs
         output_names = model_structure['output_names']
-        convert_to_tfjs(args.exported_graph_path, output_names, args.tfjs_path)
+        convert_to_tfjs(args.saved_model_path, output_names, args.tfjs_path)
+        print('Successfully converted to TensorFlow.js')
+        print('_' * 80)
+        print(' ' * 80)
 except Exception as e:
     print(e)
     print("Unable to convert to TensorFlow.js")
+    print('_' * 80)
+    print(' ' * 80)