#### Overview of Docker Architecture

![](img/docker_architecture.png)

In [1]:
# Switch the notebook's working directory. Later cells use "." as the Docker
# build context, so the Dockerfile and submission.sh must be created here.
# NOTE(review): absolute path is specific to this VM -- adjust when running elsewhere.
%cd /home/jupyter/docker_test

/home/jupyter/docker_test


#### Run this in TERMINAL to create Dockerfile

In [None]:
cat <<'HERE' | tee Dockerfile
# Docker file for submission of the BERT-joint baseline to the Natural Questions
# competition site: https://ai.google.com/research/NaturalQuestions/competition.

# Base image: TensorFlow 1.15 with GPU support on Python 3.
FROM tensorflow/tensorflow:1.15.0-gpu-py3

# Upgrade pip to avoid errors
RUN pip install --upgrade pip

# Install tqdm
RUN pip install --trusted-host pypi.python.org tqdm
# Install the BERT and Natural Questions libraries.
RUN pip install --trusted-host pypi.python.org bert-tensorflow
# Install with --no-dependencies to avoid the wsgiref error
RUN pip install --trusted-host pypi.python.org natural-questions --no-dependencies

# Copy everything in the current directory (the build context) into the
# /nq_model directory of the image; submission.sh runs from that directory.
# COPY is used instead of ADD because this is a plain local copy.
COPY . /nq_model
HERE

#### Run this in TERMINAL to create submission script

In [None]:
# Create submission.sh (paste into a terminal, like the Dockerfile above it
# in the notebook; the quoted 'HERE' keeps $1/$2 from being expanded now).
cat <<'HERE' | tee submission.sh
#!/bin/bash
#
# submission.sh: The script to be launched in the Docker image.
#
# Usage: submission.sh <input_data_pattern> <output_file>
#   input_data_pattern: jsonl.gz NQ evaluation files,
#   output_file: json file containing answer key produced by the model.
#
# Sample usage:
#   submission.sh nq-dev-0?.jsonl.gz predictions.json

# Abort on the first failing command and echo every command as it runs.
set -e
set -x

# Both arguments are required: fail fast with a usage message rather than
# handing empty paths to the model.
INPUT_PATH=${1:?"usage: submission.sh <input_data_pattern> <output_file>"}
OUTPUT_PATH=${2:?"usage: submission.sh <input_data_pattern> <output_file>"}

# The image stores the model code under /nq_model (see the Dockerfile).
cd /nq_model
python3 -m run_nq_ensemble_modified \
  --max_seq_length=512 \
  --doc_stride=256 \
  --max_contexts=48 \
  --output_dir="/nq_model/output" \
  --predict_file="$INPUT_PATH" \
  --final_output_prediction_file="$OUTPUT_PATH"
HERE

In [32]:
%%time
# Build the image from the current directory, which holds the Dockerfile and
# submission.sh. IMAGE_NAME is reused by the test cells further down.
IMAGE_NAME = "nq-submission-bert-ensemble-newtf"
!docker build -t {IMAGE_NAME} .

Sending build context to Docker daemon  12.08GB
Step 1/6 : FROM tensorflow/tensorflow:1.15.0-gpu-py3
1.15.0-gpu-py3: Pulling from tensorflow/tensorflow

[1B02085707: Pulling fs layer 
[1B5509d51d: Pulling fs layer 
[1B9fe70a46: Pulling fs layer 
[1Be1789921: Pulling fs layer 
[2Be1789921: Waiting fs layer 
[1Bfcda1e6e: Pulling fs layer 
[1Ba76e3193: Pulling fs layer 
[1Bc69d85cf: Pulling fs layer 
[1B1467f169: Pulling fs layer 
[1Bbd4663fe: Pulling fs layer 
[1Bdba96c29: Pulling fs layer 
[1B7e08066b: Pulling fs layer 
[1B8a00ccc9: Pulling fs layer 
[1Bef1817a6: Pulling fs layer 
[1B54337004: Pulling fs layer 
[1BDigest: sha256:9a754acdbe22901b06e716887bb95257cda7db79af61c59a68facf207bd73161[2K[10A[2K[9A[2K[16A[2K[9A[2K[8A[2K[9A[2K[8A[2K[9A[2K[8A[2K[9A[2K[6A[2K[9A[2K[16A[2K[8A[2K[16A[2K[9A[2K[16A[2K[9A[2K[16A[2K[9A[2K[9A[2K[6A[2K[9A[2K[6A[2K[16A[2K[5A[2K[16A[2K[4A[2K[9A[2K[3A[2K[2A[2K[3A[2K[16A[2K[1A[2

#### Test Docker image on a dev set sample

In [37]:
%%time
# Stage the small dev sample in a temporary host directory; that directory is
# bind-mounted into the container at /data.
DATA_DIR = "/tmp/nq-submission-test-data"
!mkdir -p "{DATA_DIR}"
!gsutil cp -r "gs://bert-nq/tiny-dev" "{DATA_DIR}"

# Launch submission.sh inside the image: the un-annotated dev sample is the
# input and predictions-newtf.json (written under /data) is the output.
!docker run --runtime=nvidia --attach stdin --attach stdout --attach stderr \
    --volume "{DATA_DIR}":/data \
    "{IMAGE_NAME}" bash "/nq_model/submission.sh" \
    "/data/tiny-dev/nq-dev-sample.no-annot.jsonl.gz" \
    "/data/predictions-newtf.json"

Copying gs://bert-nq/tiny-dev/nq-dev-sample.jsonl.gz...
Copying gs://bert-nq/tiny-dev/nq-dev-sample.no-annot.jsonl.gz...                
- [2 files][ 50.2 MiB/ 50.2 MiB]                                                
Operation completed over 2 objects/50.2 MiB.                                     
+ INPUT_PATH=/data/tiny-dev/nq-dev-sample.no-annot.jsonl.gz
+ OUTPUT_PATH=/data/predictions-newtf.json
+ cd /nq_model
+ python3 -m run_nq_ensemble --max_seq_length=512 --doc_stride=256 --max_contexts=48 --output_dir=/nq_model/output --predict_file=/data/tiny-dev/nq-dev-sample.no-annot.jsonl.gz







The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.

INFO:tensorflow:Using config: {'_m

In [21]:
%%time
# Host directory holding the tiny dev sample; mounted at /data in the container.
DATA_DIR = "/tmp/nq-submission-test-data"
!mkdir -p "{DATA_DIR}"
!gsutil cp -r "gs://bert-nq/tiny-dev" "{DATA_DIR}"

# Same run as the previous test cell, but the predictions are written to
# predictions-td-no-annot.json, the file the evaluation cell reads.
!docker run --runtime=nvidia --attach stdin --attach stdout --attach stderr \
    --volume "{DATA_DIR}":/data \
    "{IMAGE_NAME}" bash "/nq_model/submission.sh" \
    "/data/tiny-dev/nq-dev-sample.no-annot.jsonl.gz" \
    "/data/predictions-td-no-annot.json"

Copying gs://bert-nq/tiny-dev/nq-dev-sample.jsonl.gz...
Copying gs://bert-nq/tiny-dev/nq-dev-sample.no-annot.jsonl.gz...                
- [2 files][ 50.2 MiB/ 50.2 MiB]                                                
Operation completed over 2 objects/50.2 MiB.                                     
+ INPUT_PATH=/data/tiny-dev/nq-dev-sample.no-annot.jsonl.gz
+ OUTPUT_PATH=/data/predictions-td-no-annot.json
+ cd /nq_model
+ python3 -m run_nq_ensemble --max_seq_length=512 --doc_stride=256 --max_contexts=48 --output_dir=/nq_model/output --predict_file=/data/tiny-dev/nq-dev-sample.no-annot.jsonl.gz
W0413 15:56:37.050721 140719534966592 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/bert/optimization.py:87: The name tf.train.Optimizer is deprecated. Please use tf.compat.v1.train.Optimizer instead.

W0413 15:56:37.486640 140719534966592 deprecation_wrapper.py:119] From /nq_model/run_nq_ensemble.py:1164: The name tf.gfile.Glob is deprecated. Please use tf.io.gfile.glob

In [22]:
# Score the container's predictions against the annotated dev sample to
# confirm the metrics come out as expected.
!python3 -m nq_eval \
  --predictions_path="{DATA_DIR}/predictions-td-no-annot.json" \
  --gold_path="{DATA_DIR}/tiny-dev/nq-dev-sample.jsonl.gz"

I0413 16:22:33.695448 139893362419072 eval_utils.py:261] parsing /tmp/nq-submission-test-data/tiny-dev/nq-dev-sample.jsonl.gz ..... 
I0413 16:22:36.708112 139893362419072 eval_utils.py:213] Reading predictions from file: /tmp/nq-submission-test-data/predictions-td-no-annot.json
{"long-best-threshold-f1": 0.6976744186046512, "long-best-threshold-precision": 0.6696428571428571, "long-best-threshold-recall": 0.7281553398058253, "long-best-threshold": 2.1822948632972023, "long-recall-at-precision>=0.5": 0.7766990291262136, "long-precision-at-precision>=0.5": 0.5031446540880503, "long-recall-at-precision>=0.75": 0.6310679611650486, "long-precision-at-precision>=0.75": 0.7558139534883721, "long-recall-at-precision>=0.9": 0.20388349514563106, "long-precision-at-precision>=0.9": 0.9130434782608695, "short-best-threshold-f1": 0.6666666666666666, "short-best-threshold-precision": 0.7719298245614035, "short-best-threshold-recall": 0.5866666666666667, "short-best-threshold": 2.89677501107692, "sho

#### Docker Build Submission

In [6]:
# Select the GCP project used by the gcloud commands that follow
# (this sets the core/project property; it does not log in).
!gcloud config set core/project natural-questions-v1

Updated property [core/project].


In [8]:
# Build the image remotely with Cloud Build from the current directory and tag
# it in gcr.io. The build timeout is set explicitly to 2h15m5s.
!gcloud builds submit . --timeout=2h15m5s --tag=gcr.io/natural-questions-v1/nq-submission-bert-ensemble-v2

Creating temporary tarball archive of 39 file(s) totalling 12.5 GiB before compression.
Uploading tarball of [.] to [gs://natural-questions-v1_cloudbuild/source/1586755726.21-0d1eb827d6d74df5a49323bb3117f22d.tgz]
Created [https://cloudbuild.googleapis.com/v1/projects/natural-questions-v1/builds/0d796192-d1e7-4c25-b432-f5031931a010].
Logs are available at [https://console.cloud.google.com/cloud-build/builds/0d796192-d1e7-4c25-b432-f5031931a010?project=512516930043].
----------------------------- REMOTE BUILD OUTPUT ------------------------------
starting build "0d796192-d1e7-4c25-b432-f5031931a010"

FETCHSOURCE
Fetching storage object: gs://natural-questions-v1_cloudbuild/source/1586755726.21-0d1eb827d6d74df5a49323bb3117f22d.tgz#1586756601374076
Copying gs://natural-questions-v1_cloudbuild/source/1586755726.21-0d1eb827d6d74df5a49323bb3117f22d.tgz#1586756601374076...
/ [1 files][ 11.4 GiB/ 11.4 GiB]   58.3 MiB/s                                   
Operation completed over 1 objects/11.4 G

#### Official Submission Attempt: Google Natural Questions Competition Results


&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; ![](img/my_dashboard.PNG)
![](img/official_scores.png)