# VariantSpark-k Tiny Warmup Job


Small warmup job - takes < 2 minutes to run with 4 r4.rxlarge EC2 instances on Kubernetes

### Parameters
 - input bucket location - Lynn's S3 bucket (us-west-2 / Oregon)
 - input files `chr22...` and fc `22_16051249`
 - number of Spark executors: 6, with 1 GB RAM (default) each
 - number of RF trees 500, w/ batch of 16
 - default `mtry`
 - calculate OOB error, as the `-ro` parameter is configured

In [1]:
%%bash

# Stop the cell as soon as any command exits with a non-zero status.
set -o errexit

# Kubernetes API endpoint; passed to spark-submit below as k8s://<MASTER>.
MASTER='https://kubernetes.default.svc:443'
# Bucket the job reads its input files from (via s3a://<INPUT_BUCKET>/input/...).
INPUT_BUCKET='variant-spark-k-storage20180709164545375200000001'

#######################################
# Report an unrecoverable problem and terminate.
# Arguments: $1 - message text
# Outputs:   "ERROR: <message>" on stderr
# Returns:   never; exits the shell with status 1
#######################################
fatal_error() {
  printf 'ERROR: %s\n' "$1" >&2
  exit 1
}

# Pre-flight checks: refuse to submit unless the cluster endpoint, the input
# bucket and the spark-submit launcher are all available.
#
# "${VAR:-}" makes an empty value fail the same way as an unset one; an empty
# MASTER or INPUT_BUCKET would otherwise produce a malformed k8s:// or s3a://
# URL further down. Diagnostics go to stderr, matching fatal_error.
if [[ -z "${MASTER:-}" ]]; then
  echo "You must set the MASTER environment variable to a kubernetes API endpoint" >&2
  echo "Example: https://ABC.sk1.us-west-2.eks.amazonaws.com:443" >&2
  exit 1
fi

if [[ -z "${INPUT_BUCKET:-}" ]]; then
  echo "You must set the INPUT_BUCKET environment variable to a bucket containing input data" >&2
  echo "Example: variant-spark-k-storage" >&2
  exit 1
fi

# type -P performs a PATH-only lookup, so shell functions or aliases named
# spark-submit do not satisfy the check.
[[ -n "$(type -P "spark-submit")" ]] || fatal_error "\`spark-submit\` cannot be found. Please make sure it's on your PATH."

# Extra jars (hadoop-aws, aws-java-sdk, joda-time) that are fetched at
# submit time. Maven Central only serves HTTPS and the old
# http://central.maven.org host has been retired, so the artifacts are
# pulled from repo1.maven.org instead; versions are unchanged.
maven_repo='https://repo1.maven.org/maven2'
extra_jars="${maven_repo}/org/apache/hadoop/hadoop-aws/2.7.3/hadoop-aws-2.7.3.jar"
extra_jars+=",${maven_repo}/com/amazonaws/aws-java-sdk/1.7.4/aws-java-sdk-1.7.4.jar"
extra_jars+=",${maven_repo}/joda-time/joda-time/2.9.9/joda-time-2.9.9.jar"

# Submit the VariantSpark `importance` command to Kubernetes in cluster mode
# with 6 executors. Application arguments (after the application jar):
#   -if / -ff   input VCF and labels CSV read from the S3 bucket
#   -fc         label column to use from the labels file
#   -rn / -rbs  500 random-forest trees, built in batches of 16
#   -ro         compute the out-of-bag (OOB) error
#   -v          presumably verbose output -- confirm against the CLI help
# Any arguments given to this script are appended via "$@".
spark-submit \
    --class au.csiro.variantspark.cli.VariantSparkApp \
    --driver-class-path ./conf \
    --master "k8s://${MASTER}" \
    --deploy-mode cluster \
    --name VariantSparkTiny \
    --conf spark.kubernetes.authenticate.driver.serviceAccountName=spark \
    --conf spark.executor.instances=6 \
    --conf spark.kubernetes.container.image=jamesrcounts/variantspark:002 \
    --jars "${extra_jars}" \
    local:///opt/spark/jars/variant-spark_2.11-0.2.0-SNAPSHOT-all.jar importance \
        -if "s3a://${INPUT_BUCKET}/input/chr22_1000.vcf" \
        -ff "s3a://${INPUT_BUCKET}/input/chr22-labels.csv" \
        -fc 22_16051249 \
        -v \
        -rn 500 \
        -rbs 16 \
        -ro "$@"


2018-08-04 21:17:21 INFO  LoggingPodStatusWatcherImpl:54 - State changed, new state: 
	 pod name: variantsparktiny-2a88e7f0c37d330594cee995ac767f14-driver
	 namespace: default
	 labels: spark-app-selector -> spark-d2a8920e82c0467ba63dee79a51e91e5, spark-role -> driver
	 pod uid: c64ef6ff-982b-11e8-b3d6-062eec91cad6
	 creation time: 2018-08-04T21:17:21Z
	 service account name: spark
	 volumes: spark-init-properties, download-jars-volume, download-files-volume, spark-token-cv25b
	 node name: N/A
	 start time: N/A
	 container images: N/A
	 phase: Pending
	 status: []
2018-08-04 21:17:21 INFO  LoggingPodStatusWatcherImpl:54 - State changed, new state: 
	 pod name: variantsparktiny-2a88e7f0c37d330594cee995ac767f14-driver
	 namespace: default
	 labels: spark-app-selector -> spark-d2a8920e82c0467ba63dee79a51e91e5, spark-role -> driver
	 pod uid: c64ef6ff-982b-11e8-b3d6-062eec91cad6
	 creation time: 2018-08-04T21:17:21Z
	 service account name: spark
	 volumes: spark-init-properties, download-j