Permalink
Branch: master
Find file Copy path
Fetching contributors…
Cannot retrieve contributors at this time
executable file 804 lines (739 sloc) 27.6 KB
#!/bin/csh
# Names used below to build file names and URLs
setenv APP "aah"
setenv API "train"
setenv WWW "www.dcmartin.com"
# working directory; a TMP inherited from the environment is kept
if (! $?TMP) setenv TMP "/var/lib/age-at-home"
# DATE is the job identifier: the current epoch time rounded down to a
# multiple of TTL seconds, so runs within one TTL window share file names
setenv TTL 28800
setenv SECONDS `date "+%s"`
@ bucket = ( $SECONDS / $TTL ) * $TTL
setenv DATE $bucket
# debug output is enabled whenever DEBUG is defined (tested with $?DEBUG)
set DEBUG = true
#
# CLOUDANT SETUP
#
# Resolve the Cloudant base URL into CU. Order of precedence:
#   1. CLOUDANT_URL, when defined
#   2. account name (CN) and password (CP) from the credentials file
#   3. the first word of the credentials file, used as the URL as-is
# The credentials file holds up to three whitespace-separated words that are
# read as URL, name and password.
set creds = ~$USER/.cloudant_url
if (-e $creds) then
    set cc = ( `cat $creds` )
    # block-form tests are required here: csh expands the command of a
    # one-line "if" even when the test is false, so $cc[2] on a one-word
    # file would abort the script with "Subscript out of range"
    if ($#cc > 0) then
        set CU = $cc[1]
    endif
    if ($#cc > 1) then
        set CN = $cc[2]
    endif
    if ($#cc > 2) then
        set CP = $cc[3]
    endif
endif
if ($?CLOUDANT_URL) then
    set CU = $CLOUDANT_URL
else if ($?CN && $?CP) then
    set CU = "$CN":"$CP"@"$CN.cloudant.com"
else if ($?CU == 0) then
    # neither the environment nor the credentials file supplied anything usable
    echo `date` "$0 $$ -- no Cloudant URL ($creds)" >& /dev/stderr
    exit
endif
# log only what follows any user:password@ prefix so credentials stay out of the log
set cu_host = `echo "$CU" | sed 's/^.*@//'`
if ($?DEBUG) echo `date` "$0 $$ -- Cloudant noSQL ($cu_host)" >& /dev/stderr
#
# VISUAL_RECOGNITION SETUP
#
# Read the Watson Visual Recognition API key (api_key) and service URL (TU)
# from a JSON credentials file, then default the API version identifiers.
set creds = ~$USER/.watson.visual-recognition.json
if (-e $creds) then
    # the file may hold multiple entries; the first key and URL are used
    set keys = ( `/usr/local/bin/jq '.[]|.credentials.api_key' $creds` )
    # block form: a one-line "if" would still expand $keys[1] when the list is
    # empty and abort before the "invalid" diagnostic below can be printed
    if ($#keys > 0) then
        set api_key = `echo "$keys[1]" | sed 's/"//g'`
    endif
    set urls = ( `/usr/local/bin/jq '.[]|.credentials.url' $creds` )
    if ($#urls > 0) then
        set TU = `echo "$urls[1]" | sed 's/"//g'`
    endif
else
    echo `date` "$0 $$ -- no VisualRecognition ($creds)" >& /dev/stderr
    exit
endif
if ($?TU && $?api_key) then
    # verid and vdate may be preset by the caller; otherwise use these defaults
    if ($?verid == 0) set verid = "v3"
    if ($?vdate == 0) set vdate = "2016-05-20"
    if ($?DEBUG) echo `date` "$0 $$ -- VisualRecognition $verid/$vdate ($TU)" >& /dev/stderr
else
    echo `date` "$0 $$ -- invalid VisualRecognition ($creds)" >& /dev/stderr
    exit
endif
#
# PROCESS CGI QUERY_STRING
#
# Extract db (device), model and notags from QUERY_STRING when it is defined.
# sed prints its input unchanged when the parameter is absent, so a result
# equal to the whole QUERY_STRING means "not supplied". The values are quoted
# in the comparisons because an empty value (for example "db=&model=x") would
# otherwise leave the expression without a left operand and abort the script.
# An empty value is treated the same as a missing one.
if ($?QUERY_STRING) then
    set device = `echo "$QUERY_STRING" | sed 's/.*db=\([^&]*\).*/\1/'`
    if ("$device" == "$QUERY_STRING" || "$device" == "") unset device
    set model = `echo "$QUERY_STRING" | sed 's/.*model=\([^&]*\).*/\1/'`
    if ("$model" == "$QUERY_STRING" || "$model" == "") unset model
    set notags = `echo "$QUERY_STRING" | sed 's/.*notags=\([^&]*\).*/\1/'`
    if ("$notags" == "$QUERY_STRING" || "$notags" == "") unset notags
endif
#
# PROCESS COMMAND LINE ARGUMENTS
#
# Options:
#   -n <maxfiles>        stored in maxfiles
#   -D                   delete existing model (must also specify -m <model>)
#   -e {frame|sample}    image extension: sample selects jpeg, anything else jpg
#   -m <model_id>        model by <classifier_id>
#   -j <job_id>          find by job_id (overrides DATE)
#   -N <negative_class>  negative example class
#   -d <label_dir>       base path directory (overrides TMP)
# Any other word is taken as the device, the name of a directory in $TMP/label.
#
echo "$0 [ -n <maxfiles> -D(elete old) -m <model_id> -j <job_id> -N <negative_class> -d <label_dir> -e {frame|sample}] <device-id>"
@ i = 1
while ($i <= $#argv)
    set t = "$argv[$i]"
    if ("$t" == "-n" || "$t" == "-e" || "$t" == "-m" || "$t" == "-j" || "$t" == "-N" || "$t" == "-d") then
        # these options consume the next word; verify it exists before indexing
        # argv, which would otherwise abort with "Subscript out of range"
        @ i++
        if ($i > $#argv) then
            echo `date` "$0 $$ -- option $t requires a value" >& /dev/stderr
            exit
        endif
        set v = "$argv[$i]"
        if ("$t" == "-n") then
            set maxfiles = "$v"
        else if ("$t" == "-e") then
            # full frame (jpg) or sample (jpeg)
            switch ("$v")
            case "sample":
                set ext = "jpeg"
                breaksw
            case "frame":
            default:
                set ext = "jpg"
                breaksw
            endsw
        else if ("$t" == "-m") then
            set model = "$v"
        else if ("$t" == "-j") then
            setenv DATE "$v"
        else if ("$t" == "-N") then
            set notags = "$v"
        else
            # only -d remains
            setenv TMP "$v"
        endif
    else if ("$t" == "-D") then
        set delete = true
    else
        set device = "$t"
    endif
    @ i++
end
#
# configure defaults
#
if (! $?device) set device = rough-fog
if (! $?ext) set ext = jpg
#
# check arguments
#
# device is always defined from here on. With both model and notags supplied
# there is nothing to look up; without notags the negative class defaults to
# the location reported for this device by the aah-devices CGI.
if ($?model && $?notags) then
    if ($?DEBUG) /bin/echo `/bin/date` "$0 $$ -- $device $model $notags"
    setenv QUERY_STRING "db=$device"
else if (! $?notags) then
    set devices_url = "http://$WWW/CGI/aah-devices.cgi"
    set location = `/usr/bin/curl -s -q -f -L "$devices_url" | /usr/local/bin/jq -r '.|select(.name=="'"$device"'")|.location'`
    if ($#location == 0) then
        # no location means no negative class can be chosen; stop here
        if ($?DEBUG) /bin/echo `/bin/date` "$0 $$ -- not found AAH_LOCATION"
        unset location
        exit
    endif
    if ($?DEBUG) /bin/echo `/bin/date` "$0 $$ -- found AAH_LOCATION ($location)"
    if ($?DEBUG) /bin/echo `/bin/date` "$0 $$ -- using default ($location) for negative (notags)"
    set notags = "$location"
endif
# QUERY_STRING is part of every work file name created below
setenv QUERY_STRING "db=$device&ext=$ext"
echo `date` "$0 $$ -- START ($QUERY_STRING)" >& /dev/stderr
#
# FIND OLD MODEL
#
# Unless CLOUDANT_OFF is defined, query the Cloudant database "$device-$API"
# for previously stored models.
if ($?CLOUDANT_OFF == 0) then
# fetch the database description; db is the JSON reply split into words
set db = ( `curl -s -q -L "$CU/$device-$API" | /usr/local/bin/jq '.'` )
if ($#db) then
# a reply whose .error is "not_found" means the database is missing; forget db
set error = ( `echo "$db" | /usr/local/bin/jq '.error,.reason' | sed 's/"//g'` )
if ($#error > 0 && $error[1] == "not_found") then
if ($?DEBUG) echo `date` "$0 $$ -- $error[2-] ($device-$API)" >& /dev/stderr
unset db
endif
endif
# with a requested model: replace $model by the matching rows (JSON text);
# otherwise collect every row whose doc.name equals the device
if ($?db && $?model) then
set model = ( `curl -s -q -L "$CU/$device-$API/_all_docs?include_docs=true" | /usr/local/bin/jq '.rows[]|select(.doc.model=="'"$model"'")'` )
else if ($?db) then
set device_models = ( `curl -s -q -L "$CU/$device-$API/_all_docs?include_docs=true" | /usr/local/bin/jq '.rows[]|select(.doc.name=="'"$device"'")'` )
endif
if ($?model) then
# NOTE(review): whenever model is defined this branch reports and then exits,
# so nothing below this section runs for a requested model while Cloudant is
# on -- confirm this is intended
if ($?DEBUG) echo `date` "$0 $$ -- FOUND MODEL ($#model)" >& /dev/stderr
# CHECK INVENTORY AGAINST TRAINED SET
set w = ( `echo "$model" | /usr/local/bin/jq '.date' | sed 's/"//g'` )
set s = ( `echo "$model" | /usr/local/bin/jq '.images[].class' | sed 's/"//g'` )
set p = ( `echo "$model" | /usr/local/bin/jq '.detail.classes[].class' | sed 's/"//g'` )
set n = ( `echo "$model" | /usr/local/bin/jq '.negative' | sed 's/"//g'` )
if ($?DEBUG) echo `date` "$0 $$ -- EXISTING MODEL ($model) date ($w) sets ($s) negative ($n) positive ($p)" >& /dev/stderr
exit
else if ($?device_models) then
if ($?DEBUG) echo `date` "$0 $$ -- DEVICE MODELS ($#device_models)" >& /dev/stderr
# pull the classifier ids, model ids and dates out of the device rows
set classifier_ids = ( `echo "$device_models" | /usr/local/bin/jq '.doc.detail.classifier_id' | sed 's/"//g'` )
set models = ( `echo "$device_models" | /usr/local/bin/jq '.doc.model' | sed 's/"//g'` )
set dates = ( `echo "$device_models" | /usr/local/bin/jq '.doc.date' | sed 's/"//g'` )
# get models by date
set date_model = ( `echo "$device_models" | /usr/local/bin/jq '.|select(.doc.date=="'$DATE'")' ` )
if ($#date_model) then
# a stored model whose date equals this job DATE becomes the current model
set model = `echo "$date_model" | /usr/local/bin/jq '.doc.model' | sed 's/"//g'`
if ($?DEBUG) echo `date` "$0 $$ -- model ($model) by date ($DATE) in Cloudant" >& /dev/stderr
endif
if ($?DEBUG) echo `date` "$0 $$ -- classifiers ($classifier_ids) models ($models)" >& /dev/stderr
endif
else
if ($?DEBUG) echo `date` "$0 $$ -- Cloudant OFF" >& /dev/stderr
endif
# report the outcome of the lookup
if ($?model) then
if ($?DEBUG) echo `date` "$0 $$ -- model ($model) in Cloudant" >& /dev/stderr
else
if ($?DEBUG) echo `date` "$0 $$ -- NO MODEL ($device)" >& /dev/stderr
endif
##
## STEP 1 - INVENTORY
##
# Build $INVENTORY, a JSON file listing every class directory under
# $TMP/label/$device with the ids, count and total bytes of its *.$ext files.
# The file is written as $INVENTORY.$$ first; any file matching $INVENTORY.*
# marks an inventory in progress.
inventory:
if ($?previous && $?model) then
    if ($?DEBUG) echo `date` "$0 $$ -- PREVIOUS MODEL ($model)" `/usr/local/bin/jq '.' "$previous"` >& /dev/stderr
endif
set INVENTORY = "$TMP/$APP-$API-$QUERY_STRING.$DATE.json"
# the glob runs inside backquotes so that "No match" yields an empty list
# instead of ending the script
set INPROGRESS = ( `echo "$INVENTORY".*` )
if ($#INPROGRESS) then
    if ($?DEBUG) echo `date` "$0 $$ -- inventory in-progress ($INPROGRESS)" >& /dev/stderr
    goto done
else if (-s "$INVENTORY") then
    if ($?DEBUG) echo `date` "$0 $$ -- found $INVENTORY" >& /dev/stderr
    goto batch
endif
if ($?DEBUG) echo `date` "$0 $$ -- finding old ($APP-$API-$QUERY_STRING.*.json)" >& /dev/stderr
set old = ( `echo "$TMP/$APP-$API-$QUERY_STRING".*.json` )
if ($#old > 0) then
    if ($?DEBUG) echo `date` "$0 $$ -- removing old ($old)" >& /dev/stderr
    rm -f $old
endif
# indicate in-progress
if ($?DEBUG) echo `date` "$0 $$ -- creating inventory $INVENTORY" >& /dev/stderr
# note specification of "label" subdirectory
if (! -d "$TMP/label") then
    echo `date` "$0 $$ -- no directory ($TMP/label)" >& /dev/stderr
    exit
endif
# verify the device directory and its class entries before any file is
# written: a failing glob in the foreach below would end the script and leave
# the in-progress file behind, blocking later runs for this DATE
if (! -d "$TMP/label/$device") then
    echo `date` "$0 $$ -- no directory ($TMP/label/$device)" >& /dev/stderr
    exit
endif
set xdirs = ( `echo "$TMP/label/$device/"*` )
if ($#xdirs == 0) then
    echo `date` "$0 $$ -- no classes ($TMP/label/$device)" >& /dev/stderr
    exit
endif
unset xdirs
echo '{"name":"'$device'","negative":"'$notags'",' >! "$INVENTORY.$$"
unset classes
unset label
echo '"images":[' >> "$INVENTORY.$$"
# iterate
foreach xdir ( "$TMP/label/$device/"* )
    if ($?DEBUG) echo `date` "$0 $$ -- processing $xdir" >& /dev/stderr
    # label is defined from the second entry on, which is when a separator is needed
    if ($?label) echo ',' >> "$INVENTORY.$$"
    set label = "$xdir:t"
    if ($?classes) then
        set classes = "$classes"',"'"$label"'"'
    else
        set classes = '"'"$label"'"'
    endif
    echo '{"class":"'$label'","ids":[' >> "$INVENTORY.$$"
    @ count = 0
    @ bytes = 0
    foreach file ( `find "$xdir" -name "*.$ext" -type f -print` )
        if ($count > 0) echo ',' >> "$INVENTORY.$$"
        # fifth field of ls -l is the file size
        @ bytes += `ls -l $file | awk '{ print $5 }'`
        # id is the file name without directory and extension
        echo '"'"$file:t:r"'"' >> "$INVENTORY.$$"
        @ count++
    end
    echo '],"bytes":'$bytes',"count":'$count'}' >> "$INVENTORY.$$"
end
echo '],"classes":['"$classes"']}' >> "$INVENTORY.$$"
# reformat through jq into the final file, then drop the in-progress file
/usr/local/bin/jq '.' "$INVENTORY.$$" >! "$INVENTORY"
rm -f "$INVENTORY.$$"
##
## STEP 2 - CALCULATE BATCH JOBS
##
# Prepare the job directory $JOB and read the per-class image counts from the
# inventory. The file $JOB.$$ marks batching in progress.
batch:
set JOB = "$TMP/$APP-$API-$QUERY_STRING.$DATE.job"
set INPROGRESS = ( `echo "$JOB".*` )
if ($#INPROGRESS) then
    if ($?DEBUG) echo `date` "$0 $$ -- batching in-progress ($INPROGRESS)" >& /dev/stderr
    goto done
else if (-s "$JOB/job.json") then
    if ($?DEBUG) echo `date` "$0 $$ -- existing batches ($JOB/job.json)" >& /dev/stderr
    goto training
endif
set old = ( `echo "$TMP/$APP-$API-$QUERY_STRING".*.job` )
if ($#old > 0) then
    if ($?DEBUG) echo `date` "$0 $$ -- removing old ($old); entries remain in DB and VR" >& /dev/stderr
    rm -fr $old
endif
# indicate JOB in-progress
if ($?DEBUG) echo `date` "$0 $$ -- batching jobs $JOB" >& /dev/stderr
touch "$JOB.$$"
mkdir -p "$JOB"
#
# WATSON VISUAL RECOGNITION - INSTRUCTIONS
#
# SIZE LIMITATIONS (enforced by API)
#
# The service accepts a maximum of 10,000 images or 100 MB per .zip file
# The service requires a minimum of 10 images per .zip file.
# The service accepts a maximum of 256 MB per training call.
#
# GUIDELINES (not enforced)
#
# Include approximately the same number of images in each examples file.
# Including an unequal number of images can cause the quality of the trained classifier to decline.
#
# 1) A minimum of 50 images is recommended in each .zip file, as fewer than 50 images can decrease the quality of the trained classifier.
# 2) If the quality and content of training data is the same, then classifiers that are trained on more images will generally be more accurate
# than classifiers that are trained on fewer images. The benefit of training a classifier on more images plateaus at around 5000 images,
# and this can take a while to process. You can train a classifier on more than 5000 images, but it may not significantly increase the accuracy of that classifier.
# 3) Uploading a total of 150-200 images per .zip file gives you the best balance between the time it takes to train and the improvement to classifier accuracy.
# More than 200 images increases the time, and it does increase the accuracy, but with diminishing returns for the amount of time it takes.
#
# ADDITIONAL INSTRUCTIONS
#
# Split into two or three sets: training, validation (for hyper-parameters) and testing (http://sebastianraschka.com/blog/2016/model-evaluation-selection-part3.html)
#
@ MAXIMAGES = 10000
@ MAXZIPBYTES = 1000 * 1000 * 90 # should be 100
@ MINIMAGES = 10
@ MAXSETBYTES = 256 * 1000 * 1000
@ MINSETSIZE = 50
@ MAXSETSIZE = 5000
@ MAXZIPSET = 200
@ SPLITSETS = 2
# get all classes as pairs of class name and count (including negative)
set allclass = ( `/usr/local/bin/jq '[.images|sort_by(.count)[]|{"class":.class,"count":.count,"bytes":.bytes}]' "$INVENTORY"`)
if ($?DEBUG) echo `date` "$0 $$ -- batching images by counts ($allclass)" >& /dev/stderr
# process all samples into training, validation and test sets
set pairs = ( `echo "$allclass" | /usr/local/bin/jq '.[]|.class,.count' | sed 's/"//g'` )
if ($#pairs == 0) then
    if ($?DEBUG) echo `date` "$0 $$ -- NO PAIRS" >& /dev/stderr
    # drop the in-progress marker; left behind it would make every later run
    # for this DATE report batching in-progress and skip the work
    rm -f "$JOB.$$"
    exit
endif
# Walk the (class, count) pairs. For each class with enough images, split its
# ids into files of $split lines, zip the images of each file, cut the zip
# into pieces of at most MAXZIPBYTES and record the piece names and ids.
# Result: $sets, a JSON fragment appended to $job by the code that follows.
@ p = 1
set job = '{"device":"'"$device"'","date":'"$DATE"',"jobs":['
unset sets
while ($p < $#pairs)
    set class = $pairs[$p]
    @ p++
    set count = $pairs[$p]
    @ p++
    if ($?DEBUG) echo `date` "$0 $$ -- CLASS ($class) COUNT ($count)" >& /dev/stderr
    # calculate split size to create only SPLITSETS number of sets
    @ split = ( $count / $SPLITSETS ) + ( $count % $SPLITSETS )
    # test if split size is sufficient
    if ($split > $MINIMAGES) then
        # test if too many samples
        if ($split > $MAXSETSIZE) then
            if ($?DEBUG) echo `date` "$0 $$ -- split size ($MAXSETSIZE); actual ($split)" >& /dev/stderr
            # cap at the numeric limit (the value, not the word MAXSETSIZE,
            # is what split -l needs below)
            @ split = $MAXSETSIZE
        else
            if ($?DEBUG) echo `date` "$0 $$ -- split size ($split)" >& /dev/stderr
        endif
        set base = "$JOB/$class."
        /usr/local/bin/jq '.images[]|select(.class=="'$class'").ids[]' "$INVENTORY" | sed 's/"//g' | split -l $split - $base
        # from here on split holds the list of id files just created
        set split = ( `echo $base*` )
        if ($?sets) then
            set sets = "$sets"'},{"class":"'"$class"'","sets":'
        else
            set sets = '{"class":"'"$class"'","sets":'
        endif
    else
        if ($?DEBUG) echo `date` "$0 $$ -- insufficient samples ($split)" >& /dev/stderr
        unset split
    endif
    if ($?split) then
        if ($?DEBUG) echo `date` "$0 $$ -- processing ($split)" >& /dev/stderr
        foreach s ( $split )
            if ($?splits) then
                set splits = "$splits"','
            else
                set splits = '['
            endif
            # suffix that split(1) gave this id file; reused in the batch names
            set sext = $s:e
            # create ZIP file name
            set zip = "$JOB/$class.zip"
            if ($?DEBUG) echo `date` "$0 $$ -- creating ($zip) from split ($s)" >& /dev/stderr
            foreach id ( `cat "$s"` )
                if ($?ids) then
                    set ids = "$ids"',"'"$id"'"'
                else
                    set ids = '["'"$id"'"'
                endif
                # slow but complete
                find $TMP/label/$device -name "$id"."$ext" -print | xargs -I % zip -q -j -r -u "$zip" % >& /dev/stderr
            end
            set ids = "$ids"']'
            # if ($?DEBUG) echo `date` "$0 $$ -- IDS: $ids" >& /dev/stderr
            rm -f "$s"
            if (-s "$zip") then
                # put all results into temporary directory
                set base = "$JOB/$class"
                mkdir -p "$base"
                if ($?DEBUG) echo `date` "$0 $$ -- splitting ($zip) into ($base)" >& /dev/stderr
                zipsplit -b "$base/" -n $MAXZIPBYTES "$zip" >& /dev/null
                # success?
                if ($status == 0) then
                    # get all the zips created
                    set zips = ( `echo "$base/"*.zip` )
                    if ($#zips > 0) then
                        if ($?DEBUG) echo `date` "$0 $$ -- split into $#zips ZIP files ($zips)" >& /dev/stderr
                        @ i = 1
                        foreach z ( $zips )
                            set b = "$JOB/$class.$sext.$i.zip"
                            if ($?DEBUG) echo `date` "$0 $$ -- creating batch entry ($b)" >& /dev/stderr
                            mv "$z" "$b"
                            if ($?batch) then
                                set batch = "$batch"',"'"$b"'"'
                            else
                                set batch = '["'"$b"'"'
                            endif
                            @ i++
                        end
                        set batch = "$batch"']'
                        # if ($?DEBUG) echo `date` "$0 $$ -- BATCH: $batch" >& /dev/stderr
                    else
                        echo `date` "$0 $$ -- NO ZIPS" >& /dev/stderr
                        # clear the in-progress marker so a later run can retry
                        rm -f "$JOB.$$"
                        exit
                    endif
                else
                    echo `date` "$0 $$ -- ZIPSPLIT failure" >& /dev/stderr
                    rm -f "$JOB.$$"
                    exit
                endif
                rm -fr "$base"
            else
                echo `date` "$0 $$ -- ZIP failure" >& /dev/stderr
                rm -f "$JOB.$$"
                exit
            endif
            if ($?zips) then
                unset zips
            endif
            rm -f "$zip"
            # each split contributes two JSON arrays: the zip names, then the ids
            set splits = "$splits""$batch","$ids"
            # if ($?DEBUG) echo `date` "$0 $$ -- SPLITS: $splits" >& /dev/stderr
            unset batch
            unset ids
        end
        set splits = "$splits"']'
    else
        echo `date` "$0 $$ -- NO SPLIT" >& /dev/stderr
        unset splits
    endif
    if ($?splits) then
        set sets = "$sets""$splits"
        # if ($?DEBUG) echo `date` "$0 $$ -- SETS: $sets" >& /dev/stderr
    endif
    unset splits
end
# SETS
# Close the JSON started in $job and store it as $JOB/job.json. With sets the
# last class object and the jobs array are closed; without any set only the
# (empty) jobs array is closed so the file is still valid JSON.
if ($?sets) then
    set job = "$job""$sets"'}]}'
else
    # NO SETS?
    if ($?DEBUG) echo `date` "$0 $$ !! NO SETS" >& /dev/stderr
    set job = "$job"']}'
endif
if ($?job) then
    # store result
    echo "$job" >! "$JOB/job.json"
else
    if ($?DEBUG) echo `date` "$0 $$ !! NO JOB" >& /dev/stderr
    exit
endif
# all done; remove the in-progress marker
rm -f "$JOB.$$"
##
## STEP 3 - EXECUTING TRAINING JOBS
##
# Entry checks for training: skip when training is in progress or already
# present, and require both the inventory file and the job directory.
training:
if ($?DEBUG) echo `date` "$0 $$ -- TRAIN" >& /dev/stderr
set TRAIN = "$TMP/$APP-$API-$QUERY_STRING.$DATE.job/train"
set INPROGRESS = ( `echo "$TRAIN".*` )
if ($#INPROGRESS) then
    if ($?DEBUG) echo `date` "$0 $$ -- training in-progress ($INPROGRESS)" >& /dev/stderr
    goto done
else if (-d "$TRAIN") then
    if ($?DEBUG) echo `date` "$0 $$ -- existing training ($TRAIN)" >& /dev/stderr
    # finish here; jumping back to the training label would repeat this same
    # test forever because nothing in between changes $TRAIN
    goto done
endif
set old = ( `echo "$TMP/$APP-$API-$QUERY_STRING.$DATE.job/train".*.json` )
if ($#old > 0) then
    if ($?DEBUG) echo `date` "$0 $$ -- NOT removing old ($old)" >& /dev/stderr
    # rm -fr $old
endif
# check requisites
# (recomputed here because this label can be reached without running the
# inventory and batch steps)
set INVENTORY = "$TMP/$APP-$API-$QUERY_STRING.$DATE.json"
set JOB = "$TMP/$APP-$API-$QUERY_STRING.$DATE.job"
if ((-s "$INVENTORY") && (-d "$JOB")) then
    if ($?DEBUG) echo `date` "$0 $$ -- found inventory ($INVENTORY) and job ($JOB)" >& /dev/stderr
else
    if ($?DEBUG) echo `date` "$0 $$ -- no inventory ($INVENTORY) or job ($JOB)" >& /dev/stderr
    goto cleanup
endif
# get parameters
# the device name is taken from the inventory file from here on
set device = ( `/usr/local/bin/jq '.name' "$INVENTORY" | sed 's/"//g'` )
if ($?DEBUG) echo `date` "$0 $$ -- device ($device)" >& /dev/stderr
#
# GET EXISTING CLASSIFIERS FROM WATSON_VR
#
# One listing request serves both lookups: by classifier id when a model is
# known, otherwise by classifier name equal to the device. In the second case
# the first matching classifier id becomes the model.
set list_url = "$TU/$verid/classifiers?api_key=$api_key&version=$vdate"
if ($?model) then
    if ($?DEBUG) echo `date` "$0 $$ -- searching for classifier by ID" >& /dev/stderr
    set classifiers = ( `curl -q -s -L "$list_url" | /usr/local/bin/jq '[.classifiers[]|select(.classifier_id=="'"$model"'")]'` )
    if ($?DEBUG) echo `date` "$0 $$ -- got ($classifiers) for $model on $device" >& /dev/stderr
else if ($?device) then
    if ($?DEBUG) echo `date` "$0 $$ -- searching for classifier by device" >& /dev/stderr
    set classifiers = ( `curl -q -s -L "$list_url" | /usr/local/bin/jq '[.classifiers[]|select(.name=="'"$device"'")]'` )
    set model = ( `echo "$classifiers" | /usr/local/bin/jq '.[].classifier_id' | sed 's/"//g'` )
    if ($#model == 0) then
        unset model
    else
        if ($?DEBUG) echo `date` "$0 $$ -- using $model[1] from $device for classifier by device" >& /dev/stderr
        set model = $model[1]
    endif
endif
if (! $?classifiers) then
    if ($?DEBUG) echo `date` "$0 $$ -- NO EXISTING CLASSIFIERS" >& /dev/stderr
    set classifiers = ()
else
    if ($?DEBUG) echo `date` "$0 $$ -- CLASSIFIERS ($classifiers)" >& /dev/stderr
endif
#
# TEST IF MODEL READY FOR TRAINING
#
# Can only train a classifier in "ready" state
#
# With a model and a non-empty classifier listing: either delete the matching
# classifier (when delete is defined) or fetch its details into $detail.
if ($#classifiers > 0 && $?model) then
# NOTE(review): ready is computed here but not referenced again in this section
set ready = ( `echo "$classifiers" | /usr/local/bin/jq '[.[]|select(.status=="ready")]'` )
# ids of listed classifiers whose classifier_id equals the model
set cids = `echo "$classifiers" | /usr/local/bin/jq '.[]|select(.classifier_id=="'"$model"'").classifier_id' | sed 's/"//g'`
if ($#cids > 0) then
foreach cid ( $cids )
if ($cid == "$model") then
if ($?DEBUG) echo `date` "$0 $$ -- matched specified classifier ($model)" >& /dev/stderr
if ($?delete) then
if ($?DEBUG) echo `date` "$0 $$ -- deleting classifier id ($cid)" >& /dev/stderr
curl -s -q -X DELETE "$TU/$verid/classifiers/$cid?api_key=$api_key&version=$vdate"
# forgetting the model makes the training step create a new classifier
unset model
else
if ($?DEBUG) echo `date` "$0 $$ -- getting details for classifier id ($cid)" >& /dev/stderr
set detail = ( `curl -s -q "$TU/$verid/classifiers/$cid?api_key=$api_key&version=$vdate" | /usr/local/bin/jq '.'` )
endif
break
endif
end
# test if model matched (and not deleted)
if ($?model) then
# cid keeps the value from the last loop iteration above
if ($cid != "$model") then
if ($?DEBUG) echo `date` "$0 $$ -- no classifier ($model) for device ($device) is ready" >& /dev/stderr
set training = ( `echo "$classifiers" | /usr/local/bin/jq '[.[]|select(.status=="training")]'` )
# NOTE(review): this selects on .name equal to the model id while the loop
# below compares classifier ids with the model -- confirm which field is meant
set cids = ( `echo $training | /usr/local/bin/jq '.[]|select(.name=="'"$model"'").classifier_id' | sed 's/"//g'` )
if ($?DEBUG) echo `date` "$0 $$ -- classifiers ($cids) are in training" >& /dev/stderr
foreach cid ( $cids )
if ($cid == "$model") then
# the model is still training; nothing more can be done in this run
if ($?DEBUG) echo `date` "$0 $$ -- classifier id ($cid) in training" >& /dev/stderr
goto cleanup
endif
end
# no matching model
unset model
endif
endif
endif
endif
#
# GET CLASSES (OLD, NET, NEGATIVE)
#
# oldclasses: classes of the existing classifier (only when details were fetched)
# newclasses: classes listed in the inventory
# net:        inventory classes that are neither the negative class nor in oldclasses
if (! $?detail) then
    unset oldclasses
else
    if ($?DEBUG) echo `date` "$0 $$ -- model details ($detail)" >& /dev/stderr
    set oldclasses = ( `echo "$detail" | /usr/local/bin/jq '.classes[].class' | sed 's/"//g'` )
endif
set newclasses = ( `/usr/local/bin/jq '.images[].class' "$INVENTORY" | sed 's/"//g'` )
set notags = ( `/usr/local/bin/jq '.negative' "$INVENTORY" | sed 's/"//g'` )
set net = ()
foreach nc ( $newclasses )
    # the negative class is never part of the net set
    if ($nc != "$notags") then
        if ($?oldclasses) then
            # after the scan oc equals nc only when the class already exists
            foreach oc ( $oldclasses )
                if ($nc == "$oc") break
            end
            if ($nc != "$oc") then
                set net = ( $net $nc )
            endif
        else
            set net = ( $net $nc )
        endif
    endif
end
if ($?DEBUG) echo `date` "$0 $$ -- OLD ($?oldclasses) NEW ($net) NEGATIVE ($notags) +++" >& /dev/stderr
#
# RUN TRAINING JOBS
#
# For every positive class, upload each of its training ZIP files, paired with
# the next unused negative ZIP while any remain. The first upload creates the
# classifier unless a model is already known; later uploads retrain it. After
# each upload the classifier is polled until it reports "ready" or "failed".
# Sets: model (classifier id), detail (last polled description), ttime (seconds).
set positive = ( `/usr/local/bin/jq '.images|sort_by(.count)[]|select(.class!="'$notags'")|.class' "$INVENTORY" | sed 's/"//g'` )
set negative = ( `/usr/local/bin/jq '.images|sort_by(.count)[]|select(.class=="'$notags'")|.class' "$INVENTORY" | sed 's/"//g'` )
if ($?DEBUG) echo `date` "$0 $$ -- POSITIVE ($positive) NEGATIVE ($negative) +++" >& /dev/stderr
# the first set (index 0) holds the training example ZIPs; the following set holds the IDs used in those ZIPs
set negatives = ( `/usr/local/bin/jq '.jobs[]|select(.class=="'"$notags"'").sets[0][]' "$JOB/job.json" | sed 's/"//g'` )
# an empty list must count as "no negatives"; negatives stays defined only
# while $negatives[$j] is a valid element
if ($#negatives == 0) unset negatives
@ ttime = 0
@ h = 1
@ j = 1
while ($h <= $#positive)
    set p = $positive[$h]
    # first (0) set is training
    set positives = ( `/usr/local/bin/jq '.jobs[]|select(.class=="'"$p"'").sets[0][]' "$JOB/job.json" | sed 's/"//g'` )
    @ i = 1
    while ($i <= $#positives)
        # file receiving the response of the training request
        set job = "$JOB/$p.$$.json"
        if ($?negatives) then
            set examples = ( -F "$p""_positive_examples=@$positives[$i]" -F "negative_examples=@$negatives[$j]" )
        else if ($?model) then
            set examples = ( -F "$p""_positive_examples=@$positives[$i]" )
        else
            echo `date` "$0 $$ ** INSUFFICIENT EXAMPLE SETS **"
            exit
        endif
        # attempts made for this batch when the service returns nothing
        @ tries = 0
again:
        # timer start
        @ start = `date +%s`
        # run training
        if ($?model) then
            if ($?DEBUG) echo `date` "$0 $$ -- RETRAIN $model ($examples)" >& /dev/stderr
            curl -f -s -q -S -L "$TU/$verid/classifiers/$model?api_key=$api_key&version=$vdate" -o $job $examples >& /dev/stderr
        else
            if ($?DEBUG) echo `date` "$0 $$ -- CREATE $device ($examples)" >& /dev/stderr
            curl -f -s -q -S -L "$TU/$verid/classifiers?api_key=$api_key&version=$vdate" -F "name=$device" -o $job $examples >& /dev/stderr
        endif
        set cstatus = $status
        # check status
        if ($cstatus != 0) then
            echo `date` "$0 $$ ** FAILURE ($cstatus)" >& /dev/stderr
            if (-s "$job") then
                echo `date` "$0 $$ ** " `cat "$job"` >& /dev/stderr
                # check result code
                set code = `/usr/local/bin/jq '.code' "$job" | sed 's/"//g'`
                set message = `/usr/local/bin/jq '.error' "$job" | sed 's/"//g'`
                if ($?DEBUG) echo `date` "$0 $$ -- FAILURE ($message)" >& /dev/stderr
            endif
            rm -f "$job"
            goto next
        else if (! -s "$job") then
            echo `date` "$0 $$ ** no output ($job)" >& /dev/stderr
            # retry a bounded number of times with a pause instead of looping
            # against the service without limit
            @ tries++
            if ($tries < 3) then
                sleep 10
                goto again
            endif
            goto next
        else
            # check result code
            set code = `/usr/local/bin/jq '.code' "$job" | sed 's/"//g'`
            if ($code == 400) then
                set message = `/usr/local/bin/jq '.error' "$job" | sed 's/"//g'`
                if ($?DEBUG) echo `date` "$0 $$ -- FAILURE ($message)" >& /dev/stderr
                goto next
            endif
            # get classifier id to check on progress
            set cid = `/usr/local/bin/jq '.classifier_id' "$job" | sed 's/"//g'`
            set sts = `/usr/local/bin/jq '.status' "$job" | sed 's/"//g'`
            if ($#cid > 0 && $cid != "null" && $sts != "error") then
                if ($?DEBUG) echo `date` "$0 $$ -- LEARNING ($cid) status ($sts)" >& /dev/stderr
                set model = "$cid"
            else
                echo -n `date` "$0 $$ ** FAILURE ($cid) ($sts) ($job)" >& /dev/stderr
                goto next
            endif
        endif
        rm -f "$job"
        @ elapsed = `date +%s` - $start
        if ($?DEBUG) echo -n `date` "$0 $$ -- LOAD COMPLETE ($elapsed) WAITING "
        # poll every 10 seconds; the status is quoted so that an empty reply
        # does not break the expression
        while ("$sts" != "ready")
            set detail = ( `curl -s -q -L "$TU/$verid/classifiers/$model?api_key=$api_key&version=$vdate" | /usr/local/bin/jq '.'` )
            set sts = ( `echo "$detail" | /usr/local/bin/jq '.status' | sed 's/"//g'` )
            if ("$sts" == "failed") then
                # a failed classifier never becomes ready; stop waiting
                echo `date` "$0 $$ ** FAILURE ($model) status ($sts)" >& /dev/stderr
                break
            endif
            if ("$sts" != "null" && "$sts" != "") then
                if ($?DEBUG) echo -n "."
            else
                if ($?DEBUG) echo -n "!"
            endif
            sleep 10
        end
        @ elapsed = `date +%s` - $start
        if ($?DEBUG) echo " ($elapsed seconds)"
        set model = "$cid"
        @ ttime += $elapsed
next:
        # increment to next batch
        @ i++
        @ j++
        if ($?negatives) then
            # each negative ZIP is used once, in order; when none remain the
            # following uploads carry positive examples only
            if ($j > $#negatives) unset negatives
        endif
    end
    @ h++
end
# without a classifier and its description there is nothing to record
if ($?model == 0 || $?detail == 0) then
    echo `date` "$0 $$ ** no classifier trained ($DATE)" >& /dev/stderr
    goto cleanup
endif
if ($?DEBUG) echo `date` "$0 $$ -- COMPLETE $DATE ($model) time ($ttime) ($detail)" >& /dev/stderr
#
# UPDATE INVENTORY INDICATING SUCCESS
#
# Merge the training outcome into the inventory file: class list, creation and
# retraining times (epoch seconds), the job batches, job date, model id and the
# classifier description. The result is written next to the inventory and
# moved into place only when non-empty.
update:
# get jobs batches
set batches = ( `/usr/local/bin/jq '.' "$JOB/job.json"` )
set classes = `echo "$batches" | /usr/local/bin/jq '[.jobs[].class]'`
# timestamps: the :r modifier drops the fractional part after the last dot
# before conversion to epoch seconds
set created = `echo "$detail" | /usr/local/bin/jq '.created' | sed 's/"//g'`
set created = `date -j -f '%Y-%m-%dT%H:%M:%S' +%s $created:r`
set retrained = `echo "$detail" | /usr/local/bin/jq '.retrained' | sed 's/"//g'`
set retrained = `date -j -f '%Y-%m-%dT%H:%M:%S' +%s $retrained:r`
# update INVENTORY with resulting model
/usr/local/bin/jq '.classes='"$classes" "$INVENTORY" \
    | /usr/local/bin/jq '.created="'$created'"|.retrained="'$retrained'"' \
    | /usr/local/bin/jq '.sets='"$batches" \
    | /usr/local/bin/jq '.date="'"$DATE"'"|.model="'"$model"'"' \
    | /usr/local/bin/jq '.detail='"$detail" \
    >! "$INVENTORY.$$"
if (! -s "$INVENTORY.$$") then
    echo `date` "$0 $$ -- FAILURE UPDATING INVENTORY WITH MODEL AND DETAILS" >& /dev/stderr
    exit
endif
mv -f "$INVENTORY.$$" "$INVENTORY"
#
# update Cloudant
#
# Store the updated inventory as document "$model" in the Cloudant database
# "$device-$API", creating the database first when it does not exist.
if ($?CLOUDANT_OFF == 0 && $?CU && $?device) then
if ($?DEBUG) echo `date` "$0 $$ -- test if device exists ($device-$API)" >& /dev/stderr
# a reply without .db_name (jq prints null) is taken to mean the database is missing
set devdb = `curl -s -q -X GET "$CU/$device-$API" | /usr/local/bin/jq '.db_name'`
if ( "$devdb" == "null" ) then
if ($?DEBUG) echo `date` "$0 $$ -- creating device $CU/$device-$API" >>& /dev/stderr
# create device
set devdb = `curl -s -q -X PUT "$CU/$device-$API" | /usr/local/bin/jq '.ok'`
# test for success
if ( "$devdb" != "true" ) then
# failure
if ($?DEBUG) echo `date` "$0 $$ -- failure creating Cloudant database ($device-$API)" >& /dev/stderr
# switch Cloudant off for the rest of this run
setenv CLOUDANT_OFF TRUE
else
if ($?DEBUG) echo `date` "$0 $$ -- success creating device $device-$API" >& /dev/stderr
endif
endif
if ( $?CLOUDANT_OFF == 0 ) then
# fetch any existing document for this model; its revision is needed to delete it
curl -s -q -o "$INVENTORY.$$" "$CU/$device-$API/$model" >>&! /tmp/LOG
if (-s "$INVENTORY.$$") then
set doc = ( `cat "$INVENTORY.$$" | /usr/local/bin/jq -r '._id,._rev'` )
if ($#doc == 2 && $doc[1] == $model && $doc[2] != "") then
set rev = $doc[2]
if ($?DEBUG) echo `date` "$0 $$ -- deleting old output ($rev)" >& /dev/stderr
curl -s -q -X DELETE "$CU/$device-$API/$model?rev=$rev" >& /dev/stderr
endif
else
if ($?DEBUG) echo `date` "$0 $$ -- no old output to delete" >& /dev/stderr
endif
rm -f "$INVENTORY.$$"
if ($?DEBUG) echo `date` "$0 $$ -- storing new output" >& /dev/stderr
# upload the inventory file as the new document body
curl -s -q -H "Content-type: application/json" -X PUT "$CU/$device-$API/$model" -d "@$INVENTORY" >>&! /tmp/LOG
# NOTE(review): this tests only the exit status of curl, which is run without
# -f here -- confirm whether an HTTP error reply should count as a failure
if ($status == 0) then
if ($?DEBUG) echo `date` "$0 $$ -- success storing new output" >& /dev/stderr
else
if ($?DEBUG) echo `date` "$0 $$ -- failure storing new output" >& /dev/stderr
endif
else
if ($?DEBUG) echo `date` "$0 $$ -- Cloudant OFF ($device-$API)" >& /dev/stderr
endif
else
if ($?DEBUG) echo `date` "$0 $$ -- no Cloudant update" >& /dev/stderr
endif
# Remove leftover in-progress files of the inventory, then report completion.
# The glob is expanded inside backquotes, as elsewhere in this script, because
# a pattern without matches given directly to rm is a "No match" error that
# would end the script before the FINISH line is printed.
cleanup:
set stale = ( `echo "$TMP/$APP-$API-$QUERY_STRING.$DATE.json".*` )
if ($#stale > 0) then
    rm -f $stale
endif
done:
echo `date` "$0 $$ -- FINISH ($*)"