# Start a Job 
1. Look for file(s) to process in landing_zone
2. Copy to node
3. Extract/unzip into working zone
4. Run the notebook named in the extracted job.yaml, passing it the parameters from that file
5. Cleanup

Conventions
* All zip files contain a job.yaml file with a top-level `Job` section that includes `Notebook` and `Parameters` entries

In [0]:
# For now, process only the first zip found in the landing zone.
landing_zone_path = 's3://cg-test-bucket-1000/databricks/landing-zone/'
lz_files = dbutils.fs.ls(landing_zone_path)
lz_zip_files = [f for f in lz_files if f.name.endswith('.zip')]
if not lz_zip_files:
    # Fail with an explicit message instead of a bare IndexError from [0]
    # when there is nothing to process.
    raise FileNotFoundError('no .zip files found in ' + landing_zone_path)
# NOTE: later cells refer to this listing entry by the name `zipfile`
# (it shadows the stdlib module of the same name, so do not import that here).
zipfile = lz_zip_files[0]
print(zipfile)

### Make a unique identifier for each job run

In [0]:
import uuid

# uuid4() produces a random UUID, so each run of this notebook gets its own id.
jobid = uuid.uuid4()
print(str(jobid))

### Extract zip file to temporary directory

In [0]:
# Stage the job in a per-run folder under DBFS /tmp
# (open question: move this working area to an S3 working-zone instead?).
jobfolder = '/tmp/{}'.format(jobid)
dbutils.fs.mkdirs(jobfolder)
# The copied zip keeps its original file name inside the job folder.
dbfs_zip_path = '/'.join([jobfolder, zipfile.name])
dbutils.fs.cp(zipfile.path, dbfs_zip_path)
print('copying', zipfile.path, ' into ', dbfs_zip_path)

In [0]:
# Unzip the copied archive into <jobfolder>/unzipped. Paths are prefixed with
# '/dbfs' because the command-line tool is given the DBFS path that way.
import os
import subprocess

unzipped_folder = '/dbfs' + jobfolder + '/unzipped'
# Pass the arguments as a list (no shell involved) so that spaces or shell
# metacharacters in the zip file name cannot break or alter the command.
unzip_result = subprocess.run(
    ['unzip', '/dbfs' + jobfolder + '/' + zipfile.name, '-d', unzipped_folder],
    stdout=subprocess.PIPE,
    stderr=subprocess.STDOUT,
    universal_newlines=True,
)
print(unzip_result.stdout)
# unzip exits with 0 on success and 1 when only warnings occurred; anything
# higher means extraction failed, so stop here rather than failing later on a
# missing job.yaml.
if unzip_result.returncode > 1:
    raise RuntimeError(
        'unzip failed with exit status {} for {}'.format(
            unzip_result.returncode, zipfile.name
        )
    )

In [0]:
# Debug aid: list the contents of DBFS /tmp/, where the per-job folders are created.
dbutils.fs.ls('/tmp/')

### Read the yaml file for parameters

In [0]:
%sh
# Install PyYAML, which the following cell imports as `yaml`.
# NOTE(review): presumably this installs only into the environment where the
# %sh shell runs - confirm it is the same Python environment the notebook uses.
pip install pyyaml

In [0]:
import yaml

# job.yaml sits at the root of the folder the archive was extracted into.
job_yaml_path = '/dbfs/tmp/{}/unzipped/job.yaml'.format(jobid)
with open(job_yaml_path, 'r') as job_yaml_file:
    # safe_load is used so the file is parsed as plain data.
    job = yaml.safe_load(job_yaml_file)

print(job)

### Add extra 'standard' parameters

In [0]:
from collections import ChainMap

# job['Job']['Parameters'] is unpacked into a ChainMap, i.e. it is treated as a
# sequence of mappings; on duplicate keys the earliest mapping wins.
parameter_entries = job['Job']['Parameters']
merged_parameters = ChainMap(*parameter_entries)
# Flatten the layered view into one ordinary dict.
params_as_dict = dict(merged_parameters)

In [0]:
# Standard parameters added for every job on top of those from job.yaml.
# 'unzipped_folder' is given as the DBFS path (without the '/dbfs' prefix).
params_as_dict.update({
    'unzipped_folder': '/tmp/{}/unzipped'.format(jobid),
    'jobid': str(jobid),
})
print(params_as_dict)

### Execute Notebook

In [0]:
# Run the notebook named in job.yaml. All parameters travel as one string
# argument, "params", holding the str() form of the parameter dict.
notebook_path = job['Job']['Notebook']
timeout_seconds = 600
notebook_arguments = {"params": str(params_as_dict)}
dbutils.notebook.run(notebook_path, timeout_seconds, notebook_arguments)

### Remove from temp, save to archive, remove from landing zone

In [0]:
# Cleanup: delete this run's working folder from DBFS /tmp.
# The second argument (True) asks for a recursive delete.
job_tmp_folder = '/tmp/{}'.format(jobid)
dbutils.fs.rm(job_tmp_folder, True)

In [0]:
# Copy the processed zip into the archive, under a folder named after this
# run's job id (the landing-zone copy is removed in a separate step).
archive_path = 's3://cg-test-bucket-1000/databricks/archive/{}/{}'.format(jobid, zipfile.name)
landing_zone_zip = landing_zone_path + zipfile.name
dbutils.fs.cp(landing_zone_zip, archive_path)

In [0]:
# Final cleanup: delete the processed zip from the S3 landing zone.
processed_zip = landing_zone_path + zipfile.name
dbutils.fs.rm(processed_zip)