In [0]:
import json
from pathlib import Path
import os
from functions.s3_utils import read_text

# Notebook parameters, read from the Databricks widgets.
# 'job_settings' arrives as a JSON string and must contain a 'table' key;
# 'settings_uri' is optional and is None when absent.
color = dbutils.widgets.get('color')
job_settings = json.loads(dbutils.widgets.get('job_settings'))
table, settings_uri = job_settings['table'], job_settings.get('settings_uri')

# Fetch the AWS key pair from the 'edsm' secret scope and export it through
# the standard AWS environment variables of this Python process.
# NOTE(review): os.environ only changes the current process; if executors
# also need these credentials, confirm they receive them some other way.
access_key, secret_key = (
    dbutils.secrets.get('edsm', secret_name)
    for secret_name in ('aws_access_key_id', 'aws_secret_access_key')
)
os.environ.update(
    AWS_ACCESS_KEY_ID=access_key,
    AWS_SECRET_ACCESS_KEY=secret_key,
)


from functions.utility import get_function, create_bad_records_table, apply_job_type

# Load the table settings, either from a remote URI or from a local JSON file.
if settings_uri:
    # Remote settings: read the document text through the project's
    # read_text helper and parse it as JSON.
    raw = read_text(settings_uri, dbutils)
    settings = json.loads(raw)
else:
    # Local settings: take the first '<table>.json' found in a directory named
    # 'layer_*_<color>' relative to the current working directory.
    settings_pattern = f'./layer_*_{color}/{table}.json'
    settings_path = next(Path().glob(settings_pattern), None)
    if settings_path is None:
        # Without the default, next() would raise a bare StopIteration that
        # says nothing about which file was expected or where it was sought.
        raise FileNotFoundError(
            f'No settings file matching {settings_pattern!r} '
            f'found under {Path().resolve()}'
        )
    # Decode explicitly as UTF-8 rather than relying on the locale default.
    settings = json.loads(settings_path.read_text(encoding='utf-8'))

# Post-process the parsed settings with apply_job_type (functions.utility).
settings = apply_job_type(settings)

# Destination table name from the table settings; a missing
# 'dst_table_name' key raises KeyError at this point.
dst_table_name = settings['dst_table_name']

# Echo both the job-level settings (from the widget) and the table-level
# settings to the notebook output, pretty-printed as JSON.
settings_message = ''.join([
    f'\n\nDictionary from {color}_settings.json:\n\n',
    json.dumps(job_settings, indent=4),
    f'\n\nContents of {table}.json:\n\n',
    json.dumps(settings, indent=4),
])
print(settings_message)

# Work out which ingest style the settings describe: a single pipeline
# function, or separate read / transform / write step functions.
step_keys = ('read_function', 'transform_function', 'write_function')
has_pipeline = 'pipeline_function' in settings
has_steps = all(key in settings for key in step_keys)

# Fail up front when neither style is fully configured.
if not (has_pipeline or has_steps):
    raise Exception('Could not find any ingest function name in settings.')

if has_pipeline:
    # A single pipeline function takes precedence over the step functions.
    pipeline_function = get_function(settings['pipeline_function'])
    pipeline_function(settings, spark)
else:
    # Resolve all three step functions before running any of them, so a bad
    # function name is reported before any step has executed.
    read_function, transform_function, write_function = (
        get_function(settings[key]) for key in step_keys
    )
    df = read_function(settings, spark)
    df = transform_function(df, settings, spark)
    write_function(df, settings, spark)

# Bronze runs additionally create the bad-records table once ingest finished.
if color == 'bronze':
    create_bad_records_table(settings, spark)
