In [0]:
import json
from pathlib import Path
from glob import glob
from functions.sanity import validate_settings, initialize_empty_tables
from functions.job import save_job_configuration
from functions.utility import apply_job_type
from functions.s3_utils import upload_file

from functions.config import PROJECT_ROOT

# Job parameters, read from the notebook widgets of the same names.
catalog, settings_s3_prefix = (
    dbutils.widgets.get(widget_name)
    for widget_name in ("catalog", "settings_s3_prefix")
)


# Upload every per-table settings file and build, per layer, the list of
# {'table', 'settings_uri'} entries that is published as a job task value
# under the layer's name ('bronze', 'silver', 'gold').
job_settings = {}
for layer in ('bronze', 'silver', 'gold'):
    entries = []
    # glob() gives no ordering guarantee; sort the matches so the published
    # task values are identical from run to run.
    for settings_path in sorted(glob(f'./layer_*_{layer}/*.json')):
        settings_file = Path(settings_path)
        s3_uri = f"{settings_s3_prefix}/{layer}/{settings_file.name}"
        upload_file(settings_path, s3_uri, dbutils)
        # The file stem (file name without its extension) is the table name.
        entries.append({'table': settings_file.stem, 'settings_uri': s3_uri})
    job_settings[layer] = entries

# Publishing happens only after the upload loop has finished, so an exception
# raised during any upload leaves no task values set.
for key, value in job_settings.items():
    dbutils.jobs.taskValues.set(key=key, value=value)

# Collect history settings from every layer_*/*.json file whose
# "build_history" flag is true (boolean true or the string "true" in any
# case), evaluated after apply_job_type() has been applied to the settings.
# Such a file must define "dst_table_name" (a KeyError is raised otherwise);
# "history_schema" is forwarded only when it is present.
history_settings = []
# Sorted so the published list does not depend on glob()'s arbitrary order.
for settings_path in sorted(glob('./layer_*/*.json')):
    # Decode explicitly as UTF-8 rather than relying on the platform default,
    # and keep the file open only for the duration of the parse.
    with open(settings_path, encoding='utf-8') as f:
        settings = json.load(f)
    settings = apply_job_type(settings)
    if str(settings.get('build_history', 'false')).lower() != 'true':
        continue
    entry = {'full_table_name': settings['dst_table_name']}
    if 'history_schema' in settings:
        entry['history_schema'] = settings['history_schema']
    history_settings.append(entry)
dbutils.jobs.taskValues.set(key='history_settings', value=history_settings)

# Sanity checks (helpers from functions.sanity), run before the job
# configuration is saved.
validate_settings(dbutils)
initialize_empty_tables(spark)

# Save the job configuration under the catalog's /Volumes path.
job_config_dir = f"/Volumes/{catalog}/bronze/utility/jobs"
save_job_configuration(dbutils, job_config_dir)
