In [ ]:
%%configure -f
{
"conf": {
     "spark.dynamicAllocation.disableIfMinMaxNotSpecified.enabled": true,
     "spark.dynamicAllocation.enabled": true,
     "spark.dynamicAllocation.minExecutors": 2,
     "spark.dynamicAllocation.maxExecutors": 8
   }
}

In [ ]:
# Notebook inputs. They default to empty strings here; presumably the calling
# pipeline overrides them at run time — TODO confirm.
# Path of the source image; its directory and base name determine the output locations.
image_file_path = ''
# AIS file path; in this notebook it is only recorded in the log dimensions.
ais_file_path = ''
# Storage container used to build the abfss:// root that holds the per-image config.
input_container = ''
# Output container; in this notebook it is only recorded in the log dimensions.
output_container = ''
# Storage account name used in the abfss:// URLs.
blob_account_name = ''
# Domain suffix placed after "<account>.dfs." in the abfss:// URLs.
azure_storage_domain = ''

In [ ]:
# Spark session/context and json are used later to read the configuration file
from pyspark.sql import SparkSession
import json
# Bind the session first and derive the context from it, so this cell does not
# depend on a `spark` name that was bound before the cell ran.
# NOTE(review): the app name interpolates the whole `mssparkutils.runtime.context`
# object, whereas the logging calls use only its 'notebookname' entry — confirm intent.
spark = SparkSession.builder.appName(f'Anomaly Prep {mssparkutils.runtime.context}').getOrCreate()
sc = spark.sparkContext

In [ ]:
# Initiate logging
import logging
from opencensus.ext.azure.log_exporter import AzureLogHandler
from opencensus.ext.azure.trace_exporter import AzureExporter
from opencensus.trace import config_integration
from opencensus.trace.samplers import AlwaysOnSampler
from opencensus.trace.tracer import Tracer
# Retrieve the telemetry connection string secret
instrumentation_connection_string = mssparkutils.credentials.getSecretWithLS(
    "keyvault",
    "AppInsightsConnectionString",
)

# Enable the OpenCensus logging integration before the logger is created
config_integration.trace_integrations(['logging'])

# Logger for this notebook: records are sent to the Azure log handler and the
# logger level is set to INFO
logger = logging.getLogger(__name__)
azure_log_handler = AzureLogHandler(connection_string=instrumentation_connection_string)
logger.addHandler(azure_log_handler)
logger.setLevel(logging.INFO)

# Tracer using the Azure exporter with the always-on sampler
azure_trace_exporter = AzureExporter(connection_string=instrumentation_connection_string)
tracer = Tracer(exporter=azure_trace_exporter, sampler=AlwaysOnSampler())

# Run-time parameters, attached to the log record through `extra`
notebook_name = mssparkutils.runtime.context['notebookname']
run_time_parameters = {
    'custom_dimensions': {
        'image_file_path': image_file_path,
        'ais_file_path': ais_file_path,
        'input_container': input_container,
        'output_container': output_container,
        'notebook_name': notebook_name,
    }
}

logger.info(f"{notebook_name}: INITIALISED", extra=run_time_parameters)

In [ ]:
import os 
from py4j.protocol import Py4JJavaError

# Setup blob paths
# NOTE(review): the "output" root is built from `input_container`, not
# `output_container` — confirm this is intentional.
output_abfss_path = f'abfss://{input_container}@{blob_account_name}.dfs.{azure_storage_domain}/'
global_config_abfss_path = f'abfss://configuration@{blob_account_name}.dfs.{azure_storage_domain}/anomdet.config.global.json'

# Get components to build output locations
base_dir, filename = os.path.split(image_file_path)
# os.path.splitext strips only the last extension, so file names containing
# several dots, or none at all, are handled without an unpacking error.
file_base, _ext = os.path.splitext(filename)
# Relative target directory: "<image dir>/<image base name>" without a leading '/'
tgt_dir = f'{base_dir}/{file_base}'.lstrip('/')

config_path = f'{tgt_dir}/anomdet.config.json'
# output_abfss_path already ends with '/', so join without adding a second one
config_abfss_path = f"{output_abfss_path}{config_path}"

# Make sure a per-image configuration exists: probe it with head() and, when the
# probe reports a missing file, seed it from the global configuration file.
try:
    mssparkutils.fs.head(config_abfss_path)
except Py4JJavaError as err:
    # Anything other than "file not found" is propagated unchanged
    if 'java.io.FileNotFoundException' not in str(err):
        raise err
    mssparkutils.fs.cp(global_config_abfss_path, config_abfss_path)

# Read the configuration file line by line and parse the joined text as JSON
config_lines = sc.textFile(config_abfss_path).collect()
config_n = json.loads(''.join(config_lines))

# Prefix for the low-resolution image name, taken from index 1 of
# translate_options.widthPct
# NOTE(review): the shape of widthPct is not visible here — confirm what index 1 selects.
width_pct = config_n["translate_options"]["widthPct"][1]
low_res_name = f"{width_pct!s}_pct_"

# Setup notebook outputs: every artefact path lives under tgt_dir and is named
# after the image base name
image_prefix = f'{tgt_dir}/{file_base}'
notebook_outputs = {
    'input_image_low_res': f'{tgt_dir}/{low_res_name}{file_base}.png',
    'ship_bb_image_low_res': f'{image_prefix}_ship_bb_low_res.png',
    'ship_bb_image_high_res': f'{image_prefix}_ship_bb_high_res.png',
    'ais_image': f'{image_prefix}_ais.png',
    'anomaly_image': f'{image_prefix}_anomaly.png',
    'config_path': config_path,
    'output_path': tgt_dir,
    # NOTE(review): key says "kml" but the extension is .xml — confirm
    'kml_path': f'{image_prefix}.xml',
}
output = {'custom_dimensions': notebook_outputs}

# Log the outputs, then return the inner dictionary as the notebook exit value
logger.info(f"{mssparkutils.runtime.context['notebookname']}: OUTPUT", extra=output)
mssparkutils.notebook.exit(notebook_outputs)