
### Step 1: Connect to ADLS

Since we are using the Azure Credential Passthrough connection method, we only need to configure the custom access token provider and mount the containers that hold our TestStand files.

In [None]:
# Optional dependency: uncomment the next line if fsspec is not available.
# %pip install fsspec

# Database name; it is also appended to every container source path and
# mount-point prefix below.
database_name = "dbo"

# Bronze layer: container source, database folder inside it, mount point.
bronze_source = "REDACTED"
bronze_target = f"{bronze_source}{database_name}"
bronze_mnt = f"/mnt/bronze/{database_name}"

# Silver layer: same layout as bronze.
silver_source = "REDACTED"
silver_target = f"{silver_source}{database_name}"
silver_mnt = f"/mnt/silver/{database_name}"

# Gold layer: same layout as bronze.
gold_source = "REDACTED"
gold_target = f"{gold_source}{database_name}"
gold_mnt = f"/mnt/gold/{database_name}"

# Azure Credentials Passthrough: authenticate with a custom access token whose
# provider class name is read from the cluster's Spark configuration.
configs = {
    "fs.azure.account.auth.type": "CustomAccessToken",
    "fs.azure.account.custom.token.provider.class": spark.conf.get(
        "spark.databricks.passthrough.adls.gen2.tokenProviderClassName"
    ),
}



### Step 2: Mount bronze container


In [None]:
# Mount the bronze container only when its mount point is not registered yet,
# so re-running this cell does not attempt a second mount.
if bronze_mnt not in [entry.mountPoint for entry in dbutils.fs.mounts()]:
    dbutils.fs.mount(
        source=bronze_source,
        mount_point=bronze_mnt,
        extra_configs=configs,
    )

# Show the contents of the bronze database folder as the cell output.
dbutils.fs.ls(bronze_target)

[FileInfo(path='abfss://bronze@arcdatalakeg2.dfs.core.windows.net/dbo/PROP_ANALOGWAVEFORM.parquet', name='PROP_ANALOGWAVEFORM.parquet', size=343, modificationTime=1708270317000),
 FileInfo(path='abfss://bronze@arcdatalakeg2.dfs.core.windows.net/dbo/PROP_BINARY.parquet', name='PROP_BINARY.parquet', size=1044738, modificationTime=1708270299000),
 FileInfo(path='abfss://bronze@arcdatalakeg2.dfs.core.windows.net/dbo/PROP_DIGITALWAVEFORM.parquet', name='PROP_DIGITALWAVEFORM.parquet', size=311, modificationTime=1708270284000),
 FileInfo(path='abfss://bronze@arcdatalakeg2.dfs.core.windows.net/dbo/PROP_NUMERICLIMIT.parquet', name='PROP_NUMERICLIMIT.parquet', size=1289983, modificationTime=1708270268000),
 FileInfo(path='abfss://bronze@arcdatalakeg2.dfs.core.windows.net/dbo/PROP_RESULT.parquet', name='PROP_RESULT.parquet', size=23944627, modificationTime=1708270257000),
 FileInfo(path='abfss://bronze@arcdatalakeg2.dfs.core.windows.net/dbo/STEP_RESULT.parquet', name='STEP_RESULT.parquet', size=1


### Step 3: Mount silver container


In [None]:
# Mount the silver container only when its mount point is not registered yet,
# so re-running this cell does not attempt a second mount.
if silver_mnt not in [entry.mountPoint for entry in dbutils.fs.mounts()]:
    dbutils.fs.mount(
        source=silver_source,
        mount_point=silver_mnt,
        extra_configs=configs,
    )

# Show the contents of the silver database folder as the cell output.
dbutils.fs.ls(silver_target)

[]


### Step 4: Mount gold container


In [None]:
# Mount the gold container only when its mount point is not registered yet,
# so re-running this cell does not attempt a second mount.
if gold_mnt not in [entry.mountPoint for entry in dbutils.fs.mounts()]:
    dbutils.fs.mount(
        source=gold_source,
        mount_point=gold_mnt,
        extra_configs=configs,
    )

# Show the contents of the gold database folder as the cell output.
dbutils.fs.ls(gold_target)

[0;31m---------------------------------------------------------------------------[0m
[0;31mExecutionError[0m                            Traceback (most recent call last)
File [0;32m<command-2461738334509896>, line 10[0m
[1;32m      5[0m     dbutils[38;5;241m.[39mfs[38;5;241m.[39mmount(
[1;32m      6[0m         source[38;5;241m=[39mbronze_source, mount_point[38;5;241m=[39mbronze_mnt, extra_configs[38;5;241m=[39mconfigs
[1;32m      7[0m     )
[1;32m      9[0m [38;5;66;03m# List files in directory[39;00m
[0;32m---> 10[0m dbutils[38;5;241m.[39mfs[38;5;241m.[39mls(bronze_target)

File [0;32m/databricks/python_shell/dbruntime/dbutils.py:378[0m, in [0;36mDBUtils.FSHandler.prettify_exception_message.<locals>.f_with_exception_handling[0;34m(*args, **kwargs)[0m
[1;32m    376[0m exc[38;5;241m.[39m__context__ [38;5;241m=[39m [38;5;28;01mNone[39;00m
[1;32m    377[0m exc[38;5;241m.[39m__cause__ [38;5;241m=[39m [38;5;28;01mNone[39;00m
[0;32m--> 37


### Step 5: Setup Database and Extract Data to Tables in Databricks

We create a database (dropping it first if it already exists) and `USE` it. Then we iterate over the files in the bronze database directory and create a table for each Parquet file found there.


In [None]:
# Recreate the database from scratch (dropping it and all of its tables if it
# already exists) and make it the current database.
spark.sql(f"drop database IF EXISTS {database_name} cascade ")
spark.sql(f"create database {database_name}")
spark.sql(f"use {database_name}")

# Each entry listed under the bronze database folder becomes one table.
tables_list = dbutils.fs.ls(bronze_target)

# File extension to remove from each file name to obtain the table name.
parquet_suffix = ".parquet"

for table in tables_list:
    # Remove only the trailing ".parquet" extension. str.strip(".parquet")
    # treats its argument as a SET of characters and trims them from BOTH ends
    # of the name, e.g. "uut_result.parquet" -> "_resul", which yields wrong
    # table names for any file starting or ending with . p a r q u e t.
    table_name = table.name
    if table_name.endswith(parquet_suffix):
        table_name = table_name[: -len(parquet_suffix)]

    spark.sql(f"DROP TABLE IF EXISTS `{table_name}`")
    # NOTE(review): header/inferschema look like CSV reader options and are
    # presumably ignored by the parquet source -- confirm before removing.
    spark.sql(f"""
              CREATE TABLE `{table_name}` 
              USING parquet 
              OPTIONS(path '{table.path}', header 'true', inferschema 'true')
              """)

# Display table
# NOTE(review): debug_mode is not defined in this section of the notebook;
# presumably it is set in an earlier cell -- confirm, otherwise this raises
# NameError on a fresh kernel.
if debug_mode:
    df = spark.sql('select * from uut_result')
    display(df)

[0;31m---------------------------------------------------------------------------[0m
[0;31mExecutionError[0m                            Traceback (most recent call last)
File [0;32m<command-2461738334509896>, line 10[0m
[1;32m      5[0m     dbutils[38;5;241m.[39mfs[38;5;241m.[39mmount(
[1;32m      6[0m         source[38;5;241m=[39mbronze_source, mount_point[38;5;241m=[39mbronze_mnt, extra_configs[38;5;241m=[39mconfigs
[1;32m      7[0m     )
[1;32m      9[0m [38;5;66;03m# List files in directory[39;00m
[0;32m---> 10[0m dbutils[38;5;241m.[39mfs[38;5;241m.[39mls(bronze_target)

File [0;32m/databricks/python_shell/dbruntime/dbutils.py:378[0m, in [0;36mDBUtils.FSHandler.prettify_exception_message.<locals>.f_with_exception_handling[0;34m(*args, **kwargs)[0m
[1;32m    376[0m exc[38;5;241m.[39m__context__ [38;5;241m=[39m [38;5;28;01mNone[39;00m
[1;32m    377[0m exc[38;5;241m.[39m__cause__ [38;5;241m=[39m [38;5;28;01mNone[39;00m
[0;32m--> 37