# Default notebook

This default notebook is executed using Databricks Workflows as defined in resources/ub_dlt_demo_job.yml.

In [None]:
from pyspark.sql import SparkSession
from pyspark.sql.types import *


# Obtain the Spark session for this notebook: getOrCreate() hands back the
# session that is already active when there is one, and builds a new one
# otherwise.
spark = SparkSession.builder.getOrCreate()

# Landing-zone location settings used when building the output path.
storage_account = "ubsadatabrickspocnpl2"  # storage account name
storage_container = "umpquapocdev"  # container inside the storage account
lz_base_path = "umpqua_poc/landing_zone"  # base folder for landing-zone files

In [None]:
# Column layout of the customer PII dataframe, as (column name, Spark type)
# pairs in output order. Every column is declared nullable.
# NOTE(review): tax_id is typed as Double and postal_code as Integer; numeric
# typing of identifiers drops leading zeros -- confirm this is what the
# downstream consumers expect.
_pii_columns = [
    ("customer_id", IntegerType()),
    ("group_id", IntegerType()),
    ("group_name", StringType()),
    ("customer_name", StringType()),
    ("tax_id", DoubleType()),
    ("is_company", IntegerType()),
    ("is_treasury", IntegerType()),
    ("primary_cif", IntegerType()),
    ("service_charge_plan_id", IntegerType()),
    ("plan_name", StringType()),
    ("charge_account", StringType()),
    ("create_date", StringType()),
    ("street_address1", StringType()),
    ("street_address2", StringType()),
    ("city", StringType()),
    ("state", StringType()),
    ("postal_code", IntegerType()),
    ("province", StringType()),
    ("is_international", IntegerType()),
    ("iso_code_a3", StringType()),
]

schema = StructType(
    [
        StructField(column_name, column_type, True)
        for column_name, column_type in _pii_columns
    ]
)

# Sample customer rows. Each tuple carries 20 values, one per schema column
# and in the same order; None marks a missing value.
data = [
    (
        12345, 2, "Business Customers", "LocalCorp", None,
        1, 1, 15688,
        0, "No Charge", None,
        "2018-11-05 14:02:00.753",
        "1675 E Valley Rd", None, "Santa Barbara", "CA", 93108, None,
        0, "USA",
    ),
    (
        23456, 1, "Retail Users", "WorldCorp Inc.", 121212121.0,
        1, 0, 238731,
        0, "No Charge", None,
        "2018-11-30 09:37:02.007",
        "635 S SPENCER", None, "HINTON", "OK", 73047, None,
        0, "USA",
    ),
    (
        34567, 3, "Consumer", "Papa Georgio", 111111113.0,
        0, 0, 299914,
        0, "No Charge", None,
        "2015-05-04 16:08:01.530",
        "206 W. Island", "Apt #3", "Colfax", "WA", 99111, None,
        0, "USA",
    ),
]
# Turn the sample rows into a dataframe that follows the explicit schema.
pii_df = spark.createDataFrame(data=data, schema=schema)

# Render the dataframe in the notebook output.
pii_df.display()

In [None]:
# Destination folder for the customer PII files inside the landing zone.
pii_lz_dest = f"abfss://{storage_container}@{storage_account}.dfs.core.windows.net/{lz_base_path}/customerpiidata"

# Append the dataframe as CSV with a header row and "||" as the field
# separator; files already present at the destination are left in place.
(
    pii_df.write
    .mode("append")
    .option("sep", "||")
    .option("header", True)
    .csv(pii_lz_dest)
)