In [0]:
# Step 1: Import modules
import logging

# Step 2: Configure the named "ETLJob" logger used by the rest of the notebook
logger = logging.getLogger("ETLJob")
logger.setLevel(logging.INFO)

# This logger owns its own handler (added below), so stop its records from
# also travelling up to the root logger. If the root logger already has a
# handler attached (e.g. after logging.basicConfig or when the hosting
# runtime installs one), every message would otherwise be emitted twice.
logger.propagate = False

# Step 3: Add a stream handler (so logs appear in notebook / job output)
# logging.getLogger returns the same object for the same name, so the guard
# keeps a re-run of this cell from stacking a second handler on the logger.
if not logger.handlers:
    handler = logging.StreamHandler()  # no stream given -> writes to sys.stderr
    formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
    handler.setFormatter(formatter)
    logger.addHandler(handler)

# Step 4: Start logging
logger.info("Logger initialized successfully")


In [0]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import *
# Reuse the already-running Spark session if there is one, else create it.
spark = SparkSession.builder.getOrCreate()

logger.info("Reading sample Delta table")
try:
    # Example Delta table (replace with your path)
    contracts_raw = spark.read.table("bronze.contracts.contract_final")
    # count() is a Spark action; keeping it inside the try block means a
    # failure while actually scanning the table is logged here as well.
    record_count = contracts_raw.count()
    logger.info("Number of records: %s", record_count)
except Exception as exc:
    # logger.exception logs at ERROR level like logger.error did, but also
    # attaches the traceback to the record so handlers other than the
    # notebook output still capture where the failure happened.
    logger.exception("Failed to read Delta table: %s", exc)
    raise

# Number of rows for each distinct value of the "status" column.
# GroupedData.count() names its result column "count", matching the previous
# agg(count("*").alias("count")) without depending on the wildcard import.
status_counts = contracts_raw.groupBy("status").count()

# Keep `df` bound to the aggregated frame, as before, in case later cells
# still refer to it by that name.
df = status_counts

display(status_counts)
logger.info("Notebook execution completed successfully")
