In [0]:
# # --------------------------------------------------------
# # 1. Define widgets (job parameters)
# # --------------------------------------------------------
# dbutils.widgets.text("file_name", "", "CSV file name (e.g. DimSalesTerritory.csv)")
# dbutils.widgets.text(
#     "base_path",
#     "/Volumes/trg_catalog/trg_schema/trg_volume",
#     "Base folder path"
# )

# file_name = dbutils.widgets.get("file_name")
# base_path = dbutils.widgets.get("base_path")

# if not file_name:
#     raise ValueError("file_name parameter is empty – please pass a CSV file name.")

# # Build the full path to the file
# full_path = f"{base_path.rstrip('/')}/{file_name}"

# print(f"📂 Reading file: {full_path}")

# # --------------------------------------------------------
# # 2. Read CSV and count rows
# # --------------------------------------------------------
# df = (spark.read
#         .option("header", True)
#         .csv(full_path))

# row_count = df.count()
# print(f"✅ Row count in {file_name}: {row_count}")

# # --------------------------------------------------------
# # 3. (Optional) Expose value to other tasks in the Job
# # --------------------------------------------------------
# try:
#     dbutils.jobs.taskValues.set(key="row_count", value=row_count)
#     print("Row count stored in dbutils.jobs.taskValues with key = 'row_count'")
# except Exception as e:
#     # Will fail here if not running as a job – safe to ignore while testing in notebook
#     print(f"(Info) Could not set taskValues (probably not running as a job): {e}")

# --------------------------------------------------------
# Count the rows of one CSV file and publish the result as job task values.
#
# Inputs (notebook widgets / job parameters):
#   file_name - name of the CSV file, matched against the entries listed
#               directly under base_path
#   base_path - folder that is listed to locate the file
#
# Raises:
#   ValueError        - file_name is empty (or only whitespace)
#   FileNotFoundError - file_name is not among the entries under base_path
# --------------------------------------------------------

# 1. Widgets
dbutils.widgets.text("file_name", "", "CSV file name (e.g. DimSalesTerritory.csv)")
dbutils.widgets.text(
    "base_path",
    "/Volumes/trg_catalog/trg_schema/trg_volume",
    "Base folder path",
)

# Widget values are plain strings. Strip surrounding whitespace so that a
# blank-looking value is rejected by the guard below instead of slipping
# through and surfacing later as a confusing "file not found" error, and so
# that an accidental trailing space does not break the exact-name match.
file_name = dbutils.widgets.get("file_name").strip()
base_path = dbutils.widgets.get("base_path").strip()

if not file_name:
    raise ValueError("file_name parameter is empty – please pass a CSV file name.")

# List the entries under base_path and require an exact name match, so a
# missing file fails fast with a clear message before Spark is involved.
files = dbutils.fs.ls(base_path)
file_names = [f.name for f in files]

if file_name not in file_names:
    raise FileNotFoundError(f"🚫 File '{file_name}' not found in {base_path}")

# Drop any trailing slash on the folder so the joined path has exactly one.
full_path = f"{base_path.rstrip('/')}/{file_name}"
print(f"📂 Reading file: {full_path}")

# 2. Read CSV & count
# header=True: the first line supplies column names rather than a data row.
df = (
    spark.read
        .option("header", True)
        .csv(full_path)
)

row_count = df.count()
print(f"✅ Row count in {file_name}: {row_count}")

# 3. Store values for next task in the SAME JOB
dbutils.jobs.taskValues.set(key="row_count", value=row_count)
dbutils.jobs.taskValues.set(key="file_name", value=file_name)

print("Row count and file name stored in dbutils.jobs.taskValues")


