In [0]:
# ============================================================
# Notebook: 01_raw_jira_ingestion (FIXED VERSION)
# Handles dots, quotes, apostrophes, weird Jira fields
# ============================================================
import re
# -----------------------------
# 1. Read CSV
# -----------------------------
# Read the landing-zone CSV export into a DataFrame.
# NOTE(review): assumes the export follows the usual CSV convention where
# free-text fields are double-quoted, may span several lines, and escape a
# literal quote by doubling it ("") -- confirm against the actual file.
# Spark's defaults (backslash escape, one record per physical line) would
# split such fields across rows and shift values into the wrong columns.
jira_df = (
    spark.read
    .option("header", True)
    .option("inferSchema", True)
    .option("multiLine", True)   # a quoted field may contain newlines
    .option("quote", '"')
    .option("escape", '"')       # "" inside a quoted field is a literal quote
    .csv("/Volumes/sla-intelligence-platform/raw_data/landing_zone/GFG_FINAL.csv")
)
print("✅ CSV loaded")
# -----------------------------
# 2. Clean column names SAFELY
# -----------------------------
def clean_col(c):
    """Turn a raw column header into a lowercase, underscore-separated name.

    Steps: lowercase and trim surrounding whitespace, replace every
    non-word character with "_", collapse runs of "_" into one, and drop
    leading/trailing "_".

    Args:
        c: the original column header (str).

    Returns:
        The cleaned name. It is the empty string when the header contains
        no word characters other than underscores.
    """
    lowered = c.lower().strip()
    # Anything that is not a letter, digit or underscore becomes "_".
    underscored = re.sub(r"[^\w]", "_", lowered)
    # Squash repeated underscores, then trim them from both ends.
    return re.sub(r"_+", "_", underscored).strip("_")
def _unique_names(names):
    """Return `names` with empty entries filled in and duplicates suffixed.

    An empty name becomes ``col_<position>``. The second and later
    occurrences of a name get ``_2``, ``_3``, ... appended, skipping any
    candidate that is already taken, so no two entries in the result are equal.

    Args:
        names: list of column names in positional order.

    Returns:
        A new list of the same length with unique, non-empty names.
    """
    taken = set()
    unique = []
    for position, name in enumerate(names):
        base = name or f"col_{position}"
        candidate, n = base, 1
        while candidate in taken:
            n += 1
            candidate = f"{base}_{n}"
        taken.add(candidate)
        unique.append(candidate)
    return unique


# Cleaning is lossy: distinct headers can collapse to the same name and a
# symbol-only header cleans to "". Both would make the table write fail, so
# the names are made unique and non-empty before renaming.
clean_columns = _unique_names([clean_col(c) for c in jira_df.columns])
# Rename all columns in one positional call, so the original names
# (which may contain dots or quotes) never have to be referenced.
jira_df_clean = jira_df.toDF(*clean_columns)
print("✅ Columns cleaned safely")
jira_df_clean.printSchema()
# -----------------------------
# 3. Create RAW database
# -----------------------------
spark.sql("CREATE DATABASE IF NOT EXISTS slainte_raw")
# -----------------------------
# 4. Write Delta table
# -----------------------------
# mode("overwrite") replaces the table contents by itself, so no DROP TABLE
# is issued first: dropping would leave no table at all if the write then
# failed, and would discard the table's Delta history.
# overwriteSchema lets the replacement carry a different column set.
# NOTE(review): assumes any existing slainte_raw.raw_jira_issues is already
# a Delta table -- confirm; a table in another format would need a one-off drop.
jira_df_clean.write \
   .format("delta") \
   .mode("overwrite") \
   .option("overwriteSchema", "true") \
   .saveAsTable("slainte_raw.raw_jira_issues")
print("✅ Delta table written")
# -----------------------------
# 5. Validation
# -----------------------------
# Read back from the metastore (not from jira_df_clean) so the check
# reflects what was actually persisted.
raw_jira_issues = spark.table("slainte_raw.raw_jira_issues")
raw_jira_issues.show(5)
print("Row count:", raw_jira_issues.count())

In [0]:
# Load the persisted raw Jira table and render it with the notebook's
# rich table display.
df = spark.read.table("slainte_raw.raw_jira_issues")
display(df)