In [0]:
from pyspark.sql import functions as F
from pyspark.sql.functions import col, window, max, avg, count, to_date, current_timestamp

# ==========================================
# 1. CONFIGURATION
# ==========================================
# Storage account and container that hold the Gold layer files.
storage_account_name = "databricksete1995"
gold_container       = "gold"

# Silver table used as the streaming source.
source_table = "databricks_cata.silver.silver_earthquake"

# Gold tables written by this cell (one per stream).
target_table_daily  = "databricks_cata.gold.gold_daily_summary"
target_table_impact = "databricks_cata.gold.gold_high_impact"

# Every checkpoint and data path below hangs off the same container root.
_gold_root = "abfss://{}@{}.dfs.core.windows.net".format(gold_container, storage_account_name)

# Checkpoint directories: each streaming query needs its own.
checkpoint_daily  = _gold_root + "/_checkpoints/gold_daily"
checkpoint_impact = _gold_root + "/_checkpoints/gold_impact"

# Data directories backing the two Gold tables.
path_daily  = _gold_root + "/daily_summary"
path_impact = _gold_root + "/high_impact_events"

print("üöÄ Gold Stream Initializing...")
print("üì• Reading from: " + source_table)

# ==========================================
# 2. READ STREAM (From Silver)
# ==========================================
# Open the Silver table as a streaming DataFrame; both Gold streams in this
# cell are derived from this single source.
df_silver = (
    spark.readStream
    .table(source_table)
)

# ==========================================
# 3. STREAM A: HIGH IMPACT EVENTS (Filter)
# ==========================================
# Keep only events whose magnitude is 3.0 or above.
is_high_impact = col("magnitude") >= 3.0
df_impact = df_silver.where(is_high_impact)

# Sink settings for Stream A, gathered in one place.
impact_sink_options = {
    "checkpointLocation": checkpoint_impact,
    "path": path_impact,
    "mergeSchema": "true",
}

impact_writer = (
    df_impact.writeStream
    .format("delta")
    .outputMode("append")
    .options(**impact_sink_options)
    # availableNow: process the data that is available, then stop.
    .trigger(availableNow=True)
)

# Start the query; its handle is awaited at the end of this cell.
query_impact = impact_writer.table(target_table_impact)

print(f"‚úÖ Stream A Started: '{target_table_impact}'")

# ==========================================
# 4. STREAM B: DAILY AGGREGATION (Aggregate)
# ==========================================
# Produces one row per 1-day tumbling window of `event_time`.
#
# NOTE: the sink below uses "complete" output mode, which keeps every
# window's aggregate and rewrites the full result on each batch, so the
# watermark does not evict old windows here. It is left in place to avoid
# altering a query that may already have checkpointed state.
df_daily = (df_silver
    .withWatermark("event_time", "24 hours")
    .groupBy(F.window(F.col("event_time"), "1 day").alias("time_window"))
    .agg(
        F.count("*").alias("total_earthquakes"),
        # Qualified with F. so the Spark aggregate is never confused with
        # Python's builtin max().
        F.max("magnitude").alias("max_magnitude"),
        F.avg("depth").alias("avg_depth_km"),
        # count(<column>) counts every NON-NULL row, so counting the flag
        # column directly reports (almost) the same number as
        # total_earthquakes. Count only the rows where the flag is truthy.
        # Assumes tsunami_flag is a boolean or 0/1 value - TODO confirm
        # against the Silver schema.
        F.count(
            F.when(F.col("tsunami_flag").cast("boolean"), 1)
        ).alias("tsunami_alerts_count")
    )
    .select(
        # Window start is exposed as the row's "date" (still a timestamp).
        F.col("time_window.start").alias("date"),
        F.col("total_earthquakes"),
        F.col("max_magnitude"),
        F.col("avg_depth_km"),
        F.col("tsunami_alerts_count"),
        F.current_timestamp().alias("last_calculated_at")
    )
)

# NOTE(review): windows already aggregated under the existing checkpoint may
# keep the tsunami counts computed the old way; point `checkpoint_daily` at a
# fresh directory if history must be recomputed.
query_daily = (df_daily.writeStream
    .format("delta")
    .outputMode("complete")
    .option("checkpointLocation", checkpoint_daily)
    .option("path", path_daily)
    .option("mergeSchema", "true")

    # Process the data that is available, then stop.
    .trigger(availableNow=True)

    .table(target_table_daily))

print(f"‚úÖ Stream B Started: '{target_table_daily}'")

# ==========================================
# 5. WAIT FOR COMPLETION (Crucial for ADF)
# ==========================================
print("‚è≥ Waiting for both streams to finish processing...")

# Block on each query handle in turn so the notebook does not move on until
# both streams have terminated.
for running_query in (query_impact, query_daily):
    running_query.awaitTermination()

print("üöÄ Gold Layer Processing Complete!")

In [0]:
# ==========================================
# CREATE GOLD MAP TABLE (Required for 3D Map)
# ==========================================
# These settings are repeated from the first cell so this cell can be run
# on its own.
storage_account_name = "databricksete1995"
gold_container       = "gold"

# 1. Configuration
source_table = "databricks_cata.silver.silver_earthquake"
target_table = "databricks_cata.gold.gold_live_map"

checkpoint_path = f"abfss://{gold_container}@{storage_account_name}.dfs.core.windows.net/_checkpoints/gold_map_v2"
output_path     = f"abfss://{gold_container}@{storage_account_name}.dfs.core.windows.net/gold_live_map"

# 2. Read Clean Data
df_silver = spark.readStream.table(source_table)

# 3. Select only the columns the map needs (lat/lon/depth position each point).
df_map = df_silver.select(
    "event_time",
    "latitude",
    "longitude",
    "depth",         # z-axis of the 3D visualization
    "magnitude",
    "place",
    "alert_level"
)

# 4. Write to Gold
# trigger(availableNow=True) processes the data that is currently available
# and then stops. Starting the query returns immediately, so the handle is
# kept and awaited below; otherwise the plotting cell could read the table
# before it has been written.
query_map = (df_map.writeStream
    .format("delta")
    .outputMode("append")
    .option("checkpointLocation", checkpoint_path)
    .option("path", output_path)
    .trigger(availableNow=True)
    .table(target_table))

# Block until the batch has finished; a failed query raises here instead of
# being silently reported as success.
query_map.awaitTermination()

print(f"‚úÖ Created table: {target_table}")

In [0]:
import plotly.express as px

# 1. Load the Gold Data
# NOTE(review): toPandas() collects the whole table onto the driver; add a
# filter or limit here if the map table grows large.
df = spark.table("databricks_cata.gold.gold_live_map").toPandas()

# 2. Prepare the plotting frame
# Plotly rejects NaN or negative marker sizes, and earthquake magnitudes can
# be missing or below zero. Drop rows that lack a plotted coordinate or a
# magnitude, then derive a strictly positive size column. Colour still uses
# the true magnitude.
plot_columns = ["longitude", "latitude", "depth", "magnitude"]
min_marker_size = 0.1
df_plot = df.dropna(subset=plot_columns).copy()
df_plot["marker_size"] = (
    df_plot["magnitude"].astype("float64").clip(lower=min_marker_size)
)

# 3. Create the 3D Scatter Plot
# X = Longitude, Y = Latitude, Z = Depth (axis is inverted in the next step)
fig = px.scatter_3d(df_plot,
                    x='longitude',
                    y='latitude',
                    z='depth',
                    color='magnitude',       # Color dots by strength
                    size='marker_size',      # Size dots by (clipped) strength
                    hover_name='place',      # Show name when hovering
                    hover_data={'marker_size': False},  # hide the helper column
                    opacity=0.7,
                    color_continuous_scale=px.colors.sequential.Viridis,
                    title="3D Earthquake Map: Location vs Depth")

# 4. Invert Z-Axis (so deep quakes look "deep")
fig.update_scenes(zaxis_autorange="reversed")

# 5. Display
fig.show()

In [0]:
# Batch-read the high-impact Gold table and render it in the notebook.
high_impact_preview = spark.read.table("databricks_cata.gold.gold_high_impact")
display(high_impact_preview)