In [0]:
import urllib.request
import pandas as pd
from delta.tables import DeltaTable

# Read the tournament stage to ingest from the first row of the
# `pipeline_config` table. If that lookup fails for any reason, fall back to
# the group stage so the cell can still run.
try:
    current_stage = spark.table("pipeline_config").first()["current_stage"]
except Exception:
    # `except Exception` instead of a bare `except:` so that
    # KeyboardInterrupt / SystemExit (e.g. a cancelled run) still propagate
    # rather than being silently turned into a group-stage ingestion.
    current_stage = "1. Group Stage"
    print("Config table not found. Defaulting to Group Stage.")

print(f" WORKER: Received order for '{current_stage}'")


# Root of the raw CSV files; each stage contributes a `<slug>_matches.csv`
# and a `<slug>_events.csv` under this path.
GITHUB_BASE_URL = "https://raw.githubusercontent.com/elkadirianas/AF-CAN/main/split_data"

# Maps the stage label (as stored in `current_stage`) to the file-name prefix
# used for that stage's CSVs.
file_map = {
    "1. Group Stage": "1_group_stage",
    "2. Round of 16": "2_round_of_16",
    "3. Quarter-finals": "3_quarter_finals",
    "4. Semi-finals": "4_semi_finals",
    "5. 3rd Place Final": "5_3rd_place",
    "6. Final": "6_final",
}

# Fail with an explicit message when the requested stage is not one we know
# how to ingest, instead of surfacing a bare KeyError from the dict lookup.
slug = file_map.get(current_stage)
if slug is None:
    raise ValueError(
        f"Unknown stage {current_stage!r}; expected one of: {', '.join(file_map)}"
    )

# Remote sources and the local paths they are downloaded to.
matches_url = f"{GITHUB_BASE_URL}/{slug}_matches.csv"
events_url  = f"{GITHUB_BASE_URL}/{slug}_events.csv"
local_matches = f"/tmp/{slug}_matches.csv"
local_events  = f"/tmp/{slug}_events.csv"


# Download the matches CSV and then the events CSV for the selected stage
# to their local paths.
print(f" Downloading {slug}...")
for remote_url, local_path in (
    (matches_url, local_matches),
    (events_url, local_events),
):
    urllib.request.urlretrieve(remote_url, local_path)

# Parse the downloaded files with pandas. The events file is read with
# low_memory=False so pandas infers each column's dtype from the whole file
# rather than chunk by chunk.
pdf_matches = pd.read_csv(local_matches)
pdf_events = pd.read_csv(local_events, low_memory=False)

# Convert both pandas frames to Spark DataFrames (matches first, then events).
df_matches, df_events = map(spark.createDataFrame, (pdf_matches, pdf_events))

print(" Upserting to Bronze Tables...")

# For each bronze table: create it from the incoming frame when it does not
# exist yet; otherwise merge on the key column. The merge only inserts source
# rows whose key is absent from the target -- rows that already exist are
# left untouched (there is no "when matched" clause).
for table_name, incoming_df, key_column in (
    ("afcon_matches", df_matches, "match_id"),
    ("afcon_events", df_events, "id"),
):
    if spark.catalog.tableExists(table_name):
        target_table = DeltaTable.forName(spark, table_name)
        merge_builder = target_table.alias("t").merge(
            incoming_df.alias("s"), f"t.{key_column} = s.{key_column}"
        )
        merge_builder.whenNotMatchedInsertAll().execute()
    else:
        incoming_df.write.format("delta").saveAsTable(table_name)

print(f" INGESTION COMPLETE: {current_stage}")

⚠️ Config table not found. Defaulting to Group Stage.
🤖 WORKER: Received order for '1. Group Stage'
⬇️ Downloading 1_group_stage...
🔄 Upserting to Bronze Tables...
✅ INGESTION COMPLETE: 1. Group Stage
