In [0]:
from pyspark.sql.functions import col, to_date
from delta.tables import DeltaTable

# Silver layer for matches: project the bronze match table into a flat, typed
# schema and upsert it into the `silver_matches` Delta table keyed on match_id.
df_matches = spark.table("default.afcon_matches")

# Backticks make Spark treat a dotted name such as `home_team.home_team_name`
# as one literal column name instead of struct-field access.
silver_matches = df_matches.select(
    col("match_id").cast("int"),
    to_date(col("match_date")).alias("match_date"),
    col("`home_team.home_team_name`").alias("home_team"),
    col("`home_team.home_team_id`").cast("int").alias("home_team_id"),
    col("`away_team.away_team_name`").alias("away_team"),
    col("`away_team.away_team_id`").cast("int").alias("away_team_id"),
    col("home_score").cast("int"),
    col("away_score").cast("int"),
    col("`competition_stage.name`").alias("stage"),
    col("`stadium.name`").alias("stadium"),
    col("`referee.name`").alias("referee"),
).dropDuplicates(["match_id"])
# Why dropDuplicates: Delta MERGE raises an error when more than one source row
# matches the same target row, so a match that appears twice in bronze would
# break every incremental run. One arbitrary row per match_id is kept.
# NOTE(review): rows with a null match_id are collapsed to a single row as
# well -- confirm bronze never carries null/unparseable ids.

if not spark.catalog.tableExists("silver_matches"):
    # First run: materialise the projection as a new managed Delta table.
    silver_matches.write.format("delta").saveAsTable("silver_matches")
    print(" Created New Silver Table")
else:
    # Incremental upsert: rows whose match_id already exists are overwritten
    # column-for-column, unseen match_ids are inserted.
    (
        DeltaTable.forName(spark, "silver_matches")
        .alias("t")
        .merge(silver_matches.alias("s"), "t.match_id = s.match_id")
        .whenMatchedUpdateAll()
        .whenNotMatchedInsertAll()
        .execute()
    )
    print(" Merged New Matches into Silver")

# Printed on both the create and the merge path.
print(" silver_matches created.")

✨ Created New Silver Table
✅ silver_matches created.


match_id,match_date,home_team,home_team_id,away_team,away_team_id,home_score,away_score,stage,stadium,referee
3920396,2024-01-18,Egypt,774,Ghana,4885,2,2,Group Stage,Stade Félix Houphouët-Boigny,Pierre Ghislain Atcho
3920386,2024-01-14,Nigeria,775,Equatorial Guinea,4992,1,1,Group Stage,Stade Olympique Alassane Ouattara,Abongile Tom
3920385,2024-01-14,Egypt,774,Mozambique,4978,2,2,Group Stage,Stade Félix Houphouët-Boigny,Dahane Beida
3920387,2024-01-14,Ghana,4885,Cape Verde Islands,4924,1,2,Group Stage,Stade Félix Houphouët-Boigny,Jean Jacques Ndala Ngambo
3920419,2024-01-24,Zambia,4963,Morocco,788,0,1,Group Stage,Stade Laurent Pokou,Patrice Tanguy Mebiame


In [0]:
from pyspark.sql.functions import split, regexp_replace, col
from delta.tables import DeltaTable

# Silver layer for events: flatten the bronze event table, split the textual
# `location` value into numeric x/y columns, and upsert the result into the
# `silver_events` Delta table keyed on event_id.
df_events = spark.table("default.afcon_events")

# `location` is handled as text: the regex strips the square brackets, then the
# remainder is split on "," and the first two pieces are cast to double.
# NOTE(review): assumes the value looks like "[x, y]" -- confirm against bronze.
silver_events = (
    df_events
    .withColumn("clean_location", regexp_replace("location", "[\\[\\]]", ""))
    .withColumn("loc_x", split(col("clean_location"), ",")[0].cast("double"))
    .withColumn("loc_y", split(col("clean_location"), ",")[1].cast("double"))
    # Backticks make Spark treat a dotted name such as `type.name` as one
    # literal column name instead of struct-field access.
    .select(
        col("id").alias("event_id"),
        col("match_id").cast("int"),
        col("index"),
        col("period").cast("int"),
        col("timestamp"),
        col("minute").cast("int"),
        col("second").cast("int"),
        col("`type.name`").alias("event_type"),
        col("`play_pattern.name`").alias("play_pattern"),
        col("`team.name`").alias("team_name"),
        col("`player.name`").alias("player_name"),
        col("`player.id`").cast("int").alias("player_id"),
        col("loc_x"),
        col("loc_y"),
        col("`pass.length`").cast("double").alias("pass_length"),
        col("`shot.statsbomb_xg`").cast("double").alias("xg"),
        col("`pass.angle`").cast("double").alias("pass_angle"),
        col("`pass.outcome.name`").alias("pass_outcome"),
        col("`shot.outcome.name`").alias("shot_outcome"),
        col("under_pressure").cast("boolean"),
    )
    # Delta MERGE raises an error when more than one source row matches the
    # same target row, so an event that appears twice in bronze would break
    # every incremental run. One arbitrary row per event_id is kept.
    # NOTE(review): rows with a null event_id are collapsed to a single row as
    # well -- confirm bronze never carries null ids.
    .dropDuplicates(["event_id"])
)

if not spark.catalog.tableExists("silver_events"):
    # First run: materialise the projection as a new managed Delta table.
    silver_events.write.format("delta").saveAsTable("silver_events")
else:
    # Incremental upsert: rows whose event_id already exists are overwritten
    # column-for-column, unseen event_ids are inserted.
    (
        DeltaTable.forName(spark, "silver_events")
        .alias("t")
        .merge(silver_events.alias("s"), "t.event_id = s.event_id")
        .whenMatchedUpdateAll()
        .whenNotMatchedInsertAll()
        .execute()
    )
    print(" Merged New Events into Silver")

# Printed on both the create and the merge path.
print(" silver_events created.")

✅ silver_events created.


event_id,match_id,index,period,timestamp,minute,second,event_type,play_pattern,team_name,player_name,player_id,loc_x,loc_y,pass_length,xg,pass_angle,pass_outcome,shot_outcome,under_pressure
f5aed381-7737-4721-9d85-c282a57958cf,3920394,2408,2,00:19:35.573,64,35,Pass,Regular Play,Morocco,Hakim Ziyech,5237,61.5,75.7,13.138112,,3.0654044,,,
0ac24d07-05db-40bd-bbf4-7cc2a80d87b3,3920394,2409,2,00:19:36.826,64,36,Ball Receipt*,Regular Play,Morocco,Achraf Hakimi Mouh,5245,48.4,76.7,,,,,,
e23f60da-576b-4efd-b1bc-928ebb2f1716,3920394,2410,2,00:19:36.826,64,36,Carry,Regular Play,Morocco,Achraf Hakimi Mouh,5245,48.4,76.7,,,,,,
d926c941-b83c-48e2-9f50-cf83c7e612bb,3920394,2411,2,00:19:39.282,64,39,Pass,Regular Play,Morocco,Achraf Hakimi Mouh,5245,57.9,74.4,9.714422,,0.35756585,,,
3b2d4a21-d8c1-43c6-ad2b-d2c4a63028ba,3920394,2412,2,00:19:40.833,64,40,Ball Receipt*,Regular Play,Morocco,Hakim Ziyech,5237,67.0,77.8,,,,,,
