In [0]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, sum

# Reuse the active Spark session, or create one if none exists yet.
spark = SparkSession.builder.getOrCreate()

# Catalog / schema coordinates of the silver (input) and gold (output) layers.
silver_catalog, silver_schema = "silver", "training_io"
gold_catalog, gold_schema = "gold", "training_io"

# Fully qualified <catalog>.<schema>.<object> names used below.
silver_players = f"{silver_catalog}.{silver_schema}.players"
silver_teams = f"{silver_catalog}.{silver_schema}.teams"
silver_goals = f"{silver_catalog}.{silver_schema}.goals"
gold_view = f"{gold_catalog}.{gold_schema}.vw_player_summary"

# Load the three silver inputs as DataFrames (players, then teams, then goals).
players_df, teams_df, goals_df = (
    spark.table(table_name)
    for table_name in (silver_players, silver_teams, silver_goals)
)

# Build the player summary: player attributes, team attributes and the
# goal / penalty totals per group.
#
# Both joins are LEFT joins, so players without a matching team row or
# without any goal rows are kept in the result.
#
# NOTE: `sum` below is pyspark.sql.functions.sum (imported at the top of the
# file, where it shadows the Python builtin), not the builtin itself.
#
# NOTE(review): the grouping key is made of descriptive attributes only. Two
# different id_player values that share all six attributes would collapse
# into a single row with combined totals — confirm whether id_player should
# be part of the key.
gold_view_df = (
    players_df.alias("p")
    .join(teams_df.alias("t"), col("p.id_team") == col("t.id_team"), "left")
    .join(goals_df.alias("g"), col("p.id_player") == col("g.id_player"), "left")
    .groupBy(
        col("p.player_name"),
        col("p.nationality"),
        col("p.field_position"),
        col("p.age"),
        col("t.team"),
        col("t.country"),
    )
    .agg(
        sum("g.goals").alias("total_goals"),
        sum("g.penalties_scored").alias("total_penalties"),
    )
    # sum() returns NULL when every input in the group is NULL, which is what
    # the left join produces for a player with no goal rows. Report 0 for
    # those players instead of NULL.
    .fillna(0, subset=["total_goals", "total_penalties"])
)

# Persist the aggregated DataFrame under the name held in `gold_view`,
# replacing whatever data is already stored there ("overwrite" mode).
# NOTE(review): despite the "vw_" prefix in the target name, saveAsTable
# materialises a table, not a view — confirm this is the intended object type.
gold_view_df.write.mode("overwrite").saveAsTable(gold_view)