In [0]:
# Remove every widget currently defined on this notebook so the widget
# definitions in the next cell are created from scratch.
# NOTE(review): presumably meant to drop stale widgets left over from earlier
# versions of the notebook - confirm it is still wanted when run as a job.
dbutils.widgets.removeAll()

In [0]:
from pyspark.sql.functions import *
from pyspark.sql.types import *
from pyspark.sql import functions as F

# =========================
# (1) PARAMETERS AND WIDGETS
# =========================
# Widget name -> default value. The three text widgets are registered in this
# order, and their current values are read back only after all of them exist.
widget_defaults = {
    "catalogo": "catalog_footballdata",
    "esquema_source": "silver",
    "esquema_sink": "golden",
}
for widget_name, widget_default in widget_defaults.items():
    dbutils.widgets.text(widget_name, widget_default)

# Unpacked in the same order as the dictionary above.
catalogo, esquema_source, esquema_sink = (
    dbutils.widgets.get(widget_name) for widget_name in widget_defaults
)

# =========================
# (2) READ SILVER TABLES
# =========================
# Both fact tables are read from the catalog/schema selected through the widgets.
source_namespace = f"{catalogo}.{esquema_source}"

df_fact_events = spark.read.table(f"{source_namespace}.fact_match_events")
df_fact_physical = spark.read.table(f"{source_namespace}.fact_physical_performance")

In [0]:
# =========================
# (3) AGGREGATION 1: POSSESSION DISTRIBUTION (Bar/Pie Chart 1)
# =========================
# Keep only Cristal's on-ball actions and group them by possession phase.
# `== True` is a Spark Column comparison, not a Python boolean test.
is_cristal_event = F.col("es_cristal") == True  # noqa: E712
is_on_ball_event = F.col("event_type").isin("player_possession", "on_ball_engagement")

possession_keys = [
    "match_id",
    "rival_name",
    "team_in_possession_phase_type",
    "dificultad_rival",
]

agg_possession = (
    df_fact_events
    .filter(is_cristal_event & is_on_ball_event)
    .groupBy(*possession_keys)
    .agg(F.sum("duration").alias("total_duration_seconds"))
    # Minutes are derived from the summed seconds and rounded to 2 decimals.
    .withColumn(
        "total_duration_minutes",
        F.round(F.col("total_duration_seconds") / 60, 2),
    )
)

In [0]:
# =========================
# (4) AGGREGATION 2: DEFENSIVE BLOCKS (Bar Chart 2)
# =========================
# Grouped per match as well, so the defensive analysis can also be filtered by match.
# NOTE(review): rows are restricted to es_cristal == False; presumably these are
# the rival's events, i.e. moments when Cristal is out of possession - confirm.
block_phases = ("medium_block", "low_block", "high_block")

is_non_cristal_event = F.col("es_cristal") == False  # noqa: E712 - Spark Column comparison
is_block_phase = F.col("team_out_of_possession_phase_type").isin(*block_phases)

defensive_keys = [
    "match_id",
    "rival_name",
    "team_out_of_possession_phase_type",
    "dificultad_rival",
]

agg_defensive = (
    df_fact_events
    .filter(is_non_cristal_event & is_block_phase)
    .groupBy(*defensive_keys)
    .agg(
        F.sum("duration").alias("time_in_block_seconds"),
        # count() only counts rows whose event_id is non-null.
        F.count("event_id").alias("event_count"),
    )
)

In [0]:
# =========================
# (5) AGGREGATION 3: PHYSICAL EVOLUTION (Line Chart)
# =========================
# One row per player / match date / match / position group, carrying the mean
# of each per-minute physical metric.
per_minute_averages = [
    F.avg("distancia_por_minuto").alias("avg_dist_min"),
    F.avg("sprints_por_minuto").alias("avg_sprints_min"),
    F.avg("intensidad_hi_por_minuto").alias("avg_hi_dist_min"),
]

agg_physical_evolution = (
    df_fact_physical
    .groupBy("player_name", "match_date_dt", "match_name", "position_group")
    .agg(*per_minute_averages)
    # NOTE(review): this DataFrame is later written to a table; row order is
    # presumably not guaranteed when the table is read back - confirm the sort
    # is still needed here rather than in the consuming chart/query.
    .orderBy("player_name", "match_date_dt")
)

In [0]:
# =========================
# (6) LOAD INTO GOLD LAYER (SINK)
# =========================
# Each aggregate fully replaces its Gold table (data and schema) on every run.
sink_namespace = f"{catalogo}.{esquema_sink}"

gold_outputs = [
    (agg_possession, "agg_possession_stats"),
    (agg_defensive, "agg_defensive_blocks"),
    (agg_physical_evolution, "agg_player_evolution"),
]

for gold_df, gold_table in gold_outputs:
    (
        gold_df.write
        .mode("overwrite")
        .option("overwriteSchema", "true")
        .saveAsTable(f"{sink_namespace}.{gold_table}")
    )

print("Pipeline finalizado. Las tablas Gold ahora permiten filtros por partido y análisis de xThreat.")

In [0]:
# Preview the defensive-blocks Gold table, highest match_id first.
# The table name is built from the same widget values used when writing the
# Gold tables, so the preview always shows the table this run produced instead
# of a hard-coded catalog/schema that may differ from the widget settings.
display(
    spark.read.table(f"{catalogo}.{esquema_sink}.agg_defensive_blocks")
    .orderBy(F.col("match_id").desc())
)