In [0]:
# Spark SQL that selects, for every year (ANIO), the wells (POZO) whose
# DENSE_RANK by average PRODUCCION_MBD is 10 or better, and reports each
# well's summed production for that year as TOTAL_ANUAL.
#
# - Rows with POZO = 'NACIONAL' are excluded (presumably the national
#   aggregate rather than a single well -- confirm against the silver table).
# - The ranking uses the *unrounded* average. Ordering by a value rounded to
#   two decimals made wells with nearly equal averages tie, and because
#   DENSE_RANK gives tied rows the same rank, `rk <= 10` could return more
#   wells than intended. Rounding is applied to the reported total only.
# - Genuine ties still share a rank, so a year can have more than 10 rows.
# - NOTE(review): wells are ranked by AVG but the reported column is SUM;
#   the two orderings differ when wells have a different number of rows per
#   year. Confirm which metric should drive the ranking.
query = """
WITH pozos_rankeados AS (
    SELECT
        ANIO,
        POZO,
        ROUND(SUM(PRODUCCION_MBD), 2) AS TOTAL_ANUAL,
        DENSE_RANK() OVER (
            PARTITION BY ANIO
            ORDER BY AVG(PRODUCCION_MBD) DESC
        ) AS rk
    FROM workspace.pmx_etl.silver_pemex_prod
    WHERE POZO != 'NACIONAL'
    GROUP BY ANIO, POZO
)
SELECT * FROM pozos_rankeados WHERE rk <= 10
"""

In [0]:

# Materialize the ranking query as a Delta table, replacing any previous
# version of it (CREATE OR REPLACE TABLE ... AS <query>).
#
# The table is intentionally NOT partitioned. The query groups by
# (ANIO, POZO), so partitioning by those same two columns placed exactly one
# row in each partition -- one directory and one data file per row -- which
# only adds file-listing overhead for a table this small.
#
# NOTE(review): the table name ends in "mensual" although the query
# aggregates per year (ANIO); the name is kept so existing readers of the
# table keep working.
spark.sql(
f"""
CREATE OR REPLACE TABLE workspace.pmx_etl.rank_top_pozos_mensual
USING DELTA
AS
{query}
"""
)

In [0]:
import plotly.express as px

# Run the ranking query and bring the result into pandas, ordered by well and
# then by year so that each well's line is drawn in chronological order.
df = spark.sql(query)
pdf = df.toPandas().sort_values(["POZO", "ANIO"])

# Line chart: one trace per well, year on the x axis, annual total on the y axis.
axis_labels = {"ANIO":"Año", "TOTAL_ANUAL":"Total anual (MBD)", "POZO":"Pozo"}
fig = px.line(
    data_frame=pdf,
    x="ANIO",
    y="TOTAL_ANUAL",
    color="POZO",
    markers=True,
    title="Top 10 pozos por año - Evolución (TOTAL_ANUAL MBD)",
    labels=axis_labels,
)

# Draw both the connecting lines and the individual data points.
fig.update_traces(mode="lines+markers")

# Fixed figure height, tight margins, and a titled legend.
fig.update_layout(
    height=600,
    margin={"l": 20, "r": 20, "t": 60, "b": 20},
    legend_title="Pozo",
)

display(fig)