# PySpark Window Functions on a Football Dataset
Demonstrates ranking, cumulative goal totals, and LAG/LEAD comparisons using both Spark SQL and the DataFrame API.

In [ ]:
from pyspark.sql import SparkSession
import pyspark.sql.functions as F
from pyspark.sql.window import Window

# Reuse the active Spark session if one exists; otherwise start a new one
# under this notebook's application name.
spark = (
    SparkSession.builder
    .appName("Football Window Functions")
    .getOrCreate()
)

In [ ]:
# Load the World Cup CSV and expose it to both the DataFrame API
# (football_df) and Spark SQL (the 'football_table' temp view).
from pyspark import SparkFiles

url = 'https://raw.githubusercontent.com/footballcsv/world/main/WorldCup.csv'

# spark.read.csv() resolves its path through Hadoop filesystems and cannot be
# relied on to fetch http(s) URLs directly. Download the file through the
# SparkContext first, then read the local copy.
# NOTE(review): the file:// path below is resolved on the driver; on a
# multi-node cluster confirm executors can read the same path, or stage the
# file on shared storage instead.
spark.sparkContext.addFile(url)
local_csv = 'file://' + SparkFiles.get(url.rsplit('/', 1)[-1])

# header=True takes column names from the first row; inferSchema=True makes
# Spark scan the data to pick column types instead of reading all strings.
football_df = spark.read.csv(local_csv, header=True, inferSchema=True)
football_df.show(10)
football_df.createOrReplaceTempView('football_table')

In [ ]:
# Rank rows within each 'Year' partition by 'Goals', highest first.
# rank() assigns tied rows the same rank and leaves gaps after ties.
windowSpec = (
    Window.partitionBy('Year')
    .orderBy(F.col('Goals').desc())
)
rank_within_year = F.rank().over(windowSpec)
football_ranked = football_df.withColumn('Rank', rank_within_year)
football_ranked.show(n=10)

In [ ]:
# Running total of 'Goals' per 'Team', accumulated in 'Year' order.
# The explicit row frame spans from the partition's first row up to and
# including the current row.
windowSpec2 = (
    Window.partitionBy('Team')
    .orderBy('Year')
    .rowsBetween(Window.unboundedPreceding, Window.currentRow)
)
running_goals = F.sum('Goals').over(windowSpec2)
football_cumulative = football_df.withColumn('CumulativeGoals', running_goals)
football_cumulative.show(n=10)

In [ ]:
# Same windowing idea expressed in Spark SQL against the 'football_table'
# temp view: for each row, fetch the Goals value from the previous and the
# next row of the same Team (ordered by Year). Rows with no neighbour on
# that side get NULL.
query = """
SELECT *, 
       LAG(Goals,1) OVER(PARTITION BY Team ORDER BY Year) as PrevGoals,
       LEAD(Goals,1) OVER(PARTITION BY Team ORDER BY Year) as NextGoals
FROM football_table
"""
football_lag_lead = spark.sql(query)
football_lag_lead.show(n=10)