In [0]:
from pyspark.sql import SparkSession
from pyspark.sql import functions as F
from pyspark.sql.window import Window
from pyspark.sql.functions import row_number

# goal_for – goals_scored goals_conceded
# goal_agnst – goals_conceded
# mp - matches played
#  https://sparkbyexamples.com/pyspark/pyspark-window-functions/#ranking-functions
def euro_cup_2024_simulation():
    spark = SparkSession.builder.appName("EuroCup2024").getOrCreate()
    def euro_cup_teams(spark):
        columns = [ "grp", "team"]
        data = [
            ["A", "Germany"],
            ["A", "Scotland"],
            ["A", "Hungary"],
            ["A", "Switzerland"],
            ["B", "Spain"],
            ["B", "Croatia"],
            ["B", "Italy"],
            ["B", "Albania"],
            ["C", "Slovenia"],
            ["C", "Denmark"],
            ["C", "Serbia"],
            ["C", "England"],
            ["D", "Finland"], #playoff winner A  
            ["D", "Netherlands"],
            ["D", "Austria"],
            ["D", "France"],
            ["E", "Belgium"],
            ["E", "Slovakia"],
            ["E", "Romania"],
            ["E", "Iceland"],   #playoff winner B
            ["F", "Türkiye"],
            ["F", "Greece"],    #playoff winner c
            ["F", "Portugal"],
            ["F", "Czechia"]
        ]

        return spark.createDataFrame(data, columns)

    def group_stage_modification(teams):

        teams.createOrReplaceTempView("teams")
        group_stage_df = spark.sql(
        """
        WITH cte2 AS (
        WITH cte AS (
            SELECT CONCAT(a.team, " VS ", b.team) AS match_fixtures,
                a.team AS team1, a.grp AS grp1, b.team AS team2, b.grp AS grp2,
                FLOOR(RAND()*5) AS team1_score, FLOOR(RAND()*5) AS team2_score,
                CASE WHEN team1_score > team2_score THEN 3
                        WHEN team1_score = team2_score THEN 1 ELSE 0 END AS team1pts,
                CASE WHEN team1_score < team2_score THEN 3
                        WHEN team1_score = team2_score THEN 1 ELSE 0 END AS team2pts
            FROM teams a
            CROSS JOIN teams b
            ON a.grp = b.grp AND a.team < b.team
        )
        SELECT team1, grp1, team1_score, team2_score, team1pts 
        FROM cte 
        UNION ALL 
        SELECT team2, grp2, team1_score, team2_score, team2pts AS pts 
        FROM cte 
        )
        SELECT team1 AS team, grp1 AS grp, 3 AS mp,
            FLOOR(SUM(team1pts)/3) AS win,
            CASE WHEN SUM(team1pts) % 3 = 0 THEN 0 ELSE 1 END AS draw,
            CASE WHEN win + draw = 3 THEN 0
                    WHEN win + draw = 2 THEN 1
                    WHEN win + draw = 1 THEN 2 ELSE 3 END AS loss,
            SUM(team1_score) AS gf,
            SUM(team2_score) AS ga,
            SUM(team1_score) - SUM(team2_score) AS gd,
            SUM(team1pts) AS pts
        FROM cte2 
        GROUP BY team1, grp1 
        ORDER BY grp, pts DESC""" 
        )

        print("Group stage table")
        display(group_stage_df)

        return group_stage_df

    # Group of 16
    def group_of_16(group_stage_df):
        
        windowSpec = Window.partitionBy("grp").orderBy(F.col("pts").desc(), F.col("gd").desc())
        group_stage_df = group_stage_df.withColumn("group_rank", row_number().over(windowSpec))
        group_stage_df.createOrReplaceTempView("groupStage")

        result = spark.sql(
            """
            SELECT * \
            FROM groupStage \
            WHERE group_rank <= 3 \
            ORDER BY CASE WHEN group_rank <= 2 THEN pts END DESC, \
                    CASE WHEN group_rank <= 2 THEN gd END DESC, \
                    CASE WHEN group_rank = 3 THEN pts END DESC, \
                    CASE WHEN group_rank = 3 THEN gd END DESC \
            LIMIT 16"""
        )

        print("Group of 16")
        display(result)
        return result

    # KnockOuts
    def knockout_stage_result(group_of_16_df):
        # random shuffling
        group_of_16_df = group_of_16_df.withColumn("random", F.round(F.rand(), 3))

        window_spec = Window.partitionBy().orderBy("random")
        group_of_16_df = group_of_16_df.withColumn("random_rank", row_number().over(window_spec))

        # two groups with 8 teams each
        group_1_df = group_of_16_df.filter("random_rank <= 8").select("team", "random_rank")
        group_2_df = group_of_16_df.filter("random_rank > 8").select("team", "random_rank")

        group_1_df.createOrReplaceTempView("group_1")
        group_2_df.createOrReplaceTempView("group_2")

        def knockouts(group, r):
            result = spark.sql(
                "SELECT \
                    g1.team AS team1, \
                    g2.team AS team2, \
                    CASE WHEN rand() > 0.5 THEN g1.team ELSE g2.team END AS winner \
                FROM \
                    (SELECT team, random_rank FROM {} WHERE random_rank <= {}) g1 \
                JOIN \
                    (SELECT team, random_rank FROM {} WHERE random_rank > {}) g2 \
                ON \
                    g1.random_rank = g2.random_rank - 4".format(group, r, group, r)
            )
            return result

        knockouts_1_df = knockouts("group_1", 4)
        knockouts_2_df = knockouts("group_2", 12)

        print("Knockouts 1 result")
        display(knockouts_1_df)

        print("Knockouts  2 result")
        display(knockouts_2_df)

        return knockouts_1_df, knockouts_2_df

    # Quater Finals
    def quarterfinals_result(knockouts_1_df, knockouts_2_df):
        
        #window_spec = Window.partitionBy().orderBy("winner")

        quarterfinalist_1_df = knockouts_1_df.withColumn("id", F.monotonically_increasing_id()).select("winner", "id")
        quarterfinalist_2_df = knockouts_2_df.withColumn("id", F.monotonically_increasing_id()).select("winner", "id")

        quarterfinalist_1_df.createOrReplaceTempView("quarterfinalist_1")
        quarterfinalist_2_df.createOrReplaceTempView("quarterfinalist_2")

        # function to get quarter finals results
        def generate_quarterfinals(group_view):
            result = spark.sql(
                """SELECT
                    g1.winner AS team1,
                    g2.winner AS team2,
                    CASE WHEN rand() > 0.5 THEN g1.winner ELSE g2.winner END AS winner
                FROM {} g1
                JOIN {} g2 
                ON g1.id = g2.id - 1 
                Where g1.id % 2 = 0""" \
            .format(group_view, group_view)
            )
            return result

        quarterfinal_1_df = generate_quarterfinals("quarterfinalist_1")
        quarterfinal_2_df = generate_quarterfinals("quarterfinalist_2")

        print("Quarter Final 1 result")
        display(quarterfinal_1_df)

        print("Quarter Final 2 result")
        display(quarterfinal_2_df)

        return quarterfinal_1_df, quarterfinal_2_df

    # Semis
    def semifinal_results(quarterfinal_1_df, quarterfinal_2_df):

        quarterfinal_1_df.createOrReplaceTempView("semifinalist_1")
        quarterfinal_2_df.createOrReplaceTempView("semifinalist_2")

        # function to get semifinals result
        def generate_semifinals(teams):
            result = spark.sql("""
                SELECT team1, team2, CASE WHEN rand() > 0.5 THEN team1 ELSE team2 END AS winner
                FROM (
                    SELECT winner AS team1, LEAD(winner) OVER (ORDER BY rand()) AS team2
                    FROM {}
                    LIMIT 1
                )
            """.format(teams))
            return result

        semifinal_1_df = generate_semifinals("semifinalist_1")
        semifinal_2_df = generate_semifinals("semifinalist_2")

        # Display semifinal result
        print("Semifinal 1 Result")
        display(semifinal_1_df)

        print("Semifinal 2 Result")
        display(semifinal_2_df)

        return semifinal_1_df, semifinal_2_df

    # Finals
    def final_stage_result(semifinal_1_df, semifinal_2_df):

        semifinal_1_df.createOrReplaceTempView("finalist_1")
        semifinal_2_df.createOrReplaceTempView("finalist_2")

        # Function to get winner
        def generate_finals(team1,teams2):
            result = spark.sql("""
                SELECT t1.winner AS team1, t2.winner AS team2, CASE WHEN rand() > 0.5 THEN t1.winner ELSE t1.winner END AS winner
                FROM {} AS t1
                JOIN {} AS t2
                ON t1.winner != t2.winner
            """.format(team1,teams2))
            return result

        finals_df = generate_finals("finalist_1" , "finalist_2")

        print("Euro Cup 2024 Result")
        display(finals_df)

        # Main simulation

    teams = euro_cup_teams(spark)
    group_stage_df = group_stage_modification(teams)
    group_of_16_df = group_of_16(group_stage_df)
    knockouts_1_df, knockouts_2_df = knockout_stage_result(group_of_16_df)
    quarterfinal_1_df, quarterfinal_2_df =quarterfinals_result(knockouts_1_df, knockouts_2_df)
    semifinal_1_df, semifinal_2_df = semifinal_results(quarterfinal_1_df, quarterfinal_2_df)
    final_stage_result(semifinal_1_df, semifinal_2_df)

euro_cup_2024_simulation()

Group stage table


team,grp,mp,win,draw,loss,gf,ga,gd,pts
Germany,A,3,1,1,1,6,3,3,5
Scotland,A,3,1,1,1,7,6,1,5
Hungary,A,3,1,1,1,10,3,7,4
Switzerland,A,3,0,1,2,7,2,5,1
Italy,B,3,3,0,0,5,6,-1,9
Croatia,B,3,1,1,1,6,6,0,4
Albania,B,3,1,1,1,8,8,0,4
Spain,B,3,0,0,3,5,0,5,0
Slovenia,C,3,2,0,1,4,8,-4,6
England,C,3,1,1,1,7,9,-2,4


Group of 16


team,grp,mp,win,draw,loss,gf,ga,gd,pts,group_rank
Finland,D,3,3,0,0,8,4,4,9,1
Italy,B,3,3,0,0,5,6,-1,9,1
Türkiye,F,3,3,0,0,5,9,-4,9,1
Romania,E,3,2,1,0,7,7,0,7,1
Slovenia,C,3,2,0,1,4,8,-4,6,1
Portugal,F,3,2,0,1,3,10,-7,6,2
Germany,A,3,1,1,1,6,3,3,5,1
Scotland,A,3,1,1,1,7,6,1,5,2
Iceland,E,3,1,1,1,5,7,-2,5,2
Croatia,B,3,1,1,1,6,6,0,4,2


Knockouts 1 result


team1,team2,winner
Slovakia,Iceland,Iceland
Denmark,Slovenia,Denmark
Hungary,Portugal,Hungary
Croatia,Austria,Croatia


Knockouts  2 result


team1,team2,winner
Germany,Italy,Italy
England,Scotland,England
Albania,Türkiye,Albania
Romania,Finland,Romania


Quarter Final 1 result


team1,team2,winner
Iceland,Denmark,Iceland
Hungary,Croatia,Croatia


Quarter Final 2 result


team1,team2,winner
Italy,England,England
Albania,Romania,Romania


Semifinal 1 Result


team1,team2,winner
Croatia,Iceland,Croatia


Semifinal 2 Result


team1,team2,winner
Romania,England,England


Euro Cup 2024 Result


team1,team2,winner
Croatia,England,Croatia
