In [0]:
from pyspark.sql import SparkSession
from pyspark.sql import functions as F
from pyspark.sql.window import Window
from pyspark.sql.functions import row_number

# Initialize Spark session
spark = SparkSession.builder.appName("EuroCup2024").getOrCreate()

In [0]:
columns = [ "grp", "team"]
data = [
    ["A", "Germany"],
    ["A", "Scotland"],
    ["A", "Hungary"],
    ["A", "Switzerland"],
    ["B", "Spain"],
    ["B", "Croatia"],
    ["B", "Italy"],
    ["B", "Albania"],
    ["C", "Slovenia"],
    ["C", "Denmark"],
    ["C", "Serbia"],
    ["C", "England"],
    ["D", "Finland"],    #playoff winner A  
    ["D", "Netherlands"],
    ["D", "Austria"],
    ["D", "France"],     
    ["E", "Belgium"],
    ["E", "Slovakia"],
    ["E", "Romania"],
    ["E", "Iceland"],   #playoff winner B
    ["F", "Türkiye"],
    ["F", "Greece"],    #playoff winner C  
    ["F", "Portugal"],
    ["F", "Czechia"]
]

teams = spark.createDataFrame(data, columns)


In [0]:
#generate points table 

teams.createOrReplaceTempView("teams")
# query fro points table
group_stage_df = spark.sql(
"""
WITH cte2 AS (
        WITH cte AS (
            SELECT CONCAT(a.team, " VS ", b.team) AS match_fixtures,
                a.team AS team1, a.grp AS grp1, b.team AS team2, b.grp AS grp2,
                FLOOR(RAND()*5) AS team1_score, FLOOR(RAND()*5) AS team2_score,
                CASE WHEN team1_score > team2_score THEN 3
                        WHEN team1_score = team2_score THEN 1 ELSE 0 END AS team1pts,
                CASE WHEN team1_score < team2_score THEN 3
                        WHEN team1_score = team2_score THEN 1 ELSE 0 END AS team2pts
            FROM teams a
            CROSS JOIN teams b
            ON a.grp = b.grp AND a.team < b.team
        )
        SELECT team1, grp1, team1_score, team2_score, team1pts 
        FROM cte 
        UNION ALL 
        SELECT team2, grp2, team1_score, team2_score, team2pts AS pts 
        FROM cte 
        )
        SELECT team1 AS team, grp1 AS grp, 3 AS mp,
            FLOOR(SUM(team1pts)/3) AS win,
            CASE WHEN SUM(team1pts) % 3 = 0 THEN 0 ELSE 1 END AS draw,
            CASE WHEN win + draw = 3 THEN 0
                    WHEN win + draw = 2 THEN 1
                    WHEN win + draw = 1 THEN 2 ELSE 3 END AS loss,
            SUM(team1_score) AS gf,
            SUM(team2_score) AS ga,
            SUM(team1_score) - SUM(team2_score) AS gd,
            SUM(team1pts) AS pts
        FROM cte2 
        GROUP BY team1, grp1 
        ORDER BY grp, pts DESC""" 
)

display(group_stage_df)

team,grp,mp,win,draw,loss,gf,ga,gd,pts
Hungary,A,3,2,1,0,7,5,2,7
Scotland,A,3,1,1,1,10,7,3,4
Germany,A,3,1,0,2,6,7,-1,3
Switzerland,A,3,1,0,2,9,7,2,3
Italy,B,3,2,1,0,5,7,-2,7
Spain,B,3,2,0,1,8,10,-2,6
Albania,B,3,0,1,2,5,6,-1,2
Croatia,B,3,0,1,2,6,11,-5,1
Serbia,C,3,2,0,1,4,3,1,6
England,C,3,2,0,1,8,2,6,6


### Group of 16

In [0]:
# group of 16 qualifications
from pyspark.sql.functions import concat, col

windowSpec = Window.partitionBy("grp").orderBy(F.col("pts").desc(), F.col("gd").desc())
group_stage_df = group_stage_df.withColumn("group_rank", row_number().over(windowSpec))
group_stage_df.createOrReplaceTempView("groupStage")

group_of_16_df = spark.sql(
            """
            SELECT * \
            FROM groupStage \
            WHERE group_rank <= 3 \
            ORDER BY CASE WHEN group_rank <= 2 THEN pts END DESC, \
                    CASE WHEN group_rank <= 2 THEN gd END DESC, \
                    CASE WHEN group_rank = 3 THEN pts END DESC, \
                    CASE WHEN group_rank = 3 THEN gd END DESC \
            LIMIT 16"""
        )

group_of_16_df = group_of_16_df.withColumn("id", concat(F.col("group_rank"), F.col("grp")))
group_of_16_df.show()

+--------+---+---+---+----+----+---+---+---+---+----------+---+
|    team|grp| mp|win|draw|loss| gf| ga| gd|pts|group_rank| id|
+--------+---+---+---+----+----+---+---+---+---+----------+---+
| Romania|  E|  3|  3|   0|   0|  6| 10| -4|  9|         1| 1E|
| Hungary|  A|  3|  2|   1|   0|  7|  5|  2|  7|         1| 1A|
|   Italy|  B|  3|  2|   1|   0|  5|  7| -2|  7|         1| 1B|
| England|  C|  3|  2|   0|   1|  8|  2|  6|  6|         1| 1C|
|  Serbia|  C|  3|  2|   0|   1|  4|  3|  1|  6|         2| 2C|
|  France|  D|  3|  2|   0|   1|  6|  5|  1|  6|         1| 1D|
|   Spain|  B|  3|  2|   0|   1|  8| 10| -2|  6|         2| 2B|
|Portugal|  F|  3|  1|   1|   1|  9|  5|  4|  5|         1| 1F|
| Czechia|  F|  3|  1|   1|   1|  7|  4|  3|  5|         2| 2F|
|Scotland|  A|  3|  1|   1|   1| 10|  7|  3|  4|         2| 2A|
| Belgium|  E|  3|  1|   1|   1|  7|  7|  0|  4|         2| 2E|
| Austria|  D|  3|  1|   1|   1|  8|  9| -1|  4|         2| 2D|
| Denmark|  C|  3|  2|   0|   1|  5|  4|

###Specific match-ups 


In [0]:
Third-placed teams
qualify from groups     1B  1C  1E  1F
                        vs  vs  vs  vs

A	B	C	D			3A	3D	3B	3C
A	B	C		E		3A	3E	3B	3C
A	B	C			F	3A	3F	3B	3C
A	B		D	E		3D	3E	3A	3B
A	B		D		F	3D	3F	3A	3B
A	B			E	F	3E	3F	3B	3A
A		C	D	E		3E	3D	3C	3A
A		C	D		F	3F	3D	3C	3A
A		C		E	F	3E	3F	3C	3A
A			D	E	F	3E	3F	3D	3A
    B	C	D	E		3E	3D	3B	3C
    B	C	D		F	3F	3D	3C	3B
    B	C		E	F	3F	3E	3C	3B
    B		D	E	F	3F	3E	3D	3B
        C	D	E	F	3F	3E	3D	3C



In [0]:
from PIL import Image

In [0]:
displayHTML("<img src='dbfs:/FileStore/match_ups.jpg'>")

In [0]:
group_of_16_df.createOrReplaceTempView("group_of_16")

team_original_df = spark.sql(
    """select id 
    from {} 
    where group_rank =3""".format("group_of_16")
)
team_original_df.show()
id_list = [row.id for row in team_original_df.collect()]

# Print the list of values
print(id_list)

team_list = [
    ["3A", "3D", "3B", "3C"],
    ["3A", "3E", "3B", "3C"],
    ["3A", "3F", "3B", "3C"],
    ["3D", "3E", "3A", "3B"],
    ["3D", "3F", "3A", "3B"],
    ["3E", "3F", "3B", "3A"],
    ["3E", "3D", "3C", "3A"],
    ["3F", "3D", "3C", "3A"],
    ["3E", "3F", "3C", "3A"],
    ["3E", "3F", "3D", "3A"],
    ["3E", "3D", "3B", "3C"],
    ["3F", "3D", "3C", "3B"],
    ["3F", "3E", "3C", "3B"],
    ["3F", "3E", "3D", "3B"],
    ["3F", "3E", "3D", "3C"]
]

for i in range (0,len(team_list)):
    if sorted(id_list) == sorted(team_list[i]):
        team2_list=team_list[i]
        break

team1_list=['1B', '1C', '1E', '1F']

data = list(zip(team1_list,team2_list))
match_fixture_1_df = spark.createDataFrame(data, ["Team1", "Team2"])

team3_list =['1A','2D','1D','2A']
team4_list =['2C','2E','2F','2D']

data2 = list(zip(team3_list,team4_list))
match_fixture_2_df = spark.createDataFrame(data2, ["Team1", "Team2"])
match_fixture_1_df.show()
match_fixture_2_df.show()

+---+
| id|
+---+
| 3C|
| 3E|
| 3D|
| 3F|
+---+

['3C', '3E', '3D', '3F']
+-----+-----+
|Team1|Team2|
+-----+-----+
|   1B|   3F|
|   1C|   3E|
|   1E|   3D|
|   1F|   3C|
+-----+-----+

+-----+-----+
|Team1|Team2|
+-----+-----+
|   1A|   2C|
|   2D|   2E|
|   1D|   2F|
|   2A|   2D|
+-----+-----+



### Knockouts

In [0]:
match_fixture_1_df.createOrReplaceTempView("knockout_1")
match_fixture_2_df.createOrReplaceTempView("knockout_2")

# join DataFrames based on the id column
def get_knockout_winners(match_fixture, group_of_16):
    joined_df = spark.sql("""
        SELECT mf.Team1 AS id, 
        g1.team AS Team1, 
        mf.Team2 AS id, 
        g2.team AS Team2,
        CASE WHEN rand() > 0.5 THEN g1.team ELSE g2.team END AS winner
        FROM {} mf
        JOIN {} g1 ON mf.Team1 = g1.id
        JOIN {} g2 ON mf.Team2 = g2.id
    """.format(match_fixture,group_of_16,group_of_16))
    return joined_df

knockout_1_df = get_knockout_winners("knockout_1", "group_of_16")
knockout_2_df = get_knockout_winners("knockout_2", "group_of_16")

print("Knockout 1 result")
knockout_1_df.show()
print("Knockout 2 result")
knockout_2_df.show()


Knockout 1 result
+---+--------+---+-------+-------+
| id|   Team1| id|  Team2| winner|
+---+--------+---+-------+-------+
| 1B|   Italy| 3F|Türkiye|  Italy|
| 1C| England| 3E|Iceland|Iceland|
| 1E| Romania| 3D|Finland|Finland|
| 1F|Portugal| 3C|Denmark|Denmark|
+---+--------+---+-------+-------+

Knockout 2 result
+---+--------+---+-------+--------+
| id|   Team1| id|  Team2|  winner|
+---+--------+---+-------+--------+
| 1A| Hungary| 2C| Serbia|  Serbia|
| 2D| Austria| 2E|Belgium| Belgium|
| 1D|  France| 2F|Czechia| Czechia|
| 2A|Scotland| 2D|Austria|Scotland|
+---+--------+---+-------+--------+



### Quater final

In [0]:
quarterfinalist_1_df = knockout_1_df.withColumn("row_num", F.monotonically_increasing_id()).select("winner", "row_num")
quarterfinalist_2_df = knockout_2_df.withColumn("row_num", F.monotonically_increasing_id()).select("winner", "row_num")

quarterfinalist_1_df.createOrReplaceTempView("quarterfinalist_1")
quarterfinalist_2_df.createOrReplaceTempView("quarterfinalist_2")

def get_quarterfinal_winners(quarterfinalist_1, quarterfinalist_2):
    winner_df = spark.sql(
        """SELECT q1.winner as Team1, q2.winner as Team2,
        CASE WHEN rand() > 0.5 THEN q1.winner ELSE q2.winner END AS winner
        FROM {} q1
        JOIN {} q2
        ON q1.row_num = q2.row_num """.format(quarterfinalist_1, quarterfinalist_2)
    )
    return winner_df

quarterfinal_df = get_quarterfinal_winners("quarterfinalist_1", "quarterfinalist_2")

print("Quarter Final Result")
quarterfinal_df.show()


Quarter Final Result
+-------+--------+--------+
|  Team1|   Team2|  winner|
+-------+--------+--------+
|  Italy|  Serbia|   Italy|
|Iceland| Belgium| Belgium|
|Finland| Czechia| Czechia|
|Denmark|Scotland|Scotland|
+-------+--------+--------+



### Semi Final

In [0]:
semifinalist_df = quarterfinal_df.withColumn("row_num", F.monotonically_increasing_id()).select("winner", "row_num")
# semifinalist_df.show()
semifinal_df = spark.sql(
    """
    SELECT sf1.winner as Team1,
    sf2.winner as Team2,
    CASE WHEN rand() > 0.5 THEN sf1.winner ELSE sf2.winner END AS Winner
    FROM semis sf1
    JOIN semis sf2
    ON sf1.row_num = sf2.row_num -1
    where sf1.row_num % 2 = 0 """    
)
print("Semi Final Result")
semifinal_df.show()

Semi Final Result
+-----------+--------+-----------+
|      Team1|   Team2|     Winner|
+-----------+--------+-----------+
|Switzerland|Slovakia|Switzerland|
|    Austria| Türkiye|    Austria|
+-----------+--------+-----------+



### Final Match

In [0]:
semifinal_df.createOrReplaceTempView("finalist")
finals_df = spark.sql("""
        SELECT f1.Winner AS team1, f2.Winner AS team2, CASE WHEN rand() > 0.5 THEN f1.Winner ELSE f1.Winner END AS Winner
        FROM finalist AS f1
        JOIN finalist AS f2
        ON f1.Winner != f2.Winner
        limit 1"""
)

print("---------- Euro Cup 2024 Final ------------" )
display(finals_df)

---------- Euro Cup 2024 Final ------------


team1,team2,Winner
Switzerland,Austria,Switzerland


In [0]:

Third-placed teams
qualify from groups     1B  1C  1E  1F
                        vs  vs  vs  vs

A	B	C	D			3A	3D	3B	3C
A	B	C		E		3A	3E	3B	3C
A	B	C			F	3A	3F	3B	3C
A	B		D	E		3D	3E	3A	3B
A	B		D		F	3D	3F	3A	3B
A	B			E	F	3E	3F	3B	3A
A		C	D	E		3E	3D	3C	3A
A		C	D		F	3F	3D	3C	3A
A		C		E	F	3E	3F	3C	3A
A			D	E	F	3E	3F	3D	3A
    B	C	D	E		3E	3D	3B	3C
    B	C	D		F	3F	3D	3C	3B
    B	C		E	F	3F	3E	3C	3B
    B		D	E	F	3F	3E	3D	3B
        C	D	E	F	3F	3E	3D	3C


