#Group Stage result

In [0]:
from pyspark.sql import SparkSession
import pyspark.sql.functions as F
from pyspark.sql.window import Window

spark = SparkSession.builder.appName("EuroGroupStageResults").getOrCreate()

data = [
    ("A", "Italy", 3, 3, 0, 0, 7, 0, 9),
    ("A", "Wales", 3, 1, 1, 1, 3, 2, 4),
    ("A", "Switzerland", 3, 1, 1, 1, 4, 5, 4),
    ("A", "Turkey", 3, 0, 0, 3, 1, 8, 0),
    ("B", "Belgium", 3, 3, 0, 0, 7, 1, 9),
    ("B", "Denmark", 3, 1, 0, 2, 5, 4, 3),
    ("B", "Finland", 3, 1, 0, 2, 1, 3, 3),
    ("B", "Russia", 3, 0, 0, 3, 2, 7, 0),
    ("C", "Netherlands", 3, 3, 0, 0, 8, 2, 9),
    ("C", "Austria", 3, 2, 0, 1, 4, 3, 6),
    ("C", "Ukraine", 3, 1, 0, 2, 4, 5, 3),
    ("C", "North Macedonia", 3, 0, 0, 3, 2, 8, 0),
    ("D", "England", 3, 2, 1, 0, 2, 0, 7),
    ("D", "Croatia", 3, 1, 0, 2, 4, 5, 3),
    ("D", "Czech Republic", 3, 1, 0, 2, 3, 6, 3),
    ("D", "Scotland", 3, 0, 1, 2, 1, 5, 1),
    ("E", "Sweden", 3, 2, 0, 1, 4, 2, 6),
    ("E", "Spain", 3, 1, 2, 0, 6, 1, 5),
    ("E", "Slovakia", 3, 1, 1, 1, 2, 2, 4),
    ("E", "Poland", 3, 0, 1, 2, 4, 6, 1),
    ("F", "France", 3, 1, 1, 1, 4, 3, 4),
    ("F", "Germany", 3, 1, 1, 1, 6, 5, 4),
    ("F", "Portugal", 3, 1, 1, 1, 7, 6, 4),
    ("F", "Hungary", 3, 0, 1, 2, 3, 6, 1)
]

columns = ["group", "team_name", "matches_played", "wins", "draws", "losses", "goals_scored", "goals_conceded", "points"]

group_stage_results_df = spark.createDataFrame(data, columns)

# Show the group stage results DataFrame
group_stage_results_df.show()


+-----+---------------+--------------+----+-----+------+------------+--------------+------+
|group|      team_name|matches_played|wins|draws|losses|goals_scored|goals_conceded|points|
+-----+---------------+--------------+----+-----+------+------------+--------------+------+
|    A|          Italy|             3|   3|    0|     0|           7|             0|     9|
|    A|          Wales|             3|   1|    1|     1|           3|             2|     4|
|    A|    Switzerland|             3|   1|    1|     1|           4|             5|     4|
|    A|         Turkey|             3|   0|    0|     3|           1|             8|     0|
|    B|        Belgium|             3|   3|    0|     0|           7|             1|     9|
|    B|        Denmark|             3|   1|    0|     2|           5|             4|     3|
|    B|        Finland|             3|   1|    0|     2|           1|             3|     3|
|    B|         Russia|             3|   0|    0|     3|           2|           

In [0]:
group_stage_results_df.count()

Out[78]: 24

In [0]:
group_stage_results_df = group_stage_results_df.orderBy(F.col("points").desc())
group_stage_results_df.show()

+-----+--------------+--------------+----+-----+------+------------+--------------+------+
|group|     team_name|matches_played|wins|draws|losses|goals_scored|goals_conceded|points|
+-----+--------------+--------------+----+-----+------+------------+--------------+------+
|    C|   Netherlands|             3|   3|    0|     0|           8|             2|     9|
|    B|       Belgium|             3|   3|    0|     0|           7|             1|     9|
|    A|         Italy|             3|   3|    0|     0|           7|             0|     9|
|    D|       England|             3|   2|    1|     0|           2|             0|     7|
|    C|       Austria|             3|   2|    0|     1|           4|             3|     6|
|    E|        Sweden|             3|   2|    0|     1|           4|             2|     6|
|    E|         Spain|             3|   1|    2|     0|           6|             1|     5|
|    E|      Slovakia|             3|   1|    1|     1|           2|             2|     4|

#Qulified teams After Group Stage

In [0]:
# Create a window specification to partition by group and order by points descending
window_spec = Window.partitionBy("group").orderBy(F.col("points").desc())

# Add a row number column based on the window specification
ranked_df = group_stage_results_df.withColumn("rank", F.row_number().over(window_spec))

# Filter the ranked DataFrame to select the top 3 teams from each group
top2_teams_df = ranked_df.filter(F.col("rank") <= 2)

# Select the third-placed team from each group
third_place_df = ranked_df.filter(F.col("rank") == 3)

# Select the top 4 teams overall based on points
top_4_teams_df = third_place_df.orderBy(F.col("points").desc()).limit(4)

# Combine the third-placed teams and the top 4 teams overall
group_stage_selected_teams = top2_teams_df.union(top_4_teams_df)

# ordering df point wise
group_stage_selected_teams = group_stage_selected_teams.orderBy(F.col("points").desc())

# Show the new table with the selected teams
group_stage_selected_teams.show()

# Save the top 3 teams DataFrame as a new table
group_stage_selected_teams.createOrReplaceTempView("selected_teams_df")


+-----+-----------+--------------+----+-----+------+------------+--------------+------+----+
|group|  team_name|matches_played|wins|draws|losses|goals_scored|goals_conceded|points|rank|
+-----+-----------+--------------+----+-----+------+------------+--------------+------+----+
|    A|      Italy|             3|   3|    0|     0|           7|             0|     9|   1|
|    B|    Belgium|             3|   3|    0|     0|           7|             1|     9|   1|
|    C|Netherlands|             3|   3|    0|     0|           8|             2|     9|   1|
|    D|    England|             3|   2|    1|     0|           2|             0|     7|   1|
|    C|    Austria|             3|   2|    0|     1|           4|             3|     6|   2|
|    E|     Sweden|             3|   2|    0|     1|           4|             2|     6|   1|
|    E|      Spain|             3|   1|    2|     0|           6|             1|     5|   2|
|    A|Switzerland|             3|   1|    1|     1|           4|     

In [0]:
# count of teams in selected teams
group_stage_selected_teams.count()

Out[81]: 16

# Randomly Divided in two groups of 8-8

In [0]:

# Randomly shuffle the DataFrame to create random matches
shuffled_group_stage_df = group_stage_selected_teams.orderBy(F.rand())

# Assign a unique identifier to each row
shuffled_group_stage_df = shuffled_group_stage_df.withColumn("id", F.monotonically_increasing_id())
shuffled_group_stage_df.show()

# Create a temporary view for further query
shuffled_group_stage_df.createOrReplaceTempView("shuffled_group_stage_df")

knokout_matches_df = spark.sql(
    """
    SELECT
        t1.team_name AS team1,
        t2.team_name AS team2
    FROM
        shuffled_group_stage_df t1
    JOIN
        shuffled_group_stage_df t2
    ON
        t1.id = t2.id - 1
    WHERE
        t1.id % 2 = 0
    """
)

# Show the knockout matches DataFrame
knokout_matches_df.show()


+-----+-----------+--------------+----+-----+------+------------+--------------+------+----+---+
|group|  team_name|matches_played|wins|draws|losses|goals_scored|goals_conceded|points|rank| id|
+-----+-----------+--------------+----+-----+------+------------+--------------+------+----+---+
|    A|      Wales|             3|   1|    1|     1|           3|             2|     4|   2|  0|
|    A|Switzerland|             3|   1|    1|     1|           4|             5|     4|   3|  1|
|    B|    Denmark|             3|   1|    0|     2|           5|             4|     3|   2|  2|
|    D|    England|             3|   2|    1|     0|           2|             0|     7|   1|  3|
|    A|      Italy|             3|   3|    0|     0|           7|             0|     9|   1|  4|
|    B|    Belgium|             3|   3|    0|     0|           7|             1|     9|   1|  5|
|    E|      Spain|             3|   1|    2|     0|           6|             1|     5|   2|  6|
|    B|    Finland|           

# Knok out winners

In [0]:

# Simulate matches and decide winners
winners_knokout_df = knokout_matches_df.withColumn("winner", F.when(F.rand() > 0.5, F.col("team1")).otherwise(F.col("team2")))

# Select the winners of each match
winners_knokout_df = winners_knokout_df.select("winner")

# Create temporary view for further query
winners_knokout_df.createOrReplaceTempView("winners_knokout_df")

winners_knokout_df.show()

+-----------+
|     winner|
+-----------+
|      Wales|
|    England|
|      Italy|
|      Spain|
|     Sweden|
|    Austria|
|   Portugal|
|Netherlands|
+-----------+



#Quarter Finals

In [0]:

# Randomly shuffle the DataFrame to create random matches
shuffled_knokout_winners = winners_knokout_df.orderBy(F.rand())

# Assign a unique identifier to each row
shuffled_knokout_winners = shuffled_knokout_winners.withColumn("id", F.monotonically_increasing_id())
shuffled_knokout_winners.show()

# Create a temporary view for further query
shuffled_knokout_winners.createOrReplaceTempView("shuffled_knokout_winners")

quarter_final_matches_df = spark.sql(
    """
    SELECT
        t3.winner AS team1,
        t4.winner AS team2
    FROM
        shuffled_knokout_winners t3
    JOIN
        shuffled_knokout_winners t4
    ON
        t3.id = t4.id - 1
    WHERE
        t3.id % 2 = 0
    """
)

# Show the knockout matches DataFrame
quarter_final_matches_df.show()


+-----------+---+
|     winner| id|
+-----------+---+
|      Wales|  0|
|    England|  1|
|     Sweden|  2|
|Netherlands|  3|
|      Spain|  4|
|    Austria|  5|
|      Italy|  6|
|   Portugal|  7|
+-----------+---+

+------+-----------+
| team1|      team2|
+------+-----------+
| Wales|    England|
|Sweden|Netherlands|
| Spain|    Austria|
| Italy|   Portugal|
+------+-----------+



#Semi Finalist

In [0]:
winners_quarter_final__df = quarter_final_matches_df.withColumn("winner", F.when(F.rand() > 0.5, F.col("team1")).otherwise(F.col("team2")))

# Select the winners of each match
winners_quarter_final__df = winners_quarter_final__df.select("winner")

# Create temporary view for further query
winners_quarter_final__df.createOrReplaceTempView("winners_quarter_final__df")

winners_quarter_final__df.show()

+-----------+
|     winner|
+-----------+
|      Wales|
|Netherlands|
|      Spain|
|   Portugal|
+-----------+



#Semi Final

In [0]:

# Randomly shuffle the DataFrame to create random matches
shuffled_quarter_winners = winners_quarter_final__df.orderBy(F.rand())

# Assign a unique identifier to each row
shuffled_quarter_winners = shuffled_quarter_winners.withColumn("id", F.monotonically_increasing_id())
shuffled_quarter_winners.show()

# Create a temporary view for further query
shuffled_quarter_winners.createOrReplaceTempView("shuffled_quarter_winners")

semi_final_matches_df = spark.sql(
    """
    SELECT
        t3.winner AS team1,
        t4.winner AS team2
    FROM
        shuffled_quarter_winners t3
    JOIN
        shuffled_quarter_winners t4
    ON
        t3.id = t4.id - 1
    WHERE
        t3.id % 2 = 0
    """
)

# Show the knockout matches DataFrame
semi_final_matches_df.show()


+-----------+---+
|     winner| id|
+-----------+---+
|Netherlands|  0|
|   Portugal|  1|
|      Spain|  2|
|      Wales|  3|
+-----------+---+

+-----------+--------+
|      team1|   team2|
+-----------+--------+
|Netherlands|Portugal|
|      Spain|   Wales|
+-----------+--------+



#Finalists

In [0]:
Finalists_df = semi_final_matches_df.withColumn("winner", F.when(F.rand() > 0.5, F.col("team1")).otherwise(F.col("team2")))

# Select the winners of each match
Finalists_df = Finalists_df.select("winner")

# Create temporary view for further query


Finalists_df.show()

+--------+
|  winner|
+--------+
|Portugal|
|   Spain|
+--------+



# Winner

In [0]:
Finalists_df = Finalists_df.withColumn("id", F.monotonically_increasing_id())

# Create a temporary view for further query
Finalists_df.createOrReplaceTempView("Finalists_df")

final_matches_df = spark.sql(
    """
    SELECT
        t3.winner AS team1,
        t4.winner AS team2
    FROM
        Finalists_df t3
    JOIN
        Finalists_df t4
    ON
        t3.id = t4.id - 1
    WHERE
        t3.id % 2 = 0
    """
)

final_matches_df = final_matches_df.withColumn("winner", F.when(F.rand() > 0.5, F.col("team1")).otherwise(F.col("team2")))
final_matches_df.show()

# Select the winners of each match
Winner = final_matches_df.select("winner")

Winner.show()


+--------+-----+------+
|   team1|team2|winner|
+--------+-----+------+
|Portugal|Spain| Spain|
+--------+-----+------+

+------+
|winner|
+------+
| Spain|
+------+

