Table for the Groups of Teams

In [0]:
dbutils.fs.rm("dbfs:/user/hive/warehouse/teams_group",True)

Out[40]: True

In [0]:
dbutils.fs.rm("dbfs:/user/hive/warehouse/matches",True)

Out[41]: True

In [0]:
dbutils.fs.rm("dbfs:/user/hive/warehouse/third_place_combination",True)

Out[42]: True

In [0]:
%sql
-- Drop the table if it exists
DROP TABLE IF EXISTS teams_group;

In [0]:
%sql
CREATE or REPLACE TABLE teams_group (
    team_id INTEGER,
    team_name VARCHAR(20),
    group_name VARCHAR(1) 
);

In [0]:
%sql
INSERT INTO teams_group (team_id,team_name,group_name) VALUES
-- Group A
    (1, 'Germany','A'),
    (2, 'Scotland','A'),
    (3, 'Hungary','A'),
    (4, 'Switzerland','A'),

-- Group B
    (5, 'Spain','B'),
    (6, 'Croatia','B'),
    (7, 'Italy','B'),
    (8, 'Albania','B'),

-- Group C
    (9, 'Slovenia','C'),
    (10, 'Denmark','C'),
    (11, 'Serbia','C'),
    (12, 'England','C'),

-- Group D
    (13, 'Netherlands','D'),
    (14, 'Austria','D'),
    (15, 'France','D'),
    (16, 'Play-off Winner A','D'),

-- Group E
    (17, 'Belgium','E'),
    (18, 'Slovakia','E'),
    (19, 'Romania','E'),
    (20, 'Play-off Winner B','E'),

-- Group F
    (21, 'Turkey','F'),
    (22, 'Play-off Winner C','F'),
    (23, 'Portugal','F'),
    (24, 'Czech Republic','F');


num_affected_rows,num_inserted_rows
24,24


Table for Inserting all the matches in EuroCup2024

In [0]:
%sql
-- Drop the table if it exists
DROP TABLE IF EXISTS matches;

In [0]:
%sql
CREATE OR REPLACE TABLE matches(
    match_id integer,
    match_fixtures VARCHAR(50),
    group_name VARCHAR(1),
    home_team VARCHAR(20),
    away_team VARCHAR(20),
    home_team_goals INTEGER,
    away_team_goals INTEGER,
    match_results VARCHAR(20),
    match_stage VARCHAR(20)
);

Create & Insert Match Fixtures of all teams & groups

-- Total 36 Matches in total

In [0]:
%sql
INSERT INTO matches
SELECT
row_number() OVER (ORDER BY 1) as match_id,
concat(team1.team_name, ' Vs ' , team2.team_name )AS match_fixtures,
team1.group_name AS group_name,
team1.team_name AS home_team,
team2.team_name AS away_team,
cast(rand() * 6 AS INTEGER) AS goals_scored,
cast(rand() * 6 AS INTEGER) AS goals_allowed,
'NONE' AS match_result,
'Group Stage' AS match_stage
FROM
teams_group AS team1
CROSS JOIN
teams_group AS team2 
WHERE
team1.group_name = team2.group_name AND team1.team_id < team2.team_id;  --team2 ID should be greater than team1 ID to prevent repeatition 

num_affected_rows,num_inserted_rows
36,36


In [0]:
%sql
SELECT * FROM matches ORDER BY match_id;

match_id,match_fixtures,group_name,home_team,away_team,home_team_goals,away_team_goals,match_results,match_stage
1,Germany Vs Switzerland,A,Germany,Switzerland,0,1,NONE,Group Stage
2,Germany Vs Hungary,A,Germany,Hungary,3,3,NONE,Group Stage
3,Germany Vs Scotland,A,Germany,Scotland,1,5,NONE,Group Stage
4,Scotland Vs Switzerland,A,Scotland,Switzerland,2,2,NONE,Group Stage
5,Scotland Vs Hungary,A,Scotland,Hungary,3,4,NONE,Group Stage
6,Hungary Vs Switzerland,A,Hungary,Switzerland,0,0,NONE,Group Stage
7,Spain Vs Albania,B,Spain,Albania,1,4,NONE,Group Stage
8,Spain Vs Italy,B,Spain,Italy,4,0,NONE,Group Stage
9,Spain Vs Croatia,B,Spain,Croatia,5,5,NONE,Group Stage
10,Croatia Vs Albania,B,Croatia,Albania,1,4,NONE,Group Stage


Next we update Match Results.

We can see in the above tabe that the match_results is set to 'NONE' by default

In [0]:
%sql
UPDATE matches SET match_results = 
CASE
WHEN home_team_goals > away_team_goals THEN home_team
WHEN home_team_goals < away_team_goals THEN away_team
ELSE 'DRAW'
END;

num_affected_rows
36


In [0]:
%sql
SELECT * FROM matches ORDER BY match_id;

match_id,match_fixtures,group_name,home_team,away_team,home_team_goals,away_team_goals,match_results,match_stage
1,Germany Vs Switzerland,A,Germany,Switzerland,0,1,Switzerland,Group Stage
2,Germany Vs Hungary,A,Germany,Hungary,3,3,DRAW,Group Stage
3,Germany Vs Scotland,A,Germany,Scotland,1,5,Scotland,Group Stage
4,Scotland Vs Switzerland,A,Scotland,Switzerland,2,2,DRAW,Group Stage
5,Scotland Vs Hungary,A,Scotland,Hungary,3,4,Hungary,Group Stage
6,Hungary Vs Switzerland,A,Hungary,Switzerland,0,0,DRAW,Group Stage
7,Spain Vs Albania,B,Spain,Albania,1,4,Albania,Group Stage
8,Spain Vs Italy,B,Spain,Italy,4,0,Spain,Group Stage
9,Spain Vs Croatia,B,Spain,Croatia,5,5,DRAW,Group Stage
10,Croatia Vs Albania,B,Croatia,Albania,1,4,Albania,Group Stage


Create a new Table to store GroupStage matches statistics [ like WIN, LOSE, DRAW ]

In [0]:
%sql
CREATE
OR REPLACE TABLE match_stats AS
SELECT
  team_id,
  group_name,
  COUNT(*) AS matches_played,
  SUM (
    CASE
      WHEN goals_scored > goals_allowed THEN 1
      ELSE 0
    END
  ) AS wins,
  SUM (
    CASE
      WHEN goals_scored = goals_allowed THEN 1
      ELSE 0
    END
  ) AS draw,
  SUM(
    CASE
      WHEN goals_scored < goals_allowed THEN 1
      ELSE 0
    END
  ) AS lose,
  SUM(goals_scored) AS goals_scored,
  SUM(goals_allowed) AS goals_allowed,
  SUM(goals_scored) - SUM(goals_allowed) AS goal_diff,
  SUM(
    CASE
      WHEN goals_scored > goals_allowed THEN 3
      WHEN goals_scored = goals_allowed THEN 1
      ELSE 0
    END
  ) AS points
FROM(
    SELECT
      group_name,
      home_team AS team_id,
      home_team_goals AS goals_scored,
      away_team_goals AS goals_allowed
    FROM
      matches
    UNION ALL
    SELECT
      group_name,
      away_team AS team_id,
      away_team_goals AS goals_scored,
      home_team_goals AS goals_allowed
    FROM
      matches
  ) AS group_matches
GROUP BY
  team_id,
  group_name
ORDER BY
  group_name;

num_affected_rows,num_inserted_rows


Create new table to store all the qualifying teams from each group

-TOP 2 teams from each group forwards to TOP 16

-TOP 4 teams that placed 3rd from all groups (4 out of 6)

In [0]:
%sql
CREATE OR REPLACE TABLE round_of_16 AS
SELECT * FROM(
  SELECT team_id, group_name, points, goal_diff,
  row_number() OVER (PARTITION BY group_name ORDER BY points DESC, goal_diff DESC, goals_scored DESC, wins DESC)AS ranks
  FROM match_stats
)AS top2_teams
WHERE ranks <= 2;

num_affected_rows,num_inserted_rows


Top 4 teams out of 3rd position teams from each groups [ 4 out of 6 ]

In [0]:
%sql
INSERT INTO round_of_16
SELECT * FROM(
  SELECT team_id, group_name, points, goal_diff,
  row_number() OVER (PARTITION BY group_name ORDER BY points DESC, goal_diff DESC, goals_scored DESC, wins DESC) AS ranks
  FROM match_stats
)AS top2_teams
WHERE ranks = 3 LIMIT 4;

num_affected_rows,num_inserted_rows
4,4


In [0]:
%sql
SELECT * from round_of_16 ORDER BY group_name;

team_id,group_name,points,goal_diff,ranks
Scotland,A,4,3,3
Hungary,A,5,1,1
Switzerland,A,5,1,2
Croatia,B,4,0,3
Albania,B,6,3,1
Spain,B,4,1,2
Slovenia,C,4,4,3
Serbia,C,7,7,1
England,C,6,-2,2
Netherlands,D,2,-1,3


Inserting Matches into Round of 16 Match Table

Probable Combinations for the Matches in the Round of 16 Match Stage

In [0]:
%sql
CREATE or replace  TABLE third_place_combination(
  ID INTEGER,
  selected_groups VARCHAR(4),
  Team1B CHAR(1),
  Team1C CHAR(1),
  Team1E CHAR(1),
  Team1F CHAR(1)
);

-- Step 2: Insert the provided values into the "THIRDMATCHUP" table
INSERT INTO  third_place_combination (ID, selected_groups, Team1B, Team1C, Team1E, Team1F) VALUES
(1, 'ABCD', 'A', 'D', 'B', 'C'),
(2, 'ABCE', 'A', 'E', 'B', 'C'),
(3, 'ABCF', 'A', 'F', 'B', 'C'),
(4, 'ABDE', 'D', 'E', 'A', 'B'),
(5, 'ABDF', 'D', 'F', 'A', 'B'),
(6, 'ABEF', 'E', 'F', 'B', 'A'),
(7, 'ACDE', 'E', 'D', 'C', 'A'),
(8, 'ACDF', 'F', 'D', 'C', 'A'),
(9, 'ACEF', 'E', 'F', 'C', 'A'),
(10, 'ADEF', 'E', 'F', 'D', 'A'),
(11, 'BCDE', 'E', 'D', 'B', 'C'),
(12, 'BCDF', 'F', 'D', 'C', 'B'),
(13, 'BCEF', 'F', 'E', 'C', 'B'),
(14, 'BDEF', 'F', 'E', 'D', 'B'),
(15, 'CDEF', 'F', 'E', 'D', 'C');

num_affected_rows,num_inserted_rows
15,15


TOP 16  

Total 8 Matches from 37 to 44

In [0]:
%sql
INSERT INTO matches (match_id, match_fixtures, group_name, home_team, away_team, home_team_goals, away_team_goals, match_results, match_stage)
WITH combination AS (
  SELECT concat_ws('', sort_array(collect_list(group_name)))AS group_combination
  FROM round_of_16
  WHERE ranks = 3
),
keys AS (
  SELECT * FROM third_place_combination
  WHERE selected_groups = (SELECT group_combination FROM combination)
)
SELECT
row_number() OVER (ORDER BY 1) + 36 AS match_id,
CONCAT(r1.team_id, ' VS ' ,r2.team_id)AS match_fixtures,
'N' AS group_name,
r1.team_id AS home_team,
r2.team_id AS away_team,
CAST(rand() * 6 AS INTEGER) AS goals_scored,
CAST(rand() * 6 AS INTEGER) AS goals_allowed,
'NONE' AS match_result,
'R16' AS match_stage
FROM round_of_16 r1
JOIN round_of_16 r2 ON r1.group_name = 'A' AND r1.ranks = 1 AND r2.group_name = 'C' AND r2.ranks = 2


UNION ALL

SELECT 
    row_number() OVER (ORDER BY 1) + 37 AS match_id,
    CONCAT(r1.team_id, ' VS ', r2.team_id) AS match_fixtures,
    'N' AS group_name,
    r1.team_id AS home_team,
    r2.team_id AS away_team,
    CAST(rand() * 6 AS INT) AS goals_scored, 
    CAST(rand() * 6 AS INT) AS goals_allowed,
    'NONE' AS match_result,
    'R16' AS match_stage
FROM 
    round_of_16 r1
JOIN 
 round_of_16 r2 ON r1.group_name = 'A' AND r1.ranks = 2 AND r2.group_name = 'B' AND r2.ranks = 2
   

UNION ALL

SELECT 
    row_number() OVER (ORDER BY 1) + 38 AS match_id,
    CONCAT(r1.team_id, ' VS ', r2.team_id) AS match_fixtures,
    'N' AS group_name,
    r1.team_id AS home_team,
    r2.team_id AS away_team,
    CAST(rand() * 6 AS INT) AS goals_scored, 
    CAST(rand() * 6 AS INT) AS goals_allowed,
    'NONE' AS match_result,
    'R16' AS match_stage
FROM 
    round_of_16 r1
JOIN 
    round_of_16 r2 ON r1.group_name = 'B' AND r1.ranks = 1 AND r2.group_name = (SELECT TEAM1B FROM keys) AND r2.ranks = 3

UNION ALL

SELECT 
    row_number() OVER (ORDER BY 1) + 39 AS match_id,
    CONCAT(r1.team_id, ' VS ', r2.team_id) AS match_fixtures,
    'N' AS group_name,
    r1.team_id AS home_team,
    r2.team_id AS away_team,
    CAST(rand() * 6 AS INT) AS goals_scored, 
    CAST(rand() * 6 AS INT) AS goals_allowed,
    'NONE' AS match_result,
    'R16' AS match_stage
FROM 
    round_of_16 r1
JOIN 
    round_of_16 r2 ON r1.group_name = 'C' AND r1.ranks = 1 AND r2.group_name = (select Team1C from keys) AND r2.ranks = 3

UNION ALL

SELECT 
    Row_Number() OVER (ORDER BY 1) + 40 AS match_id,
    CONCAT(r1.team_id, ' VS ', r2.team_id) AS match_fixtures,
    'N' AS group_name,
    r1.team_id AS home_team,
    r2.team_id AS away_team,
    CAST(rand() * 6 AS INT) AS goals_scored, 
    CAST(rand() * 6 AS INT) AS goals_allowed,
    'NONE' AS match_result,
    'R16' AS match_stage
FROM 
    round_of_16 r1
JOIN 
     round_of_16 r2 ON r1.group_name = 'F' AND r1.ranks = 1 AND r2.group_name = (select Team1F from keys) AND r2.ranks = 3


UNION ALL

SELECT 
    Row_Number() OVER (ORDER BY 1) + 41 AS match_id,
    CONCAT(r1.team_id, ' VS ', r2.team_id) AS match_fixtures,
    'N' AS group_name,
    r1.team_id AS home_team,
    r2.team_id AS away_team,
    CAST(rand() * 6 AS INT) AS goals_scored, 
    CAST(rand() * 6 AS INT) AS goals_allowed,
    'NONE' AS match_result,
    'R16' AS match_stage
FROM 
    round_of_16 r1
JOIN 
    round_of_16 r2 ON r1.group_name = 'D' AND r1.ranks = 2 AND r2.group_name = 'E' AND r2.ranks = 2
   

UNION ALL

SELECT 
    Row_Number() OVER (ORDER BY 1) + 42 AS match_id,
    CONCAT(r1.team_id, ' VS ', r2.team_id) AS match_fixtures,
    'N' AS group_name,
    r1.team_id AS home_team,
    r2.team_id AS away_team,
    CAST(rand() * 6 AS INT) AS goals_scored, 
    CAST(rand() * 6 AS INT) AS goals_allowed,
    'NONE' AS match_result,
    'R16' AS match_stage
FROM 
    round_of_16 r1
JOIN 
    round_of_16 r2 ON r1.group_name = 'E' AND r1.ranks = 1 AND r2.group_name = (SELECT Team1E from keys) AND r2.ranks = 3
   

UNION ALL

SELECT 
    Row_Number() OVER (ORDER BY 1) + 43 AS match_id,
    CONCAT(r1.team_id, ' VS ', r2.team_id) AS match_fixtures,
    'N' AS group_name,
    r1.team_id AS home_team,
    r2.team_id AS away_team,
    CAST(rand() * 6 AS INT) AS goals_scored, 
    CAST(rand() * 6 AS INT) AS goals_allowed,
    'NONE' AS match_result,
    'R16' AS match_stage
FROM 
    round_of_16 r1
JOIN 
    round_of_16 r2 ON r1.group_name = 'D' AND r1.ranks = 2 AND r2.group_name = 'F' AND r2.ranks = 2;

num_affected_rows,num_inserted_rows
8,8


In [0]:
%sql
select * from matches order by match_id;

match_id,match_fixtures,group_name,home_team,away_team,home_team_goals,away_team_goals,match_results,match_stage
1,Germany Vs Switzerland,A,Germany,Switzerland,0,1,Switzerland,Group Stage
2,Germany Vs Hungary,A,Germany,Hungary,3,3,DRAW,Group Stage
3,Germany Vs Scotland,A,Germany,Scotland,1,5,Scotland,Group Stage
4,Scotland Vs Switzerland,A,Scotland,Switzerland,2,2,DRAW,Group Stage
5,Scotland Vs Hungary,A,Scotland,Hungary,3,4,Hungary,Group Stage
6,Hungary Vs Switzerland,A,Hungary,Switzerland,0,0,DRAW,Group Stage
7,Spain Vs Albania,B,Spain,Albania,1,4,Albania,Group Stage
8,Spain Vs Italy,B,Spain,Italy,4,0,Spain,Group Stage
9,Spain Vs Croatia,B,Spain,Croatia,5,5,DRAW,Group Stage
10,Croatia Vs Albania,B,Croatia,Albania,1,4,Albania,Group Stage


In [0]:
%sql
UPDATE matches
SET match_results = 
CASE
WHEN home_team_goals > away_team_goals THEN home_team
WHEN home_team_goals < away_team_goals THEN away_team
ELSE
CASE
WHEN (home_team_goals + 1) % 2 = 1 THEN home_team
ELSE away_team
END
END
WHERE match_stage = 'R16';

num_affected_rows
8


Quarter Finals

In [0]:
%sql
INSERT INTO matches (match_id, match_fixtures,group_name, home_team, away_team, home_team_goals, away_team_goals, match_results, match_stage)
SELECT 
    Row_Number() OVER(ORDER BY 1) + 44 AS match_id,
    CONCAT(QF1.match_results, ' VS ', QF2.match_results) AS match_fixtures,
    'N' as group_name,
    QF1.match_results AS home_team,
    QF2.match_results AS away_team,
    CAST(rand() * 6 AS INT) AS goals_scored, 
    CAST(rand() * 6 AS INT) AS goals_allowed,
    'NONE' AS match_results,
    'Quarter-Final' AS match_stage
FROM 
    matches QF1
JOIN 
    matches QF2 ON 
        QF1.match_id = 39 AND QF2.match_id = 37

UNION ALL

SELECT 
    Row_Number() OVER(ORDER BY 1) + 45 AS match_id,
    CONCAT(QF1.match_results, ' Vs. ', QF2.match_results) AS match_fixtures,
    'N' as group_name,
    QF1.match_results AS home_team,
    QF2.match_results AS away_team,
    CAST(rand() * 6 AS INT) AS goals_scored, 
    CAST(rand() * 6 AS INT) AS goals_allowed,
    'None' AS match_results,
    'Quarter-Final' AS match_stage
FROM 
    matches QF1
JOIN 
    matches QF2 ON 
        QF1.match_id = 41 AND QF2.match_id = 42

UNION ALL

SELECT 
    Row_Number() OVER(ORDER BY 1) + 46 AS match_id,
    CONCAT(QF1.match_results, ' Vs. ', QF2.match_results) AS match_fixtures,
    'N' as group_name,
    QF1.match_results AS home_team,
    QF2.match_results AS away_team,
    CAST(rand() * 6 AS INT) AS goals_scored, 
    CAST(rand() * 6 AS INT) AS goals_allowed,
    'None' AS match_results,
    'Quarter-Final' AS match_stage
FROM 
    matches QF1
JOIN 
    matches QF2 ON 
        QF1.match_id = 43 AND QF2.match_id = 44

UNION ALL

SELECT 
    Row_Number() OVER(ORDER BY 1) + 47 AS match_id,
    CONCAT(QF1.match_results, ' Vs. ', QF2.match_results) AS match_fixtures,
    'N' as GroupName,
    QF1.match_results AS home_team,
    QF2.match_results AS away_team,
    CAST(rand() * 6 AS INT) AS goals_scored, 
    CAST(rand() * 6 AS INT) AS goals_allowed,
    'None' AS match_results,
    'Quarter-Final' AS match_stage
FROM 
    matches QF1
JOIN 
    matches QF2 ON 
        QF1.match_id = 40 AND QF2.match_id = 38;

num_affected_rows,num_inserted_rows
4,4


In [0]:
%sql
UPDATE matches
SET match_results = 
    CASE
        WHEN home_team_goals > away_team_goals THEN home_team
        WHEN home_team_goals < away_team_goals THEN away_team
        ELSE 
        CASE 
                WHEN (home_team_goals + 1) % 2 = 1 THEN home_team
                ELSE away_team
            END
    END
WHERE match_stage = 'Quarter-Final';

num_affected_rows
4


Semi-Final Rounds

In [0]:
%sql
INSERT INTO matches (match_id, match_fixtures,group_name, home_team, away_team, home_team_goals, away_team_goals, match_results, match_stage)
SELECT 
    Row_Number() OVER(ORDER BY 1) + 48 AS match_id,
    CONCAT(SF1.match_results, ' Vs. ', SF2.match_results) AS fixture,
    'N' as GroupName,
    SF1.match_results AS home_team,
    SF2.match_results AS away_team,
    CAST(rand() * 6 AS INT) AS goals_scored, 
    CAST(rand() * 6 AS INT) AS goals_scored,
    'NONE' AS match_results,
    'Semi-Final' AS match_stage
FROM 
    matches SF1
JOIN 
    matches SF2 ON (
        (SF1.match_id = 45 AND SF2.match_id = 46) OR
        (SF1.match_id = 47 AND SF2.match_id = 48)
    );

num_affected_rows,num_inserted_rows
2,2


In [0]:
%sql
UPDATE matches
SET match_results = 
    CASE
        WHEN home_team_goals > away_team_goals THEN home_team
        WHEN home_team_goals < away_team_goals THEN away_team
        ELSE 
          CASE 
                  WHEN (home_team_goals + 1) % 2 = 1 THEN home_team
                  ELSE away_team
              END
    END
WHERE match_stage = 'Semi-Final';

num_affected_rows
2


Match for Third Place

In [0]:
%sql
INSERT INTO matches (match_id, match_fixtures,group_name, home_team, away_team, home_team_goals, away_team_goals, match_results, match_stage)
WITH losing_teams AS (
    SELECT
        CASE
            WHEN match_results != home_team THEN home_team
            ELSE away_team
        END AS losing_teams
    FROM
        matches
    WHERE 
        match_stage = 'Semi-Final'
)

SELECT 
    Row_Number() OVER(ORDER BY 1) + 50 AS match_id,
    CONCAT(T1.losing_teams, ' Vs. ', T2.losing_teams) AS fixture,
    'N' as GroupName,
    T1.losing_teams AS home_team,
    T2.losing_teams AS away_team,
    CAST(rand() * 6 AS INTEGER) AS goals_scored, 
    CAST(rand() * 6 AS INTEGER) AS goals_allowed,
    'None' AS match_results,
    'Third-Place' AS match_stage
FROM 
    losing_teams T1
JOIN 
    losing_teams T2 ON T1.losing_teams < T2.losing_teams;


num_affected_rows,num_inserted_rows
1,1


In [0]:
%sql
UPDATE matches
SET match_results = 
    CASE
        WHEN home_team_goals > away_team_goals THEN home_team
        WHEN home_team_goals < away_team_goals THEN away_team
        ELSE 
          CASE 
                  WHEN (home_team_goals + 1) % 2 = 1 THEN home_team
                  ELSE away_team
              END
    END
WHERE match_stage = 'Third-Place';

num_affected_rows
1


Final Match

In [0]:
%sql
INSERT INTO matches (match_id, match_fixtures,group_name, home_team, away_team, home_team_goals, away_team_goals, match_results, match_stage)
SELECT 
    Row_Number() OVER(ORDER BY 1) + 52 AS match_id,
    CONCAT(F1.match_results, ' Vs. ', F2.match_results) AS fixture,
    'N' as GroupName,
    F1.match_results AS home_team,
    F2.match_results AS AwayTeam,
    CAST(rand() * 6 AS INT) AS goals_scored, 
    CAST(rand() * 6 AS INT) AS goals_allowed,
    'None' AS match_results,
    'Final' AS match_stage
FROM 
    matches F1
JOIN 
    matches F2 ON 
        F1.match_id = 49 AND F2.match_id = 50;

num_affected_rows,num_inserted_rows
1,1


In [0]:
%sql
UPDATE matches
SET match_results = 
    CASE
        WHEN home_team_goals > away_team_goals THEN home_team
        WHEN home_team_goals < away_team_goals THEN away_team
        ELSE 
          CASE 
                WHEN (home_team_goals + 1) % 2 = 1 THEN home_team
                ELSE away_team
            END
    END
WHERE match_stage = 'Final';

num_affected_rows
1


In [0]:
%sql
SELECT * FROM matches where match_stage = 'R16' ORDER BY match_id;

match_id,match_fixtures,group_name,home_team,away_team,home_team_goals,away_team_goals,match_results,match_stage
37,Hungary VS England,N,Hungary,England,3,1,Hungary,R16
38,Switzerland VS Spain,N,Switzerland,Spain,5,0,Switzerland,R16
39,Albania VS Scotland,N,Albania,Scotland,2,4,Scotland,R16
40,Serbia VS Netherlands,N,Serbia,Netherlands,0,0,Serbia,R16
41,Czech Republic VS Slovenia,N,Czech Republic,Slovenia,5,2,Czech Republic,R16
42,Austria VS Romania,N,Austria,Romania,0,0,Austria,R16
43,Play-off Winner B VS Croatia,N,Play-off Winner B,Croatia,4,4,Play-off Winner B,R16
44,Austria VS Portugal,N,Austria,Portugal,3,2,Austria,R16


In [0]:
%sql
SELECT * FROM matches where match_stage = 'Quarter-Final' ORDER BY match_id;

match_id,match_fixtures,group_name,home_team,away_team,home_team_goals,away_team_goals,match_results,match_stage
45,Scotland VS Hungary,N,Scotland,Hungary,2,1,Scotland,Quarter-Final
46,Czech Republic Vs. Austria,N,Czech Republic,Austria,0,4,Austria,Quarter-Final
47,Play-off Winner B Vs. Austria,N,Play-off Winner B,Austria,3,1,Play-off Winner B,Quarter-Final
48,Serbia Vs. Switzerland,N,Serbia,Switzerland,3,1,Serbia,Quarter-Final


In [0]:
%sql
SELECT * FROM matches WHERE match_stage = 'Semi-Final' ORDER BY match_id;

match_id,match_fixtures,group_name,home_team,away_team,home_team_goals,away_team_goals,match_results,match_stage
49,Play-off Winner B Vs. Serbia,N,Play-off Winner B,Serbia,4,4,Play-off Winner B,Semi-Final
50,Scotland Vs. Austria,N,Scotland,Austria,3,1,Scotland,Semi-Final


In [0]:
%sql
SELECT * FROM matches where match_stage = 'Third-Place' ORDER BY match_id;

match_id,match_fixtures,group_name,home_team,away_team,home_team_goals,away_team_goals,match_results,match_stage
51,Austria Vs. Serbia,N,Austria,Serbia,3,0,Austria,Third-Place


In [0]:
%sql
SELECT * FROM matches where match_stage = 'Final' ORDER BY match_id;

match_id,match_fixtures,group_name,home_team,away_team,home_team_goals,away_team_goals,match_results,match_stage
53,Play-off Winner B Vs. Scotland,N,Play-off Winner B,Scotland,0,1,Scotland,Final


Tie-Sheet Visualization

In [0]:
#import libraries
import pandas as pd
from pyspark.sql import SparkSession

In [0]:
#Spark Session
spark = SparkSession.builder.appName("Visualization").getOrCreate()

Group Stage Visualization

-Loading the Dataset using Spark

In [0]:
# Query from spark sql for different stages
all_teams_stats = spark.sql("SELECT * from match_stats")
round_of_16 = spark.sql("""
                        SELECT 
    CASE WHEN match_id IN (39,37,41,42) THEN 'left' ELSE 'right' END AS side,
    match_id,
    home_team, 
    away_team, 
    home_team_goals, 
    away_team_goals 
FROM 
    matches 
WHERE 
    match_id IN (39,37,41,42,43,44,40,38) 
    AND match_stage = 'R16' 
ORDER BY 
    CASE 
        WHEN match_id = 39 THEN 1
        WHEN match_id = 37 THEN 2
        WHEN match_id = 41 THEN 3
        WHEN match_id = 42 THEN 4
        WHEN match_id = 43 THEN 5
        WHEN match_id = 44 THEN 6
        WHEN match_id = 40 THEN 7
        WHEN match_id = 38 THEN 8
    END;

                        """)
quarter_final = spark.sql("SELECT home_team, away_team, home_team_goals, away_team_goals FROM matches WHERE match_stage = 'Quarter-Final' ORDER BY match_id")
semi_final = spark.sql("SELECT home_team, away_team, home_team_goals, away_team_goals FROM matches WHERE match_stage = 'Semi-Final' ORDER BY match_id DESC")
final = spark.sql("SELECT home_team, away_team, home_team_goals, away_team_goals FROM matches WHERE match_stage = 'Final' ORDER BY match_id")
third_place = spark.sql("SELECT match_id FROM matches WHERE match_stage = 'Third-Place' ORDER BY match_id")


Converting Spark dataframe to Pandas Dataframe

In [0]:
all_teams_stats_df = all_teams_stats.toPandas()
round_of_16_df = round_of_16.toPandas()
quarter_final_df = quarter_final.toPandas()
semi_final_df = semi_final.toPandas()
final_df = final.toPandas()
third_place_df = third_place.toPandas()

In [0]:
# Create table visualization DataFrame
table_visualization = pd.DataFrame({
    'Teams': all_teams_stats_df['team_id'],
    'Group': all_teams_stats_df['group_name'],
    'Pld': all_teams_stats_df['matches_played'],
    'W': all_teams_stats_df['wins'],
    'D': all_teams_stats_df['draw'],
    'L': all_teams_stats_df['lose'],
    'GF': all_teams_stats_df['goals_scored'],
    'GA': all_teams_stats_df['goals_allowed'],
    'GD': all_teams_stats_df['goal_diff'],
    'Pts': all_teams_stats_df['points']
})

# Create blank row as a dictionary to add one blank row for each group
blank_row = {
    'Teams': '', 
    'Group': '', 
    'Pld': '', 
    'W': '',
    'D': '',
    'L': '',
    'GF': '',
    'GA': '',
    'GD': '',
    'Pts': ''
}

groups = []
for i in range(0, len(table_visualization), 4):
    group = table_visualization.iloc[i:i+4]  
    groups.append(group) 
    if i < len(table_visualization) - 4:
        groups.append(pd.DataFrame([blank_row])) 

visualization_with_space = pd.concat(groups).reset_index(drop=True)
half_table_index = len(visualization_with_space) // 2
spark.conf.set("spark.sql.execution.arrow.pyspark.enabled", "false")
display(visualization_with_space)
print(half_table_index)


Teams,Group,Pld,W,D,L,GF,GA,GD,Pts
Germany,A,3.0,0.0,1.0,2.0,4.0,9.0,-5.0,1.0
Scotland,A,3.0,1.0,1.0,1.0,10.0,7.0,3.0,4.0
Hungary,A,3.0,1.0,2.0,0.0,7.0,6.0,1.0,5.0
Switzerland,A,3.0,1.0,2.0,0.0,3.0,2.0,1.0,5.0
,,,,,,,,,
Italy,B,3.0,1.0,0.0,2.0,5.0,9.0,-4.0,3.0
Croatia,B,3.0,1.0,1.0,1.0,11.0,11.0,0.0,4.0
Spain,B,3.0,1.0,1.0,1.0,10.0,9.0,1.0,4.0
Albania,B,3.0,2.0,0.0,1.0,8.0,5.0,3.0,6.0
,,,,,,,,,


14


In [0]:
#Table information for the Group A,B,C
left_table = pd.DataFrame(columns=['Teams','Group','Pld','W','D','L','GF','GA','GD','Pts'])
left_table = pd.concat([left_table, pd.DataFrame([blank_row])], ignore_index=True)
left_table = pd.concat([left_table, visualization_with_space.iloc[:half_table_index]]).reset_index(drop=True)

#Table information for the group C,D,E
right_table = pd.DataFrame(columns=['Teams','Group','Pld','W','D','L','GF','GA','GD','Pts'])
right_table = pd.concat([right_table, visualization_with_space.iloc[half_table_index +1:]]).reset_index(drop=True)
right_table = pd.concat([pd.DataFrame([blank_row]), right_table], ignore_index=True)

display(left_table)
display(right_table)

Teams,Group,Pld,W,D,L,GF,GA,GD,Pts
,,,,,,,,,
Germany,A,3.0,0.0,1.0,2.0,4.0,9.0,-5.0,1.0
Scotland,A,3.0,1.0,1.0,1.0,10.0,7.0,3.0,4.0
Hungary,A,3.0,1.0,2.0,0.0,7.0,6.0,1.0,5.0
Switzerland,A,3.0,1.0,2.0,0.0,3.0,2.0,1.0,5.0
,,,,,,,,,
Italy,B,3.0,1.0,0.0,2.0,5.0,9.0,-4.0,3.0
Croatia,B,3.0,1.0,1.0,1.0,11.0,11.0,0.0,4.0
Spain,B,3.0,1.0,1.0,1.0,10.0,9.0,1.0,4.0
Albania,B,3.0,2.0,0.0,1.0,8.0,5.0,3.0,6.0


Teams,Group,Pld,W,D,L,GF,GA,GD,Pts
,,,,,,,,,
France,D,3.0,2.0,1.0,0.0,14.0,7.0,7.0,7.0
Austria,D,3.0,2.0,0.0,1.0,8.0,8.0,0.0,6.0
Netherlands,D,3.0,0.0,2.0,1.0,11.0,12.0,-1.0,2.0
Play-off Winner A,D,3.0,0.0,1.0,2.0,3.0,9.0,-6.0,1.0
,,,,,,,,,
Slovakia,E,3.0,0.0,1.0,2.0,4.0,9.0,-5.0,1.0
Belgium,E,3.0,1.0,1.0,1.0,3.0,3.0,0.0,4.0
Romania,E,3.0,2.0,0.0,1.0,8.0,8.0,0.0,6.0
Play-off Winner B,E,3.0,2.0,0.0,1.0,10.0,5.0,5.0,6.0


Splitting the Round of 16 tp Left and Right for Visualization

In [0]:
# Create an empty DataFrame for round of sixteen
visualize_r16 = pd.DataFrame(columns=['Round_of_16', 'rest'])

for _ in range(2):
    visualize_r16 = pd.concat([visualize_r16, pd.DataFrame({'Round_of_16': ['']})], ignore_index=True)

for index, row in round_of_16_df.iterrows():
    visualize_r16 = pd.concat([visualize_r16, pd.DataFrame({'Round_of_16': [f"{row['home_team']} ({row['home_team_goals']})"]})], ignore_index=True)
    visualize_r16 = pd.concat([visualize_r16, pd.DataFrame({'Round_of_16': [f"{row['away_team']} ({row['away_team_goals']})"]})], ignore_index=True)

    for _ in range(2):
        visualize_r16 = pd.concat([visualize_r16, pd.DataFrame({'Round_of_16': ['']})], ignore_index=True)

visualize_r16['rest'] = ''

splited_r16_index = len(visualize_r16) // 2
print(splited_r16_index)
display(visualize_r16)


17


Round_of_16,rest
,
,
Albania (2),
Scotland (4),
,
,
Hungary (3),
England (1),
,
,


In [0]:
left_side_r16 = visualize_r16.iloc[:splited_r16_index + 1].reset_index(drop = True)
right_side_r16 = visualize_r16.iloc[splited_r16_index - 1:].reset_index(drop = True)
display(left_side_r16)
display(right_side_r16)

Round_of_16,rest
,
,
Albania (2),
Scotland (4),
,
,
Hungary (3),
England (1),
,
,


Round_of_16,rest
,
,
Play-off Winner B (4),
Croatia (4),
,
,
Austria (3),
Portugal (2),
,
,


Quarter Final Visualization

In [0]:
# Dataframe for quarter final
visualize_qf = pd.DataFrame(columns=['quarter_final'])

for _ in range(4):
    visualize_qf = pd.concat([visualize_qf, pd.DataFrame({'quarter_final': ['']})], ignore_index=True)

for index, row in quarter_final_df.iterrows():
    visualize_qf = pd.concat([visualize_qf, pd.DataFrame({'quarter_final': [f"{row['home_team']} ({row['home_team_goals']})"]})], ignore_index=True)
    visualize_qf = pd.concat([visualize_qf, pd.DataFrame({'quarter_final': [f"{row['away_team']} ({row['away_team_goals']})"]})], ignore_index=True)

    for _ in range(6):
        visualize_qf = pd.concat([visualize_qf, pd.DataFrame({'quarter_final': ['']})], ignore_index=True)
visualize_qf['rest'] = ''
splited_qf_index = len(visualize_qf) // 2
display(visualize_qf)


quarter_final,rest
,
,
,
,
Scotland (2),
Hungary (1),
,
,
,
,


In [0]:
left_side_qf = pd.DataFrame(columns=['Quater_Final'])
left_side_qf = pd.concat([left_side_qf,visualize_qf.iloc[:splited_qf_index]]).reset_index(drop=True)
right_side_qf = pd.DataFrame(columns=['Quater_Final'])
right_side_qf = pd.concat([right_side_qf,visualize_qf.iloc[splited_qf_index-2:]]).reset_index(drop=True)


display(left_side_qf)
display(right_side_qf)

Quater_Final,quarter_final,rest
,,
,,
,,
,,
,Scotland (2),
,Hungary (1),
,,
,,
,,
,,


Quater_Final,quarter_final,rest
,,
,,
,,
,,
,Play-off Winner B (3),
,Austria (1),
,,
,,
,,
,,


SemiFinal Visualization

In [0]:
import pandas as pd

visualize_sf = pd.DataFrame(columns=['semi_final'])

for _ in range(8):
    visualize_sf = pd.concat([visualize_sf, pd.DataFrame({'semi_final': ['']})], ignore_index=True)

for index, row in semi_final_df.iterrows():
    visualize_sf = pd.concat([visualize_sf, pd.DataFrame({'semi_final': [f"{row['home_team']} ({row['home_team_goals']})"]})], ignore_index=True)
    visualize_sf = pd.concat([visualize_sf, pd.DataFrame({'semi_final': [f"{row['away_team']} ({row['away_team_goals']})"]})], ignore_index=True)
    for _ in range(6):
        visualize_sf = pd.concat([visualize_sf, pd.DataFrame({'semi_final': ['']})], ignore_index=True)
visualize_sf['rest'] = ''
splited_sf_index = len(visualize_sf) // 2
print(splited_sf_index)
display(visualize_sf)


12


semi_final,rest
,
,
,
,
,
,
,
,
Scotland (3),
Austria (1),


In [0]:
left_side_sf = pd.DataFrame(columns=['Semi_Final'])
left_side_sf = pd.concat([left_side_sf, visualize_sf.iloc[:splited_sf_index]]).reset_index(drop=True)

for _ in range(5):
    left_side_sf = pd.concat([left_side_sf, pd.DataFrame({'Semi_Final': [''], 'rest': ['']})], ignore_index=True)

right_side_sf = pd.DataFrame({'Semi_Final': [''] * 7, 'rest': [''] * 7})
right_side_sf = pd.concat([right_side_sf, visualize_sf.iloc[splited_sf_index:]]).reset_index(drop=True)

display(left_side_sf)
display(right_side_sf)

Semi_Final,semi_final,rest
,,
,,
,,
,,
,,
,,
,,
,,
,Scotland (3),
,Austria (1),


Semi_Final,rest,semi_final
,,
,,
,,
,,
,,
,,
,,
,,
,,
,,


Final Match Visualization

In [0]:
visualize_final = pd.DataFrame(columns=['Final'])
for _ in range(6):
    visualize_final = pd.concat([visualize_final, pd.DataFrame({'Final': ['']})], ignore_index=True)

for index,row in final_df.iterrows():
    visualize_final = pd.concat([visualize_final, pd.DataFrame({'Final': [f"{row['home_team']} ({row['home_team_goals']})"]})], ignore_index=True)
    visualize_final = pd.concat([visualize_final, pd.DataFrame({'Final': [f"{row['away_team']} ({row['away_team_goals']})"]})], ignore_index=True)

    if (index + 1) %2 == 0:
        visualize_final = pd.concat([visualize_final, pd.DataFrame({'Final': ['']})], ignore_index=True)
        visualize_final = pd.concat([visualize_final, pd.DataFrame({'Final': ['']})], ignore_index=True)
    for _ in range(2):
        visualize_final = pd.concat([visualize_final, pd.DataFrame({'Final': ['']})], ignore_index=True)
visualize_final['space'] = ''
display(visualize_final)


Final,space
,
,
,
,
,
,
Play-off Winner B (0),
Scotland (1),
,
,


In [0]:
left_side_qf.columns = [f"{col}q2" for col in left_side_qf.columns]

merged_df = pd.concat([left_side_r16, left_side_qf], axis=1)

left_side_sf.columns = [f"{col}s1" for col in left_side_sf.columns]
merged_df = pd.concat([merged_df, left_side_sf], axis=1)

visualize_final.columns = [f"{col}f" for col in visualize_final.columns]
merged_df = pd.concat([merged_df, visualize_final], axis=1)

right_side_sf.columns = [f"{col}s2" for col in right_side_sf.columns]

merged_df = pd.concat([merged_df, right_side_sf], axis=1)



right_side_qf.columns = [f"{col}q1" for col in right_side_qf.columns]
# Concatenate the DataFrames
merged_df = pd.concat([merged_df, right_side_qf], axis=1)

right_side_r16.columns = [f"{col}r2" for col in right_side_r16.columns]

# Concatenate the DataFrames
merged_df = pd.concat([merged_df,right_side_r16], axis=1)

left_table.columns = [f"{col}g1" for col in left_table.columns]
right_table.columns = [f"{col}g2" for col in right_table.columns]

# Concatenate the DataFrames
merged_df = pd.concat([left_table,merged_df, right_table], axis=1)

merged_df.fillna('', inplace=True)

display(merged_df)

Teamsg1,Groupg1,Pldg1,Wg1,Dg1,Lg1,GFg1,GAg1,GDg1,Ptsg1,Round_of_16,rest,Quater_Finalq2,quarter_finalq2,restq2,Semi_Finals1,semi_finals1,rests1,Finalf,spacef,Semi_Finals2,rests2,semi_finals2,Quater_Finalq1,quarter_finalq1,restq1,Round_of_16r2,restr2,Teamsg2,Groupg2,Pldg2,Wg2,Dg2,Lg2,GFg2,GAg2,GDg2,Ptsg2
,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
Germany,A,3.0,0.0,1.0,2.0,4.0,9.0,-5.0,1.0,,,,,,,,,,,,,,,,,,,France,D,3.0,2.0,1.0,0.0,14.0,7.0,7.0,7.0
Scotland,A,3.0,1.0,1.0,1.0,10.0,7.0,3.0,4.0,Albania (2),,,,,,,,,,,,,,,,Play-off Winner B (4),,Austria,D,3.0,2.0,0.0,1.0,8.0,8.0,0.0,6.0
Hungary,A,3.0,1.0,2.0,0.0,7.0,6.0,1.0,5.0,Scotland (4),,,,,,,,,,,,,,,,Croatia (4),,Netherlands,D,3.0,0.0,2.0,1.0,11.0,12.0,-1.0,2.0
Switzerland,A,3.0,1.0,2.0,0.0,3.0,2.0,1.0,5.0,,,,Scotland (2),,,,,,,,,,,Play-off Winner B (3),,,,Play-off Winner A,D,3.0,0.0,1.0,2.0,3.0,9.0,-6.0,1.0
,,,,,,,,,,,,,Hungary (1),,,,,,,,,,,Austria (1),,,,,,,,,,,,,
Italy,B,3.0,1.0,0.0,2.0,5.0,9.0,-4.0,3.0,Hungary (3),,,,,,,,Play-off Winner B (0),,,,,,,,Austria (3),,Slovakia,E,3.0,0.0,1.0,2.0,4.0,9.0,-5.0,1.0
Croatia,B,3.0,1.0,1.0,1.0,11.0,11.0,0.0,4.0,England (1),,,,,,,,Scotland (1),,,,,,,,Portugal (2),,Belgium,E,3.0,1.0,1.0,1.0,3.0,3.0,0.0,4.0
Spain,B,3.0,1.0,1.0,1.0,10.0,9.0,1.0,4.0,,,,,,,Scotland (3),,,,,,,,,,,,Romania,E,3.0,2.0,0.0,1.0,8.0,8.0,0.0,6.0
Albania,B,3.0,2.0,0.0,1.0,8.0,5.0,3.0,6.0,,,,,,,Austria (1),,,,,,,,,,,,Play-off Winner B,E,3.0,2.0,0.0,1.0,10.0,5.0,5.0,6.0
