In [0]:
%python

# Ensure that the specified directory is empty. 
dbutils.fs.rm("dbfs:/user/hive/warehouse/euroteams", recurse=True)
dbutils.fs.rm("dbfs:/user/hive/warehouse//groupstage_match", recurse=True)
dbutils.fs.rm("dbfs:/user/hive/warehouse/groupstandings", recurse=True)
dbutils.fs.rm("dbfs:/user/hive/warehouse/combinationforthirdplace", recurse=True)
dbutils.fs.rm("dbfs:/user/hive/warehouse/matchesfixtures",recurse=True)

Out[117]: True

In [0]:
%sql
DROP table if exists EuroTeams;
DROP table if exists GroupStage_match;
DROP table if exists GroupStandings;
DROP table if exists CombinationforThirdPlace;
DROP table if exists MatchesFixtures;

In [0]:
%sql
CREATE OR REPLACE TABLE EuroTeams(
TeamId INTEGER,
TeamName VARCHAR(255),
GroupName VARCHAR(255)
);

INSERT INTO EuroTeams(TeamId, TeamName, GroupName)
SELECT
    TeamId,
    TeamName,
    CASE
        WHEN TeamId <= 4 THEN 'A'
        WHEN TeamId <= 8 THEN 'B'
        WHEN TeamId <= 12 THEN 'C'
        WHEN TeamId <= 16 THEN 'D'
        WHEN TeamId <= 20 THEN 'E'
        ELSE 'F'
    END AS GroupName
FROM (
    VALUES
    (1, 'Germany'),
    (2, 'Scotland'),
    (3, 'Hungary'),
    (4, 'Switzerland'),
    (5, 'Spain'),
    (6, 'Croatia'),
    (7, 'Italy'),
    (8, 'Albania'),
    (9, 'Slovenia'),
    (10, 'Denmark'),
    (11, 'Serbia'),
    (12, 'England'),
    (13, 'Finland'),
    (14, 'Netherlands'),
    (15, 'Austria'),
    (16, 'France'),
    (17, 'Belgium'),
    (18, 'Slovakia'),
    (19, 'Romania'),
    (20, 'Ukraine'),
    (21, 'Türkiye'),
    (22, 'Greece'),
    (23, 'Portugal'),
    (24, 'Czech')
) AS Teams (TeamId, TeamName);


num_affected_rows,num_inserted_rows
24,24


In [0]:
%sql
Select * from Euroteams

TeamId,TeamName,GroupName
1,Germany,A
2,Scotland,A
3,Hungary,A
4,Switzerland,A
5,Spain,B
6,Croatia,B
7,Italy,B
8,Albania,B
9,Slovenia,C
10,Denmark,C


In [0]:
%sql
SELECT *
FROM Euroteams
WHERE GroupName = 'A';

TeamId,TeamName,GroupName
1,Germany,A
2,Scotland,A
3,Hungary,A
4,Switzerland,A


In [0]:
%sql
-- Create the GroupStageMatch table
CREATE OR REPLACE TABLE GroupStage_match(
    MatchId INTEGER,
    Team1 VARCHAR(255),
    Team2 VARCHAR(255),
    Team1Score INTEGER,
    Team2Score INTEGER,
    Result VARCHAR(5),
    MatchWinner VARCHAR(255),
    MatchStatus VARCHAR(20)
);

-- Insert data into the GroupStageMatch table
INSERT INTO GroupStage_match (MatchId, Team1, Team2, Team1Score, Team2Score, Result, MatchWinner, MatchStatus)
SELECT DISTINCT
    ROW_NUMBER() OVER (ORDER BY t1.TeamId) AS MatchId,
    t1.TeamName AS Team1,
    t2.TeamName AS Team2,
    (SELECT FLOOR(RAND() * 6) + 1) AS Team1Score,
    (SELECT FLOOR(RAND() * 6) + 1) AS Team2Score,
    CONCAT(Team1Score, '-', Team2Score) AS Result,
    CASE
        WHEN Team1Score > Team2Score THEN t1.TeamName
        WHEN Team1Score < Team2Score THEN t2.TeamName
        ELSE 'draw'
    END AS MatchWinner,
    'completed' AS MatchStatus
FROM
    Euroteams t1
JOIN
    Euroteams t2 ON t1.GroupName = t2.GroupName AND t1.TeamId < t2.TeamId
ORDER BY MatchId; 


num_affected_rows,num_inserted_rows
36,36


In [0]:
%sql
Select * from  GroupStage_match

MatchId,Team1,Team2,Team1Score,Team2Score,Result,MatchWinner,MatchStatus
1,Germany,Switzerland,2,4,2-4,Switzerland,completed
2,Germany,Hungary,4,5,4-5,Hungary,completed
3,Germany,Scotland,2,3,2-3,Scotland,completed
4,Scotland,Switzerland,3,3,3-3,draw,completed
5,Scotland,Hungary,6,3,6-3,Scotland,completed
6,Hungary,Switzerland,6,2,6-2,Hungary,completed
7,Spain,Albania,4,5,4-5,Albania,completed
8,Spain,Italy,6,5,6-5,Spain,completed
9,Spain,Croatia,2,5,2-5,Croatia,completed
10,Croatia,Albania,2,6,2-6,Albania,completed


Creating the group standings or rankings

In [0]:
%sql
-- Ensure that the table exists with the correct schema
CREATE or REPLACE TABLE GroupStandings (
    TeamId INTEGER,
    TeamName VARCHAR(255),
    GroupName VARCHAR(255),
    MatchesPlayed INTEGER, 
    Wins INTEGER, 
    Draws INTEGER, 
    Losses INTEGER,
    GoalsFor INTEGER, 
    GoalsAgainst INTEGER, 
    GoalDifference INTEGER, 
    Points INTEGER,
    Rankings INTEGER
);

-- Calculate wins, draws, and losses for each team
WITH MatchResults AS (
    SELECT
        Team1 AS TeamName,
        CASE
            WHEN MatchWinner = Team1 THEN 1 
            ELSE 0
        END AS Wins,
        CASE
            WHEN MatchWinner = 'draw' THEN 1
            ELSE 0 
        END AS Draws,
        Team1Score AS GoalsFor,
        Team2Score AS GoalsAgainst
    FROM GroupStage_match
    UNION ALL
    SELECT
        Team2 AS TeamName,
        CASE
            WHEN MatchWinner = Team2 THEN 1
            ELSE 0 
        END AS Wins,
        CASE
            WHEN MatchWinner = 'draw' THEN 1
            ELSE 0 
        END AS Draws,
        Team2Score AS GoalsFor,
        Team1Score AS GoalsAgainst
    FROM GroupStage_match
)
INSERT INTO GroupStandings (TeamId, TeamName, GroupName, MatchesPlayed, Wins, Draws, Losses, GoalsFor, GoalsAgainst, GoalDifference, Points, Rankings)
SELECT
    t.TeamId,
    t.TeamName,
    t.GroupName,
    COUNT(M.Wins) AS MatchesPlayed,
    SUM(M.Wins) AS Wins,
    SUM(M.Draws) AS Draws,
    COUNT(*) - SUM(M.Wins) - SUM(M.Draws) AS Losses,
    SUM(M.GoalsFor) AS GoalsFor,
    SUM(M.GoalsAgainst) AS GoalsAgainst,
    SUM(M.GoalsFor - M.GoalsAgainst) AS GoalDifference,
    (SUM(M.Wins) * 3) + SUM(M.Draws) AS Points,
    0 as Rankings
FROM Euroteams t
LEFT JOIN MatchResults M ON t.TeamName = M.TeamName
GROUP BY t.TeamId, t.TeamName, t.GroupName;


WITH RankedTeams AS (
SELECT
TeamName,
row_number() OVER (PARTITION BY GroupName ORDER BY Points DESC, GoalDifference DESC, GoalsFor DESC, Wins DESC) AS GroupRank
FROM GroupStandings
)
MERGE INTO GroupStandings gs
USING RankedTeams rt
ON gs. TeamName = rt. TeamName
WHEN MATCHED THEN UPDATE SET gs.Rankings = rt.GroupRank;

num_affected_rows,num_updated_rows,num_deleted_rows,num_inserted_rows
24,24,0,0


In [0]:
%sql
-- Select and display the final GroupStandings table with rankings
SELECT * FROM GroupStandings
ORDER BY GroupName, Rankings;


TeamId,TeamName,GroupName,MatchesPlayed,Wins,Draws,Losses,GoalsFor,GoalsAgainst,GoalDifference,Points,Rankings
2,Scotland,A,3,2,1,0,12,8,4,7,1
3,Hungary,A,3,2,0,1,14,12,2,6,2
4,Switzerland,A,3,1,1,1,9,11,-2,4,3
1,Germany,A,3,0,0,3,8,12,-4,0,4
8,Albania,B,3,2,0,1,13,9,4,6,1
7,Italy,B,3,2,0,1,12,11,1,6,2
6,Croatia,B,3,1,0,2,10,12,-2,3,3
5,Spain,B,3,1,0,2,12,15,-3,3,4
11,Serbia,C,3,1,2,0,8,5,3,5,1
10,Denmark,C,3,1,2,0,13,12,1,5,2



The criteria for determining the best third-placed teams are as follows:

-Points: The teams are ranked based on the number of points earned in the group stage matches (3 points for a win, 1 point for a draw).

-Goal Difference: If two or more teams have the same number of points, their goal difference is considered. Goal difference is calculated by subtracting the total number of goals conceded from the total number of goals scored.

-Goals Scored: If teams still cannot be separated after goal difference, the number of goals scored in all group stage matches is used as the next tiebreaker.

-Fair Play: If teams are still equal, fair play conduct during the tournament is considered. This includes the number of yellow and red cards. The team with fewer disciplinary points advances.

-Drawing of Lots: As a last resort, if teams are still tied after the above criteria, a draw of lots may be used to determine the final rankings.

Positive Goal Difference:

 A team with a positive goal difference (having scored more goals than conceded) is generally considered more favorably than a team with a lower or negative goal difference.
For example, if Team A has a goal difference of +5 (scored 10, conceded 5), and Team B has a goal difference of +2 (scored 8, conceded 6), Team A would be ranked higher in the standings.


Some important points
It is not allowed to use window functions inside WHERE clause.

Round of 16

 Rank all teams within each group based on points, goal difference, goals for, and wins

In [0]:
%sql
CREATE OR REPLACE VIEW Roundof16 AS
WITH CombinedTeams AS (
    (
        SELECT
            Rankings,
            TeamName,
            GroupName
        FROM
           GroupStandings
        WHERE
            Rankings LIKE '1' OR Rankings LIKE '2'
    )
    UNION ALL
    (
        SELECT
            Rankings,
            TeamName,
            GroupName
        FROM
            GroupStandings
        WHERE
            Rankings LIKE '3'
        ORDER BY Points DESC, GoalDifference DESC, GoalsFor DESC, Wins DESC, GoalsAgainst ASC, TeamName
        LIMIT 4
    )
)
SELECT * FROM CombinedTeams
ORDER BY Rankings, GroupName;




In [0]:
%sql
Select * from RoundOf16

Rankings,TeamName,GroupName
1,Scotland,A
1,Albania,B
1,Serbia,C
1,Austria,D
1,Slovakia,E
1,Czech,F
2,Hungary,A
2,Italy,B
2,Denmark,C
2,Netherlands,D


In [0]:
%sql
-- %sql
-- DROP VIEW IF EXISTS RoundOf16;

COMBINATION TABLE

In [0]:
%sql
CREATE or replace  TABLE CombinationforThirdPlace(
  ID INTEGER,
  SELECTEDGROUPS VARCHAR(4),
  TeamB1 CHAR(1),
  TeamC1 CHAR(1),
  TeamE1 CHAR(1),
  TeamF1 CHAR(1)
);

-- Step 2: Insert the provided values into the "THIRDMATCHUP" table
INSERT INTO  CombinationForThirdPlace (ID, SELECTEDGROUPS, TeamB1, TeamC1, TeamE1, TeamF1) VALUES
(1, 'ABCD', 'A', 'D', 'B', 'C'),
(2, 'ABCE', 'A', 'E', 'B', 'C'),
(3, 'ABCF', 'A', 'F', 'B', 'C'),
(4, 'ABDE', 'D', 'E', 'A', 'B'),
(5, 'ABDF', 'D', 'F', 'A', 'B'),
(6, 'ABEF', 'E', 'F', 'B', 'A'),
(7, 'ACDE', 'E', 'D', 'C', 'A'),
(8, 'ACDF', 'F', 'D', 'C', 'A'),
(9, 'ACEF', 'E', 'F', 'C', 'A'),
(10, 'ADEF', 'E', 'F', 'D', 'A'),
(11, 'BCDE', 'E', 'D', 'B', 'C'),
(12, 'BCDF', 'F', 'D', 'C', 'B'),
(13, 'BCEF', 'F', 'E', 'C', 'B'),
(14, 'BDEF', 'F', 'E', 'D', 'B'),
(15, 'CDEF', 'F', 'E', 'D', 'C');

num_affected_rows,num_inserted_rows
15,15


In [0]:
%sql
select * from CombinationforThirdPlace

ID,SELECTEDGROUPS,TeamB1,TeamC1,TeamE1,TeamF1
1,ABCD,A,D,B,C
2,ABCE,A,E,B,C
3,ABCF,A,F,B,C
4,ABDE,D,E,A,B
5,ABDF,D,F,A,B
6,ABEF,E,F,B,A
7,ACDE,E,D,C,A
8,ACDF,F,D,C,A
9,ACEF,E,F,C,A
10,ADEF,E,F,D,A


In [0]:
%sql
SELECT
    TeamName,
    CONCAT_WS('', COLLECT_LIST(GroupCombination[0])) AS FinalCombination
FROM (
    SELECT
        TeamName,
        COLLECT_LIST(GroupName) AS GroupCombination
    FROM
        RoundOf16
    WHERE
        Rankings = 3
    GROUP BY
        TeamName
)
GROUP BY
    TeamName;


TeamName,FinalCombination
Switzerland,A
Croatia,B
Slovenia,C
Greece,F


In [0]:
%sql
CREATE OR REPLACE VIEW MatchedCombinations AS
SELECT
    c.ID,
    c.SELECTEDGROUPS,
    c.TeamB1,
    c.TeamC1,
    c.TeamE1,
    c.TeamF1,
    r.FinalCombination
FROM
    CombinationForThirdPlace c
JOIN (
    SELECT
        REGEXP_REPLACE(CONCAT_WS('', COLLECT_LIST(SUBSTRING(GroupName, 1))), '\\s', '') AS FinalCombination
    FROM
        RoundOf16
    WHERE
        Rankings = 3
) r ON c.SELECTEDGROUPS = r.FinalCombination;


In [0]:
%sql
SELECT
        REGEXP_REPLACE(CONCAT_WS('', COLLECT_LIST(SUBSTRING(GroupName, 1))), '\\s', '') AS FinalCombination

    FROM
        RoundOf16
    WHERE
        Rankings = 3

FinalCombination
ABCF


In [0]:
%sql
SELECT * FROM MatchedCombinations 

ID,SELECTEDGROUPS,TeamB1,TeamC1,TeamE1,TeamF1,FinalCombination
3,ABCF,A,F,B,C,ABCF


UEFA set out the following schedule for the round of 16:
- Match 1: Winner of Group B vs. 3rd-placed team from Group A/D/E/F

- Match 2: Winner of Group A vs. Runner-up of Group C

- Match 3: Winner of Group F vs. 3rd-placed team from Group A/B/C

- Match 4: Runner-up of Group D vs. Runner-up of Group E

- Match 5: Winner of Group E vs. 3rd-placed team from Group A/B/C/D

- Match 6: Winner of Group D vs. Runner-up of Group F

- Match 7: Winner of Group C vs. 3rd-placed team from Group D/E/F

- Match 8: Runner-up of Group A vs. Runner-up of Group B

In [0]:
%sql
CREATE OR REPLACE TABLE MatchesFixtures (
    MatchID INT,
    Team1 VARCHAR(53),
    Team1Goals INT,
    Team2 VARCHAR(53),
    Team2Goals INT,
    Result VARCHAR(53),
    MatchWinner VARCHAR(53),
    MatchStage VARCHAR(53)
);


Creating a new table matchfixtures for inserting data from the quarterfinals


In [0]:
%sql
-- Insert match fixtures data
INSERT INTO MatchesFixtures (MatchID, Team1, Team1Goals, Team2, Team2Goals, Result, MatchWinner, MatchStage)
SELECT
    ROW_NUMBER() OVER (ORDER BY 
      CASE
        WHEN t1.Rankings = 1 AND t1.GroupName = 'B' THEN 1
        WHEN t1.Rankings = 1 AND t1.GroupName = 'A' THEN 2
        WHEN t1.Rankings = 1 AND t1.GroupName = 'F' THEN 3
        WHEN t1.Rankings = 2 AND t1.GroupName = 'D' THEN 4
        WHEN t1.Rankings = 1 AND t1.GroupName = 'E' THEN 5
        WHEN t1.Rankings = 1 AND t1.GroupName = 'D' THEN 6
        WHEN t1.Rankings = 1 AND t1.GroupName = 'C' THEN 7
        WHEN t1.Rankings = 2 AND t1.GroupName = 'A' THEN 8

      END
    ) AS MatchID,
    t1.TeamName AS Team1,
    ROUND(RAND() * 6) AS Team1Goals,
    t2.TeamName AS Team2,
    ROUND(RAND() * 6) AS Team2Goals,
    CONCAT(Team1Goals, '-', Team2Goals) AS Result,
    CASE
        WHEN Team1Goals > Team2Goals THEN t1.TeamName
        WHEN Team2Goals > Team1Goals THEN t2.TeamName
        ELSE
          CASE
            WHEN Team1 < Team2 THEN Team1
            ELSE Team2
          END
    END AS MatchWinner,
    'R16' AS MatchStage
FROM RoundOf16 t1
JOIN RoundOf16 t2 ON (
    (t1.Rankings = 1 AND t1.GroupName = 'B' AND t2.Rankings = 3 AND t2.GroupName = (SELECT TeamB1 FROM MatchedCombinations)) OR
    (t1.Rankings = 1 AND t1.GroupName = 'A' AND t2.Rankings = 2 AND t2.GroupName = 'C') OR
    (t1.Rankings = 1 AND t1.GroupName = 'F' AND t2.Rankings = 3 AND t2.GroupName = (SELECT TeamF1 FROM MatchedCombinations)) OR
    (t1.Rankings = 2 AND t1.GroupName = 'D' AND t2.Rankings = 2 AND t2.GroupName = 'E') OR
    (t1.Rankings = 1 AND t1.GroupName = 'E' AND t2.Rankings = 3 AND t2.GroupName = (SELECT TeamE1 FROM MatchedCombinations)) OR
    (t1.Rankings = 1 AND t1.GroupName = 'D' AND t2.Rankings = 2 AND t2.GroupName = 'F') OR
    (t1.Rankings = 1 AND t1.GroupName = 'C' AND t2.Rankings = 3 AND t2.GroupName = (SELECT TeamC1 FROM MatchedCombinations)) OR
    (t1.Rankings = 2 AND t1.GroupName = 'A' AND t2.Rankings = 2 AND t2.GroupName = 'B')
);

num_affected_rows,num_inserted_rows
8,8


In [0]:
%sql
select * from MatchesFixtures 

MatchID,Team1,Team1Goals,Team2,Team2Goals,Result,MatchWinner,MatchStage
1,Albania,4,Switzerland,6,4.0-6.0,Switzerland,R16
2,Scotland,4,Denmark,3,4.0-3.0,Scotland,R16
3,Czech,5,Slovenia,4,5.0-4.0,Czech,R16
4,Netherlands,2,Ukraine,3,2.0-3.0,Ukraine,R16
5,Slovakia,3,Croatia,5,3.0-5.0,Croatia,R16
6,Austria,3,Türkiye,4,3.0-4.0,Türkiye,R16
7,Serbia,3,Greece,5,3.0-5.0,Greece,R16
8,Hungary,0,Italy,2,0.0-2.0,Italy,R16


In [0]:
%sql
INSERT INTO MatchesFixtures (MatchID, Team1, Team1Goals,Team2,Team2Goals, Result, MatchWinner, MatchStage)
SELECT
    ROW_NUMBER() OVER (ORDER BY 1) + 8 AS MatchID,
    t1.MatchWinner AS Team1,
    ROUND(RAND() * 6) AS Team1Goals,
    t2.MatchWinner AS Team2,
    ROUND(RAND() * 6) AS Team2Goals,
    CONCAT(t1.Team1Goals, '-', t2.Team2Goals) AS Result,
    'Not played' AS MatchWinner,
    'QuarterFinals' AS MatchStage

FROM MatchesFixtures t1
CROSS JOIN MatchesFixtures t2
ON
  (t1.MatchID = 1 AND t2.MatchId = 2) OR
  (t1.MatchID = 3 AND t2.MatchId = 4) OR
  (t1.MatchID = 5 AND t2.MatchId = 6) OR
  (t1.MatchID = 7 AND t2.MatchId = 8);

UPDATE MatchesFixtures
SET MatchWinner = 
    CASE
        WHEN Team1Goals > Team2Goals THEN Team1
        WHEN Team1Goals < Team2Goals THEN Team2
        ELSE
            CASE
                WHEN Team1 < Team2 THEN Team1
                ELSE Team2
            END
    END
WHERE MatchStage = 'QuarterFinals';

num_affected_rows
4


In [0]:
%sql
select * from MatchesFixtures

MatchID,Team1,Team1Goals,Team2,Team2Goals,Result,MatchWinner,MatchStage
1,Albania,4,Switzerland,6,4.0-6.0,Switzerland,R16
2,Scotland,4,Denmark,3,4.0-3.0,Scotland,R16
3,Czech,5,Slovenia,4,5.0-4.0,Czech,R16
4,Netherlands,2,Ukraine,3,2.0-3.0,Ukraine,R16
5,Slovakia,3,Croatia,5,3.0-5.0,Croatia,R16
6,Austria,3,Türkiye,4,3.0-4.0,Türkiye,R16
7,Serbia,3,Greece,5,3.0-5.0,Greece,R16
8,Hungary,0,Italy,2,0.0-2.0,Italy,R16
9,Switzerland,1,Scotland,2,4-3,Scotland,QuarterFinals
10,Czech,5,Ukraine,3,5-3,Czech,QuarterFinals


Semi-Finale


In [0]:
%sql
INSERT INTO MatchesFixtures (MatchID, Team1, Team1Goals,Team2,Team2Goals, Result, MatchWinner, MatchStage)
SELECT
    ROW_NUMBER() OVER (ORDER BY 1) + 12 AS MatchID,
    t1.MatchWinner AS Team1,
    ROUND(RAND() * 6) AS Team1Goals,
    t2.MatchWinner AS Team2,
    ROUND(RAND() * 6) AS Team2Goals,
    CONCAT(t1.Team1Goals, '-', t2.Team2Goals) AS Result,
    'Not played' AS MatchWinner,
    'SemiFinals' AS MatchStage

FROM MatchesFixtures t1
CROSS JOIN MatchesFixtures t2
ON
  (t1.MatchID = 9 AND t2.MatchId = 10) OR
  (t1.MatchID = 11 AND t2.MatchId = 12);

UPDATE MatchesFixtures
SET MatchWinner = 
    CASE
        WHEN Team1Goals > Team2Goals THEN Team1
        WHEN Team1Goals < Team2Goals THEN Team2
        ELSE
            CASE
                WHEN Team1 < Team2 THEN Team1
                ELSE Team2
            END
    END
WHERE MatchStage = 'SemiFinals';

num_affected_rows
2


In [0]:
%sql
select * from MatchesFixtures

MatchID,Team1,Team1Goals,Team2,Team2Goals,Result,MatchWinner,MatchStage
1,Albania,4,Switzerland,6,4.0-6.0,Switzerland,R16
2,Scotland,4,Denmark,3,4.0-3.0,Scotland,R16
3,Czech,5,Slovenia,4,5.0-4.0,Czech,R16
4,Netherlands,2,Ukraine,3,2.0-3.0,Ukraine,R16
5,Slovakia,3,Croatia,5,3.0-5.0,Croatia,R16
6,Austria,3,Türkiye,4,3.0-4.0,Türkiye,R16
7,Serbia,3,Greece,5,3.0-5.0,Greece,R16
8,Hungary,0,Italy,2,0.0-2.0,Italy,R16
9,Switzerland,1,Scotland,2,4-3,Scotland,QuarterFinals
10,Czech,5,Ukraine,3,5-3,Czech,QuarterFinals


#Third place

In [0]:
%sql
INSERT INTO MatchesFixtures (MatchID, Team1, Team1Goals,Team2,Team2Goals, Result, MatchWinner, MatchStage)
SELECT
    ROW_NUMBER() OVER (ORDER BY 1) + 14 AS MatchID,
    CASE
      WHEN
        t1.MatchWinner!= t1.Team1 THEN t1.Team1 ELSE t1.Team2
        END AS Team1,
    ROUND(RAND() * 6) AS Team1Goals,
    CASE
      WHEN
        t2.MatchWinner!= t2.Team2 THEN t2.Team2 ELSE t2.Team1
        END AS Team2,
    ROUND(RAND() * 6) AS Team2Goals,
    CONCAT(t1.Team1Goals, '-', t2.Team2Goals) AS Result,
    'Not Played' AS MatchWinner,
    'ThirdPlace' AS MatchStage
FROM MatchesFixtures t1
CROSS JOIN MatchesFixtures t2
ON
  (t1.MatchID = 13 AND t2.MatchId = 14);

UPDATE MatchesFixtures
SET MatchWinner = 
    CASE
        WHEN Team1Goals > Team2Goals THEN Team1
        WHEN Team1Goals < Team2Goals THEN Team2
        ELSE
            CASE
                WHEN Team1 < Team2 THEN Team1
                ELSE Team2
            END
    END
WHERE MatchStage = 'ThirdPlace';


num_affected_rows
1


In [0]:
%sql
select * from MatchesFixtures

MatchID,Team1,Team1Goals,Team2,Team2Goals,Result,MatchWinner,MatchStage
1,Albania,4,Switzerland,6,4.0-6.0,Switzerland,R16
2,Scotland,4,Denmark,3,4.0-3.0,Scotland,R16
3,Czech,5,Slovenia,4,5.0-4.0,Czech,R16
4,Netherlands,2,Ukraine,3,2.0-3.0,Ukraine,R16
5,Slovakia,3,Croatia,5,3.0-5.0,Croatia,R16
6,Austria,3,Türkiye,4,3.0-4.0,Türkiye,R16
7,Serbia,3,Greece,5,3.0-5.0,Greece,R16
8,Hungary,0,Italy,2,0.0-2.0,Italy,R16
9,Switzerland,1,Scotland,2,4-3,Scotland,QuarterFinals
10,Czech,5,Ukraine,3,5-3,Czech,QuarterFinals


#Finals

In [0]:
%sql
INSERT INTO MatchesFixtures (MatchID, Team1, Team1Goals,Team2,Team2Goals, Result, MatchWinner, MatchStage)
SELECT
    ROW_NUMBER() OVER (ORDER BY 1) + 15 AS MatchID,
    t1.MatchWinner AS Team1,
    ROUND(RAND() * 6) AS Team1Goals,
    t2.MatchWinner AS Team2,
    ROUND(RAND() * 6) AS Team2Goals,
    CONCAT(t1.Team1Goals, '-', t2.Team2Goals) AS Result,
    'Not played' AS MatchWinner,
    'Finals' AS MatchStage

FROM MatchesFixtures t1
CROSS JOIN MatchesFixtures t2
ON
  (t1.MatchID = 13 AND t2.MatchId = 14);

UPDATE MatchesFixtures
SET MatchWinner = 
    CASE
        WHEN Team1Goals > Team2Goals THEN Team1
        WHEN Team1Goals < Team2Goals THEN Team2
        ELSE
            CASE
                WHEN Team1 < Team2 THEN Team1
                ELSE Team2
            END
    END
WHERE MatchStage = 'Finals';

num_affected_rows
1


In [0]:
%sql
select * from MatchesFixtures

MatchID,Team1,Team1Goals,Team2,Team2Goals,Result,MatchWinner,MatchStage
1,Albania,4,Switzerland,6,4.0-6.0,Switzerland,R16
2,Scotland,4,Denmark,3,4.0-3.0,Scotland,R16
3,Czech,5,Slovenia,4,5.0-4.0,Czech,R16
4,Netherlands,2,Ukraine,3,2.0-3.0,Ukraine,R16
5,Slovakia,3,Croatia,5,3.0-5.0,Croatia,R16
6,Austria,3,Türkiye,4,3.0-4.0,Türkiye,R16
7,Serbia,3,Greece,5,3.0-5.0,Greece,R16
8,Hungary,0,Italy,2,0.0-2.0,Italy,R16
9,Switzerland,1,Scotland,2,4-3,Scotland,QuarterFinals
10,Czech,5,Ukraine,3,5-3,Czech,QuarterFinals


In [0]:
import pandas as pd
from pyspark.sql import SparkSession

In [0]:
spark = SparkSession.builder \
    .appName("Visualizing_Euro_Cup") \
    .getOrCreate()

Group Stage Visulaization

In [0]:
Groupstage = spark.sql("SELECT * FROM GroupStandings ORDER BY GroupName,Rankings") 
Groupstage_pd = Groupstage.toPandas()


In [0]:
display(Groupstage_pd)

TeamId,TeamName,GroupName,MatchesPlayed,Wins,Draws,Losses,GoalsFor,GoalsAgainst,GoalDifference,Points,Rankings
2,Scotland,A,3,2,1,0,12,8,4,7,1
3,Hungary,A,3,2,0,1,14,12,2,6,2
4,Switzerland,A,3,1,1,1,9,11,-2,4,3
1,Germany,A,3,0,0,3,8,12,-4,0,4
8,Albania,B,3,2,0,1,13,9,4,6,1
7,Italy,B,3,2,0,1,12,11,1,6,2
6,Croatia,B,3,1,0,2,10,12,-2,3,3
5,Spain,B,3,1,0,2,12,15,-3,3,4
11,Serbia,C,3,1,2,0,8,5,3,5,1
10,Denmark,C,3,1,2,0,13,12,1,5,2


Groupstage visualization

In [0]:
visualization = pd.DataFrame({
    'Rank': Groupstage_pd ['Rankings'],
    'Teams': Groupstage_pd ['TeamName'],
    'Group': Groupstage_pd ['GroupName'],
    'played_games':Groupstage_pd ['MatchesPlayed'],
    'W':  Groupstage_pd ['Wins'],
    'D': Groupstage_pd ['Draws'],
    'L':  Groupstage_pd ['Losses'],
    'GF':  Groupstage_pd ['GoalsFor'],
    'GA': Groupstage_pd ['GoalsAgainst'],
    'GD':  Groupstage_pd ['GoalDifference'],
    'Pts':  Groupstage_pd ['Points']
})


#empty row for group stage
empty_row = pd.DataFrame({col: [''] for col in visualization.columns})

In [0]:
left_table = visualization[visualization['Group'].str.contains('^[ABC]')]

right_table = visualization[visualization['Group'].str.contains('^[DEF]')]

left_table.reset_index(drop=True, inplace=True)
right_table.reset_index(drop=True, inplace=True)

display(left_table)
display(right_table)

Rank,Teams,Group,played_games,W,D,L,GF,GA,GD,Pts
1,Scotland,A,3,2,1,0,12,8,4,7
2,Hungary,A,3,2,0,1,14,12,2,6
3,Switzerland,A,3,1,1,1,9,11,-2,4
4,Germany,A,3,0,0,3,8,12,-4,0
1,Albania,B,3,2,0,1,13,9,4,6
2,Italy,B,3,2,0,1,12,11,1,6
3,Croatia,B,3,1,0,2,10,12,-2,3
4,Spain,B,3,1,0,2,12,15,-3,3
1,Serbia,C,3,1,2,0,8,5,3,5
2,Denmark,C,3,1,2,0,13,12,1,5


Rank,Teams,Group,played_games,W,D,L,GF,GA,GD,Pts
1,Austria,D,3,2,1,0,10,6,4,7
2,Netherlands,D,3,2,0,1,12,9,3,6
3,Finland,D,3,1,0,2,9,14,-5,3
4,France,D,3,0,1,2,6,8,-2,1
1,Slovakia,E,3,2,1,0,13,5,8,7
2,Ukraine,E,3,1,1,1,8,7,1,4
3,Belgium,E,3,1,0,2,8,12,-4,3
4,Romania,E,3,1,0,2,8,13,-5,3
1,Czech,F,3,3,0,0,14,6,8,9
2,Türkiye,F,3,2,0,1,11,7,4,6


In [0]:
import numpy as np

Displaying Left Table

In [0]:
blank_row = {col: '' for col in visualization.columns}

blank_row_df = pd.DataFrame([blank_row])

# Divide the teams into groups of four
groups = [left_table.iloc[i:i+4] for i in range(0, len(left_table), 4)]

# Iterate over the groups and add a blank row except for the last one
for i in range(len(groups)):
    if i < len(groups) - 1:
        groups[i] = pd.concat([groups[i], blank_row_df], ignore_index=True)

# Concatenate the groups to form the final table with spacing
left_table_with_space = pd.concat(groups).reset_index(drop=True)

# Rename the columns with the new names
left_table_with_space = left_table_with_space.rename(columns={
    'Rank': 'L_Rank',
    'Teams': 'L_Teams',
    'Group': 'L_Group',
    'played_games': 'L_MP',
    'W': 'L_W',
    'D': 'L_D',
    'L': 'L_L',
    'GF': 'L_GF',
    'GA': 'L_GA',
    'GD': 'L_GD',
    'Pts': 'L_Pts'
})

# Calculate the middle part of the full table
half_table_index = len(left_table_with_space) // 2

# Display the table with spacing
display(left_table_with_space)
print(half_table_index)


  Could not convert '' with type str: tried to convert to int64
Attempting non-optimization as 'spark.sql.execution.arrow.pyspark.fallback.enabled' is set to true.
  warn(msg)


L_Rank,L_Teams,L_Group,L_MP,L_W,L_D,L_L,L_GF,L_GA,L_GD,L_Pts
1.0,Scotland,A,3.0,2.0,1.0,0.0,12.0,8.0,4.0,7.0
2.0,Hungary,A,3.0,2.0,0.0,1.0,14.0,12.0,2.0,6.0
3.0,Switzerland,A,3.0,1.0,1.0,1.0,9.0,11.0,-2.0,4.0
4.0,Germany,A,3.0,0.0,0.0,3.0,8.0,12.0,-4.0,0.0
,,,,,,,,,,
1.0,Albania,B,3.0,2.0,0.0,1.0,13.0,9.0,4.0,6.0
2.0,Italy,B,3.0,2.0,0.0,1.0,12.0,11.0,1.0,6.0
3.0,Croatia,B,3.0,1.0,0.0,2.0,10.0,12.0,-2.0,3.0
4.0,Spain,B,3.0,1.0,0.0,2.0,12.0,15.0,-3.0,3.0
,,,,,,,,,,


7


In [0]:
print(left_table_with_space.shape)


(14, 11)


Displaying Right Table

In [0]:
# Divide the teams into groups of four
groups = [right_table.iloc[i:i+4] for i in range(0, len(right_table), 4)]

for i in range(len(groups)):
    if i < len(groups) - 1:
        groups[i] = pd.concat([groups[i], blank_row_df], ignore_index=True)

right_table_with_space = pd.concat(groups).reset_index(drop=True)

right_table_with_space = right_table_with_space.rename(columns={
    'Rank': 'R_Rank',
    'Teams': 'R_Teams',
    'Group': 'R_Group',
    'played_games': 'R_MP',
    'W': 'R_W',
    'D': 'R_D',
    'L': 'R_L',
    'GF': 'R_GF',
    'GA': 'R_GA',
    'GD': 'R_GD',
    'Pts': 'R_Pts'
})

display(right_table_with_space)


  Could not convert '' with type str: tried to convert to int64
Attempting non-optimization as 'spark.sql.execution.arrow.pyspark.fallback.enabled' is set to true.
  warn(msg)


R_Rank,R_Teams,R_Group,R_MP,R_W,R_D,R_L,R_GF,R_GA,R_GD,R_Pts
1.0,Austria,D,3.0,2.0,1.0,0.0,10.0,6.0,4.0,7.0
2.0,Netherlands,D,3.0,2.0,0.0,1.0,12.0,9.0,3.0,6.0
3.0,Finland,D,3.0,1.0,0.0,2.0,9.0,14.0,-5.0,3.0
4.0,France,D,3.0,0.0,1.0,2.0,6.0,8.0,-2.0,1.0
,,,,,,,,,,
1.0,Slovakia,E,3.0,2.0,1.0,0.0,13.0,5.0,8.0,7.0
2.0,Ukraine,E,3.0,1.0,1.0,1.0,8.0,7.0,1.0,4.0
3.0,Belgium,E,3.0,1.0,0.0,2.0,8.0,12.0,-4.0,3.0
4.0,Romania,E,3.0,1.0,0.0,2.0,8.0,13.0,-5.0,3.0
,,,,,,,,,,


In [0]:
print(right_table_with_space.shape)


(14, 11)


Displaying Round of 16

In [0]:
#getting data from sql table and into pandas dataframe
round_of_16 = spark.sql("SELECT * FROM MatchesFixtures WHERE MatchStage = 'R16'")
round_of_16_pd = round_of_16.toPandas()

#converting to string and removing .0 from floats occuring after conversion
round_of_16_pd = round_of_16_pd.applymap(lambda x: str(x).split(".")[0])

display(round_of_16_pd)

MatchID,Team1,Team1Goals,Team2,Team2Goals,Result,MatchWinner,MatchStage
1,Albania,4,Switzerland,6,4,Switzerland,R16
2,Scotland,4,Denmark,3,4,Scotland,R16
3,Czech,5,Slovenia,4,5,Czech,R16
4,Netherlands,2,Ukraine,3,2,Ukraine,R16
5,Slovakia,3,Croatia,5,3,Croatia,R16
6,Austria,3,Türkiye,4,3,Türkiye,R16
7,Serbia,3,Greece,5,3,Greece,R16
8,Hungary,0,Italy,2,0,Italy,R16


In [0]:
#storing required data in df
r16_df = pd.DataFrame(columns=['R16_Teams', 'R16_Score'])

#displaying all teams and respective scores in columns
for index, row in round_of_16_pd.iterrows():
    team1 = row['Team1']
    team1goals = row['Team1Goals']
    team2 = row['Team2']
    team2goals = row['Team2Goals']
    r16_df = pd.concat([r16_df, pd.DataFrame({'R16_Score': [team1goals], 'R16_Teams': [team1]})], ignore_index=True)
    r16_df = pd.concat([r16_df, pd.DataFrame({'R16_Score': [team2goals], 'R16_Teams': [team2]})], ignore_index=True)

display(r16_df)


R16_Teams,R16_Score
Albania,4
Switzerland,6
Scotland,4
Denmark,3
Czech,5
Slovenia,4
Netherlands,2
Ukraine,3
Slovakia,3
Croatia,5


In [0]:
# calculating midpoint of the DataFrame
midpoint_index = len(r16_df) // 2

# splitting DataFrame into two equal halves
left_r16 = r16_df.iloc[:midpoint_index].reset_index(drop=True)
right_r16 = r16_df.iloc[midpoint_index:].reset_index(drop=True)

# displaying the split DataFrames
display(left_r16)
display(right_r16)


R16_Teams,R16_Score
Albania,4
Switzerland,6
Scotland,4
Denmark,3
Czech,5
Slovenia,4
Netherlands,2
Ukraine,3


R16_Teams,R16_Score
Slovakia,3
Croatia,5
Austria,3
Türkiye,4
Serbia,3
Greece,5
Hungary,0
Italy,2


In [0]:

empty_row_r16 = pd.DataFrame({col: [''] for col in r16_df.columns})


grouped_r16_left = [left_r16.iloc[i:i+2] for i in range(0, len(left_r16), 2)]

for i in range(len(grouped_r16_left)):
    if i < len(grouped_r16_left):
        grouped_r16_left[i] = pd.concat([empty_row_r16, grouped_r16_left[i], empty_row_r16], ignore_index=True)


spaced_r16_left = pd.concat(grouped_r16_left).reset_index(drop=True)


new_column_names_r16_left = {'R16_Teams': 'L_R16_Teams',
                              'R16_Score': 'L_R16_Score'}
spaced_r16_left = spaced_r16_left.rename(columns=new_column_names_r16_left)


display(spaced_r16_left)


L_R16_Teams,L_R16_Score
,
Albania,4.0
Switzerland,6.0
,
,
Scotland,4.0
Denmark,3.0
,
,
Czech,5.0


In [0]:
# Group the right_r16 DataFrame into pairs
grouped_r16_right = [right_r16.iloc[i:i+2] for i in range(0, len(right_r16), 2)]

for i in range(len(grouped_r16_right)):
    if i < len(grouped_r16_right):
        grouped_r16_right[i] = pd.concat([empty_row_r16, grouped_r16_right[i], empty_row_r16], ignore_index=True)


spaced_r16_right = pd.concat(grouped_r16_right).reset_index(drop=True)


new_column_names_r16_right = {'R16_Teams': 'R_R16_Teams',
                               'R16_Score': 'R_R16_Score'}
spaced_r16_right = spaced_r16_right.rename(columns=new_column_names_r16_right)


display(spaced_r16_right)


R_R16_Teams,R_R16_Score
,
Slovakia,3.0
Croatia,5.0
,
,
Austria,3.0
Türkiye,4.0
,
,
Serbia,3.0


Displaying Quarter Finals

In [0]:
# Getting data from SQL table and into Pandas DataFrame for quarter finals
quarter_finals_df = spark.sql("SELECT * FROM MatchesFixtures WHERE MatchStage = 'QuarterFinals'")
quarter_finals_df_pd = quarter_finals_df.toPandas()

# Converting to string and removing .0 from floats occurring after conversion
quarter_finals_df_pd = quarter_finals_df_pd.applymap(lambda x: str(x).split(".")[0])

display(quarter_finals_df_pd)


MatchID,Team1,Team1Goals,Team2,Team2Goals,Result,MatchWinner,MatchStage
9,Switzerland,1,Scotland,2,4-3,Scotland,QuarterFinals
10,Czech,5,Ukraine,3,5-3,Czech,QuarterFinals
11,Croatia,2,Türkiye,4,3-4,Türkiye,QuarterFinals
12,Greece,5,Italy,6,3-2,Italy,QuarterFinals


In [0]:
# Storing required data in a DataFrame for quarter finals visualization
qf_visualization = pd.DataFrame(columns=['QF_Teams', 'QF_Score'])

# Displaying all teams and respective scores in columns
for index, row in quarter_finals_df_pd.iterrows():
    team1 = row['Team1']
    team1_goals = row['Team1Goals']
    team2 = row['Team2']
    team2_goals = row['Team2Goals']
    qf_visualization = pd.concat([qf_visualization, pd.DataFrame({'QF_Teams': [team1], 'QF_Score': [team1_goals]})], ignore_index=True)
    qf_visualization = pd.concat([qf_visualization, pd.DataFrame({'QF_Teams': [team2], 'QF_Score': [team2_goals]})], ignore_index=True)

display(qf_visualization)


QF_Teams,QF_Score
Switzerland,1
Scotland,2
Czech,5
Ukraine,3
Croatia,2
Türkiye,4
Greece,5
Italy,6


In [0]:

midpoint_qf = len(qf_visualization) // 2


left_qf_visualization = qf_visualization.iloc[:midpoint_qf]
right_qf_visualization = qf_visualization.iloc[midpoint_qf:]


left_qf_visualization.reset_index(drop=True, inplace=True)
right_qf_visualization.reset_index(drop=True, inplace=True)


display(left_qf_visualization)
display(right_qf_visualization)


QF_Teams,QF_Score
Switzerland,1
Scotland,2
Czech,5
Ukraine,3


QF_Teams,QF_Score
Croatia,2
Türkiye,4
Greece,5
Italy,6


In [0]:
# Creating an empty row for quarter finals
empty_row_qf = pd.DataFrame({col: [''] for col in qf_visualization.columns})

# Grouping left quarter finals DataFrame into pairs
grouped_qf_left = [left_qf_visualization.iloc[i:i+2] for i in range(0, len(left_qf_visualization), 2)]

# Adding empty rows between the grouped pairs
for i in range(len(grouped_qf_left)):
    if i < len(grouped_qf_left):
        grouped_qf_left[i] = pd.concat([empty_row_qf, empty_row_qf, empty_row_qf, empty_row_qf, grouped_qf_left[i]], ignore_index=True)

# Concatenating the modified groups back into a DataFrame
spaced_qf_left = pd.concat(grouped_qf_left).reset_index(drop=True)

# Adding additional empty rows at the bottom
for _ in range(4):
    spaced_qf_left = pd.concat([spaced_qf_left, empty_row_qf], ignore_index=True)

# Renaming the columns with new names
new_column_names_qf_left = {'QF_Teams': 'L_QF_Teams',
                             'QF_Score': 'L_QF_Score'}
spaced_qf_left = spaced_qf_left.rename(columns=new_column_names_qf_left)

# Displaying the spaced quarter finals DataFrame for the left side
display(spaced_qf_left)


L_QF_Teams,L_QF_Score
,
,
,
,
Switzerland,1.0
Scotland,2.0
,
,
,
,


In [0]:

empty_row_qf = pd.DataFrame({col: [''] for col in qf_visualization.columns})


grouped_qf_right = [right_qf_visualization.iloc[i:i+2] for i in range(0, len(right_qf_visualization), 2)]

for i in range(len(grouped_qf_right)):
    if i < len(grouped_qf_right):
        grouped_qf_right[i] = pd.concat([empty_row_qf, empty_row_qf, empty_row_qf, empty_row_qf, grouped_qf_right[i]], ignore_index=True)


spaced_qf_right = pd.concat(grouped_qf_right).reset_index(drop=True)


for _ in range(4):
    spaced_qf_right = pd.concat([spaced_qf_right, empty_row_qf], ignore_index=True)


new_column_names_qf_right = {'QF_Teams': 'R_QF_Teams',
                              'QF_Score': 'R_QF_Score'}
spaced_qf_right = spaced_qf_right.rename(columns=new_column_names_qf_right)

display(spaced_qf_right)


R_QF_Teams,R_QF_Score
,
,
,
,
Croatia,2.0
Türkiye,4.0
,
,
,
,


Displaying semi-finals

In [0]:
sf = spark.sql("SELECT * FROM MatchesFixtures WHERE MatchStage = 'SemiFinals'")
sf_pd = sf.toPandas()
sf_pd = sf_pd.applymap(lambda x: str(x).split(".")[0])

# Storing required data in a DataFrame for semi-finals
sf_visualization = pd.DataFrame(columns=['SF_Teams', 'SF_Score'])

# Displaying all teams and respective scores in columns
for index, row in sf_pd.iterrows():
    team1 = row['Team1']
    team1_goals = row['Team1Goals']
    team2 = row['Team2']
    team2_goals = row['Team2Goals']
    sf_visualization = pd.concat([sf_visualization, pd.DataFrame({'SF_Teams': [team1], 'SF_Score': [team1_goals]})], ignore_index=True)
    sf_visualization = pd.concat([sf_visualization, pd.DataFrame({'SF_Teams': [team2], 'SF_Score': [team2_goals]})], ignore_index=True)

display(sf_pd)
display(sf_visualization)


MatchID,Team1,Team1Goals,Team2,Team2Goals,Result,MatchWinner,MatchStage
13,Scotland,4,Czech,1,1-3,Scotland,SemiFinals
14,Türkiye,3,Italy,1,2-6,Türkiye,SemiFinals


SF_Teams,SF_Score
Scotland,4
Czech,1
Türkiye,3
Italy,1


In [0]:
# calculating midpoint of the dataFrame
midpoint_index2 = len(sf_visualization) // 2

# splitting dataFrame into two equal halves
left_sf_visualization = sf_visualization.iloc[:midpoint_index2]
right_sf_visualization = sf_visualization.iloc[midpoint_index2:]

# index reset
left_sf_visualization.reset_index(drop=True, inplace=True)
right_sf_visualization.reset_index(drop=True, inplace=True)

# displaying the split DataFrames
display(left_sf_visualization)
display(right_sf_visualization)


SF_Teams,SF_Score
Scotland,4
Czech,1


SF_Teams,SF_Score
Türkiye,3
Italy,1


In [0]:
# Creating an empty row for semi-finals
empty_row_sf = pd.DataFrame({col: [''] for col in sf_visualization.columns})

# Grouping left semi-finals DataFrame into individual rows
grouped_sf_left = [left_sf_visualization.iloc[i:i+1] for i in range(0, len(left_sf_visualization), 1)]

# Adding empty rows between the grouped rows
for i in range(len(grouped_sf_left)):
    if i < len(grouped_sf_left):
        grouped_sf_left[i] = pd.concat([grouped_sf_left[i], empty_row_sf, empty_row_sf], ignore_index=True)

# Concatenating the modified groups back into a DataFrame
spaced_sf_left = pd.concat(grouped_sf_left).reset_index(drop=True)

# Adding additional empty rows at the bottom
for _ in range(6):
    spaced_sf_left = pd.concat([empty_row_sf, spaced_sf_left], ignore_index=True)

for _ in range(4):
    spaced_sf_left = pd.concat([spaced_sf_left, empty_row_sf], ignore_index=True)

# Renaming the columns with new names
new_column_names_sf_left = {'SF_Teams': 'L_SF_Teams',
                             'SF_Score': 'L_SF_Score'}

spaced_sf_left = spaced_sf_left.rename(columns=new_column_names_sf_left)
display(spaced_sf_left)


L_SF_Teams,L_SF_Score
,
,
,
,
,
,
Scotland,4.0
,
,
Czech,1.0


In [0]:
# Grouping right semi-finals DataFrame into individual rows
grouped_sf_right = [right_sf_visualization.iloc[i:i+1] for i in range(0, len(right_sf_visualization), 1)]


for i in range(len(grouped_sf_right)):
    if i < len(grouped_sf_right):
        grouped_sf_right[i] = pd.concat([grouped_sf_right[i], empty_row_sf, empty_row_sf], ignore_index=True)


spaced_sf_right = pd.concat(grouped_sf_right).reset_index(drop=True)


for _ in range(6):
    spaced_sf_right = pd.concat([empty_row_sf, spaced_sf_right], ignore_index=True)

for _ in range(4):
    spaced_sf_right = pd.concat([spaced_sf_right, empty_row_sf], ignore_index=True)


new_column_names_sf_right = {'SF_Teams': 'R_SF_Teams',
                              'SF_Score': 'R_SF_Score'}

spaced_sf_right = spaced_sf_right.rename(columns=new_column_names_sf_right)

display(spaced_sf_right)


R_SF_Teams,R_SF_Score
,
,
,
,
,
,
Türkiye,3.0
,
,
Italy,1.0


Displaying Final Match

In [0]:

fin_df = spark.sql("SELECT * FROM MatchesFixtures WHERE MatchStage = 'Finals'")
fin_pd = fin_df.toPandas()
fin_pd = fin_pd.applymap(lambda x: str(x).split(".")[0])

fin_visualization = pd.DataFrame(columns=['Fin_Teams', 'Fin_Score'])

for index, row in fin_pd.iterrows():
    team1 = row['Team1']
    team1_goals = row['Team1Goals']
    team2 = row['Team2']
    team2_goals = row['Team2Goals']
    fin_visualization = pd.concat([fin_visualization, pd.DataFrame({'Fin_Teams': [team1], 'Fin_Score': [team1_goals]})], ignore_index=True)
    fin_visualization = pd.concat([fin_visualization, pd.DataFrame({'Fin_Teams': [team2], 'Fin_Score': [team2_goals]})], ignore_index=True)

display(fin_pd)
display(fin_visualization)


MatchID,Team1,Team1Goals,Team2,Team2Goals,Result,MatchWinner,MatchStage
16,Scotland,2,Türkiye,3,4-1,Türkiye,Finals


Fin_Teams,Fin_Score
Scotland,2
Türkiye,3


In [0]:
fin_row_pd = pd.DataFrame({col: [''] for col in fin_visualization.columns})

fin1 = fin_visualization.iloc[:1]  
fin2 = fin_visualization.iloc[1:]  

for _ in range(8):
    fin1 = pd.concat([fin_row_pd, fin1], ignore_index=True)
    fin2 = pd.concat([fin_row_pd, fin2], ignore_index=True)

for _ in range(7):
    fin1 = pd.concat([fin1, fin_row_pd], ignore_index=True)
    fin2 = pd.concat([fin2, fin_row_pd], ignore_index=True)

lfin_new_column_name = {'Fin_Teams': 'L_Fin_Teams',
                        'Fin_Score': 'L_Fin_Score'}
rfin_new_column_name = {'Fin_Teams': 'R_Fin_Teams',
                        'Fin_Score': 'R_Fin_Score'}

fin1 = fin1.rename(columns=lfin_new_column_name)
fin2 = fin2.rename(columns=rfin_new_column_name)

rfin_new_column_order = ['R_Fin_Score', 'R_Fin_Teams']
fin2 = fin2[rfin_new_column_order]

display(fin1)
display(fin2)


L_Fin_Teams,L_Fin_Score
,
,
,
,
,
,
,
,
Scotland,2.0
,


R_Fin_Score,R_Fin_Teams
,
,
,
,
,
,
,
,
3.0,Türkiye
,


In [0]:
winner = spark.sql("SELECT MatchWinner FROM MatchesFixtures WHERE MatchStage = 'Finals'")
winner_pd = winner.toPandas()
winner_pd = winner_pd.applymap(lambda x: str(x).split(".")[0])

win_row_pd = pd.DataFrame({col: [''] for col in winner_pd.columns})

for _ in range(7):
    winner_pd = pd.concat([win_row_pd, winner_pd], ignore_index=True)

for _ in range(8):
    winner_pd = pd.concat([winner_pd, win_row_pd], ignore_index=True)

fin_col = {"MatchWinner": "EuroCupWinner"}
winner_pd = winner_pd.rename(columns=fin_col)
display(winner_pd)


EuroCupWinner
Türkiye


In [0]:
# Concatenating DataFrames with keys
concatenated_df = pd.concat([left_table_with_space,
                        spaced_r16_left, 
                        spaced_qf_left, 
                        spaced_sf_left,
                        fin1,
                        winner_pd,
                        fin2,
                        spaced_sf_right,
                        spaced_qf_right,
                        spaced_r16_right,
                        right_table_with_space
                         ], 
                        axis=1, 
                        )

# Displaying the concatenated DataFrame




     

In [0]:
display(concatenated_df)

  Could not convert '' with type str: tried to convert to int64
Attempting non-optimization as 'spark.sql.execution.arrow.pyspark.fallback.enabled' is set to true.
  warn(msg)


L_Rank,L_Teams,L_Group,L_MP,L_W,L_D,L_L,L_GF,L_GA,L_GD,L_Pts,L_R16_Teams,L_R16_Score,L_QF_Teams,L_QF_Score,L_SF_Teams,L_SF_Score,L_Fin_Teams,L_Fin_Score,EuroCupWinner,R_Fin_Score,R_Fin_Teams,R_SF_Teams,R_SF_Score,R_QF_Teams,R_QF_Score,R_R16_Teams,R_R16_Score,R_Rank,R_Teams,R_Group,R_MP,R_W,R_D,R_L,R_GF,R_GA,R_GD,R_Pts
1.0,Scotland,A,3.0,2.0,1.0,0.0,12.0,8.0,4.0,7.0,,,,,,,,,,,,,,,,,,1.0,Austria,D,3.0,2.0,1.0,0.0,10.0,6.0,4.0,7.0
2.0,Hungary,A,3.0,2.0,0.0,1.0,14.0,12.0,2.0,6.0,Albania,4.0,,,,,,,,,,,,,,Slovakia,3.0,2.0,Netherlands,D,3.0,2.0,0.0,1.0,12.0,9.0,3.0,6.0
3.0,Switzerland,A,3.0,1.0,1.0,1.0,9.0,11.0,-2.0,4.0,Switzerland,6.0,,,,,,,,,,,,,,Croatia,5.0,3.0,Finland,D,3.0,1.0,0.0,2.0,9.0,14.0,-5.0,3.0
4.0,Germany,A,3.0,0.0,0.0,3.0,8.0,12.0,-4.0,0.0,,,,,,,,,,,,,,,,,,4.0,France,D,3.0,0.0,1.0,2.0,6.0,8.0,-2.0,1.0
,,,,,,,,,,,,,Switzerland,1.0,,,,,,,,,,Croatia,2.0,,,,,,,,,,,,,
1.0,Albania,B,3.0,2.0,0.0,1.0,13.0,9.0,4.0,6.0,Scotland,4.0,Scotland,2.0,,,,,,,,,,Türkiye,4.0,Austria,3.0,1.0,Slovakia,E,3.0,2.0,1.0,0.0,13.0,5.0,8.0,7.0
2.0,Italy,B,3.0,2.0,0.0,1.0,12.0,11.0,1.0,6.0,Denmark,3.0,,,Scotland,4.0,,,,,,Türkiye,3.0,,,Türkiye,4.0,2.0,Ukraine,E,3.0,1.0,1.0,1.0,8.0,7.0,1.0,4.0
3.0,Croatia,B,3.0,1.0,0.0,2.0,10.0,12.0,-2.0,3.0,,,,,,,,,Türkiye,,,,,,,,,3.0,Belgium,E,3.0,1.0,0.0,2.0,8.0,12.0,-4.0,3.0
4.0,Spain,B,3.0,1.0,0.0,2.0,12.0,15.0,-3.0,3.0,,,,,,,Scotland,2.0,,3.0,Türkiye,,,,,,,4.0,Romania,E,3.0,1.0,0.0,2.0,8.0,13.0,-5.0,3.0
,,,,,,,,,,,Czech,5.0,,,Czech,1.0,,,,,,Italy,1.0,,,Serbia,3.0,,,,,,,,,,,
