In [0]:
%python

dbutils.fs.rm("dbfs:/user/hive/warehouse/eurocup", recurse=True)


Out[5]: True

In [0]:
%python
dbutils.fs.rm("dbfs:/user/hive/warehouse/matches", recurse=True)


Out[6]: True

In [0]:
%python
dbutils.fs.rm("dbfs:/user/hive/warehouse/grouprank", recurse=True)

Out[7]: True

In [0]:
%python
dbutils.fs.rm("dbfs:/user/hive/warehouse/thirdplacecombination", recurse=True)

Out[8]: True

In [0]:
 %python
 spark.conf.set("spark.databricks.delta.commitValidation.enabled", "False")


In [0]:
%sql
--creating table Eurocup for all participating team 
CREATE OR REPLACE TABLE euroCup(
  teamId INT,
  teamName VARCHAR(255),
  groupName VARCHAR(255)
)

In [0]:
%sql
select * from euroCup

teamId,teamName,groupName


In [0]:
%sql
--inserting participating teams
INSERT INTO euroCup (teamId, teamName, groupName)
VALUES
  (1, 'Germany', 'A'),
  (2, 'Scotland', 'A'),
  (3, 'Hungary', 'A'),
  (4, 'Switzerland', 'A'),
  (5, 'Spain', 'B'),
  (6, 'Croatia', 'B'),
  (7, 'Italy', 'B'),
  (8, 'Albania', 'B'),
  (9, 'Slovenia', 'C'),
  (10, 'Denmark', 'C'),
  (11, 'Serbia', 'C'),
  (12, 'England', 'C'),
  (13, 'Play-off winner A', 'D'),
  (14, 'Netherlands', 'D'),
  (15, 'Austria', 'D'),
  (16, 'France', 'D'),
  (17, 'Belgium', 'E'),
  (18, 'Slovakia', 'E'),
  (19, 'Romania', 'E'),
  (20, 'Play-off winner B', 'E'),
  (21, 'Turkey', 'F'),
  (22, 'Play-off winner c', 'F'),
  (23, 'Portugal', 'F'),
  (24, 'Czech', 'F');

num_affected_rows,num_inserted_rows
24,24


In [0]:
%sql
-- %python
-- # spark.conf.set("spark.databricks.delta.commitValidation.enabled", "false")


In [0]:
%sql
--creating tables for matches
CREATE or Replace TABLE Matches (
  matchId INTEGER,
  TeamH VARCHAR(255),
  TeamA VARCHAR(255),
  TeamHScore INTEGER,
  TeamAScore INTEGER,
  Result VARCHAR(255),
  Stage VARCHAR(255)
);

In [0]:
%sql
INSERT INTO Matches (matchId, TeamH, TeamA, TeamHScore, TeamAScore, Result, Stage)
SELECT DISTINCT
    ROW_NUMBER() OVER (ORDER BY t1.teamId) AS matchId,
    t1.teamName AS TeamA,
    t2.teamName AS TeamH,
    (SELECT FLOOR(RAND() * 5) + 1) AS TeamHScore,
    (SELECT FLOOR(RAND() * 5) + 1) AS TeamAScore,
    'draw' AS Result,
    'group stage' AS Stage
FROM
    euroCup t1
JOIN
    euroCup t2 ON t1.groupName = t2.groupName AND t1.teamId < t2.teamId;

num_affected_rows,num_inserted_rows
36,36


In [0]:
%sql
--updating match details 
UPDATE Matches
SET Result = 
    CASE
        WHEN TeamHScore > TeamAScore THEN TeamH
        WHEN TeamHScore < TeamAScore THEN TeamA
        ELSE 'draw'
    END;




num_affected_rows
36


In [0]:
%sql
--Verifying the match results
SELECT * FROM Matches
order by matchId;

matchId,TeamH,TeamA,TeamHScore,TeamAScore,Result,Stage
1,Germany,Switzerland,5,5,draw,group stage
2,Germany,Hungary,4,1,Germany,group stage
3,Germany,Scotland,4,1,Germany,group stage
4,Scotland,Switzerland,3,2,Scotland,group stage
5,Scotland,Hungary,3,1,Scotland,group stage
6,Hungary,Switzerland,1,2,Switzerland,group stage
7,Spain,Albania,1,2,Albania,group stage
8,Spain,Italy,5,3,Spain,group stage
9,Spain,Croatia,3,2,Spain,group stage
10,Croatia,Albania,4,3,Croatia,group stage


In [0]:
%sql
--creating ranking table 
CREATE or Replace TABLE groupRank (
  teamId INTEGER,
  teamName VARCHAR(255),
  groupName VARCHAR(255),
  Points INTEGER,
  Standings INTEGER,
  GF INTEGER,
  GA INTEGER,
  GD INTEGER,
  Win INTEGER,
  isProgress BOOLEAN
)

In [0]:
%sql
-- Calculate points, goals scored, goals conceded, goal difference, and win count for each team
WITH MatchResults AS (
  SELECT 
    TeamH AS teamName,
    CASE 
      WHEN Result = CAST(TeamH AS VARCHAR(255)) THEN 1  
      ELSE 0  
    END AS Win, 
    CASE 
      WHEN Result = CAST(TeamH AS VARCHAR(255)) THEN 3  
      WHEN Result = CAST(TeamA AS VARCHAR(255)) THEN 0  
      ELSE 1  -- Draw
    END AS Points,
    TeamHScore AS GF, 
    TeamAScore AS GA 
  FROM matches
  UNION ALL
  SELECT 
    TeamA AS teamName,
    CASE 
      WHEN Result = CAST(TeamA AS VARCHAR(255)) THEN 1  -- TeamA wins
      ELSE 0  -- TeamA does not win
    END AS Win, 
    CASE 
      WHEN Result = CAST(TeamA AS VARCHAR(255)) THEN 3  -- TeamA wins
      WHEN Result = CAST(TeamH AS VARCHAR(255)) THEN 0  -- TeamA loses
      ELSE 1  -- Draw
    END AS Points,
    TeamAScore AS GF, -- Goals scored by Team2
    TeamHScore AS GA -- Goals conceded by Team2
  FROM matches
)
INSERT INTO groupRank (teamId, teamName, groupName, Points, Standings, GF, GA, GD, Win, isProgress)
SELECT
  e.teamId,
  e.teamName,
  e.groupName,
  SUM(M.Points) AS Points,
  0 as Standings,
  SUM(M.GF) AS GF, -- Total goals scored by the team
  SUM(M.GA) AS GA, -- Total goals conceded by the team
  SUM(M.GF) - SUM(M.GA) AS GD, -- Goal difference for the team
  SUM(M.Win) AS Win,
  0 as isProgress
FROM 
  euroCup e
LEFT JOIN
  MatchResults M ON e.teamName = M.teamName
GROUP BY
  e.teamId, e.teamName, e.groupName;

num_affected_rows,num_inserted_rows
24,24


In [0]:
%sql
-- Rank all teams within each group based on points, goal difference, goals scored, and win count
WITH TeamRanked AS (
  SELECT 
    teamName,
    row_number() OVER (PARTITION BY groupName ORDER BY Points DESC, GD DESC, GF DESC, Win DESC) AS GroupRank
  FROM groupRank
)
MERGE INTO groupRank R
USING TeamRanked TR
ON R.teamName = TR.teamName
WHEN MATCHED THEN UPDATE SET R.Standings = TR.GroupRank;


num_affected_rows,num_updated_rows,num_deleted_rows,num_inserted_rows
24,24,0,0


In [0]:
%sql
WITH TeamRanked AS (
  SELECT 
    *,
    ROW_NUMBER() OVER (PARTITION BY groupName ORDER BY Points DESC, GD DESC, GF DESC, Win DESC) AS GroupRank
  FROM groupRank
),
Top3RankTeams AS (
  SELECT 
    *,
    ROW_NUMBER() OVER (ORDER BY Points DESC, GD DESC, GF DESC, Win DESC) AS RankWithinGroup
  FROM groupRank
  WHERE Standings = 3
),
isProgressCalc AS (
  SELECT 
    r.*,
    CASE 
        WHEN r.GroupRank = 4 THEN 0
        WHEN r.GroupRank = 3 AND t.RankWithinGroup <= 4 THEN 1
        WHEN r.GroupRank IN (1, 2) THEN 1
        ELSE 0
    END AS isProgressCalc 
  FROM TeamRanked r
  LEFT JOIN Top3RankTeams t ON r.teamName = t.teamName
)

MERGE INTO groupRank r
USING isProgressCalc ipc
ON r.teamId = ipc.teamId
WHEN MATCHED THEN UPDATE SET r.isprogress = ipc.isProgressCalc;

SELECT * FROM groupRank ORDER BY groupName, Standings;


teamId,teamName,groupName,Points,Standings,GF,GA,GD,Win,isProgress
1,Germany,A,7,1,13,7,6,2,True
2,Scotland,A,6,2,7,7,0,2,True
4,Switzerland,A,4,3,9,9,0,1,True
3,Hungary,A,0,4,3,9,-6,0,False
6,Croatia,B,6,1,11,9,2,2,True
5,Spain,B,6,2,9,7,2,2,True
7,Italy,B,3,3,10,12,-2,1,True
8,Albania,B,3,4,7,9,-2,1,False
10,Denmark,C,7,1,11,8,3,2,True
12,England,C,6,2,10,6,4,2,True


In [0]:
%sql
SELECT * FROM groupRank ORDER BY groupName, Standings;

teamId,teamName,groupName,Points,Standings,GF,GA,GD,Win,isProgress
1,Germany,A,7,1,13,7,6,2,True
2,Scotland,A,6,2,7,7,0,2,True
4,Switzerland,A,4,3,9,9,0,1,True
3,Hungary,A,0,4,3,9,-6,0,False
6,Croatia,B,6,1,11,9,2,2,True
5,Spain,B,6,2,9,7,2,2,True
7,Italy,B,3,3,10,12,-2,1,True
8,Albania,B,3,4,7,9,-2,1,False
10,Denmark,C,7,1,11,8,3,2,True
12,England,C,6,2,10,6,4,2,True


In [0]:
%sql
--third place combination table
CREATE or Replace TABLE thirdPlaceCombination (
  ID INTEGER,
  SELECTEDGROUPS VARCHAR(4),
  TeamHB CHAR(1),
  TeamHC CHAR(1),
  TeamHE CHAR(1),
  TeamHF CHAR(1)
);

In [0]:
%sql
INSERT INTO thirdPlaceCombination (ID, SELECTEDGROUPS, TeamHB, TeamHC, TeamHE, TeamHF) VALUES
(1, 'ABCD', 'A', 'D', 'B', 'C'),
(2, 'ABCE', 'A', 'E', 'B', 'C'),
(3, 'ABCF', 'A', 'F', 'B', 'C'),
(4, 'ABDE', 'D', 'E', 'A', 'B'),
(5, 'ABDF', 'D', 'F', 'A', 'B'),
(6, 'ABEF', 'E', 'F', 'B', 'A'),
(7, 'ACDE', 'E', 'D', 'C', 'A'),
(8, 'ACDF', 'F', 'D', 'C', 'A'),
(9, 'ACEF', 'E', 'F', 'C', 'A'),
(10, 'ADEF', 'E', 'F', 'D', 'A'),
(11, 'BCDE', 'E', 'D', 'B', 'C'),
(12, 'BCDF', 'F', 'D', 'C', 'B'),
(13, 'BCEF', 'F', 'E', 'C', 'B'),
(14, 'BDEF', 'F', 'E', 'D', 'B'),
(15, 'CDEF', 'F', 'E', 'D', 'C');

num_affected_rows,num_inserted_rows
15,15


In [0]:
%sql
--Round of 16
With SelectedGroup AS
(
SELECT 
    CONCAT_WS('', SORT_ARRAY(COLLECT_SET(SUBSTRING(groupName, 1, 1)))) AS Result
FROM groupRank
WHERE Standings = 3 AND IsProgress = TRUE
),

SelectedGrpTbl AS
(
SELECT * 
FROM thirdPlaceCombination
WHERE thirdPlaceCombination.SELECTEDGROUPS = (SELECT Result FROM SelectedGroup)
)
INSERT INTO Matches (matchId, TeamH, TeamA, TeamHScore, TeamAScore, Result, stage)
SELECT 
    37 AS matchId, 
    (SELECT teamName FROM groupRank WHERE Standings = 1 AND groupName = 'B') AS TeamH, 
    (SELECT teamName FROM groupRank WHERE Standings = 3 AND groupName = (select TeamHB from SelectedGrpTbl)) AS TeamA,
    (SELECT FLOOR(RAND() * 5) + 1) AS TeamHScore,
    (SELECT FLOOR(RAND() * 5) + 1) AS TeamAScore,
    'draw' as Result,
    'Round of 16' as stage
union All
SELECT 
    38 AS matchId, 
    (SELECT teamName FROM groupRank WHERE Standings = 1 AND groupName = 'A') AS TeamH, 
    (SELECT teamName FROM groupRank WHERE Standings = 2 AND groupName = 'C') AS TeamA,
    (SELECT FLOOR(RAND() * 5) + 1) AS TeamHScore,
    (SELECT FLOOR(RAND() * 5) + 1) AS TeamAScore,
    'draw' as Result,
    'Round of 16' as stage
UNION ALL
SELECT 
    39 AS matchId, 
    (SELECT teamName FROM groupRank WHERE Standings = 1 AND groupName = 'F') AS TeamH, 
    (SELECT teamName FROM groupRank WHERE Standings = 3 AND groupName = (select TeamHF from SelectedGrpTbl)) AS TeamA,
    (SELECT FLOOR(RAND() * 5) + 1) AS TeamHScore,
    (SELECT FLOOR(RAND() * 5) + 1) AS TeamAScore,
    'draw' as Result,
    'Round of 16' as stage
union All
SELECT 
    40 AS matchId, 
    (SELECT teamName FROM groupRank WHERE Standings = 2 AND groupName = 'D') AS TeamH, 
    (SELECT teamName FROM groupRank WHERE Standings = 2 AND groupName = 'E') AS TeamA,
    (SELECT FLOOR(RAND() * 5) + 1) AS TeamHScore,
    (SELECT FLOOR(RAND() * 5) + 1) AS TeamAScore,
    'draw' as Result,
    'Round of 16' as stage
UNION ALL
SELECT 
    41 AS matchId, 
    (SELECT teamName FROM groupRank WHERE Standings = 1 AND groupName = 'E') AS TeamH, 
    (SELECT teamName FROM groupRank WHERE Standings = 3 AND groupName = (select TeamHE from SelectedGrpTbl)) AS TeamA,
    (SELECT FLOOR(RAND() * 5) + 1) AS TeamHScore,
    (SELECT FLOOR(RAND() * 5) + 1) AS TeamAScore,
    'draw' as Result,
    'Round of 16' as stage
union All
SELECT 
    42 AS matchId, 
    (SELECT teamName FROM groupRank WHERE Standings = 1 AND groupName = 'D') AS TeamH, 
    (SELECT teamName FROM groupRank WHERE Standings = 2 AND groupName = 'F') AS TeamA,
    (SELECT FLOOR(RAND() * 5) + 1) AS TeamHScore,
    (SELECT FLOOR(RAND() * 5) + 1) AS TeamAScore,
    'draw' as Result,
    'Round of 16' as stage
UNION ALL
SELECT  
    43 AS matchId, 
    (SELECT teamName FROM groupRank WHERE Standings = 1 AND groupName = 'C') AS TeamH, 
    (SELECT teamName FROM groupRank WHERE Standings = 3 AND groupName = (select TeamHC from SelectedGrpTbl)) AS TeamA,
    (SELECT FLOOR(RAND() * 5) + 1) AS TeamHScore,
    (SELECT FLOOR(RAND() * 5) + 1) AS TeamAScore,
    'draw' as Result,
    'Round of 16' as stage
union All
SELECT 
    44 AS matchId, 
    (SELECT teamName FROM groupRank WHERE Standings = 2 AND groupName = 'A') AS TeamH, 
    (SELECT teamName FROM groupRank WHERE Standings = 2 AND groupName = 'B') AS TeamA,
    (SELECT FLOOR(RAND() * 5) + 1) AS TeamHScore,
    (SELECT FLOOR(RAND() * 5) + 1) AS TeamAScore,
    'draw' as Result,
    'Round of 16' as stage;

num_affected_rows,num_inserted_rows
8,8


In [0]:
%sql
--updationg the Result column
UPDATE Matches
SET Result = 
    CASE 
        WHEN TeamHScore > TeamAScore THEN TeamH
        WHEN TeamHScore < TeamAScore THEN TeamA
        ELSE 
            CASE 
                WHEN (TeamHScore + 1) % 2 = 1 THEN TeamH
                ELSE TeamA
            END
    END
where stage = 'Round of 16';
SELECT * FROM Matches
order by matchId;

matchId,TeamH,TeamA,TeamHScore,TeamAScore,Result,Stage
1,Germany,Switzerland,5,5,draw,group stage
2,Germany,Hungary,4,1,Germany,group stage
3,Germany,Scotland,4,1,Germany,group stage
4,Scotland,Switzerland,3,2,Scotland,group stage
5,Scotland,Hungary,3,1,Scotland,group stage
6,Hungary,Switzerland,1,2,Switzerland,group stage
7,Spain,Albania,1,2,Albania,group stage
8,Spain,Italy,5,3,Spain,group stage
9,Spain,Croatia,3,2,Spain,group stage
10,Croatia,Albania,4,3,Croatia,group stage


In [0]:
%sql
--Quater-Final Matches
INSERT INTO Matches (matchId, TeamH, TeamA, TeamHscore, TeamAscore, Result, stage)
SELECT 
    45 AS matchId, 
    (SELECT Result FROM Matches WHERE matchId = 37) AS TeamA, 
    (SELECT Result FROM Matches WHERE matchId = 38) AS TeamH,
    (SELECT FLOOR(RAND() * 5) + 1) AS TeamHScore,
    (SELECT FLOOR(RAND() * 5) + 1) AS TeamAScore,
    'draw' as Result,
    'Quater Final' as stage
UNION ALL
SELECT 
    46 AS matchId, 
    (SELECT Result FROM Matches WHERE matchId = 39) AS TeamA, 
    (SELECT Result FROM Matches WHERE matchId = 40) AS TeamH,
    (SELECT FLOOR(RAND() * 5) + 1) AS TeamHScore,
    (SELECT FLOOR(RAND() * 5) + 1) AS TeamAScore,
    'draw' as Result,
    'Quater Final' as stage
UNION ALL
SELECT 
    47 AS matchId, 
    (SELECT Result FROM Matches WHERE matchId = 41) AS TeamH, 
    (SELECT Result FROM Matches WHERE matchId = 42) AS TeamA,
    (SELECT FLOOR(RAND() * 5) + 1) AS TeamHScore,
    (SELECT FLOOR(RAND() * 5) + 1) AS TeamAScore,
    'draw' as Result,
    'Quater Final' as stage
UNION ALL
SELECT 
    48 AS matchId, 
    (SELECT Result FROM Matches WHERE matchId = 43) AS TeamH, 
    (SELECT Result FROM Matches WHERE matchId = 44) AS TeamA,
    (SELECT FLOOR(RAND() * 5) + 1) AS TeamHScore,
    (SELECT FLOOR(RAND() * 5) + 1) AS TeamAScore,
    'draw' as Result,
    'Quater Final' as stage;

num_affected_rows,num_inserted_rows
4,4


In [0]:
%sql
UPDATE Matches
SET Result = 
    CASE 
        WHEN TeamHscore > TeamAscore THEN TeamH
        WHEN TeamHscore < TeamAscore THEN TeamA
        ELSE 
            CASE 
                WHEN (TeamHscore + 1) % 2 = 1 THEN TeamH
                ELSE TeamA
            END
    END
where stage = 'Quater Final';

select * from Matches
order by matchId

matchId,TeamH,TeamA,TeamHScore,TeamAScore,Result,Stage
1,Germany,Switzerland,5,5,draw,group stage
2,Germany,Hungary,4,1,Germany,group stage
3,Germany,Scotland,4,1,Germany,group stage
4,Scotland,Switzerland,3,2,Scotland,group stage
5,Scotland,Hungary,3,1,Scotland,group stage
6,Hungary,Switzerland,1,2,Switzerland,group stage
7,Spain,Albania,1,2,Albania,group stage
8,Spain,Italy,5,3,Spain,group stage
9,Spain,Croatia,3,2,Spain,group stage
10,Croatia,Albania,4,3,Croatia,group stage


In [0]:
%sql
--Semi-Final Matches
INSERT INTO Matches (matchId, TeamH, TeamA, TeamHScore, TeamAScore, Result, stage)
SELECT 
    49 AS matchId, 
    (SELECT Result FROM Matches WHERE matchId = 45) AS TeamH, 
    (SELECT Result FROM Matches WHERE matchId = 46) AS TeamA,
    (SELECT FLOOR(RAND() * 5) + 1) AS TeamHScore,
    (SELECT FLOOR(RAND() * 5) + 1) AS TeamAScore,
    'draw' as Result,
    'Semi Final' as stage
UNION ALL
SELECT 
    50 AS matchId, 
    (SELECT Result FROM Matches WHERE matchId = 47) AS TeamH, 
    (SELECT Result FROM Matches WHERE matchId = 48) AS TeamA,
    (SELECT FLOOR(RAND() * 5) + 1) AS TeamHScore,
    (SELECT FLOOR(RAND() * 5) + 1) AS TeamAScore,
    'draw' as Result,
    'Semi Final' as stage;

num_affected_rows,num_inserted_rows
2,2


In [0]:
%sql
UPDATE Matches
SET Result = 
    CASE 
        WHEN TeamHScore > TeamAScore THEN TeamH
        WHEN TeamHScore < TeamAScore THEN TeamA
        ELSE 
            CASE 
                WHEN (TeamHScore + 1) % 2 = 1 THEN TeamH
                ELSE TeamA
            END
    END
where stage = 'Semi Final';

select * from Matches
order by matchId

matchId,TeamH,TeamA,TeamHScore,TeamAScore,Result,Stage
1,Germany,Switzerland,5,5,draw,group stage
2,Germany,Hungary,4,1,Germany,group stage
3,Germany,Scotland,4,1,Germany,group stage
4,Scotland,Switzerland,3,2,Scotland,group stage
5,Scotland,Hungary,3,1,Scotland,group stage
6,Hungary,Switzerland,1,2,Switzerland,group stage
7,Spain,Albania,1,2,Albania,group stage
8,Spain,Italy,5,3,Spain,group stage
9,Spain,Croatia,3,2,Spain,group stage
10,Croatia,Albania,4,3,Croatia,group stage


In [0]:
%sql
--Third-Place match
INSERT INTO Matches (matchId, TeamH, TeamA, TeamHScore, TeamAScore, Result, stage)
SELECT 
    51 AS matchId, 
    (SELECT Result FROM Matches WHERE stage='Quater Final' and Result not in (select Result FROM Matches WHERE stage='Semi Final') LIMIT 1 OFFSET 0) AS teamH, 
    (SELECT Result FROM Matches WHERE stage='Quater Final' and Result not in (select Result FROM Matches WHERE stage='Semi Final') LIMIT 1 OFFSET 1) AS team2,
    (SELECT FLOOR(RAND() * 5) + 1) AS TeamHscore,
    (SELECT FLOOR(RAND() * 5) + 1) AS TeamAScore,
    'draw' as Result,
    'Third Place' as stage;
UPDATE Matches
SET Result = 
    CASE 
        WHEN TeamHScore > TeamAScore THEN TeamH
        WHEN TeamHScore < TeamAScore THEN TeamA
        ELSE 
            CASE 
                WHEN (TeamHscore + 1) % 2 = 1 THEN TeamH
                ELSE TeamA
            END
    END
where stage = 'Third Place';

select * from Matches
order by matchId

matchId,TeamH,TeamA,TeamHScore,TeamAScore,Result,Stage
1,Germany,Switzerland,5,5,draw,group stage
2,Germany,Hungary,4,1,Germany,group stage
3,Germany,Scotland,4,1,Germany,group stage
4,Scotland,Switzerland,3,2,Scotland,group stage
5,Scotland,Hungary,3,1,Scotland,group stage
6,Hungary,Switzerland,1,2,Switzerland,group stage
7,Spain,Albania,1,2,Albania,group stage
8,Spain,Italy,5,3,Spain,group stage
9,Spain,Croatia,3,2,Spain,group stage
10,Croatia,Albania,4,3,Croatia,group stage


In [0]:
%sql
--Final Match
INSERT INTO Matches (matchId, TeamH, TeamA, TeamHScore, TeamAScore, Result, stage)
SELECT 
    52 AS matchId, 
    (SELECT Result FROM Matches WHERE matchId = 49) AS TeamH, 
    (SELECT Result FROM Matches WHERE matchId = 50) AS TeamA,
    (SELECT FLOOR(RAND() * 5) + 1) AS TeamHScore,
    (SELECT FLOOR(RAND() * 5) + 1) AS TeamAScore,
    'draw' as Result,
    'Final' as stage;
UPDATE Matches
SET Result = 
    CASE 
        WHEN TeamHScore > TeamAScore THEN TeamH
        WHEN TeamHScore < TeamAScore THEN TeamA
        ELSE 
            CASE 
                WHEN (TeamHScore + 1) % 2 = 1 THEN TeamH
                ELSE TeamA
            END
    END
where stage = 'Final';

select * from Matches
order by matchId

matchId,TeamH,TeamA,TeamHScore,TeamAScore,Result,Stage
1,Germany,Switzerland,5,5,draw,group stage
2,Germany,Hungary,4,1,Germany,group stage
3,Germany,Scotland,4,1,Germany,group stage
4,Scotland,Switzerland,3,2,Scotland,group stage
5,Scotland,Hungary,3,1,Scotland,group stage
6,Hungary,Switzerland,1,2,Switzerland,group stage
7,Spain,Albania,1,2,Albania,group stage
8,Spain,Italy,5,3,Spain,group stage
9,Spain,Croatia,3,2,Spain,group stage
10,Croatia,Albania,4,3,Croatia,group stage


In [0]:
%sql
SELECT * FROM Matches where stage = 'Round of 16' ORDER BY matchId;

matchId,TeamH,TeamA,TeamHScore,TeamAScore,Result,Stage
37,Croatia,Switzerland,4,1,Croatia,Round of 16
38,Germany,England,5,1,Germany,Round of 16
39,Portugal,Slovenia,4,4,Portugal,Round of 16
40,Austria,Romania,1,2,Romania,Round of 16
41,Play-off winner B,Italy,2,1,Play-off winner B,Round of 16
42,Play-off winner A,Czech,1,3,Czech,Round of 16
43,Denmark,Turkey,1,5,Turkey,Round of 16
44,Scotland,Spain,1,3,Spain,Round of 16


In [0]:
%sql
SELECT * FROM Matches where stage = 'Quater Final' ORDER BY matchId;

matchId,TeamH,TeamA,TeamHScore,TeamAScore,Result,Stage
45,Croatia,Germany,2,1,Croatia,Quater Final
46,Portugal,Romania,2,3,Romania,Quater Final
47,Play-off winner B,Czech,1,5,Czech,Quater Final
48,Turkey,Spain,1,3,Spain,Quater Final


In [0]:
%sql
SELECT * FROM Matches where stage = 'Semi Final' ORDER BY matchId;

matchId,TeamH,TeamA,TeamHScore,TeamAScore,Result,Stage
49,Croatia,Romania,3,1,Croatia,Semi Final
50,Czech,Spain,5,4,Czech,Semi Final


In [0]:
%sql
SELECT * FROM Matches where stage = 'Final' ORDER BY matchId;

matchId,TeamH,TeamA,TeamHScore,TeamAScore,Result,Stage
52,Croatia,Czech,5,1,Croatia,Final


In [0]:
%python
#import libraries
import pandas as pd
from pyspark.sql import SparkSession



In [0]:
%python

#Spark Session
spark = SparkSession.builder.appName("Visualization").getOrCreate()

In [0]:
%python

groupRank = spark.sql("SELECT * FROM groupRank")

# Convert the DataFrame to a Pandas DataFrame
groupRank_pandas = groupRank.toPandas()

# Create visualize_table DataFrame with 'teamName' column dynamically
visualize_table = pd.DataFrame({
    'teamName': groupRank_pandas['teamName'],
    'groupName': groupRank_pandas['groupName'],
    'Standings': groupRank_pandas['Standings'],
    'Points': groupRank_pandas['Points'],
    'GD': groupRank_pandas['GD']
})

# Create a DataFrame with blank rows
blank_rows = pd.DataFrame({'teamName': [''] * 1, 'groupName': [''] * 1, 'Standings': [''] * 1, 'Points': [''] * 1, 'GD': [''] * 1})

# Initialize an empty list to store chunks of data
chunks = []

# Split the visualize_table DataFrame into chunks of 4 records
for i in range(0, len(visualize_table), 4):
    chunk = visualize_table.iloc[i:i+4]  # Get a chunk of 4 records
    chunks.append(chunk)  # Append the chunk to the list
    if i < len(visualize_table) - 4:
        chunks.append(blank_rows)  # Append 2 blank rows after each chunk, except for the last chunk

# Concatenate all chunks to create the final DataFrame
visualize_table_with_blanks = pd.concat(chunks).reset_index(drop=True)
visualize_table_with_blanks = visualize_table_with_blanks.append(blank_rows, ignore_index = True)
half_length = len(visualize_table_with_blanks) // 2

# Create visualize_quater_final1 DataFrame with 4 blank rows at the beginning
visualize_table_with_blanks1 = pd.DataFrame(columns=['teamName','groupName','Standings','Points','GD'])
visualize_table_with_blanks1 = visualize_table_with_blanks1.append(blank_rows, ignore_index = True)
visualize_table_with_blanks1 = pd.concat([visualize_table_with_blanks1, visualize_table_with_blanks.iloc[:half_length]]).reset_index(drop=True)
#visualize_table_with_blanks1 = visualize_table_with_blanks1.drop(visualize_table_with_blanks1.tail(2).index)
visualize_table_with_blanks1['space'] = ''
visualize_table_with_blanks2 = pd.DataFrame(columns=['teamName','groupName','Standings','Points','GD'])
visualize_table_with_blanks2 = visualize_table_with_blanks2.append(blank_rows, ignore_index = True)
visualize_table_with_blanks2 = pd.concat([visualize_table_with_blanks2, visualize_table_with_blanks.iloc[half_length:]]).reset_index(drop=True)



  visualize_table_with_blanks = visualize_table_with_blanks.append(blank_rows, ignore_index = True)
  visualize_table_with_blanks1 = visualize_table_with_blanks1.append(blank_rows, ignore_index = True)
  visualize_table_with_blanks2 = visualize_table_with_blanks2.append(blank_rows, ignore_index = True)


In [0]:
%python
RO16 = spark.sql("SELECT * FROM Matches WHERE stage = 'Round of 16' order by matchId")
RO16_pandas = RO16.toPandas()
visualize_table16 = pd.DataFrame(columns=['Round of 16'])
visualize_table16 = visualize_table16.append({'Round of 16': ''}, ignore_index=True)
visualize_table16 = visualize_table16.append({'Round of 16': ''}, ignore_index=True)
for index, row in RO16_pandas.iterrows():
    visualize_table16 = visualize_table16.append({'Roundof 16': f"{row['TeamH']} ({row['TeamHScore']})"}, ignore_index=True)
    visualize_table16 = visualize_table16.append({'Roundof 16': f"{row['TeamA']} ({row['TeamAScore']})"}, ignore_index=True)
    
    # Insert 2 blank rows after every record
    visualize_table16 = visualize_table16.append({'Roundof 16': ''}, ignore_index=True)
    visualize_table16 = visualize_table16.append({'Roundof 16': ''}, ignore_index=True)
visualize_table16['space'] = ''

split_index = len(visualize_table16) // 2 - 1

# Split the final DataFrame into two halves
visualize_table16pool1 = visualize_table16.iloc[:split_index].reset_index(drop=True)
visualize_table16pool2 = visualize_table16.iloc[split_index:].reset_index(drop=True)
visualize_table16pool2 = visualize_table16pool2.drop(visualize_table16pool2.tail(2).index)

  visualize_table16 = visualize_table16.append({'Round of 16': ''}, ignore_index=True)
  visualize_table16 = visualize_table16.append({'Round of 16': ''}, ignore_index=True)
  visualize_table16 = visualize_table16.append({'Roundof 16': f"{row['TeamH']} ({row['TeamHScore']})"}, ignore_index=True)
  visualize_table16 = visualize_table16.append({'Roundof 16': f"{row['TeamA']} ({row['TeamAScore']})"}, ignore_index=True)
  visualize_table16 = visualize_table16.append({'Roundof 16': ''}, ignore_index=True)
  visualize_table16 = visualize_table16.append({'Roundof 16': ''}, ignore_index=True)
  visualize_table16 = visualize_table16.append({'Roundof 16': f"{row['TeamH']} ({row['TeamHScore']})"}, ignore_index=True)
  visualize_table16 = visualize_table16.append({'Roundof 16': f"{row['TeamA']} ({row['TeamAScore']})"}, ignore_index=True)
  visualize_table16 = visualize_table16.append({'Roundof 16': ''}, ignore_index=True)
  visualize_table16 = visualize_table16.append({'Roundof 16': ''}, ignore_in

In [0]:
%python
# quater final table view
#Quater final score
quater_final_score = spark.sql("SELECT * FROM Matches WHERE stage = 'Quater Final' ORDER BY matchId")
quater_final_score_pandas = quater_final_score.toPandas()
visualize_score16 = pd.DataFrame(columns=['teamscore'])
for i in range(4):
    visualize_score16 = visualize_score16.append({'teamscore': ''}, ignore_index=True)
for index, row in quater_final_score_pandas.iterrows():
    # Insert two records
    visualize_score16 = visualize_score16.append({'teamscore': row['TeamHScore']}, ignore_index=True)
    visualize_score16 = visualize_score16.append({'teamscore': row['TeamAScore']}, ignore_index=True)
    for i in range(6):
        visualize_score16 = visualize_score16.append({'teamscore': ''}, ignore_index=True)
visualize_score16 = visualize_score16.drop(visualize_score16.tail(3).index)

quater_final = spark.sql("SELECT * FROM Matches WHERE stage = 'Round of 16' ORDER BY matchId")
quater_final_pandas = quater_final.toPandas()
visualize_quater_final = pd.DataFrame(columns=['Quater Final'])
for i in range(4):
    visualize_quater_final = visualize_quater_final.append({'Quater Final': ''}, ignore_index=True)
for index, row in quater_final_pandas.iterrows():
    # Insert two records
    visualize_quater_final = visualize_quater_final.append({'Quater Final': row['Result']}, ignore_index=True)
    
    # Insert 2 blank rows after every two records
    if (index + 1) % 2 == 0:
        for i in range(6):
            visualize_quater_final = visualize_quater_final.append({'Quater Final': ''}, ignore_index=True)
visualize_quater_final = visualize_quater_final.drop(visualize_quater_final.tail(3).index)

# Concatenate the DataFrames
visualize_quater_final = pd.concat([visualize_quater_final, visualize_score16], axis=1)

visualize_quater_final['space'] = ''

# Split the DataFrame into two halves
half_length = len(visualize_quater_final) // 2

visualize_quater_final1 = pd.DataFrame(columns=['Quater Final'])
visualize_quater_final1 = pd.concat([visualize_quater_final1, visualize_quater_final.iloc[:half_length]]).reset_index(drop=True)

visualize_quater_final2 = pd.DataFrame(columns=['Quater Final'])
visualize_quater_final2 = pd.concat([visualize_quater_final2, visualize_quater_final.iloc[half_length:]]).reset_index(drop=True)
visualize_quater_final2 = visualize_quater_final2.drop(visualize_quater_final2.tail(1).index)


  visualize_score16 = visualize_score16.append({'teamscore': ''}, ignore_index=True)
  visualize_score16 = visualize_score16.append({'teamscore': ''}, ignore_index=True)
  visualize_score16 = visualize_score16.append({'teamscore': ''}, ignore_index=True)
  visualize_score16 = visualize_score16.append({'teamscore': ''}, ignore_index=True)
  visualize_score16 = visualize_score16.append({'teamscore': row['TeamHScore']}, ignore_index=True)
  visualize_score16 = visualize_score16.append({'teamscore': row['TeamAScore']}, ignore_index=True)
  visualize_score16 = visualize_score16.append({'teamscore': ''}, ignore_index=True)
  visualize_score16 = visualize_score16.append({'teamscore': ''}, ignore_index=True)
  visualize_score16 = visualize_score16.append({'teamscore': ''}, ignore_index=True)
  visualize_score16 = visualize_score16.append({'teamscore': ''}, ignore_index=True)
  visualize_score16 = visualize_score16.append({'teamscore': ''}, ignore_index=True)
  visualize_score16 = visualize_sco

In [0]:
%python
# semifinal score
semi_final_score = spark.sql("SELECT * FROM MAtches WHERE stage = 'Semi Final' ORDER BY matchId")
semi_final_score_pandas = semi_final_score.toPandas()
visualize_score_semi = pd.DataFrame(columns=['teamscoreSF'])
for i in range(8):
    visualize_score_semi = visualize_score_semi.append({'teamscoreSF': ''}, ignore_index=True)
for index, row in semi_final_score_pandas.iterrows():
    visualize_score_semi = visualize_score_semi.append({'teamscoreSF': row['TeamHScore']}, ignore_index=True)
    visualize_score_semi = visualize_score_semi.append({'teamscoreSF': row['TeamAScore']}, ignore_index=True)
    for i in range(2):
        visualize_score_semi = visualize_score_semi.append({'teamscoreSF': ''}, ignore_index=True)
for i in range(6):
        visualize_score_semi = visualize_score_semi.append({'teamscoreSF': ''}, ignore_index=True)

# semifinal table view

semi_final = spark.sql("SELECT * FROM Matches WHERE stage = 'Quater Final' ORDER BY matchId")
semi_final_pandas = semi_final.toPandas()
visualize_semi_final = pd.DataFrame(columns=['semi Final'])
for i in range(8):
    visualize_semi_final = visualize_semi_final.append({'semi Final': ''}, ignore_index=True)

# Iterate over each row in quater_final_pandas DataFrame
for index, row in semi_final_pandas.iterrows():
    # Insert two records
    visualize_semi_final = visualize_semi_final.append({'semi Final': row['Result']}, ignore_index=True)
    
    # Insert 2 blank rows after every two records
    if (index + 1) % 2 == 0:
        visualize_semi_final = visualize_semi_final.append({'semi Final': ''}, ignore_index=True)
        visualize_semi_final = visualize_semi_final.append({'semi Final': ''}, ignore_index=True)
for i in range(6):
    visualize_semi_final = visualize_semi_final.append({'semi Final': ''}, ignore_index=True)
visualize_semi_final = pd.concat([visualize_semi_final, visualize_score_semi], axis=1)
visualize_semi_final['space'] = ''
# Split the DataFrame into two halves
half_length = len(visualize_semi_final) // 2

# Create visualize_quater_final1 DataFrame with 4 blank rows at the beginning
visualize_semi_final1 = pd.DataFrame(columns=['semi Final'])
visualize_semi_final1 = pd.concat([visualize_semi_final1, visualize_semi_final.iloc[:half_length]]).reset_index(drop=True)
for i in range(5):
    visualize_semi_final1 = visualize_semi_final1.append({'semi Final': '', 'teamscoreSF': '', 'space': ''}, ignore_index=True)
# Create visualize_quater_final2 DataFrame with 4 blank rows at the beginning
visualize_semi_final2 = pd.DataFrame({'semi Final': [''] * 7, 'teamscoreSF': [''] * 7,'space': [''] * 7})
visualize_semi_final2 = pd.concat([visualize_semi_final2, visualize_semi_final.iloc[half_length:]]).reset_index(drop=True)
visualize_semi_final2 = visualize_semi_final2.drop(visualize_semi_final2.tail(2).index)

  visualize_score_semi = visualize_score_semi.append({'teamscoreSF': ''}, ignore_index=True)
  visualize_score_semi = visualize_score_semi.append({'teamscoreSF': ''}, ignore_index=True)
  visualize_score_semi = visualize_score_semi.append({'teamscoreSF': ''}, ignore_index=True)
  visualize_score_semi = visualize_score_semi.append({'teamscoreSF': ''}, ignore_index=True)
  visualize_score_semi = visualize_score_semi.append({'teamscoreSF': ''}, ignore_index=True)
  visualize_score_semi = visualize_score_semi.append({'teamscoreSF': ''}, ignore_index=True)
  visualize_score_semi = visualize_score_semi.append({'teamscoreSF': ''}, ignore_index=True)
  visualize_score_semi = visualize_score_semi.append({'teamscoreSF': ''}, ignore_index=True)
  visualize_score_semi = visualize_score_semi.append({'teamscoreSF': row['TeamHScore']}, ignore_index=True)
  visualize_score_semi = visualize_score_semi.append({'teamscoreSF': row['TeamAScore']}, ignore_index=True)
  visualize_score_semi = visualize_score

In [0]:
%python
# final table view
final = spark.sql("SELECT * FROM Matches WHERE stage = 'Final' ORDER BY matchId")
third = spark.sql("SELECT * FROM Matches WHERE stage = 'Third Place' ORDER BY matchId")

# Convert the DataFrame to a Pandas DataFrame
final_pandas = final.toPandas()
third_pandas = third.toPandas()

# Create a new DataFrame to store the teamnames
visualize_final = pd.DataFrame(columns=['Finals'])
for i in range(6):
    visualize_final = visualize_final.append({'Finals': ''}, ignore_index=True)

# Iterate over each row in quater_final_pandas DataFrame
for index, row in final_pandas.iterrows():
    visualize_final = visualize_final.append({'Finals': f"{row['TeamH']} ({row['TeamHScore']})"}, ignore_index=True)
    visualize_final = visualize_final.append({'Finals': f"{row['TeamA']} ({row['TeamAScore']})"}, ignore_index=True)
    visualize_final = visualize_final.append({'Finals': f"Winner: {row['Result']}"}, ignore_index=True)
    
    # Insert 2 blank rows after every two records
    if (index + 1) % 2 == 0:
        visualize_final = visualize_final.append({'Finals': ''}, ignore_index=True)
        visualize_final = visualize_final.append({'Finals': ''}, ignore_index=True)
    for i in range(2):
        visualize_final = visualize_final.append({'Finals': ''}, ignore_index=True)
for index, row in third_pandas.iterrows():
    # Insert two records
    visualize_final = visualize_final.append({'Finals': f"{row['TeamH']} ({row['TeamHScore']})"}, ignore_index=True)
    visualize_final = visualize_final.append({'Finals': f"{row['TeamA']} ({row['TeamAScore']})"}, ignore_index=True)
    visualize_final = visualize_final.append({'Finals': f"Third: {row['Result']}"}, ignore_index=True)
for i in range(2):
    visualize_final = visualize_final.append({'Finals': ''}, ignore_index=True)
visualize_final['space'] = ''


  visualize_final = visualize_final.append({'Finals': ''}, ignore_index=True)
  visualize_final = visualize_final.append({'Finals': ''}, ignore_index=True)
  visualize_final = visualize_final.append({'Finals': ''}, ignore_index=True)
  visualize_final = visualize_final.append({'Finals': ''}, ignore_index=True)
  visualize_final = visualize_final.append({'Finals': ''}, ignore_index=True)
  visualize_final = visualize_final.append({'Finals': ''}, ignore_index=True)
  visualize_final = visualize_final.append({'Finals': f"{row['TeamH']} ({row['TeamHScore']})"}, ignore_index=True)
  visualize_final = visualize_final.append({'Finals': f"{row['TeamA']} ({row['TeamAScore']})"}, ignore_index=True)
  visualize_final = visualize_final.append({'Finals': f"Winner: {row['Result']}"}, ignore_index=True)
  visualize_final = visualize_final.append({'Finals': ''}, ignore_index=True)
  visualize_final = visualize_final.append({'Finals': ''}, ignore_index=True)
  visualize_final = visualize_final.append({

In [0]:
%python
# mearging gropstage pool and quaterfinal
visualize_quater_final1.columns = [f"{col}q1" for col in visualize_quater_final1.columns]
merged_df = pd.concat([visualize_table16pool1, visualize_quater_final1], axis=1)

visualize_semi_final1.columns = [f"{col}s1" for col in visualize_semi_final1.columns]
merged_df = pd.concat([merged_df, visualize_semi_final1], axis=1)

visualize_final.columns = [f"{col}f" for col in visualize_final.columns]
merged_df = pd.concat([merged_df, visualize_final], axis=1)

visualize_semi_final2.columns = [f"{col}s2" for col in visualize_semi_final2.columns]

# Concatenate the DataFrames
merged_df = pd.concat([merged_df, visualize_semi_final2], axis=1)

visualize_quater_final2.columns = [f"{col}q2" for col in visualize_quater_final2.columns]

# Concatenate the DataFrames
merged_df = pd.concat([merged_df, visualize_quater_final2], axis=1)

visualize_table16pool2.columns = [f"{col}r2" for col in visualize_table16pool2.columns]

# Concatenate the DataFrames
merged_df = pd.concat([merged_df, visualize_table16pool2], axis=1)

visualize_table_with_blanks1.columns = [f"{col}g1" for col in visualize_table_with_blanks1.columns]
visualize_table_with_blanks2.columns = [f"{col}g2" for col in visualize_table_with_blanks2.columns]

# Concatenate the DataFrames
merged_df = pd.concat([visualize_table_with_blanks1,merged_df, visualize_table_with_blanks2], axis=1)

# Display the merged DataFrame
display(merged_df)

  Expected bytes, got a 'int' object
Attempting non-optimization as 'spark.sql.execution.arrow.pyspark.fallback.enabled' is set to true.
  warn(msg)


teamNameg1,groupNameg1,Standingsg1,Pointsg1,GDg1,spaceg1,Round of 16,Roundof 16,space,Quater Finalq1,teamscoreq1,spaceq1,semi Finals1,teamscoreSFs1,spaces1,Finalsf,spacef,semi Finals2,teamscoreSFs2,spaces2,Quater Finalq2,teamscoreq2,spaceq2,Round of 16r2,Roundof 16r2,spacer2,teamNameg2,groupNameg2,Standingsg2,Pointsg2,GDg2
,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
Germany,A,1.0,7.0,6.0,,,,,,,,,,,,,,,,,,,,,,Play-off winner A,D,1.0,9.0,6.0
Scotland,A,2.0,6.0,0.0,,,Croatia (4),,,,,,,,,,,,,,,,,Play-off winner B (2),,Austria,D,2.0,6.0,4.0
Switzerland,A,3.0,4.0,0.0,,,Switzerland (1),,,,,,,,,,,,,,,,,Italy (1),,France,D,3.0,3.0,-6.0
Hungary,A,4.0,0.0,-6.0,,,,,Croatia,2.0,,,,,,,,,,Play-off winner B,1.0,,,,,Netherlands,D,4.0,0.0,-4.0
,,,,,,,,,Germany,1.0,,,,,,,,,,Czech,5.0,,,,,,,,,
Croatia,B,1.0,6.0,2.0,,,Germany (5),,,,,,,,Croatia (5),,,,,,,,,Play-off winner A (1),,Play-off winner B,E,1.0,6.0,7.0
Spain,B,2.0,6.0,2.0,,,England (1),,,,,,,,Czech (1),,,,,,,,,Czech (3),,Romania,E,2.0,6.0,2.0
Italy,B,3.0,3.0,-2.0,,,,,,,,Croatia,3.0,,Winner: Croatia,,Czech,5.0,,,,,,,,Slovakia,E,3.0,3.0,-4.0
Albania,B,4.0,3.0,-2.0,,,,,,,,Romania,1.0,,,,Spain,4.0,,,,,,,,Belgium,E,4.0,3.0,-5.0
