In [1]:
import getml
from challenge.utils.data import load_ctu_dataset

getml.set_project("db_transformer_premier_league")

# Task: PremierLeague
### Dataset Description
> <span style="font-weight: 500; color: #3b3b3b;">ⓘ️&nbsp; Generated by `gpt-4o`</span>
>
> It seems that the *PremierLeague* dataset is not available in the repository. However, I can provide a description based on the data model you provided.
> 
> *Data Model:*
> 
> The *PremierLeague* dataset consists of three tables: `Actions`, `Players`, and `Teams`. These tables provide detailed information about player actions, player details, and team information in the Premier League.
> 
> - **Actions**: Contains numerous columns such as `PlayerID` (int), `MatchID` (int), `TeamID` (int), `Goals` (int), `Assists` (int), and many other performance metrics. This table records various actions and statistics for players during matches.
> 
> - **Players**: Includes `PlayerID` (int) and `Name` (varchar). This table provides basic information about the players.
> 
> - **Teams**: Contains `TeamID` (int) and `Name` (varchar). This table provides information about the teams in the league.
> 
> *Task and Target Column:*
> 
> The dataset can be used for various tasks such as *classification* or *regression*, depending on the specific analysis or prediction goals, such as predicting player performance or team outcomes.
> 
> *Column Types:*
> 
> - Integer: `PlayerID`, `MatchID`, `TeamID`, `Goals`, `Assists`, etc.
> - Varchar: `Name` (in `Players` and `Teams`)
> 
> *Metadata:*
> 
> - **Number of Tables**: 3
> - **Target Table**: Depends on the specific analysis task
> - **Target Column**: Depends on the specific analysis task
> 
> This dataset is used in the sports domain to analyze and evaluate player and team performance in the Premier League.

### Tables
Population table: matches

<h4>
  <details open>
     <summary>ER Diagram</summary>
       <img src="https://relational.fel.cvut.cz/assets/img/datasets-generated/PremierLeague.svg" alt="PremierLeague ER Diagram">
   </details>
</h4>

To load the dataset, we use the `load_ctu_dataset` function from the `utils`
module. This function returns a tuple with the population table as the first
element and the a dictionary of peripheral tables as the second element.

In [2]:
matches, peripheral = load_ctu_dataset("PremierLeague")

(
    players,
    teams,
    actions,
) = peripheral.values()

Analyzing schema:   0%|          | 0/4 [00:00<?, ?it/s]

Downloading tables:   0%|          | 0/4 [00:00<?, ?it/s]

Building data:   0%|          | 0/4 [00:00<?, ?it/s]

Now, we can inspect all tables and annotate the columns with [roles](https://getml.com/latest/user_guide/concepts/annotating_data/).

The population table (`matches`). We already set the `target` role for the target (`ResultOfTeamHome`). If the task is a multiclass classification,
we split the target column into multiple columns in an one-vs-all fashion. In this case, the original target is still avaiable as `ResultOfTeamHome`.

In [3]:
# TODO: Annotate remaining columns with roles
matches

name,ResultOfTeamHome=0,ResultOfTeamHome=1,ResultOfTeamHome=2,ResultOfTeamHome,MatchID,TeamHomeID,TeamAwayID,TeamHomeFormation,TeamAwayFormation,Date,split
role,target,target,target,unused_float,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string
0.0,1,0,0,0,1,7,3,6,8,2011-12-21 00:00:00.000000,val
1.0,0,1,0,1,2,3,7,8,6,2012-03-24 00:00:00.000000,train
2.0,1,0,0,0,3,5,3,8,8,2011-09-17 00:00:00.000000,val
3.0,0,1,0,1,4,3,5,8,6,2012-02-04 00:00:00.000000,train
4.0,0,1,0,1,5,3,30,8,5,2011-09-24 00:00:00.000000,train
,...,...,...,...,...,...,...,...,...,...,...
375.0,0,0,1,2,376,111,35,5,4,2012-03-17 00:00:00.000000,val
376.0,0,1,0,1,377,35,39,5,5,2011-10-16 00:00:00.000000,train
377.0,1,0,0,0,378,39,35,2,5,2012-02-12 00:00:00.000000,train
378.0,0,1,0,1,379,39,111,5,4,2011-11-06 00:00:00.000000,train


Peripheral tables,

In [4]:
# TODO: Annotate columns with roles
players

name,TeamID,Name
role,unused_string,unused_string
0.0,1,Manchester United
1.0,3,Arsenal
2.0,4,Newcastle United
3.0,5,Blackburn Rovers
4.0,6,Tottenham Hotspur
,...,...
15.0,54,Fulham
16.0,56,Sunderland
17.0,80,Swansea City
18.0,110,Stoke City


In [5]:
# TODO: Annotate columns with roles
teams

name,PlayerID,MatchID,TeamID,FirstGoal,WinningGoal,ShotsonTargetincgoals,SavesMade,TimePlayed,PositionID,Starts,SubstituteOn,SubstituteOff,Goals,ShotsOffTargetincwoodwork,BlockedShots,PenaltiesTaken,PenaltyGoals,PenaltiesSaved,PenaltiesOffTarget,PenaltiesNotScored,DirectFreekickGoals,DirectFreekickOnTarget,DirectFreekickOffTarget,BlockedDirectFreekick,GoalsfromInsideBox,ShotsOnfromInsideBox,ShotsOfffromInsideBox,BlockedShotsfromInsideBox,GoalsfromOutsideBox,ShotsOnTargetOutsideBox,ShotsOffTargetOutsideBox,BlockedShotsOutsideBox,HeadedGoals,HeadedShotsOnTarget,HeadedShotsOffTarget,HeadedBlockedShots,LeftFootGoals,LeftFootShotsOnTarget,LeftFootShotsOffTarget,LeftFootBlockedShots,RightFootGoals,RightFootShotsOnTarget,RightFootShotsOffTarget,RightFootBlockedShots,OtherGoals,OtherShotsOnTarget,OtherShotsOffTarget,OtherBlockedShots,ShotsClearedoffLine,ShotsClearedoffLineInsideArea,ShotsClearedoffLineOutsideArea,GoalsOpenPlay,GoalsfromCorners,GoalsfromThrows,GoalsfromDirectFreeKick,GoalsfromSetPlay,Goalsfrompenalties,AttemptsOpenPlayontarget,AttemptsfromCornersontarget,AttemptsfromThrowsontarget,AttemptsfromDirectFreeKickontarget,AttemptsfromSetPlayontarget,AttemptsfromPenaltiesontarget,AttemptsOpenPlayofftarget,AttemptsfromCornersofftarget,AttemptsfromThrowsofftarget,AttemptsfromDirectFreeKickofftarget,AttemptsfromSetPlayofftarget,AttemptsfromPenaltiesofftarget,Goalsasasubstitute,TotalSuccessfulPassesAll,TotalUnsuccessfulPassesAll,Assists,KeyPasses,TotalSuccessfulPassesExclCrossesCorners,TotalUnsuccessfulPassesExclCrossesCorners,SuccessfulPassesOwnHalf,UnsuccessfulPassesOwnHalf,SuccessfulPassesOppositionHalf,UnsuccessfulPassesOppositionHalf,SuccessfulPassesDefensivethird,UnsuccessfulPassesDefensivethird,SuccessfulPassesMiddlethird,UnsuccessfulPassesMiddlethird,SuccessfulPassesFinalthird,UnsuccessfulPassesFinalthird,SuccessfulShortPasses,UnsuccessfulShortPasses,SuccessfulLongPasses,UnsuccessfulLongPasses,SuccessfulFlickOns,UnsuccessfulFlickOns,SuccessfulCrossesCorners,UnsuccessfulCrossesCorners,CornersTakeninclshortcorners,CornersConceded,SuccessfulCornersintoBox,UnsuccessfulCornersintoBox,ShortCorners,ThrowInstoOwnPlayer,ThrowInstoOppositionPlayer,SuccessfulDribbles,UnsuccessfulDribbles,SuccessfulCrossesCornersLeft,UnsuccessfulCrossesCornersLeft,SuccessfulCrossesLeft,UnsuccessfulCrossesLeft,SuccessfulCornersLeft,UnsuccessfulCornersLeft,SuccessfulCrossesCornersRight,UnsuccessfulCrossesCornersRight,SuccessfulCrossesRight,UnsuccessfulCrossesRight,SuccessfulCornersRight,UnsuccessfulCornersRight,SuccessfulLongBalls,UnsuccessfulLongBalls,SuccessfulLayOffs,UnsuccessfulLayOffs,ThroughBall,SuccessfulCrossesCornersintheair,UnsuccessfulCrossesCornersintheair,Successfulcrossesintheair,Unsuccessfulcrossesintheair,Successfulopenplaycrosses,Unsuccessfulopenplaycrosses,Touches,GoalAssistCorner,GoalAssistFreeKick,GoalAssistThrowIn,GoalAssistGoalKick,GoalAssistSetPiece,KeyCorner,KeyFreeKick,KeyThrowIn,KeyGoalKick,KeySetPieces,Duelswon,Duelslost,AerialDuelswon,AerialDuelslost,GroundDuelswon,GroundDuelslost,TacklesWon,TacklesLost,LastManTackle,TotalClearances,HeadedClearances,OtherClearances,ClearancesOfftheLine,Blocks,Interceptions,Recoveries,TotalFoulsConceded,FoulsConcededexchandballspens,TotalFoulsWon,FoulsWoninDangerAreaincpens,FoulsWonnotindangerarea,FoulWonPenalty,HandballsConceded,PenaltiesConceded,Offsides,YellowCards,RedCards,GoalsConceded,GoalsConcededInsideBox,GoalsConcededOutsideBox,SavesMadefromInsideBox,SavesMadefromOutsideBox,SavesfromPenalty,Catches,Punches,Drops,CrossesnotClaimed,GKDistribution,GKSuccessfulDistribution,GKUnsuccessfulDistribution,CleanSheets,TeamCleansheet,ErrorleadingtoGoal,Errorleadingtoattempt,ChallengeLost,ShotsOnConceded,ShotsOnConcededInsideBox,ShotsOnConcededOutsideBox,PositioninFormation,Turnovers,Dispossessed,BigChances,BigChancesFaced,PassForward,PassBackward,PassLeft,PassRight,UnsuccessfulBallTouch,SuccessfulBallTouch,TakeOnsOverrun,Touchesopenplayfinalthird,Touchesopenplayoppbox,Touchesopenplayoppsixyards,Team1,Team2,shot_eff,passes_eff,tackle_eff,dribble_eff
role,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string
0.0,3,17,1,0,0,0,0,22,4,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,23,1,1,0,23,1,11,0,12,1,2,0,17,0,4,1,21,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,28,0,0,0,0,0,0,0,0,0,0,2,1,0,0,2,1,2,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,14,0,0,0,0,6,1,8,9,1,0,0,2,0,0,Manchester United,Arsenal,-1,0.958333,1,-1
1.0,3,18,1,0,0,1,0,90,4,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,40,20,1,0,39,15,14,0,25,15,2,0,23,6,14,9,37,10,2,5,0,1,1,5,3,0,0,1,2,2,0,1,1,1,1,1,1,0,0,0,4,0,3,0,1,0,2,3,1,1,1,5,1,4,1,4,79,0,0,0,0,0,0,0,0,0,0,5,8,0,0,5,8,1,0,0,0,0,0,0,0,4,12,0,0,3,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,8,0,5,0,0,15,6,15,24,1,2,1,25,5,0,Manchester United,Arsenal,1,0.666667,1,0.5
2.0,3,53,1,0,0,0,0,26,4,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,20,7,0,1,20,5,2,2,18,3,2,0,9,3,9,2,17,4,3,1,0,0,0,2,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0,1,0,1,0,2,2,0,0,0,2,0,1,0,1,32,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,1,1,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,14,0,0,0,0,10,3,6,8,1,0,0,10,1,0,Manchester United,Aston Villa,-1,0.740741,-1,-1
3.0,3,88,1,0,0,0,0,27,4,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,21,8,0,0,19,3,5,0,14,3,1,0,12,1,6,2,18,2,1,1,0,0,2,5,2,0,1,1,0,0,0,2,0,1,4,1,3,0,1,1,1,0,1,1,0,0,0,0,0,0,2,5,1,4,1,4,32,0,0,0,0,0,0,0,0,0,0,2,1,0,0,2,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,12,0,1,0,0,5,2,18,4,0,0,0,14,1,0,Manchester United,Blackburn Rovers,-1,0.724138,-1,1
4.0,3,119,1,0,0,0,0,29,4,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,20,4,0,1,20,3,7,0,13,3,1,0,11,0,8,3,18,1,2,2,1,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,32,0,0,0,0,0,0,0,0,0,0,1,2,0,0,1,2,1,0,0,0,0,0,0,0,3,8,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,13,0,2,0,0,10,1,12,1,0,2,0,7,0,0,Manchester United,Bolton Wanderers,-1,0.833333,1,-1
,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10364.0,106603,289,1,0,0,0,0,23,2,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,2,0,0,6,2,3,1,3,1,2,1,3,0,1,1,6,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,13,0,0,0,0,0,0,0,0,0,0,2,1,0,1,2,0,1,1,0,1,0,1,0,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,12,0,0,0,0,3,0,5,0,0,0,0,1,0,0,Manchester United,Wolverhampton Wanderers,-1,0.75,0.5,-1
10365.0,107853,114,54,0,0,1,0,2,6,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,18,0,0,0,0,0,0,0,0,0,0,0,1,0,0,Fulham,Bolton Wanderers,1,-1,-1,-1
10366.0,114042,50,7,0,0,0,0,4,4,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,1,0,0,2,1,1,0,1,1,1,0,1,0,0,1,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,2,0,0,0,2,0,1,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,18,0,0,0,0,2,0,0,1,1,0,0,1,0,0,Aston Villa,Liverpool,-1,0.666667,1,1
10367.0,114042,54,7,0,0,0,0,15,4,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,10,1,0,0,10,1,1,0,9,1,1,0,5,0,4,1,9,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,18,0,0,0,0,0,0,0,0,0,0,3,3,0,0,3,3,1,1,0,0,0,0,0,0,1,4,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,18,0,0,0,0,3,1,6,1,1,0,0,2,1,0,Aston Villa,Manchester United,0,0.909091,0.5,0


In [6]:
# TODO: Annotate columns with roles
actions

name,PlayerID,Name
role,unused_string,unused_string
0.0,3,Giggs Ryan
1.0,363,Scholes Paul
2.0,973,Woodgate Jonathan
3.0,999,Smith Alan
4.0,1059,Gabbidon Daniel
,...,...
534.0,105190,Forde Anthony
535.0,105322,Riley Joe
536.0,106603,Fryers Ezekiel
537.0,107853,Trotta Marcello


The next step is to define the data model. Refer to [https://relational.fel.cvut.cz/dataset/PremierLeague](https://relational.fel.cvut.cz/dataset/PremierLeague)
for a description of the dataset.

In [7]:
dm = getml.data.DataModel(population=matches.to_placeholder())
dm.add(getml.data.to_placeholder(**peripheral))

# TODO
# dm.population.join(...)

Now we can create the container and add the tables to it.

In [8]:
container = getml.data.Container(population=matches, split=matches.split)
container.add(**peripheral)

container

Unnamed: 0,subset,name,rows,type
0,train,matches,266,View
1,val,matches,114,View

Unnamed: 0,name,rows,type
0,teams,20,DataFrame
1,actions,10369,DataFrame
2,players,539,DataFrame
