In [1]:
import getml
from challenge.utils.data import load_ctu_dataset

# Make "basketball_men" the active getML project for everything that follows
# in this notebook.
getml.set_project("basketball_men")

# Task: Basketball_men
### Dataset Description
> <span style="font-weight: 500; color: #3b3b3b;">ⓘ️&nbsp; Generated by `gpt-4o`</span>
>
> The *Basketball_men* dataset is designed to predict the rank of basketball teams, focusing on various aspects of team and player performance. The task is a *regression* task, with the target column being `rank` in the `teams` table.
> 
> **Data Model:**
> - **Tables:** 9 (awards_coaches, players_teams, draft, series_post, coaches, player_allstar, awards_players, teams, players)
> - **Columns:**
>   - **awards_coaches:** Information about coach awards.
>   - **players_teams:** Player performance data.
>   - **draft:** Draft details.
>   - **series_post:** Post-season series information.
>   - **coaches:** Coach performance data.
>   - **player_allstar:** All-star player details.
>   - **awards_players:** Player awards.
>   - **teams:** Team details, including the target column `rank`.
>   - **players:** Player information.
> 
> **Task and Target:**
> - **Task:** Regression
> - **Target Column:** `rank` (in the teams table)
> 
> **Metadata:**
> - **Size:** 18.3 MB
> - **Number of Rows:** 43,841
> - **Number of Columns:** 195
> - **Missing Values:** Yes
> - **Compound Keys:** Yes
> - **Loops:** Yes
> - **Type:** Real
> - **Instance Count:** 1,536
> 
> This dataset is used to analyze and predict team rankings based on historical performance data, providing insights into team dynamics and player contributions in the sport of basketball.

### Tables
Population table: teams

<h4>
  <details open>
     <summary>ER Diagram</summary>
       <img src="https://relational.fel.cvut.cz/assets/img/datasets-generated/Basketball_men.svg" alt="Basketball_men ER Diagram">
   </details>
</h4>

To load the dataset, we use the `load_ctu_dataset` function from the `utils`
module. This function returns a tuple with the population table as the first
element and a dictionary of peripheral tables as the second element.

In [2]:
# Load the population table (`teams`) and the dictionary of peripheral tables.
teams, peripheral = load_ctu_dataset("Basketball_men")

# Bind every peripheral table by its key instead of unpacking
# `peripheral.values()` positionally: a positional unpack silently attaches a
# table to the wrong variable name if the dictionary's ordering ever changes,
# whereas a keyed lookup either returns the right table or raises KeyError.
awards_coaches = peripheral["awards_coaches"]
awards_players = peripheral["awards_players"]
coaches = peripheral["coaches"]
draft = peripheral["draft"]
player_allstar = peripheral["player_allstar"]
players = peripheral["players"]
players_teams = peripheral["players_teams"]
series_post = peripheral["series_post"]

Analyzing schema:   0%|          | 0/9 [00:00<?, ?it/s]

Downloading tables:   0%|          | 0/9 [00:00<?, ?it/s]

Building data:   0%|          | 0/9 [00:00<?, ?it/s]

Now, we can inspect all tables and annotate the columns with [roles](https://getml.com/latest/user_guide/concepts/annotating_data/).

The population table (`teams`).

We already set the `target` role for the target (`rank`).


rank is the target column for a regression task.

In [3]:
# TODO: Annotate remaining columns with roles
# Only `rank` carries a role (target) so far; every other column is still
# `unused_float` or `unused_string` in the role row of the output below.
# NOTE(review): `year` and `tmID` look like the keys shared with the
# peripheral tables - confirm against the ER diagram before assigning roles.
teams

name,rank,year,confRank,o_fgm,o_fga,o_ftm,o_fta,o_3pm,o_3pa,o_oreb,o_dreb,o_reb,o_asts,o_pf,o_stl,o_to,o_blk,o_pts,d_fgm,d_fga,d_ftm,d_fta,d_3pm,d_3pa,d_oreb,d_dreb,d_reb,d_asts,d_pf,d_stl,d_to,d_blk,d_pts,o_tmRebound,d_tmRebound,homeWon,homeLost,awayWon,awayLost,neutWon,neutLoss,confWon,confLoss,divWon,divLoss,pace,won,lost,games,min,attendance,lgID,tmID,franchID,confID,divID,playoff,name,arena,bbtmID,split
role,target,unused_float,unused_float,unused_float,unused_float,unused_float,unused_float,unused_float,unused_float,unused_float,unused_float,unused_float,unused_float,unused_float,unused_float,unused_float,unused_float,unused_float,unused_float,unused_float,unused_float,unused_float,unused_float,unused_float,unused_float,unused_float,unused_float,unused_float,unused_float,unused_float,unused_float,unused_float,unused_float,unused_float,unused_float,unused_float,unused_float,unused_float,unused_float,unused_float,unused_float,unused_float,unused_float,unused_float,unused_float,unused_float,unused_float,unused_float,unused_float,unused_float,unused_float,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string
0.0,1,1937,0,249,0,183,0,0,0,0,0,0,0,0,0,0,0,681,0,0,0,0,0,0,0,0,0,0,0,0,0,0,578,0,0,8,1,5,3,0,0,0,0,0,0,0,14,4,18,,0,NBL,AFS,AFS,,EA,CF,Akron Firestone Non-Skids,,AFS,val
1.0,2,1937,0,243,0,159,0,0,0,0,0,0,0,0,0,0,0,645,0,0,0,0,0,0,0,0,0,0,0,0,0,0,498,0,0,8,1,5,4,0,0,0,0,0,0,0,13,5,18,,0,NBL,AGW,AGW,,EA,WC,Akron Goodyear Wingfoots,,AGW,val
2.0,4,1937,0,108,0,46,0,0,0,0,0,0,0,0,0,0,0,262,0,0,0,0,0,0,0,0,0,0,0,0,0,0,275,0,0,2,2,1,4,0,0,0,0,0,0,0,3,6,9,,0,NBL,BFB,BFB,,EA,,Buffalo Bisons,,BFB,val
3.0,5,1937,0,110,0,42,0,0,0,0,0,0,0,0,0,0,0,262,0,0,0,0,0,0,0,0,0,0,0,0,0,0,338,0,0,3,1,0,5,0,0,0,0,0,0,0,3,7,10,,0,NBL,CNC,CNC,,WE,,Richmond King Clothiers/Cincinna...,,CNC,train
4.0,6,1937,0,109,0,64,0,0,0,0,0,0,0,0,0,0,0,282,0,0,0,0,0,0,0,0,0,0,0,0,0,0,426,0,0,1,3,0,7,0,0,0,0,0,0,0,1,12,13,,0,NBL,COL,COL,,EA,,Columbus Athletic Supply,,COL,val
,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1531.0,5,2011,0,2490,5712,1127,1532,411,1301,882,1947,2829,1271,1287,561,952,323,6518,2668,5609,1118,1468,439,1236,815,2154,2969,1571,1313,518,1014,423,6893,0,0,0,0,0,0,0,0,0,0,0,0,0,22,44,0,,0,NBA,SAC,SAC,WC,PC,,Sacramento Kings,,,train
1532.0,1,2011,0,2611,5463,1067,1427,552,1405,683,2153,2836,1528,1143,490,895,293,6841,2512,5560,932,1233,412,1166,680,2039,2719,1291,1244,489,906,330,6368,0,0,0,0,0,0,0,0,0,0,0,0,0,50,16,0,,0,NBA,SAS,SAS,WC,SW,CF,San Antonio Spurs,,,train
1533.0,4,2011,0,2268,5154,1085,1410,365,1075,697,2074,2771,1382,1532,429,1001,321,5986,2234,5130,1335,1795,401,1224,677,1997,2674,1327,1214,478,870,322,6204,0,0,0,0,0,0,0,0,0,0,0,0,0,23,43,0,,0,NBA,TOR,TOR,EC,AT,,Toronto Raptors,,,val
1534.0,3,2011,0,2523,5531,1258,1668,273,845,861,2055,2916,1439,1441,545,936,385,6577,2403,5310,1302,1727,428,1259,728,1988,2716,1355,1366,494,968,375,6536,0,0,0,0,0,0,0,0,0,0,0,0,0,36,30,0,,0,NBA,UTA,UTA,WC,NW,C1,Utah Jazz,,,val


Peripheral tables:

In [4]:
# TODO: Annotate columns with roles
# Coach awards table; all columns are still unused_float / unused_string in
# the output below.
# NOTE(review): `coachID` and `year` presumably link to `coaches` - confirm
# against the ER diagram.
awards_coaches

name,id,year,coachID,award,lgID,note
role,unused_float,unused_float,unused_string,unused_string,unused_string,unused_string
0.0,1,1962,gallaha01,NBA Coach of the Year,NBA,
1.0,2,1963,hannual01,NBA Coach of the Year,NBA,
2.0,3,1964,auerbre01,NBA Coach of the Year,NBA,
3.0,4,1965,schaydo01,NBA Coach of the Year,NBA,
4.0,5,1966,kerrjo01,NBA Coach of the Year,NBA,
,...,...,...,...,...,...
56.0,57,2007,scottby01,NBA Coach of the Year,NBA,
57.0,58,2008,brownmi02,NBA Coach of the Year,NBA,
58.0,59,2009,brooksc01,NBA Coach of the Year,NBA,
59.0,60,2010,thiboto01,NBA Coach of the Year,NBA,


In [5]:
# TODO: Annotate columns with roles
# Player awards table; all columns are still unused_float / unused_string in
# the output below.
# NOTE(review): `playerID` presumably links to `players` - confirm against
# the ER diagram. `note` and `pos` are empty in most displayed rows.
awards_players

name,year,playerID,award,lgID,note,pos
role,unused_float,unused_string,unused_string,unused_string,unused_string,unused_string
0.0,1969,abdulka01,All-Defensive Second Team,NBA,,
1.0,1969,abdulka01,All-NBA Second Team,NBA,,C
2.0,1969,abdulka01,Rookie of the Year,NBA,,
3.0,1970,abdulka01,All-Defensive Second Team,NBA,,
4.0,1970,abdulka01,All-NBA First Team,NBA,,C
,...,...,...,...,...,...
1714.0,2007,youngth01,All-Rookie Second Team,NBA,,
1715.0,1946,zasloma01,All-NBA First Team,NBA,,
1716.0,1947,zasloma01,All-NBA First Team,NBA,,
1717.0,1948,zasloma01,All-NBA First Team,NBA,,


In [6]:
# TODO: Annotate columns with roles
# Coach season records; all columns are still unused_float / unused_string in
# the output below.
# NOTE(review): `tmID` and `year` presumably link to `teams` - confirm
# against the ER diagram. `post_wins` / `post_losses` are missing in at least
# one displayed row.
coaches

name,year,stint,won,lost,post_wins,post_losses,coachID,tmID,lgID
role,unused_float,unused_float,unused_float,unused_float,unused_float,unused_float,unused_string,unused_string,unused_string
0.0,1988,2,14,21,0,3,adelmri01,POR,NBA
1.0,1989,1,59,23,12,9,adelmri01,POR,NBA
2.0,1990,1,63,19,9,7,adelmri01,POR,NBA
3.0,1991,1,57,25,13,8,adelmri01,POR,NBA
4.0,1992,1,51,31,1,3,adelmri01,POR,NBA
,...,...,...,...,...,...,...,...,...
1684.0,1961,1,6,9,,,woolpph01,SFS,ABL1
1685.0,1973,2,0,3,0,0,youngdr01,KCO,NBA
1686.0,1968,4,10,23,3,4,youngve01,MNP,ABA
1687.0,1967,1,36,42,0,0,zasloma01,NJA,ABA


In [7]:
# TODO: Annotate columns with roles
# Draft picks; all columns are still unused_float / unused_string in the
# output below.
# NOTE(review): `playerID` is empty for several displayed picks, and
# `draftRound` / `draftSelection` / `draftOverall` are 0 in the first rows -
# check how these should be treated before assigning roles.
draft

name,id,draftYear,draftRound,draftSelection,draftOverall,tmID,firstName,lastName,suffixName,playerID,draftFrom,lgID
role,unused_float,unused_float,unused_float,unused_float,unused_float,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string
0.0,1,1967,0,0,0,ANA,Darrell,Hardy,,hardyda01,Baylor,ABA
1.0,2,1967,0,0,0,ANA,Bob,Krulish,,,Pacific,ABA
2.0,3,1967,0,0,0,ANA,Bob,Lewis,,lewisbo01,North Carolina,ABA
3.0,4,1967,0,0,0,ANA,Mike,Lynn,,lynnmi01,UCLA,ABA
4.0,5,1967,0,0,0,ANA,Tom,Workman,,workmto01,Seattle,ABA
,...,...,...,...,...,...,...,...,...,...,...,...
8616.0,8999,2011,2,26,56,LAL,Chukwudiebere,Maduabum,,,Bakersfield Jam (D-League),NBA
8617.0,9000,2011,2,27,57,DAL,Tanguy,Ngombo,,,Al Rayyan (Qatar),NBA
8618.0,9001,2011,2,28,58,LAL,Ater,Majok,,,Gold Coast Blaze (Australia),NBA
8619.0,9002,2011,2,29,59,SAS,Adam,Hanga,,,Assignia Manresa (Spain),NBA


In [8]:
# TODO: Annotate columns with roles
# All-star appearances; all columns are still unused_float / unused_string in
# the output below.
# NOTE(review): `season_id` appears to hold a year (e.g. 1969) and many stat
# columns are empty in the displayed rows - confirm before assigning roles.
player_allstar

name,season_id,games_played,minutes,points,o_rebounds,d_rebounds,rebounds,assists,steals,blocks,turnovers,personal_fouls,fg_attempted,fg_made,ft_attempted,ft_made,three_attempted,three_made,playerID,last_name,first_name,conference,league_id
role,unused_float,unused_float,unused_float,unused_float,unused_float,unused_float,unused_float,unused_float,unused_float,unused_float,unused_float,unused_float,unused_float,unused_float,unused_float,unused_float,unused_float,unused_float,unused_string,unused_string,unused_string,unused_string,unused_string
0.0,1969,1,18,10,,,11,4,,,,,8,4,2,2,,,abdulka01,Abdul-Jabbar,Kareem,East,NBA
1.0,1970,1,30,19,,,14,1,,,,,16,8,4,3,,,abdulka01,Abdul-Jabbar,Kareem,West,NBA
2.0,1971,1,19,12,,,7,2,,,,,10,5,2,2,,,abdulka01,Abdul-Jabbar,Kareem,West,NBA
3.0,1972,1,98,,,,,,,,,,,,,,,,abdulka01,Abdul-Jabbar,Kareem,West,NBA
4.0,1973,1,23,14,,,8,6,,,,,11,7,0,0,,,abdulka01,Abdul-Jabbar,Kareem,West,NBA
,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1603.0,1956,1,25,9,,,9,0,,,,,10,4,1,1,,,yardlge01,Yardley,George,West,NBA
1604.0,1957,1,32,19,,,9,1,,,,,15,8,5,3,,,yardlge01,Yardley,George,West,NBA
1605.0,1958,1,17,6,,,4,0,,,,,8,2,2,2,,,yardlge01,Yardley,George,West,NBA
1606.0,1959,1,16,11,,,3,0,,,,,9,5,2,1,,,yardlge01,Yardley,George,East,NBA


In [9]:
# TODO: Annotate columns with roles
# Player master data; all columns are still unused_float / unused_string in
# the output below.
# NOTE(review): `firstseason` / `lastseason` are 0 in every displayed row, and
# `birthDate` / `deathDate` are currently read as strings - check whether they
# carry usable information and whether the dates should be parsed.
players

name,firstseason,lastseason,height,weight,playerID,useFirst,firstName,middleName,lastName,nameGiven,fullGivenName,nameSuffix,nameNick,pos,college,collegeOther,birthDate,birthCity,birthState,birthCountry,highSchool,hsCity,hsState,hsCountry,deathDate,race
role,unused_float,unused_float,unused_float,unused_float,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string
0.0,0,0,82,240,abdelal01,Alaa,Alaa,,Abdelnaby,,,,,F-C,Duke,,1968-06-24,Cairo,,EGY,Bloomfield Senior,Bloomfield,NJ,USA,,B
1.0,0,0,85,225,abdulka01,Kareem,Kareem,,Abdul-Jabbar,,"Ferdinand Lewis Alcindor, Jr.",,"Lew, Cap",C,UCLA,,1947-04-16,New York,NY,USA,Power Memorial,New York,NY,USA,,B
2.0,0,0,74,185,abdulma01,Mahdi,Mahdi,,Abdul-Rahman,,"Walter Raphael Hazzard, Jr.",,Walt,G,UCLA,Santa Monica City,1942-04-15,Wilmington,DE,USA,Overbrook / Moton,Philadelphia / Easton,PA / MD,USA,2011-11-18,B
3.0,0,0,73,162,abdulma02,Mahmoud,Mahmoud,,Abdul-Rauf,,Chris Wayne Jackson,,,G,Louisiana State,,1969-03-09,Gulfport,MS,USA,Gulfport,Gulfport,MS,USA,,B
4.0,0,0,78,223,abdulta01,Tariq,Tariq,,Abdul-Wahad,,Olivier Michael Saint-Jean,,,G-F,San Jose State,Michigan,1974-11-03,Maisons Alfort,,FRA,Lycee Aristide Briand,Evreux,,FRA,,B
,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5057.0,0,0,72,185,ziegeba01,,Maurice,J.,Ziegenhorn,,,,Babe,G-F,Notre Dame,,1918-11-21,Chicago,IL,USA,St. Phillip,Chicago,IL,USA,1970-08-23,W
5058.0,0,0,75,195,zimmede01,Derrick,Derrick,Dewayne,Zimmerman,,,,,G,Mississippi State,,1981-02-02,Monroe,LA,USA,Wossman,Monroe,LA,USA,,B
5059.0,0,0,85,240,zoetji01,Jim,Jim,,Zoet,,,,,C,Kent State,,1953-12-20,Uxbridge,ON,CAN,Port Perry,Port Perry,ON,CAN,,W
5060.0,0,0,73,170,zopfbi01,Bill,William,Charles,Zopf,,,Jr.,"Bill, Zip",G,Duquesne,,1948-06-07,,,,Monaca Senior,Monaca,PA,USA,,W


In [10]:
# TODO: Annotate columns with roles
# Per-player, per-team season statistics; all columns are still
# unused_float / unused_string in the output below.
# NOTE(review): `playerID`, `tmID` and `year` presumably link to `players`
# and `teams` - confirm against the ER diagram.
players_teams

name,id,year,stint,GP,GS,minutes,points,oRebounds,dRebounds,rebounds,assists,steals,blocks,turnovers,PF,fgAttempted,fgMade,ftAttempted,ftMade,threeAttempted,threeMade,PostGP,PostGS,PostMinutes,PostPoints,PostoRebounds,PostdRebounds,PostRebounds,PostAssists,PostSteals,PostBlocks,PostTurnovers,PostPF,PostfgAttempted,PostfgMade,PostftAttempted,PostftMade,PostthreeAttempted,PostthreeMade,playerID,tmID,lgID,note
role,unused_float,unused_float,unused_float,unused_float,unused_float,unused_float,unused_float,unused_float,unused_float,unused_float,unused_float,unused_float,unused_float,unused_float,unused_float,unused_float,unused_float,unused_float,unused_float,unused_float,unused_float,unused_float,unused_float,unused_float,unused_float,unused_float,unused_float,unused_float,unused_float,unused_float,unused_float,unused_float,unused_float,unused_float,unused_float,unused_float,unused_float,unused_float,unused_float,unused_string,unused_string,unused_string,unused_string
0.0,1,1990,1,43,0,290,135,27,62,89,12,4,12,22,39,116,55,44,25,0,0,5,0,13,4,1,2,3,0,0,0,0,0,6,2,0,0,0,0,abdelal01,POR,NBA,
1.0,2,1991,1,71,0,934,432,81,179,260,30,25,17,66,132,361,178,101,76,0,0,8,0,25,12,0,4,4,2,0,0,2,4,10,5,4,2,0,0,abdelal01,POR,NBA,
2.0,3,1992,1,12,0,159,64,12,25,37,10,6,4,0,24,56,26,16,12,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,abdelal01,MIL,NBA,
3.0,4,1992,2,63,0,1152,514,114,186,300,17,19,22,97,165,417,219,100,76,0,0,4,0,68,22,2,11,13,1,0,1,9,7,24,11,0,0,0,0,abdelal01,BOS,NBA,
4.0,5,1993,1,13,0,159,64,12,34,46,3,2,3,17,20,55,24,25,16,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,abdelal01,BOS,NBA,
,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23746.0,23747,2005,1,2,0,32,4,1,3,4,7,0,0,4,4,3,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,zimmede01,NJN,NBA,
23747.0,23748,1982,1,7,0,30,2,3,5,8,1,1,3,4,9,5,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,zoetji01,DET,NBA,
23748.0,23749,1970,1,53,0,398,118,0,0,46,73,0,0,0,34,135,49,36,20,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,zopfbi01,MIL,NBA,
23749.0,23750,1947,0,57,0,0,331,0,0,0,0,0,0,0,209,0,123,128,85,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,zunicma01,FNT,NBL,


In [11]:
# TODO: Annotate columns with roles
# Post-season series results; all columns are still unused_float /
# unused_string in the output below.
# NOTE(review): `tmIDWinner` / `tmIDLoser` together with `year` presumably
# link to `teams` - confirm against the ER diagram.
series_post

name,id,year,W,L,round,series,tmIDWinner,lgIDWinner,tmIDLoser,lgIDLoser
role,unused_float,unused_float,unused_float,unused_float,unused_string,unused_string,unused_string,unused_string,unused_string,unused_string
0.0,1,1946,4,1,F,O,PHW,NBA,CHS,NBA
1.0,2,1946,2,1,QF,M,NYK,NBA,CLR,NBA
2.0,3,1946,2,1,QF,M,PHW,NBA,STB,NBA
3.0,4,1946,2,0,SF,N,PHW,NBA,NYK,NBA
4.0,5,1946,4,2,SF,N,CHS,NBA,WSC,NBA
,...,...,...,...,...,...,...,...,...,...
770.0,771,1974,1,0,DT,A,KEN,ABA,NYA,ABA
771.0,772,1975,4,2,F,D,NYA,ABA,DNA,ABA
772.0,773,1975,4,3,SF,C,DNA,ABA,KEN,ABA
773.0,774,1975,4,3,SF,C,NYA,ABA,SAA,ABA


The next step is to define the data model. Refer to [https://relational.fel.cvut.cz/dataset/Basketball_men](https://relational.fel.cvut.cz/dataset/Basketball_men)
for a description of the dataset.

In [12]:
# The data model is rooted at the placeholder of the population table.
population_placeholder = teams.to_placeholder()
dm = getml.data.DataModel(population=population_placeholder)

# Register placeholders for the peripheral tables, named after the keys of
# `peripheral`, so they are available for joins.
peripheral_placeholders = getml.data.to_placeholder(**peripheral)
dm.add(peripheral_placeholders)

# TODO
# dm.population.join(...)

Now we can create the container and add the tables to it.

In [13]:
# The container holds the population table, divided into subsets by its
# `split` column, ...
container = getml.data.Container(
    population=teams,
    split=teams.split,
)
# ... plus every peripheral table, registered under its key in `peripheral`.
container.add(**peripheral)

# Display the container's summary of subsets and peripheral tables.
container

Unnamed: 0,subset,name,rows,type
0,train,teams,1076,View
1,val,teams,460,View

Unnamed: 0,name,rows,type
0,awards_coaches,61,DataFrame
1,awards_players,1719,DataFrame
2,coaches,1689,DataFrame
3,draft,8621,DataFrame
4,player_allstar,1608,DataFrame
5,players,5062,DataFrame
6,players_teams,23751,DataFrame
7,series_post,775,DataFrame
