# Data extraction from `nba_api`

In [1]:
from nba_api.stats.endpoints import leagueleaders, leaguegamefinder, boxscoretraditionalv2
from nba_api.stats.static import teams, players
import pandas as pd
import numpy as np
import time
import dataframe_image as dfi

In [2]:
# Lift pandas' column-display cap so wide DataFrames render every column
pd.options.display.max_columns = None

## 1. Some stuff to get familiarized with the API

### 1.1. Top scorers in the last regular season (2022-23)

```python
# Fetch the 2022-23 regular-season leaders table for the PTS category
# and keep at most its first 500 rows
leaders_endpoint = leagueleaders.LeagueLeaders(
    season = '2022-23',
    season_type_all_star = 'Regular Season',
    stat_category_abbreviation = 'PTS'
)
top_500 = leaders_endpoint.get_data_frames()[0].head(500)
top_500.head()
```

```python
# Compute points per game.
# `top_500` was taken as a row subset of the endpoint's DataFrame, so adding a
# column to it in place can raise pandas' SettingWithCopyWarning. `assign`
# returns a new frame carrying the extra column instead.
top_500 = top_500.assign(PPG = top_500["PTS"] / top_500["GP"])

# View the top 10 players by points per game
top_500.sort_values(by = "PPG", ascending = False).head(10)
```

### 1.2. Find teams

In [3]:
# Each element of the list returned by 'get_teams()' is a dictionary describing one NBA team
nba_teams = teams.get_teams()
team_count = len(nba_teams)
print('Number of teams fetched: {}'.format(team_count))
nba_teams[:3]

Number of teams fetched: 30


[{'id': 1610612737,
  'full_name': 'Atlanta Hawks',
  'abbreviation': 'ATL',
  'nickname': 'Hawks',
  'city': 'Atlanta',
  'state': 'Georgia',
  'year_founded': 1949},
 {'id': 1610612738,
  'full_name': 'Boston Celtics',
  'abbreviation': 'BOS',
  'nickname': 'Celtics',
  'city': 'Boston',
  'state': 'Massachusetts',
  'year_founded': 1946},
 {'id': 1610612739,
  'full_name': 'Cleveland Cavaliers',
  'abbreviation': 'CLE',
  'nickname': 'Cavaliers',
  'city': 'Cleveland',
  'state': 'Ohio',
  'year_founded': 1970}]

Let's save the team ids and full names to a CSV file for future use.

In [4]:
# Keep only the id and full name of every team, then persist them as CSV
wanted_keys = ('id', 'full_name')
extracted_data = [
    {key: team[key] for key in team if key in wanted_keys}
    for team in nba_teams
]
nba_teams_df = pd.DataFrame(extracted_data)
nba_teams_df.to_csv('nba_teams.csv', index = False)

### 1.3. Find players

In [5]:
# Each element of the list returned by 'get_players()' is a dictionary describing one player
nba_players = players.get_players()
player_count = len(nba_players)
print('Number of players fetched: {}'.format(player_count))
nba_players[:3]

Number of players fetched: 4815


[{'id': 76001,
  'full_name': 'Alaa Abdelnaby',
  'first_name': 'Alaa',
  'last_name': 'Abdelnaby',
  'is_active': False},
 {'id': 76002,
  'full_name': 'Zaid Abdul-Aziz',
  'first_name': 'Zaid',
  'last_name': 'Abdul-Aziz',
  'is_active': False},
 {'id': 76003,
  'full_name': 'Kareem Abdul-Jabbar',
  'first_name': 'Kareem',
  'last_name': 'Abdul-Jabbar',
  'is_active': False}]

```python
# Look up the id of the first player named exactly "Jayson Tatum" (None if absent)
tatum_id = next(
    (entry["id"] for entry in nba_players if entry["full_name"] == "Jayson Tatum"),
    None,
)
if tatum_id is not None:
    print("Found Tatum!")
    print("His ID is: {}".format(tatum_id))
```

```python
# Collect every player dictionary whose full name is exactly "Jayson Tatum"
tatum = list(filter(lambda entry: entry["full_name"] == "Jayson Tatum", nba_players))
tatum
```

```python
# Same search as before, but keep only the first matching dictionary
tatum_matches = [entry for entry in nba_players if entry["full_name"] == "Jayson Tatum"]
tatum = tatum_matches[0]
tatum
```

In [6]:
# Keep only the id, full name and last name of every player, then persist them as CSV
wanted_keys = ('id', 'full_name', 'last_name')
extracted_data = [
    {key: person[key] for key in person if key in wanted_keys}
    for person in nba_players
]
nba_players_df = pd.DataFrame(extracted_data)
nba_players_df.to_csv('nba_players.csv', index = False)

## 2. Denver Nuggets

### 2.1. Basic info

#### Jokic and Murray ids

In [7]:
# Find Nuggets ID
nuggets_info = [team for team in nba_teams if team["abbreviation"] == "DEN"][0]
team_id = nuggets_info["id"]

# Find Jokic and Murray IDs.
# Matching on the first name "Nikola" alone returns whichever Nikola happens to
# come first in the list, so the last name is checked as well. Both spellings are
# accepted because the player data may or may not carry the diacritic.
jokic = [
    player for player in nba_players
    if player["first_name"] == "Nikola" and player["last_name"] in ("Jokic", "Jokić")
][0]
murray = [player for player in nba_players if player["full_name"] == "Jamal Murray"][0]

# Create a list with Jokic and Murray IDs (in that order)
jokic_murray = [jokic["id"], murray["id"]]

In [8]:
# Display the two collected player ids (Jokic first, then Murray)
jokic_murray

[203999, 1627750]

In [9]:
# Display the Nuggets' team id
team_id

1610612743

We save Jokic's and Murray's ids in a TXT file, one id per line.

In [10]:
file_path = 'jokic_murray_ids.txt'
# Write one id per line. The loop variable is deliberately not called `id`:
# in a notebook it would stay bound after the cell runs and shadow the builtin `id()`.
with open(file_path, 'w') as file:
    for player_id in jokic_murray:
        file.write(str(player_id) + '\n')

#### <span style="color:green">Nuggets' games</span>

In [3]:
# Find Nuggets games
# The Nuggets' team id is hard-coded here (the same value the lookup in section 2.1
# displays), so this section does not depend on the `nba_teams` lookup cell.
team_id = 1610612743

In [None]:
# Query the game finder for this team id (no other filters) and count the rows returned
game_finder = leaguegamefinder.LeagueGameFinder(team_id_nullable = team_id)
nuggets_games = game_finder.get_data_frames()[0]
len(nuggets_games)

In [4]:
# Preview the first rows of the Nuggets game log
nuggets_games.head()

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS
0,22023,1610612743,DEN,Denver Nuggets,22300786,2024-02-14,DEN vs. SAC,L,239,98,35,89,0.393,9,26,0.346,19,22,0.864,12,33,45,27,6,5,12,18,-4.0
1,22023,1610612743,DEN,Denver Nuggets,22300767,2024-02-12,DEN @ MIL,L,240,95,36,94,0.383,11,40,0.275,12,17,0.706,12,36,48,24,7,2,13,14,-17.0
2,22023,1610612743,DEN,Denver Nuggets,22300746,2024-02-09,DEN @ SAC,L,240,106,38,82,0.463,10,29,0.345,20,28,0.714,9,26,35,26,9,5,20,16,-29.0
3,22023,1610612743,DEN,Denver Nuggets,22300740,2024-02-08,DEN @ LAL,W,239,114,47,97,0.485,14,32,0.438,6,7,0.857,14,36,50,33,4,6,12,17,8.0
4,22023,1610612743,DEN,Denver Nuggets,22300712,2024-02-04,DEN vs. POR,W,240,112,42,86,0.488,12,28,0.429,16,19,0.842,14,28,42,26,8,6,8,11,9.0


In [5]:
# List the distinct SEASON_ID values present in the game log
nuggets_games['SEASON_ID'].unique()

array(['22023', '12023', '42022', '22022', '12022', '42021', '22021',
       '12021', '42020', '22020', '12020', '42019', '22019', '12019',
       '42018', '22018', '12018', '22017', '12017', '22016', '12016',
       '22015', '12015', '22014', '12014', '22013', '12013', '42012',
       '22012', '12012', '42011', '22011', '12011', '42010', '22010',
       '12010', '42009', '22009', '12009', '42008', '22008', '12008',
       '42007', '22007', '12007', '42006', '22006', '12006', '42005',
       '22005', '12005', '42004', '22004', '12004', '42003', '22003',
       '22002', '22001', '22000', '21999', '21998', '21997', '21996',
       '21995', '41994', '21994', '41993', '21993', '21992', '21991',
       '21990', '41989', '21989', '41988', '21988', '41987', '21987',
       '41986', '21986', '41985', '21985', '41984', '21984', '41983',
       '21983'], dtype=object)

We are going to keep only the regular-season and playoff games, i.e. the rows whose `SEASON_ID` is `2YYYY` (regular season) or `4YYYY` (playoffs).

### 2.2. Nuggets' 2016-2023 games

#### 2016-17 regular season and playoffs

In [14]:
# Keep the rows tagged as 2016-17 regular season ('22016') or playoffs ('42016');
# the copy makes the result independent of `nuggets_games`
season_ids_2016 = ['22016', '42016']
nuggets_2016 = nuggets_games[nuggets_games['SEASON_ID'].isin(season_ids_2016)].copy()
len(nuggets_2016)

87

In [15]:
# Peek at the ten earliest games by date (a sorted view only; the frame itself is untouched)
nuggets_2016.sort_values("GAME_DATE").head(10)

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS
696,22016,1610612743,DEN,Denver Nuggets,1521600005,2016-07-08,DEN vs. MIN,W,199,88,30,69,0.435,5,23,0.217,23,35,0.657,13,24,37,17,5,2,11,26,6.0
695,22016,1610612743,DEN,Denver Nuggets,1521600009,2016-07-09,DEN @ MEM,W,201,106,38,79,0.481,8,25,0.32,22,27,0.815,15,40,55,17,10,4,11,20,44.6
694,22016,1610612743,DEN,Denver Nuggets,1521600025,2016-07-11,DEN vs. MIA,L,199,81,30,70,0.429,5,22,0.227,16,19,0.842,11,24,35,13,7,4,12,23,-11.0
693,22016,1610612743,DEN,Denver Nuggets,1521600049,2016-07-14,DEN vs. UTA,W,199,80,28,63,0.444,6,18,0.333,18,21,0.857,7,32,39,12,9,3,15,21,20.0
692,22016,1610612743,DEN,Denver Nuggets,1521600062,2016-07-16,DEN vs. PHX,L,210,81,33,80,0.413,4,16,0.25,11,18,0.611,20,28,48,10,8,4,16,19,-1.0
683,22016,1610612743,DEN,Denver Nuggets,21600010,2016-10-26,DEN @ NOP,W,240,107,37,83,0.446,8,24,0.333,25,33,0.758,11,47,58,18,7,4,24,22,5.0
682,22016,1610612743,DEN,Denver Nuggets,21600033,2016-10-29,DEN vs. POR,L,265,113,38,100,0.38,6,23,0.261,31,39,0.795,16,45,61,19,10,5,17,21,-2.0
681,22016,1610612743,DEN,Denver Nuggets,21600042,2016-10-31,DEN @ TOR,L,240,102,33,81,0.407,9,22,0.409,27,35,0.771,11,35,46,14,2,9,8,18,-3.0
680,22016,1610612743,DEN,Denver Nuggets,21600067,2016-11-03,DEN @ MIN,W,240,102,39,81,0.481,6,22,0.273,18,25,0.72,7,37,44,24,10,2,20,24,3.0
679,22016,1610612743,DEN,Denver Nuggets,21600082,2016-11-05,DEN @ DET,L,241,86,31,94,0.33,7,25,0.28,17,20,0.85,16,32,48,15,8,2,16,15,-17.0


In [16]:
# Summer league games are identified as the rows whose GAME_DATE contains "2016-07"
summer_league_mask = nuggets_2016["GAME_DATE"].str.contains("2016-07")
index_condition = nuggets_2016.loc[summer_league_mask].index

# Remove them, order the remaining games from first to last in the season,
# and renumber the rows from 0
nuggets_2016 = (
    nuggets_2016
    .drop(index = index_condition)
    .sort_values(by = "GAME_DATE", ascending = True)
    .reset_index(drop = True)
)

In [17]:
# Inspect the last rows of the 2016-17 frame
nuggets_2016.tail()

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS
77,22016,1610612743,DEN,Denver Nuggets,21601168,2017-04-05,DEN @ HOU,L,240,104,41,104,0.394,7,38,0.184,15,18,0.833,12,39,51,26,10,5,12,24,-6.0
78,22016,1610612743,DEN,Denver Nuggets,21601184,2017-04-07,DEN vs. NOP,W,241,122,43,78,0.551,13,32,0.406,23,32,0.719,11,39,50,31,9,4,21,15,16.0
79,22016,1610612743,DEN,Denver Nuggets,21601198,2017-04-09,DEN vs. OKC,L,239,105,40,84,0.476,11,34,0.324,14,15,0.933,4,38,42,22,6,4,12,16,-1.0
80,22016,1610612743,DEN,Denver Nuggets,21601214,2017-04-11,DEN @ DAL,W,242,109,41,84,0.488,18,42,0.429,9,14,0.643,10,43,53,31,7,1,16,14,18.0
81,22016,1610612743,DEN,Denver Nuggets,21601225,2017-04-12,DEN @ OKC,W,240,111,39,85,0.459,7,25,0.28,26,32,0.813,5,38,43,22,6,8,15,17,6.0


We represent regular-season games with 0 and playoff games with 1. We leave this step for later, once all seasons have been put together; the snippet below shows how it would be done for a single season.

```python
# Flag playoff games: 0 when SEASON_ID is the regular-season code '22016', 1 otherwise
is_regular_season = nuggets_2016['SEASON_ID'] == '22016'
nuggets_2016['PLAYOFFS'] = (~is_regular_season).astype(int)
nuggets_2016.tail()
```

#### 2017-18 regular season and playoffs

In [18]:
# Keep the rows tagged as 2017-18 regular season ('22017') or playoffs ('42017');
# the copy makes the result independent of `nuggets_games`
season_ids_2017 = ['22017', '42017']
nuggets_2017 = nuggets_games[nuggets_games['SEASON_ID'].isin(season_ids_2017)].copy()
len(nuggets_2017)

88

In [19]:
# Peek at the ten earliest games by date (a sorted view only; the frame itself is untouched)
nuggets_2017.sort_values("GAME_DATE").head(10)

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS
601,22017,1610612743,DEN,Denver Nuggets,1521700005,2017-07-07,DEN vs. HOU,L,200,99,35,81,0.432,12,35,0.343,17,23,0.739,10,26,36,14,9,2,21,25,-3.0
600,22017,1610612743,DEN,Denver Nuggets,1521700018,2017-07-09,DEN vs. MIN,L,200,71,27,65,0.415,8,24,0.333,9,14,0.643,3,24,27,14,8,7,23,13,-19.0
599,22017,1610612743,DEN,Denver Nuggets,1521700027,2017-07-10,DEN @ TOR,L,199,81,30,70,0.429,11,29,0.379,10,15,0.667,7,26,33,20,9,5,13,20,-1.0
598,22017,1610612743,DEN,Denver Nuggets,1521700038,2017-07-12,DEN @ HOU,W,200,87,32,73,0.438,3,20,0.15,20,26,0.769,16,31,47,13,8,6,19,26,6.0
597,22017,1610612743,DEN,Denver Nuggets,1521700046,2017-07-13,DEN @ BKN,L,201,74,26,81,0.321,8,25,0.32,14,19,0.737,8,30,38,11,12,1,10,21,-11.0
596,22017,1610612743,DEN,Denver Nuggets,1521700057,2017-07-14,DEN vs. NOP,W,200,96,39,77,0.506,7,21,0.333,11,15,0.733,10,27,37,16,7,5,10,14,5.0
590,22017,1610612743,DEN,Denver Nuggets,21700010,2017-10-18,DEN @ UTA,L,242,96,36,77,0.468,13,27,0.481,11,12,0.917,7,31,38,21,4,3,20,18,-10.0
589,22017,1610612743,DEN,Denver Nuggets,21700035,2017-10-21,DEN vs. SAC,W,240,96,37,85,0.435,10,30,0.333,12,20,0.6,18,40,58,25,7,2,16,19,17.0
588,22017,1610612743,DEN,Denver Nuggets,21700047,2017-10-23,DEN vs. WAS,L,241,104,39,80,0.488,8,23,0.348,18,23,0.783,9,35,44,27,5,2,23,22,-5.0
587,22017,1610612743,DEN,Denver Nuggets,21700055,2017-10-25,DEN @ CHA,L,240,93,38,100,0.38,8,31,0.258,9,15,0.6,18,33,51,18,6,1,13,23,-17.0


In [20]:
# Summer league games are identified as the rows whose GAME_DATE contains "2017-07"
summer_league_mask = nuggets_2017["GAME_DATE"].str.contains("2017-07")
index_condition = nuggets_2017.loc[summer_league_mask].index

# Remove them, order the remaining games from first to last in the season,
# and renumber the rows from 0
nuggets_2017 = (
    nuggets_2017
    .drop(index = index_condition)
    .sort_values(by = "GAME_DATE", ascending = True)
    .reset_index(drop = True)
)

In [21]:
# Inspect the last rows of the 2017-18 frame
nuggets_2017.tail()

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS
77,22017,1610612743,DEN,Denver Nuggets,21701165,2018-04-03,DEN vs. IND,W,240,107,41,89,0.461,10,32,0.313,15,20,0.75,13,36,49,31,10,8,16,19,3.0
78,22017,1610612743,DEN,Denver Nuggets,21701180,2018-04-05,DEN vs. MIN,W,240,100,35,89,0.393,11,36,0.306,19,22,0.864,13,34,47,21,3,7,10,17,4.0
79,22017,1610612743,DEN,Denver Nuggets,21701191,2018-04-07,DEN @ LAC,W,241,134,53,85,0.624,12,29,0.414,16,23,0.696,7,26,33,36,4,6,7,26,19.0
80,22017,1610612743,DEN,Denver Nuggets,21701211,2018-04-09,DEN vs. POR,W,239,88,31,83,0.373,6,26,0.231,20,26,0.769,10,42,52,22,7,5,12,18,6.0
81,22017,1610612743,DEN,Denver Nuggets,21701225,2018-04-11,DEN @ MIN,L,263,106,41,90,0.456,13,32,0.406,11,18,0.611,10,37,47,19,4,4,12,22,-6.0


#### 2018-19 regular season and playoffs

In [22]:
# Keep the rows tagged as 2018-19 regular season ('22018') or playoffs ('42018');
# the copy makes the result independent of `nuggets_games`
season_ids_2018 = ['22018', '42018']
nuggets_2018 = nuggets_games[nuggets_games['SEASON_ID'].isin(season_ids_2018)].copy()
len(nuggets_2018)

101

In [23]:
# Peek at the ten earliest games by date (a sorted view only; the frame itself is untouched)
nuggets_2018.sort_values("GAME_DATE").head(10)

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS
508,22018,1610612743,DEN,Denver Nuggets,1521800009,2018-07-06,DEN @ MIN,W,200,70,29,71,0.408,4,21,0.19,8,12,0.667,7,36,43,17,9,4,14,24,14.4
507,22018,1610612743,DEN,Denver Nuggets,1521800019,2018-07-07,DEN vs. BOS,W,202,82,33,75,0.44,5,16,0.313,11,12,0.917,8,31,39,17,9,3,14,18,9.0
506,22018,1610612743,DEN,Denver Nuggets,1521800037,2018-07-09,DEN vs. MIL,W,201,90,33,71,0.465,11,22,0.5,13,17,0.765,7,23,30,22,9,0,11,27,8.8
505,22018,1610612743,DEN,Denver Nuggets,1521800052,2018-07-11,DEN vs. TOR,L,200,77,31,79,0.392,8,28,0.286,7,13,0.538,14,26,40,16,8,2,10,18,-8.0
504,22018,1610612743,DEN,Denver Nuggets,1521800066,2018-07-13,DEN vs. MIN,L,198,71,30,75,0.4,4,21,0.19,7,12,0.583,14,30,44,20,9,3,20,18,-9.8
498,22018,1610612743,DEN,Denver Nuggets,21800012,2018-10-17,DEN @ LAC,W,240,107,33,87,0.379,8,24,0.333,33,42,0.786,14,42,56,20,6,9,10,22,9.0
497,22018,1610612743,DEN,Denver Nuggets,21800033,2018-10-20,DEN vs. PHX,W,241,119,43,85,0.506,9,28,0.321,24,29,0.828,9,45,54,28,12,6,18,31,28.0
496,22018,1610612743,DEN,Denver Nuggets,21800038,2018-10-21,DEN vs. GSW,W,240,100,35,86,0.407,6,32,0.188,24,42,0.571,13,34,47,21,11,4,8,22,2.0
495,22018,1610612743,DEN,Denver Nuggets,21800051,2018-10-23,DEN vs. SAC,W,239,126,51,99,0.515,11,30,0.367,13,15,0.867,15,34,49,31,11,3,12,22,14.0
494,22018,1610612743,DEN,Denver Nuggets,21800066,2018-10-25,DEN @ LAL,L,239,114,46,92,0.5,6,23,0.261,16,20,0.8,10,34,44,22,8,5,19,22,-7.0


In [24]:
# Summer league games are identified as the rows whose GAME_DATE contains "2018-07"
summer_league_mask = nuggets_2018["GAME_DATE"].str.contains("2018-07")
index_condition = nuggets_2018.loc[summer_league_mask].index

# Remove them, order the remaining games from first to last in the season,
# and renumber the rows from 0
nuggets_2018 = (
    nuggets_2018
    .drop(index = index_condition)
    .sort_values(by = "GAME_DATE", ascending = True)
    .reset_index(drop = True)
)

In [25]:
# Inspect the last rows of the 2018-19 frame
nuggets_2018.tail()

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS
91,42018,1610612743,DEN,Denver Nuggets,41800233,2019-05-03,DEN @ POR,L,341,137,54,119,0.454,15,42,0.357,14,20,0.7,24,42,66,28,11,8,20,27,-3.0
92,42018,1610612743,DEN,Denver Nuggets,41800234,2019-05-05,DEN @ POR,W,241,116,40,92,0.435,11,25,0.44,25,28,0.893,17,28,45,21,7,4,8,22,4.0
93,42018,1610612743,DEN,Denver Nuggets,41800235,2019-05-07,DEN vs. POR,W,239,124,44,89,0.494,8,24,0.333,28,31,0.903,10,52,62,26,4,4,8,22,26.0
94,42018,1610612743,DEN,Denver Nuggets,41800236,2019-05-09,DEN @ POR,L,240,108,33,86,0.384,12,32,0.375,30,32,0.938,16,28,44,22,7,3,6,23,-11.0
95,42018,1610612743,DEN,Denver Nuggets,41800237,2019-05-12,DEN vs. POR,L,240,96,33,89,0.371,2,19,0.105,28,39,0.718,13,38,51,15,1,9,5,23,-4.0


#### 2019-20 regular season and playoffs

In [26]:
# Keep the rows tagged as 2019-20 regular season ('22019') or playoffs ('42019');
# the copy makes the result independent of `nuggets_games`
season_ids_2019 = ['22019', '42019']
nuggets_2019 = nuggets_games[nuggets_games['SEASON_ID'].isin(season_ids_2019)].copy()
len(nuggets_2019)

96

In [27]:
# Peek at the ten earliest games by date (a sorted view only; the frame itself is untouched)
nuggets_2019.sort_values("GAME_DATE").head(10)

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS
402,22019,1610612743,DEN,Denver Nuggets,1521900024,2019-07-07,DEN @ ORL,W,201,84,32,64,0.5,11,25,0.44,9,17,0.529,7,24,31,19,6,4,20,23,1.6
401,22019,1610612743,DEN,Denver Nuggets,1521900045,2019-07-09,DEN @ BOS,L,200,82,31,70,0.443,12,27,0.444,8,14,0.571,8,28,36,16,3,2,18,29,-2.0
400,22019,1610612743,DEN,Denver Nuggets,1521900058,2019-07-10,DEN vs. GSW,L,200,69,27,73,0.37,3,25,0.12,12,14,0.857,15,24,39,17,15,2,20,20,4.2
399,22019,1610612743,DEN,Denver Nuggets,1521900079,2019-07-13,DEN @ HOU,L,202,96,38,75,0.507,9,25,0.36,11,13,0.846,12,22,34,22,6,3,13,18,-13.6
394,22019,1610612743,DEN,Denver Nuggets,21900013,2019-10-23,DEN @ POR,W,239,108,34,81,0.42,18,32,0.563,22,27,0.815,11,34,45,24,11,4,19,23,8.0
393,22019,1610612743,DEN,Denver Nuggets,21900023,2019-10-25,DEN vs. PHX,W,266,108,36,92,0.391,7,27,0.259,29,36,0.806,8,37,45,21,7,12,14,26,1.0
392,22019,1610612743,DEN,Denver Nuggets,21900050,2019-10-28,DEN @ SAC,W,240,101,36,98,0.367,10,36,0.278,19,21,0.905,19,35,54,19,8,5,10,18,7.0
391,22019,1610612743,DEN,Denver Nuggets,21900053,2019-10-29,DEN vs. DAL,L,240,106,43,91,0.473,11,32,0.344,9,13,0.692,14,36,50,24,6,1,10,24,-3.0
390,22019,1610612743,DEN,Denver Nuggets,21900067,2019-10-31,DEN @ NOP,L,239,107,43,89,0.483,7,26,0.269,14,16,0.875,5,30,35,24,5,3,14,24,-15.0
389,22019,1610612743,DEN,Denver Nuggets,21900079,2019-11-02,DEN @ ORL,W,238,91,27,73,0.37,9,28,0.321,28,39,0.718,7,34,41,15,8,3,13,14,4.0


In [28]:
# Summer league games are identified as the rows whose GAME_DATE contains "2019-07"
summer_league_mask = nuggets_2019["GAME_DATE"].str.contains("2019-07")
index_condition = nuggets_2019.loc[summer_league_mask].index

# Remove them, order the remaining games from first to last in the season,
# and renumber the rows from 0
nuggets_2019 = (
    nuggets_2019
    .drop(index = index_condition)
    .sort_values(by = "GAME_DATE", ascending = True)
    .reset_index(drop = True)
)

In [29]:
# Inspect the last rows of the 2019-20 frame
nuggets_2019.tail()

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS
87,42019,1610612743,DEN,Denver Nuggets,41900311,2020-09-18,DEN @ LAL,L,239,114,41,83,0.494,9,26,0.346,23,28,0.821,9,28,37,23,3,2,16,26,-12.0
88,42019,1610612743,DEN,Denver Nuggets,41900312,2020-09-20,DEN @ LAL,L,240,103,35,74,0.473,8,24,0.333,25,33,0.758,7,24,31,22,12,4,19,22,-2.0
89,42019,1610612743,DEN,Denver Nuggets,41900313,2020-09-22,DEN vs. LAL,W,240,114,40,73,0.548,11,29,0.379,23,29,0.793,9,35,44,26,8,2,18,21,8.0
90,42019,1610612743,DEN,Denver Nuggets,41900314,2020-09-24,DEN vs. LAL,L,241,108,39,77,0.506,10,28,0.357,20,23,0.87,6,27,33,22,4,3,11,28,-6.0
91,42019,1610612743,DEN,Denver Nuggets,41900315,2020-09-26,DEN @ LAL,L,240,107,38,90,0.422,8,30,0.267,23,26,0.885,9,27,36,23,5,2,11,25,-10.0


#### 2020-21 regular season and playoffs

Note that there was no summer league before this season because of the COVID-19 pandemic, so there are no summer league games to drop.

In [30]:
# Keep the rows tagged as 2020-21 regular season ('22020') or playoffs ('42020');
# the copy makes the result independent of `nuggets_games`
season_ids_2020 = ['22020', '42020']
nuggets_2020 = nuggets_games[nuggets_games['SEASON_ID'].isin(season_ids_2020)].copy()
len(nuggets_2020)

82

In [31]:
# Peek at the ten earliest games by date (a sorted view only; the frame itself is untouched)
nuggets_2020.sort_values("GAME_DATE").head(10)

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS
296,22020,1610612743,DEN,Denver Nuggets,22000019,2020-12-23,DEN vs. SAC,L,265,122,47,98,0.48,8,29,0.276,20,24,0.833,10,36,46,28,6,11,15,25,-2.0
295,22020,1610612743,DEN,Denver Nuggets,22000009,2020-12-25,DEN vs. LAC,L,238,108,37,81,0.457,11,36,0.306,23,31,0.742,10,22,32,25,7,1,13,20,-13.0
294,22020,1610612743,DEN,Denver Nuggets,22000044,2020-12-28,DEN vs. HOU,W,242,124,46,87,0.529,14,31,0.452,18,32,0.563,11,41,52,31,8,7,14,18,13.0
293,22020,1610612743,DEN,Denver Nuggets,22000055,2020-12-29,DEN @ SAC,L,240,115,44,81,0.543,11,28,0.393,16,20,0.8,7,32,39,27,7,5,19,18,-10.0
292,22020,1610612743,DEN,Denver Nuggets,22000076,2021-01-01,DEN vs. PHX,L,242,103,39,86,0.453,12,29,0.414,13,16,0.813,8,30,38,28,7,1,11,25,-3.0
291,22020,1610612743,DEN,Denver Nuggets,22000088,2021-01-03,DEN @ MIN,W,241,124,45,92,0.489,19,45,0.422,15,21,0.714,13,30,43,34,6,4,15,11,15.0
290,22020,1610612743,DEN,Denver Nuggets,22000104,2021-01-05,DEN vs. MIN,W,241,123,41,79,0.519,8,25,0.32,33,45,0.733,10,32,42,22,9,2,14,27,7.0
289,22020,1610612743,DEN,Denver Nuggets,22000120,2021-01-07,DEN vs. DAL,L,265,117,43,97,0.443,15,44,0.341,16,21,0.762,9,38,47,25,8,1,13,27,-7.0
288,22020,1610612743,DEN,Denver Nuggets,22000133,2021-01-09,DEN @ PHI,W,239,115,45,90,0.5,12,33,0.364,13,15,0.867,15,30,45,28,11,7,17,12,12.0
287,22020,1610612743,DEN,Denver Nuggets,22000144,2021-01-10,DEN @ NYK,W,241,114,45,84,0.536,15,35,0.429,9,12,0.75,8,35,43,23,11,4,12,25,25.0


In [32]:
# Order the games from first to last in the season and renumber the rows from 0
nuggets_2020 = (
    nuggets_2020
    .sort_values(by = "GAME_DATE", ascending = True)
    .reset_index(drop = True)
)

In [33]:
# Inspect the last rows of the 2020-21 frame
nuggets_2020.tail()

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS
77,42020,1610612743,DEN,Denver Nuggets,42000166,2021-06-03,DEN @ POR,W,239,126,44,86,0.512,15,34,0.441,23,27,0.852,10,29,39,28,8,3,12,18,11.0
78,42020,1610612743,DEN,Denver Nuggets,42000231,2021-06-07,DEN @ PHX,L,238,105,43,92,0.467,14,40,0.35,5,6,0.833,11,30,41,26,10,4,11,17,-17.0
79,42020,1610612743,DEN,Denver Nuggets,42000232,2021-06-09,DEN @ PHX,L,242,98,36,90,0.4,14,43,0.326,12,20,0.6,11,35,46,24,4,6,11,21,-25.0
80,42020,1610612743,DEN,Denver Nuggets,42000233,2021-06-11,DEN vs. PHX,L,240,102,39,95,0.411,14,41,0.341,10,16,0.625,18,28,46,21,4,3,14,21,-14.0
81,42020,1610612743,DEN,Denver Nuggets,42000234,2021-06-13,DEN vs. PHX,L,241,118,45,98,0.459,14,37,0.378,14,21,0.667,7,30,37,22,6,3,8,24,-7.0


#### 2021-22 regular season and playoffs

In [34]:
# Keep the rows tagged as 2021-22 regular season ('22021') or playoffs ('42021');
# the copy makes the result independent of `nuggets_games`
season_ids_2021 = ['22021', '42021']
nuggets_2021 = nuggets_games[nuggets_games['SEASON_ID'].isin(season_ids_2021)].copy()
len(nuggets_2021)

92

In [35]:
# Peek at the ten earliest games by date (a sorted view only; the frame itself is untouched)
nuggets_2021.sort_values("GAME_DATE").head(10)

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS
214,22021,1610612743,DEN,Denver Nuggets,1522100007,2021-08-08,DEN @ MIA,L,200,77,26,72,0.361,7,29,0.241,18,26,0.692,10,28,38,13,8,4,21,18,-19.4
213,22021,1610612743,DEN,Denver Nuggets,1522100018,2021-08-10,DEN vs. BOS,L,201,82,28,69,0.406,7,26,0.269,19,30,0.633,8,31,39,14,5,9,15,21,-21.6
212,22021,1610612743,DEN,Denver Nuggets,1522100037,2021-08-12,DEN vs. PHX,L,200,84,31,70,0.443,13,32,0.406,9,18,0.5,7,26,33,23,6,5,15,23,-6.0
211,22021,1610612743,DEN,Denver Nuggets,1522100045,2021-08-14,DEN @ DAL,W,210,89,32,71,0.451,13,30,0.433,12,18,0.667,15,31,46,15,7,4,24,24,0.0
210,22021,1610612743,DEN,Denver Nuggets,1522100062,2021-08-16,DEN vs. MIL,W,198,94,37,81,0.457,11,34,0.324,9,14,0.643,12,16,28,22,14,3,10,15,5.2
204,22021,1610612743,DEN,Denver Nuggets,22100012,2021-10-20,DEN @ PHX,W,241,110,44,83,0.53,17,39,0.436,5,9,0.556,6,40,46,25,9,1,17,20,12.0
203,22021,1610612743,DEN,Denver Nuggets,22100024,2021-10-22,DEN vs. SAS,W,240,102,44,86,0.512,9,30,0.3,5,8,0.625,8,33,41,26,10,2,21,21,6.0
202,22021,1610612743,DEN,Denver Nuggets,22100048,2021-10-25,DEN vs. CLE,L,240,87,35,86,0.407,9,38,0.237,8,12,0.667,11,36,47,21,10,3,21,22,-12.0
201,22021,1610612743,DEN,Denver Nuggets,22100054,2021-10-26,DEN @ UTA,L,242,110,42,83,0.506,10,28,0.357,16,24,0.667,5,30,35,19,9,3,13,23,-12.0
200,22021,1610612743,DEN,Denver Nuggets,22100075,2021-10-29,DEN vs. DAL,W,240,106,43,83,0.518,11,36,0.306,9,13,0.692,5,46,51,28,8,3,19,24,31.0


In [36]:
# Summer league games are identified as the rows whose GAME_DATE contains "2021-08"
# (this summer league was played in August rather than July)
summer_league_mask = nuggets_2021["GAME_DATE"].str.contains("2021-08")
index_condition = nuggets_2021.loc[summer_league_mask].index

# Remove them, order the remaining games from first to last in the season,
# and renumber the rows from 0
nuggets_2021 = (
    nuggets_2021
    .drop(index = index_condition)
    .sort_values(by = "GAME_DATE", ascending = True)
    .reset_index(drop = True)
)

In [37]:
# Inspect the last rows of the 2021-22 frame
nuggets_2021.tail()

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS
82,42021,1610612743,DEN,Denver Nuggets,42100161,2022-04-16,DEN @ GSW,L,241,107,43,93,0.462,11,35,0.314,10,13,0.769,9,26,35,26,8,3,10,22,-16.0
83,42021,1610612743,DEN,Denver Nuggets,42100162,2022-04-18,DEN @ GSW,L,240,106,37,87,0.425,13,36,0.361,19,21,0.905,17,30,47,20,4,4,16,25,-20.0
84,42021,1610612743,DEN,Denver Nuggets,42100163,2022-04-21,DEN vs. GSW,L,240,113,40,80,0.5,11,26,0.423,22,27,0.815,11,33,44,25,9,3,17,26,-5.0
85,42021,1610612743,DEN,Denver Nuggets,42100164,2022-04-24,DEN vs. GSW,W,241,126,41,73,0.562,15,31,0.484,29,36,0.806,5,28,33,28,9,2,20,31,5.0
86,42021,1610612743,DEN,Denver Nuggets,42100165,2022-04-27,DEN @ GSW,L,240,98,36,81,0.444,6,29,0.207,20,29,0.69,14,36,50,26,5,5,14,21,-4.0


#### 2022-23 regular season and playoffs

In [7]:
# Keep the rows tagged as 2022-23 regular season ('22022') or playoffs ('42022');
# the copy makes the result independent of `nuggets_games`
season_ids_2022 = ['22022', '42022']
nuggets_2022 = nuggets_games[nuggets_games['SEASON_ID'].isin(season_ids_2022)].copy()
len(nuggets_2022)

107

In [8]:
# Peek at the ten earliest games by date (a sorted view only; the frame itself is untouched)
nuggets_2022.sort_values("GAME_DATE").head(10)

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS
176,22022,1610612743,DEN,Denver Nuggets,1522200008,2022-07-08,DEN @ MIN,L,190,77,30,67,0.448,6,28,0.214,11,16,0.688,11,25,36,18,6,6,17,18,-9.6
175,22022,1610612743,DEN,Denver Nuggets,1522200022,2022-07-10,DEN @ CLE,W,199,84,35,78,0.449,9,32,0.281,5,12,0.417,17,31,48,20,8,7,15,22,6.8
174,22022,1610612743,DEN,Denver Nuggets,1522200045,2022-07-13,DEN @ LAC,W,198,80,28,73,0.384,11,34,0.324,13,23,0.565,11,39,50,22,11,10,15,15,7.0
173,22022,1610612743,DEN,Denver Nuggets,1522200055,2022-07-15,DEN vs. PHI,L,200,71,26,73,0.356,9,33,0.273,10,18,0.556,8,21,29,20,14,4,17,16,-26.0
172,22022,1610612743,DEN,Denver Nuggets,1522200075,2022-07-17,DEN @ UTA,L,202,72,26,69,0.377,6,19,0.316,14,20,0.7,14,30,44,11,8,5,18,18,-9.4
166,22022,1610612743,DEN,Denver Nuggets,22200012,2022-10-19,DEN @ UTA,L,238,102,40,83,0.482,5,22,0.227,17,18,0.944,10,25,35,21,10,3,21,23,-21.0
165,22022,1610612743,DEN,Denver Nuggets,22200026,2022-10-21,DEN @ GSW,W,241,128,46,86,0.535,15,34,0.441,21,25,0.84,12,29,41,30,10,2,20,22,5.0
164,22022,1610612743,DEN,Denver Nuggets,22200035,2022-10-22,DEN vs. OKC,W,239,122,42,85,0.494,20,38,0.526,18,31,0.581,9,37,46,33,5,6,18,21,5.0
163,22022,1610612743,DEN,Denver Nuggets,22200051,2022-10-24,DEN @ POR,L,242,110,45,91,0.495,12,31,0.387,8,14,0.571,10,28,38,28,4,1,8,28,-25.0
162,22022,1610612743,DEN,Denver Nuggets,22200064,2022-10-26,DEN vs. LAL,W,242,110,42,92,0.457,9,34,0.265,17,21,0.81,9,45,54,34,12,5,11,17,11.0


In [40]:
# Summer league games are identified as the rows whose GAME_DATE contains "2022-07"
summer_league_mask = nuggets_2022["GAME_DATE"].str.contains("2022-07")
index_condition = nuggets_2022.loc[summer_league_mask].index

# Remove them, order the remaining games from first to last in the season,
# and renumber the rows from 0
nuggets_2022 = (
    nuggets_2022
    .drop(index = index_condition)
    .sort_values(by = "GAME_DATE", ascending = True)
    .reset_index(drop = True)
)

In [41]:
# Inspect the last rows of the 2022-23 frame
nuggets_2022.tail()

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS
97,42022,1610612743,DEN,Denver Nuggets,42200401,2023-06-01,DEN vs. MIA,W,239,104,40,79,0.506,8,27,0.296,16,20,0.8,6,39,45,29,4,4,10,8,11.0
98,42022,1610612743,DEN,Denver Nuggets,42200402,2023-06-04,DEN vs. MIA,L,239,108,39,75,0.52,11,28,0.393,19,22,0.864,9,29,38,23,7,2,13,21,-3.0
99,42022,1610612743,DEN,Denver Nuggets,42200403,2023-06-07,DEN @ MIA,W,238,109,41,80,0.513,5,18,0.278,22,27,0.815,13,45,58,28,3,5,13,18,15.0
100,42022,1610612743,DEN,Denver Nuggets,42200404,2023-06-09,DEN @ MIA,W,242,108,39,79,0.494,14,28,0.5,16,21,0.762,5,29,34,26,11,7,6,18,13.0
101,42022,1610612743,DEN,Denver Nuggets,42200405,2023-06-12,DEN vs. MIA,W,240,94,38,84,0.452,5,28,0.179,13,23,0.565,11,46,57,21,6,7,14,13,5.0


While experimenting, at some point we were working only with regular-season games and saw 83 rows (the regular season has 82 games); the last row was mostly NaN. The following code gets rid of that row.

```python
# Drop the trailing row only when it really is the incomplete one (it contains NaN).
# An unconditional drop removes one more valid game every time the cell is re-run.
if not nuggets_2022.empty and nuggets_2022.iloc[-1].isna().any():
    nuggets_2022 = nuggets_2022.drop(index = nuggets_2022.index[-1])
nuggets_2022.tail()
```

#### <span style="color:green">2023-24 regular season and playoffs</span>

In [20]:
# Ask the game finder directly for this team's 2022-23 regular-season games
finder_2022_23 = leaguegamefinder.LeagueGameFinder(
    team_id_nullable = team_id,
    season_nullable = '2022-23',
    season_type_nullable = "Regular Season",
)
nuggets_2022_23 = finder_2022_23.get_data_frames()[0]
nuggets_2022_23.head()

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS
0,22022,1610612743,DEN,Denver Nuggets,22201227,2023-04-09,DEN vs. SAC,W,240,109,42,84,0.5,7,28,0.25,18,25,0.72,15,36,51,25,11,2,16,15,14.0
1,22022,1610612743,DEN,Denver Nuggets,22201213,2023-04-08,DEN @ UTA,L,240,114,41,90,0.456,15,46,0.326,17,24,0.708,5,36,41,32,7,9,14,20,-4.0
2,22022,1610612743,DEN,Denver Nuggets,22201201,2023-04-06,DEN @ PHX,L,239,115,44,83,0.53,8,23,0.348,19,23,0.826,9,29,38,24,7,2,12,19,-4.0
3,22022,1610612743,DEN,Denver Nuggets,22201183,2023-04-04,DEN @ HOU,L,240,103,41,87,0.471,11,36,0.306,10,18,0.556,13,34,47,26,9,13,20,18,-21.0
4,22022,1610612743,DEN,Denver Nuggets,22201175,2023-04-02,DEN vs. GSW,W,239,112,44,100,0.44,9,33,0.273,15,18,0.833,17,30,47,24,11,7,11,17,2.0


In [21]:
# Inspect the last rows of the 2022-23 regular-season frame
nuggets_2022_23.tail()

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS
77,22022,1610612743,DEN,Denver Nuggets,22200064,2022-10-26,DEN vs. LAL,W,242,110,42,92,0.457,9,34,0.265,17,21,0.81,9,45,54,34,12,5,11,17,11.0
78,22022,1610612743,DEN,Denver Nuggets,22200051,2022-10-24,DEN @ POR,L,242,110,45,91,0.495,12,31,0.387,8,14,0.571,10,28,38,28,4,1,8,28,-25.0
79,22022,1610612743,DEN,Denver Nuggets,22200035,2022-10-22,DEN vs. OKC,W,239,122,42,85,0.494,20,38,0.526,18,31,0.581,9,37,46,33,5,6,18,21,5.0
80,22022,1610612743,DEN,Denver Nuggets,22200026,2022-10-21,DEN @ GSW,W,241,128,46,86,0.535,15,34,0.441,21,25,0.84,12,29,41,30,10,2,20,22,5.0
81,22022,1610612743,DEN,Denver Nuggets,22200012,2022-10-19,DEN @ UTA,L,238,102,40,83,0.482,5,22,0.227,17,18,0.944,10,25,35,21,10,3,21,23,-21.0


In [22]:
# Ask the game finder directly for this team's 2023-24 regular-season games
finder_2023_24 = leaguegamefinder.LeagueGameFinder(
    team_id_nullable = team_id,
    season_nullable = '2023-24',
    season_type_nullable = "Regular Season",
)
nuggets_2023_24 = finder_2023_24.get_data_frames()[0]
nuggets_2023_24.head()

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS
0,22023,1610612743,DEN,Denver Nuggets,22300786,2024-02-14,DEN vs. SAC,L,239,98,35,89,0.393,9,26,0.346,19,22,0.864,12,33,45,27,6,5,12,18,-4.0
1,22023,1610612743,DEN,Denver Nuggets,22300767,2024-02-12,DEN @ MIL,L,240,95,36,94,0.383,11,40,0.275,12,17,0.706,12,36,48,24,7,2,13,14,-17.0
2,22023,1610612743,DEN,Denver Nuggets,22300746,2024-02-09,DEN @ SAC,L,240,106,38,82,0.463,10,29,0.345,20,28,0.714,9,26,35,26,9,5,20,16,-29.0
3,22023,1610612743,DEN,Denver Nuggets,22300740,2024-02-08,DEN @ LAL,W,239,114,47,97,0.485,14,32,0.438,6,7,0.857,14,36,50,33,4,6,12,17,8.0
4,22023,1610612743,DEN,Denver Nuggets,22300712,2024-02-04,DEN vs. POR,W,240,112,42,86,0.488,12,28,0.429,16,19,0.842,14,28,42,26,8,6,8,11,9.0


In [24]:
nuggets_2023_24.tail()

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS
50,22023,1610612743,DEN,Denver Nuggets,22300123,2023-11-01,DEN @ MIN,L,240,89,38,96,0.396,6,33,0.182,7,10,0.7,12,31,43,23,6,10,15,22,-21.0
51,22023,1610612743,DEN,Denver Nuggets,22300110,2023-10-30,DEN vs. UTA,W,240,110,47,84,0.56,9,28,0.321,7,17,0.412,7,36,43,32,4,8,12,14,8.0
52,22023,1610612743,DEN,Denver Nuggets,22300095,2023-10-29,DEN @ OKC,W,241,128,53,88,0.602,11,27,0.407,11,13,0.846,10,38,48,34,8,5,13,21,33.0
53,22023,1610612743,DEN,Denver Nuggets,22300078,2023-10-27,DEN @ MEM,W,240,108,41,88,0.466,14,37,0.378,12,18,0.667,6,34,40,27,11,10,17,18,4.0
54,22023,1610612743,DEN,Denver Nuggets,22300061,2023-10-24,DEN vs. LAL,W,240,119,48,91,0.527,14,34,0.412,9,12,0.75,9,33,42,29,9,6,11,15,12.0


In [15]:
nuggets_2023_24[nuggets_2023_24['GAME_DATE'].str.contains('2023-11-2')]

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS
36,22023,1610612743,DEN,Denver Nuggets,22300263,2023-11-29,DEN vs. HOU,W,240,134,48,95,0.505,19,43,0.442,19,26,0.731,13,28,41,37,4,2,3,18,10.0
37,22023,1610612743,DEN,Denver Nuggets,22300257,2023-11-27,DEN @ LAC,W,242,113,44,92,0.478,10,22,0.455,15,28,0.536,18,34,52,28,8,6,11,22,9.0
38,22023,1610612743,DEN,Denver Nuggets,22300252,2023-11-26,DEN vs. SAS,W,241,132,50,89,0.562,12,28,0.429,20,25,0.8,11,34,45,38,8,3,19,25,12.0
39,22023,1610612743,DEN,Denver Nuggets,22300048,2023-11-24,DEN @ HOU,L,240,86,32,94,0.34,7,30,0.233,15,21,0.714,16,30,46,17,8,4,15,14,-19.0
40,22023,1610612743,DEN,Denver Nuggets,22300226,2023-11-22,DEN @ ORL,L,240,119,44,93,0.473,15,35,0.429,16,21,0.762,17,25,42,29,6,2,10,25,-5.0
41,22023,1610612743,DEN,Denver Nuggets,22300218,2023-11-20,DEN @ DET,W,239,107,40,87,0.46,11,28,0.393,16,21,0.762,11,32,43,30,7,8,14,17,4.0


##### In-season tournament

According to [In-Season Tournament 101: Rules, format and how it works](https://www.nba.com/news/in-season-tournament-101), all in-season tournament games but the championship game count toward the regular-season standings.

Knowing that the Lakers won the 2023 in-season tournament by beating the Pacers on Dec 9, we'll check whether this game is part of the Lakers' regular season games when calling the endpoint **LeagueGameFinder**.

The results below show the endpoint correctly pulls the regular season games since the stats from the in-season tournament championship game aren't pulled.

In [8]:
# We pull the Lakers' (team id 1610612747) 2023-24 regular season games so we can check whether the in-season
# tournament championship game (Dec 9, 2023) shows up among them
lakers_2023_24 = leaguegamefinder.LeagueGameFinder(
            team_id_nullable=1610612747,
            season_nullable='2023-24',
            season_type_nullable="Regular Season",
        ).get_data_frames()[0]
lakers_2023_24.head()

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS
0,22023,1610612747,LAL,Los Angeles Lakers,22300788,2024-02-14,LAL @ UTA,W,240,138,52,91,0.571,14,31,0.452,20,24,0.833,5,32,37,34,8,4,9,13,16.0
1,22023,1610612747,LAL,Los Angeles Lakers,22300776,2024-02-13,LAL vs. DET,W,238,125,43,84,0.512,14,35,0.4,25,30,0.833,7,40,47,32,6,11,15,14,14.0
2,22023,1610612747,LAL,Los Angeles Lakers,22300747,2024-02-09,LAL vs. NOP,W,241,139,49,88,0.557,14,31,0.452,27,32,0.844,5,27,32,32,5,5,7,20,17.0
3,22023,1610612747,LAL,Los Angeles Lakers,22300740,2024-02-08,LAL vs. DEN,L,239,106,41,89,0.461,8,25,0.32,16,22,0.727,10,29,39,27,12,10,7,10,-8.0
4,22023,1610612747,LAL,Los Angeles Lakers,22300713,2024-02-05,LAL @ CHA,W,240,124,50,89,0.562,8,34,0.235,16,19,0.842,7,41,48,36,5,6,16,12,6.0


In [9]:
lakers_2023_24[lakers_2023_24['GAME_DATE'].str.contains('2023-12-0')]

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS
33,22023,1610612747,LAL,Los Angeles Lakers,22301230,2023-12-07,LAL vs. NOP,W,238,133,47,86,0.547,17,35,0.486,22,29,0.759,11,48,59,31,6,4,14,16,44.0
34,22023,1610612747,LAL,Los Angeles Lakers,22301203,2023-12-05,LAL vs. PHX,W,240,106,38,102,0.373,9,30,0.3,21,25,0.84,21,24,45,24,11,2,9,19,3.0
35,22023,1610612747,LAL,Los Angeles Lakers,22300291,2023-12-02,LAL vs. HOU,W,239,107,40,89,0.449,8,27,0.296,19,27,0.704,9,39,48,24,11,7,9,17,10.0


In [10]:
lakers_2023_24[lakers_2023_24['GAME_DATE'].str.contains('2023-12-1')]

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS
29,22023,1610612747,LAL,Los Angeles Lakers,22300359,2023-12-18,LAL vs. NYK,L,240,109,42,99,0.424,13,41,0.317,12,13,0.923,9,32,41,29,9,3,10,17,-5.0
30,22023,1610612747,LAL,Los Angeles Lakers,22300331,2023-12-15,LAL @ SAS,L,239,115,43,89,0.483,11,36,0.306,18,28,0.643,9,34,43,32,5,5,6,15,-14.0
31,22023,1610612747,LAL,Los Angeles Lakers,22300316,2023-12-13,LAL @ SAS,W,239,122,49,91,0.538,14,35,0.4,10,17,0.588,5,41,46,31,14,4,16,16,3.0
32,22023,1610612747,LAL,Los Angeles Lakers,22300306,2023-12-12,LAL @ DAL,L,241,125,49,91,0.538,15,29,0.517,12,18,0.667,10,30,40,34,4,4,15,20,-2.0


Let's run some tests with dates.

Note the **date format**. It's a string `m/d/yyyy`, where month and day allow single digits for Jan-Sep and 1st-9th, respectively.

In [59]:
# We pull the Lakers' regular season games filtering by a starting date instead of by season. Note this
# overwrites the `lakers_2023_24` DataFrame that held the whole 2023-24 regular season
lakers_2023_24 = leaguegamefinder.LeagueGameFinder(
            team_id_nullable=1610612747,
            date_from_nullable='02/01/2024',
            season_type_nullable='Regular Season'
        ).get_data_frames()[0]
# Number of games returned by the query
len(lakers_2023_24)

7

In [60]:
lakers_2023_24

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS
0,22023,1610612747,LAL,Los Angeles Lakers,22300788,2024-02-14,LAL @ UTA,W,240,138,52,91,0.571,14,31,0.452,20,24,0.833,5,32,37,34,8,4,9,13,16.0
1,22023,1610612747,LAL,Los Angeles Lakers,22300776,2024-02-13,LAL vs. DET,W,238,125,43,84,0.512,14,35,0.4,25,30,0.833,7,40,47,32,6,11,15,14,14.0
2,22023,1610612747,LAL,Los Angeles Lakers,22300747,2024-02-09,LAL vs. NOP,W,241,139,49,88,0.557,14,31,0.452,27,32,0.844,5,27,32,32,5,5,7,20,17.0
3,22023,1610612747,LAL,Los Angeles Lakers,22300740,2024-02-08,LAL vs. DEN,L,239,106,41,89,0.461,8,25,0.32,16,22,0.727,10,29,39,27,12,10,7,10,-8.0
4,22023,1610612747,LAL,Los Angeles Lakers,22300713,2024-02-05,LAL @ CHA,W,240,124,50,89,0.562,8,34,0.235,16,19,0.842,7,41,48,36,5,6,16,12,6.0
5,22023,1610612747,LAL,Los Angeles Lakers,22300701,2024-02-03,LAL @ NYK,W,240,113,39,80,0.488,12,31,0.387,23,27,0.852,3,35,38,28,4,11,5,16,8.0
6,22023,1610612747,LAL,Los Angeles Lakers,22300684,2024-02-01,LAL @ BOS,W,241,114,38,94,0.404,19,36,0.528,19,26,0.731,15,37,52,30,11,2,7,8,9.0


In [13]:
# Same kind of query, this time asking for playoff games from Jan 31st 2024 on. Note this overwrites
# `lakers_2023_24` again. When this cell was run the query returned no games (see the 0 below)
lakers_2023_24 = leaguegamefinder.LeagueGameFinder(
            team_id_nullable=1610612747,
            date_from_nullable='01/31/2024',
            season_type_nullable='Playoffs'
        ).get_data_frames()[0]
len(lakers_2023_24)

0

### <span style="color:green">2.3. Testing fetching recent data</span>

We are going to fetch regular season games from the 2023-24 season up to Jan 31st 2024.

<span style="color:red">**Note:** the methods `fetch_data` and `feature_group_connection_r2()` are no longer working here since they were updated so that they only work with GitHub actions.</span>

In [33]:
import os
import sys

# Directory that holds the project's source modules (it's added to the import path so that `fetch_data` can be
# imported below). The default is the path used when this notebook was written; set the environment variable
# NBA_ANALYSIS_SRC to point to the `src` folder when working on another machine
directory_path = os.path.abspath(
    os.environ.get('NBA_ANALYSIS_SRC', '/mnt/c/Users/USER/DS_Projects/nba_analysis/src')
)
# We only add the directory once, so re-running this cell doesn't pile up duplicate entries in sys.path
if directory_path not in sys.path:
    sys.path.append(directory_path)
import fetch_data

In [66]:
from importlib import reload
# We reload the module so that the latest edits to fetch_data.py are picked up without restarting the kernel
fetch_data = reload(fetch_data)
# We fetch the recent games (per the section intro, 2023-24 regular season games up to Jan 31st 2024).
# NOTE(review): the exact date range is decided inside `fetch_recent_games` - confirm in fetch_data.py
testing = fetch_data.fetch_recent_games()

Connection closed.
Connected. Call `.close()` to terminate connection gracefully.



Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/97430
Connected. Call `.close()` to terminate connection gracefully.


In [67]:
testing

Unnamed: 0,jokic_pts,jokic_reb,jokic_ast,jokic_starter,murray_pts,murray_reb,murray_ast,murray_starter,rest_pts,rest_reb,rest_ast,game_id,game_date,season_id,playoffs,win
0,0,0,0,0,16,3,4,1,84,44,17,22300682,2024-01-31,22023,0,0
1,25,16,12,1,35,2,5,1,53,29,12,22300667,2024-01-29,22023,0,1
2,26,16,7,1,23,3,7,1,62,25,11,22300644,2024-01-27,22023,0,1
3,31,11,3,1,9,2,7,1,44,27,10,22300631,2024-01-25,22023,0,0
4,31,13,10,1,31,8,7,1,52,21,5,22300614,2024-01-23,22023,0,1
5,42,12,8,1,19,5,7,1,52,33,14,22300602,2024-01-21,22023,0,1
6,34,12,9,1,35,8,5,1,33,27,6,22300586,2024-01-19,22023,0,1
7,25,19,3,1,17,3,10,1,79,24,10,22300566,2024-01-16,22023,0,0
8,25,12,9,1,25,1,8,1,67,27,14,22300550,2024-01-14,22023,0,1
9,27,10,14,1,20,2,9,1,78,36,7,22300539,2024-01-12,22023,0,1


<span style="color:green">Push recent data to feature store (2023-24 regular season games up to Jan 31st 2024)</span>

In [68]:
import feature_store

In [69]:
# We get a handle to the feature group and insert the recent games fetched above.
# NOTE(review): 'start_offline_backfill': False presumably uploads the rows without launching the offline
# materialization job - confirm against the Hopsworks documentation
feature_group = feature_store.feature_group_connection_r2()
feature_group.insert(
    testing,
    write_options = {'start_offline_backfill': False}
)

Connection closed.
Connected. Call `.close()` to terminate connection gracefully.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/97430
Connected. Call `.close()` to terminate connection gracefully.


Uploading Dataframe: 0.00% |          | Rows 0/49 | Elapsed Time: 00:00 | Remaining Time: ?

(<hsfs.core.job.Job at 0x7f97ac1d3ac0>, None)

### Putting all games together

We append all DataFrames together. The code below is run once. We save the data in a CSV file that we load whenever we come back to work on the problem.

```python
games_list = [nuggets_2016, nuggets_2017, nuggets_2018, nuggets_2019, nuggets_2020, nuggets_2021, nuggets_2022]

# Concatenate the games DataFrames in a single DataFrame
nuggets_2016_17_2022_23 = pd.concat(games_list, axis = 0, ignore_index = True)
del games_list

# We represent regular season games with 0 and playoff games with 1
nuggets_2016_17_2022_23['PLAYOFFS'] = np.where(nuggets_2016_17_2022_23['SEASON_ID'].str[0] == '2', 0, 1)

# Save the DataFrame in a CSV file to avoid downloading the data again
nuggets_2016_17_2022_23.to_csv('nuggets_2016_17_2022_23.csv', index = False)
```

### <span style="color: red">Loading Nuggets' 2016-2023 games</span>

In [43]:
# We load the Nuggets' games from 2016_17 to 2022_23 seasons. We tell pandas the column 'GAME_ID' is of type
# string to avoid dropping the leading 0s
nuggets_2016_17_2022_23 = pd.read_csv('nuggets_2016_17_2022_23.csv', dtype = {'GAME_ID': 'string'})
# Number of games loaded (the file holds both regular season and playoff games, see the column 'PLAYOFFS')
len(nuggets_2016_17_2022_23)

623

In [44]:
nuggets_2016_17_2022_23.tail()

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS,PLAYOFFS
618,42022,1610612743,DEN,Denver Nuggets,42200401,2023-06-01,DEN vs. MIA,W,239,104,40,79,0.506,8,27,0.296,16,20,0.8,6,39,45,29,4,4,10,8,11.0,1
619,42022,1610612743,DEN,Denver Nuggets,42200402,2023-06-04,DEN vs. MIA,L,239,108,39,75,0.52,11,28,0.393,19,22,0.864,9,29,38,23,7,2,13,21,-3.0,1
620,42022,1610612743,DEN,Denver Nuggets,42200403,2023-06-07,DEN @ MIA,W,238,109,41,80,0.513,5,18,0.278,22,27,0.815,13,45,58,28,3,5,13,18,15.0,1
621,42022,1610612743,DEN,Denver Nuggets,42200404,2023-06-09,DEN @ MIA,W,242,108,39,79,0.494,14,28,0.5,16,21,0.762,5,29,34,26,11,7,6,18,13.0,1
622,42022,1610612743,DEN,Denver Nuggets,42200405,2023-06-12,DEN vs. MIA,W,240,94,38,84,0.452,5,28,0.179,13,23,0.565,11,46,57,21,6,7,14,13,5.0,1


### 2.3. Append Jokic and Murray stats to the 2016-2023 games

#### <span style="color: red">Loading Jokic and Murray ids</span>

In [45]:
# We load Jokic and Murray ids from the TXT file
file_path = 'jokic_murray_ids.txt'
jokic_murray = []
with open(file_path, 'r') as file:
    for line in file:
        id = int(line.strip())  # Convert the line to a number (use either int or float accordingly)
        jokic_murray.append(id)
jokic_murray

[203999, 1627750]

#### Pull players stats

In [46]:
# We add the columns that will hold each player's main stats (points, rebounds and assists), initialized to 0
for player in ('JOKIC', 'MURRAY'):
    for stat in ('PTS', 'REB', 'AST'):
        nuggets_2016_17_2022_23[player + '_' + stat] = 0
# Flag that will tell whether both players were starters in the game
nuggets_2016_17_2022_23['JOKIC_MURRAY_STARTERS'] = 0
nuggets_2016_17_2022_23.head()

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS,PLAYOFFS,JOKIC_PTS,JOKIC_REB,JOKIC_AST,MURRAY_PTS,MURRAY_REB,MURRAY_AST,JOKIC_MURRAY_STARTERS
0,22016,1610612743,DEN,Denver Nuggets,21600010,2016-10-26,DEN @ NOP,W,240,107,37,83,0.446,8,24,0.333,25,33,0.758,11,47,58,18,7,4,24,22,5.0,0,0,0,0,0,0,0,0
1,22016,1610612743,DEN,Denver Nuggets,21600033,2016-10-29,DEN vs. POR,L,265,113,38,100,0.38,6,23,0.261,31,39,0.795,16,45,61,19,10,5,17,21,-2.0,0,0,0,0,0,0,0,0
2,22016,1610612743,DEN,Denver Nuggets,21600042,2016-10-31,DEN @ TOR,L,240,102,33,81,0.407,9,22,0.409,27,35,0.771,11,35,46,14,2,9,8,18,-3.0,0,0,0,0,0,0,0,0
3,22016,1610612743,DEN,Denver Nuggets,21600067,2016-11-03,DEN @ MIN,W,240,102,39,81,0.481,6,22,0.273,18,25,0.72,7,37,44,24,10,2,20,24,3.0,0,0,0,0,0,0,0,0
4,22016,1610612743,DEN,Denver Nuggets,21600082,2016-11-05,DEN @ DET,L,241,86,31,94,0.33,7,25,0.28,17,20,0.85,16,32,48,15,8,2,16,15,-17.0,0,0,0,0,0,0,0,0


```python
# We explore how to pull the players stats
box_score = boxscoretraditionalv2.BoxScoreTraditionalV2(game_id = "0021600010")
players_stats = box_score.player_stats.get_data_frame()
players_stats
```

We pull the players stats from all games. The code below is run once. We save the data in a CSV file that we load whenever we come back to work on the problem.

```python
players_stats_game = []

for index, game in nuggets_2016_17_2022_23.iterrows():
    
    game_id = game['GAME_ID']
    
    # Get the box score for the game
    box_score = boxscoretraditionalv2.BoxScoreTraditionalV2(game_id = game_id)
    players_stats = box_score.player_stats.get_data_frame()
    players_stats_game.append(players_stats)

# Concatenate the players stats from all games in a single DataFrame
nuggets_2016_17_2022_23_players_stats = pd.concat(players_stats_game, axis = 0, ignore_index = True)

# Save the DataFrame in a CSV file to avoid downloading the data again
nuggets_2016_17_2022_23_players_stats.to_csv("nuggets_2016_17_2022_23_players_stats.csv", index = False)
```

#### <span style="color: red">Loading Nuggets' players stats</span>

In [48]:
# We load the players stats for the Nuggets' games from the 2016_17 to 2022_23 seasons. We read the column
# 'GAME_ID' as a string so pandas doesn't parse it as an integer (which would drop the leading 0s)
nuggets_2016_17_2022_23_players_stats = pd.read_csv(
    'nuggets_2016_17_2022_23_players_stats.csv',
    dtype = {'GAME_ID': 'string'}
)
nuggets_2016_17_2022_23_players_stats.head(10)

Unnamed: 0,GAME_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_CITY,PLAYER_ID,PLAYER_NAME,NICKNAME,START_POSITION,COMMENT,MIN,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TO,PF,PTS,PLUS_MINUS
0,21600010,1610612743,DEN,Denver,201568,Danilo Gallinari,Danilo,F,,37.000000:43,5.0,11.0,0.455,3.0,6.0,0.5,2.0,4.0,0.5,0.0,3.0,3.0,2.0,3.0,1.0,1.0,2.0,15.0,3.0
1,21600010,1610612743,DEN,Denver,203999,Nikola Jokic,Nikola,F,,21.000000:08,2.0,5.0,0.4,1.0,2.0,0.5,0.0,0.0,0.0,1.0,6.0,7.0,2.0,0.0,0.0,4.0,4.0,5.0,5.0
2,21600010,1610612743,DEN,Denver,203994,Jusuf Nurkic,Jusuf,C,,26.000000:01,9.0,13.0,0.692,0.0,0.0,0.0,5.0,6.0,0.833,2.0,7.0,9.0,3.0,0.0,1.0,1.0,3.0,23.0,2.0
3,21600010,1610612743,DEN,Denver,203115,Will Barton,Will,G,,34.000000:46,7.0,12.0,0.583,2.0,3.0,0.667,6.0,6.0,1.0,2.0,3.0,5.0,2.0,0.0,0.0,2.0,2.0,22.0,-3.0
4,21600010,1610612743,DEN,Denver,1626144,Emmanuel Mudiay,Emmanuel,G,,28.000000:06,2.0,8.0,0.25,0.0,1.0,0.0,7.0,8.0,0.875,0.0,7.0,7.0,3.0,1.0,0.0,6.0,5.0,11.0,-1.0
5,21600010,1610612743,DEN,Denver,201163,Wilson Chandler,Wilson,,,30.000000:13,5.0,15.0,0.333,1.0,6.0,0.167,1.0,1.0,1.0,0.0,7.0,7.0,1.0,0.0,0.0,2.0,3.0,12.0,5.0
6,21600010,1610612743,DEN,Denver,202702,Kenneth Faried,Kenneth,,,28.000000:16,3.0,9.0,0.333,0.0,1.0,0.0,3.0,6.0,0.5,6.0,8.0,14.0,1.0,2.0,2.0,3.0,1.0,9.0,2.0
7,21600010,1610612743,DEN,Denver,1627750,Jamal Murray,Jamal,,,13.000000:15,0.0,2.0,0.0,0.0,1.0,0.0,1.0,2.0,0.5,0.0,4.0,4.0,1.0,0.0,0.0,1.0,0.0,1.0,8.0
8,21600010,1610612743,DEN,Denver,2749,Jameer Nelson,Jameer,,,19.000000:54,4.0,8.0,0.5,1.0,4.0,0.25,0.0,0.0,0.0,0.0,2.0,2.0,3.0,1.0,0.0,4.0,2.0,9.0,6.0
9,21600010,1610612743,DEN,Denver,1627823,Juancho Hernangomez,Juancho,,,0.000000:39,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-2.0


Note the following:

- The fact that a player's id appears among the player ids for a given game doesn't mean the player set foot on the court.
- To check whether a player actually played in a game, we need to check the column $MIN$. This stores how much time a player played in a game. The format is a string of the form `##.######:##`, where the portion before the `:` captures the minutes and the portion after it captures the seconds (we can engineer a feature to store this info in a simpler format).
- To check whether a player was a starter, we need to check the column $START\_POSITION$. A NaN value indicates the player wasn't a starter.

For the analysis, we'll consider only games where both Jokic and Murray were starters.

Let's first start by engineering the Revised $MIN$ column, which we call $MIN\_R$, so we have it available in case we decide to use it later.

In [49]:
# 'MIN' looks like '37.000000:43': minutes before the ':' and seconds after it. We split it in two pieces and
# combine them into the playing time expressed in (decimal) minutes
min_parts = nuggets_2016_17_2022_23_players_stats['MIN'].str.split(':', expand = True)
# Naming the pieces also makes sure the split produced exactly two of them
min_parts.columns = ['MINUTES', 'SECONDS']
nuggets_2016_17_2022_23_players_stats['MIN_R'] = min_parts['MINUTES'].astype(float) + \
                                                 min_parts['SECONDS'].astype(float)/60
nuggets_2016_17_2022_23_players_stats.head()

Unnamed: 0,GAME_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_CITY,PLAYER_ID,PLAYER_NAME,NICKNAME,START_POSITION,COMMENT,MIN,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TO,PF,PTS,PLUS_MINUS,MIN_R
0,21600010,1610612743,DEN,Denver,201568,Danilo Gallinari,Danilo,F,,37.000000:43,5.0,11.0,0.455,3.0,6.0,0.5,2.0,4.0,0.5,0.0,3.0,3.0,2.0,3.0,1.0,1.0,2.0,15.0,3.0,37.716667
1,21600010,1610612743,DEN,Denver,203999,Nikola Jokic,Nikola,F,,21.000000:08,2.0,5.0,0.4,1.0,2.0,0.5,0.0,0.0,0.0,1.0,6.0,7.0,2.0,0.0,0.0,4.0,4.0,5.0,5.0,21.133333
2,21600010,1610612743,DEN,Denver,203994,Jusuf Nurkic,Jusuf,C,,26.000000:01,9.0,13.0,0.692,0.0,0.0,0.0,5.0,6.0,0.833,2.0,7.0,9.0,3.0,0.0,1.0,1.0,3.0,23.0,2.0,26.016667
3,21600010,1610612743,DEN,Denver,203115,Will Barton,Will,G,,34.000000:46,7.0,12.0,0.583,2.0,3.0,0.667,6.0,6.0,1.0,2.0,3.0,5.0,2.0,0.0,0.0,2.0,2.0,22.0,-3.0,34.766667
4,21600010,1610612743,DEN,Denver,1626144,Emmanuel Mudiay,Emmanuel,G,,28.000000:06,2.0,8.0,0.25,0.0,1.0,0.0,7.0,8.0,0.875,0.0,7.0,7.0,3.0,1.0,0.0,6.0,5.0,11.0,-1.0,28.1


#### Pulling and appending the relevant stats

Now let's focus on appending the relevant stats from Jokic and Murray.

In [50]:
def jokic_murray_starters(game: pd.DataFrame, players: list) -> int:
    """
    This function checks whether Jokic and Murray were starters in a game.

    A player counts as a starter when his 'START_POSITION' is filled in. Both a missing value (NaN/None) and a
    blank string are treated as "not a starter", so the result doesn't depend on blanks having been turned
    into NaN (which is what happens when the box score is loaded from a CSV file).

    Args:
        game: DataFrame that contains the players stats from the game. It must have the columns 'PLAYER_ID'
            and 'START_POSITION'.
        players: list that contains Jokic and Murray player ids.

    Returns:
        starters: int that indicates whether both players were starters (1) or not (0).
    """

    # List that contains the ids of the players listed in the game's box score
    ids = game['PLAYER_ID'].to_list()

    # If any of the two players isn't even listed in the game, they can't both be starters
    if players[0] not in ids or players[1] not in ids:
        return 0

    # We capture the starting position of both players, discarding missing values
    positions = game.loc[game['PLAYER_ID'].isin(players), 'START_POSITION'].dropna()
    # We also discard blank positions, which mean "not a starter" just like NaN does
    filled = positions.astype(str).str.strip().ne('')

    # Both players were starters only if we are left with one starting position for each of them
    return 1 if int(filled.sum()) == 2 else 0

In [51]:
# For every game we look up Jokic and Murray's main stats (points, rebounds and assists) and whether both of them
# were starters, and we write this info in the game's row of the games DataFrame

# Names of the columns we fill in, in the same order as the values we collect for each game
columns = ['JOKIC_PTS', 'JOKIC_REB', 'JOKIC_AST', 'MURRAY_PTS', 'MURRAY_REB', 'MURRAY_AST', 'JOKIC_MURRAY_STARTERS']

for index, game in nuggets_2016_17_2022_23.iterrows():

    game_id = game["GAME_ID"]
    # Players stats (one row per player) from the game with id 'game_id'
    same_game = nuggets_2016_17_2022_23_players_stats["GAME_ID"] == game_id
    players_stats_game = nuggets_2016_17_2022_23_players_stats[same_game].copy()
    listed_ids = players_stats_game['PLAYER_ID'].to_list()

    # Positions 0-2 hold Jokic's PTS, REB and AST, positions 3-5 hold Murray's, and position 6 holds the
    # starters flag. A player that isn't listed in the game keeps his three values at 0
    stats = [0, 0, 0, 0, 0, 0, 0]
    for first_slot, player_id in zip((0, 3), jokic_murray):
        if player_id in listed_ids:
            player_row = players_stats_game['PLAYER_ID'] == player_id
            stats[first_slot:first_slot + 3] = [players_stats_game.loc[player_row, stat].values[0]
                                                for stat in ('PTS', 'REB', 'AST')]

    # We capture whether both players were starters in the game
    stats[6] = jokic_murray_starters(players_stats_game, jokic_murray)

    # We write the collected values in the game's row
    nuggets_2016_17_2022_23.loc[index, columns] = tuple(stats)

nuggets_2016_17_2022_23.head()

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS,PLAYOFFS,JOKIC_PTS,JOKIC_REB,JOKIC_AST,MURRAY_PTS,MURRAY_REB,MURRAY_AST,JOKIC_MURRAY_STARTERS
0,22016,1610612743,DEN,Denver Nuggets,21600010,2016-10-26,DEN @ NOP,W,240,107,37,83,0.446,8,24,0.333,25,33,0.758,11,47,58,18,7,4,24,22,5.0,0,5.0,7.0,2.0,1.0,4.0,1.0,0
1,22016,1610612743,DEN,Denver Nuggets,21600033,2016-10-29,DEN vs. POR,L,265,113,38,100,0.38,6,23,0.261,31,39,0.795,16,45,61,19,10,5,17,21,-2.0,0,23.0,17.0,2.0,0.0,1.0,2.0,0
2,22016,1610612743,DEN,Denver Nuggets,21600042,2016-10-31,DEN @ TOR,L,240,102,33,81,0.407,9,22,0.409,27,35,0.771,11,35,46,14,2,9,8,18,-3.0,0,12.0,4.0,1.0,1.0,2.0,2.0,0
3,22016,1610612743,DEN,Denver Nuggets,21600067,2016-11-03,DEN @ MIN,W,240,102,39,81,0.481,6,22,0.273,18,25,0.72,7,37,44,24,10,2,20,24,3.0,0,8.0,3.0,5.0,0.0,3.0,4.0,1
4,22016,1610612743,DEN,Denver Nuggets,21600082,2016-11-05,DEN @ DET,L,241,86,31,94,0.33,7,25,0.28,17,20,0.85,16,32,48,15,8,2,16,15,-17.0,0,6.0,6.0,0.0,9.0,1.0,2.0,1


Let's compute the points, rebounds and assists from the remaining players.

In [52]:
# For each main stat, what the rest of the team contributed is the team total minus what Jokic and Murray
# contributed together
for stat in ('PTS', 'REB', 'AST'):
    jokic_plus_murray = nuggets_2016_17_2022_23['JOKIC_' + stat] + nuggets_2016_17_2022_23['MURRAY_' + stat]
    nuggets_2016_17_2022_23['REST_' + stat] = nuggets_2016_17_2022_23[stat] - jokic_plus_murray
nuggets_2016_17_2022_23.tail()

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS,PLAYOFFS,JOKIC_PTS,JOKIC_REB,JOKIC_AST,MURRAY_PTS,MURRAY_REB,MURRAY_AST,JOKIC_MURRAY_STARTERS,REST_PTS,REST_REB,REST_AST
618,42022,1610612743,DEN,Denver Nuggets,42200401,2023-06-01,DEN vs. MIA,W,239,104,40,79,0.506,8,27,0.296,16,20,0.8,6,39,45,29,4,4,10,8,11.0,1,27.0,10.0,14.0,26.0,6.0,10.0,1,51.0,29.0,5.0
619,42022,1610612743,DEN,Denver Nuggets,42200402,2023-06-04,DEN vs. MIA,L,239,108,39,75,0.52,11,28,0.393,19,22,0.864,9,29,38,23,7,2,13,21,-3.0,1,41.0,11.0,4.0,18.0,4.0,10.0,1,49.0,23.0,9.0
620,42022,1610612743,DEN,Denver Nuggets,42200403,2023-06-07,DEN @ MIA,W,238,109,41,80,0.513,5,18,0.278,22,27,0.815,13,45,58,28,3,5,13,18,15.0,1,32.0,21.0,10.0,34.0,10.0,10.0,1,43.0,27.0,8.0
621,42022,1610612743,DEN,Denver Nuggets,42200404,2023-06-09,DEN @ MIA,W,242,108,39,79,0.494,14,28,0.5,16,21,0.762,5,29,34,26,11,7,6,18,13.0,1,23.0,12.0,4.0,15.0,3.0,12.0,1,70.0,19.0,10.0
622,42022,1610612743,DEN,Denver Nuggets,42200405,2023-06-12,DEN vs. MIA,W,240,94,38,84,0.452,5,28,0.179,13,23,0.565,11,46,57,21,6,7,14,13,5.0,1,28.0,16.0,4.0,14.0,8.0,8.0,1,52.0,33.0,9.0


Let's pull those games we're interested in (Jokic and Murray were starters).

In [53]:
# Columns holding the players' and the rest of the team's stats. They became floats when they were filled in
# row by row, so we cast them back to integers
columns = ['JOKIC_PTS', 'JOKIC_REB', 'JOKIC_AST', 'MURRAY_PTS', 'MURRAY_REB', 'MURRAY_AST', 'JOKIC_MURRAY_STARTERS', \
           'REST_PTS', 'REST_REB', 'REST_AST']

# We keep only the games where both Jokic and Murray were starters
both_started = nuggets_2016_17_2022_23['JOKIC_MURRAY_STARTERS'] == 1
nuggets_2016_17_2022_23_j_m = nuggets_2016_17_2022_23[both_started].copy()
nuggets_2016_17_2022_23_j_m = nuggets_2016_17_2022_23_j_m.astype(dict.fromkeys(columns, int))

nuggets_2016_17_2022_23_j_m.head()

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS,PLAYOFFS,JOKIC_PTS,JOKIC_REB,JOKIC_AST,MURRAY_PTS,MURRAY_REB,MURRAY_AST,JOKIC_MURRAY_STARTERS,REST_PTS,REST_REB,REST_AST
3,22016,1610612743,DEN,Denver Nuggets,21600067,2016-11-03,DEN @ MIN,W,240,102,39,81,0.481,6,22,0.273,18,25,0.72,7,37,44,24,10,2,20,24,3.0,0,8,3,5,0,3,4,1,94,38,15
4,22016,1610612743,DEN,Denver Nuggets,21600082,2016-11-05,DEN @ DET,L,241,86,31,94,0.33,7,25,0.28,17,20,0.85,16,32,48,15,8,2,16,15,-17.0,0,6,6,0,9,1,2,1,71,41,13
60,22016,1610612743,DEN,Denver Nuggets,21600899,2017-03-01,DEN @ MIL,W,240,110,41,84,0.488,10,33,0.303,18,20,0.9,14,31,45,21,8,2,14,15,12.0,0,13,14,10,10,1,5,1,87,30,6
75,22016,1610612743,DEN,Denver Nuggets,21601149,2017-04-02,DEN @ MIA,W,241,116,44,87,0.506,5,20,0.25,23,27,0.852,6,34,40,25,9,6,8,14,3.0,0,19,10,7,9,3,0,1,88,27,18
76,22016,1610612743,DEN,Denver Nuggets,21601158,2017-04-04,DEN @ NOP,W,241,134,53,99,0.535,13,30,0.433,15,21,0.714,15,35,50,31,3,2,10,20,3.0,0,21,12,4,16,5,5,1,97,33,22


#### We save the DataFrame

In [54]:
nuggets_2016_17_2022_23_j_m.to_csv('nuggets_2016_17_2022_23_j_m.csv', index = False)

## 3. Generalization

### 3.1. Pulling games

After some exploration of the method `LeagueGameFinder`, pulling the 2016_17-2022_23 games can be simplified since the parameters from this method allow us to pull games for a particular season and specify whether we want to pull regular season or playoff games. Thus, the whole process reduces to the following:

1. We loop through the seasons.
2. For each season, we extract regular season games and playoff games separately and then put them together in a list.
3. We concatenate everything in a DataFrame.

The function below generalizes the process for any team and season(s). The team ids can be found in the CSV file `nba_teams.csv`.

In [3]:
def pull_team_games(team_id: int, season_init: int, season_end: int) -> pd.DataFrame:
    """
    This function returns all regular season and playoff games info from a given team and seasons.

    Args:
        team_id: int that contains the team id.
        season_init: int that contains the starting season from which the games info will be pulled. A season is
            identified by the year it starts in (e.g. 2022 for the 2022-23 season).
        season_end: int that contains the ending season from which the games info will be pulled (inclusive).

    Returns:
        DataFrame that contains the games info from the given team and seasons, sorted by game date. The column
        'PLAYOFFS' is 0 for regular season games and 1 for playoff games.

    Raises:
        ValueError: if season_init is greater than season_end (there would be no season to pull).
    """

    # We fail early with a clear message instead of letting pd.concat complain about an empty list
    if season_init > season_end:
        raise ValueError('season_init ({}) must not be greater than season_end ({})'.format(season_init, season_end))

    # We create a list to store DataFrames, each containing a team's either regular season or playoff games info
    # from individual seasons
    seasons_list = []

    # We loop through each season
    for start_year in range(season_init, season_end + 1):
        # We create the season id needed by the nba_api. A season id has the form 'yyyy-yy'. For example, the
        # 2022-2023 season id is '2022-23'
        season = str(start_year) + '-' + str(start_year + 1)[-2:]

        # We pull regular season games (flag 0) and playoff games (flag 1) by calling the nba_api once for each
        for playoffs_flag, season_type in enumerate(('Regular Season', 'Playoffs')):
            games = leaguegamefinder.LeagueGameFinder(team_id_nullable = team_id, season_nullable = season, \
                                              season_type_nullable = season_type).get_data_frames()[0]
            # We add a column to identify regular season and playoff games
            games['PLAYOFFS'] = playoffs_flag
            seasons_list.append(games)

    # A team that missed the playoffs yields an empty DataFrame for that season. We leave those out so they don't
    # take part in deciding the column types of the result (pandas is deprecating ignoring them on its own). If
    # every DataFrame is empty we concatenate them anyway to return an empty DataFrame with the right columns
    non_empty = [games for games in seasons_list if not games.empty]

    # We concatenate the DataFrames vertically and then sort the resulting DataFrame by the games dates
    games = pd.concat(non_empty or seasons_list, axis = 0, ignore_index = True)
    return games.sort_values(by = 'GAME_DATE', ascending = True).reset_index(drop = True)

In [56]:
games = pull_team_games(team_id = 1610612743, season_init = 2016, season_end = 2022)
games.tail()

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS,PLAYOFFS
618,42022,1610612743,DEN,Denver Nuggets,42200401,2023-06-01,DEN vs. MIA,W,239,104,40,79,0.506,8,27,0.296,16,20,0.8,6,39,45,29,4,4,10,8,11.0,1
619,42022,1610612743,DEN,Denver Nuggets,42200402,2023-06-04,DEN vs. MIA,L,239,108,39,75,0.52,11,28,0.393,19,22,0.864,9,29,38,23,7,2,13,21,-3.0,1
620,42022,1610612743,DEN,Denver Nuggets,42200403,2023-06-07,DEN @ MIA,W,238,109,41,80,0.513,5,18,0.278,22,27,0.815,13,45,58,28,3,5,13,18,15.0,1
621,42022,1610612743,DEN,Denver Nuggets,42200404,2023-06-09,DEN @ MIA,W,242,108,39,79,0.494,14,28,0.5,16,21,0.762,5,29,34,26,11,7,6,18,13.0,1
622,42022,1610612743,DEN,Denver Nuggets,42200405,2023-06-12,DEN vs. MIA,W,240,94,38,84,0.452,5,28,0.179,13,23,0.565,11,46,57,21,6,7,14,13,5.0,1


We export a view from the DataFrame for the repo README.

In [57]:
dfi.export(games.tail(), 'df1.png', table_conversion = 'matplotlib')

We save the work just in case.

In [24]:
# We save the games in a CSV file and load them right back. When loading, we tell pandas the columns 'SEASON_ID'
# and 'GAME_ID' are of type string so they aren't parsed as integers
games.to_csv('nuggets_2016_17_2022_23_r1.csv', index = False)
games = pd.read_csv('nuggets_2016_17_2022_23_r1.csv', dtype = {'SEASON_ID': 'string', 'GAME_ID': 'string'})
games.tail()

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS,PLAYOFFS
618,42022,1610612743,DEN,Denver Nuggets,42200401,2023-06-01,DEN vs. MIA,W,239,104,40,79,0.506,8,27,0.296,16,20,0.8,6,39,45,29,4,4,10,8,11.0,1
619,42022,1610612743,DEN,Denver Nuggets,42200402,2023-06-04,DEN vs. MIA,L,239,108,39,75,0.52,11,28,0.393,19,22,0.864,9,29,38,23,7,2,13,21,-3.0,1
620,42022,1610612743,DEN,Denver Nuggets,42200403,2023-06-07,DEN @ MIA,W,238,109,41,80,0.513,5,18,0.278,22,27,0.815,13,45,58,28,3,5,13,18,15.0,1
621,42022,1610612743,DEN,Denver Nuggets,42200404,2023-06-09,DEN @ MIA,W,242,108,39,79,0.494,14,28,0.5,16,21,0.762,5,29,34,26,11,7,6,18,13.0,1
622,42022,1610612743,DEN,Denver Nuggets,42200405,2023-06-12,DEN vs. MIA,W,240,94,38,84,0.452,5,28,0.179,13,23,0.565,11,46,57,21,6,7,14,13,5.0,1


### 3.2. Appending players stats

Once we have the games, we can append the main stats (points, rebounds and assists) from a pair of (star) players to check how these stats contribute to the team's victories. While the function below generalizes the process for more than two players, bear in mind that the modeling process works with the stats of two players. The player ids can be found in the CSV file `nba_players.csv`.

In [10]:
def append_players_stats_season(players_list: list, team_games: pd.DataFrame):
    """
    This function appends to a team's games info DataFrame from a single season the three main stats (points, rebounds
    and assists) from a given set of its players. The function also appends info on whether the players were starters in
    the games. This function is called within another function that loops through seasons.

    For each player, four columns are appended, named after his upper-cased last name (looked up in
    'nba_players.csv'): 'NAME_PTS', 'NAME_REB', 'NAME_AST' and 'NAME_STARTER'. The box score of every game is
    requested from the nba_api (one request per game). The input DataFrame is not modified.

    Args:
        players_list: list that contains the players' ids.
        team_games: DataFrame that contains the team's games data from a single season. It must have a 'GAME_ID'
            column.

    Returns:
        DataFrame that contains the team's games data from a single season, including the main stats from the given
        set of players. The index is reset to 0..n-1. The appended columns are floats, so a missing value in the box
        score is carried through as NaN.

    Raises:
        IndexError: if a player id is not found in 'nba_players.csv'.
    """

    # We load the nba_players info
    nba_players = pd.read_csv('nba_players.csv')

    # For each player, we build the names of his stats columns. We use his last name (loaded from the nba_players
    # DataFrame) to name the columns
    stat_suffixes = ['_PTS', '_REB', '_AST', '_STARTER']
    players_columns = []
    for player_id in players_list:
        last_name = nba_players.loc[nba_players['id'] == player_id, 'last_name'].values[0].upper()
        players_columns.append([last_name + suffix for suffix in stat_suffixes])

    # For each player, we keep a plain list with one row of stats per game. Building the DataFrames once at the end
    # avoids growing a DataFrame row by row, which copies the data on every insertion
    players_rows = [[] for _ in players_list]

    # We loop through each game in a season
    for game_id in team_games['GAME_ID']:
        # We get the box score for the game by calling the nba_api
        box_score = boxscoretraditionalv2.BoxScoreTraditionalV2(game_id = game_id)
        players_stats = box_score.player_stats.get_data_frame()

        # We loop through each player together with his list of rows
        for rows, player_id in zip(players_rows, players_list):
            player_stats = players_stats[players_stats['PLAYER_ID'] == player_id]

            # If the player doesn't appear in the box score, we fill the stats with 0s. Every game adds exactly one
            # row per player, so the stats stay aligned with the games
            if player_stats.empty:
                rows.append([0, 0, 0, 0])
                continue

            first_row = player_stats.iloc[0]

            # The 'START_POSITION' of non-starters is an empty string
            starter = 1 if first_row['START_POSITION'] != '' else 0
            rows.append([first_row['PTS'], first_row['REB'], first_row['AST'], starter])

    # We build one DataFrame per player
    players_df_list = [pd.DataFrame(rows, columns = columns, dtype = float)
                       for rows, columns in zip(players_rows, players_columns)]

    # We concatenate the DataFrames horizontally. The games info goes first and gets a fresh 0..n-1 index (on a new
    # object, so the caller's DataFrame is left untouched) to line up with the players' DataFrames
    return pd.concat([team_games.reset_index(drop = True)] + players_df_list, axis = 1)

In [12]:
def append_players_stats(players_list: list, team_games: pd.DataFrame):
    """
    This function appends to a team's games info DataFrame the three main stats (points, rebounds and assists) from a
    given set of its players. The team's games info DataFrame may contain info from several seasons. The function
    takes this into consideration and appends the stats one season at a time. To avoid being blocked by the nba_api, the
    function adds a five-second sleep between seasons. The function also appends info on whether the players were
    starters in the games. This function's main purpose is controlling the append season by season since the actual
    append operation is performed by a function call (append_players_stats_season).

    Games are grouped by 'SEASON_ID' without its first character (e.g., '42022' -> '2022'), so rows whose ids differ
    only in that first character are processed together. Seasons are processed in order of first appearance.

    Args:
        players_list: list that contains the players' ids.
        team_games: DataFrame that contains the team's games data. It must have a 'SEASON_ID' column; its values are
            converted to text before slicing, so both string and integer ids work.

    Returns:
        DataFrame that contains the team's games data, including the main stats from the given set of players. The
        index is reset to 0..n-1.
    """

    # We extract the season of each game and the list of distinct seasons
    season_keys = team_games['SEASON_ID'].astype(str).str[1:]
    seasons = season_keys.unique().tolist()

    # We create a list to store DataFrames, each containing the team's games info from an individual season
    games_list = []

    for position, season in enumerate(seasons):
        # We add a five-second sleep between seasons to avoid being blocked by the nba_api. There is nothing to wait
        # for before the first season (nor after the last one)
        if position > 0:
            time.sleep(5)

        # We pull the games info from an individual season and append to it the players' stats
        season_games = team_games[season_keys == season].copy()
        games_list.append(append_players_stats_season(players_list, season_games))

    # We concatenate the DataFrames vertically; ignore_index renumbers the rows
    return pd.concat(games_list, axis = 0, ignore_index = True)

In [61]:
# 203999 and 1627750 are the ids of Jokic and Murray, in that order (the appended columns follow the order of the
# list). This call requests one box score per game from the nba_api
games = append_players_stats(players_list = [203999, 1627750], team_games = games)
games.tail()

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS,PLAYOFFS,JOKIC_PTS,JOKIC_REB,JOKIC_AST,JOKIC_STARTER,MURRAY_PTS,MURRAY_REB,MURRAY_AST,MURRAY_STARTER
618,42022,1610612743,DEN,Denver Nuggets,42200401,2023-06-01,DEN vs. MIA,W,239,104,40,79,0.506,8,27,0.296,16,20,0.8,6,39,45,29,4,4,10,8,11.0,1,27.0,10.0,14.0,1.0,26.0,6.0,10.0,1.0
619,42022,1610612743,DEN,Denver Nuggets,42200402,2023-06-04,DEN vs. MIA,L,239,108,39,75,0.52,11,28,0.393,19,22,0.864,9,29,38,23,7,2,13,21,-3.0,1,41.0,11.0,4.0,1.0,18.0,4.0,10.0,1.0
620,42022,1610612743,DEN,Denver Nuggets,42200403,2023-06-07,DEN @ MIA,W,238,109,41,80,0.513,5,18,0.278,22,27,0.815,13,45,58,28,3,5,13,18,15.0,1,32.0,21.0,10.0,1.0,34.0,10.0,10.0,1.0
621,42022,1610612743,DEN,Denver Nuggets,42200404,2023-06-09,DEN @ MIA,W,242,108,39,79,0.494,14,28,0.5,16,21,0.762,5,29,34,26,11,7,6,18,13.0,1,23.0,12.0,4.0,1.0,15.0,3.0,12.0,1.0
622,42022,1610612743,DEN,Denver Nuggets,42200405,2023-06-12,DEN vs. MIA,W,240,94,38,84,0.452,5,28,0.179,13,23,0.565,11,46,57,21,6,7,14,13,5.0,1,28.0,16.0,4.0,1.0,14.0,8.0,8.0,1.0


We save the work just in case.

In [45]:
# Checkpoint after appending the players' stats, so the box scores don't have to be requested again. SEASON_ID
# and GAME_ID are read back as strings
games.to_csv('nuggets_2016_17_2022_23_r2.csv', index = False)
games = pd.read_csv('nuggets_2016_17_2022_23_r2.csv', dtype = {'SEASON_ID': 'string', 'GAME_ID': 'string'})
games.tail()

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS,PLAYOFFS,JOKIC_PTS,JOKIC_REB,JOKIC_AST,JOKIC_STARTER,MURRAY_PTS,MURRAY_REB,MURRAY_AST,MURRAY_STARTER
618,42022,1610612743,DEN,Denver Nuggets,42200401,2023-06-01,DEN vs. MIA,W,239,104,40,79,0.506,8,27,0.296,16,20,0.8,6,39,45,29,4,4,10,8,11.0,1,27.0,10.0,14.0,1.0,26.0,6.0,10.0,1.0
619,42022,1610612743,DEN,Denver Nuggets,42200402,2023-06-04,DEN vs. MIA,L,239,108,39,75,0.52,11,28,0.393,19,22,0.864,9,29,38,23,7,2,13,21,-3.0,1,41.0,11.0,4.0,1.0,18.0,4.0,10.0,1.0
620,42022,1610612743,DEN,Denver Nuggets,42200403,2023-06-07,DEN @ MIA,W,238,109,41,80,0.513,5,18,0.278,22,27,0.815,13,45,58,28,3,5,13,18,15.0,1,32.0,21.0,10.0,1.0,34.0,10.0,10.0,1.0
621,42022,1610612743,DEN,Denver Nuggets,42200404,2023-06-09,DEN @ MIA,W,242,108,39,79,0.494,14,28,0.5,16,21,0.762,5,29,34,26,11,7,6,18,13.0,1,23.0,12.0,4.0,1.0,15.0,3.0,12.0,1.0
622,42022,1610612743,DEN,Denver Nuggets,42200405,2023-06-12,DEN vs. MIA,W,240,94,38,84,0.452,5,28,0.179,13,23,0.565,11,46,57,21,6,7,14,13,5.0,1,28.0,16.0,4.0,1.0,14.0,8.0,8.0,1.0


### 3.3. Final preparation

The next step is to compute the combined stats of the rest of the teammates. We take advantage of the naming convention of the players' stats columns (`NAME_PTS`, `NAME_REB`, `NAME_AST`).

In [49]:
def teammates_stats(team_games: pd.DataFrame):
    """
    This function computes the main stats (PTS, REB and AST) from the rest of the teammates, i.e., from the
    teammates whose stats weren't appended to the games data. It takes advantage of the naming convention
    of the players stats columns. For example, for each player whose stats were appended to the games data,
    the points column's name is 'NAME_PTS'. Thus, the function pulls all columns whose names end with '_PTS',
    adds these points up and subtracts them from the whole team points.

    The results are stored in the 'REST_PTS', 'REST_REB' and 'REST_AST' columns. Columns that start with 'REST_'
    are never treated as player columns, so calling the function again on its own output gives the same result.
    NaN values in the players' columns ('_PTS', '_REB', '_AST' and '_STARTER') are replaced with 0.

    Args:
        team_games: DataFrame that contains the team's games data. It must have the team's 'PTS', 'REB' and
            'AST' columns. It is modified in place.

    Returns:
        DataFrame that contains the team's games data, including the main stats from the rest of the teammates.
        It is the same object that was passed in.
    """

    def player_columns(suffix: str) -> list:
        # Columns from the appended players for a given stat. The 'REST_' columns created below share the same
        # suffixes, so they are excluded explicitly
        return [col for col in team_games.columns if col.endswith(suffix) and not col.startswith('REST_')]

    # Some player stats are filled with NaN values. Fill them with 0
    cols = [col for suffix in ('_PTS', '_REB', '_AST', '_STARTER') for col in player_columns(suffix)]
    if cols:
        team_games.fillna(value = {col: 0 for col in cols}, inplace = True)

    # For each stat, pull the values from the players, add them up and subtract the sum from the whole team value
    for stat in ('PTS', 'REB', 'AST'):
        players_total = team_games[player_columns('_' + stat)].sum(axis = 1)
        team_games['REST_' + stat] = team_games[stat] - players_total

    return team_games

In [47]:
# teammates_stats adds the REST_PTS, REST_REB and REST_AST columns; it works on `games` in place and returns it
games = teammates_stats(games)
games.tail()

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS,PLAYOFFS,JOKIC_PTS,JOKIC_REB,JOKIC_AST,JOKIC_STARTER,MURRAY_PTS,MURRAY_REB,MURRAY_AST,MURRAY_STARTER,REST_PTS,REST_REB,REST_AST
618,42022,1610612743,DEN,Denver Nuggets,42200401,2023-06-01,DEN vs. MIA,W,239,104,40,79,0.506,8,27,0.296,16,20,0.8,6,39,45,29,4,4,10,8,11.0,1,27.0,10.0,14.0,1.0,26.0,6.0,10.0,1.0,51.0,29.0,5.0
619,42022,1610612743,DEN,Denver Nuggets,42200402,2023-06-04,DEN vs. MIA,L,239,108,39,75,0.52,11,28,0.393,19,22,0.864,9,29,38,23,7,2,13,21,-3.0,1,41.0,11.0,4.0,1.0,18.0,4.0,10.0,1.0,49.0,23.0,9.0
620,42022,1610612743,DEN,Denver Nuggets,42200403,2023-06-07,DEN @ MIA,W,238,109,41,80,0.513,5,18,0.278,22,27,0.815,13,45,58,28,3,5,13,18,15.0,1,32.0,21.0,10.0,1.0,34.0,10.0,10.0,1.0,43.0,27.0,8.0
621,42022,1610612743,DEN,Denver Nuggets,42200404,2023-06-09,DEN @ MIA,W,242,108,39,79,0.494,14,28,0.5,16,21,0.762,5,29,34,26,11,7,6,18,13.0,1,23.0,12.0,4.0,1.0,15.0,3.0,12.0,1.0,70.0,19.0,10.0
622,42022,1610612743,DEN,Denver Nuggets,42200405,2023-06-12,DEN vs. MIA,W,240,94,38,84,0.452,5,28,0.179,13,23,0.565,11,46,57,21,6,7,14,13,5.0,1,28.0,16.0,4.0,1.0,14.0,8.0,8.0,1.0,52.0,33.0,9.0


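For reference, `DataFrame.fillna` accepts a dictionary that maps column names to fill values. For example, the following replaces all NaN elements in columns 'A', 'B', 'C', and 'D' with 0, 1, 2, and 3 respectively.

```python
values = {"A": 0, "B": 1, "C": 2, "D": 3}
df.fillna(value=values)
```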

We export a view from the DataFrame for the repo README.

In [65]:
# Columns from position 29 onwards are the appended players' columns and the REST_* columns (JOKIC_PTS is at
# position 29 in the output above), so only those are exported to 'df2.png'
dfi.export(games.iloc[:, 29:].tail(), 'df2.png', table_conversion = 'matplotlib')

Next, we pull the games where both Jokic and Murray were starters. Again, we take advantage of the naming convention of the starter columns (`NAME_STARTER`).

<span style="color:red">We omit this step so that we can filter these games in the app.</span>

In [16]:
def pull_games_starters(team_games: pd.DataFrame):
    """
    This function filters the games data to extract only those games where the players whose stats were appended
    were starters. It takes advantage of the naming convention of the players starter columns. For each player
    whose stats were appended to the games data, the starter column's name is 'NAME_STARTER'. Thus, the function
    pulls all columns whose names contain '_STARTER', adds them up and filters the data based on the result of the sum.
    Note that the column stores 1 if a player was a starter and stores 0 otherwise. Thus, when the sum is equal to the
    number of players, it means all these players were starters.

    The input DataFrame is only read: no helper column is written to it, so its columns are left exactly as
    they were.

    Args:
        team_games: DataFrame that contains the team's games data.

    Returns:
        New DataFrame that contains the team's games data from games where the players whose stats were appended
        were starters. The '_STARTER' columns are dropped and the index is reset to 0..n-1.
    """

    # Pull starters info
    cols = [col for col in team_games.columns if '_STARTER' in col]

    # Add this info up. A game qualifies when the sum equals the number of players of interest. The sum is kept
    # as a separate boolean mask instead of being stored in the caller's DataFrame
    all_started = team_games[cols].sum(axis = 1) == len(cols)

    # Filter the games, drop the starter columns (they are constant after filtering) and renumber the rows
    return team_games[all_started].drop(columns = cols).reset_index(drop = True)

In [28]:
#games2 = pull_games_starters(games)
#games2.tail()

Finally, we convert the stats columns to 'int'.

In [29]:
def stats_to_int(team_games: pd.DataFrame):
    """
    This function casts to 'int' every column whose name contains '_PTS', '_REB', '_AST' or '_STARTER', i.e., the
    stats columns from (i) the players whose stats were appended to the games data and (ii) the rest of their
    teammates.

    Args:
        team_games: DataFrame that contains the team's games data. The columns are converted on this same object.

    Returns:
        The DataFrame that was passed in, where all stats columns are of type 'int'.
    """

    stat_markers = ('_PTS', '_REB', '_AST', '_STARTER')

    # Take a snapshot of the column names first, then convert the matching columns one at a time
    for name in list(team_games.columns):
        if any(marker in name for marker in stat_markers):
            team_games[name] = team_games[name].astype(int)

    return team_games

In [50]:
# Cast the players' and REST_* stats columns to int; stats_to_int converts `games` in place and returns it
games = stats_to_int(games)
games.tail()

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS,PLAYOFFS,JOKIC_PTS,JOKIC_REB,JOKIC_AST,JOKIC_STARTER,MURRAY_PTS,MURRAY_REB,MURRAY_AST,MURRAY_STARTER,REST_PTS,REST_REB,REST_AST
618,42022,1610612743,DEN,Denver Nuggets,42200401,2023-06-01,DEN vs. MIA,W,239,104,40,79,0.506,8,27,0.296,16,20,0.8,6,39,45,29,4,4,10,8,11.0,1,27,10,14,1,26,6,10,1,51,29,5
619,42022,1610612743,DEN,Denver Nuggets,42200402,2023-06-04,DEN vs. MIA,L,239,108,39,75,0.52,11,28,0.393,19,22,0.864,9,29,38,23,7,2,13,21,-3.0,1,41,11,4,1,18,4,10,1,49,23,9
620,42022,1610612743,DEN,Denver Nuggets,42200403,2023-06-07,DEN @ MIA,W,238,109,41,80,0.513,5,18,0.278,22,27,0.815,13,45,58,28,3,5,13,18,15.0,1,32,21,10,1,34,10,10,1,43,27,8
621,42022,1610612743,DEN,Denver Nuggets,42200404,2023-06-09,DEN @ MIA,W,242,108,39,79,0.494,14,28,0.5,16,21,0.762,5,29,34,26,11,7,6,18,13.0,1,23,12,4,1,15,3,12,1,70,19,10
622,42022,1610612743,DEN,Denver Nuggets,42200405,2023-06-12,DEN vs. MIA,W,240,94,38,84,0.452,5,28,0.179,13,23,0.565,11,46,57,21,6,7,14,13,5.0,1,28,16,4,1,14,8,8,1,52,33,9


We save the work.

In [51]:
# Final dataset: games info plus the players' stats and the REST_* columns, written without the index
games.to_csv('nuggets_2016_17_2022_23_r3.csv', index = False)