In [1]:
import pandas as pd
import numpy as np

from nba_api.stats.static import players
from nba_api.stats.static import teams
from nba_api.stats.endpoints import playercareerstats
from nba_api.stats.endpoints import playergamelogs
from nba_api.stats.endpoints import teamgamelogs

In [5]:
# https://github.com/swar/nba_api/blob/master/docs/nba_api/stats/static/players.md
players.find_players_by_first_name('LeBron')

[{'id': 2544,
  'full_name': 'LeBron James',
  'first_name': 'LeBron',
  'last_name': 'James',
  'is_active': True}]

In [6]:
# Career stats
# https://github.com/swar/nba_api/blob/master/docs/nba_api/stats/endpoints/playercareerstats.md

lebron_stats = playercareerstats.PlayerCareerStats(player_id = '2544')
lebron_df = lebron_stats.get_data_frames()[0]
lebron_df

Unnamed: 0,PLAYER_ID,SEASON_ID,LEAGUE_ID,TEAM_ID,TEAM_ABBREVIATION,PLAYER_AGE,GP,GS,MIN,FGM,...,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS
0,2544,2003-04,0,1610612739,CLE,19.0,79,79,3120.0,622,...,0.754,99,333,432,465,130,58,273,149,1654
1,2544,2004-05,0,1610612739,CLE,20.0,80,80,3388.0,795,...,0.75,111,477,588,577,177,52,262,146,2175
2,2544,2005-06,0,1610612739,CLE,21.0,79,79,3361.0,875,...,0.738,75,481,556,521,123,66,260,181,2478
3,2544,2006-07,0,1610612739,CLE,22.0,78,78,3190.0,772,...,0.698,83,443,526,470,125,55,250,171,2132
4,2544,2007-08,0,1610612739,CLE,23.0,75,74,3027.0,794,...,0.712,133,459,592,539,138,81,255,165,2250
5,2544,2008-09,0,1610612739,CLE,24.0,81,81,3054.0,789,...,0.78,106,507,613,587,137,93,241,139,2304
6,2544,2009-10,0,1610612739,CLE,25.0,76,76,2966.0,768,...,0.767,71,483,554,651,125,77,261,119,2258
7,2544,2010-11,0,1610612748,MIA,26.0,79,79,3063.0,758,...,0.759,80,510,590,554,124,50,284,163,2111
8,2544,2011-12,0,1610612748,MIA,27.0,62,62,2326.0,621,...,0.771,94,398,492,387,115,50,213,96,1683
9,2544,2012-13,0,1610612748,MIA,28.0,76,76,2877.0,765,...,0.753,97,513,610,551,129,67,226,110,2036


In [17]:
# Player Game Log
# https://github.com/swar/nba_api/blob/master/docs/nba_api/stats/endpoints/playergamelogs.md

lebron_games = playergamelogs.PlayerGameLogs(
    player_id_nullable = '2544',
    season_nullable = '2017-18'
)
lebron_games_df = lebron_games.get_data_frames()[0]
lebron_games_df

Unnamed: 0,SEASON_YEAR,PLAYER_ID,PLAYER_NAME,NICKNAME,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,...,BLKA_RANK,PF_RANK,PFD_RANK,PTS_RANK,PLUS_MINUS_RANK,NBA_FANTASY_PTS_RANK,DD2_RANK,TD3_RANK,WNBA_FANTASY_PTS_RANK,VIDEO_AVAILABLE_FLAG
0,2017-18,2544,LeBron James,LeBron,1610612739,CLE,Cleveland Cavaliers,0021701220,2018-04-11T00:00:00,CLE vs. NYK,...,33,1,69,81,31,82,53,19,82,1
1,2017-18,2544,LeBron James,LeBron,1610612739,CLE,Cleveland Cavaliers,0021701207,2018-04-09T00:00:00,CLE @ NYK,...,33,39,37,45,13,59,1,19,55,1
2,2017-18,2544,LeBron James,LeBron,1610612739,CLE,Cleveland Cavaliers,0021701183,2018-04-06T00:00:00,CLE @ PHI,...,79,1,11,2,26,3,1,1,2,1
3,2017-18,2544,LeBron James,LeBron,1610612739,CLE,Cleveland Cavaliers,0021701176,2018-04-05T00:00:00,CLE vs. WAS,...,33,39,25,16,16,32,1,19,22,1
4,2017-18,2544,LeBron James,LeBron,1610612739,CLE,Cleveland Cavaliers,0021701156,2018-04-03T00:00:00,CLE vs. TOR,...,1,39,4,40,18,64,1,19,61,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
77,2017-18,2544,LeBron James,LeBron,1610612739,CLE,Cleveland Cavaliers,0021700058,2017-10-25T00:00:00,CLE @ BKN,...,69,15,25,33,51,13,1,1,17,1
78,2017-18,2544,LeBron James,LeBron,1610612739,CLE,Cleveland Cavaliers,0021700049,2017-10-24T00:00:00,CLE vs. CHI,...,33,15,25,13,26,20,1,19,19,2
79,2017-18,2544,LeBron James,LeBron,1610612739,CLE,Cleveland Cavaliers,0021700028,2017-10-21T00:00:00,CLE vs. ORL,...,1,1,50,63,80,75,53,19,76,1
80,2017-18,2544,LeBron James,LeBron,1610612739,CLE,Cleveland Cavaliers,0021700021,2017-10-20T00:00:00,CLE @ MIL,...,1,39,78,55,16,64,53,19,64,1


In [18]:
# Team Game Log
# https://github.com/swar/nba_api/blob/master/docs/nba_api/stats/endpoints/teamgamelogs.md

cavs_games = teamgamelogs.TeamGameLogs(
    team_id_nullable = '1610612739',
    season_nullable = '2017-18'
)
cavs_games_df = cavs_games.get_data_frames()[0]
cavs_games_df

Unnamed: 0,SEASON_YEAR,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,FGM,...,REB_RANK,AST_RANK,TOV_RANK,STL_RANK,BLK_RANK,BLKA_RANK,PF_RANK,PFD_RANK,PTS_RANK,PLUS_MINUS_RANK
0,2017-18,1610612739,CLE,Cleveland Cavaliers,0021701220,2018-04-11T00:00:00,CLE vs. NYK,L,48.0,37,...,11,79,60,15,45,20,17,28,69,66
1,2017-18,1610612739,CLE,Cleveland Cavaliers,0021701207,2018-04-09T00:00:00,CLE @ NYK,W,48.0,40,...,40,8,16,15,60,71,17,38,12,13
2,2017-18,1610612739,CLE,Cleveland Cavaliers,0021701183,2018-04-06T00:00:00,CLE @ PHI,L,48.0,45,...,46,36,48,47,60,71,52,16,4,51
3,2017-18,1610612739,CLE,Cleveland Cavaliers,0021701176,2018-04-05T00:00:00,CLE vs. WAS,W,48.0,42,...,71,18,60,10,45,71,6,16,22,41
4,2017-18,1610612739,CLE,Cleveland Cavaliers,0021701156,2018-04-03T00:00:00,CLE vs. TOR,W,48.0,38,...,40,71,25,28,11,2,31,16,43,32
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
77,2017-18,1610612739,CLE,Cleveland Cavaliers,0021700058,2017-10-25T00:00:00,CLE @ BKN,L,48.0,39,...,14,56,81,57,3,20,31,28,50,58
78,2017-18,1610612739,CLE,Cleveland Cavaliers,0021700049,2017-10-24T00:00:00,CLE vs. CHI,W,48.0,43,...,59,10,4,57,45,20,6,38,22,24
79,2017-18,1610612739,CLE,Cleveland Cavaliers,0021700028,2017-10-21T00:00:00,CLE vs. ORL,L,48.0,33,...,31,79,60,67,3,63,6,3,77,77
80,2017-18,1610612739,CLE,Cleveland Cavaliers,0021700021,2017-10-20T00:00:00,CLE @ MIL,W,48.0,44,...,31,24,70,28,45,6,2,69,28,9


In [15]:
la_games_df.columns

Index(['SEASON_YEAR', 'TEAM_ID', 'TEAM_ABBREVIATION', 'TEAM_NAME', 'GAME_ID',
       'GAME_DATE', 'MATCHUP', 'WL', 'MIN', 'FGM', 'FGA', 'FG_PCT', 'FG3M',
       'FG3A', 'FG3_PCT', 'FTM', 'FTA', 'FT_PCT', 'OREB', 'DREB', 'REB', 'AST',
       'TOV', 'STL', 'BLK', 'BLKA', 'PF', 'PFD', 'PTS', 'PLUS_MINUS',
       'GP_RANK', 'W_RANK', 'L_RANK', 'W_PCT_RANK', 'MIN_RANK', 'FGM_RANK',
       'FGA_RANK', 'FG_PCT_RANK', 'FG3M_RANK', 'FG3A_RANK', 'FG3_PCT_RANK',
       'FTM_RANK', 'FTA_RANK', 'FT_PCT_RANK', 'OREB_RANK', 'DREB_RANK',
       'REB_RANK', 'AST_RANK', 'TOV_RANK', 'STL_RANK', 'BLK_RANK', 'BLKA_RANK',
       'PF_RANK', 'PFD_RANK', 'PTS_RANK', 'PLUS_MINUS_RANK'],
      dtype='object')

1. Get teams list and players list (active)
2. Get player career stats
3. Get all team games for specific season
4. Get player stats per game

In [2]:
players_list = players.get_active_players()
players_df = pd.DataFrame(players_list)
players_df.to_csv('players.csv')

In [6]:
players_df

Unnamed: 0,id,full_name,first_name,last_name,is_active
0,1630173,Precious Achiuwa,Precious,Achiuwa,True
1,203500,Steven Adams,Steven,Adams,True
2,1628389,Bam Adebayo,Bam,Adebayo,True
3,1630534,Ochai Agbaji,Ochai,Agbaji,True
4,1630583,Santi Aldama,Santi,Aldama,True
...,...,...,...,...,...
577,201152,Thaddeus Young,Thaddeus,Young,True
578,1629027,Trae Young,Trae,Young,True
579,1630209,Omer Yurtseven,Omer,Yurtseven,True
580,203469,Cody Zeller,Cody,Zeller,True


In [3]:
teams_list = teams.get_teams()
teams_df = pd.DataFrame(teams_list)
teams_df.to_csv('teams.csv')

In [4]:
#players_list[0]
player_dfs = []
for player in players_list[:10]:
    player_id = player['id']
    player_season_stats_df = playercareerstats.PlayerCareerStats(
        player_id = player_id,
        per_mode36 = 'PerGame'
    ).get_data_frames()[0]
    player_season_stats_df['PLAYER_NAME'] = player['full_name']
    player_dfs.append(player_season_stats_df)

all_player_dfs = pd.concat(player_dfs)
all_player_dfs
    

Unnamed: 0,PLAYER_ID,SEASON_ID,LEAGUE_ID,TEAM_ID,TEAM_ABBREVIATION,PLAYER_AGE,GP,GS,MIN,FGM,...,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS,PLAYER_NAME
0,1630173,2020-21,0,1610612748,MIA,21.0,61,4,12.1,2.0,...,1.2,2.2,3.4,0.5,0.3,0.5,0.7,1.5,5.0,Precious Achiuwa
1,1630173,2021-22,0,1610612761,TOR,22.0,73,28,23.6,3.6,...,2.0,4.5,6.5,1.1,0.5,0.6,1.2,2.1,9.1,Precious Achiuwa
2,1630173,2022-23,0,1610612761,TOR,23.0,12,0,20.4,3.0,...,2.0,5.0,7.0,1.3,0.2,0.6,1.3,1.6,8.8,Precious Achiuwa
0,203500,2013-14,0,1610612760,OKC,20.0,81,20,14.8,1.1,...,1.8,2.3,4.1,0.5,0.5,0.7,0.9,2.5,3.3,Steven Adams
1,203500,2014-15,0,1610612760,OKC,21.0,70,67,25.3,3.1,...,2.8,4.6,7.5,0.9,0.5,1.2,1.4,3.2,7.7,Steven Adams
2,203500,2015-16,0,1610612760,OKC,22.0,80,80,25.2,3.3,...,2.7,3.9,6.7,0.8,0.5,1.1,1.1,2.8,8.0,Steven Adams
3,203500,2016-17,0,1610612760,OKC,23.0,80,80,29.9,4.7,...,3.5,4.2,7.7,1.1,1.1,1.0,1.8,2.4,11.3,Steven Adams
4,203500,2017-18,0,1610612760,OKC,24.0,76,76,32.7,5.9,...,5.1,4.0,9.0,1.2,1.2,1.0,1.7,2.8,13.9,Steven Adams
5,203500,2018-19,0,1610612760,OKC,25.0,80,80,33.4,6.0,...,4.9,4.6,9.5,1.6,1.5,1.0,1.7,2.6,13.9,Steven Adams
6,203500,2019-20,0,1610612760,OKC,26.0,63,63,26.7,4.5,...,3.3,6.0,9.3,2.3,0.8,1.1,1.5,1.9,10.9,Steven Adams


In [5]:
all_player_dfs.columns

Index(['PLAYER_ID', 'SEASON_ID', 'LEAGUE_ID', 'TEAM_ID', 'TEAM_ABBREVIATION',
       'PLAYER_AGE', 'GP', 'GS', 'MIN', 'FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A',
       'FG3_PCT', 'FTM', 'FTA', 'FT_PCT', 'OREB', 'DREB', 'REB', 'AST', 'STL',
       'BLK', 'TOV', 'PF', 'PTS', 'PLAYER_NAME'],
      dtype='object')

In [2]:
cavs_games = teamgamelogs.TeamGameLogs(
    team_id_nullable = '1610612739',
    season_nullable = '2022-23'
)
cavs_games_df = cavs_games.get_data_frames()[0]
cavs_games_df

Unnamed: 0,SEASON_YEAR,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,FGM,...,REB_RANK,AST_RANK,TOV_RANK,STL_RANK,BLK_RANK,BLKA_RANK,PF_RANK,PFD_RANK,PTS_RANK,PLUS_MINUS_RANK
0,2022-23,1610612739,CLE,Cleveland Cavaliers,22200540,2022-12-31T00:00:00,CLE @ CHI,W,48.0,39,...,3,30,36,4,7,25,16,33,28,21
1,2022-23,1610612739,CLE,Cleveland Cavaliers,22200524,2022-12-29T00:00:00,CLE @ IND,L,48.0,50,...,15,5,9,11,14,35,10,28,4,31
2,2022-23,1610612739,CLE,Cleveland Cavaliers,22200498,2022-12-26T00:00:00,CLE vs. BKN,L,48.0,39,...,36,27,11,7,22,13,3,9,10,30
3,2022-23,1610612739,CLE,Cleveland Cavaliers,22200484,2022-12-23T00:00:00,CLE vs. TOR,L,48.0,39,...,33,8,21,4,14,2,10,22,24,32
4,2022-23,1610612739,CLE,Cleveland Cavaliers,22200466,2022-12-21T00:00:00,CLE vs. MIL,W,48.0,41,...,30,22,5,7,22,22,25,14,14,17
5,2022-23,1610612739,CLE,Cleveland Cavaliers,22200452,2022-12-19T00:00:00,CLE vs. UTA,W,48.0,43,...,14,16,37,28,14,2,19,16,6,5
6,2022-23,1610612739,CLE,Cleveland Cavaliers,22200440,2022-12-17T00:00:00,CLE vs. DAL,W,53.0,40,...,1,20,12,20,7,25,35,22,33,21
7,2022-23,1610612739,CLE,Cleveland Cavaliers,22200431,2022-12-16T00:00:00,CLE vs. IND,W,48.0,38,...,34,20,5,20,30,13,7,16,9,20
8,2022-23,1610612739,CLE,Cleveland Cavaliers,22200421,2022-12-14T00:00:00,CLE @ DAL,W,48.0,41,...,18,9,3,28,30,8,19,33,26,7
9,2022-23,1610612739,CLE,Cleveland Cavaliers,22200406,2022-12-12T00:00:00,CLE @ SAS,L,48.0,45,...,15,27,21,11,3,2,1,25,22,24


In [10]:
cavs_games_df.columns

Index(['SEASON_YEAR', 'TEAM_ID', 'TEAM_ABBREVIATION', 'TEAM_NAME', 'GAME_ID',
       'GAME_DATE', 'MATCHUP', 'WL', 'MIN', 'FGM', 'FGA', 'FG_PCT', 'FG3M',
       'FG3A', 'FG3_PCT', 'FTM', 'FTA', 'FT_PCT', 'OREB', 'DREB', 'REB', 'AST',
       'TOV', 'STL', 'BLK', 'BLKA', 'PF', 'PFD', 'PTS', 'PLUS_MINUS',
       'GP_RANK', 'W_RANK', 'L_RANK', 'W_PCT_RANK', 'MIN_RANK', 'FGM_RANK',
       'FGA_RANK', 'FG_PCT_RANK', 'FG3M_RANK', 'FG3A_RANK', 'FG3_PCT_RANK',
       'FTM_RANK', 'FTA_RANK', 'FT_PCT_RANK', 'OREB_RANK', 'DREB_RANK',
       'REB_RANK', 'AST_RANK', 'TOV_RANK', 'STL_RANK', 'BLK_RANK', 'BLKA_RANK',
       'PF_RANK', 'PFD_RANK', 'PTS_RANK', 'PLUS_MINUS_RANK'],
      dtype='object')

In [37]:
cavs_games_filtered = cavs_games_df[[
    'SEASON_YEAR',
    'TEAM_ID',
    'TEAM_ABBREVIATION',
    'TEAM_NAME',
    'GAME_ID',
    'WL',
    'FGM',
    'FTM',
    'AST',
    'DREB',
    'STL',
    'BLK',
    'MIN'
]]
cavs_games_filtered['GAME_BUCKETS'] = cavs_games_filtered['FGM'] + cavs_games_filtered['FTM'] + cavs_games_filtered['AST']
cavs_games_filtered['GAME_STOPS'] = cavs_games_filtered['DREB'] + cavs_games_filtered['STL'] + cavs_games_filtered['BLK']
cavs_games_filtered.drop(['FGM', 'FTM', 'AST', 'DREB', 'STL', 'BLK'], axis = 1, inplace = True)
cavs_games_filtered.rename(columns = {'MIN': 'GAME_MIN'}, inplace = True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cavs_games_filtered['GAME_BUCKETS'] = cavs_games_filtered['FGM'] + cavs_games_filtered['FTM'] + cavs_games_filtered['AST']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cavs_games_filtered['GAME_STOPS'] = cavs_games_filtered['DREB'] + cavs_games_filtered['STL'] + cavs_games_filtered['BLK']
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cavs_games_filter

In [38]:
cavs_games_filtered

Unnamed: 0,SEASON_YEAR,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,WL,GAME_MIN,GAME_BUCKETS,GAME_STOPS
0,2022-23,1610612739,CLE,Cleveland Cavaliers,22200540,W,48.0,71,49
1,2022-23,1610612739,CLE,Cleveland Cavaliers,22200524,L,48.0,93,38
2,2022-23,1610612739,CLE,Cleveland Cavaliers,22200498,L,48.0,86,34
3,2022-23,1610612739,CLE,Cleveland Cavaliers,22200484,L,48.0,84,42
4,2022-23,1610612739,CLE,Cleveland Cavaliers,22200466,W,48.0,85,39
5,2022-23,1610612739,CLE,Cleveland Cavaliers,22200452,W,48.0,88,50
6,2022-23,1610612739,CLE,Cleveland Cavaliers,22200440,W,53.0,76,53
7,2022-23,1610612739,CLE,Cleveland Cavaliers,22200431,W,48.0,89,36
8,2022-23,1610612739,CLE,Cleveland Cavaliers,22200421,W,48.0,82,45
9,2022-23,1610612739,CLE,Cleveland Cavaliers,22200406,L,48.0,82,43


In [5]:
cavs_player_games = playergamelogs.PlayerGameLogs(
    team_id_nullable = '1610612739',
    season_nullable = '2022-23'
)
cavs_player_games_df = cavs_player_games.get_data_frames()[0]
cavs_player_games_df

Unnamed: 0,SEASON_YEAR,PLAYER_ID,PLAYER_NAME,NICKNAME,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,...,BLKA_RANK,PF_RANK,PFD_RANK,PTS_RANK,PLUS_MINUS_RANK,NBA_FANTASY_PTS_RANK,DD2_RANK,TD3_RANK,WNBA_FANTASY_PTS_RANK,VIDEO_AVAILABLE_FLAG
0,2022-23,1627747,Caris LeVert,Caris,1610612739,CLE,Cleveland Cavaliers,0022200540,2022-12-31T00:00:00,CLE @ CHI,...,1,64,133,37,185,62,43,1,45,1
1,2022-23,1628378,Donovan Mitchell,Donovan,1610612739,CLE,Cleveland Cavaliers,0022200540,2022-12-31T00:00:00,CLE @ CHI,...,242,252,22,103,307,92,43,1,85,1
2,2022-23,1628386,Jarrett Allen,Jarrett,1610612739,CLE,Cleveland Cavaliers,0022200540,2022-12-31T00:00:00,CLE @ CHI,...,335,64,203,227,247,104,43,1,127,1
3,2022-23,1630171,Isaac Okoro,Isaac,1610612739,CLE,Cleveland Cavaliers,0022200540,2022-12-31T00:00:00,CLE @ CHI,...,242,252,82,152,170,144,43,1,161,1
4,2022-23,1626224,Cedi Osman,Cedi,1610612739,CLE,Cleveland Cavaliers,0022200540,2022-12-31T00:00:00,CLE @ CHI,...,242,310,203,137,89,198,43,1,171,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
362,2022-23,1629731,Dean Wade,Dean,1610612739,CLE,Cleveland Cavaliers,0022200008,2022-10-19T00:00:00,CLE @ TOR,...,1,64,203,190,327,201,43,1,204,1
363,2022-23,1629636,Darius Garland,Darius,1610612739,CLE,Cleveland Cavaliers,0022200008,2022-10-19T00:00:00,CLE @ TOR,...,242,64,133,258,319,234,43,1,225,1
364,2022-23,203526,Raul Neto,Raul,1610612739,CLE,Cleveland Cavaliers,0022200008,2022-10-19T00:00:00,CLE @ TOR,...,1,1,272,323,294,347,43,1,343,1
365,2022-23,1630171,Isaac Okoro,Isaac,1610612739,CLE,Cleveland Cavaliers,0022200008,2022-10-19T00:00:00,CLE @ TOR,...,1,310,272,323,200,365,43,1,350,1


In [13]:
cavs_player_games_df.columns

Index(['SEASON_YEAR', 'PLAYER_ID', 'PLAYER_NAME', 'NICKNAME', 'TEAM_ID',
       'TEAM_ABBREVIATION', 'TEAM_NAME', 'GAME_ID', 'GAME_DATE', 'MATCHUP',
       'WL', 'MIN', 'FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A', 'FG3_PCT', 'FTM',
       'FTA', 'FT_PCT', 'OREB', 'DREB', 'REB', 'AST', 'TOV', 'STL', 'BLK',
       'BLKA', 'PF', 'PFD', 'PTS', 'PLUS_MINUS', 'NBA_FANTASY_PTS', 'DD2',
       'TD3', 'WNBA_FANTASY_PTS', 'GP_RANK', 'W_RANK', 'L_RANK', 'W_PCT_RANK',
       'MIN_RANK', 'FGM_RANK', 'FGA_RANK', 'FG_PCT_RANK', 'FG3M_RANK',
       'FG3A_RANK', 'FG3_PCT_RANK', 'FTM_RANK', 'FTA_RANK', 'FT_PCT_RANK',
       'OREB_RANK', 'DREB_RANK', 'REB_RANK', 'AST_RANK', 'TOV_RANK',
       'STL_RANK', 'BLK_RANK', 'BLKA_RANK', 'PF_RANK', 'PFD_RANK', 'PTS_RANK',
       'PLUS_MINUS_RANK', 'NBA_FANTASY_PTS_RANK', 'DD2_RANK', 'TD3_RANK',
       'WNBA_FANTASY_PTS_RANK', 'VIDEO_AVAILABLE_FLAG'],
      dtype='object')

In [42]:
cavs_player_games_filtered = cavs_player_games_df[[
    'PLAYER_ID',
    'PLAYER_NAME',
    'TEAM_ID',
    'GAME_ID',
    'FGM',
    'FTM',
    'AST',
    'DREB',
    'STL',
    'BLK',
    'PLUS_MINUS',
    'MIN'
]]
cavs_player_games_filtered['PLAYER_BUCKETS'] = cavs_player_games_filtered['FGM'] + cavs_player_games_filtered['FTM'] + cavs_player_games_filtered['AST']
cavs_player_games_filtered['PLAYER_STOPS'] = cavs_player_games_filtered['DREB'] + cavs_player_games_filtered['STL'] + cavs_player_games_filtered['BLK']
cavs_player_games_filtered.rename(columns = {'MIN': 'PLAYER_MIN'}, inplace = True)
cavs_player_games_filtered.drop(['FGM', 'FTM', 'AST', 'DREB', 'STL', 'BLK'], axis = 1, inplace = True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cavs_player_games_filtered['PLAYER_BUCKETS'] = cavs_player_games_filtered['FGM'] + cavs_player_games_filtered['FTM'] + cavs_player_games_filtered['AST']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cavs_player_games_filtered['PLAYER_STOPS'] = cavs_player_games_filtered['DREB'] + cavs_player_games_filtered['STL'] + cavs_player_games_filtered['BLK']
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/inde

In [43]:
cavs_player_games_filtered

Unnamed: 0,PLAYER_ID,PLAYER_NAME,TEAM_ID,GAME_ID,PLUS_MINUS,PLAYER_MIN,PLAYER_BUCKETS,PLAYER_STOPS
0,1627747,Caris LeVert,1610612739,0022200540,1,37.266667,14,6
1,1628378,Donovan Mitchell,1610612739,0022200540,-9,36.826667,16,6
2,1628386,Jarrett Allen,1610612739,0022200540,-3,34.870000,5,10
3,1630171,Isaac Okoro,1610612739,0022200540,2,22.940000,8,6
4,1626224,Cedi Osman,1610612739,0022200540,9,24.530000,7,3
...,...,...,...,...,...,...,...,...
362,1629731,Dean Wade,1610612739,0022200008,-11,21.693333,6,3
363,1629636,Darius Garland,1610612739,0022200008,-10,13.300000,5,4
364,203526,Raul Neto,1610612739,0022200008,-8,6.033333,0,1
365,1630171,Isaac Okoro,1610612739,0022200008,0,11.760000,0,0


In [44]:
cavs_merged_df = pd.merge(
    cavs_player_games_filtered,
    cavs_games_filtered,
    how = 'inner',
    on = ['TEAM_ID', 'GAME_ID']
)
cavs_merged_df

Unnamed: 0,PLAYER_ID,PLAYER_NAME,TEAM_ID,GAME_ID,PLUS_MINUS,PLAYER_MIN,PLAYER_BUCKETS,PLAYER_STOPS,SEASON_YEAR,TEAM_ABBREVIATION,TEAM_NAME,WL,GAME_MIN,GAME_BUCKETS,GAME_STOPS
0,1627747,Caris LeVert,1610612739,0022200540,1,37.266667,14,6,2022-23,CLE,Cleveland Cavaliers,W,48.0,71,49
1,1628378,Donovan Mitchell,1610612739,0022200540,-9,36.826667,16,6,2022-23,CLE,Cleveland Cavaliers,W,48.0,71,49
2,1628386,Jarrett Allen,1610612739,0022200540,-3,34.870000,5,10,2022-23,CLE,Cleveland Cavaliers,W,48.0,71,49
3,1630171,Isaac Okoro,1610612739,0022200540,2,22.940000,8,6,2022-23,CLE,Cleveland Cavaliers,W,48.0,71,49
4,1626224,Cedi Osman,1610612739,0022200540,9,24.530000,7,3,2022-23,CLE,Cleveland Cavaliers,W,48.0,71,49
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
362,1629731,Dean Wade,1610612739,0022200008,-11,21.693333,6,3,2022-23,CLE,Cleveland Cavaliers,L,48.0,82,42
363,1629636,Darius Garland,1610612739,0022200008,-10,13.300000,5,4,2022-23,CLE,Cleveland Cavaliers,L,48.0,82,42
364,203526,Raul Neto,1610612739,0022200008,-8,6.033333,0,1,2022-23,CLE,Cleveland Cavaliers,L,48.0,82,42
365,1630171,Isaac Okoro,1610612739,0022200008,0,11.760000,0,0,2022-23,CLE,Cleveland Cavaliers,L,48.0,82,42


In [45]:
cavs_merged_df['BUCKET_CONTRIBUTION'] = cavs_merged_df['PLAYER_BUCKETS'] / cavs_merged_df['GAME_BUCKETS']
cavs_merged_df['STOP_CONTRIBUTION'] = cavs_merged_df['PLAYER_STOPS'] / cavs_merged_df['GAME_STOPS']
cavs_merged_df['BUCKET_CONTRIBUTION_RATE'] = cavs_merged_df['BUCKET_CONTRIBUTION'] * 100
cavs_merged_df['STOP_CONTRIBUTION_RATE'] = cavs_merged_df['STOP_CONTRIBUTION'] * 100
cavs_merged_df['MIN_PERCENTAGE'] = cavs_merged_df['PLAYER_MIN'] / cavs_merged_df['GAME_MIN']

In [46]:
cavs_merged_df

Unnamed: 0,PLAYER_ID,PLAYER_NAME,TEAM_ID,GAME_ID,PLUS_MINUS,PLAYER_MIN,PLAYER_BUCKETS,PLAYER_STOPS,SEASON_YEAR,TEAM_ABBREVIATION,TEAM_NAME,WL,GAME_MIN,GAME_BUCKETS,GAME_STOPS,BUCKET_CONTRIBUTION,STOP_CONTRIBUTION,BUCKET_CONTRIBUTION_RATE,STOP_CONTRIBUTION_RATE,MIN_PERCENTAGE
0,1627747,Caris LeVert,1610612739,0022200540,1,37.266667,14,6,2022-23,CLE,Cleveland Cavaliers,W,48.0,71,49,0.197183,0.122449,19.718310,12.244898,0.776389
1,1628378,Donovan Mitchell,1610612739,0022200540,-9,36.826667,16,6,2022-23,CLE,Cleveland Cavaliers,W,48.0,71,49,0.225352,0.122449,22.535211,12.244898,0.767222
2,1628386,Jarrett Allen,1610612739,0022200540,-3,34.870000,5,10,2022-23,CLE,Cleveland Cavaliers,W,48.0,71,49,0.070423,0.204082,7.042254,20.408163,0.726458
3,1630171,Isaac Okoro,1610612739,0022200540,2,22.940000,8,6,2022-23,CLE,Cleveland Cavaliers,W,48.0,71,49,0.112676,0.122449,11.267606,12.244898,0.477917
4,1626224,Cedi Osman,1610612739,0022200540,9,24.530000,7,3,2022-23,CLE,Cleveland Cavaliers,W,48.0,71,49,0.098592,0.061224,9.859155,6.122449,0.511042
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
362,1629731,Dean Wade,1610612739,0022200008,-11,21.693333,6,3,2022-23,CLE,Cleveland Cavaliers,L,48.0,82,42,0.073171,0.071429,7.317073,7.142857,0.451944
363,1629636,Darius Garland,1610612739,0022200008,-10,13.300000,5,4,2022-23,CLE,Cleveland Cavaliers,L,48.0,82,42,0.060976,0.095238,6.097561,9.523810,0.277083
364,203526,Raul Neto,1610612739,0022200008,-8,6.033333,0,1,2022-23,CLE,Cleveland Cavaliers,L,48.0,82,42,0.000000,0.023810,0.000000,2.380952,0.125694
365,1630171,Isaac Okoro,1610612739,0022200008,0,11.760000,0,0,2022-23,CLE,Cleveland Cavaliers,L,48.0,82,42,0.000000,0.000000,0.000000,0.000000,0.245000


In [11]:
cavs_players_list = cavs_merged_df['PLAYER_ID'].unique().tolist()
cavs_stats_dfs = []
for player_id in cavs_players_list:
    player_season_stats_df = playercareerstats.PlayerCareerStats(
        player_id = player_id,
        per_mode36 = 'PerGame'
    ).get_data_frames()[0]
    cavs_stats_dfs.append(player_season_stats_df)

all_cavs_stats_dfs = pd.concat(cavs_stats_dfs)
all_cavs_stats_dfs

Unnamed: 0,PLAYER_ID,SEASON_ID,LEAGUE_ID,TEAM_ID,TEAM_ABBREVIATION,PLAYER_AGE,GP,GS,MIN,FGM,...,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS
0,1627747,2016-17,00,1610612751,BKN,22.0,57,26,21.7,3.0,...,0.720,0.4,2.9,3.3,1.9,0.9,0.1,1.0,1.6,8.2
1,1627747,2017-18,00,1610612751,BKN,23.0,71,10,26.2,4.5,...,0.711,0.7,2.9,3.7,4.2,1.2,0.3,2.2,2.2,12.1
2,1627747,2018-19,00,1610612751,BKN,24.0,40,25,26.6,5.2,...,0.691,0.9,2.9,3.8,3.9,1.1,0.4,1.7,1.9,13.7
3,1627747,2019-20,00,1610612751,BKN,25.0,45,31,29.6,6.9,...,0.711,1.1,3.1,4.2,4.4,1.2,0.2,2.6,1.8,18.7
4,1627747,2020-21,00,1610612751,BKN,26.0,12,4,27.8,7.3,...,0.765,0.8,3.5,4.3,6.0,1.1,0.5,2.2,1.7,18.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0,1630600,2022-23,00,1610612739,CLE,23.0,8,0,3.3,0.5,...,0.000,0.1,1.3,1.4,0.0,0.1,0.0,0.0,0.3,1.1
0,1629731,2019-20,00,1610612739,CLE,23.0,12,0,6.0,0.8,...,0.000,0.3,1.3,1.6,0.2,0.2,0.3,0.3,0.7,1.7
1,1629731,2020-21,00,1610612739,CLE,24.0,63,19,19.2,2.1,...,0.769,0.6,2.8,3.4,1.2,0.6,0.3,0.5,1.2,6.0
2,1629731,2021-22,00,1610612739,CLE,25.0,51,28,19.2,1.9,...,0.667,0.6,2.3,2.9,1.0,0.6,0.1,0.3,1.7,5.3


In [13]:
all_cavs_stats_dfs.to_csv('cavs_stats.csv', index = 0)

In [28]:
all_cavs_stats_dfs['AVERAGE_BUCKETS'] = all_cavs_stats_dfs['FGM'] + all_cavs_stats_dfs['FTM'] + all_cavs_stats_dfs['AST']
all_cavs_stats_dfs['AVERAGE_STOPS'] = all_cavs_stats_dfs['DREB'] + all_cavs_stats_dfs['STL'] + all_cavs_stats_dfs['BLK']

In [47]:
cavs_career_stats = all_cavs_stats_dfs[[
    'PLAYER_ID',
    'SEASON_ID',
    'TEAM_ID',
    'MIN',
    'AVERAGE_BUCKETS',
    'AVERAGE_STOPS'
]]
cavs_career_stats.rename(columns = {'SEASON_ID': 'SEASON_YEAR', 'MIN': 'AVERAGE_MIN'}, inplace = True)
cavs_career_stats

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cavs_career_stats.rename(columns = {'SEASON_ID': 'SEASON_YEAR', 'MIN': 'AVERAGE_MIN'}, inplace = True)


Unnamed: 0,PLAYER_ID,SEASON_YEAR,TEAM_ID,AVERAGE_MIN,AVERAGE_BUCKETS,AVERAGE_STOPS
0,1627747,2016-17,1610612751,21.7,6.1,3.9
1,1627747,2017-18,1610612751,26.2,10.6,4.4
2,1627747,2018-19,1610612751,26.6,11.2,4.4
3,1627747,2019-20,1610612751,29.6,14.4,4.5
4,1627747,2020-21,1610612751,27.8,15.5,5.1
...,...,...,...,...,...,...
0,1630600,2022-23,1610612739,3.3,0.5,1.4
0,1629731,2019-20,1610612739,6.0,1.0,1.8
1,1629731,2020-21,1610612739,19.2,3.8,3.7
2,1629731,2021-22,1610612739,19.2,3.3,3.0


In [48]:
cavs_merged_df_2 = pd.merge(
    cavs_merged_df,
    cavs_career_stats,
    how = 'inner',
    on = ['PLAYER_ID', 'TEAM_ID', 'SEASON_YEAR']
)
cavs_merged_df_2['BUCKET_UPLIFT'] = (cavs_merged_df_2['PLAYER_BUCKETS'] - cavs_merged_df_2['AVERAGE_BUCKETS']) / cavs_merged_df_2['AVERAGE_BUCKETS']
cavs_merged_df_2['STOP_UPLIFT'] = (cavs_merged_df_2['PLAYER_STOPS'] - cavs_merged_df_2['AVERAGE_STOPS']) / cavs_merged_df_2['AVERAGE_STOPS']
cavs_merged_df_2

Unnamed: 0,PLAYER_ID,PLAYER_NAME,TEAM_ID,GAME_ID,PLUS_MINUS,PLAYER_MIN,PLAYER_BUCKETS,PLAYER_STOPS,SEASON_YEAR,TEAM_ABBREVIATION,...,BUCKET_CONTRIBUTION,STOP_CONTRIBUTION,BUCKET_CONTRIBUTION_RATE,STOP_CONTRIBUTION_RATE,MIN_PERCENTAGE,AVERAGE_MIN,AVERAGE_BUCKETS,AVERAGE_STOPS,BUCKET_UPLIFT,STOP_UPLIFT
0,1627747,Caris LeVert,1610612739,0022200540,1,37.266667,14,6,2022-23,CLE,...,0.197183,0.122449,19.718310,12.244898,0.776389,30.7,10.0,4.4,0.400000,0.363636
1,1627747,Caris LeVert,1610612739,0022200524,-8,34.850000,12,5,2022-23,CLE,...,0.129032,0.131579,12.903226,13.157895,0.726042,30.7,10.0,4.4,0.200000,0.136364
2,1627747,Caris LeVert,1610612739,0022200498,-8,19.666667,5,3,2022-23,CLE,...,0.058140,0.088235,5.813953,8.823529,0.409722,30.7,10.0,4.4,-0.500000,-0.318182
3,1627747,Caris LeVert,1610612739,0022200484,0,18.033333,6,2,2022-23,CLE,...,0.071429,0.047619,7.142857,4.761905,0.375694,30.7,10.0,4.4,-0.400000,-0.545455
4,1627747,Caris LeVert,1610612739,0022200466,-6,32.283333,4,9,2022-23,CLE,...,0.047059,0.230769,4.705882,23.076923,0.672569,30.7,10.0,4.4,-0.600000,1.045455
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
362,1629731,Dean Wade,1610612739,0022200072,20,38.928333,3,6,2022-23,CLE,...,0.033708,0.150000,3.370787,15.000000,0.734497,24.1,3.7,4.6,-0.189189,0.304348
363,1629731,Dean Wade,1610612739,0022200056,9,29.725000,4,4,2022-23,CLE,...,0.048780,0.100000,4.878049,10.000000,0.619271,24.1,3.7,4.6,0.081081,-0.130435
364,1629731,Dean Wade,1610612739,0022200039,19,21.315000,9,3,2022-23,CLE,...,0.109756,0.063830,10.975610,6.382979,0.402170,24.1,3.7,4.6,1.432432,-0.347826
365,1629731,Dean Wade,1610612739,0022200032,6,4.116667,3,2,2022-23,CLE,...,0.031250,0.040816,3.125000,4.081633,0.085764,24.1,3.7,4.6,-0.189189,-0.565217


In [17]:
cavs_team_game_averages_2 = cavs_merged_df_2[['TEAM_ID', 'GAME_ID', 'PLAYER_ID', 'BUCKET_UPLIFT', 'STOP_UPLIFT']]
cavs_team_game_averages_2 = cavs_team_game_averages_2.groupby(['TEAM_ID', 'GAME_ID']).agg(
    {
        'BUCKET_UPLIFT': 'sum', 
        'STOP_UPLIFT': 'sum',
        'PLAYER_ID': 'count'
    }
).reset_index().rename(columns = {'BUCKET_UPLIFT': 'TOT_BUCKET_UPLIFT', 'STOP_UPLIFT': 'TOT_STOP_UPLIFT', 'PLAYER_ID': 'TOT_PLAYERS'})
cavs_team_game_averages_2

Unnamed: 0,TEAM_ID,GAME_ID,TOT_BUCKET_UPLIFT,TOT_STOP_UPLIFT,TOT_PLAYERS
0,1610612739,22200008,-1.502045,-1.556775,10
1,1610612739,22200032,4.049199,4.090153,12
2,1610612739,22200039,1.323572,0.970207,10
3,1610612739,22200056,1.530427,-1.667777,9
4,1610612739,22200072,-0.042161,-2.335365,9
5,1610612739,22200090,0.199947,-3.449789,12
6,1610612739,22200107,-2.362845,0.09809,10
7,1610612739,22200120,4.652612,6.329481,11
8,1610612739,22200140,-1.033769,-1.199104,10
9,1610612739,22200158,-0.154399,-1.471418,9


In [49]:
# cavs_team_game_averages = cavs_merged_df_2[['TEAM_ID', 'GAME_ID', 'BUCKET_UPLIFT', 'STOP_UPLIFT']]
# cavs_team_game_averages = cavs_team_game_averages.groupby(['TEAM_ID', 'GAME_ID']).mean().reset_index()
# cavs_team_game_averages.rename(columns = {'BUCKET_UPLIFT': 'AVG_BUCKET_UPLIFT', 'STOP_UPLIFT': 'AVG_STOP_UPLIFT'}, inplace = True)
# cavs_team_game_averages

In [50]:
cavs_merged_df_3 = pd.merge(
    cavs_merged_df_2,
    cavs_team_game_averages_2,
    how = 'inner',
    on = ['TEAM_ID', 'GAME_ID']
)
cavs_merged_df_3['AVG_TMT_BUCKET_UPLIFT'] = (cavs_merged_df_3['TOT_BUCKET_UPLIFT'] - cavs_merged_df_3['BUCKET_UPLIFT']) / (cavs_merged_df_3['TOT_PLAYERS'] - 1)
cavs_merged_df_3['TMT_BUCKET_UPLIFT_CONT_RATE'] = cavs_merged_df_3['MIN_PERCENTAGE'] * cavs_merged_df_3['AVG_TMT_BUCKET_UPLIFT'] * 100
cavs_merged_df_3['AVG_TMT_STOP_UPLIFT'] = (cavs_merged_df_3['TOT_STOP_UPLIFT'] - cavs_merged_df_3['STOP_UPLIFT']) / (cavs_merged_df_3['TOT_PLAYERS'] - 1)
cavs_merged_df_3['TMT_STOP_UPLIFT_CONT_RATE'] = cavs_merged_df_3['MIN_PERCENTAGE'] * cavs_merged_df_3['AVG_TMT_STOP_UPLIFT'] * 100
cavs_merged_df_3

Unnamed: 0,PLAYER_ID,PLAYER_NAME,TEAM_ID,GAME_ID,PLUS_MINUS,PLAYER_MIN,PLAYER_BUCKETS,PLAYER_STOPS,SEASON_YEAR,TEAM_ABBREVIATION,...,AVERAGE_STOPS,BUCKET_UPLIFT,STOP_UPLIFT,TOT_BUCKET_UPLIFT,TOT_STOP_UPLIFT,TOT_PLAYERS,AVG_TMT_BUCKET_UPLIFT,TMT_BUCKET_UPLIFT_CONT_RATE,AVG_TMT_STOP_UPLIFT,TMT_STOP_UPLIFT_CONT_RATE
0,1627747,Caris LeVert,1610612739,0022200540,1,37.266667,14,6,2022-23,CLE,...,4.4,0.400000,0.363636,2.073870,3.830445,10,0.185986,14.439713,0.385201,29.906578
1,1628378,Donovan Mitchell,1610612739,0022200540,-9,36.826667,16,6,2022-23,CLE,...,4.7,-0.166667,0.276596,2.073870,3.830445,10,0.248949,19.099885,0.394872,30.295472
2,1628386,Jarrett Allen,1610612739,0022200540,-3,34.870000,5,10,2022-23,CLE,...,9.2,-0.489796,0.086957,2.073870,3.830445,10,0.284852,20.693295,0.415943,30.216541
3,1630171,Isaac Okoro,1610612739,0022200540,2,22.940000,8,6,2022-23,CLE,...,2.7,1.222222,1.222222,2.073870,3.830445,10,0.094628,4.522408,0.289803,13.850148
4,1626224,Cedi Osman,1610612739,0022200540,9,24.530000,7,3,2022-23,CLE,...,2.9,0.111111,0.034483,2.073870,3.830445,10,0.218084,11.145018,0.421774,21.554390
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
362,201577,Robin Lopez,1610612739,0022200248,5,10.250000,4,2,2022-23,CLE,...,0.9,0.818182,1.222222,0.529468,0.684732,9,-0.036089,-0.770655,-0.067186,-1.434706
363,1630205,Lamar Stevens,1610612739,0022200248,1,28.528333,4,3,2022-23,CLE,...,4.1,-0.024390,-0.268293,0.529468,0.684732,9,0.069232,4.114754,0.119128,7.080264
364,201567,Kevin Love,1610612739,0022200248,-1,11.716667,1,4,2022-23,CLE,...,6.5,-0.846154,-0.384615,0.529468,0.684732,9,0.171953,4.197319,0.133668,3.262810
365,1629636,Darius Garland,1610612739,0022200248,8,38.100000,24,1,2022-23,CLE,...,3.7,0.237113,-0.729730,0.529468,0.684732,9,0.036544,2.900707,0.176808,14.034115


In [51]:
cavs_merged_df_3.columns

Index(['PLAYER_ID', 'PLAYER_NAME', 'TEAM_ID', 'GAME_ID', 'PLUS_MINUS',
       'PLAYER_MIN', 'PLAYER_BUCKETS', 'PLAYER_STOPS', 'SEASON_YEAR',
       'TEAM_ABBREVIATION', 'TEAM_NAME', 'WL', 'GAME_MIN', 'GAME_BUCKETS',
       'GAME_STOPS', 'BUCKET_CONTRIBUTION', 'STOP_CONTRIBUTION',
       'BUCKET_CONTRIBUTION_RATE', 'STOP_CONTRIBUTION_RATE', 'MIN_PERCENTAGE',
       'AVERAGE_MIN', 'AVERAGE_BUCKETS', 'AVERAGE_STOPS', 'BUCKET_UPLIFT',
       'STOP_UPLIFT', 'TOT_BUCKET_UPLIFT', 'TOT_STOP_UPLIFT', 'TOT_PLAYERS',
       'AVG_TMT_BUCKET_UPLIFT', 'TMT_BUCKET_UPLIFT_CONT_RATE',
       'AVG_TMT_STOP_UPLIFT', 'TMT_STOP_UPLIFT_CONT_RATE'],
      dtype='object')

In [53]:
cavs_agg_df = cavs_merged_df_3[[
    'SEASON_YEAR', 
    'PLAYER_ID', 
    'PLAYER_NAME', 
    'TEAM_ID', 
    'TEAM_ABBREVIATION', 
    'TEAM_NAME',
    'AVERAGE_MIN',
    'PLUS_MINUS',
    'BUCKET_CONTRIBUTION_RATE',
    'STOP_CONTRIBUTION_RATE',
    'TMT_BUCKET_UPLIFT_CONT_RATE',
    'TMT_STOP_UPLIFT_CONT_RATE'
]]
cavs_agg_df = cavs_agg_df\
    .groupby(['SEASON_YEAR', 'PLAYER_ID', 'PLAYER_NAME', 'TEAM_ID', 'TEAM_ABBREVIATION', 'TEAM_NAME'])\
    .agg({
        'AVERAGE_MIN': 'mean',
        'PLUS_MINUS': 'sum',
        'BUCKET_CONTRIBUTION_RATE': 'mean', 
        'STOP_CONTRIBUTION_RATE': 'mean',
        'TMT_BUCKET_UPLIFT_CONT_RATE': 'mean',
        'TMT_STOP_UPLIFT_CONT_RATE': 'mean'
    }).reset_index()

In [55]:
cavs_agg_df['MIN_30_PLUS'] = cavs_agg_df['AVERAGE_MIN'].apply(lambda x: 1 if x > 30 else 0)

In [56]:
cavs_agg_df

Unnamed: 0,SEASON_YEAR,PLAYER_ID,PLAYER_NAME,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,AVERAGE_MIN,PLUS_MINUS,BUCKET_CONTRIBUTION_RATE,STOP_CONTRIBUTION_RATE,TMT_BUCKET_UPLIFT_CONT_RATE,TMT_STOP_UPLIFT_CONT_RATE,MIN_30_PLUS
0,2022-23,201567,Kevin Love,1610612739,CLE,Cleveland Cavaliers,20.3,114,7.800515,14.762016,-1.138555,-1.086489,0
1,2022-23,201577,Robin Lopez,1610612739,CLE,Cleveland Cavaliers,9.0,-7,2.802799,2.149564,1.715116,0.943715,0
2,2022-23,203526,Raul Neto,1610612739,CLE,Cleveland Cavaliers,9.2,21,3.254141,2.252939,2.016879,2.374224,0
3,2022-23,1626224,Cedi Osman,1610612739,CLE,Cleveland Cavaliers,22.9,182,7.593391,6.947413,0.654027,0.083446,0
4,2022-23,1627747,Caris LeVert,1610612739,CLE,Cleveland Cavaliers,30.7,109,12.06739,9.975993,-0.162176,1.158177,1
5,2022-23,1628378,Donovan Mitchell,1610612739,CLE,Cleveland Cavaliers,36.3,104,23.0597,10.776143,-3.807229,-3.615837,1
6,2022-23,1628386,Jarrett Allen,1610612739,CLE,Cleveland Cavaliers,33.5,139,11.454286,20.721244,-1.382691,-0.986432,1
7,2022-23,1629603,Mamadi Diakite,1610612739,CLE,Cleveland Cavaliers,7.1,4,1.301895,2.765173,-0.846867,0.790586,0
8,2022-23,1629636,Darius Garland,1610612739,CLE,Cleveland Cavaliers,35.9,107,23.597243,8.328161,-4.417274,-2.055365,1
9,2022-23,1629731,Dean Wade,1610612739,CLE,Cleveland Cavaliers,24.1,83,4.585264,10.393653,2.874682,-2.457558,0


In [57]:
from sklearn.cluster import KMeans



In [64]:
km = KMeans(
    n_clusters=3, init='random',
    n_init=10, max_iter=300, 
    tol=1e-04, random_state=0
)
fit_arr = cavs_agg_df[[
    'PLUS_MINUS',
    'BUCKET_CONTRIBUTION_RATE',
    'STOP_CONTRIBUTION_RATE',
    'TMT_BUCKET_UPLIFT_CONT_RATE',
    'TMT_STOP_UPLIFT_CONT_RATE',
    'MIN_30_PLUS'
]].to_numpy()
fit_arr
#y_km = km.fit_predict(fit_arr)

array([[ 1.14000000e+02,  7.80051544e+00,  1.47620160e+01,
        -1.13855456e+00, -1.08648915e+00,  0.00000000e+00],
       [-7.00000000e+00,  2.80279926e+00,  2.14956431e+00,
         1.71511616e+00,  9.43715075e-01,  0.00000000e+00],
       [ 2.10000000e+01,  3.25414104e+00,  2.25293915e+00,
         2.01687932e+00,  2.37422422e+00,  0.00000000e+00],
       [ 1.82000000e+02,  7.59339071e+00,  6.94741275e+00,
         6.54027150e-01,  8.34462547e-02,  0.00000000e+00],
       [ 1.09000000e+02,  1.20673898e+01,  9.97599323e+00,
        -1.62175967e-01,  1.15817699e+00,  1.00000000e+00],
       [ 1.04000000e+02,  2.30597000e+01,  1.07761434e+01,
        -3.80722862e+00, -3.61583679e+00,  1.00000000e+00],
       [ 1.39000000e+02,  1.14542863e+01,  2.07212437e+01,
        -1.38269108e+00, -9.86432058e-01,  1.00000000e+00],
       [ 4.00000000e+00,  1.30189526e+00,  2.76517301e+00,
        -8.46867317e-01,  7.90586007e-01,  0.00000000e+00],
       [ 1.07000000e+02,  2.35972426e+01,  8.328