In [11]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import os
import json
from nba_api.stats.endpoints import playercareerstats
from nba_api.stats.static import teams
from nba_api.stats.static import players

In [48]:
def check_data_source(dir_name: str="data_source"):
    
    if os.path.exists(dir_name):
        print("Available data name under data_source:\n")
        print(os.listdir(dir_name))
        return os.listdir(dir_name)
    else:
        dir_name = "../" + dir_name
        print(f"Attempting to search dir: '{dir_name}'\n")
        assert os.path.exists(dir_name), f"Still can't find the dir from: {dir_name}"
        print("Available data name under data_source:")
        print(os.listdir(dir_name))
        return os.listdir(dir_name)
        
        

dir_list = check_data_source()

Attempting to search dir: '../data_source'

Available data name under data_source:
['lucas_dict.json', 'test.json', 'test222.py']


In [52]:
suffix_list = [".json", ".py"]

str_length = len(suffix_list[0])

[string for string in dir_list if string[-str_length:] == suffix_list[0]]

['lucas_dict.json', 'test.json']

In [3]:
from nba_api.stats.endpoints import playercareerstats
# Anthony Davis
career = playercareerstats.PlayerCareerStats(player_id='203076')
career.get_data_frames()[0]

Unnamed: 0,PLAYER_ID,SEASON_ID,LEAGUE_ID,TEAM_ID,TEAM_ABBREVIATION,PLAYER_AGE,GP,GS,MIN,FGM,...,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS
0,203076,2012-13,0,1610612740,NOH,20.0,64,60,1846.0,349,...,0.751,165,357,522,63,75,112,89,158,867
1,203076,2013-14,0,1610612740,NOP,21.0,67,66,2358.0,522,...,0.791,207,466,673,105,89,189,109,200,1394
2,203076,2014-15,0,1610612740,NOP,22.0,68,68,2455.0,642,...,0.805,173,523,696,149,100,200,95,141,1656
3,203076,2015-16,0,1610612740,NOP,23.0,61,61,2164.0,560,...,0.758,130,497,627,116,78,125,121,148,1481
4,203076,2016-17,0,1610612740,NOP,24.0,75,75,2708.0,770,...,0.802,172,712,884,157,94,167,181,168,2099
5,203076,2017-18,0,1610612740,NOP,25.0,75,75,2727.0,780,...,0.828,187,644,831,174,115,193,162,159,2110
6,203076,2018-19,0,1610612740,NOP,26.0,56,56,1850.0,530,...,0.794,174,498,672,218,88,135,112,132,1452
7,203076,2019-20,0,1610612747,LAL,27.0,62,62,2131.0,551,...,0.846,142,435,577,200,91,143,154,156,1618
8,203076,2020-21,0,1610612747,LAL,28.0,36,36,1162.0,301,...,0.738,62,224,286,110,45,59,74,60,786
9,203076,2021-22,0,1610612747,LAL,29.0,40,40,1404.0,370,...,0.713,106,288,394,122,49,90,82,97,927


In [4]:
from nba_api.stats.static import teams
# get_teams returns a list of 30 dictionaries, each an NBA team.
nba_teams = teams.get_teams()
print('Number of teams fetched: {}'.format(len(nba_teams)))
nba_teams[:3]

Number of teams fetched: 30


[{'id': 1610612737,
  'full_name': 'Atlanta Hawks',
  'abbreviation': 'ATL',
  'nickname': 'Hawks',
  'city': 'Atlanta',
  'state': 'Atlanta',
  'year_founded': 1949},
 {'id': 1610612738,
  'full_name': 'Boston Celtics',
  'abbreviation': 'BOS',
  'nickname': 'Celtics',
  'city': 'Boston',
  'state': 'Massachusetts',
  'year_founded': 1946},
 {'id': 1610612739,
  'full_name': 'Cleveland Cavaliers',
  'abbreviation': 'CLE',
  'nickname': 'Cavaliers',
  'city': 'Cleveland',
  'state': 'Ohio',
  'year_founded': 1970}]

In [10]:
# Challenge: find gsw
result = [i for i in nba_teams if i.get("full_name") == "Golden State Warriors"][0]
result

{'id': 1610612744,
 'full_name': 'Golden State Warriors',
 'abbreviation': 'GSW',
 'nickname': 'Warriors',
 'city': 'Golden State',
 'state': 'California',
 'year_founded': 1946}

In [12]:
# Challenge: find Curry id
from nba_api.stats.static import players
# get_players returns a list of dictionaries, each representing a player.
nba_players = players.get_players()
print('Number of players fetched: {}'.format(len(nba_players)))
nba_players[:5]


Number of players fetched: 4831


[{'id': 76001,
  'full_name': 'Alaa Abdelnaby',
  'first_name': 'Alaa',
  'last_name': 'Abdelnaby',
  'is_active': False},
 {'id': 76002,
  'full_name': 'Zaid Abdul-Aziz',
  'first_name': 'Zaid',
  'last_name': 'Abdul-Aziz',
  'is_active': False},
 {'id': 76003,
  'full_name': 'Kareem Abdul-Jabbar',
  'first_name': 'Kareem',
  'last_name': 'Abdul-Jabbar',
  'is_active': False},
 {'id': 51,
  'full_name': 'Mahmoud Abdul-Rauf',
  'first_name': 'Mahmoud',
  'last_name': 'Abdul-Rauf',
  'is_active': False},
 {'id': 1505,
  'full_name': 'Tariq Abdul-Wahad',
  'first_name': 'Tariq',
  'last_name': 'Abdul-Wahad',
  'is_active': False}]

In [19]:
nba_players_active = [player for player in nba_players if player.get("is_active") == True]

In [28]:
curry_info = [i for i in nba_players_active if i.get("full_name") == "Stephen Curry"]
curry_id = curry_info[0].get("id")
print(curry_id)
print(type(curry_id))

201939
<class 'int'>


In [38]:
# Curry career data
career = playercareerstats.PlayerCareerStats(player_id=curry_id)
df_raw = career.get_data_frames()[0]
print(df_raw.columns)
df_raw.head()

Index(['PLAYER_ID', 'SEASON_ID', 'LEAGUE_ID', 'TEAM_ID', 'TEAM_ABBREVIATION',
       'PLAYER_AGE', 'GP', 'GS', 'MIN', 'FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A',
       'FG3_PCT', 'FTM', 'FTA', 'FT_PCT', 'OREB', 'DREB', 'REB', 'AST', 'STL',
       'BLK', 'TOV', 'PF', 'PTS'],
      dtype='object')


Unnamed: 0,PLAYER_ID,SEASON_ID,LEAGUE_ID,TEAM_ID,TEAM_ABBREVIATION,PLAYER_AGE,GP,GS,MIN,FGM,...,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS
0,201939,2009-10,0,1610612744,GSW,22.0,80,77,2896.0,528,...,0.885,48,308,356,472,152,19,243,252,1399
1,201939,2010-11,0,1610612744,GSW,23.0,74,74,2489.0,505,...,0.934,52,234,286,432,109,20,226,233,1373
2,201939,2011-12,0,1610612744,GSW,24.0,26,23,732.0,145,...,0.809,15,73,88,138,39,8,65,62,383
3,201939,2012-13,0,1610612744,GSW,25.0,78,78,2983.0,626,...,0.9,59,255,314,539,126,12,240,198,1786
4,201939,2013-14,0,1610612744,GSW,26.0,78,78,2846.0,652,...,0.885,46,288,334,666,128,14,294,194,1873


In [41]:
df_clean = df_raw[['SEASON_ID', 'PLAYER_AGE', 'GP', 'GS', 'MIN', 'FGM', 'FGA', 'FG3M', 'FG3A',
                   'FTM', 'FTA', 'OREB', 'DREB', 'AST', 'STL',
                   'BLK', 'TOV', 'PTS']]
display(df_clean)


Unnamed: 0,SEASON_ID,PLAYER_AGE,GP,GS,MIN,FGM,FGA,FG3M,FG3A,FTM,FTA,OREB,DREB,AST,STL,BLK,TOV,PTS
0,2009-10,22.0,80,77,2896.0,528,1143,166,380,177,200,48,308,472,152,19,243,1399
1,2010-11,23.0,74,74,2489.0,505,1053,151,342,212,227,52,234,432,109,20,226,1373
2,2011-12,24.0,26,23,732.0,145,296,55,121,38,47,15,73,138,39,8,65,383
3,2012-13,25.0,78,78,2983.0,626,1388,272,600,262,291,59,255,539,126,12,240,1786
4,2013-14,26.0,78,78,2846.0,652,1383,261,615,308,348,46,288,666,128,14,294,1873
5,2014-15,27.0,80,80,2613.0,653,1341,286,646,308,337,56,285,619,163,16,249,1900
6,2015-16,28.0,79,79,2700.0,805,1598,402,886,363,400,68,362,527,169,15,262,2375
7,2016-17,29.0,79,79,2639.0,675,1443,324,789,325,362,61,292,524,142,17,239,1999
8,2017-18,30.0,51,51,1631.0,428,864,212,501,278,302,36,225,310,80,8,153,1346
9,2018-19,31.0,69,69,2331.0,632,1340,354,810,263,287,45,324,361,92,25,192,1881


In [44]:
df_clean["fg3_pct"] = df_clean.FG3M / df_clean.FG3A

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [45]:
df_clean

Unnamed: 0,SEASON_ID,PLAYER_AGE,GP,GS,MIN,FGM,FGA,FG3M,FG3A,FTM,FTA,OREB,DREB,AST,STL,BLK,TOV,PTS,fg3_pct
0,2009-10,22.0,80,77,2896.0,528,1143,166,380,177,200,48,308,472,152,19,243,1399,0.436842
1,2010-11,23.0,74,74,2489.0,505,1053,151,342,212,227,52,234,432,109,20,226,1373,0.44152
2,2011-12,24.0,26,23,732.0,145,296,55,121,38,47,15,73,138,39,8,65,383,0.454545
3,2012-13,25.0,78,78,2983.0,626,1388,272,600,262,291,59,255,539,126,12,240,1786,0.453333
4,2013-14,26.0,78,78,2846.0,652,1383,261,615,308,348,46,288,666,128,14,294,1873,0.42439
5,2014-15,27.0,80,80,2613.0,653,1341,286,646,308,337,56,285,619,163,16,249,1900,0.442724
6,2015-16,28.0,79,79,2700.0,805,1598,402,886,363,400,68,362,527,169,15,262,2375,0.453725
7,2016-17,29.0,79,79,2639.0,675,1443,324,789,325,362,61,292,524,142,17,239,1999,0.410646
8,2017-18,30.0,51,51,1631.0,428,864,212,501,278,302,36,225,310,80,8,153,1346,0.423154
9,2018-19,31.0,69,69,2331.0,632,1340,354,810,263,287,45,324,361,92,25,192,1881,0.437037


In [47]:
avg_fg3m = df_clean.FG3M / df_clean.GP
avg_fg3m

0     2.075000
1     2.040541
2     2.115385
3     3.487179
4     3.346154
5     3.575000
6     5.088608
7     4.101266
8     4.156863
9     5.130435
10    2.400000
11    5.349206
12    4.453125
13    5.038462
dtype: float64

5.038461538461538