In [1]:
import pandas as pd
pd.set_option("display.max_columns",97)
pd.set_option("display.max_rows",74)
from time import sleep
from datetime import date

from nba_api.stats.static import players
from nba_api.stats.endpoints import playerawards, playercareerstats
from selenium import webdriver
from selenium.webdriver.common.by import By
from io import StringIO

In [3]:
#get list of all inactive players; to be used for training/testing
inactives=pd.DataFrame(players.get_inactive_players())
inactive_ineligibles=[]
inactives


Unnamed: 0,id,full_name,first_name,last_name,is_active
0,76001,Alaa Abdelnaby,Alaa,Abdelnaby,False
1,76002,Zaid Abdul-Aziz,Zaid,Abdul-Aziz,False
2,76003,Kareem Abdul-Jabbar,Kareem,Abdul-Jabbar,False
3,51,Mahmoud Abdul-Rauf,Mahmoud,Abdul-Rauf,False
4,1505,Tariq Abdul-Wahad,Tariq,Abdul-Wahad,False
...,...,...,...,...,...
4568,1627790,Ante Zizic,Ante,Zizic,False
4569,78647,Jim Zoet,Jim,Zoet,False
4570,78648,Bill Zopf,Bill,Zopf,False
4571,78650,Matt Zunic,Matt,Zunic,False


In [3]:
#determine current season for eligibility purposes
# Read the clock once so the year and the month always come from the same
# date (two separate date.today() calls can straddle a day/year boundary).
today=date.today()
# From July onward (month > 6) the season is the next calendar year.
season=(today.year+1) if today.month>6 else today.year
season

2026

In [13]:
test_df=inactives.iloc[:5].copy()
test_df

Unnamed: 0,id,full_name,first_name,last_name,is_active
0,76001,Alaa Abdelnaby,Alaa,Abdelnaby,False
1,76002,Zaid Abdul-Aziz,Zaid,Abdul-Aziz,False
2,76003,Kareem Abdul-Jabbar,Kareem,Abdul-Jabbar,False
3,51,Mahmoud Abdul-Rauf,Mahmoud,Abdul-Rauf,False
4,1505,Tariq Abdul-Wahad,Tariq,Abdul-Wahad,False


In [None]:
# This function creates the label variable - whether or not a player is in the HOF
# Moved into get_awards function
def get_hof(row):
    """Return whether the player's award list contains "Hall of Fame Inductee".

    Args:
        row: mapping-like record with an "id" entry that is passed to
            playerawards.PlayerAwards.

    Returns:
        True when "Hall of Fame Inductee" is one of the values in the
        DESCRIPTION column of the first awards frame, otherwise False.
    """
    # Pause before every request to respect NBA's rate limiting.
    sleep(0.5)
    award_frames = playerawards.PlayerAwards(row["id"]).get_data_frames()
    descriptions = award_frames[0]["DESCRIPTION"].values
    return "Hall of Fame Inductee" in descriptions
test_df["HOF"] = test_df.apply(get_hof, axis=1)
test_df

Unnamed: 0,id,full_name,first_name,last_name,is_active,HOF
0,76001,Alaa Abdelnaby,Alaa,Abdelnaby,False,False
1,76002,Zaid Abdul-Aziz,Zaid,Abdul-Aziz,False,False
2,76003,Kareem Abdul-Jabbar,Kareem,Abdul-Jabbar,False,True
3,51,Mahmoud Abdul-Rauf,Mahmoud,Abdul-Rauf,False,False
4,1505,Tariq Abdul-Wahad,Tariq,Abdul-Wahad,False,False


In [3]:
#function that will be passed to pd rename function to rename average columns
def rename_avgs(col):
    """Build the per-game label for a stat column name.

    Rules, in order:
      * "PF" becomes "FPG";
      * "OREB" / "DREB" keep their first two letters ("ORPG" / "DRPG");
      * any other name starting with "F" is kept whole ("FGM" -> "FGMPG");
      * everything else is reduced to its first letter ("MIN" -> "MPG").

    Args:
        col: the original column label (a non-empty string).

    Returns:
        The new label, always ending in "PG".
    """
    # Labels whose stem cannot be derived from the generic rules below.
    fixed_stems = {"PF": "F", "OREB": "OR", "DREB": "DR"}
    if col in fixed_stems:
        stem = fixed_stems[col]
    else:
        stem = col if col[0] == "F" else col[0]
    return stem + "PG"
def clean_avgs(df):
    """Return the per-game averages from the first row of ``df`` as a Series.

    The columns are relabelled with ``rename_avgs`` and then picked by
    position (5:8, 9:11, 12:14 and 15 onward) from row 0. In the notebook's
    displayed results these positions yield MPG, FGMPG, FGAPG, FG3MPG,
    FG3APG, FTMPG, FTAPG and ORPG through PPG, i.e. the percentage columns
    are skipped.

    The rename is done on a copy: the caller's DataFrame is left untouched,
    so calling this twice on the same frame gives the same result instead of
    re-renaming already renamed columns.

    Args:
        df: DataFrame with at least one row (row 0 is used).

    Returns:
        pd.Series holding the selected per-game values.

    Raises:
        IndexError: if ``df`` has no rows.
    """
    renamed = df.rename(columns=rename_avgs)
    return pd.concat(
        [
            renamed.iloc[0, 5:8],
            renamed.iloc[0, 9:11],
            renamed.iloc[0, 12:14],
            renamed.iloc[0, 15:],
        ]
    )
def test_insert_missing(stats:pd.Series)->pd.Series:
    if "FG%" not in stats:
        stats=pd.concat([stats[:10],pd.Series({"FG%":0.0}),stats[10:12],pd.Series({"3P%":0.0}),stats[12:14],pd.Series({"2P%":0.0,"eFG%":0.0}),stats[14:]])
    elif "3P%" not in stats:
        stats=pd.concat([stats[:13],pd.Series({"3P%":0.0}),stats[13:]])
    elif "2P%" not in stats:
        stats=pd.concat([stats[:16],pd.Series({"2P%":0.0}),stats[16:]])
    if "FT%" not in stats:
        stats=pd.concat([stats[:20],pd.Series({"FT%":0.0}),stats[20:]])
    return stats

In [24]:
# "Season" label of a career-summary row on basketball-reference, as seen in
# this notebook ("1 Yr", "2 Yrs"). \d+ also accepts multi-digit year counts,
# which a single \d could not match.
_CAREER_ROW_PATTERN = r"^\d+ Yrs?$"

# basketball-reference column labels -> the names used by the nba_api frames.
_BR_RENAME = {"G":"GP","MP":"MIN","FG":"FGM","FG%":"FG_PCT","3P":"FG3M","3P%":"FG3_PCT","3PA":"FG3A","FT":"FTM","FT%":"FT_PCT","ORB":"OREB","DRB":"DREB","TRB":"REB"}


def _career_summary_row(table_element):
    """Parse a scraped table element and return its first career-summary row."""
    table = pd.read_html(StringIO(table_element.get_attribute("outerHTML")))[0]
    is_career_row = table.fillna("")["Season"].str.contains(_CAREER_ROW_PATTERN)
    return table[is_career_row].iloc[0]


def _scraped_stat_slices(totals_row, avgs_row, prefix=""):
    """Normalise scraped totals/averages rows and return the slices to keep."""
    totals_row = test_insert_missing(totals_row).rename(_BR_RENAME)
    avgs_row = test_insert_missing(avgs_row).rename(_BR_RENAME).rename(rename_avgs)
    if prefix:
        totals_row = totals_row.add_prefix(prefix)
        avgs_row = avgs_row.add_prefix(prefix)
    # Positional picks, relying on test_insert_missing having restored the
    # percentage entries: totals keep 5:14 and 18 up to the last two entries;
    # averages skip the percentage positions and the final entry.
    return [
        totals_row.iloc[5:14],
        totals_row.iloc[18:-2],
        avgs_row.iloc[7:10],
        avgs_row.iloc[11:13],
        avgs_row.iloc[18:20],
        avgs_row.iloc[21:-1],
    ]


def _scrape_career_stats(row):
    """Scrape career stats for ``row`` from basketball-reference via Firefox."""
    driver = webdriver.Firefox()
    try:
        driver.install_addon("ublock_origin-1.68.0.xpi")
        # Player index page for the first letter of the last name.
        driver.get(f"https://www.basketball-reference.com/players/{row['last_name'].lower()[0]}/")
        player_link = driver.find_element(By.LINK_TEXT, row["full_name"]).get_attribute(
            "href"
        )
        driver.get(player_link)  # type: ignore
        avgs_row = _career_summary_row(driver.find_element(By.ID, "per_game_stats"))
        totals_row = _career_summary_row(driver.find_element(By.ID, "totals_stats"))
        # Playoff tables are optional; find_elements returns [] when absent.
        pf_totals_table = driver.find_elements(By.ID, "totals_stats_post")
        pf_rows = None
        if len(pf_totals_table) == 1:
            pf_totals = _career_summary_row(pf_totals_table[0])
            pf_avgs = _career_summary_row(driver.find_element(By.ID, "per_game_stats_post"))
            pf_rows = (pf_totals, pf_avgs)
    finally:
        # Always close the browser, even when an element lookup or parse fails.
        driver.quit()
    pieces = _scraped_stat_slices(totals_row, avgs_row)
    if pf_rows is not None:
        pieces += _scraped_stat_slices(pf_rows[0], pf_rows[1], prefix="PF_")
    return pd.concat(pieces)


def get_career_stats(row):
    """Collect career totals and per-game averages for one player.

    The stats are requested from playercareerstats.PlayerCareerStats. If that
    request raises KeyError, the numbers are scraped from basketball-reference
    with a Selenium-driven Firefox instead.

    Side effect (API path only): ``row["full_name"]`` is appended to the
    module-level ``inactive_ineligibles`` list when the module-level
    ``season`` minus the end year of the player's last listed season is at
    most 4. The scraping path performs no such check.

    Args:
        row: mapping-like record providing "id", "full_name" and "last_name".

    Returns:
        pd.Series of totals followed by per-game averages; when playoff data
        exists the same values follow again with a "PF_" prefix.
    """
    # sleep to respect rate limiting
    sleep(0.5)
    try:
        #holds the various dfs-season, career, reg season, playoffs
        totals = playercareerstats.PlayerCareerStats(row["id"]).get_data_frames()
    except KeyError:
        return _scrape_career_stats(row)
    # SEASON_ID starts with the four-digit start year; +1 gives the end year.
    last_season_end = int(totals[0].iloc[-1]["SEASON_ID"][:4]) + 1
    if season - last_season_end <= 4:
        inactive_ineligibles.append(row["full_name"])
    sleep(0.5)
    avgs = playercareerstats.PlayerCareerStats(row["id"], per_mode36="PerGame").get_data_frames()
    pieces = [totals[1].iloc[0, 3:], clean_avgs(avgs[1])]
    #check if player hasn't played in the playoffs; if so, only return regular season
    if len(totals[3]) != 0:
        pieces += [
            totals[3].iloc[0, 3:].add_prefix("PF_"),
            clean_avgs(avgs[3]).add_prefix("PF_"),
        ]
    return pd.concat(pieces)
#playercareerstats.PlayerCareerStats(76003).get_data_frames()[0].columns

In [5]:
ai=inactives[inactives["id"]==947]
ai

Unnamed: 0,id,full_name,first_name,last_name,is_active
1928,947,Allen Iverson,Allen,Iverson,False


In [9]:
# ai is a one-row DataFrame; pass the row itself so row["id"] and
# row["full_name"] are scalars, as they are when called through df.apply(axis=1).
get_career_stats(ai.iloc[0])

GP          914
GS          901
MIN       37577
FGM        8467
FGA       19906
          ...  
PF_SPG      2.1
PF_BPG      0.2
PF_TPG      3.1
PF_FPG      2.2
PF_PPG     29.7
Name: 0, Length: 74, dtype: object

In [12]:
pd.concat([test_df,test_df.apply(get_career_stats,axis=1)],axis=1)

Unnamed: 0,id,full_name,first_name,last_name,is_active,HOF,GP,GS,MIN,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS,MPG,FGMPG,FGAPG,FG3MPG,FG3APG,FTMPG,FTAPG,ORPG,DRPG,RPG,APG,SPG,BPG,TPG,FPG,PPG,PF_GP,PF_GS,PF_MIN,PF_FGM,PF_FGA,PF_FG_PCT,PF_FG3M,PF_FG3A,PF_FG3_PCT,PF_FTM,PF_FTA,PF_FT_PCT,PF_OREB,PF_DREB,PF_REB,PF_AST,PF_STL,PF_BLK,PF_TOV,PF_PF,PF_PTS,PF_MPG,PF_FGMPG,PF_FGAPG,PF_FG3MPG,PF_FG3APG,PF_FTMPG,PF_FTAPG,PF_ORPG,PF_DRPG,PF_RPG,PF_APG,PF_SPG,PF_BPG,PF_TPG,PF_FPG,PF_PPG
0,76001,Alaa Abdelnaby,Alaa,Abdelnaby,False,False,256,53.0,3200,620,1236,0.501618,0.0,3.0,0.0,225,321,0.700935,283,563,846,85,71,69,247,484,1465,12.5,2.4,4.8,0.0,0.0,0.9,1.3,1.1,2.2,3.3,0.3,0.3,0.3,1.0,1.9,5.7,17,4.0,106,18,40,0.45,0.0,0.0,0.0,2,4,0.5,3,17,20,3,0,1,11.0,11,38,6.2,1.1,2.4,0.0,0.0,0.1,0.2,0.2,1.0,1.2,0.2,0.0,0.1,0.6,0.6,2.2
1,76002,Zaid Abdul-Aziz,Zaid,Abdul-Aziz,False,False,505,,11023,1769,4138,0.427501,,,,1019,1400,0.727857,503,1124,4065,601,131,205,11,1120,4557,21.8,3.5,8.2,,,2.0,2.8,2.4,5.4,8.0,1.2,0.6,1.0,0.7,2.2,9.0,18,,210,37,70,0.528571,,,,18,26,0.692308,11,27,64,9,1,8,,12,92,11.7,2.1,3.9,,,1.0,1.4,1.0,2.5,3.6,0.5,0.1,0.7,,0.7,5.1
2,76003,Kareem Abdul-Jabbar,Kareem,Abdul-Jabbar,False,True,1560,1476.0,57446,15837,28307,0.559473,1.0,18.0,,6712,9304,0.72141,2975,9394,17440,5660,1160,3189,2527,4657,38387,36.8,10.2,18.1,0.0,,4.3,6.0,2.4,7.6,11.2,3.6,0.9,2.6,2.7,3.0,24.6,237,140.0,8612,2356,4422,0.532791,0.0,4.0,,1050,1419,0.739958,505,1273,2481,767,189,476,447.0,797,5762,36.3,9.9,18.7,0.0,,4.4,6.0,2.6,6.5,10.5,3.2,1.0,2.4,2.6,3.4,24.3
3,51,Mahmoud Abdul-Rauf,Mahmoud,Abdul-Rauf,False,False,586,336.0,15627,3514,7943,0.442402,474.0,1339.0,0.353996,1051,1161,0.905254,219,868,1087,2079,487,46,963,1106,8553,26.7,6.0,13.6,0.8,2.3,1.8,2.0,0.4,1.5,1.9,3.5,0.8,0.1,1.6,1.9,14.6,15,14.0,415,69,187,0.368984,14.0,49.0,0.285714,43,45,0.955556,5,18,23,35,7,1,22.0,37,195,27.7,4.6,12.5,0.9,3.3,2.9,3.0,0.3,1.2,1.5,2.3,0.5,0.1,1.5,2.5,13.0
4,1505,Tariq Abdul-Wahad,Tariq,Abdul-Wahad,False,False,236,145.0,4808,720,1726,0.417149,18.0,76.0,0.236842,372,529,0.703214,286,490,776,266,184,83,309,485,1830,20.4,3.1,7.3,0.1,0.3,1.6,2.2,1.2,2.1,3.3,1.1,0.8,0.4,1.3,2.1,7.8,13,5.0,178,24,63,0.380952,0.0,3.0,0.0,20,24,0.833333,17,24,41,11,4,4,4.0,14,68,13.7,1.8,4.8,0.0,0.2,1.5,1.8,1.3,1.8,3.2,0.8,0.3,0.3,0.3,1.1,5.2


In [5]:
#will eventually be combined with get_hof function as both make same api call
#for now, though, need to isolate the logic to get awards
def get_awards(row):
    """Count how often a player won each award and flag Hall of Fame status.

    Awards that carry a team number of "1", "2" or "3" get their description
    prefixed with "1st Team ", "2nd Team " or "3rd Team ". Rows with any
    other team number keep their description unchanged, so they are still
    counted instead of turning into NaN and being dropped by the groupby.

    Args:
        row: mapping-like record with an "id" entry that is passed to
            playerawards.PlayerAwards.

    Returns:
        pd.Series indexed by award description holding the number of times
        each award appears, plus a boolean "HOF" entry.
    """
    # sleep to respect rate limiting, like the other API helpers in this notebook
    sleep(0.5)
    #get list of all player's awards
    awards = playerawards.PlayerAwards(row["id"]).get_data_frames()[0]
    #used to distinguish 1st, 2nd, 3rd team All-NBA, etc
    team_nums = {"1": "1st", "2": "2nd", "3": "3rd"}
    # Unmapped team numbers become NaN here and are excluded by the mask.
    team_prefix = awards["ALL_NBA_TEAM_NUMBER"].map(team_nums)
    numbered = team_prefix.notna()
    if numbered.any():
        awards.loc[numbered, "DESCRIPTION"] = (
            team_prefix[numbered] + " Team " + awards.loc[numbered, "DESCRIPTION"]
        )
    #count number of times player has won each award
    award_counts = awards.groupby("DESCRIPTION").size()
    #future label column, HOF, determination is here now instead of its own function
    # NOTE(review): the "Hall of Fame Inductee" count stays in the result next
    # to HOF; it duplicates the label, so confirm it is dropped before training.
    award_counts["HOF"] = "Hall of Fame Inductee" in awards["DESCRIPTION"].values
    return award_counts

In [6]:
mj=inactives[inactives["full_name"]=="Michael Jordan"]
mj

Unnamed: 0,id,full_name,first_name,last_name,is_active
2139,893,Michael Jordan,Michael,Jordan,False


In [16]:
# mj is a one-row DataFrame; pass the row itself so row["id"] is a scalar,
# as it is when get_awards is called through df.apply(axis=1).
get_awards(mj.iloc[0])

DESCRIPTION
1st Team All-Defensive Team                              9
1st Team All-NBA                                        10
1st Team All-Rookie Team                                 1
2nd Team All-NBA                                         1
Hall of Fame Inductee                                    1
IBM Award                                                2
NBA All-Star                                            14
NBA All-Star Most Valuable Player                        3
NBA Champion                                             6
NBA Defensive Player of the Year                         1
NBA Finals Most Valuable Player                          6
NBA Most Valuable Player                                 5
NBA Player of the Month                                 16
NBA Player of the Week                                  25
NBA Rookie of the Month                                  3
NBA Rookie of the Year                                   1
NBA Sporting News Most Valuable Player of th

In [17]:
pd.concat([test_df,test_df.apply(get_awards,axis=1)],axis=1)

Unnamed: 0,id,full_name,first_name,last_name,is_active,1st Team All-Defensive Team,1st Team All-NBA,1st Team All-Rookie Team,2nd Team All-Defensive Team,2nd Team All-NBA,2nd Team All-Rookie Team,HOF,Hall of Fame Inductee,NBA All-Star,NBA Champion,NBA Finals Most Valuable Player,NBA Most Improved Player,NBA Most Valuable Player,NBA Player of the Month,NBA Player of the Week,NBA Rookie of the Year,NBA Sporting News Most Valuable Player of the Year,NBA Sporting News Rookie of the Year
0,76001,Alaa Abdelnaby,Alaa,Abdelnaby,False,,,,,,,False,,,,,,,,,,,
1,76002,Zaid Abdul-Aziz,Zaid,Abdul-Aziz,False,,,,,,,False,,,,,,,,,,,
2,76003,Kareem Abdul-Jabbar,Kareem,Abdul-Jabbar,False,5.0,10.0,1.0,6.0,5.0,,True,1.0,19.0,6.0,2.0,,6.0,2.0,7.0,1.0,6.0,1.0
3,51,Mahmoud Abdul-Rauf,Mahmoud,Abdul-Rauf,False,,,,,,1.0,False,,,,,1.0,,,,,,
4,1505,Tariq Abdul-Wahad,Tariq,Abdul-Wahad,False,,,,,,,False,,,,,,,,,,,


In [20]:
#final check I think for verifying the scraping for inactive players
test_df=pd.concat([test_df,test_df.apply(get_career_stats,axis=1),test_df.apply(get_awards,axis=1)],axis=1)

In [67]:
ii_tests=["Carmelo Anthony","Blake Griffin","Dwight Howard","John Wall","Andre Iguodala","Vince Carter","Chauncey Billups"]
ii_df=inactives[inactives["full_name"].isin(ii_tests)]
ii_df=pd.concat([ii_df,inactives[inactives["full_name"]=="Kostas Antetokounmpo"]])
ii_df

Unnamed: 0,id,full_name,first_name,last_name,is_active
103,2546,Carmelo Anthony,Carmelo,Anthony,False
315,1497,Chauncey Billups,Chauncey,Billups,False
661,1713,Vince Carter,Vince,Carter,False
1556,201933,Blake Griffin,Blake,Griffin,False
1863,2730,Dwight Howard,Dwight,Howard,False
1916,2738,Andre Iguodala,Andre,Iguodala,False
4210,202322,John Wall,John,Wall,False
102,1628961,Kostas Antetokounmpo,Kostas,Antetokounmpo,False


In [32]:
# Print, per player, the same "last season ended at most 4 seasons ago" test
# that get_career_stats applies. The value is computed outside the f-string
# because reusing double quotes inside a double-quoted f-string only parses
# on Python 3.12+.
for row in ii_df.itertuples():
    sleep(0.5)  # respect rate limiting between API calls
    season_rows = playercareerstats.PlayerCareerStats(row.id).get_data_frames()[0]
    last_season_end = int(season_rows.iloc[-1]["SEASON_ID"][:4]) + 1
    print(f"{row.full_name} is ii: {season - last_season_end <= 4}")


Kostas Antetokounmpo is ii: False
Carmelo Anthony is ii: True
Chauncey Billups is ii: False
Vince Carter is ii: False
Blake Griffin is ii: True
Dwight Howard is ii: True
Andre Iguodala is ii: True
John Wall is ii: True


In [None]:
pd.concat([ii_df,ii_df.apply(get_career_stats,axis=1),ii_df.apply(get_awards,axis=1)],axis=1)

Unnamed: 0,id,full_name,first_name,last_name,is_active,APG,AST,BLK,BPG,DREB,DRPG,FG3A,FG3APG,FG3M,FG3MPG,FG3_PCT,FGA,FGAPG,FGM,FGMPG,FG_PCT,FPG,FTA,FTAPG,FTM,FTMPG,FT_PCT,GP,GS,MIN,MPG,OREB,ORPG,PF,PF_APG,PF_AST,PF_BLK,PF_BPG,PF_DREB,PF_DRPG,PF_FG3A,PF_FG3APG,PF_FG3M,PF_FG3MPG,PF_FG3_PCT,PF_FGA,PF_FGAPG,PF_FGM,...,PF_FTA,PF_FTAPG,PF_FTM,PF_FTMPG,PF_FT_PCT,PF_GP,PF_GS,PF_MIN,PF_MPG,PF_OREB,PF_ORPG,PF_PF,PF_PPG,PF_PTS,PF_REB,PF_RPG,PF_SPG,PF_STL,PF_TOV,PF_TPG,PPG,PTS,REB,RPG,SPG,STL,TOV,TPG,1st Team All-Defensive Team,1st Team All-NBA,1st Team All-Rookie Team,2nd Team All-Defensive Team,2nd Team All-NBA,3rd Team All-NBA,HOF,J. Walter Kennedy Citizenship,NBA All-Star,NBA Champion,NBA Defensive Player of the Year,NBA Finals Most Valuable Player,NBA Player of the Month,NBA Player of the Week,NBA Rookie of the Month,NBA Rookie of the Year,NBA Sporting News Rookie of the Year,NBA Sportsmanship,Olympic Bronze Medal,Olympic Gold Medal
103,2546,Carmelo Anthony,Carmelo,Anthony,False,2.7,3422,644,0.5,5825,4.6,4873,3.9,1731,1.4,0.355223,22643,18.0,10119,8.0,0.446893,2.8,7764,6.2,6320,5.0,0.814013,1260,1120,43513,34.5,1983,1.6,3543,2.5,204.0,29.0,0.3,404.0,4.9,312.0,3.8,101.0,1.2,0.323718,1614.0,19.4,669.0,...,575.0,6.9,475.0,5.7,0.826087,83.0,77.0,3090.0,37.2,153.0,1.8,298.0,23.1,1914.0,557.0,6.7,1.2,100.0,218.0,2.6,22.5,28289,7808,6.2,1.0,1223,3052,2.4,,,1.0,,2.0,4.0,False,,10.0,,,,5.0,17.0,6.0,,,,1.0,3.0
315,1497,Chauncey Billups,Chauncey,Billups,False,5.4,5636,168,0.2,2512,2.4,4725,4.5,1830,1.8,0.387302,11413,10.9,4738,4.5,0.415141,2.1,5029,4.8,4496,4.3,0.894015,1043,937,33008,31.6,480,0.5,2169,5.7,831.0,23.0,0.2,421.0,2.9,729.0,5.0,267.0,1.8,0.366255,1793.0,12.3,737.0,...,892.0,6.1,785.0,5.4,0.880045,146.0,143.0,5323.0,36.5,71.0,0.5,373.0,17.3,2526.0,492.0,3.4,1.0,148.0,322.0,2.2,15.2,15802,2992,2.9,1.0,1051,2138,2.0,,,,2.0,1.0,2.0,False,1.0,5.0,1.0,,1.0,2.0,6.0,,,,1.0,,
661,1713,Vince Carter,Vince,Carter,False,3.1,4714,888,0.6,4948,3.2,6168,4.0,2290,1.5,0.371271,21339,13.8,9293,6.0,0.435494,2.6,6082,3.9,4852,3.1,0.797764,1541,982,46371,30.1,1658,1.1,3995,3.4,295.0,47.0,0.5,346.0,3.9,352.0,4.0,119.0,1.4,0.338068,1340.0,15.2,557.0,...,452.0,5.1,360.0,4.1,0.79646,88.0,66.0,3033.0,34.5,131.0,1.5,257.0,18.1,1593.0,477.0,5.4,1.1,94.0,168.0,1.9,16.7,25728,6606,4.3,1.0,1530,2590,1.7,,,1.0,,1.0,1.0,False,,8.0,,,,3.0,11.0,2.0,1.0,1.0,,,1.0
1556,201933,Blake Griffin,Blake,Griffin,False,4.0,3055,359,0.5,4622,6.0,1639,2.1,538,0.7,0.328249,10988,14.4,5419,7.1,0.493174,2.7,4508,5.9,3137,4.1,0.695874,765,692,24385,31.9,1487,1.9,2058,3.5,238.0,53.0,0.8,393.0,5.8,77.0,1.1,29.0,0.4,0.376623,970.0,14.3,477.0,...,349.0,5.1,255.0,3.8,0.730659,68.0,64.0,2219.0,32.6,132.0,1.9,227.0,18.2,1238.0,525.0,7.7,1.0,68.0,141.0,2.1,19.0,14513,6109,8.0,0.8,626,1732,2.3,,,1.0,,3.0,2.0,False,,6.0,,,,1.0,6.0,6.0,1.0,,,,
1863,2730,Dwight Howard,Dwight,Howard,False,1.3,1676,2228,1.8,10476,8.4,103,0.1,22,0.0,0.213592,12016,9.7,7051,5.7,0.586801,3.1,9455,7.6,5361,4.3,0.567002,1242,1078,39455,31.8,4151,3.3,3912,1.2,149.0,248.0,2.0,1032.0,8.3,7.0,0.1,1.0,0.0,0.142857,1156.0,9.2,681.0,...,997.0,8.0,546.0,4.4,0.547643,125.0,102.0,3945.0,31.6,441.0,3.5,467.0,15.3,1909.0,1473.0,11.8,0.8,94.0,323.0,2.6,15.7,19485,14627,11.8,0.9,1081,3302,2.7,4.0,5.0,1.0,1.0,1.0,2.0,False,,8.0,1.0,3.0,,6.0,15.0,,,,,,1.0
1916,2738,Andre Iguodala,Andre,Iguodala,False,4.2,5147,633,0.5,4929,4.0,3278,2.7,1081,0.9,0.329774,10870,8.8,5028,4.1,0.462557,1.8,3994,3.2,2831,2.3,0.708813,1231,784,39505,32.1,1118,0.9,2180,3.5,618.0,88.0,0.5,603.0,3.4,513.0,2.9,182.0,1.0,0.354776,1323.0,7.5,606.0,...,458.0,2.6,267.0,1.5,0.582969,177.0,81.0,5268.0,29.8,173.0,1.0,351.0,9.4,1661.0,776.0,4.4,1.2,217.0,237.0,1.3,11.3,13968,6047,4.9,1.4,1765,2240,1.8,1.0,,1.0,1.0,,,False,,1.0,4.0,,1.0,,,,,,,,1.0
4210,202322,John Wall,John,Wall,False,8.9,5735,439,0.7,2343,3.6,2021,3.1,651,1.0,0.322118,10171,15.7,4373,6.8,0.429948,2.2,3468,5.4,2691,4.2,0.775952,647,604,22586,34.9,361,0.6,1394,9.8,364.0,41.0,1.1,139.0,3.8,131.0,3.5,35.0,0.9,0.267176,682.0,18.4,286.0,...,247.0,6.7,203.0,5.5,0.821862,37.0,37.0,1433.0,38.7,20.0,0.5,84.0,21.9,810.0,159.0,4.3,1.7,64.0,145.0,3.9,18.7,12088,2704,4.2,1.6,1045,2412,3.7,,,1.0,1.0,,1.0,False,,5.0,,,,2.0,7.0,4.0,,,,,
102,1628961,Kostas Antetokounmpo,Kostas,Antetokounmpo,False,0.1,3,4,0.2,17,0.8,0,0.0,0,0.0,0.0,16,0.7,6,0.3,0.375,0.5,19,0.9,9,0.4,0.473684,22,0,86,3.9,6,0.3,10,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,1.0,21,23,1.0,0.2,4,13,0.6,,,,,,,False,,,,,,,,,,,,,


In [52]:
len(playercareerstats.PlayerCareerStats(1628961).get_data_frames()[3])

0

In [9]:
test_df

Unnamed: 0,id,full_name,first_name,last_name,is_active,GP,GS,MIN,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS,MPG,FGMPG,FGAPG,FG3MPG,FG3APG,FTMPG,FTAPG,ORPG,DRPG,RPG,APG,SPG,BPG,TPG,FPG,PPG,PF_GP,PF_GS,PF_MIN,PF_FGM,PF_FGA,PF_FG_PCT,PF_FG3M,PF_FG3A,PF_FG3_PCT,PF_FTM,PF_FTA,PF_FT_PCT,PF_OREB,PF_DREB,PF_REB,PF_AST,PF_STL,PF_BLK,PF_TOV,PF_PF,PF_PTS,PF_MPG,PF_FGMPG,PF_FGAPG,PF_FG3MPG,PF_FG3APG,PF_FTMPG,PF_FTAPG,PF_ORPG,PF_DRPG,PF_RPG,PF_APG,PF_SPG,PF_BPG,PF_TPG,PF_FPG,PF_PPG,1st Team All-Defensive Team,1st Team All-NBA,1st Team All-Rookie Team,2nd Team All-Defensive Team,2nd Team All-NBA,2nd Team All-Rookie Team,HOF,Hall of Fame Inductee,NBA All-Star,NBA Champion,NBA Finals Most Valuable Player,NBA Most Improved Player,NBA Most Valuable Player,NBA Player of the Month,NBA Player of the Week,NBA Rookie of the Year,NBA Sporting News Most Valuable Player of the Year,NBA Sporting News Rookie of the Year
0,76001,Alaa Abdelnaby,Alaa,Abdelnaby,False,256,53.0,3200,620,1236,0.501618,0.0,3.0,0.0,225,321,0.700935,283,563,846,85,71,69,247,484,1465,12.5,2.4,4.8,0.0,0.0,0.9,1.3,1.1,2.2,3.3,0.3,0.3,0.3,1.0,1.9,5.7,17,4.0,106,18,40,0.45,0.0,0.0,0.0,2,4,0.5,3,17,20,3,0,1,11.0,11,38,6.2,1.1,2.4,0.0,0.0,0.1,0.2,0.2,1.0,1.2,0.2,0.0,0.1,0.6,0.6,2.2,0.0,0.0,0.0,0.0,0.0,0.0,False,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,76002,Zaid Abdul-Aziz,Zaid,Abdul-Aziz,False,505,0.0,11023,1769,4138,0.427501,0.0,0.0,0.0,1019,1400,0.727857,503,1124,4065,601,131,205,11,1120,4557,21.8,3.5,8.2,0.0,0.0,2.0,2.8,2.4,5.4,8.0,1.2,0.6,1.0,0.7,2.2,9.0,18,0.0,210,37,70,0.528571,0.0,0.0,0.0,18,26,0.692308,11,27,64,9,1,8,0.0,12,92,11.7,2.1,3.9,0.0,0.0,1.0,1.4,1.0,2.5,3.6,0.5,0.1,0.7,0.0,0.7,5.1,0.0,0.0,0.0,0.0,0.0,0.0,False,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,76003,Kareem Abdul-Jabbar,Kareem,Abdul-Jabbar,False,1560,1476.0,57446,15837,28307,0.559473,1.0,18.0,0.0,6712,9304,0.72141,2975,9394,17440,5660,1160,3189,2527,4657,38387,36.8,10.2,18.1,0.0,0.0,4.3,6.0,2.4,7.6,11.2,3.6,0.9,2.6,2.7,3.0,24.6,237,140.0,8612,2356,4422,0.532791,0.0,4.0,0.0,1050,1419,0.739958,505,1273,2481,767,189,476,447.0,797,5762,36.3,9.9,18.7,0.0,0.0,4.4,6.0,2.6,6.5,10.5,3.2,1.0,2.4,2.6,3.4,24.3,5.0,10.0,1.0,6.0,5.0,0.0,True,1.0,19.0,6.0,2.0,0.0,6.0,2.0,7.0,1.0,6.0,1.0
3,51,Mahmoud Abdul-Rauf,Mahmoud,Abdul-Rauf,False,586,336.0,15627,3514,7943,0.442402,474.0,1339.0,0.353996,1051,1161,0.905254,219,868,1087,2079,487,46,963,1106,8553,26.7,6.0,13.6,0.8,2.3,1.8,2.0,0.4,1.5,1.9,3.5,0.8,0.1,1.6,1.9,14.6,15,14.0,415,69,187,0.368984,14.0,49.0,0.285714,43,45,0.955556,5,18,23,35,7,1,22.0,37,195,27.7,4.6,12.5,0.9,3.3,2.9,3.0,0.3,1.2,1.5,2.3,0.5,0.1,1.5,2.5,13.0,0.0,0.0,0.0,0.0,0.0,1.0,False,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
4,1505,Tariq Abdul-Wahad,Tariq,Abdul-Wahad,False,236,145.0,4808,720,1726,0.417149,18.0,76.0,0.236842,372,529,0.703214,286,490,776,266,184,83,309,485,1830,20.4,3.1,7.3,0.1,0.3,1.6,2.2,1.2,2.1,3.3,1.1,0.8,0.4,1.3,2.1,7.8,13,5.0,178,24,63,0.380952,0.0,3.0,0.0,20,24,0.833333,17,24,41,11,4,4,4.0,14,68,13.7,1.8,4.8,0.0,0.2,1.5,1.8,1.3,1.8,3.2,0.8,0.3,0.3,0.3,1.1,5.2,0.0,0.0,0.0,0.0,0.0,0.0,False,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [5]:
test_df2=inactives.iloc[6:11].copy()
test_df2

Unnamed: 0,id,full_name,first_name,last_name,is_active
6,76005,Tom Abernethy,Tom,Abernethy,False
7,76006,Forest Able,Forest,Able,False
8,76007,John Abramovic,John,Abramovic,False
9,203518,Alex Abrines,Alex,Abrines,False
10,101165,Alex Acker,Alex,Acker,False


In [16]:
test_df2=pd.concat([test_df2,test_df2.apply(get_career_stats,axis=1)],axis=1)
test_df2=pd.concat([test_df2,test_df2.apply(get_awards,axis=1)],axis=1).fillna(0)

In [15]:
test_df2.apply(get_awards,axis=1)

DESCRIPTION,HOF,Olympic Bronze Medal
6,False,
7,False,
8,False,
9,False,1.0
10,False,


In [21]:
test_df

Unnamed: 0,id,full_name,first_name,last_name,is_active,GP,GS,MIN,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS,MPG,FGMPG,FGAPG,FG3MPG,FG3APG,FTMPG,FTAPG,ORPG,DRPG,RPG,APG,SPG,BPG,TPG,FPG,PPG,PF_GP,PF_GS,PF_MIN,PF_FGM,PF_FGA,PF_FG_PCT,PF_FG3M,PF_FG3A,PF_FG3_PCT,PF_FTM,PF_FTA,PF_FT_PCT,PF_OREB,PF_DREB,PF_REB,PF_AST,PF_STL,PF_BLK,PF_TOV,PF_PF,PF_PTS,PF_MPG,PF_FGMPG,PF_FGAPG,PF_FG3MPG,PF_FG3APG,PF_FTMPG,PF_FTAPG,PF_ORPG,PF_DRPG,PF_RPG,PF_APG,PF_SPG,PF_BPG,PF_TPG,PF_FPG,PF_PPG,1st Team All-Defensive Team,1st Team All-NBA,1st Team All-Rookie Team,2nd Team All-Defensive Team,2nd Team All-NBA,2nd Team All-Rookie Team,HOF,Hall of Fame Inductee,NBA All-Star,NBA Champion,NBA Finals Most Valuable Player,NBA Most Improved Player,NBA Most Valuable Player,NBA Player of the Month,NBA Player of the Week,NBA Rookie of the Year,NBA Sporting News Most Valuable Player of the Year,NBA Sporting News Rookie of the Year
0,76001,Alaa Abdelnaby,Alaa,Abdelnaby,False,256,53.0,3200,620,1236,0.501618,0.0,3.0,0.0,225,321,0.700935,283,563,846,85,71,69,247,484,1465,12.5,2.4,4.8,0.0,0.0,0.9,1.3,1.1,2.2,3.3,0.3,0.3,0.3,1.0,1.9,5.7,17,4.0,106,18,40,0.45,0.0,0.0,0.0,2,4,0.5,3,17,20,3,0,1,11.0,11,38,6.2,1.1,2.4,0.0,0.0,0.1,0.2,0.2,1.0,1.2,0.2,0.0,0.1,0.6,0.6,2.2,,,,,,,False,,,,,,,,,,,
1,76002,Zaid Abdul-Aziz,Zaid,Abdul-Aziz,False,505,,11023,1769,4138,0.427501,,,,1019,1400,0.727857,503,1124,4065,601,131,205,11,1120,4557,21.8,3.5,8.2,,,2.0,2.8,2.4,5.4,8.0,1.2,0.6,1.0,0.7,2.2,9.0,18,,210,37,70,0.528571,,,,18,26,0.692308,11,27,64,9,1,8,,12,92,11.7,2.1,3.9,,,1.0,1.4,1.0,2.5,3.6,0.5,0.1,0.7,,0.7,5.1,,,,,,,False,,,,,,,,,,,
2,76003,Kareem Abdul-Jabbar,Kareem,Abdul-Jabbar,False,1560,1476.0,57446,15837,28307,0.559473,1.0,18.0,,6712,9304,0.72141,2975,9394,17440,5660,1160,3189,2527,4657,38387,36.8,10.2,18.1,0.0,,4.3,6.0,2.4,7.6,11.2,3.6,0.9,2.6,2.7,3.0,24.6,237,140.0,8612,2356,4422,0.532791,0.0,4.0,,1050,1419,0.739958,505,1273,2481,767,189,476,447.0,797,5762,36.3,9.9,18.7,0.0,,4.4,6.0,2.6,6.5,10.5,3.2,1.0,2.4,2.6,3.4,24.3,5.0,10.0,1.0,6.0,5.0,,True,1.0,19.0,6.0,2.0,,6.0,2.0,7.0,1.0,6.0,1.0
3,51,Mahmoud Abdul-Rauf,Mahmoud,Abdul-Rauf,False,586,336.0,15627,3514,7943,0.442402,474.0,1339.0,0.353996,1051,1161,0.905254,219,868,1087,2079,487,46,963,1106,8553,26.7,6.0,13.6,0.8,2.3,1.8,2.0,0.4,1.5,1.9,3.5,0.8,0.1,1.6,1.9,14.6,15,14.0,415,69,187,0.368984,14.0,49.0,0.285714,43,45,0.955556,5,18,23,35,7,1,22.0,37,195,27.7,4.6,12.5,0.9,3.3,2.9,3.0,0.3,1.2,1.5,2.3,0.5,0.1,1.5,2.5,13.0,,,,,,1.0,False,,,,,1.0,,,,,,
4,1505,Tariq Abdul-Wahad,Tariq,Abdul-Wahad,False,236,145.0,4808,720,1726,0.417149,18.0,76.0,0.236842,372,529,0.703214,286,490,776,266,184,83,309,485,1830,20.4,3.1,7.3,0.1,0.3,1.6,2.2,1.2,2.1,3.3,1.1,0.8,0.4,1.3,2.1,7.8,13,5.0,178,24,63,0.380952,0.0,3.0,0.0,20,24,0.833333,17,24,41,11,4,4,4.0,14,68,13.7,1.8,4.8,0.0,0.2,1.5,1.8,1.3,1.8,3.2,0.8,0.3,0.3,0.3,1.1,5.2,,,,,,,False,,,,,,,,,,,


In [5]:
#need to check a type for the general file
print(type(playercareerstats.PlayerCareerStats(947).get_data_frames()[1]))
print(type(clean_avgs(playercareerstats.PlayerCareerStats(947).get_data_frames()[1])))

<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.series.Series'>


In [7]:
removals=["Alex Acker","Forest Able"]
removed=test_df2[test_df2["full_name"].isin(removals)]
removed

Unnamed: 0,id,full_name,first_name,last_name,is_active
7,76006,Forest Able,Forest,Able,False
10,101165,Alex Acker,Alex,Acker,False


In [9]:
test_df2.drop(removed.index)

Unnamed: 0,id,full_name,first_name,last_name,is_active
6,76005,Tom Abernethy,Tom,Abernethy,False
8,76007,John Abramovic,John,Abramovic,False
9,203518,Alex Abrines,Alex,Abrines,False


In [None]:
#pd.concat([test_df,removed])

Unnamed: 0,id,full_name,first_name,last_name,is_active
0,76001,Alaa Abdelnaby,Alaa,Abdelnaby,False
1,76002,Zaid Abdul-Aziz,Zaid,Abdul-Aziz,False
2,76003,Kareem Abdul-Jabbar,Kareem,Abdul-Jabbar,False
3,51,Mahmoud Abdul-Rauf,Mahmoud,Abdul-Rauf,False
4,1505,Tariq Abdul-Wahad,Tariq,Abdul-Wahad,False
7,76006,Forest Able,Forest,Able,False
10,101165,Alex Acker,Alex,Acker,False


In [14]:
test_df=pd.concat([test_df,inactives[inactives["full_name"].isin(["Peter Aluma","JamesOn Curry"])]])
test_df

Unnamed: 0,id,full_name,first_name,last_name,is_active
0,76001,Alaa Abdelnaby,Alaa,Abdelnaby,False
1,76002,Zaid Abdul-Aziz,Zaid,Abdul-Aziz,False
2,76003,Kareem Abdul-Jabbar,Kareem,Abdul-Jabbar,False
3,51,Mahmoud Abdul-Rauf,Mahmoud,Abdul-Rauf,False
4,1505,Tariq Abdul-Wahad,Tariq,Abdul-Wahad,False
67,1824,Peter Aluma,Peter,Aluma,False
881,201191,JamesOn Curry,JamesOn,Curry,False


In [20]:
test_df.apply(get_career_stats,axis=1)

Unnamed: 0,APG,AST,BLK,BPG,DREB,DRPG,FG3A,FG3APG,FG3M,FG3MPG,FG3_PCT,FGA,FGAPG,FGM,FGMPG,FG_PCT,FPG,FTA,FTAPG,FTM,FTMPG,FT_PCT,GP,GS,MIN,MPG,OREB,ORPG,PF,PF_APG,PF_AST,PF_BLK,PF_BPG,PF_DREB,PF_DRPG,PF_FG3A,PF_FG3APG,PF_FG3M,PF_FG3MPG,PF_FG3_PCT,PF_FGA,PF_FGAPG,PF_FGM,PF_FGMPG,PF_FG_PCT,PF_FPG,PF_FTA,PF_FTAPG,PF_FTM,PF_FTMPG,PF_FT_PCT,PF_GP,PF_GS,PF_MIN,PF_MPG,PF_OREB,PF_ORPG,PF_PF,PF_PPG,PF_PTS,PF_REB,PF_RPG,PF_SPG,PF_STL,PF_TOV,PF_TPG,PPG,PTS,REB,RPG,SPG,STL,TOV,TPG
0,0.3,85,69,0.3,563,2.2,3.0,0.0,0.0,0.0,0.0,1236,4.8,620,2.4,0.501618,1.9,321,1.3,225,0.9,0.700935,256,53.0,3200,12.5,283,1.1,484,0.2,3.0,1.0,0.1,17.0,1.0,0.0,0.0,0.0,0.0,0.0,40.0,2.4,18.0,1.1,0.45,0.6,4.0,0.2,2.0,0.1,0.5,17.0,4.0,106.0,6.2,3.0,0.2,11.0,2.2,38.0,20.0,1.2,0.0,0.0,11.0,0.6,5.7,1465,846,3.3,0.3,71,247,1.0
1,1.2,601,205,1.0,1124,5.4,,,,,,4138,8.2,1769,3.5,0.427501,2.2,1400,2.8,1019,2.0,0.727857,505,,11023,21.8,503,2.4,1120,0.5,9.0,8.0,0.7,27.0,2.5,,,,,,70.0,3.9,37.0,2.1,0.528571,0.7,26.0,1.4,18.0,1.0,0.692308,18.0,,210.0,11.7,11.0,1.0,12.0,5.1,92.0,64.0,3.6,0.1,1.0,,,9.0,4557,4065,8.0,0.6,131,11,0.7
2,3.6,5660,3189,2.6,9394,7.6,18.0,,1.0,0.0,,28307,18.1,15837,10.2,0.559473,3.0,9304,6.0,6712,4.3,0.72141,1560,1476.0,57446,36.8,2975,2.4,4657,3.2,767.0,476.0,2.4,1273.0,6.5,4.0,,0.0,0.0,,4422.0,18.7,2356.0,9.9,0.532791,3.4,1419.0,6.0,1050.0,4.4,0.739958,237.0,140.0,8612.0,36.3,505.0,2.6,797.0,24.3,5762.0,2481.0,10.5,1.0,189.0,447.0,2.6,24.6,38387,17440,11.2,0.9,1160,2527,2.7
3,3.5,2079,46,0.1,868,1.5,1339.0,2.3,474.0,0.8,0.353996,7943,13.6,3514,6.0,0.442402,1.9,1161,2.0,1051,1.8,0.905254,586,336.0,15627,26.7,219,0.4,1106,2.3,35.0,1.0,0.1,18.0,1.2,49.0,3.3,14.0,0.9,0.285714,187.0,12.5,69.0,4.6,0.368984,2.5,45.0,3.0,43.0,2.9,0.955556,15.0,14.0,415.0,27.7,5.0,0.3,37.0,13.0,195.0,23.0,1.5,0.5,7.0,22.0,1.5,14.6,8553,1087,1.9,0.8,487,963,1.6
4,1.1,266,83,0.4,490,2.1,76.0,0.3,18.0,0.1,0.236842,1726,7.3,720,3.1,0.417149,2.1,529,2.2,372,1.6,0.703214,236,145.0,4808,20.4,286,1.2,485,0.8,11.0,4.0,0.3,24.0,1.8,3.0,0.2,0.0,0.0,0.0,63.0,4.8,24.0,1.8,0.380952,1.1,24.0,1.8,20.0,1.5,0.833333,13.0,5.0,178.0,13.7,17.0,1.3,14.0,5.2,68.0,41.0,3.2,0.3,4.0,4.0,0.3,7.8,1830,776,3.3,0.8,184,309,1.3
67,0.0,0,1,0.5,1,0.5,0.0,0.0,0.0,0.0,0.0,2,1.0,1,0.5,0.5,2.0,0,0.0,0,0.0,0.0,2,0.0,5,2.5,1,0.5,4,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.0,2,2,1.0,0.5,1,2,1.0
881,0.0,0,0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0,0.0,0.0,0.0,0,0.0,0,0.0,0.0,1,0.0,0,,0,0.0,0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0,0,0.0,0.0,0,0,0.0


In [30]:
no_fg=inactives[inactives["full_name"]=="Sim Bhullar"]
no_fg

Unnamed: 0,id,full_name,first_name,last_name,is_active
306,204021,Sim Bhullar,Sim,Bhullar,False


In [6]:
# Scratch scrape of one player page: keep row 1 of the per-game and totals tables.
driver=webdriver.Firefox()
try:
    driver.get("https://www.basketball-reference.com/players/c/curryja01.html")
    avgs=driver.find_element(By.ID,"per_game_stats")
    totals=driver.find_element(By.ID,"totals_stats")
    totals_df=pd.read_html(StringIO(totals.get_attribute("outerHTML")))[0].iloc[1]
    avgs_df=pd.read_html(StringIO(avgs.get_attribute("outerHTML")))[0].iloc[1]
finally:
    # Close the browser even when a lookup or parse step raises.
    driver.quit()

In [19]:
avgs_df

Season    1 Yr
Age       1 Yr
Team      1 Yr
Lg        1 Yr
Pos        NaN
G            1
GS           0
MP         0.0
FG         0.0
FGA        0.0
3P         0.0
3PA        0.0
2P         0.0
2PA        0.0
FT         0.0
FTA        0.0
ORB        0.0
DRB        0.0
TRB        0.0
AST        0.0
STL        0.0
BLK        0.0
TOV        0.0
PF         0.0
PTS        0.0
Awards     NaN
Name: 1, dtype: object

In [20]:
totals_df

Season     1 Yr
Age        1 Yr
Team       1 Yr
Lg         1 Yr
Pos         NaN
G             1
GS            0
MP            0
FG            0
FGA           0
3P            0
3PA           0
2P            0
2PA           0
FT            0
FTA           0
ORB           0
DRB           0
TRB           0
AST           0
STL           0
BLK           0
TOV           0
PF            0
PTS           0
Trp-Dbl       0
Awards      NaN
Name: 1, dtype: object

In [6]:
def test_insert_missing(stats:pd.Series)->pd.Series:
    if "FG%" not in stats:
        stats=pd.concat([stats[:10],pd.Series({"FG%":0.0}),stats[10:12],pd.Series({"3P%":0.0}),stats[12:14],pd.Series({"2P%":0.0,"eFG%":0.0}),stats[14:]])
    elif "3P%" not in stats:
        stats=pd.concat([stats[:13],pd.Series({"3P%":0.0}),stats[13:]])
    elif "2P%" not in stats:
        stats=pd.concat([stats[:16],pd.Series({"2P%":0.0}),stats[16:]])
    if "FT%" not in stats:
        stats=pd.concat([stats[:20],pd.Series({"FT%":0.0}),stats[20:]])
    return stats

In [24]:
# Run the awards lookup on the single-row `no_fg` frame (Sim Bhullar).
get_awards(no_fg)

DESCRIPTION
HOF    False
dtype: bool

In [22]:
# Select Keith Benson's row from the inactive-player table and show it.
hank=inactives.loc[inactives["full_name"].eq("Keith Benson")]
hank

Unnamed: 0,id,full_name,first_name,last_name,is_active
292,202728,Keith Benson,Keith,Benson,False


In [35]:
# Count the entries get_career_stats returns for the first row of `ai`.
# NOTE(review): `ai` is not assigned in any nearby cell — confirm it is still
# defined somewhere earlier so this cell survives Restart & Run All.
len(get_career_stats(ai.iloc[0]))

74

In [23]:
# Preview the table without the row labelled 2. `drop` returns a new frame and
# the result is not assigned, so `inactives` itself is left unchanged.
inactives.drop(2)

Unnamed: 0,id,full_name,first_name,last_name,is_active
0,76001,Alaa Abdelnaby,Alaa,Abdelnaby,False
1,76002,Zaid Abdul-Aziz,Zaid,Abdul-Aziz,False
3,51,Mahmoud Abdul-Rauf,Mahmoud,Abdul-Rauf,False
4,1505,Tariq Abdul-Wahad,Tariq,Abdul-Wahad,False
5,949,Shareef Abdur-Rahim,Shareef,Abdur-Rahim,False
...,...,...,...,...,...
4568,1627790,Ante Zizic,Ante,Zizic,False
4569,78647,Jim Zoet,Jim,Zoet,False
4570,78648,Bill Zopf,Bill,Zopf,False
4571,78650,Matt Zunic,Matt,Zunic,False


In [8]:
# Scrape the table with id "per_game_stats_post" (presumably the postseason
# per-game table — confirm) from one Basketball Reference player page and keep
# its last row.
driver=webdriver.Firefox()
try:
    driver.install_addon("ublock_origin-1.68.0.xpi")
    driver.get("https://www.basketball-reference.com/players/b/brantja01.html")
    avg_row=driver.find_elements(By.ID,"per_game_stats_post")
    # avg_row[0] raises IndexError when the page has no such table; the
    # finally clause below still closes the browser in that case.
    avgs=pd.read_html(StringIO(avg_row[0].get_attribute("outerHTML")))[0].iloc[-1]
finally:
    # Always release the Firefox process, whether or not the scrape succeeded.
    driver.quit()
avgs


Season    2 Yrs
Age       2 Yrs
Team      2 Yrs
Lg        2 Yrs
Pos         NaN
G             4
GS            0
MP          4.0
FG          0.0
FGA         1.5
FG%         0.0
3P          0.0
3PA         0.8
3P%         0.0
2P          0.0
2PA         0.8
2P%         0.0
eFG%        0.0
FT          0.5
FTA         1.0
FT%         0.5
ORB         0.8
DRB         0.5
TRB         1.3
AST         0.5
STL         0.0
BLK         0.3
TOV         0.5
PF          1.0
PTS         0.5
Awards      NaN
Name: 2, dtype: object

In [15]:
# Run the awards lookup on the single-row `hank` frame (Keith Benson).
get_awards(hank)

DESCRIPTION
HOF    False
dtype: bool

In [24]:
# Career stats for Keith Benson (the first row of `hank`), converted to a
# plain dict for display.
get_career_stats(hank.iloc[0]).to_dict()

{'GP': 3,
 'GS': 0,
 'MIN': 9,
 'FGM': 0,
 'FGA': 1,
 'FG_PCT': 0.0,
 'FG3M': 0,
 'FG3A': 0,
 'FG3_PCT': 0.0,
 'FTM': 0,
 'FTA': 0,
 'FT_PCT': 0.0,
 'OREB': 2,
 'DREB': 1,
 'REB': 3,
 'AST': 0,
 'STL': 0,
 'BLK': 0,
 'TOV': 0,
 'PF': 0,
 'PTS': 0,
 'MPG': 3.0,
 'FGMPG': 0.0,
 'FGAPG': 0.3,
 'FG3MPG': 0.0,
 'FG3APG': 0.0,
 'FTMPG': 0.0,
 'FTAPG': 0.0,
 'ORPG': 0.7,
 'DRPG': 0.3,
 'RPG': 1.0,
 'APG': 0.0,
 'SPG': 0.0,
 'BPG': 0.0,
 'TPG': 0.0,
 'FPG': 0.0,
 'PPG': 0.0}

In [8]:
# Overwrite the "Cui Cui" full_name with "Yongxi Cui" in place, then display
# every row whose last_name is "Cui". Only full_name is rewritten; first_name
# is left as it was.
inactives.loc[inactives["full_name"].eq("Cui Cui"),"full_name"]="Yongxi Cui"
inactives.loc[inactives["last_name"].eq("Cui")]

Unnamed: 0,id,full_name,first_name,last_name,is_active
863,1642385,Yongxi Cui,Cui,Cui,False


In [3]:
# Select Ike Fontaine's row from the inactive-player table and show it.
ike=inactives.loc[inactives["full_name"].eq("Ike Fontaine")]
ike

Unnamed: 0,id,full_name,first_name,last_name,is_active
1279,1829,Ike Fontaine,Ike,Fontaine,False


In [6]:
# Run the awards lookup on the single-row `ike` frame (Ike Fontaine).
get_awards(ike)

DESCRIPTION
HOF    False
dtype: bool

In [47]:
# Take Ruben Garces's row as a standalone Series and give it the accented
# spelling of his name (presumably the spelling Basketball Reference uses —
# confirm).
# .copy() detaches the row from the filtered frame before it is edited, so the
# assignment below can neither write through to `inactives` nor trigger a
# chained-assignment warning.
br_case=inactives[inactives["full_name"]=="Ruben Garces"].iloc[0].copy()
br_case["full_name"]="Rubén Garcés"
br_case

id                    2092
full_name     Rubén Garcés
first_name           Ruben
last_name           Garces
is_active            False
Name: 1373, dtype: object

In [48]:
# Career stats for the edited `br_case` row (full_name "Rubén Garcés").
get_career_stats(br_case)

GP          13.0
GS           0.0
MIN         73.0
FGM          7.0
FGA         22.0
FG_PCT     0.318
FG3M         0.0
FG3A         0.0
FG3_PCT      0.0
FTM          2.0
FTA          8.0
FT_PCT      0.25
OREB        16.0
DREB        13.0
REB         29.0
AST          5.0
STL          3.0
BLK          2.0
TOV          5.0
PF          14.0
PTS         16.0
MPG          5.6
FGMPG        0.5
FGAPG        1.7
FG3MPG       0.0
FG3APG       0.0
FTMPG        0.2
FTAPG        0.6
ORPG         1.2
DRPG         1.0
RPG          2.2
APG          0.4
SPG          0.2
BPG          0.2
TPG          0.4
FPG          1.1
PPG          1.2
dtype: object

In [None]:
# Scrape the full table with id "totals_stats" from Rubén Garcés's
# Basketball Reference page into a DataFrame.
driver=webdriver.Firefox()
try:
    driver.install_addon("ublock_origin-1.68.0.xpi")
    driver.get("https://www.basketball-reference.com/players/g/garceru01.html")
    br_table=driver.find_element(By.ID,"totals_stats")
    # read_html parses the element's own HTML; [0] is the only table in it.
    br_df=pd.read_html(StringIO(br_table.get_attribute("outerHTML")))[0]
finally:
    # Always close the browser, even when the page load or lookup raises.
    driver.quit()
br_df

Unnamed: 0,Season,Age,Team,Lg,Pos,G,GS,MP,FG,FGA,FG%,3P,3PA,2P,2PA,2P%,eFG%,FT,FTA,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS,Trp-Dbl,Awards
0,2000-01,27,2TM,NBA,PF,13.0,0.0,73.0,7.0,22.0,0.318,0.0,0.0,7.0,22.0,0.318,0.318,2.0,8.0,0.25,16.0,13.0,29.0,5.0,3.0,2.0,5.0,14.0,16.0,0.0,
1,2000-01,27,PHO,NBA,PF,10.0,0.0,62.0,7.0,15.0,0.467,0.0,0.0,7.0,15.0,0.467,0.467,2.0,8.0,0.25,12.0,10.0,22.0,4.0,2.0,1.0,5.0,14.0,16.0,0.0,
2,2000-01,27,GSW,NBA,PF,3.0,0.0,11.0,0.0,7.0,0.0,0.0,0.0,0.0,7.0,0.0,0.0,0.0,0.0,,4.0,3.0,7.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,
3,1 Yr,1 Yr,1 Yr,1 Yr,,13.0,0.0,73.0,7.0,22.0,0.318,0.0,0.0,7.0,22.0,0.318,0.318,2.0,8.0,0.25,16.0,13.0,29.0,5.0,3.0,2.0,5.0,14.0,16.0,0.0,
4,82 Game Avg,82 Game Avg,82 Game Avg,82 Game Avg,,82.0,0.0,460.0,44.0,139.0,0.318,0.0,0.0,44.0,139.0,0.318,0.318,13.0,50.0,0.25,101.0,82.0,183.0,32.0,19.0,13.0,32.0,88.0,101.0,0.0,
5,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
6,PHO (1 Yr),PHO (1 Yr),PHO (1 Yr),PHO (1 Yr),,10.0,0.0,62.0,7.0,15.0,0.467,0.0,0.0,7.0,15.0,0.467,0.467,2.0,8.0,0.25,12.0,10.0,22.0,4.0,2.0,1.0,5.0,14.0,16.0,0.0,
7,GSW (1 Yr),GSW (1 Yr),GSW (1 Yr),GSW (1 Yr),,3.0,0.0,11.0,0.0,7.0,0.0,0.0,0.0,0.0,7.0,0.0,0.0,0.0,0.0,,4.0,3.0,7.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,


In [23]:
# Pick the career-summary row: the first row whose Season cell reads exactly
# "<n> Yr" or "<n> Yrs". \d+ (rather than a single \d) also matches careers of
# ten or more seasons, and na=False treats empty Season cells as non-matches
# without having to fill the whole frame first.
br_df[br_df["Season"].str.contains(r"^\d+ Yrs?$",na=False)].iloc[0]

Season      1 Yr
Age         1 Yr
Team        1 Yr
Lg          1 Yr
Pos          NaN
G           13.0
GS           0.0
MP          73.0
FG           7.0
FGA         22.0
FG%        0.318
3P           0.0
3PA          0.0
2P           7.0
2PA         22.0
2P%        0.318
eFG%       0.318
FT           2.0
FTA          8.0
FT%         0.25
ORB         16.0
DRB         13.0
TRB         29.0
AST          5.0
STL          3.0
BLK          2.0
TOV          5.0
PF          14.0
PTS         16.0
Trp-Dbl      0.0
Awards       NaN
Name: 3, dtype: object

In [5]:
# For every player whose last name starts with `letter`, try the NBA career
# stats endpoint; collect the names for which it raises KeyError, then check
# whether each of those names appears as link text on the Basketball Reference
# index page for that letter.
players_df=pd.DataFrame(players.get_players())
letter="B"
ln_groups=players_df.groupby(players_df["last_name"].str[0])
group=ln_groups.get_group(letter)
br=[]
for row in group.itertuples(index=False):
    print(row.full_name)
    # pause between requests to respect NBA's rate limiting
    sleep(.5)
    try:
        playercareerstats.PlayerCareerStats(row.id)
    except KeyError:
        br.append(row.full_name)
driver=webdriver.Firefox()
try:
    driver.install_addon("ublock_origin-1.68.0.xpi")
    # The index URL is built from the letter itself; `group` is a DataFrame
    # and has no .lower().
    driver.get(f"https://www.basketball-reference.com/players/{letter.lower()}/")
    for p_name in br:
        print(p_name)
        # find_elements returns a list; an empty list means no link on the
        # page carries this exact name.
        if not driver.find_elements(By.LINK_TEXT,p_name):
            print(f"MISMATCH: {p_name}")
finally:
    # Always close the browser, even if the run is interrupted or a lookup raises.
    driver.quit()

Chris Babb


KeyboardInterrupt: 