In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.compose import make_column_selector, make_column_transformer

In [2]:
players = pd.read_csv('data/Players.csv')
players = players.drop(columns= ['Unnamed: 0'], axis=1)
players

Unnamed: 0,Player,height,weight,collage,born,birth_city,birth_state
0,Curly Armstrong,180.0,77.0,Indiana University,1918.0,,
1,Cliff Barker,188.0,83.0,University of Kentucky,1921.0,Yorktown,Indiana
2,Leo Barnhorst,193.0,86.0,University of Notre Dame,1924.0,,
3,Ed Bartels,196.0,88.0,North Carolina State University,1925.0,,
4,Ralph Beard,178.0,79.0,University of Kentucky,1927.0,Hardinsburg,Kentucky
...,...,...,...,...,...,...,...
3917,Troy Williams,198.0,97.0,South Carolina State University,1969.0,Columbia,South Carolina
3918,Kyle Wiltjer,208.0,108.0,Gonzaga University,1992.0,Portland,Oregon
3919,Stephen Zimmerman,213.0,108.0,"University of Nevada, Las Vegas",1996.0,Hendersonville,Tennessee
3920,Paul Zipser,203.0,97.0,,1994.0,Heidelberg,Germany


In [3]:
stats = pd.read_csv('data/Seasons_Stats.csv')
stats = stats.drop(columns= ['Unnamed: 0'], axis=1)
stats

Unnamed: 0,Year,Player,Pos,Age,Tm,G,GS,MP,PER,TS%,...,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS
0,1950.0,Curly Armstrong,G-F,31.0,FTW,63.0,,,,0.368,...,0.705,,,,176.0,,,,217.0,458.0
1,1950.0,Cliff Barker,SG,29.0,INO,49.0,,,,0.435,...,0.708,,,,109.0,,,,99.0,279.0
2,1950.0,Leo Barnhorst,SF,25.0,CHS,67.0,,,,0.394,...,0.698,,,,140.0,,,,192.0,438.0
3,1950.0,Ed Bartels,F,24.0,TOT,15.0,,,,0.312,...,0.559,,,,20.0,,,,29.0,63.0
4,1950.0,Ed Bartels,F,24.0,DNN,13.0,,,,0.308,...,0.548,,,,20.0,,,,27.0,59.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
24686,2017.0,Cody Zeller,PF,24.0,CHO,62.0,58.0,1725.0,16.7,0.604,...,0.679,135.0,270.0,405.0,99.0,62.0,58.0,65.0,189.0,639.0
24687,2017.0,Tyler Zeller,C,27.0,BOS,51.0,5.0,525.0,13.0,0.508,...,0.564,43.0,81.0,124.0,42.0,7.0,21.0,20.0,61.0,178.0
24688,2017.0,Stephen Zimmerman,C,20.0,ORL,19.0,0.0,108.0,7.3,0.346,...,0.600,11.0,24.0,35.0,4.0,2.0,5.0,3.0,17.0,23.0
24689,2017.0,Paul Zipser,SF,22.0,CHI,44.0,18.0,843.0,6.9,0.503,...,0.775,15.0,110.0,125.0,36.0,15.0,16.0,40.0,78.0,240.0


In [4]:
mvp_players = {'Bob Pettit*': [1956, 1959],
                  'Bob Cousy*': [1957],
                  'Bill Russell*': [1958, 1961, 1962, 1963, 1965],
                  'Wilt Chamberlain*': [1960, 1966, 1967, 1968],
                  'Oscar Robertson*': [1964],
                  'Wes Unseld*': [1969],
                  'Willis Reed*': [1970],
                  'Kareem Abdul-Jabbar*': [1971, 1972, 1974, 1976, 1977, 1980],
                  'Dave Cowens*': [1973],
                  'Bob McAdoo*': [1975],
                  'Bill Walton*': [1978],
                  'Moses Malone*': [1979, 1982, 1983],
                  'Julius Erving*': [1981],
                  'Larry Bird*': [1984, 1985, 1986],
                  'Magic Johnson*': [1987, 1989, 1990],
                  'Michael Jordan*': [1988, 1991, 1992, 1996, 1998],
                  'Charles Barkley*': [1993],
                  'Hakeem Olajuwon*': [1994],
                  'David Robinson*': [1995],
                  'Karl Malone*': [1997, 1999],
                  'Shaquille O\'Neal*': [2000],
                  'Allen Iverson*': [2001],
                  'Tim Duncan': [2002, 2003],
                  'Kevin Garnett': [2004],
                  'Steve Nash': [2005, 2006],
                  'Dirk Nowitzki': [2007],
                  'Kobe Bryant': [2008],
                  'LeBron James': [2009, 2010, 2012, 2013],
                  'Derrick Rose': [2011],
                  'Kevin Durant': [2014],
                  'Stephen Curry': [2015, 2016],
                  'Russell Westbrook': [2017],
                  'James Harden': [2018]}

In [5]:
stats['MVP'] = 0
for i, row in stats.iterrows():
    for k, v in mvp_players.items():
        for year in v:
            if row['Player'] != k:
                break
            elif(row['Year'] == year) & (row['Player'] == k):
                stats.loc[i, 'MVP'] = 1
                break

In [6]:
teams_champions = {'BOS': [1957, 1959, 1960, 1961, 1962, 1963, 1964, 1965, 1966, 1968, 1969, 1974, 1976, 1981, 1984, 1986, 2008],
                  'LAL': [1972, 1980, 1982, 1985, 1987, 1988, 2000, 2001, 2002, 2009, 2010],
                  'MNL': [1949, 1950, 1952, 1953, 1954],
                  'CHI': [1991, 1992, 1993, 1996, 1997, 1998],
                  'GSW': [1975, 2015, 2017, 2018],
                  'PHW': [1947, 1956],
                  'SAS': [1999, 2003, 2005, 2007, 2014],
                  'DET': [1989, 1990, 2004],
                  'MIA': [2006, 2012, 2013],
                  'PHI': [1967, 1983],
                  'SYR': [1955],
                  'HOU': [1994, 1995],
                  'NYK': [1970, 1973],
                  'STL': [1958],
                  'BLB': [1948],
                  'CLE': [2016],
                  'DAL': [2011],
                  'MIL': [1971],
                  'SEA': [1979],
                  'POR': [1977],
                  'ROC': [1951],
                  'WSB': [1978]}

In [7]:
stats['rings'] = 0
for i, row in stats.iterrows():
    for k, v in teams_champions.items():
        for year in v:
            if row['Tm'] != k:
                break
            elif(row['Year'] == year) & (row['Tm'] == k):
                stats.loc[i, 'rings'] = 1
                break

In [49]:
finals_mvp = {'Jerry West*': [1969],
               'Willis Reed*': [1970, 1973],
               'Micheal Jordan*': [1991, 1992, 1993, 1996, 1997, 1998],
               'Lebron James*': [2012, 2013, 2016],
                 'Magic Johnson*': [1980, 1982, 1987],
                 'Shaquille O\'Neal*': [2000, 2001, 2002],
                 'Tim Duncan': [1999, 2003, 2005],
                 'Kareem Abdul-Jabbar*': [1971, 1985],
                 'Larry Bird*': [1984, 1986],
                 'Hakeem Olajuwon*': [1994, 1995],
                 'Kobe Bryant': [2009, 2010],
                 'Kevin Durant': [2017, 2018],
                 'Kawhi Leonard': [2014], 
                 'Wilt Chamberlain*': [1972],
                 'John Havlicek*': [1974],
                 'Rick Barry': [1975], 
                 'Jo Jo White*': [1976], 
                 'Bill Walton*': [1977],
                 'Wes Unseld*': [1978],
                 'Dennis Johnson*': [1979],
                 'Cedric Maxwell': [1981], 
                 'Moses Malone*': [1983],
                 'James Worthy*': [1988],
                 'Joe Dumars*': [1989],
                 'Isiah Thomas*': [1990],
                 'Chauncey Billups': [2004],
                 'Dwayne Wade': [2006],
                 'Tony Parker': [2007],
                 'Paul Pierce': [2008],
                 'Dirk Nowitzki': [2011],
                 'Andre Iguodala': [2015]}

In [None]:
stats['Player']

In [8]:
stats.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 24691 entries, 0 to 24690
Data columns (total 54 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Year    24624 non-null  float64
 1   Player  24624 non-null  object 
 2   Pos     24624 non-null  object 
 3   Age     24616 non-null  float64
 4   Tm      24624 non-null  object 
 5   G       24624 non-null  float64
 6   GS      18233 non-null  float64
 7   MP      24138 non-null  float64
 8   PER     24101 non-null  float64
 9   TS%     24538 non-null  float64
 10  3PAr    18839 non-null  float64
 11  FTr     24525 non-null  float64
 12  ORB%    20792 non-null  float64
 13  DRB%    20792 non-null  float64
 14  TRB%    21571 non-null  float64
 15  AST%    22555 non-null  float64
 16  STL%    20792 non-null  float64
 17  BLK%    20792 non-null  float64
 18  TOV%    19582 non-null  float64
 19  USG%    19640 non-null  float64
 20  blanl   0 non-null      float64
 21  OWS     24585 non-null  float64
 22

In [9]:
len(stats.Player.unique())

3922

In [10]:
stats.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 24691 entries, 0 to 24690
Data columns (total 54 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Year    24624 non-null  float64
 1   Player  24624 non-null  object 
 2   Pos     24624 non-null  object 
 3   Age     24616 non-null  float64
 4   Tm      24624 non-null  object 
 5   G       24624 non-null  float64
 6   GS      18233 non-null  float64
 7   MP      24138 non-null  float64
 8   PER     24101 non-null  float64
 9   TS%     24538 non-null  float64
 10  3PAr    18839 non-null  float64
 11  FTr     24525 non-null  float64
 12  ORB%    20792 non-null  float64
 13  DRB%    20792 non-null  float64
 14  TRB%    21571 non-null  float64
 15  AST%    22555 non-null  float64
 16  STL%    20792 non-null  float64
 17  BLK%    20792 non-null  float64
 18  TOV%    19582 non-null  float64
 19  USG%    19640 non-null  float64
 20  blanl   0 non-null      float64
 21  OWS     24585 non-null  float64
 22

In [11]:
stats['MVP'].value_counts()

0    24629
1       62
Name: MVP, dtype: int64

In [12]:
totals = stats.groupby('Player').count()['Year']
totals = pd.DataFrame(totals)
totals.reset_index(inplace=True)
totals.head()

Unnamed: 0,Player,Year
0,A.C. Green,18
1,A.J. Bramlett,1
2,A.J. English,2
3,A.J. Guyton,3
4,A.J. Hammons,1


In [13]:
totals['Seasons'] = totals['Year']
totals = totals.drop(columns=['Year'], axis=1)

In [14]:
totals['position'] = stats.Pos

In [15]:
totals.columns = totals.columns.str.lower()

In [16]:
totals['team'] = stats.Tm

In [17]:
games = stats.groupby('Player').sum()['G']
games = pd.DataFrame(games)
games.reset_index(inplace=True)
games.columns = games.columns.str.replace(" ", "_").str.lower()
totals = pd.merge(games, totals, how='left', on='player')
totals.rename(columns= {'g': 'games_played'}, inplace=True)
totals.head()

Unnamed: 0,player,games_played,seasons,position,team
0,A.C. Green,1361.0,18,G-F,FTW
1,A.J. Bramlett,8.0,1,SG,INO
2,A.J. English,151.0,2,SF,CHS
3,A.J. Guyton,80.0,3,F,TOT
4,A.J. Hammons,22.0,1,F,DNN


In [18]:
player_efficiency = stats.groupby('Player').mean()['PER']
player_efficiency = pd.DataFrame(player_efficiency)
player_efficiency.reset_index(inplace=True)
player_efficiency.columns = player_efficiency.columns.str.replace(" ", "_").str.lower()
totals = pd.merge(player_efficiency, totals, how='left', on='player')
totals.rename(columns = {'per': 'player_efficiency_rating'}, inplace=True)
totals.head()

Unnamed: 0,player,player_efficiency_rating,games_played,seasons,position,team
0,A.C. Green,13.872222,1361.0,18,G-F,FTW
1,A.J. Bramlett,-0.4,8.0,1,SG,INO
2,A.J. English,11.55,151.0,2,SF,CHS
3,A.J. Guyton,4.366667,80.0,3,F,TOT
4,A.J. Hammons,8.4,22.0,1,F,DNN


In [19]:
shooting_percent = stats.groupby('Player').mean()['TS%']
shooting_percent = pd.DataFrame(shooting_percent)
shooting_percent.reset_index(inplace=True)
shooting_percent.columns = shooting_percent.columns.str.replace(" ", "_").str.lower()
totals = pd.merge(shooting_percent, totals, how='left', on='player')
totals.rename(columns = {'ts%': 'shooting_percent'}, inplace=True)
totals.head()

Unnamed: 0,player,shooting_percent,player_efficiency_rating,games_played,seasons,position,team
0,A.C. Green,0.543556,13.872222,1361.0,18,G-F,FTW
1,A.J. Bramlett,0.19,-0.4,8.0,1,SG,INO
2,A.J. English,0.48,11.55,151.0,2,SF,CHS
3,A.J. Guyton,0.324,4.366667,80.0,3,F,TOT
4,A.J. Hammons,0.472,8.4,22.0,1,F,DNN


In [20]:
field_goals = stats.groupby('Player').sum()['FG']
field_goals = pd.DataFrame(field_goals)
field_goals.reset_index(inplace=True)
field_goals.columns = field_goals.columns.str.replace(" ", "_").str.lower()
totals = pd.merge(field_goals, totals, how='left', on='player')
totals.rename(columns = {'fg': 'field_goals'}, inplace=True)
totals.head()

Unnamed: 0,player,field_goals,shooting_percent,player_efficiency_rating,games_played,seasons,position,team
0,A.C. Green,4778.0,0.543556,13.872222,1361.0,18,G-F,FTW
1,A.J. Bramlett,4.0,0.19,-0.4,8.0,1,SG,INO
2,A.J. English,617.0,0.48,11.55,151.0,2,SF,CHS
3,A.J. Guyton,166.0,0.324,4.366667,80.0,3,F,TOT
4,A.J. Hammons,17.0,0.472,8.4,22.0,1,F,DNN


In [21]:
fg_percent = stats.groupby('Player').mean()['FG%']
fg_percent = pd.DataFrame(fg_percent)
fg_percent.reset_index(inplace=True)
fg_percent.columns = fg_percent.columns.str.replace(" ", "_").str.lower()
totals = pd.merge(fg_percent, totals, how='left', on='player')
totals.rename(columns = {'fg%': 'fg_percent'}, inplace=True)
totals.head()

Unnamed: 0,player,fg_percent,field_goals,shooting_percent,player_efficiency_rating,games_played,seasons,position,team
0,A.C. Green,0.487667,4778.0,0.543556,13.872222,1361.0,18,G-F,FTW
1,A.J. Bramlett,0.19,4.0,0.19,-0.4,8.0,1,SG,INO
2,A.J. English,0.436,617.0,0.48,11.55,151.0,2,SF,CHS
3,A.J. Guyton,0.255667,166.0,0.324,4.366667,80.0,3,F,TOT
4,A.J. Hammons,0.405,17.0,0.472,8.4,22.0,1,F,DNN


In [22]:
three_p_made = stats.groupby('Player').sum()['3P']
three_p_made = pd.DataFrame(three_p_made)
three_p_made.reset_index(inplace=True)
three_p_made.columns = three_p_made.columns.str.replace(" ", "_").str.lower()
totals = pd.merge(three_p_made, totals, how='left', on='player')
totals.rename(columns = {'3p': '3p_made'}, inplace=True)
totals.head()

Unnamed: 0,player,3p_made,fg_percent,field_goals,shooting_percent,player_efficiency_rating,games_played,seasons,position,team
0,A.C. Green,125.0,0.487667,4778.0,0.543556,13.872222,1361.0,18,G-F,FTW
1,A.J. Bramlett,0.0,0.19,4.0,0.19,-0.4,8.0,1,SG,INO
2,A.J. English,9.0,0.436,617.0,0.48,11.55,151.0,2,SF,CHS
3,A.J. Guyton,73.0,0.255667,166.0,0.324,4.366667,80.0,3,F,TOT
4,A.J. Hammons,5.0,0.405,17.0,0.472,8.4,22.0,1,F,DNN


In [23]:
three_p_per = stats.groupby('Player').mean()['3P%']
three_p_per = pd.DataFrame(three_p_per)
three_p_per.reset_index(inplace=True)
three_p_per.columns = three_p_per.columns.str.replace(" ", "_").str.lower()
totals = pd.merge(three_p_per, totals, how='left', on='player')
totals.rename(columns = {'3p%': '3p_percentage'}, inplace=True)
totals.head()

Unnamed: 0,player,3p_percentage,3p_made,fg_percent,field_goals,shooting_percent,player_efficiency_rating,games_played,seasons,position,team
0,A.C. Green,0.146833,125.0,0.487667,4778.0,0.543556,13.872222,1361.0,18,G-F,FTW
1,A.J. Bramlett,,0.0,0.19,4.0,0.19,-0.4,8.0,1,SG,INO
2,A.J. English,0.1365,9.0,0.436,617.0,0.48,11.55,151.0,2,SF,CHS
3,A.J. Guyton,0.255,73.0,0.255667,166.0,0.324,4.366667,80.0,3,F,TOT
4,A.J. Hammons,0.5,5.0,0.405,17.0,0.472,8.4,22.0,1,F,DNN


In [24]:
two_p_made = stats.groupby('Player').sum()['2P']
two_p_made = pd.DataFrame(two_p_made)
two_p_made.reset_index(inplace=True)
two_p_made.columns = two_p_made.columns.str.replace(" ", "_").str.lower()
totals = pd.merge(two_p_made, totals, how='left', on='player')
totals.rename(columns = {'2p': '2p_made'}, inplace=True)
totals.head()

Unnamed: 0,player,2p_made,3p_percentage,3p_made,fg_percent,field_goals,shooting_percent,player_efficiency_rating,games_played,seasons,position,team
0,A.C. Green,4653.0,0.146833,125.0,0.487667,4778.0,0.543556,13.872222,1361.0,18,G-F,FTW
1,A.J. Bramlett,4.0,,0.0,0.19,4.0,0.19,-0.4,8.0,1,SG,INO
2,A.J. English,608.0,0.1365,9.0,0.436,617.0,0.48,11.55,151.0,2,SF,CHS
3,A.J. Guyton,93.0,0.255,73.0,0.255667,166.0,0.324,4.366667,80.0,3,F,TOT
4,A.J. Hammons,12.0,0.5,5.0,0.405,17.0,0.472,8.4,22.0,1,F,DNN


In [25]:
two_p_per = stats.groupby('Player').mean()['2P%']
two_p_per = pd.DataFrame(two_p_per)
two_p_per.reset_index(inplace=True)
two_p_per.columns = two_p_per.columns.str.replace(" ", "_").str.lower()
totals = pd.merge(two_p_per, totals, how='left', on='player')
totals.rename(columns = {'2p%': '2p_percentage'}, inplace=True)
totals.head()

Unnamed: 0,player,2p_percentage,2p_made,3p_percentage,3p_made,fg_percent,field_goals,shooting_percent,player_efficiency_rating,games_played,seasons,position,team
0,A.C. Green,0.501889,4653.0,0.146833,125.0,0.487667,4778.0,0.543556,13.872222,1361.0,18,G-F,FTW
1,A.J. Bramlett,0.19,4.0,,0.0,0.19,4.0,0.19,-0.4,8.0,1,SG,INO
2,A.J. English,0.4505,608.0,0.1365,9.0,0.436,617.0,0.48,11.55,151.0,2,SF,CHS
3,A.J. Guyton,0.254,93.0,0.255,73.0,0.255667,166.0,0.324,4.366667,80.0,3,F,TOT
4,A.J. Hammons,0.375,12.0,0.5,5.0,0.405,17.0,0.472,8.4,22.0,1,F,DNN


In [26]:
free_throws_made = stats.groupby('Player').sum()['FT']
free_throws_made = pd.DataFrame(free_throws_made)
free_throws_made.reset_index(inplace=True)
free_throws_made.columns = free_throws_made.columns.str.replace(" ", "_").str.lower()
totals = pd.merge(free_throws_made, totals, how='left', on='player')
totals.rename(columns = {'ft': 'free_throws_made'}, inplace=True)
totals.head()

Unnamed: 0,player,free_throws_made,2p_percentage,2p_made,3p_percentage,3p_made,fg_percent,field_goals,shooting_percent,player_efficiency_rating,games_played,seasons,position,team
0,A.C. Green,3247.0,0.501889,4653.0,0.146833,125.0,0.487667,4778.0,0.543556,13.872222,1361.0,18,G-F,FTW
1,A.J. Bramlett,0.0,0.19,4.0,,0.0,0.19,4.0,0.19,-0.4,8.0,1,SG,INO
2,A.J. English,259.0,0.4505,608.0,0.1365,9.0,0.436,617.0,0.48,11.55,151.0,2,SF,CHS
3,A.J. Guyton,37.0,0.254,93.0,0.255,73.0,0.255667,166.0,0.324,4.366667,80.0,3,F,TOT
4,A.J. Hammons,9.0,0.375,12.0,0.5,5.0,0.405,17.0,0.472,8.4,22.0,1,F,DNN


In [27]:
free_throw_per = stats.groupby('Player').mean()['FT%']
free_throw_per = pd.DataFrame(free_throw_per)
free_throw_per.reset_index(inplace=True)
free_throw_per.columns = free_throw_per.columns.str.replace(" ", "_").str.lower()
totals = pd.merge(free_throw_per, totals, how='left', on='player')
totals.rename(columns = {'ft%': 'ft_percentage'}, inplace=True)
totals.head()

Unnamed: 0,player,ft_percentage,free_throws_made,2p_percentage,2p_made,3p_percentage,3p_made,fg_percent,field_goals,shooting_percent,player_efficiency_rating,games_played,seasons,position,team
0,A.C. Green,0.708056,3247.0,0.501889,4653.0,0.146833,125.0,0.487667,4778.0,0.543556,13.872222,1361.0,18,G-F,FTW
1,A.J. Bramlett,,0.0,0.19,4.0,,0.0,0.19,4.0,0.19,-0.4,8.0,1,SG,INO
2,A.J. English,0.774,259.0,0.4505,608.0,0.1365,9.0,0.436,617.0,0.48,11.55,151.0,2,SF,CHS
3,A.J. Guyton,0.824,37.0,0.254,93.0,0.255,73.0,0.255667,166.0,0.324,4.366667,80.0,3,F,TOT
4,A.J. Hammons,0.45,9.0,0.375,12.0,0.5,5.0,0.405,17.0,0.472,8.4,22.0,1,F,DNN


In [28]:
offensive_rebounds = stats.groupby('Player').sum()['ORB']
offensive_rebounds = pd.DataFrame(offensive_rebounds)
offensive_rebounds.reset_index(inplace=True)
offensive_rebounds.columns = offensive_rebounds.columns.str.replace(" ", "_").str.lower()
totals = pd.merge(offensive_rebounds, totals, how='left', on='player')
totals.rename(columns = {'orb': 'offensive_rebounds'}, inplace=True)
totals.head()

Unnamed: 0,player,offensive_rebounds,ft_percentage,free_throws_made,2p_percentage,2p_made,3p_percentage,3p_made,fg_percent,field_goals,shooting_percent,player_efficiency_rating,games_played,seasons,position,team
0,A.C. Green,3576.0,0.708056,3247.0,0.501889,4653.0,0.146833,125.0,0.487667,4778.0,0.543556,13.872222,1361.0,18,G-F,FTW
1,A.J. Bramlett,12.0,,0.0,0.19,4.0,,0.0,0.19,4.0,0.19,-0.4,8.0,1,SG,INO
2,A.J. English,140.0,0.774,259.0,0.4505,608.0,0.1365,9.0,0.436,617.0,0.48,11.55,151.0,2,SF,CHS
3,A.J. Guyton,22.0,0.824,37.0,0.254,93.0,0.255,73.0,0.255667,166.0,0.324,4.366667,80.0,3,F,TOT
4,A.J. Hammons,8.0,0.45,9.0,0.375,12.0,0.5,5.0,0.405,17.0,0.472,8.4,22.0,1,F,DNN


In [29]:
defensive_rebounds = stats.groupby('Player').sum()['DRB']
defensive_rebounds = pd.DataFrame(defensive_rebounds)
defensive_rebounds.reset_index(inplace=True)
defensive_rebounds.columns = defensive_rebounds.columns.str.replace(" ", "_").str.lower()
totals = pd.merge(defensive_rebounds, totals, how='left', on='player')
totals.rename(columns = {'drb': 'defensive_rebounds'}, inplace=True)
totals.head()

Unnamed: 0,player,defensive_rebounds,offensive_rebounds,ft_percentage,free_throws_made,2p_percentage,2p_made,3p_percentage,3p_made,fg_percent,field_goals,shooting_percent,player_efficiency_rating,games_played,seasons,position,team
0,A.C. Green,6553.0,3576.0,0.708056,3247.0,0.501889,4653.0,0.146833,125.0,0.487667,4778.0,0.543556,13.872222,1361.0,18,G-F,FTW
1,A.J. Bramlett,10.0,12.0,,0.0,0.19,4.0,,0.0,0.19,4.0,0.19,-0.4,8.0,1,SG,INO
2,A.J. English,175.0,140.0,0.774,259.0,0.4505,608.0,0.1365,9.0,0.436,617.0,0.48,11.55,151.0,2,SF,CHS
3,A.J. Guyton,58.0,22.0,0.824,37.0,0.254,93.0,0.255,73.0,0.255667,166.0,0.324,4.366667,80.0,3,F,TOT
4,A.J. Hammons,28.0,8.0,0.45,9.0,0.375,12.0,0.5,5.0,0.405,17.0,0.472,8.4,22.0,1,F,DNN


In [30]:
total_rebounds = stats.groupby('Player').sum()['TRB']
total_rebounds = pd.DataFrame(total_rebounds)
total_rebounds.reset_index(inplace=True)
total_rebounds.columns = total_rebounds.columns.str.replace(" ", "_").str.lower()
totals = pd.merge(total_rebounds, totals, how='left', on='player')
totals.rename(columns = {'trb': 'total_rebounds'}, inplace=True)
totals.head()

Unnamed: 0,player,total_rebounds,defensive_rebounds,offensive_rebounds,ft_percentage,free_throws_made,2p_percentage,2p_made,3p_percentage,3p_made,fg_percent,field_goals,shooting_percent,player_efficiency_rating,games_played,seasons,position,team
0,A.C. Green,10129.0,6553.0,3576.0,0.708056,3247.0,0.501889,4653.0,0.146833,125.0,0.487667,4778.0,0.543556,13.872222,1361.0,18,G-F,FTW
1,A.J. Bramlett,22.0,10.0,12.0,,0.0,0.19,4.0,,0.0,0.19,4.0,0.19,-0.4,8.0,1,SG,INO
2,A.J. English,315.0,175.0,140.0,0.774,259.0,0.4505,608.0,0.1365,9.0,0.436,617.0,0.48,11.55,151.0,2,SF,CHS
3,A.J. Guyton,80.0,58.0,22.0,0.824,37.0,0.254,93.0,0.255,73.0,0.255667,166.0,0.324,4.366667,80.0,3,F,TOT
4,A.J. Hammons,36.0,28.0,8.0,0.45,9.0,0.375,12.0,0.5,5.0,0.405,17.0,0.472,8.4,22.0,1,F,DNN


In [31]:
total_assists = stats.groupby('Player').sum()['AST']
total_assists = pd.DataFrame(total_assists)
total_assists.reset_index(inplace=True)
total_assists.columns = total_assists.columns.str.replace(" ", "_").str.lower()
totals = pd.merge(total_assists, totals, how='left', on='player')
totals.rename(columns = {'ast': 'total_assists'}, inplace=True)
totals.head()

Unnamed: 0,player,total_assists,total_rebounds,defensive_rebounds,offensive_rebounds,ft_percentage,free_throws_made,2p_percentage,2p_made,3p_percentage,3p_made,fg_percent,field_goals,shooting_percent,player_efficiency_rating,games_played,seasons,position,team
0,A.C. Green,1469.0,10129.0,6553.0,3576.0,0.708056,3247.0,0.501889,4653.0,0.146833,125.0,0.487667,4778.0,0.543556,13.872222,1361.0,18,G-F,FTW
1,A.J. Bramlett,0.0,22.0,10.0,12.0,,0.0,0.19,4.0,,0.0,0.19,4.0,0.19,-0.4,8.0,1,SG,INO
2,A.J. English,320.0,315.0,175.0,140.0,0.774,259.0,0.4505,608.0,0.1365,9.0,0.436,617.0,0.48,11.55,151.0,2,SF,CHS
3,A.J. Guyton,147.0,80.0,58.0,22.0,0.824,37.0,0.254,93.0,0.255,73.0,0.255667,166.0,0.324,4.366667,80.0,3,F,TOT
4,A.J. Hammons,4.0,36.0,28.0,8.0,0.45,9.0,0.375,12.0,0.5,5.0,0.405,17.0,0.472,8.4,22.0,1,F,DNN


In [32]:
total_steals = stats.groupby('Player').sum()['STL']
total_steals = pd.DataFrame(total_steals)
total_steals.reset_index(inplace=True)
total_steals.columns = total_steals.columns.str.replace(" ", "_").str.lower()
totals = pd.merge(total_steals, totals, how='left', on='player')
totals.rename(columns = {'stl': 'total_steals'}, inplace=True)
totals.head()

Unnamed: 0,player,total_steals,total_assists,total_rebounds,defensive_rebounds,offensive_rebounds,ft_percentage,free_throws_made,2p_percentage,2p_made,3p_percentage,3p_made,fg_percent,field_goals,shooting_percent,player_efficiency_rating,games_played,seasons,position,team
0,A.C. Green,1103.0,1469.0,10129.0,6553.0,3576.0,0.708056,3247.0,0.501889,4653.0,0.146833,125.0,0.487667,4778.0,0.543556,13.872222,1361.0,18,G-F,FTW
1,A.J. Bramlett,1.0,0.0,22.0,10.0,12.0,,0.0,0.19,4.0,,0.0,0.19,4.0,0.19,-0.4,8.0,1,SG,INO
2,A.J. English,57.0,320.0,315.0,175.0,140.0,0.774,259.0,0.4505,608.0,0.1365,9.0,0.436,617.0,0.48,11.55,151.0,2,SF,CHS
3,A.J. Guyton,20.0,147.0,80.0,58.0,22.0,0.824,37.0,0.254,93.0,0.255,73.0,0.255667,166.0,0.324,4.366667,80.0,3,F,TOT
4,A.J. Hammons,1.0,4.0,36.0,28.0,8.0,0.45,9.0,0.375,12.0,0.5,5.0,0.405,17.0,0.472,8.4,22.0,1,F,DNN


In [33]:
total_blocks = stats.groupby('Player').sum()['BLK']
total_blocks = pd.DataFrame(total_blocks)
total_blocks.reset_index(inplace=True)
total_blocks.columns = total_blocks.columns.str.replace(" ", "_").str.lower()
totals = pd.merge(total_blocks, totals, how='left', on='player')
totals.rename(columns = {'blk': 'total_blocks'}, inplace=True)
totals.head()

Unnamed: 0,player,total_blocks,total_steals,total_assists,total_rebounds,defensive_rebounds,offensive_rebounds,ft_percentage,free_throws_made,2p_percentage,...,3p_percentage,3p_made,fg_percent,field_goals,shooting_percent,player_efficiency_rating,games_played,seasons,position,team
0,A.C. Green,562.0,1103.0,1469.0,10129.0,6553.0,3576.0,0.708056,3247.0,0.501889,...,0.146833,125.0,0.487667,4778.0,0.543556,13.872222,1361.0,18,G-F,FTW
1,A.J. Bramlett,0.0,1.0,0.0,22.0,10.0,12.0,,0.0,0.19,...,,0.0,0.19,4.0,0.19,-0.4,8.0,1,SG,INO
2,A.J. English,24.0,57.0,320.0,315.0,175.0,140.0,0.774,259.0,0.4505,...,0.1365,9.0,0.436,617.0,0.48,11.55,151.0,2,SF,CHS
3,A.J. Guyton,12.0,20.0,147.0,80.0,58.0,22.0,0.824,37.0,0.254,...,0.255,73.0,0.255667,166.0,0.324,4.366667,80.0,3,F,TOT
4,A.J. Hammons,13.0,1.0,4.0,36.0,28.0,8.0,0.45,9.0,0.375,...,0.5,5.0,0.405,17.0,0.472,8.4,22.0,1,F,DNN


In [34]:
total_turnovers = stats.groupby('Player').sum()['TOV']
total_turnovers = pd.DataFrame(total_turnovers)
total_turnovers.reset_index(inplace=True)
total_turnovers.columns = total_turnovers.columns.str.replace(" ", "_").str.lower()
totals = pd.merge(total_turnovers, totals, how='left', on='player')
totals.rename(columns = {'tov': 'total_turnovers'}, inplace=True)
totals.head()

Unnamed: 0,player,total_turnovers,total_blocks,total_steals,total_assists,total_rebounds,defensive_rebounds,offensive_rebounds,ft_percentage,free_throws_made,...,3p_percentage,3p_made,fg_percent,field_goals,shooting_percent,player_efficiency_rating,games_played,seasons,position,team
0,A.C. Green,1508.0,562.0,1103.0,1469.0,10129.0,6553.0,3576.0,0.708056,3247.0,...,0.146833,125.0,0.487667,4778.0,0.543556,13.872222,1361.0,18,G-F,FTW
1,A.J. Bramlett,3.0,0.0,1.0,0.0,22.0,10.0,12.0,,0.0,...,,0.0,0.19,4.0,0.19,-0.4,8.0,1,SG,INO
2,A.J. English,203.0,24.0,57.0,320.0,315.0,175.0,140.0,0.774,259.0,...,0.1365,9.0,0.436,617.0,0.48,11.55,151.0,2,SF,CHS
3,A.J. Guyton,62.0,12.0,20.0,147.0,80.0,58.0,22.0,0.824,37.0,...,0.255,73.0,0.255667,166.0,0.324,4.366667,80.0,3,F,TOT
4,A.J. Hammons,10.0,13.0,1.0,4.0,36.0,28.0,8.0,0.45,9.0,...,0.5,5.0,0.405,17.0,0.472,8.4,22.0,1,F,DNN


In [35]:
total_personal_fouls = stats.groupby('Player').sum()['PF']
total_personal_fouls = pd.DataFrame(total_personal_fouls)
total_personal_fouls.reset_index(inplace=True)
total_personal_fouls.columns = total_personal_fouls.columns.str.replace(" ", "_").str.lower()
totals = pd.merge(total_personal_fouls, totals, how='left', on='player')
totals.rename(columns = {'pf': 'total_personal_fouls'}, inplace=True)
totals.head()

Unnamed: 0,player,total_personal_fouls,total_turnovers,total_blocks,total_steals,total_assists,total_rebounds,defensive_rebounds,offensive_rebounds,ft_percentage,...,3p_percentage,3p_made,fg_percent,field_goals,shooting_percent,player_efficiency_rating,games_played,seasons,position,team
0,A.C. Green,2581.0,1508.0,562.0,1103.0,1469.0,10129.0,6553.0,3576.0,0.708056,...,0.146833,125.0,0.487667,4778.0,0.543556,13.872222,1361.0,18,G-F,FTW
1,A.J. Bramlett,13.0,3.0,0.0,1.0,0.0,22.0,10.0,12.0,,...,,0.0,0.19,4.0,0.19,-0.4,8.0,1,SG,INO
2,A.J. English,287.0,203.0,24.0,57.0,320.0,315.0,175.0,140.0,0.774,...,0.1365,9.0,0.436,617.0,0.48,11.55,151.0,2,SF,CHS
3,A.J. Guyton,58.0,62.0,12.0,20.0,147.0,80.0,58.0,22.0,0.824,...,0.255,73.0,0.255667,166.0,0.324,4.366667,80.0,3,F,TOT
4,A.J. Hammons,21.0,10.0,13.0,1.0,4.0,36.0,28.0,8.0,0.45,...,0.5,5.0,0.405,17.0,0.472,8.4,22.0,1,F,DNN


In [36]:
total_points = stats.groupby('Player').sum()['PTS']
total_points = pd.DataFrame(total_points)
total_points.reset_index(inplace=True)
total_points.columns = total_points.columns.str.replace(" ", "_").str.lower()
totals = pd.merge(total_points, totals, how='left', on='player')
totals.rename(columns = {'pts': 'total_points'}, inplace=True)
totals.head()

Unnamed: 0,player,total_points,total_personal_fouls,total_turnovers,total_blocks,total_steals,total_assists,total_rebounds,defensive_rebounds,offensive_rebounds,...,3p_percentage,3p_made,fg_percent,field_goals,shooting_percent,player_efficiency_rating,games_played,seasons,position,team
0,A.C. Green,12928.0,2581.0,1508.0,562.0,1103.0,1469.0,10129.0,6553.0,3576.0,...,0.146833,125.0,0.487667,4778.0,0.543556,13.872222,1361.0,18,G-F,FTW
1,A.J. Bramlett,8.0,13.0,3.0,0.0,1.0,0.0,22.0,10.0,12.0,...,,0.0,0.19,4.0,0.19,-0.4,8.0,1,SG,INO
2,A.J. English,1502.0,287.0,203.0,24.0,57.0,320.0,315.0,175.0,140.0,...,0.1365,9.0,0.436,617.0,0.48,11.55,151.0,2,SF,CHS
3,A.J. Guyton,442.0,58.0,62.0,12.0,20.0,147.0,80.0,58.0,22.0,...,0.255,73.0,0.255667,166.0,0.324,4.366667,80.0,3,F,TOT
4,A.J. Hammons,48.0,21.0,10.0,13.0,1.0,4.0,36.0,28.0,8.0,...,0.5,5.0,0.405,17.0,0.472,8.4,22.0,1,F,DNN


In [37]:
hall_of_fame = totals[totals.games_played >= 164]

In [38]:
list_of_hall_of_famers = []
for i, row in hall_of_fame.iterrows():
    if '*' in row['player']:
        list_of_hall_of_famers.append(row['player'])
list_of_hall_of_famers = list(set(list_of_hall_of_famers))

In [39]:
def is_hall_of_famer(x):
    if '*' in x:
        return 1
    else:
        return 0

In [40]:
hall_of_fame['HOF'] = hall_of_fame['player'].apply(is_hall_of_famer)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  hall_of_fame['HOF'] = hall_of_fame['player'].apply(is_hall_of_famer)


In [41]:
hall_of_fame = hall_of_fame.set_index('player')

In [42]:
hall_of_fame.corr()['HOF'].map(abs).sort_values(ascending=False)

HOF                         1.000000
free_throws_made            0.469877
2p_made                     0.443314
total_points                0.413747
field_goals                 0.409497
total_rebounds              0.379603
player_efficiency_rating    0.376964
total_personal_fouls        0.307770
total_assists               0.280950
games_played                0.215158
total_blocks                0.131520
defensive_rebounds          0.111382
offensive_rebounds          0.108104
seasons                     0.102158
total_turnovers             0.092802
total_steals                0.085595
ft_percentage               0.078167
fg_percent                  0.076635
shooting_percent            0.072490
3p_made                     0.055085
2p_percentage               0.025016
3p_percentage               0.019597
Name: HOF, dtype: float64

In [43]:
total_mvp = stats.groupby('Player').sum()['MVP']
total_mvp = pd.DataFrame(total_mvp)
total_mvp.reset_index(inplace=True)
total_mvp.columns = total_mvp.columns.str.replace(" ", "_").str.lower()
totals = pd.merge(total_mvp, totals, how='left', on='player')
totals.rename(columns = {'MVP': 'total_MVP'}, inplace=True)
totals.head()

Unnamed: 0,player,mvp,total_points,total_personal_fouls,total_turnovers,total_blocks,total_steals,total_assists,total_rebounds,defensive_rebounds,...,3p_percentage,3p_made,fg_percent,field_goals,shooting_percent,player_efficiency_rating,games_played,seasons,position,team
0,A.C. Green,0,12928.0,2581.0,1508.0,562.0,1103.0,1469.0,10129.0,6553.0,...,0.146833,125.0,0.487667,4778.0,0.543556,13.872222,1361.0,18,G-F,FTW
1,A.J. Bramlett,0,8.0,13.0,3.0,0.0,1.0,0.0,22.0,10.0,...,,0.0,0.19,4.0,0.19,-0.4,8.0,1,SG,INO
2,A.J. English,0,1502.0,287.0,203.0,24.0,57.0,320.0,315.0,175.0,...,0.1365,9.0,0.436,617.0,0.48,11.55,151.0,2,SF,CHS
3,A.J. Guyton,0,442.0,58.0,62.0,12.0,20.0,147.0,80.0,58.0,...,0.255,73.0,0.255667,166.0,0.324,4.366667,80.0,3,F,TOT
4,A.J. Hammons,0,48.0,21.0,10.0,13.0,1.0,4.0,36.0,28.0,...,0.5,5.0,0.405,17.0,0.472,8.4,22.0,1,F,DNN


In [44]:
total_rings = stats.groupby('Player').sum()['rings']
total_rings = pd.DataFrame(total_rings)
total_rings.reset_index(inplace=True)
total_rings.columns = total_rings.columns.str.replace(" ", "_").str.lower()
totals = pd.merge(total_rings, totals, how='left', on='player')
totals.rename(columns = {'rings': 'total_rings'}, inplace=True)
totals.head()

Unnamed: 0,player,total_rings,mvp,total_points,total_personal_fouls,total_turnovers,total_blocks,total_steals,total_assists,total_rebounds,...,3p_percentage,3p_made,fg_percent,field_goals,shooting_percent,player_efficiency_rating,games_played,seasons,position,team
0,A.C. Green,3,0,12928.0,2581.0,1508.0,562.0,1103.0,1469.0,10129.0,...,0.146833,125.0,0.487667,4778.0,0.543556,13.872222,1361.0,18,G-F,FTW
1,A.J. Bramlett,0,0,8.0,13.0,3.0,0.0,1.0,0.0,22.0,...,,0.0,0.19,4.0,0.19,-0.4,8.0,1,SG,INO
2,A.J. English,0,0,1502.0,287.0,203.0,24.0,57.0,320.0,315.0,...,0.1365,9.0,0.436,617.0,0.48,11.55,151.0,2,SF,CHS
3,A.J. Guyton,0,0,442.0,58.0,62.0,12.0,20.0,147.0,80.0,...,0.255,73.0,0.255667,166.0,0.324,4.366667,80.0,3,F,TOT
4,A.J. Hammons,0,0,48.0,21.0,10.0,13.0,1.0,4.0,36.0,...,0.5,5.0,0.405,17.0,0.472,8.4,22.0,1,F,DNN


In [45]:
totals['mvp'].value_counts()

0    3889
1      19
2       5
3       3
4       2
5       2
6       1
Name: mvp, dtype: int64

In [46]:
totals['total_rings'].value_counts()

0     3318
1      433
2       91
3       40
4       15
5       11
6        4
8        4
7        3
10       1
11       1
Name: total_rings, dtype: int64

In [50]:
wil = 'cool'