# Load Libraries

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import math
from urllib.request import urlopen
from bs4 import BeautifulSoup
import time

from sklearn.preprocessing import LabelEncoder
from scipy.stats import skew
from scipy.special import boxcox1p
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error
from mlxtend.feature_selection import SequentialFeatureSelector as SFS
from mlxtend.plotting import plot_sequential_feature_selection as plot_sfs

# Load Data

### Scraping Data from Basketball Reference

In [9]:
def single_player(season):
    url = f'https://www.basketball-reference.com/leagues/NBA_{season}_totals.html'
    table_html = BeautifulSoup(urlopen(url), 'html.parser').findAll('table')
    df = pd.read_html(str(table_html))[0]
    df = df.drop('Rk', 1) # drop Rk columns
    df.Player = df.Player.str.replace('*','') # remove asterisk on player's name
    df.insert(0,'Season',season) # insert season column
    df = df.apply(pd.to_numeric, errors='coerce').fillna(df) 
    return df

def multiple_player(start_year,end_year):
    df = single_player(start_year)
    while start_year < end_year:
        time.sleep(4)                
        start_year = start_year + 1   
        df = df.append(single_player(start_year))
    return df

def scrape_NBA_team_data(years = [2017, 2018]):
    
    final_df = pd.DataFrame(columns = ["Year", "Team", "W", "L",
                                       "W/L%", "GB", "PS/G", "PA/G",
                                       "SRS", "Playoffs",
                                       "Losing_season"])
    
    # loop through each year
    for y in years:
        # NBA season to scrape
        year = y
        
        # URL to scrape, notice f string:
        url = f"https://www.basketball-reference.com/leagues/NBA_{year}_standings.html"
        
        # collect HTML data
        html = urlopen(url)
        
        # create beautiful soup object from HTML
        soup = BeautifulSoup(html, features="lxml")
        
        # use getText()to extract the headers into a list
        titles = [th.getText() for th in soup.findAll('tr', limit=2)[0].findAll('th')]
        
        # first, find only column headers
        headers = titles[1:titles.index("SRS")+1]
        
        # then, exclude first set of column headers (duplicated)
        titles = titles[titles.index("SRS")+1:]
        
        # next, row titles (ex: Boston Celtics, Toronto Raptors)
        try:
            row_titles = titles[0:titles.index("Eastern Conference")]
        except: row_titles = titles
        # remove the non-teams from this list
        for i in headers:
            row_titles.remove(i)
        row_titles.remove("Western Conference")
        divisions = ["Atlantic Division", "Central Division",
                     "Southeast Division", "Northwest Division",
                     "Pacific Division", "Southwest Division",
                     "Midwest Division"]
        for d in divisions:
            try:
                row_titles.remove(d)
            except:
                print("no division:", d)
        
        # next, grab all data from rows (avoid first row)
        rows = soup.findAll('tr')[1:]
        team_stats = [[td.getText() for td in rows[i].findAll('td')]
                    for i in range(len(rows))]
        # remove empty elements
        team_stats = [e for e in team_stats if e != []]
        # only keep needed rows
        team_stats = team_stats[0:len(row_titles)]
        
        # add team name to each row in team_stats
        for i in range(0, len(team_stats)):
            team_stats[i].insert(0, row_titles[i])
            team_stats[i].insert(0, year)
            
        # add team, year columns to headers
        headers.insert(0, "Team")
        headers.insert(0, "Year")
        
        # create a dataframe with all aquired info
        year_standings = pd.DataFrame(team_stats, columns = headers)
        
        # add a column to dataframe to indicate playoff appearance
        year_standings["Playoffs"] = ["Y" if "*" in ele else "N" for ele in year_standings["Team"]]
        # remove * from team names
        year_standings["Team"] = [ele.replace('*', '') for ele in year_standings["Team"]]
        # add losing season indicator (win % < .5)
        year_standings["Losing_season"] = ["Y" if float(ele) < .5 else "N" for ele in year_standings["W/L%"]]
        
        # append new dataframe to final_df
        final_df = pd.concat([final_df,year_standings],axis=0)
    return final_df

def scrape_award_winners(start_year,end_year,award):
    url = f'https://www.basketball-reference.com/awards/{award}.html'
    table_html = str(BeautifulSoup(urlopen(url), 'html.parser').findAll('table')[0])
    df = pd.read_html(str(table_html))[0]
    df.columns = df.columns.droplevel(0)
    df = df[['Season','Player']]
    return df.loc[(df['Season'] >= str(start_year)) & (df['Season'] <= str(end_year))]

In [3]:
player_df = multiple_player(1998,2022)
player_df.head()

  df = df.drop('Rk', 1) # drop Rk columns
  df.Player = df.Player.str.replace('*','') # remove asterisk on player's name
  df = df.drop('Rk', 1) # drop Rk columns
  df.Player = df.Player.str.replace('*','') # remove asterisk on player's name
  df = df.append(single_player(start_year))
  df = df.drop('Rk', 1) # drop Rk columns
  df.Player = df.Player.str.replace('*','') # remove asterisk on player's name
  df = df.append(single_player(start_year))
  df = df.drop('Rk', 1) # drop Rk columns
  df.Player = df.Player.str.replace('*','') # remove asterisk on player's name
  df = df.append(single_player(start_year))
  df = df.drop('Rk', 1) # drop Rk columns
  df.Player = df.Player.str.replace('*','') # remove asterisk on player's name
  df = df.append(single_player(start_year))
  df = df.drop('Rk', 1) # drop Rk columns
  df.Player = df.Player.str.replace('*','') # remove asterisk on player's name
  df = df.append(single_player(start_year))
  df = df.drop('Rk', 1) # drop Rk columns
  df.Player 

Unnamed: 0,Season,Player,Pos,Age,Tm,G,GS,MP,FG,FGA,...,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS
0,1998,Mahmoud Abdul-Rauf,PG,28.0,SAC,31.0,0.0,530.0,103.0,273.0,...,1.0,6.0,31.0,37.0,58.0,16.0,1.0,19.0,31.0,227.0
1,1998,Tariq Abdul-Wahad,SG,23.0,SAC,59.0,16.0,959.0,144.0,357.0,...,0.672,44.0,72.0,116.0,51.0,35.0,13.0,65.0,81.0,376.0
2,1998,Shareef Abdur-Rahim,SF,21.0,VAN,82.0,82.0,2950.0,653.0,1347.0,...,0.784,227.0,354.0,581.0,213.0,89.0,76.0,257.0,201.0,1829.0
3,1998,Cory Alexander,PG,24.0,TOT,60.0,22.0,1298.0,171.0,400.0,...,0.784,17.0,129.0,146.0,209.0,70.0,11.0,112.0,98.0,488.0
4,1998,Cory Alexander,PG,24.0,SAS,37.0,3.0,501.0,60.0,145.0,...,0.676,7.0,40.0,47.0,71.0,25.0,5.0,47.0,53.0,165.0


In [6]:
!pwd

/Users/claykindiger/Documents/Rice/2023 Fall/DSCI 303/DSCI303FinalProject


In [7]:
player_df.to_csv('/Users/claykindiger/Documents/Rice/2023 Fall/DSCI 303/DSCI303FinalProject/data/players.csv',index = False)

In [10]:
team_df = scrape_NBA_team_data(np.arange(1998,2023))
team_df.head()

no division: Southeast Division
no division: Northwest Division
no division: Southwest Division
no division: Southeast Division
no division: Northwest Division
no division: Southwest Division
no division: Southeast Division
no division: Northwest Division
no division: Southwest Division
no division: Southeast Division
no division: Northwest Division
no division: Southwest Division
no division: Southeast Division
no division: Northwest Division
no division: Southwest Division
no division: Southeast Division
no division: Northwest Division
no division: Southwest Division
no division: Southeast Division
no division: Northwest Division
no division: Southwest Division
no division: Midwest Division
no division: Midwest Division
no division: Midwest Division
no division: Midwest Division
no division: Midwest Division
no division: Midwest Division
no division: Midwest Division
no division: Midwest Division
no division: Midwest Division
no division: Midwest Division
no division: Midwest Divisio

Unnamed: 0,Year,Team,W,L,W/L%,GB,PS/G,PA/G,SRS,Playoffs,Losing_season
0,1998,Miami Heat,55,27,0.671,—,95.0,90.0,5.09,Y,N
1,1998,New York Knicks,43,39,0.524,12.0,91.6,89.1,2.74,Y,N
2,1998,New Jersey Nets,43,39,0.524,12.0,99.6,98.1,1.88,Y,N
3,1998,Washington Wizards,42,40,0.512,13.0,97.2,96.6,1.11,N,N
4,1998,Orlando Magic,41,41,0.5,14.0,90.1,91.2,-0.53,N,N


In [12]:
team_df['Playoffs'] = [1 if row['Playoffs'] == 'Y' else 0 for idx,row in team_df.iterrows()]
team_df['Losing_season'] = [1 if row['Losing_season'] == 'Y' else 0 for idx,row in team_df.iterrows()]
team_df.head()

Unnamed: 0,Year,Team,W,L,W/L%,GB,PS/G,PA/G,SRS,Playoffs,Losing_season
0,1998,Miami Heat,55,27,0.671,—,95.0,90.0,5.09,1,0
1,1998,New York Knicks,43,39,0.524,12.0,91.6,89.1,2.74,1,0
2,1998,New Jersey Nets,43,39,0.524,12.0,99.6,98.1,1.88,1,0
3,1998,Washington Wizards,42,40,0.512,13.0,97.2,96.6,1.11,0,0
4,1998,Orlando Magic,41,41,0.5,14.0,90.1,91.2,-0.53,0,0


In [13]:
team_df.to_csv('/Users/claykindiger/Documents/Rice/2023 Fall/DSCI 303/DSCI303FinalProject/data/teams.csv',index = False)

In [211]:
mvp_df = scrape_award_winners(1997,2022,'mvp')
mvp_df

Unnamed: 0,Season,Player
1,2021-22,Nikola Jokić
2,2020-21,Nikola Jokić
3,2019-20,Giannis Antetokounmpo
4,2018-19,Giannis Antetokounmpo
5,2017-18,James Harden
6,2016-17,Russell Westbrook
7,2015-16,Stephen Curry
8,2014-15,Stephen Curry
9,2013-14,Kevin Durant
10,2012-13,LeBron James


In [212]:
mvp_df.to_csv('/Users/claykindiger/Documents/Rice/2023 Fall/DSCI 303/DSCI303FinalProject/data/mvp.csv',index = False)

In [None]:
players = pd.read_csv('nba_player_stats_2.csv')
team = pd.read_csv('NBA_Team_Stats.csv')

In [None]:
players.tail(5)

In [None]:
team.head()

## Data Merge and Clean

In [213]:
players = pd.read_csv('/Users/claykindiger/Documents/Rice/2023 Fall/DSCI 303/DSCI303FinalProject/data/players.csv')
teams = pd.read_csv('/Users/claykindiger/Documents/Rice/2023 Fall/DSCI 303/DSCI303FinalProject/data/teams.csv')
mvp = pd.read_csv('/Users/claykindiger/Documents/Rice/2023 Fall/DSCI 303/DSCI303FinalProject/data/mvp.csv')
dpoy = pd.read_csv('/Users/claykindiger/Documents/Rice/2023 Fall/DSCI 303/DSCI303FinalProject/data/dpoy.csv')

In [185]:
players.head()

Unnamed: 0,Season,Player,Pos,Age,Tm,G,GS,MP,FG,FGA,...,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS
0,1998,Mahmoud Abdul-Rauf,PG,28.0,SAC,31.0,0.0,530.0,103.0,273.0,...,1.0,6.0,31.0,37.0,58.0,16.0,1.0,19.0,31.0,227.0
1,1998,Tariq Abdul-Wahad,SG,23.0,SAC,59.0,16.0,959.0,144.0,357.0,...,0.672,44.0,72.0,116.0,51.0,35.0,13.0,65.0,81.0,376.0
2,1998,Shareef Abdur-Rahim,SF,21.0,VAN,82.0,82.0,2950.0,653.0,1347.0,...,0.784,227.0,354.0,581.0,213.0,89.0,76.0,257.0,201.0,1829.0
3,1998,Cory Alexander,PG,24.0,TOT,60.0,22.0,1298.0,171.0,400.0,...,0.784,17.0,129.0,146.0,209.0,70.0,11.0,112.0,98.0,488.0
4,1998,Cory Alexander,PG,24.0,SAS,37.0,3.0,501.0,60.0,145.0,...,0.676,7.0,40.0,47.0,71.0,25.0,5.0,47.0,53.0,165.0


In [186]:
teams.head()

Unnamed: 0,Year,Team,W,L,W/L%,GB,PS/G,PA/G,SRS,Playoffs,Losing_season
0,1998,Miami Heat,55,27,0.671,—,95.0,90.0,5.09,1,0
1,1998,New York Knicks,43,39,0.524,12.0,91.6,89.1,2.74,1,0
2,1998,New Jersey Nets,43,39,0.524,12.0,99.6,98.1,1.88,1,0
3,1998,Washington Wizards,42,40,0.512,13.0,97.2,96.6,1.11,0,0
4,1998,Orlando Magic,41,41,0.5,14.0,90.1,91.2,-0.53,0,0


In [187]:
mvp.head()

Unnamed: 0,Season,Player
0,2021-22,Nikola Jokić
1,2020-21,Nikola Jokić
2,2019-20,Giannis Antetokounmpo
3,2018-19,Giannis Antetokounmpo
4,2017-18,James Harden


In [188]:
dpoy.head()

Unnamed: 0,Season,Lg,Player,Voting,Age,Tm,G,MP,PTS,TRB,AST,STL,BLK,FG%,3P%,FT%,WS,WS/48,-9999
0,2022-23,NBA,Jaren Jackson Jr.,(V),23,MEM,63,28.4,18.6,6.8,1.0,1.0,3.0,0.506,0.355,0.788,6.6,0.177,jacksja02
1,2021-22,NBA,Marcus Smart,(V),27,BOS,71,32.3,12.1,3.8,5.9,1.7,0.3,0.418,0.331,0.793,5.6,0.116,smartma01
2,2020-21,NBA,Rudy Gobert,(V),28,UTA,71,30.8,14.3,13.5,1.3,0.6,2.7,0.675,0.0,0.623,11.3,0.248,goberru01
3,2019-20,NBA,Giannis Antetokounmpo,(V),25,MIL,63,30.4,29.5,13.6,5.6,1.0,1.0,0.553,0.304,0.633,11.1,0.279,antetgi01
4,2018-19,NBA,Rudy Gobert,(V),26,UTA,81,31.8,15.9,12.9,2.0,0.8,2.3,0.669,,0.636,14.4,0.268,goberru01


In [189]:
players = players[players['Player'] != 'Player']

In [190]:
players.Tm.unique()

array(['SAC', 'VAN', 'TOT', 'SAS', 'DEN', 'MIL', 'CLE', 'ATL', 'POR',
       'BOS', 'ORL', 'UTA', 'DAL', 'SEA', 'GSW', 'CHH', 'MIA', 'LAC',
       'HOU', 'LAL', 'PHI', 'NJN', 'IND', 'TOR', 'CHI', 'NYK', 'PHO',
       'MIN', 'WAS', 'DET', 'MEM', 'NOH', 'CHA', 'NOK', 'OKC', 'BRK',
       'NOP', 'CHO'], dtype=object)

In [191]:
players[players['Tm'] == 'CHH']

Unnamed: 0,Season,Player,Pos,Age,Tm,G,GS,MP,FG,FGA,...,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS
18,1998,B.J. Armstrong,PG,30.0,CHH,62.0,0.0,772.0,99.0,194.0,...,0.86,12.0,57.0,69.0,144.0,25.0,0.0,35.0,68.0,244.0
36,1998,Corey Beck,PG,26.0,CHH,59.0,14.0,738.0,73.0,159.0,...,0.729,27.0,63.0,90.0,98.0,33.0,7.0,70.0,100.0,191.0
49,1998,Muggsy Bogues,PG,33.0,CHH,2.0,0.0,16.0,2.0,5.0,...,1.0,0.0,1.0,1.0,4.0,2.0,0.0,1.0,2.0,6.0
114,1998,Dell Curry,SG,33.0,CHH,52.0,1.0,971.0,194.0,434.0,...,0.788,26.0,75.0,101.0,69.0,31.0,4.0,54.0,85.0,490.0
134,1998,Tony Delk,PG,24.0,CHH,3.0,0.0,34.0,3.0,4.0,...,0.5,0.0,2.0,2.0,3.0,0.0,0.0,4.0,8.0,8.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2500,2002,Jérôme Moïso,C,23.0,CHH,15.0,0.0,76.0,8.0,20.0,...,,4.0,21.0,25.0,4.0,3.0,2.0,8.0,12.0,16.0
2512,2002,Lee Nailon,SF,26.0,CHH,79.0,41.0,1912.0,369.0,764.0,...,0.747,103.0,188.0,291.0,94.0,59.0,17.0,96.0,175.0,851.0
2569,2002,Eldridge Recasner,SG,34.0,CHH,1.0,0.0,2.0,0.0,0.0,...,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
2648,2002,Robert Traylor,PF,24.0,CHH,61.0,1.0,678.0,87.0,204.0,...,0.631,67.0,120.0,187.0,37.0,24.0,37.0,45.0,127.0,228.0


In [192]:
teams.Team.unique()

array(['Miami Heat', 'New York Knicks', 'New Jersey Nets',
       'Washington Wizards', 'Orlando Magic', 'Boston Celtics',
       'Philadelphia 76ers', 'Chicago Bulls', 'Indiana Pacers',
       'Charlotte Hornets', 'Atlanta Hawks', 'Cleveland Cavaliers',
       'Detroit Pistons', 'Milwaukee Bucks', 'Toronto Raptors',
       'Utah Jazz', 'San Antonio Spurs', 'Minnesota Timberwolves',
       'Houston Rockets', 'Dallas Mavericks', 'Vancouver Grizzlies',
       'Denver Nuggets', 'Seattle SuperSonics', 'Los Angeles Lakers',
       'Phoenix Suns', 'Portland Trail Blazers', 'Sacramento Kings',
       'Golden State Warriors', 'Los Angeles Clippers',
       'Memphis Grizzlies', 'New Orleans Hornets', 'Charlotte Bobcats',
       'New Orleans/Oklahoma City Hornets', 'Oklahoma City Thunder',
       'Brooklyn Nets', 'New Orleans Pelicans'], dtype=object)

In [193]:
team_mapping = {'Miami Heat':'MIA',
                'New York Knicks':'NYK', 
                'New Jersey Nets':'NJN',
                'Washington Wizards':'WAS', 
                'Orlando Magic':'ORL',
                'Boston Celtics':'BOS',
                'Philadelphia 76ers':'PHI', 
                'Chicago Bulls':'CHI', 
                'Indiana Pacers':'IND',
                'Charlotte Hornets':'CHO', 
                'Atlanta Hawks':'ATL', 
                'Cleveland Cavaliers':'CLE',
                'Detroit Pistons':'DET', 
                'Milwaukee Bucks':'MIL', 
                'Toronto Raptors':'TOR',
                'Utah Jazz':'UTA',
                'San Antonio Spurs':'SAS',
                'Minnesota Timberwolves':'MIN',
                'Houston Rockets':'HOU', 
                'Dallas Mavericks':'DAL',
                'Vancouver Grizzlies':'VAN',
                'Denver Nuggets':'DEN',
                'Seattle SuperSonics':'SEA',
                'Los Angeles Lakers':'LAL',
                'Phoenix Suns':'PHO',
                'Portland Trail Blazers':'POR',
                'Sacramento Kings':'SAC',
                'Golden State Warriors':'GSW',
                'Los Angeles Clippers':'LAC',
                'Memphis Grizzlies':'MEM',
                'New Orleans Hornets':'NOH',
                'Charlotte Bobcats':'CHA',
                'New Orleans/Oklahoma City Hornets':'NOK',
                'Oklahoma City Thunder':'OKC',
                'Brooklyn Nets':'BRK',
                'New Orleans Pelicans':'NOP'}

In [194]:
teams_list = []
for idx,row in teams.iterrows():
    if row['Team'] == 'Charlotte Hornets':
        if row['Year'] <= 2006:
            teams_list.append('CHH')
        else:
            teams_list.append(team_mapping[row['Team']])
    else:
        teams_list.append(team_mapping[row['Team']])
teams['Team'] = teams_list

In [195]:
teams[teams['Team'] == 'NOK']

Unnamed: 0,Year,Team,W,L,W/L%,GB,PS/G,PA/G,SRS,Playoffs,Losing_season
261,2006,NOK,38,44,0.463,25.0,92.8,95.6,-2.51,0,1
291,2007,NOK,39,43,0.476,28.0,95.5,97.1,-1.19,0,1


In [196]:
players_team = {}
for ply,group in players.groupby(['Player','Season']):
    if group.shape[0] > 1:
        tm = list(group.Tm)[-1]
        if 28 in group.index:
            print(group.Tm)
        players_team[list(group.index)[0]] = tm
        if tm == 'TOT':
            print(ply)

In [197]:
teams_list = []
for idx,row in players.iterrows():
    if idx in players_team.keys():
        tm = players_team[idx]
        teams_list.append(tm)
    else:
        tm = row['Tm']
        if idx == 3613:
            tm = 'UTA'
        elif idx == 21:
            tm = 'MIA'
        elif idx == 4189:
            tm = 'WAS'
        elif idx == 3037:
            tm = 'BOS'
        elif idx == 11099:
            tm = 'CHI'
        elif idx == 6128:
            tm = 'NJN'
        teams_list.append(tm)

players['Tm'] = teams_list

In [198]:
players.drop_duplicates(['Player','Season'],keep = 'first',inplace = True)

In [199]:
players_team = players.merge(teams, left_on = ['Tm','Season'],right_on = ['Team','Year'],how = 'inner')
players_team.shape

(11862, 41)

In [202]:
players_team.to_csv('/Users/claykindiger/Documents/Rice/2023 Fall/DSCI 303/DSCI303FinalProject/players_team.csv')

In [203]:
mvp.head()

Unnamed: 0,Season,Player
0,2021-22,Nikola Jokić
1,2020-21,Nikola Jokić
2,2019-20,Giannis Antetokounmpo
3,2018-19,Giannis Antetokounmpo
4,2017-18,James Harden


In [204]:
dpoy.head()

Unnamed: 0,Season,Lg,Player,Voting,Age,Tm,G,MP,PTS,TRB,AST,STL,BLK,FG%,3P%,FT%,WS,WS/48,-9999
0,2022-23,NBA,Jaren Jackson Jr.,(V),23,MEM,63,28.4,18.6,6.8,1.0,1.0,3.0,0.506,0.355,0.788,6.6,0.177,jacksja02
1,2021-22,NBA,Marcus Smart,(V),27,BOS,71,32.3,12.1,3.8,5.9,1.7,0.3,0.418,0.331,0.793,5.6,0.116,smartma01
2,2020-21,NBA,Rudy Gobert,(V),28,UTA,71,30.8,14.3,13.5,1.3,0.6,2.7,0.675,0.0,0.623,11.3,0.248,goberru01
3,2019-20,NBA,Giannis Antetokounmpo,(V),25,MIL,63,30.4,29.5,13.6,5.6,1.0,1.0,0.553,0.304,0.633,11.1,0.279,antetgi01
4,2018-19,NBA,Rudy Gobert,(V),26,UTA,81,31.8,15.9,12.9,2.0,0.8,2.3,0.669,,0.636,14.4,0.268,goberru01


In [215]:
year_vec = []
for idx,row in mvp.iterrows():
    if row['Season'][-2] == '9':
        year_vec.append('19' + row['Season'].split('-')[1])
    else:
        year_vec.append('20' + row['Season'].split('-')[1])
mvp['Season'] = year_vec

2
2
2
1
1
1
1
1
1
1
1
1
1
0
0
0
0
0
0
0
0
0
0
9
9


In [216]:
year_vec = []
for idx,row in dpoy.iterrows():
    if row['Season'][-2] == '9':
        year_vec.append('19' + row['Season'].split('-')[1])
    else:
        year_vec.append('20' + row['Season'].split('-')[1])
mvp['Season'] = year_vec

ValueError: Length of values (26) does not match length of index (25)

In [None]:
players_team['mvp'] = [1 if row['mvp'] else 0 for idx,row in players_team.iterrows()]

In [None]:
team.columns

In [None]:
team.

In [None]:
players_team.mvp.value_counts()

## EDA

In [None]:
df_num = players_team.select_dtypes(include = 'number')
df_num.head()

In [None]:
hist = df_num.hist(layout=(15,3),figsize=(15,40))