# Load Libraries

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import math

from sklearn.preprocessing import LabelEncoder
from scipy.stats import skew
from scipy.special import boxcox1p
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error
from mlxtend.feature_selection import SequentialFeatureSelector as SFS
from mlxtend.plotting import plot_sequential_feature_selection as plot_sfs

# Load Data

In [33]:
# Read the two CSV inputs (paths are relative to the notebook's working directory):
# the first becomes `players`, the second becomes `team`.
players, team = map(pd.read_csv, ('nba_player_stats_2.csv', 'NBA_Team_Stats.csv'))

In [34]:
# Preview the first five rows of the player table (5 is head()'s default).
players.head()

Unnamed: 0,rk,player,pos,age,tm,g,gs,mp,fg,fga,...,drb,trb,ast,stl,blk,tov,pf,pts,season,mvp
0,1,Mahmoud Abdul-Rauf,PG,28,SAC,31,0,17.1,3.3,8.8,...,1.0,1.2,1.9,0.5,0.0,0.6,1.0,7.3,1997-98,False
1,2,Tariq Abdul-Wahad,SG,23,SAC,59,16,16.3,2.4,6.1,...,1.2,2.0,0.9,0.6,0.2,1.1,1.4,6.4,1997-98,False
2,3,Shareef Abdur-Rahim,SF,21,VAN,82,82,36.0,8.0,16.4,...,4.3,7.1,2.6,1.1,0.9,3.1,2.5,22.3,1997-98,False
3,4,Cory Alexander,PG,24,TOT,60,22,21.6,2.9,6.7,...,2.2,2.4,3.5,1.2,0.2,1.9,1.6,8.1,1997-98,False
4,4,Cory Alexander,PG,24,SAS,37,3,13.5,1.6,3.9,...,1.1,1.3,1.9,0.7,0.1,1.3,1.4,4.5,1997-98,False


In [35]:
# Preview the first five rows of the team table.
team.head(n=5)

Unnamed: 0,No,Team,G,Min,Pts,Reb,Ast,Stl,Blk,To,...,Oreb,Fgm-a,Pct,3gm-a,Pct.1,Ftm-a,Pct.2,Eff,Deff,Year
0,1,Chicago,103,48.4,96.0,44.1,23.1,8.6,4.3,13.0,...,14.9,36.7-81.7,0.449,3.9-12.0,0.323,18.7-25.2,0.741,111.6,17.5,1997-1998
1,2,Utah,102,48.3,98.6,40.8,24.7,7.6,4.8,14.7,...,11.3,35.9-74.3,0.483,3.1-8.4,0.368,23.8-30.9,0.768,116.3,17.5,1997-1998
2,3,Phoenix,86,48.6,99.3,41.9,25.6,9.2,5.3,14.4,...,12.1,38.2-82.0,0.466,5.2-14.7,0.355,17.7-23.6,0.747,117.1,13.6,1997-1998
3,4,L.A.Lakers,95,48.3,104.8,42.9,24.3,8.7,6.8,14.7,...,13.2,38.0-79.1,0.48,6.1-17.3,0.35,22.8-33.7,0.675,120.8,13.2,1997-1998
4,5,San Antonio,91,48.4,92.5,44.1,21.9,6.2,6.9,15.3,...,11.9,35.1-75.1,0.468,3.7-10.8,0.344,18.5-26.8,0.688,108.0,13.1,1997-1998


## Merge Data

In [36]:
# Show four consecutive row labels as an example of how one player-season can
# occupy several rows with different 'tm' values.
example_rows = [9741, 9742, 9743, 9744]
players.loc[players.index.isin(example_rows)]

Unnamed: 0,rk,player,pos,age,tm,g,gs,mp,fg,fga,...,drb,trb,ast,stl,blk,tov,pf,pts,season,mvp
9741,373,A.J. Price,PG,28,TOT,26,0,12.5,2.0,5.3,...,1.0,1.2,1.8,0.3,0.0,0.5,0.6,5.1,2014-15,False
9742,373,A.J. Price,PG,28,IND,10,0,19.3,3.9,8.9,...,1.2,1.4,2.7,0.4,0.0,1.0,0.9,10.5,2014-15,False
9743,373,A.J. Price,PG,28,CLE,11,0,7.9,0.8,3.1,...,1.0,1.4,1.2,0.3,0.0,0.3,0.1,2.0,2014-15,False
9744,373,A.J. Price,PG,28,PHO,5,0,8.8,0.6,2.8,...,0.6,0.6,1.2,0.0,0.0,0.2,1.0,1.2,2014-15,False


In [37]:
# Abbreviation used in players['tm'] -> team name used for the lookup into the team table.
# Several abbreviations deliberately share one name (e.g. CHH/CHA/CHO, NOH/NOK/NOP).
team_mapping = {
    'SAC': 'Sacramento',
    'VAN': 'Vancouver',
    'SAS': 'San Antonio',
    'DEN': 'Denver',
    'MIL': 'Milwaukee',
    'CLE': 'Cleveland',
    'ATL': 'Atlanta',
    'POR': 'Portland',
    'BOS': 'Boston',
    'ORL': 'Orlando',
    'UTA': 'Utah',
    'DAL': 'Dallas',
    'SEA': 'Seattle',
    'GSW': 'Golden State',
    'CHH': 'Charlotte',
    'MIA': 'Miami',
    'LAC': 'L.A.Clippers',
    'PHI': 'Philadelphia',
    'LAL': 'L.A.Lakers',
    'NJN': 'New Jersey',
    'IND': 'Indiana',
    'TOR': 'Toronto',
    'CHI': 'Chicago',
    'NYK': 'New York',
    'PHO': 'Phoenix',
    'HOU': 'Houston',
    'MIN': 'Minnesota',
    'WAS': 'Washington',
    'DET': 'Detroit',
    'MEM': 'Memphis',
    'NOH': 'New Orleans',
    'CHA': 'Charlotte',
    'NOK': 'New Orleans',
    'OKC': 'Oklahoma City',
    'BRK': 'Brooklyn',
    'NOP': 'New Orleans',
    'CHO': 'Charlotte',
}

# For every (player, season) group that contains more than one distinct 'tm'
# value, map the row label of the group's FIRST row to the 'tm' of its LAST row.
# NOTE(review): this relies on the file's row order (first row of the group is
# the one to relabel, last row is the team to credit) — confirm against the CSV.
multi_team_player = {
    stints.index[0]: stints['tm'].iloc[-1]
    for _, stints in players.groupby(['player', 'season'])
    if stints['tm'].nunique() > 1
}

multi_team_player

{9741: 'PHO',
 7039: 'PHO',
 8178: 'HOU',
 8754: 'DEN',
 13296: 'DEN',
 6577: 'NOH',
 8904: 'SAC',
 14082: 'PHO',
 288: 'PHI',
 4147: 'TOR',
 4701: 'NOK',
 6705: 'CHI',
 7305: 'GSW',
 2707: 'DEN',
 6181: 'LAL',
 5989: 'MEM',
 9204: 'PHI',
 9720: 'MIN',
 5436: 'SEA',
 4892: 'GSW',
 6026: 'NYK',
 6896: 'WAS',
 7495: 'GSW',
 13087: 'CHI',
 6904: 'MIN',
 11821: 'SAC',
 12518: 'PHI',
 5815: 'LAC',
 12793: 'SAC',
 13452: 'WAS',
 10410: 'MEM',
 9800: 'NYK',
 6960: 'TOR',
 14134: 'NOP',
 12571: 'MIN',
 4933: 'DEN',
 6069: 'DET',
 6647: 'PHI',
 7180: 'CLE',
 9486: 'POR',
 3944: 'MIA',
 510: 'TOR',
 9832: 'DAL',
 10445: 'GSW',
 3620: 'ORL',
 4194: 'TOR',
 12606: 'CLE',
 13243: 'LAL',
 13953: 'BRK',
 5029: 'PHI',
 9054: 'WAS',
 9661: 'SAC',
 10283: 'SAS',
 10541: 'CLE',
 8776: 'IND',
 647: 'CLE',
 12003: 'NOP',
 10877: 'BRK',
 13050: 'GSW',
 6197: 'SAC',
 3222: 'NYK',
 6666: 'CLE',
 10553: 'ORL',
 7066: 'NYK',
 2688: 'WAS',
 3204: 'NJN',
 3782: 'MIL',
 1255: 'ORL',
 1747: 'CLE',
 4959: 'ATL',
 55

In [38]:
# Row labels whose 'tm' abbreviation is replaced by hand before the name lookup.
# NOTE(review): presumably these rows carry a code that team_mapping lacks —
# confirm against the raw CSV.
manual_abbr = {
    3613: 'UTA',
    21: 'MIA',
    4189: 'WAS',
    3037: 'BOS',
    11099: 'CHI',
    5792: 'NJN',
}

# Resolve one team name per row, in row order:
#   1. rows listed in multi_team_player use the abbreviation stored there;
#   2. otherwise a manual override applies if the row label has one;
#   3. otherwise the row's own 'tm' value is used.
# The chosen abbreviation is then translated through team_mapping (an unknown
# abbreviation raises KeyError).
teams = [
    team_mapping[
        multi_team_player[label]
        if label in multi_team_player
        else manual_abbr.get(label, abbr)
    ]
    for label, abbr in players['tm'].items()
]

# Overwrite the abbreviation column with the resolved team names.
players['tm'] = teams

14573

In [40]:
def expand_season(season):
    """Expand a short season label such as '1997-98' into '1997-1998'.

    The century of the second year is '19' when its two-digit form starts
    with '9', and '20' otherwise (the same rule the cell always used, so it
    is only valid for seasons ending between 1990 and 2089).

    A label whose second part is not exactly two characters long (for example
    an already expanded '1997-1998') is returned unchanged. This makes the
    cell safe to re-run: previously a second run produced '1997-191998'.
    """
    parts = season.split('-')
    start, end = parts[0], parts[1]
    if len(end) != 2:
        # Not in the short 'YYYY-YY' form; leave it alone.
        return season
    century = '19' if end[0] == '9' else '20'
    return start + '-' + century + end


# Build the expanded labels in row order; `year_vec` stays a plain list so any
# cell that inspects it keeps working.
year_vec = [expand_season(label) for label in players['season']]
players['season'] = year_vec

['1997-1998',
 '1997-1998',
 '1997-1998',
 '1997-1998',
 '1997-1998',
 '1997-1998',
 '1997-1998',
 '1997-1998',
 '1997-1998',
 '1997-1998']

In [42]:
# Keep only the first row of each (player, season) pair. Reassigning instead
# of using inplace=True avoids mutating the frame object behind the name.
# NOTE(review): for players with several rows in one season the surviving row
# is whichever comes first in the file — confirm that is the intended one.
players = players.drop_duplicates(subset=['player', 'season'], keep='first')

In [44]:
# Attach team-level columns to each player row. Inner join: a player row is
# kept only when its (tm, season) pair equals some (Team, Year) pair in `team`;
# rows without a match are dropped.
players_team = pd.merge(
    players,
    team,
    how='inner',
    left_on=['tm', 'season'],
    right_on=['Team', 'Year'],
)
players_team.shape

(11408, 54)