In [1]:
import warnings
warnings.filterwarnings('ignore')

import pandas as pd
from pandas.tseries.offsets import DateOffset
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import datetime as datetime
import time
import shap
from sklearn.model_selection import GridSearchCV, ShuffleSplit, cross_validate, train_test_split, RandomizedSearchCV
from sklearn.metrics import roc_auc_score, roc_curve, accuracy_score, confusion_matrix, classification_report
import xgboost as xgb
from pandas_profiling import ProfileReport

pd.options.display.max_rows = 200
pd.options.display.max_columns = 100

In [4]:
df = pd.read_csv("data/nba.csv")

In [28]:
def feature_engineering(df):
    df["WOR"] = df.VORP * 2.7
    df["WOR_Year"] = df.groupby(["name", "Year"]).WOR.transform("sum")
    df["WOR_Career_avg"] = df.groupby("name").WOR_Year.transform("mean")
    df["WOR_Career_tot"] = df.groupby("name").WOR_Year.transform("sum")
    return df

In [29]:
df.describe()

Unnamed: 0,Year,Age,G,GS,MP,PER,TS%,3PAr,FTr,ORB%,DRB%,TRB%,AST%,STL%,BLK%,TOV%,USG%,OWS,DWS,WS,WS/48,OBPM,DBPM,BPM,VORP,FG,FGA,FG%,3P,3PA,3P%,2P,2PA,2P%,eFG%,FT,FTA,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS,year_start,year_end,weight,WOR,WOR_Year,WOR_Career_avg,WOR_Career_tot
count,25276.0,25268.0,25276.0,18749.0,24787.0,24750.0,25188.0,19374.0,25174.0,21385.0,21385.0,22191.0,23195.0,21385.0,21385.0,20134.0,20193.0,25237.0,25237.0,25237.0,24750.0,21390.0,21390.0,21390.0,21390.0,25276.0,25276.0,25174.0,19463.0,19463.0,15874.0,25276.0,25276.0,25141.0,25174.0,25276.0,25276.0,24392.0,21390.0,21390.0,24961.0,25276.0,21390.0,21390.0,20198.0,25276.0,25276.0,25276.0,25276.0,25271.0,21390.0,25276.0,25276.0,25276.0
mean,1992.588424,26.684304,50.826317,23.689743,1211.144632,12.481665,0.493251,0.158975,0.325273,6.165022,13.685434,9.928899,13.036504,1.648819,1.420215,15.081102,18.920423,1.260031,1.229972,2.491073,0.065042,-1.770645,-0.543609,-2.31403,0.565783,195.744382,431.222029,0.431071,22.244104,63.719622,0.248663,178.615999,382.15671,0.44571,0.450969,102.31971,136.615089,0.719503,62.221692,147.363534,224.479989,115.188044,40.010472,24.770126,74.207595,116.460753,510.936857,1988.432663,1996.697618,212.075739,1.527614,1.519257,1.519257,32.106766
std,17.340278,3.854205,26.527986,28.698872,942.488797,6.038687,0.09442,0.187875,0.221787,4.853608,6.643834,5.041705,9.219115,1.013455,1.797625,6.911416,5.471004,2.136701,1.272058,3.05999,0.102432,3.792329,2.271906,4.697556,1.339261,188.70206,398.506021,0.095937,38.558287,102.510986,0.176751,179.954455,371.760447,0.099736,0.099133,113.162145,145.764933,0.141772,67.289971,146.08689,227.865287,136.141665,38.794079,37.678685,67.923189,84.940412,494.048109,17.133375,17.774624,26.434199,3.616004,4.319995,3.277624,90.646498
min,1950.0,18.0,1.0,0.0,0.0,-90.6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-5.1,-1.0,-2.8,-2.519,-73.8,-30.4,-86.7,-2.6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1947.0,1947.0,133.0,-7.02,-10.8,-5.535,-141.48
25%,1981.0,24.0,27.0,0.0,339.0,9.8,0.458,0.005,0.208,2.6,8.7,5.9,6.5,1.1,0.3,11.3,15.4,-0.1,0.2,0.2,0.031,-3.4,-1.7,-4.2,-0.2,41.0,98.0,0.393,0.0,1.0,0.1,35.0,82.0,0.407,0.414,18.0,27.0,0.658,12.0,33.0,51.0,19.0,9.0,3.0,18.0,39.0,106.0,1977.0,1985.0,190.0,-0.54,-0.54,-0.27,-1.62
50%,1996.0,26.0,58.0,8.0,1055.0,12.7,0.506,0.064,0.295,5.4,12.7,9.2,10.5,1.5,0.9,14.2,18.7,0.4,0.8,1.4,0.075,-1.5,-0.5,-1.8,0.0,141.0,321.0,0.439,3.0,12.0,0.292,122.0,270.0,0.456,0.463,63.0,88.0,0.743,38.0,106.0,159.0,68.0,29.0,11.0,56.0,109.0,364.0,1991.0,2000.0,210.0,0.0,0.0,0.2025,1.89
75%,2007.0,29.0,76.0,46.0,1973.0,15.6,0.544,0.289,0.4,9.0,18.1,13.5,17.6,2.1,1.9,17.7,22.2,2.0,1.8,3.8,0.115,0.3,0.7,0.3,0.9,299.25,663.0,0.48,27.0,84.0,0.363,269.0,581.0,0.497,0.501,149.0,201.0,0.808,91.0,213.0,322.0,161.0,60.0,30.0,113.0,183.0,780.0,2002.0,2012.0,230.0,2.43,2.16,2.646,34.56
max,2017.0,44.0,88.0,83.0,3882.0,129.1,1.136,1.0,6.0,100.0,100.0,100.0,100.0,24.2,77.8,100.0,100.0,18.3,16.0,25.4,2.123,47.8,46.8,36.2,12.4,1597.0,3159.0,1.0,402.0,886.0,1.0,1597.0,3159.0,1.0,1.5,840.0,1363.0,1.0,587.0,1111.0,2149.0,1164.0,301.0,456.0,464.0,386.0,4029.0,2017.0,2018.0,360.0,33.48,45.36,22.653,906.12


In [30]:
hof = list(set(df.loc[df.Player.str.contains("\*"),"Player"].str.replace("*","").values))
hof

['David Robinson',
 'Oscar Robertson',
 'Alfred McGuire',
 'Elvin Hayes',
 'Michael Jordan',
 'Gary Payton',
 'Earl Monroe',
 'Mitch Richmond',
 'Tom Heinsohn',
 'Walt Bellamy',
 'Bob Lanier',
 'Ed Macauley',
 'Charles Barkley',
 'Clyde Lovellette',
 'Andy Phillip',
 'Jack Twyman',
 'Alex English',
 'Frank Ramsey',
 'Hal Greer',
 'Alex Hannum',
 'Nat Clifton',
 'Sarunas Marciulionis',
 'Mel Daniels',
 'Dennis Rodman',
 'Bob Pettit',
 'Arnie Risen',
 'Bailey Howell',
 'Yao Ming',
 'Moses Malone',
 "Shaquille O'Neal",
 'Richie Guerin',
 'Pat Riley',
 'Alonzo Mourning',
 'Bob Davies',
 'Chet Walker',
 'John Thompson',
 'Don Barksdale',
 'Bernard King',
 'K.C. Jones',
 'Neil Johnston',
 'Hakeem Olajuwon',
 'Harry Gallatin',
 'Willis Reed',
 'Pete Maravich',
 'Artis Gilmore',
 'Wes Unseld',
 'Ralph Sampson',
 'Kevin McHale',
 'Dick McGuire',
 'Sam Jones',
 'Drazen Petrovic',
 'Scottie Pippen',
 'Nate Thurmond',
 'Isiah Thomas',
 'Dominique Wilkins',
 'George Yardley',
 'Jerry Sloan',
 'Slic

In [33]:
df.loc[df.WOR_Career_avg > 18, "name"].unique()

array(['Larry Nance', 'Michael Jordan', 'Gary Payton', 'LeBron James'],
      dtype=object)

In [31]:
df.loc[df.name == "Michael Jordan"]

Unnamed: 0,Year,Player,Pos,Age,Tm,G,GS,MP,PER,TS%,3PAr,FTr,ORB%,DRB%,TRB%,AST%,STL%,BLK%,TOV%,USG%,OWS,DWS,WS,WS/48,OBPM,DBPM,BPM,VORP,FG,FGA,FG%,3P,3PA,3P%,2P,2PA,2P%,eFG%,FT,FTA,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS,name,year_start,year_end,position,height,weight,birth_date,college,WOR,WOR_Year,WOR_Career_avg,WOR_Career_tot
7882,1985.0,Michael Jordan*,SG,21.0,CHI,82.0,82.0,3144.0,25.8,0.592,0.032,0.459,6.3,13.2,9.8,25.3,3.0,1.3,13.0,29.8,10.3,3.7,14.0,0.213,6.8,1.4,8.2,8.1,837.0,1625.0,0.515,9.0,52.0,0.173,828.0,1573.0,0.526,0.518,630.0,746.0,0.845,167.0,367.0,534.0,481.0,196.0,69.0,291.0,285.0,2313.0,Michael Jordan,1985,2003,G-F,6-6,195.0,"February 17, 1963",University of North Carolina,21.87,21.87,18.81,282.15
8280,1986.0,Michael Jordan*,SG,22.0,CHI,18.0,7.0,451.0,27.5,0.533,0.055,0.381,5.6,10.7,8.0,21.7,3.9,2.7,10.5,38.6,1.0,0.5,1.5,0.16,5.1,-0.5,4.7,0.8,150.0,328.0,0.457,3.0,18.0,0.167,147.0,310.0,0.474,0.462,105.0,125.0,0.84,23.0,41.0,64.0,53.0,37.0,21.0,45.0,46.0,408.0,Michael Jordan,1985,2003,G-F,6-6,195.0,"February 17, 1963",University of North Carolina,2.16,2.16,18.81,282.15
8669,1987.0,Michael Jordan*,SG,23.0,CHI,82.0,82.0,3281.0,29.8,0.562,0.029,0.427,5.6,9.3,7.4,22.2,3.6,2.3,9.1,38.3,11.9,5.0,16.9,0.247,8.0,0.6,8.6,8.8,1098.0,2279.0,0.482,12.0,66.0,0.182,1086.0,2213.0,0.491,0.484,833.0,972.0,0.857,166.0,264.0,430.0,377.0,236.0,125.0,272.0,237.0,3041.0,Michael Jordan,1985,2003,G-F,6-6,195.0,"February 17, 1963",University of North Carolina,23.76,23.76,18.81,282.15
9081,1988.0,Michael Jordan*,SG,24.0,CHI,82.0,82.0,3311.0,31.7,0.603,0.027,0.43,4.8,10.7,7.8,27.0,3.9,2.4,9.6,34.1,15.2,6.1,21.2,0.308,9.8,2.3,12.2,11.8,1069.0,1998.0,0.535,7.0,53.0,0.132,1062.0,1945.0,0.546,0.537,723.0,860.0,0.841,139.0,310.0,449.0,485.0,259.0,131.0,252.0,270.0,2868.0,Michael Jordan,1985,2003,G-F,6-6,195.0,"February 17, 1963",University of North Carolina,31.86,31.86,18.81,282.15
9527,1989.0,Michael Jordan*,SG,25.0,CHI,81.0,81.0,3255.0,31.1,0.614,0.055,0.442,5.5,17.3,11.6,34.7,3.6,1.2,11.9,32.1,14.6,5.2,19.8,0.292,9.8,2.7,12.6,12.0,966.0,1795.0,0.538,27.0,98.0,0.276,939.0,1697.0,0.553,0.546,674.0,793.0,0.85,149.0,503.0,652.0,650.0,234.0,65.0,290.0,247.0,2633.0,Michael Jordan,1985,2003,G-F,6-6,195.0,"February 17, 1963",University of North Carolina,32.4,32.4,18.81,282.15
9991,1990.0,Michael Jordan*,SG,26.0,CHI,82.0,82.0,3197.0,31.2,0.606,0.125,0.356,5.3,15.6,10.4,28.6,3.5,1.1,9.8,33.7,14.7,4.3,19.0,0.285,9.7,0.8,10.6,10.1,1034.0,1964.0,0.526,92.0,245.0,0.376,942.0,1719.0,0.548,0.55,593.0,699.0,0.848,143.0,422.0,565.0,519.0,227.0,54.0,247.0,241.0,2753.0,Michael Jordan,1985,2003,G-F,6-6,195.0,"February 17, 1963",University of North Carolina,27.27,27.27,18.81,282.15
10477,1991.0,Michael Jordan*,SG,27.0,CHI,82.0,82.0,3034.0,31.6,0.605,0.051,0.365,4.6,14.3,9.5,25.2,3.7,1.7,8.7,32.9,14.9,5.4,20.3,0.321,8.9,1.8,10.8,9.8,990.0,1837.0,0.539,29.0,93.0,0.312,961.0,1744.0,0.551,0.547,571.0,671.0,0.851,118.0,374.0,492.0,453.0,223.0,83.0,202.0,229.0,2580.0,Michael Jordan,1985,2003,G-F,6-6,195.0,"February 17, 1963",University of North Carolina,26.46,26.46,18.81,282.15
10951,1992.0,Michael Jordan*,SG,28.0,CHI,80.0,80.0,3102.0,27.7,0.579,0.055,0.325,3.5,15.3,9.5,25.7,3.0,1.5,8.8,31.7,12.1,5.6,17.7,0.274,6.9,1.7,8.6,8.3,943.0,1818.0,0.519,27.0,100.0,0.27,916.0,1718.0,0.533,0.526,491.0,590.0,0.832,91.0,420.0,511.0,489.0,182.0,75.0,200.0,201.0,2404.0,Michael Jordan,1985,2003,G-F,6-6,195.0,"February 17, 1963",University of North Carolina,22.41,22.41,18.81,282.15
11403,1993.0,Michael Jordan*,SG,29.0,CHI,78.0,78.0,3067.0,29.7,0.564,0.115,0.284,4.9,15.1,9.8,25.2,3.7,1.3,8.4,34.7,12.0,5.2,17.2,0.27,8.3,1.2,9.5,8.9,992.0,2003.0,0.495,81.0,230.0,0.352,911.0,1773.0,0.514,0.515,476.0,569.0,0.837,135.0,387.0,522.0,428.0,221.0,61.0,207.0,188.0,2541.0,Michael Jordan,1985,2003,G-F,6-6,195.0,"February 17, 1963",University of North Carolina,24.03,24.03,18.81,282.15
12369,1995.0,Michael Jordan*,SG,31.0,CHI,17.0,17.0,668.0,22.1,0.493,0.079,0.337,4.4,16.2,10.3,24.2,2.3,1.5,7.0,33.2,1.2,1.1,2.3,0.167,2.0,0.7,2.7,0.8,166.0,404.0,0.411,16.0,32.0,0.5,150.0,372.0,0.403,0.431,109.0,136.0,0.801,25.0,92.0,117.0,90.0,30.0,13.0,35.0,47.0,457.0,Michael Jordan,1985,2003,G-F,6-6,195.0,"February 17, 1963",University of North Carolina,2.16,2.16,18.81,282.15
