# NBA Predictive Machine Learning Model
## Jordan Stapinski (jstapins), Calvin Lui (clui)
### Project II - Practical Data Science 67-364 Spring 2018

In [5]:
# Data Importing
import matplotlib.pyplot as plt
import numpy as np
import operator
import pandas as pd

%matplotlib inline

# Defining Constants
PLAYER_CSV_FILE = './nba-enhanced-stats/2017-18_playerBoxScore.csv'
TEAM_CSV_FILE = './nba-enhanced-stats/2017-18_teamBoxScore.csv'
PREVIEW_LEN = 10

player_cols = ['gmDate', 'gmTime', 'seasTyp', 'playLNm', 'playFNm', 'teamAbbr', 'teamConf', 'teamDiv', 'teamLoc', 'teamRslt', 'teamDayOff', 'offLNm1', 'offFNm1', 'offLNm2', 'offFNm2', 'offLNm3', 'offFNm3', 'playDispNm', 'playStat', 'playMin', 'playPos', 'playHeight', 'playWeight', 'playBDate', 'playPTS', 'playAST', 'playTO', 'playSTL', 'playBLK', 'playPF', 'playFGA', 'playFGM', 'playFG%', 'play2PA', 'play2PM', 'play2P%', 'play3PA', 'play3PM', 'play3P%', 'playFTA', 'playFTM', 'playFT%', 'playORB', 'playDRB', 'playTRB', 'opptAbbr', 'opptConf', 'opptDiv', 'opptLoc', 'opptRslt', 'opptDayOff']
team_cols = ['gmDate', 'gmTime', 'seasTyp', 'offLNm1', 'offFNm1', 'offLNm2', 'offFNm2', 'offLNm3', 'offFNm3', 'teamAbbr', 'teamConf', 'teamDiv', 'teamLoc', 'teamRslt', 'teamMin', 'teamDayOff', 'teamPTS', 'teamAST', 'teamTO', 'teamSTL', 'teamBLK', 'teamPF', 'teamFGA', 'teamFGM', 'teamFG%', 'team2PA', 'team2PM', 'team2P%', 'team3PA', 'team3PM', 'team3P%', 'teamFTA', 'teamFTM', 'teamFT%', 'teamORB', 'teamDRB', 'teamTRB', 'teamPTS1', 'teamPTS2', 'teamPTS3', 'teamPTS4', 'teamPTS5', 'teamPTS6', 'teamPTS7', 'teamPTS8', 'teamTREB%', 'teamASST%', 'teamTS%', 'teamEFG%', 'teamOREB%', 'teamDREB%', 'teamTO%', 'teamSTL%', 'teamBLK%', 'teamBLKR', 'teamPPS', 'teamFIC', 'teamFIC40', 'teamOrtg', 'teamDrtg', 'teamEDiff', 'teamPlay%', 'teamAR', 'teamAST/TO', 'teamSTL/TO', 'opptAbbr', 'opptConf', 'opptDiv', 'opptLoc', 'opptRslt', 'opptMin', 'opptDayOff', 'opptPTS', 'opptAST', 'opptTO', 'opptSTL', 'opptBLK', 'opptPF', 'opptFGA', 'opptFGM', 'opptFG%', 'oppt2PA', 'oppt2PM', 'oppt2P%', 'oppt3PA', 'oppt3PM', 'oppt3P%', 'opptFTA', 'opptFTM', 'opptFT%', 'opptORB', 'opptDRB', 'opptTRB', 'opptPTS1', 'opptPTS2', 'opptPTS3', 'opptPTS4', 'opptPTS5', 'opptPTS6', 'opptPTS7', 'opptPTS8', 'opptTREB%', 'opptASST%', 'opptTS%', 'opptEFG%', 'opptOREB%', 'opptDREB%', 'opptTO%', 'opptSTL%', 'opptBLK%', 'opptBLKR', 'opptPPS', 'opptFIC', 'opptFIC40', 'opptOrtg', 'opptDrtg', 'opptEDiff', 'opptPlay%', 'opptAR', 'opptAST/TO', 'opptSTL/TO', 'poss', 'pace']

player_stats = pd.read_csv(PLAYER_CSV_FILE, sep=',', names=player_cols, encoding='latin-1', skiprows=[0])
team_stats = pd.read_csv(TEAM_CSV_FILE, sep=',', names=player_cols, encoding='latin-1', skiprows=[0])
team_stats.head(PREVIEW_LEN)

# Note team_stats have two rows for same ref

Unnamed: 0,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9,Unnamed: 10,Unnamed: 11,Unnamed: 12,Unnamed: 13,Unnamed: 14,Unnamed: 15,Unnamed: 16,Unnamed: 17,Unnamed: 18,Unnamed: 19,Unnamed: 20,Unnamed: 21,Unnamed: 22,Unnamed: 23,Unnamed: 24,Unnamed: 25,Unnamed: 26,Unnamed: 27,Unnamed: 28,Unnamed: 29,Unnamed: 30,Unnamed: 31,Unnamed: 32,Unnamed: 33,Unnamed: 34,Unnamed: 35,Unnamed: 36,Unnamed: 37,Unnamed: 38,Unnamed: 39,Unnamed: 40,Unnamed: 41,Unnamed: 42,Unnamed: 43,Unnamed: 44,Unnamed: 45,Unnamed: 46,Unnamed: 47,Unnamed: 48,Unnamed: 49,Unnamed: 50,Unnamed: 51,Unnamed: 52,Unnamed: 53,Unnamed: 54,Unnamed: 55,Unnamed: 56,Unnamed: 57,Unnamed: 58,Unnamed: 59,Unnamed: 60,Unnamed: 61,Unnamed: 62,Unnamed: 63,Unnamed: 64,Unnamed: 65,Unnamed: 66,Unnamed: 67,Unnamed: 68,Unnamed: 69,Unnamed: 70,Unnamed: 71,gmDate,gmTime,seasTyp,playLNm,playFNm,teamAbbr,teamConf,teamDiv,teamLoc,teamRslt,...,playFT%,playORB,playDRB,playTRB,opptAbbr,opptConf,opptDiv,opptLoc,opptRslt,opptDayOff
2017-10-17,08:00,Regular,Forte,Brian,Smith,Michael,McCutchen,Monty,BOS,East,Atlantic,Away,Loss,241,0,99,24,12,11,4,24,88,36,0.4091,56,28,0.5,32,8,0.25,25,19,0.76,9,37,46,19,19,33,28,0,0,0,0,47.9167,66.6667,0.5,0.4545,18.0,80.4348,10.8108,10.9692,3.9888,7.1429,1.125,75.375,62.5519,98.7227,101.7143,-2.9916,0.3956,17.7778,2.0,91.6667,CLE,East,Central,Home,Win,240,0,102,19,17,3,4,25,83,38,0.4578,61,...,55.5208,101.7143,98.7227,2.9916,0.4176,14.6154,1.1176,17.6471,100.2809,99.8648
2017-10-17,08:00,Regular,Forte,Brian,Smith,Michael,McCutchen,Monty,CLE,East,Central,Home,Win,240,0,102,19,17,3,4,25,83,38,0.4578,61,33,0.541,22,5,0.2273,25,21,0.84,9,41,50,29,25,18,30,0,0,0,0,52.0833,50.0,0.5426,0.488,19.5652,82.0,15.3153,2.9916,3.9888,6.5574,1.2289,66.625,55.5208,101.7143,98.7227,2.9916,0.4176,14.6154,1.1176,17.6471,BOS,East,Atlantic,Away,Loss,241,0,99,24,12,11,4,24,88,36,0.4091,56,...,62.5519,98.7227,101.7143,-2.9916,0.3956,17.7778,2.0,91.6667,100.2809,100.2809
2017-10-17,10:30,Regular,Maddox,Tre,Garretson,Ron,Foster,Scott,HOU,West,Southwest,Away,Win,239,0,122,28,13,9,5,16,97,47,0.4845,56,32,0.5714,41,15,0.3659,19,13,0.6842,10,33,43,34,28,26,34,0,0,0,0,51.1905,59.5745,0.579,0.5619,22.2222,84.6154,10.9834,8.7795,4.8775,8.9286,1.2577,97.875,81.9038,119.0108,118.0353,0.9755,0.47,19.1309,2.1538,69.2308,GS,West,Pacific,Home,Loss,241,0,121,34,17,5,9,25,80,43,0.5375,50,...,86.2033,118.0353,119.0108,-0.9755,0.4725,24.2442,2.0,29.4118,102.5117,102.9406
2017-10-17,10:30,Regular,Maddox,Tre,Garretson,Ron,Foster,Scott,GS,West,Pacific,Home,Loss,241,0,121,34,17,5,9,25,80,43,0.5375,50,27,0.54,30,16,0.5333,21,19,0.9048,6,35,41,35,36,30,20,0,0,0,0,48.8095,79.0698,0.6779,0.6375,15.3846,77.7778,16.0015,4.8775,8.7795,18.0,1.5125,103.875,86.2033,118.0353,119.0108,-0.9755,0.4725,24.2442,2.0,29.4118,HOU,West,Southwest,Away,Win,239,0,122,28,13,9,5,16,97,47,0.4845,56,...,81.9038,119.0108,118.0353,0.9755,0.47,19.1309,2.1538,69.2308,102.5117,102.0863
2017-10-18,07:00,Regular,Davis,Marc,Boland,Matt,DeRosa,Joe,CHA,East,Southeast,Away,Loss,238,0,90,16,17,4,3,15,73,29,0.3973,43,20,0.4651,30,9,0.3,29,23,0.7931,3,44,47,27,18,25,20,0,0,0,0,50.0,55.1724,0.5247,0.4589,7.3171,83.0189,16.5434,4.0412,3.0309,6.9767,1.2329,58.875,49.4748,90.927,103.0506,-12.1236,0.3333,13.4725,0.9412,23.5294,DET,East,Central,Home,Win,239,0,102,24,9,14,3,21,96,41,0.4271,70,...,70.7113,103.0506,90.927,12.1236,0.4271,17.8731,2.6667,155.5556,98.9805,99.8123
2017-10-18,07:00,Regular,Davis,Marc,Boland,Matt,DeRosa,Joe,DET,East,Central,Home,Win,239,0,102,24,9,14,3,21,96,41,0.4271,70,32,0.4571,26,9,0.3462,12,11,0.9167,9,38,47,29,27,26,20,0,0,0,0,50.0,58.5366,0.5036,0.474,16.9811,92.6829,8.161,14.1442,3.0309,4.2857,1.0625,84.5,70.7113,103.0506,90.927,12.1236,0.4271,17.8731,2.6667,155.5556,CHA,East,Southeast,Away,Loss,238,0,90,16,17,4,3,15,73,29,0.3973,43,...,49.4748,90.927,103.0506,-12.1236,0.3333,13.4725,0.9412,23.5294,98.9805,99.3946
2017-10-18,07:00,Regular,Cutler,Kevin,Brothers,Tony,Collins,Derrick,BKN,East,Atlantic,Away,Loss,241,0,131,22,20,7,2,25,94,45,0.4787,64,33,0.5156,30,12,0.4,32,29,0.9063,11,32,43,30,33,35,33,0,0,0,0,47.7778,48.8889,0.606,0.5426,25.0,69.5652,15.6152,6.18,1.7657,3.125,1.3936,82.0,68.0498,115.6539,123.5996,-7.9457,0.4369,14.6588,1.1,35.0,IND,East,Central,Home,Win,240,0,140,29,14,12,9,25,102,53,0.5196,68,...,94.7917,123.5996,115.6539,7.9457,0.5196,18.2298,2.0714,85.7143,113.269,112.799
2017-10-18,07:00,Regular,Cutler,Kevin,Brothers,Tony,Collins,Derrick,IND,East,Central,Home,Win,240,0,140,29,14,12,9,25,102,53,0.5196,68,44,0.6471,34,9,0.2647,32,25,0.7813,14,33,47,29,36,41,34,0,0,0,0,52.2222,54.717,0.603,0.5637,30.4348,75.0,10.7626,10.5942,7.9457,13.2353,1.3725,113.75,94.7917,123.5996,115.6539,7.9457,0.5196,18.2298,2.0714,85.7143,BKN,East,Atlantic,Away,Loss,241,0,131,22,20,7,2,25,94,45,0.4787,64,...,68.0498,115.6539,123.5996,-7.9457,0.4369,14.6588,1.1,35.0,113.269,113.269
2017-10-18,07:00,Regular,Lane,Karl,Holtkamp,Lauren,Malloy,Ed,MIA,East,Southeast,Away,Loss,240,0,109,27,13,6,4,20,102,44,0.4314,72,36,0.5,30,8,0.2667,17,13,0.7647,11,33,44,37,18,23,31,0,0,0,0,46.3158,61.3636,0.4978,0.4706,21.5686,75.0,10.614,5.6486,3.7657,5.5556,1.0686,75.875,63.2292,102.6166,109.2067,-6.5901,0.4231,18.0626,2.0769,46.1538,ORL,East,Southeast,Home,Win,240,0,116,22,15,8,10,18,90,43,0.4778,69,...,78.8542,109.2067,102.6166,6.5901,0.4574,15.7413,1.4667,53.3333,106.2206,106.2206
2017-10-18,07:00,Regular,Lane,Karl,Holtkamp,Lauren,Malloy,Ed,ORL,East,Southeast,Home,Win,240,0,116,22,15,8,10,18,90,43,0.4778,69,35,0.5072,21,8,0.381,29,22,0.7586,11,40,51,32,26,30,28,0,0,0,0,53.6842,51.1628,0.5644,0.5222,25.0,78.4314,12.7378,7.5315,9.4144,14.4928,1.2889,94.625,78.8542,109.2067,102.6166,6.5901,0.4574,15.7413,1.4667,53.3333,MIA,East,Southeast,Away,Loss,240,0,109,27,13,6,4,20,102,44,0.4314,72,...,63.2292,102.6166,109.2067,-6.5901,0.4231,18.0626,2.0769,46.1538,106.2206,106.2206


In [2]:
# Data Cleaning

In [6]:
# Player stats representation?

"""
CONVENTION: A player is represented as an object with the following attributes:
- PPG (points per game)
- APG (assists per game)
- SPG (steals)
- RPG (rebounds)
- GP (games played)
- raw_player_df (dataframe for just this player)
and methods:
- stats_vs_specific_team(team_abbrev):
    returns a dictionary mapping above attributes to just against a specific team
... more to be enumerated later
"""

class Player(object):
    def __init__(self, ppg, apg, spg, rpg, gp, raw_player_df):
        self.ppg = ppg
        self.apg = apg
        self.spg = spg
        self.rpg = rpg
        self.gp = gp
        self.raw_player_df = raw_player_df
        
    def stats_vs_specific_team(team_abbrev):
        pass
        # returns dictionary of funneled down values

'\nCONVENTION: A player is represented as an object with the following attributes:\n- PPG\n- APG\n- SPG (steals)\n'

In [3]:
# Team Generating (object-oriented programming?)

"""
CONVENTION: A team is represented as a list of player objects
"""

In [None]:
# Model Params / Features Definition

In [None]:
# Model Training

In [None]:
# Scenarios