# LS88 Project

###   Imports

In [1]:
%matplotlib inline
import numpy as np
import datascience as ds
from datascience import Table, make_array, predicates
import nba_py as py
from nba_py import player
import matplotlib.pyplot as plt
plt.style.use('fivethirtyeight')

### Load the Main Table

In [2]:
main = Table().read_table('PbP_15_16_reduced.csv', sep=',')
main.show(5)

GAME_ID,EVENTNUM,PCTIMESTRING,TIME,PERIOD,HOME_TEAM,AWAY_TEAM,HOME_SCORE,AWAY_SCORE,POINTS_SCORED,SHOT_MADE,SHOT_TYPE,SHOT_DIST,PLAYER1_ID,PLAYER1_NAME,PLAYER1_TEAM_NICKNAME,ASSIST_PLAYER_ID,EVENTMSGTYPE,EVENTMSGACTIONTYPE
21500001,0,12:00,0,1,Hawks,Pistons,0,0,0,,,,0,,,,Start Period,
21500001,1,12:00,0,1,Hawks,Pistons,0,0,0,,,,201143,Al Horford,Hawks,,Jumpball,
21500001,2,11:41,19,1,Hawks,Pistons,0,0,0,0.0,Layup,2.0,203083,Andre Drummond,Pistons,,Miss,Driving Layup
21500001,3,11:39,21,1,Hawks,Pistons,0,0,0,,,,203145,Kent Bazemore,Hawks,,Rebound,Player
21500001,4,11:37,23,1,Hawks,Pistons,0,0,0,,,,203145,Kent Bazemore,Hawks,,Turnover,Out of Bounds - Bad Pass Turnover


### Helper Functions

In [60]:
def calc_efg(FGM, THREEPTM, FGA):
    try:
        (FGM + (0.5 * THREEPTM)) / FGA
    except:
        return 0
    
    return (FGM + (0.5 * THREEPTM)) / FGA

def calc_ts(FTA, PTS, FGA):
    try:
        PTS / ((2 * FGA) + (0.88 * FTA))
    except:
        return 0
    
    return PTS / ((2 * FGA) + (0.88 * FTA))

def convert_season_to_index(season):
    end = season[5:]
    int_year = int(end)
    return 18 - int_year

def time_string_to_number(time_string):
    index = len(time_string)-3
    minute = int(time_string[:index:])
    seconds = int(time_string[index::].replace(':', '')) / 60
    return minute + seconds

### Get Any Player's TS and ES

In [83]:
def get_season_stats(first, last):
    first = first.replace('.', '')    
    
    print(first)
    print(last)
    
    try:
        player_id = py.player.get_player(first, last_name=last)
    except:
        return []
          
    player_general_splits = py.player.PlayerGeneralSplits(player_id)

    player_yearoveryear_splits = py.player.PlayerYearOverYearSplits(player_id, '2015-16')
    


    q = ds.Table.from_df(player_yearoveryear_splits.by_year())
    

    q = q.where('GROUP_VALUE', '2015-16')
    
    if q.num_rows < 1:
        return []

    FGA = q.column('FGA').item(0)
    FG3M = q.column('FG3M').item(0)
    FGM = q.column('FGM').item(0)

    EFG = calc_efg(FGM, FG3M, FGA)
    
    FTA = q.column('FTA').item(0)
    PTS = q.column('PTS').item(0)
    
    TS = calc_ts(FTA, PTS, FGA)
    
    print([TS, EFG])

    return [TS, EFG]


In [79]:
get_season_stats('Craig', 'Ehlo')

Craig
Ehlo
GROUP_SET | GROUP_VALUE | TEAM_ID | TEAM_ABBREVIATION | MAX_GAME_DATE | GP   | W    | L    | W_PCT | MIN  | FGM  | FGA  | FG_PCT | FG3M | FG3A | FG3_PCT | FTM  | FTA  | FT_PCT | OREB | DREB | REB  | AST  | TOV  | STL  | BLK  | BLKA | PF   | PFD  | PTS  | PLUS_MINUS | NBA_FANTASY_PTS | DD2  | TD3  | GP_RANK | W_RANK | L_RANK | W_PCT_RANK | MIN_RANK | FGM_RANK | FGA_RANK | FG_PCT_RANK | FG3M_RANK | FG3A_RANK | FG3_PCT_RANK | FTM_RANK | FTA_RANK | FT_PCT_RANK | OREB_RANK | DREB_RANK | REB_RANK | AST_RANK | TOV_RANK | STL_RANK | BLK_RANK | BLKA_RANK | PF_RANK | PFD_RANK | PTS_RANK | PLUS_MINUS_RANK | NBA_FANTASY_PTS_RANK | DD2_RANK | TD3_RANK | CFID | CFPARAMS


[]

### Creates a Table of Close Games Only

In [6]:
# Takes in a csv filepath of one of the EightThirtyFour data sets
# and filters the data to games with a 10 or less point lead in 
# the last 6 minutes of the game.
def game_filter(csv_file): 
    
    
    pbp = Table().read_table(csv_file)
        
    last_quarter = pbp.where('PERIOD', predicates.are.equal_to(4))
    transformed_minutes = last_quarter.apply(time_string_to_number, 'PCTIMESTRING')
    last_quarter_and_minutes = last_quarter.with_column('TIME', transformed_minutes)
    
    find_close_scores = last_quarter_and_minutes.where('TIME', predicates.are.between(6, 6.5))  

    close_games_ids = make_array()   
    last_id = 0
    
    for i in np.arange(find_close_scores.num_rows):      
        row = find_close_scores.row(i)    
        current_id = int(row[0])
        if current_id != last_id:
            diff = abs(int(row[7]) - int(row[8]))
            if diff <= 10:
                close_games_ids = np.append(current_id, close_games_ids)
        last_id = current_id
        
        
    close_games_table = Table().with_column('Close Games', close_games_ids)    
    time_less_than_six = last_quarter_and_minutes.where('TIME', predicates.are.below(6.6))
    time_less_than_six = time_less_than_six.join('GAME_ID', close_games_table, 'Close Games')
    
    return time_less_than_six


### Writes the New Table to File

In [7]:
#Write to File
game_filter('PbP_15_16_reduced.csv').to_csv('PbP_15_16_trimmed.csv')

In [8]:
games = Table().read_table('PbP_15_16_trimmed.csv', sep=',')
games

GAME_ID,EVENTNUM,PCTIMESTRING,TIME,PERIOD,HOME_TEAM,AWAY_TEAM,HOME_SCORE,AWAY_SCORE,POINTS_SCORED,SHOT_MADE,SHOT_TYPE,SHOT_DIST,PLAYER1_ID,PLAYER1_NAME,PLAYER1_TEAM_NICKNAME,ASSIST_PLAYER_ID,EVENTMSGTYPE,EVENTMSGACTIONTYPE
21500002,474,6:32,6.53333,4,Bulls,Cavaliers,80,79,0,,,,2747,J.R. Smith,Cavaliers,,Personal Foul,P.FOUL
21500002,476,6:20,6.33333,4,Bulls,Cavaliers,82,79,2,1.0,Shot,15.0,201565,Derrick Rose,Bulls,,Make,Pullup Jump Shot
21500002,479,6:01,6.01667,4,Bulls,Cavaliers,82,81,2,1.0,Dunk,,202684,Tristan Thompson,Cavaliers,2590.0,Make,Dunk
21500002,480,5:50,5.83333,4,Bulls,Cavaliers,82,81,0,0.0,Layup,2.0,201565,Derrick Rose,Bulls,,Miss,Driving Layup
21500002,481,5:48,5.8,4,Bulls,Cavaliers,82,81,0,,,,201567,Kevin Love,Cavaliers,,Rebound,Player
21500002,482,5:32,5.53333,4,Bulls,Cavaliers,82,83,2,1.0,Shot,7.0,2544,LeBron James,Cavaliers,,Make,Driving Floating Jump Shot
21500002,483,5:18,5.3,4,Bulls,Cavaliers,84,83,2,1.0,Shot,13.0,202703,Nikola Mirotic,Bulls,,Make,Driving Floating Jump Shot
21500002,484,5:18,5.3,4,Bulls,Cavaliers,84,83,0,,,,202684,Tristan Thompson,Cavaliers,,Personal Foul,S.FOUL
21500002,485,5:18,5.3,4,Bulls,Cavaliers,85,83,1,,,,202703,Nikola Mirotic,Bulls,,Free Throw,Free Throw 1 of 1
21500002,488,5:07,5.11667,4,Bulls,Cavaliers,85,83,0,0.0,Layup,3.0,201567,Kevin Love,Cavaliers,,Miss,Reverse Layup


### Helper Funciton to Create Game Reference Table

In [9]:
# Sort the trimmed table to analyze on a game by game basis
# builds a list of tuples of indecies, each tubple represents a unquue game in the table

def isolate_game(table):
    games = []
    row_start = table.row(0)  
    last_id1 = int(row_start[0])
    current_id1 = 0
    last_index = 0
    count = 0
    
    for i in np.arange(table.num_rows):   
        
        row = table.row(i)           
        current_id1 = int(row[0])
        
        if current_id1 != last_id1:
            x = [last_index, i-1]

            games.append(x)
            count += 1
            
            # reset indicies
            last_index = i
            last_id1 = current_id1
        
    return games

games_list = isolate_game(games)    

    

### Creates Game Reference Table

In [10]:
def construct_game_reference_table():
    count = 1
    season = Table(make_array('HOME_TEAM', 'AWAY_TEAM'))
    array = make_array()
    for t in games_list:
        index = t[0]
        row = games.take(index).select('HOME_TEAM', 'AWAY_TEAM')
        array = np.append(array, count)
        count += 1
        season = season.with_row(row)
    season = season.with_column('GAME NUMBER', array).with_column('INDICIES', games_list)
    return season 
      
construct_game_reference_table()       

HOME_TEAM,AWAY_TEAM,GAME NUMBER,INDICIES
Bulls,Cavaliers,1,[ 0 69]
Magic,Wizards,2,[ 70 149]
Celtics,76ers,3,[150 224]
Pistons,Jazz,4,[225 301]
Pacers,Raptors,5,[302 396]
Thunder,Spurs,6,[397 475]
Clippers,Kings,7,[476 545]
Lakers,Timberwolves,8,[546 624]
Pacers,Grizzlies,9,[625 690]
Thunder,Magic,10,[691 773]


### Creates Player Reference Table

In [85]:
players = games.select('PLAYER1_ID', 'PLAYER1_NAME')
players = players.group('PLAYER1_ID', max).where('PLAYER1_NAME max', predicates.are.not_equal_to('nan')).relabel(1, 'PLAYER_NAME')

def first(string):
    name = string.split(' ')
    first = name[0]
    return first
    
def last(string):
    name = string.split(' ')
    if len(name) == 1:
        return 'nan'
    first = name[1]
    return first

players = players.with_column('FIRST_NAME', players.apply(first, 'PLAYER_NAME'))
players = players.with_column('LAST_NAME', players.apply(last, 'PLAYER_NAME'))


game1 = games.take(np.arange(0, 69))
names = game1.select('PLAYER1_NAME')

print(players.num_rows)

# players = players.join('PLAYER_NAME', names, 'PLAYER1_NAME').group('PLAYER1_ID', max)
statzz = Table(make_array('Player', 'TS', 'EFG'))

for i in np.arange(0, players.num_rows):
    print(i)
    row = players.row(i)
    playerrr = row[2] + ' ' + row[3]
    roww = [playerrr] + get_season_stats(row[2], row[3])
    statzz = statzz.with_row(roww)
    
statzz  



481
0
Craig
Ehlo
1
Tony
Smith
2
Shaquille
O'Neal
3
Willie
Burton
4
Sam
Mitchell
5
Terry
Davis
6
Sherman
Douglas
7
Shawn
Kemp
8
Buck
Williams
9
Tony
Dumas
10
Juwan
Howard
11
Robert
Werdann
12
Zan
Tabak
13
Lamond
Murray
14
Wesley
Person
15
Lucious
Harris
16
Bo
Outlaw
17
Vin
Baker
18
Chuck
Person
19
Andrew
Lang
20
Howard
Eisley
21
Trevor
Ruffin
22
Popeye
Jones
23
JR
Reid
24
Jason
Kidd
25
Jon
Barry
26
Jamal
Mashburn
27
Stefano
Rusconi
28
Doug
Smith
29
Melvin
Booker
30
Kevin
Garnett
[0.49019607843137253, 0.4666666666666666]
31
Kobe
Bryant
[0.4682843763303534, 0.4142011834319527]
32
Tim
Duncan
[0.5292959133431806, 0.4861111111111111]
33
Vince
Carter
[0.5224825839138695, 0.4736842105263158]
34
Dirk
Nowitzki
[0.5569758948137327, 0.5033783783783783]
35
Paul
Pierce
[0.4887820512820512, 0.456896551724138]
36
Elton
Brand
[0.4638009049773755, 0.42857142857142855]
37
Andre
Miller
[0.6061598951507209, 0.5740740740740741]
38
Jason
Terry
[0.5442804428044281, 0.5288461538461539]
39
Metta
World
40
Manu
G

[0.5434782608695652, 0.49645390070921985]
174
Courtney
Lee
[0.547945205479452, 0.5185185185185186]
175
Kosta
Koufos
[0.5420918367346939, 0.5263157894736842]
176
Serge
Ibaka
[0.5337173839376482, 0.5135135135135136]
177
Nicolas
Batum
[0.5442723553477499, 0.504]
178
George
Hill
[0.5560661764705882, 0.525]
179
Darrell
Arthur
[0.5100652883569097, 0.5]
180
Mario
Chalmers
[0.5602697998259356, 0.4657534246575342]
181
DeAndre
Jordan
[0.6274703557312253, 0.6969696969696969]
182
Omer
Asik
[0.5549389567147613, 0.5172413793103449]
183
Luc
Mbah
184
Sonny
Weems
[0.45754026354319177, 0.44230769230769224]
185
Goran
Dragic
[0.5336058128973661, 0.5122950819672132]
186
Sasha
Kaun
[0.5136986301369864, 0.5714285714285715]
187
Anthony
Morrow
[0.5422153369481022, 0.5306122448979592]
188
Blake
Griffin
[0.545361875637105, 0.5058139534883721]
189
James
Harden
[0.5994708119728791, 0.515228426395939]
190
Tyreke
Evans
[0.5341579983131852, 0.48015873015873023]
191
Ricky
Rubio
[0.5313552188552189, 0.42857142857142855

[0.4781420765027322, 0.39999999999999997]
319
Hollis
Thompson
[0.5253001715265866, 0.5056818181818181]
320
Mirza
Teletovic
[0.5711610486891384, 0.5459183673469387]
321
Chris
Copeland
[0.4704301075268817, 0.4285714285714285]
322
Pablo
Prigioni
[0.4913522012578616, 0.45999999999999996]
323
Kent
Bazemore
[0.5482041587901701, 0.520618556701031]
324
Brian
Roberts
[0.5561735261401557, 0.5156249999999999]
325
Henry
Sims
[0.5059153175591532, 0.43636363636363634]
326
Willie
Reed
[0.5851593625498008, 0.5588235294117647]
327
Chris
Johnson
328
Justin
Holiday
[0.48828125000000006, 0.4767441860465116]
329
JaMychal
Green
[0.5217146080090242, 0.4761904761904762]
330
Jorge
Gutierrez
[0.6944444444444444, 0.5555555555555556]
331
Aron
Baynes
[0.564516129032258, 0.5106382978723404]
332
Nerlens
Noel
[0.5390442890442889, 0.5227272727272726]
333
Alex
Len
[0.48056386159760783, 0.425]
334
Allen
Crabbe
[0.5712067435669921, 0.5357142857142857]
335
Andre
Roberson
[0.5644402634054563, 0.5512820512820513]
336
Anthon

[0.5761173184357542, 0.5576923076923077]
469
Darrun
Hilliard
[0.5149330587023687, 0.4852941176470588]
470
Joe
Young
[0.4353803849679193, 0.39024390243902446]
471
Larry
Nance
472
Mario
Hezonja
[0.5438659058487875, 0.5094339622641509]
473
Alan
Williams
[0.4807692307692307, 0.4166666666666667]
474
Bryce
Dejean-Jones
[0.5035971223021583, 0.5]
475
Cristiano
Felicio
[0.610632183908046, 0.5652173913043479]
476
Boban
Marjanovic
[0.6668283220174587, 0.5937499999999999]
477
Axel
Toupane
[0.49288061336254113, 0.4545454545454546]
478
Salah
Mejri
[0.6344307270233197, 0.6086956521739131]
479
Marcelo
Huertas
[0.4977876106194691, 0.4534883720930233]
480
Briante
Weber
[0.3751705320600273, 0.35714285714285715]


TS,EFG
0.490196,0.466667
0.468284,0.414201
0.529296,0.486111
0.522483,0.473684
0.556976,0.503378
0.488782,0.456897
0.463801,0.428571
0.60616,0.574074
0.54428,0.528846
0.575816,0.533333


# Analysis Begins Here

In [None]:
def shots_all_players(table):
    #This function finds the number of shots that each player takes in time period
    #(it doesn't distinguish the type of shot)
    #it can be used for an entire game or for the last 6 minutes
    shots = table.where('EVENTMSGTYPE', predicates.are.containing('M'))
    players = shots.select('EVENTMSGTYPE', 'EVENTMSGACTIONTYPE', 'PLAYER1_NAME', 'PLAYER1_TEAM_NICKNAME')
    count = players.group(('PLAYER1_NAME', 'PLAYER1_TEAM_NICKNAME'))
    return count

def max_shot_player_each_team(table, team1, team2):
    #This takes in a table like player_count above
    #It returns the player who shoots the most for each team
    player_team_one = table.where('PLAYER1_TEAM_NICKNAME', team1).sort('count', descending=True).column('PLAYER1_NAME').item(0)
    player_team_two = table.where('PLAYER1_TEAM_NICKNAME', team2).sort('count', descending=True).column('PLAYER1_NAME').item(0)
    return (player_team_one, player_team_two)