In [1]:
%matplotlib inline
import os.path
import numpy as np
import pandas as pd

## Load data

In [2]:
def load_data(dirname, filename):
    """Return the absolute path of ``filename`` inside ``dirname``.

    Despite the name, nothing is read from disk here: ``dirname`` is resolved
    against the current working directory and ``filename`` is joined onto it.
    The caller is expected to open the returned path itself.
    """
    base_dir = os.path.abspath(os.path.join(os.getcwd(), dirname))
    target = os.path.join(base_dir, filename)
    return os.path.abspath(target)

In [3]:
# Location of the cleaned shot-chart CSV, relative to the working directory.
dirname = '../../../data/'
filename = 'uaap-season81-fibalivestats-shotchart-data-CLEANED.csv'

# Resolve the absolute path first, then parse the CSV into the main shots frame.
csv_path = load_data(dirname, filename)
fg_df = pd.read_csv(csv_path)

In [4]:
# Preview the loaded shots frame (the notebook display truncates the middle rows).
fg_df

Unnamed: 0,team,opponent,made,x,y,num,player,points,shot_type,date,venue
0,NU,UST,0,8,3,9,J. Clemente,2,jump shot,2018.09.08,MOA
1,NU,UST,1,0,2,10,I. Gaye,2,dunk,2018.09.08,MOA
2,NU,UST,1,-1,16,8,A. Joson,2,jump shot,2018.09.08,MOA
3,NU,UST,0,4,8,1,D. Ildefonso,2,jump shot,2018.09.08,MOA
4,NU,UST,0,62,43,1,D. Ildefonso,3,jump shot,2018.09.08,MOA
...,...,...,...,...,...,...,...,...,...,...,...
7614,ADU,FEU,1,-1,6,20,K. Bernardo,2,lay up,2018.11.18,MOA
7615,ADU,FEU,1,-7,-1,28,M. Macion,2,jump shot,2018.11.18,MOA
7616,ADU,FEU,0,8,6,9,W. Magbuhos,2,jump shot,2018.11.18,MOA
7617,ADU,FEU,0,4,0,17,J. Colonia,2,jump shot,2018.11.18,MOA


## Add column for points scored (made_points)

In [5]:
# Add column for points scored (made_points).
# In the data shown, `made` is a 0/1 flag and `points` is the shot's value, so
# the product is 0 for a miss and the shot's value for a make.
# Column-wise multiplication replaces the row-wise `apply`, which invoked a
# Python lambda once per shot.
fg_df['made_points'] = fg_df['made'] * fg_df['points']
fg_df

Unnamed: 0,team,opponent,made,x,y,num,player,points,shot_type,date,venue,made_points
0,NU,UST,0,8,3,9,J. Clemente,2,jump shot,2018.09.08,MOA,0
1,NU,UST,1,0,2,10,I. Gaye,2,dunk,2018.09.08,MOA,2
2,NU,UST,1,-1,16,8,A. Joson,2,jump shot,2018.09.08,MOA,2
3,NU,UST,0,4,8,1,D. Ildefonso,2,jump shot,2018.09.08,MOA,0
4,NU,UST,0,62,43,1,D. Ildefonso,3,jump shot,2018.09.08,MOA,0
...,...,...,...,...,...,...,...,...,...,...,...,...
7614,ADU,FEU,1,-1,6,20,K. Bernardo,2,lay up,2018.11.18,MOA,2
7615,ADU,FEU,1,-7,-1,28,M. Macion,2,jump shot,2018.11.18,MOA,2
7616,ADU,FEU,0,8,6,9,W. Magbuhos,2,jump shot,2018.11.18,MOA,0
7617,ADU,FEU,0,4,0,17,J. Colonia,2,jump shot,2018.11.18,MOA,0


## Make summary table of players
- Group by players
- Add team, fga, fg, ppa columns

In [13]:
# One row per player: first listed team, field goals made (fg), field goals
# attempted (fga, counted from non-null `points` entries) and total points scored.
# NOTE(review): grouping is by player name only, so two players sharing a name
# on different teams would be merged -- confirm names are unique in this data.
fg_summ_df = fg_df[['player', 'team', 'made', 'points', 'made_points']].groupby('player').agg(
    team=pd.NamedAgg(column='team', aggfunc='first'),
    fg=pd.NamedAgg(column='made', aggfunc='sum'),
    fga=pd.NamedAgg(column='points', aggfunc='count'),
    points=pd.NamedAgg(column='made_points', aggfunc='sum'),
)
fg_summ_df = fg_summ_df.reset_index()

# Create the export folder up front so the writes below also succeed on a
# fresh checkout where `outputs/data` does not exist yet.
os.makedirs('outputs/data', exist_ok=True)
fg_summ_df.to_csv('outputs/data/players-all.csv', index_label='p_id_all')
fg_summ_df.sort_values(by='fga', ascending=False).to_csv('outputs/data/players-all-sorted.csv', index_label='p_id_all')

## Get lists of retained players (FGA >= 28) and dropped players (FGA < 28)

In [7]:
# Minimum number of field-goal attempts a player needs to be kept.
MIN_FGA = 28

# Compute the threshold mask once; dropped players are exactly its complement
# (`fga` is a count, so it is never missing).
meets_min_fga = fg_summ_df['fga'] >= MIN_FGA
retain_players = fg_summ_df.loc[meets_min_fga, 'player'].values
drop_players = fg_summ_df.loc[~meets_min_fga, 'player'].values

## Make DataFrame of retained players FG
- save the FG of retained and dropped players

In [8]:
# Row mask over every shot: True when the shooter is in the retained list.
# Built once and reused for both exports and for the retained frame.
is_retained_shot = fg_df.player.isin(retain_players)

fg_retain_df = fg_df[is_retained_shot]
fg_retain_df.to_csv('outputs/data/players-retained-fg.csv')
fg_df[~is_retained_shot].to_csv('outputs/data/players-dropped-fg.csv')

fg_retain_df

Unnamed: 0,team,opponent,made,x,y,num,player,points,shot_type,date,venue,made_points
0,NU,UST,0,8,3,9,J. Clemente,2,jump shot,2018.09.08,MOA,0
1,NU,UST,1,0,2,10,I. Gaye,2,dunk,2018.09.08,MOA,2
2,NU,UST,1,-1,16,8,A. Joson,2,jump shot,2018.09.08,MOA,2
3,NU,UST,0,4,8,1,D. Ildefonso,2,jump shot,2018.09.08,MOA,0
4,NU,UST,0,62,43,1,D. Ildefonso,3,jump shot,2018.09.08,MOA,0
...,...,...,...,...,...,...,...,...,...,...,...,...
7605,ADU,FEU,1,13,5,4,C. Catapusan,2,jump shot,2018.11.18,MOA,2
7606,ADU,FEU,1,-34,11,7,J. Lastimosa,2,jump shot,2018.11.18,MOA,2
7607,ADU,FEU,0,4,3,11,J. Espeleta,2,jump shot,2018.11.18,MOA,0
7608,ADU,FEU,0,-34,-12,7,J. Lastimosa,3,jump shot,2018.11.18,MOA,0


## Get summary of players with FGA >= 28

In [9]:
# Summary rows for retained players only; the first export keeps the row labels
# inherited from the all-players summary under `p_id_all`.
fg_summ_retained_df = fg_summ_df[fg_summ_df.player.isin(retain_players)]
fg_summ_retained_df.to_csv('outputs/data/players-retained.csv', index_label='p_id_all')

# Sorted export gets a fresh 0..n-1 index written as `p_id_retained`;
# `reset_index(drop=True)` discards the old labels in one step.
fg_summ_retained_df.sort_values(by='fga', ascending=False).reset_index(drop=True).to_csv('outputs/data/players-retained-sorted.csv', index_label='p_id_retained')

In [10]:
# Per-player breakdown by shot value for retained players only: one row per
# (player, points) pair with team, makes, attempts and points scored.
shot_cols = ['player', 'team', 'made', 'points', 'made_points']
by_player_and_value = fg_retain_df[shot_cols].groupby(['player', 'points'])
fg_pts_df = by_player_and_value.agg(
    team=('team', 'first'),
    fg=('made', 'sum'),
    fga=('points', 'count'),
    points=('made_points', 'sum'),
)
fg_pts_df

Unnamed: 0_level_0,Unnamed: 1_level_0,team,fg,fga,points
player,points,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
A. Asistio,2,ADMU,7,27,14
A. Asistio,3,ADMU,28,62,84
A. Caracut,2,DLSU,34,84,68
A. Caracut,3,DLSU,12,43,36
A. Diputado,2,NU,16,33,32
...,...,...,...,...,...
W. Comboy,3,FEU,18,58,54
W. Navarro,2,ADMU,20,42,40
W. Navarro,3,ADMU,1,24,3
Z. Huang,2,UST,39,90,78
