In [1]:
import pandas as pd

from sqlalchemy import create_engine
from sqlalchemy_utils import database_exists, create_database

from secrets import *

In [2]:
# Name of the PostgreSQL database this notebook reads from.
dbname = 'cluj'

# Build the connection engine from the credentials held in AUTH
# (presumably supplied by the `from secrets import *` cell above -- verify).
# The dialect must be spelled "postgresql": SQLAlchemy 1.4+ no longer accepts
# the legacy "postgres://" scheme and raises NoSuchModuleError for it.
# NOTE(review): the user and password are interpolated verbatim; a value
# containing '@', ':' or '/' would need URL-escaping before being placed here.
engine = create_engine('postgresql://%s:%s@localhost/%s'%(AUTH['db_user'],AUTH['db_pass'],dbname))

# Create the database on first use, then print whether it now exists.
if not database_exists(engine.url):
    create_database(engine.url)
print(database_exists(engine.url))

True


In [4]:
# Pull every box-score row dated on or after 2019-10-22 into a DataFrame,
# then display its (rows, columns) shape as the cell output.
boxscores = pd.read_sql(
    sql="SELECT * FROM boxscores WHERE date >= '2019-10-22'",
    con=engine,
)
boxscores.shape

(15118, 15)

In [5]:
# Show the column labels of the loaded box-score frame.
boxscores.columns

Index(['assists', 'fga', 'fta', 'blocks', 'date', 'fgm', 'ftm', 'threes',
       'name', 'opponent', 'seconds_played', 'steals', 'team', 'rebounds',
       'points'],
      dtype='object')

In [9]:
# Show the dtype of every column.
# NOTE: this cell was executed as In [9], i.e. after the cell below (In [6])
# that adds ft_pct and fg_pct, which is why those two derived columns appear
# in the saved output. On a fresh top-to-bottom run only the loaded columns
# would be listed here.
boxscores.dtypes

assists             int64
fga                 int64
fta                 int64
blocks              int64
date               object
fgm                 int64
ftm                 int64
threes              int64
name               object
opponent           object
seconds_played      int64
steals              int64
team               object
rebounds            int64
points              int64
ft_pct            float64
fg_pct            float64
dtype: object

In [6]:
# Derive per-row shooting percentages as made / attempted.
# Division is element-wise; a row with zero attempts produces NaN (or inf)
# instead of raising, and such values never satisfy a `>` comparison.
boxscores['ft_pct'] = boxscores['ftm'] / boxscores['fta']
boxscores['fg_pct'] = boxscores['fgm'] / boxscores['fga']

In [19]:
# Minimum (exclusive) shooting percentages a row must beat to qualify.
ft_pct_threshold = 0.75
fg_pct_threshold = 0.4

# A row qualifies when every counting stat below is strictly positive ...
counting_stats = ['assists', 'threes', 'steals', 'rebounds', 'blocks']
has_every_counting_stat = (boxscores[counting_stats] > 0).all(axis=1)

# ... and both shooting percentages are strictly above their thresholds.
shoots_efficiently = (
    (boxscores['ft_pct'] > ft_pct_threshold)
    & (boxscores['fg_pct'] > fg_pct_threshold)
)

octys = boxscores.loc[has_every_counting_stat & shoots_efficiently]
octys.shape

(372, 17)

In [36]:
# Count qualifying rows per player, most frequent first, as a two-column
# frame: 'name' and 'octy_count'.
#
# The axis and value names are set explicitly instead of renaming whatever
# reset_index() happens to produce: pandas 1.x labels the value_counts result
# after the source column with an unnamed index ('index'/'name' after a
# reset), whereas pandas >= 2.0 labels it 'count' with the index named after
# the column ('name'/'count'). A rename keyed on the 1.x labels mislabels the
# columns on 2.x and breaks the later merge on 'name'.
octy_counts = (
    octys['name']
    .value_counts()
    .rename_axis('name')
    .reset_index(name='octy_count')
)
octy_counts.head()

Unnamed: 0,name,octy_count
0,Anthony Davis,13
1,James Harden,9
2,Kawhi Leonard,8
3,Joel Embiid,8
4,Brandon Ingram,7


In [40]:
# Number of box-score rows per player (counted via the non-null 'assists'
# entries), flattened to a frame with columns 'name' and 'game_count'.
octys_games = (
    boxscores
    .groupby('name')['assists']
    .count()
    .reset_index(name='game_count')
)

In [41]:
# Preview the first rows of the per-player game counts.
octys_games.head()

Unnamed: 0,name,game_count
0,Aaron Gordon,42
1,Aaron Holiday,45
2,Abdel Nader,33
3,Adam Mokoka,3
4,Admiral Schofield,25


In [44]:
# Join qualifying-game counts with total game counts per player (inner join
# on 'name'), add the share of games that qualified, and order players from
# the highest share to the lowest. The original row labels are kept.
merged = (
    octy_counts
    .merge(octys_games, on='name')
    .assign(octy_pct=lambda frame: frame['octy_count'] / frame['game_count'])
    .sort_values('octy_pct', ascending=False)
)

In [46]:
# Show up to the first 40 rows of `merged`, which the preceding cell sorted by
# octy_pct in descending order.
merged.head(40)

Unnamed: 0,name,octy_count,game_count,octy_pct
0,Anthony Davis,13,39,0.333333
3,Joel Embiid,8,32,0.25
22,Kyrie Irving,4,18,0.222222
2,Kawhi Leonard,8,36,0.222222
1,James Harden,9,44,0.204545
23,John Collins,4,22,0.181818
7,Pascal Siakam,6,36,0.166667
4,Brandon Ingram,7,43,0.162791
8,Josh Richardson,6,38,0.157895
5,Kemba Walker,6,41,0.146341
