# Goal:
Rank ("rack and stack") all of the 2020 NBA draft class prospects for our Shiny app.

Import libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import getpass
import psycopg2
from sklearn.preprocessing import scale, LabelEncoder, LabelBinarizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, f1_score
from sklearn.model_selection import train_test_split
# Show wide/long frames in full when they are displayed.
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_rows', 500)

# Apply the grid style and the figure size in a single call: `sns.set()` resets
# the style to its default, so a separate `sns.set_style('whitegrid')` issued
# before it would be overridden.
sns.set(style = 'whitegrid', rc = {'figure.figsize': (12, 8)})

Connect to our database

In [2]:
# Ask for the password interactively rather than hard-coding it in the notebook.
mypasswd = getpass.getpass()

# Connection settings for the group database.
db_settings = {
    'database': 'cs20_group4',
    'user': 'jpcp73',  # replace with pawprint
    'host': 'pgsql.dsa.lan',
    'password': mypasswd,
}
conn = psycopg2.connect(**db_settings)
cursor = conn.cursor()

········


Query our 2020 NBA Draft class predictions from our database

In [3]:
# Pull every row of the stored predictions into a DataFrame.
predictions_query = """
SELECT
    *
FROM active_player_predictions
"""
data = pd.read_sql_query(predictions_query, con = conn)

# Report the frame's dimensions, then preview the first rows.
print(data.shape)
data.head()

(5111, 11)


Unnamed: 0,ncaa_id,player_name,nba_pred,predicted_career_role,benchwarmer_prob,non_nba_prob,replacement_player_prob,reserve_player_prob,role_player_prob,star_player_prob,starter_player_prob
0,iran-bennett-1,Iran Bennett,1.0,Non-NBA,8.125922e-08,0.5543595,0.3918272,0.0002564247,0.008213609,8.71502e-07,0.04534241
1,jakub-dombek-1,Jakub Dombek,0.998781,Replacement Player,0.0,0.0,1.0,0.0,0.0,0.0,0.0
2,shakur-juiston-1,Shakur Juiston,1.0,Reserve,2.000015e-11,0.3197494,3.438615e-22,0.6801035,7.102528e-18,2.260053e-12,0.0001470853
3,maxwell-starwood-1,Maxwell Starwood,1.0,Non-NBA,1.346084e-13,0.9999917,5.807572e-14,8.159463e-06,2.181801e-07,1.783722e-11,5.009856e-08
4,james-wiseman-1,James Wiseman,1.0,Star,1.126775e-07,1.482664e-15,1.548839e-10,1.739439e-09,2.910505e-07,0.9999994,1.863512e-07


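Create a composite score for each player: a weighted sum of our model's predicted career-role probabilities, with weights running from -1 (Non-NBA) up to +6 (Star). This gives us a single number for comparing all of the 2020 draft prospects.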

In [4]:
# Weight applied to each predicted career-role probability column.
role_weights = [
    ('non_nba_prob', -1),
    ('replacement_player_prob', 1),
    ('benchwarmer_prob', 2),
    ('reserve_player_prob', 3),
    ('role_player_prob', 4),
    ('starter_player_prob', 5),
    ('star_player_prob', 6),
]

# Build the weighted sum term by term, in the order listed above.
first_column, first_weight = role_weights[0]
composite_score = first_weight * data[first_column]
for column, weight in role_weights[1:]:
    composite_score = composite_score + (weight * data[column])

data['player_score'] = composite_score

View our results

In [5]:
# Show the 30 highest-scoring prospects with just the identifying columns.
leaderboard_columns = ['ncaa_id', 'player_name', 'predicted_career_role', 'player_score']
ranked_prospects = data.sort_values(by = 'player_score', ascending = False)
ranked_prospects.head(30).loc[:, leaderboard_columns]

Unnamed: 0,ncaa_id,player_name,predicted_career_role,player_score
6,anthony-edwards-2,Anthony Edwards,Star,6.0
354,jordan-nwora-1,Jordan Nwora,Star,6.0
4,james-wiseman-1,James Wiseman,Star,5.999999
55,nfaly-dante-1,N'Faly Dante,Star,5.999024
58,tre-jones-1,Tre Jones,Star,5.977293
60,precious-achiuwa-1,Precious Achiuwa,Star,5.953998
51,josh-green-2,Josh Green,Star,5.912573
1040,marcus-evans-2,Marcus Evans,Star,5.895392
10,isaiah-stewart-2,Isaiah Stewart,Star,5.287022
93,onyeka-okongwu-1,Onyeka Okongwu,Star,5.214092


Define our `draft_rankings` data table schema. Note that this drops and recreates the table each time the cell is run.

In [6]:
def _sql_column_type(series):
    """Return the PostgreSQL column type used to store a DataFrame column.

    Numeric columns (any integer/float width, excluding booleans) map to
    ``numeric``, ``datetime64[ns]`` columns map to ``TIMESTAMP``, and
    everything else maps to a ``varchar`` sized to the longest non-null value.
    """
    dtype = series.dtype
    if pd.api.types.is_numeric_dtype(dtype) and not pd.api.types.is_bool_dtype(dtype):
        return 'numeric'
    if dtype == 'datetime64[ns]':
        return 'TIMESTAMP'
    # `default=0` covers columns with no non-null values; the floor of 1 avoids
    # emitting `varchar(0)`, which PostgreSQL rejects.
    longest = max((len(str(value)) for value in series.dropna()), default=0)
    return 'varchar({})'.format(max(longest, 1))


# One "<column> <type>" definition per DataFrame column, in column order.
c_table = ['{} {}'.format(c, _sql_column_type(data[c])) for c in data.columns.tolist()]

create_table = 'DROP TABLE IF EXISTS draft_rankings; '
create_table += 'CREATE TABLE draft_rankings ({})'.format(', '.join(c_table))

# `with conn` commits on success and rolls back on error; the cursor is closed
# when the block exits.
with conn, conn.cursor() as cursor:
    cursor.execute(create_table)

Insert our rankings data into the `draft_rankings` data table

In [7]:
# Replace missing values with None so they are written as SQL NULL.
# The cast to object comes first: on recent pandas versions, `where(..., None)`
# on a float column keeps the float dtype, so missing values would stay NaN.
df = data.astype(object).where(data.notna(), None)

# Parameterised INSERT with one `%s` placeholder per column.
INSERT_SQL = 'INSERT INTO draft_rankings'
INSERT_SQL += '({}) VALUES'.format(', '.join(df.columns))
INSERT_SQL += '({})'.format(', '.join(['%s'] * len(df.columns)))

# `with conn` commits once every row has been inserted, or rolls back if any
# insert fails; the cursor is closed when the block exits.
with conn, conn.cursor() as cursor:
    for row in df.itertuples(index=False, name=None):
        cursor.execute(INSERT_SQL, row)