In [125]:
import sys
import psycopg2

import seaborn as sns
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sqlalchemy import create_engine
from sqlalchemy_utils import database_exists, create_database


In [126]:
def do_feature_eng(in_dataframe):
    """Build an all-float feature frame from a raw stats frame.

    Steps:
      1. Drop the columns named in ``unwanted`` below; any that are absent
         are simply skipped.
      2. Add the ratio columns ``ppm`` (pts/min), ``tper`` (tpm/tpa),
         ``ftper`` (ftm/fta) and ``fgper`` (fgm/fga) whenever both source
         columns exist and can be read as floats.
      3. Cast the whole frame to float.

    Parameters
    ----------
    in_dataframe : pandas.DataFrame
        Input frame. It is never modified; a new frame is returned.

    Returns
    -------
    pandas.DataFrame
        New frame in which every column has a float dtype.

    Raises
    ------
    SystemExit
        With status 1 when the remaining columns cannot be cast to float
        (a warning is printed first).
    """
    print('  Now engineering features!')

    unwanted = ['player', 'player_url', 'team_name', 'index', 'game_id',
                'player_pos', 'ha', 'wl']
    # Drop only what is really there, so a single missing column does not
    # keep all the others around.  drop() returns a new object, so every
    # assignment below touches the copy, not the caller's frame.
    present = [col for col in unwanted if col in in_dataframe.columns]
    out_dataframe = in_dataframe.drop(present, axis=1)

    # (new column, numerator column, denominator column)
    ratio_features = [
        ('ppm', 'pts', 'min'),
        ('tper', 'tpm', 'tpa'),
        ('ftper', 'ftm', 'fta'),
        ('fgper', 'fgm', 'fga'),
    ]
    for feature, numerator, denominator in ratio_features:
        if (numerator not in out_dataframe.columns
                or denominator not in out_dataframe.columns):
            continue
        try:
            top = out_dataframe[numerator].astype('float')
            bottom = out_dataframe[denominator].astype('float')
        except (TypeError, ValueError):
            # Non-numeric source data: skip this ratio.  The final cast
            # below reports the problem.
            continue
        # Series division is aligned on the index, so it works for any index
        # labels; a zero denominator gives inf/NaN rather than an error.
        out_dataframe[feature] = top / bottom

    try:
        out_dataframe = out_dataframe.astype(float)
    except (TypeError, ValueError):
        print('WARNING!! Could not generate a numeric data frame!!')
        # Non-zero status: this is a failure, not a clean exit.
        sys.exit(1)

    return out_dataframe

In [127]:
def make_heatmap(dataframe):
    """Print ``dataframe`` and its correlation matrix, then draw the matrix.

    The correlation matrix comes from ``dataframe.corr()`` and is rendered
    with ``sns.heatmap`` on a 12x9 figure, which is then shown.  Nothing is
    returned.
    """
    print('  Now making correlation heatmap')
    sns.set(context="paper", font="monospace")

    print(dataframe)

    correlations = dataframe.corr()
    print('  Correlation matrix')
    # Same text as printing the two items separated by a single space.
    print('%s %s' % ('  ', correlations))

    figure, axes = plt.subplots(figsize=(12, 9))
    # The freshly created axes are the current axes, so naming them
    # explicitly draws in the same place.
    sns.heatmap(correlations, vmax=.8, square=True, ax=axes)
    plt.show()

In [128]:
def load_data_db(sql_query):
    """Run ``sql_query`` against the ``ncaa_mbb_db`` database and return the rows.

    Connects with psycopg2 as user ``smaug``, prints the query, reads the
    result into a DataFrame with ``pd.read_sql_query``, prints that frame
    and returns it.

    Parameters
    ----------
    sql_query : str
        SQL text passed unchanged to ``pd.read_sql_query``.

    Returns
    -------
    pandas.DataFrame
        The query result.
    """
    dbname = 'ncaa_mbb_db'
    username = 'smaug'

    con = psycopg2.connect(database=dbname, user=username)
    try:
        print(sql_query)
        mbb_from_sql = pd.read_sql_query(sql_query, con)
    finally:
        # Always release the connection, including when the query fails.
        con.close()

    print(mbb_from_sql)
    return mbb_from_sql



In [129]:
def scatter_pts_vs_min(pts, min):
    """Plot 'PTS' against 'MIN' for two data sets on one figure and show it.

    The first data set is drawn with red circles labelled 'team1', the
    second with blue circles labelled 'team2'.

    NOTE(review): the ``pts`` and ``min`` arguments are never read.  The
    data is taken from the names ``mydata`` and ``mydata2``, which are not
    defined inside this function -- confirm they exist at module level
    before calling, or decide how the arguments should be used instead.
    """
    print('now here')

    team1_minutes = mydata['MIN']
    team1_points = mydata['PTS']
    team2_minutes = mydata2['MIN']
    team2_points = mydata2['PTS']

    plt.plot(team1_minutes, team1_points, 'ro', label='team1')
    plt.plot(team2_minutes, team2_points, 'bo', label='team2')

    plt.xlabel('Minutes Played')
    plt.ylabel('Points Scored')
    plt.title('Player Efficiency')
    plt.legend(loc=2)

    plt.show()

In [130]:
def main(feature_eng=True, heatmap=False):
    """Load the ``stats1415`` table and optionally engineer features / plot.

    Parameters
    ----------
    feature_eng : bool
        When true, pass the loaded frame through ``do_feature_eng`` and
        print the first ten rows of the result.
    heatmap : bool
        Any value other than ``False`` sends the current frame to
        ``make_heatmap``.  With ``feature_eng`` off, that is the frame as
        loaded from the database.
    """
    print('%s %s' % ('Now running: ', sys.argv[0]))

    # connect to Postgres
    username = 'smaug'
    dbname = 'ncaa_mbb_db'
    sql_query = 'SELECT * FROM stats1415'

    con = psycopg2.connect(database=dbname, user=username)
    try:
        my_df = pd.read_sql(sql_query, con)
    finally:
        # Release the connection even if the query raises.
        con.close()

    # Start from the loaded frame so the heatmap step always has a frame
    # to work with, even when feature engineering is switched off.
    numeric_df = my_df
    if feature_eng:
        numeric_df = do_feature_eng(my_df)
        print(numeric_df.head(10))

    if heatmap is not False:
        make_heatmap(numeric_df)


In [131]:
# Entry point: when this code is executed directly (not imported), run the
# pipeline with both feature engineering and the heatmap switched on.
if __name__ == "__main__":
    main(heatmap=True, feature_eng=True)

Now running:  /anaconda/lib/python2.7/site-packages/ipykernel/__main__.py
  Now engineering features!
   ast  blk  dreb  fga  fgm  fta  ftm  min  oreb  pf  pts  reb  stl  to  tpa  \
0    0    2     3    5    1    0    0   17     1   1    2    4    0   1    0   
1    0    0     3    3    2    0    0   28     0   0    5    3    0   2    1   
2    2    0     1    7    2    5    5   19     1   3   10    2    0   1    5   
3    2    0     2    5    0    4    2   23     1   2    2    3    1   4    2   
4    1    0     0   11    5    4    1   28     0   2   15    0    1   3    4   
5    0    0     1    4    0    1    0   18     1   5    0    2    0   0    0   
6    0    1     0    0    0    2    0    6     0   2    0    0    0   1    0   
7    0    0     0    0    0    0    0    7     0   0    0    0    0   1    0   
8    2    0     1    3    1    0    0   11     1   0    2    2    1   4    0   
9    0    0     0    7    2    0    0   23     2   0    5    2    0   0    3   

   tpm       ppm 