In [335]:
import sys
import psycopg2

import seaborn as sns
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.mlab as mlab


from sqlalchemy import create_engine
from sqlalchemy_utils import database_exists, create_database


In [336]:
def _add_ratio_feature(dataframe, numerator, denominator, name):
    """Add the column ``name`` = ``numerator`` / ``denominator`` to ``dataframe``.

    The frame is modified in place. Nothing is added when either source
    column is absent or cannot be converted to float. NaN results (for
    example 0 / 0) are stored as 0.
    """
    if numerator not in dataframe.columns or denominator not in dataframe.columns:
        return
    try:
        top = dataframe[numerator].astype('float')
        bottom = dataframe[denominator].astype('float')
    except (ValueError, TypeError):
        # Non-numeric source data: skip this feature. The whole-frame
        # conversion in do_feature_eng reports the problem.
        return
    # Element-wise division aligns on the index, so it also works when the
    # frame does not carry a default 0..n-1 index.
    dataframe[name] = (top / bottom).fillna(0)


def do_feature_eng(in_dataframe):
    """Build an all-float feature frame from a frame of player statistics.

    Steps:
      1. Drop the identifier/categorical columns ('player', 'player_url',
         'team_name', 'index', 'game_id', 'player_pos', 'ha', 'wl') that are
         present; missing ones are ignored.
      2. Add the ratio columns 'ppm' (pts/min), 'tper' (tpm/tpa),
         'ftper' (ftm/fta) and 'fgper' (fgm/fga) whenever both source
         columns exist. NaN ratios are replaced by 0.
      3. Convert every remaining column to float.

    Parameters
    ----------
    in_dataframe : pandas.DataFrame
        Input statistics. It is not modified.

    Returns
    -------
    pandas.DataFrame
        A new frame in which every column has a float dtype.

    Raises
    ------
    SystemExit
        With exit status 1, after printing a warning, when the remaining
        columns cannot all be converted to float.
    """
    print('  Now engineering features!')

    non_numeric_columns = ['player', 'player_url', 'team_name', 'index',
                           'game_id', 'player_pos', 'ha', 'wl']
    # errors='ignore' removes whichever of these columns exist; a single
    # missing column no longer prevents the others from being dropped.
    # drop() returns a new frame, so the caller's frame is left untouched.
    out_dataframe = in_dataframe.drop(non_numeric_columns, axis=1,
                                      errors='ignore')

    ratio_features = (
        ('ppm', 'pts', 'min'),
        ('tper', 'tpm', 'tpa'),
        ('ftper', 'ftm', 'fta'),
        ('fgper', 'fgm', 'fga'),
    )
    for name, numerator, denominator in ratio_features:
        _add_ratio_feature(out_dataframe, numerator, denominator, name)

    try:
        out_dataframe = out_dataframe.astype(float)
    except (ValueError, TypeError):
        print('WARNING!! Could not generate a numeric data frame!!')
        # Non-zero status: this is a failure, not a clean exit.
        sys.exit(1)

    return out_dataframe

In [337]:
def make_heatmap(dataframe):
    """Display a heatmap of the pairwise correlations between columns.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame whose ``corr()`` matrix is plotted.

    Returns
    -------
    None
        The figure is shown with ``plt.show()``.
    """
    print('  Now making correlation heatmap')
    sns.set(context="paper", font="monospace")

    corr_matrix = dataframe.corr()

    # Draw on the axes created here instead of relying on pyplot's notion
    # of the "current" axes.
    fig, ax = plt.subplots(figsize=(12, 9))
    # vmax=.8 saturates the colour scale at 0.8.
    sns.heatmap(corr_matrix, vmax=.8, square=True, ax=ax)
    # The plotted matrix comes from DataFrame.corr(), i.e. correlations,
    # not covariances, so the title says so.
    ax.set_title('Correlation Heatmap', fontsize=24)
    ax.set_xlabel('Variable 1', fontsize=18)
    ax.set_ylabel('Variable 2', fontsize=18)

    plt.show()

In [338]:
def load_data_db(sql_query, dbname='ncaa_mbb_db', username='smaug'):
    """Run ``sql_query`` against a Postgres database and return the result.

    The query text and the resulting frame are printed, as before; the
    frame is now also returned so callers can use it.

    Parameters
    ----------
    sql_query : str
        SQL text passed unchanged to ``pandas.read_sql_query``. Do not build
        it from untrusted input.
    dbname : str, optional
        Database name handed to ``psycopg2.connect``.
    username : str, optional
        User name handed to ``psycopg2.connect``.

    Returns
    -------
    pandas.DataFrame
        The rows produced by the query.
    """
    con = psycopg2.connect(database=dbname, user=username)
    try:
        print(sql_query)
        mbb_from_sql = pd.read_sql_query(sql_query, con)
    finally:
        # Close explicitly: using a psycopg2 connection in a ``with`` block
        # only ends the transaction, it does not close the connection.
        con.close()

    print(mbb_from_sql)
    return mbb_from_sql



In [339]:
def scatter_pts_vs_min(pts, min):
    """Scatter points scored against minutes played for two data sets.

    Both arguments are accepted but never read. The plotted values are the
    'MIN' and 'PTS' entries of ``mydata`` (red markers, labelled 'team1')
    and of ``mydata2`` (blue markers, labelled 'team2').

    NOTE(review): ``mydata`` and ``mydata2`` are not defined inside this
    function; they presumably have to exist at module/kernel level before
    the call -- confirm, otherwise the lookups below raise NameError.
    """
    print('now here')

    team1_minutes, team1_points = mydata['MIN'], mydata['PTS']
    team2_minutes, team2_points = mydata2['MIN'], mydata2['PTS']

    plotted = (
        (team1_minutes, team1_points, 'ro', 'team1'),
        (team2_minutes, team2_points, 'bo', 'team2'),
    )
    for minutes, points, marker, team in plotted:
        plt.plot(minutes, points, marker, label=team)

    plt.title('Player Efficiency')
    plt.xlabel('Minutes Played')
    plt.ylabel('Points Scored')
    # 'upper left' is matplotlib's legend location code 2.
    plt.legend(loc='upper left')

    plt.show()

In [340]:
def _plot_points_histogram(numeric_df):
    """Show a normalised histogram of the non-null 'pts' values."""
    points = numeric_df['pts'].astype('float')
    points = points[points.notnull()]

    # Bin edges 1, 2, ..., 55; values outside that range are not counted.
    bin_edges = np.arange(55) + 1
    # NOTE(review): ``normed`` is kept because it is what this notebook's
    # matplotlib accepted; newer matplotlib releases call it ``density`` --
    # confirm against the installed version before changing it.
    plt.hist(points, bin_edges, normed=1, facecolor='green', alpha=0.5)

    plt.xlabel('Points Scored')
    plt.ylabel('Probability Density')
    plt.title('Histogram of Points Scored')

    plt.show(block=False)


def main(feature_eng=True, heatmap=False, histograms=False):
    """Load the 'stats1415' table and run the selected analysis steps.

    Parameters
    ----------
    feature_eng : bool, optional
        Unless this is exactly ``False``, the loaded frame is passed through
        ``do_feature_eng`` before any plotting.
    heatmap : bool, optional
        Unless this is exactly ``False``, ``make_heatmap`` is called on the
        resulting frame.
    histograms : bool, optional
        Unless this is exactly ``False``, a histogram of the 'pts' column
        is shown.

    Returns
    -------
    None
    """
    # Two spaces after the colon reproduce the output of the original
    # ``print 'Now running: ', sys.argv[0]`` statement.
    print('Now running:  %s' % sys.argv[0])

    # Connection settings for the local Postgres database.
    username = 'smaug'
    dbname = 'ncaa_mbb_db'
    sql_query = 'SELECT * FROM stats1415'

    con = psycopg2.connect(database=dbname, user=username)
    try:
        my_df = pd.read_sql(sql_query, con)
    finally:
        # Release the connection even when the query fails.
        con.close()

    if feature_eng is not False:
        # The original followed this with ``numeric_df.fillna(0)`` but threw
        # the result away. That no-op is removed so missing values stay
        # missing and remain excluded by the histogram's null filter.
        numeric_df = do_feature_eng(my_df)
    else:
        numeric_df = my_df

    if heatmap is not False:
        make_heatmap(numeric_df)

    if histograms is not False:
        _plot_points_histogram(numeric_df)


In [341]:
# Entry point: run the analysis with feature engineering and the histogram
# switched on and the correlation heatmap switched off.
if '__main__' == __name__:
    main(
        histograms=True,
        heatmap=False,
        feature_eng=True,
    )

Now running:  /anaconda/lib/python2.7/site-packages/ipykernel/__main__.py
  Now engineering features!
