In [468]:
import sys
import pandas as pd
import numpy as np
import psycopg2
import re
from sqlalchemy import create_engine
from sqlalchemy_utils import database_exists, create_database



In [469]:
def do_sanity_checks(my_con, my_sanity_checks):
    my_table = 'stats' + my_sanity_checks
    print '  Some basic sanity checks for the %s season table.' % my_table
    
    try:
        sql_query = '''
                    SELECT COUNT(*) from %s
                    ''' % (my_table)
        from_sql_df = pd.read_sql(sql_query, my_con)
        print '      Total number of entries in table:'
        print from_sql_df
    except:
        print '    Unable to determine total number of entries. '
        
    try:
        sql_query = '''
                    SELECT COUNT(DISTINCT(game_id)) from %s
                    ''' % (my_table)
        from_sql_df = pd.read_sql(sql_query, my_con)
        print '      Total number of games in table:'
        print from_sql_df.head(5)
    except:
        print '    Unable to determine total number of games. '

    try:
        sql_query = '''
                    SELECT COUNT(DISTINCT(team_name)) from %s
                    ''' % (my_table)
        from_sql_df = pd.read_sql(sql_query, my_con)
        print '      Total number of teams in table:'
        print from_sql_df.head(100)
    except:
        print '    Unable to determine total number of teams. '

    try:
        sql_query = '''
                    SELECT DISTINCT(team_name) from %s
                     ORDER BY team_name ASC
                    ''' % (my_table)
        from_sql_df = pd.read_sql(sql_query, my_con)
        print '      Teams in table:'
        print from_sql_df
    except:
        print '    Unable to determine teams. '


        
        

In [470]:
def build_team_db(my_con, my_engine, season):
    print '    Rebuilding teams database for %s season' % (season)
    my_table = 'stats' + season
    team_table = 'teams' + season
    my_if_exists = 'replace'
    
    
    cnt = 1
    try:
        sql_query = '''
                    SELECT DISTINCT(team_name) from %s
                     ORDER BY team_name ASC
                    ''' % (my_table)
        from_sql = pd.read_sql(sql_query, my_con)
        #print '      Teams in table:'
        #print from_sql['team_name']
    except:
        print '    Unable to determine teams. '
        
    
    for team_name in from_sql['team_name']:
        #SQL queries dont like single quotes in strings  
        match = re.search('\'',team_name)
        if match is not None:
            team_name = team_name.replace("'", "''")
            
        #find games involving this team
        sql_query_2 = '''
                    SELECT DISTINCT(game_id) from %s
                     WHERE team_name IN ('%s')
                    ''' % (my_table, team_name)
        from_sql_2 = pd.read_sql(sql_query_2, my_con)

      
        
        for game_id in from_sql_2['game_id']:
            print team_name, game_id
            sql_query_3 = '''
                    SELECT * from %s
                     WHERE team_name IN ('%s')
                      AND game_id IN ('%s')
                    ''' % (my_table, team_name, game_id)
            from_sql_3 = pd.read_sql(sql_query_3, my_con)
            
            this_wl = from_sql_3['wl'][0]
            this_ha = from_sql_3['ha'][0]
            this_game_id = from_sql_3['game_id'][0]
            this_team_name = from_sql_3['team_name'][0]
            from_sql_3 = from_sql_3.drop(['game_id', 'player_url', 
                                         'team_name', 'ha', 'wl',
                                         'player_pos', 'player', 'index'], 1)
            from_sql_3 = from_sql_3.astype('float')
            
            for_sql = from_sql_3.sum(axis=0)
            for_sql['ftper'] = for_sql['ftm'] / for_sql['fta']
            for_sql['fgper'] = for_sql['fgm'] / for_sql['fga']
            for_sql['tpper'] = for_sql['tpm'] / for_sql['tpa']
            for_sql['ha'] = this_ha
            for_sql['wl'] = this_wl
            for_sql['game_id'] = this_game_id
            for_sql['team_name'] = this_team_name

            #summing returns a series not a data frame. we put it back 
            df = pd.DataFrame(for_sql, index=for_sql.index)
            df = df.transpose()            
            df.to_sql(team_table, my_engine, if_exists=my_if_exists)
            my_if_exists = 'append'



            
            #if cnt >= 3:
            #    sys.exit(0)
            #cnt = cnt + 1
                

    
        

In [471]:
def main(sanity_checks=False, team_db=False):
    print 'Now running: ', sys.argv[0]
    
    
    #connect to the database
    dbname = 'ncaa_mbb_db'
    username = 'smaug'
    con = None
    con = psycopg2.connect(database=dbname, user=username)    
    engine = create_engine('postgres://%s@localhost/%s' % (username, dbname))


    #do sanity checks
    if sanity_checks is not False:
        chk = do_sanity_checks(con, sanity_checks)
        
    #remake teams database
    if team_db is not False:
        chk = build_team_db(con, engine, team_db)

In [472]:
# boilerplate to execute call to main() function
if __name__ == '__main__':
    main(sanity_checks=False, team_db='1415')

Now running:  /anaconda/lib/python2.7/site-packages/ipykernel/__main__.py
    Rebuilding teams database for 1415 season
AUB Montgomery Senators 400598788
AUB Montgomery Senators 400600688
Abilene Christian  Wildcats 400595589


SystemExit: 0

To exit: use 'exit', 'quit', or Ctrl-D.
