In [69]:
import os
import re
import sqlite3
from contextlib import closing
from datetime import datetime as dt, timedelta

import numpy as np
import pandas as pd
from dateutil.relativedelta import *

Given the team, the year, and the game number, get the team stats, including:
<ul>
    <li>Record to date for the team and their opponent</li>
    <li>Outcomes of last 5 contests - including previous year</li>
    <li>Outcomes of last 5 contests for each team</li>
</ul>

<h3>SQLite Handling functions</h3>
<p>Use pandas to read SQLite query results into DataFrames</p>

In [70]:
# Location of the SQLite database, relative to the notebook's working directory.
# Built with os.path.join so the separator is correct on every OS: a hard-coded
# "..\\Data\\..." string is a single odd file name on POSIX, and sqlite3 would
# open a brand-new empty database there instead of the real one.
DB = os.path.join("..", "Data", "hockey_data_goalies.db")

#SQL Handle functions
def run_query(q):
    """Run a SQL query against the database at DB and return the result set.

    INPUT:
        q: SQL text to execute
    RETURN:
        pandas DataFrame holding the rows produced by the query
    """
    # sqlite3's own context manager only commits/rolls back a transaction; it
    # never closes the connection. closing() guarantees the handle is released
    # once the rows have been read into the DataFrame.
    with closing(sqlite3.connect(DB)) as conn:
        return pd.read_sql(q, conn)
   
#send command
def run_command(c):
    """Execute one SQL statement against the database at DB.

    INPUT:
        c: SQL text to execute
    RETURN:
        the sqlite3 cursor produced by executing the statement
    """
    with sqlite3.connect(DB) as connection:
        # isolation_level None puts the sqlite3 module in autocommit mode
        connection.isolation_level = None
        cursor = connection.execute(c)
    # NOTE: the with-block does not close the connection, so the returned
    # cursor can still be read by the caller.
    return cursor
    
#show tables
def show_tables():
    """Return the name and type of every table and view listed in sqlite_master."""
    catalog_sql = """
    Select name,type
    FROM sqlite_master
    WHERE type in ("table","view");
        """
    tables_and_views = run_query(catalog_sql)
    return tables_and_views

Statistic functions

In [71]:
def sos_calc(year,game_no,nme,wind=None):
    """
        CALCULATES THE STRENGTH OF SCHEDULE (SOS) for the opponents a team faced
        before a given game.
    INPUT:
        year: starting year of the season (season is YYYY to YYYY+1)
        game_no: row label of the current game in the team's season log as
                 returned by run_query; previous games are rows [0, game_no-1]
        nme: the team abbr to calculate the SOS for
        wind: optional boolean mask over the team's season log selecting the
              games to use instead of "every game before game_no".
              None (or the legacy one-element sentinel such as [0]) means
              no window.
    RETURN:
        sos: sum(times_faced * opponent_goal_diff_per_game) / games_considered,
             where each opponent's goal difference per game is taken over all of
             its league games before the current game's date.
             0 when game_no is not positive or no games are selected.
    RAISES:
        KeyError: game_no is not a row of the team's season log
        IndexError: an opponent name has no matching row in team_list
    """
    # Season bounds on "year + month/12": strictly between year+.66 and
    # year+1.66, i.e. August of `year` through July of `year`+1
    # (assumes date_game is stored as 'YYYY-MM-DD' text -- TODO confirm).
    lo_bound = int(year) + .66
    hi_bound = int(year) + 1.66

    #the team's own season log
    q = ("""SELECT *
            FROM team_log
            WHERE team_id=\"{0}\"
            AND (CAST(SUBSTR(date_game,1,4) AS FLOAT)+CAST(SUBSTR(date_game,6,7) AS FLOAT)/12) > {1}
            AND (CAST(SUBSTR(date_game,1,4) AS FLOAT)+CAST(SUBSTR(date_game,6,7) AS FLOAT)/12) < {2}
            """.format(nme,lo_bound,hi_bound))
    games = run_query(q)
    # pd.to_datetime instead of astype('datetime64'): the unit-less cast is
    # rejected by pandas 2.x
    games['date_game'] = pd.to_datetime(games['date_game'])

    gamedate = games.at[game_no,'date_game']

    if game_no <= 0:
        return 0

    #pick the games whose opponents count towards the SOS
    use_window = wind is not None and len(wind) != 1
    if use_window:
        faced = games.loc[wind,'opp_name']
        n_games = int(np.sum(wind))
    else:
        faced = games.loc[games['date_game']<gamedate,'opp_name']
        n_games = game_no
    if n_games == 0:
        # nothing to average over (e.g. no games inside the window)
        return 0

    #opponents faced and how many times each
    teams_faced = faced.value_counts()

    #every league game of the season
    q = ("""SELECT *
            FROM team_log
            WHERE (CAST(SUBSTR(date_game,1,4) AS FLOAT)+CAST(SUBSTR(date_game,6,7) AS FLOAT)/12) > {0}
            AND (CAST(SUBSTR(date_game,1,4) AS FLOAT)+CAST(SUBSTR(date_game,6,7) AS FLOAT)/12) < {1}
            """.format(lo_bound,hi_bound))
    league_games = run_query(q)
    league_games['date_game'] = pd.to_datetime(league_games['date_game'])

    #goal difference per game for every team, using only games before gamedate;
    #aggregate just the goal columns so text/date columns are never summed
    by_team = league_games.loc[league_games['date_game']<gamedate].groupby('team_id')
    goal_diff = by_team['goals'].sum()-by_team['opp_goals'].sum()
    diff = goal_diff/by_team['goals'].count()

    sos = 0
    for opp_name, times_faced in teams_faced.items():
        #translate the opponent's full name into its abbreviation
        q = '''SELECT team_abbr FROM team_list WHERE team_name=\"{0}\" AND CAST(SUBSTR(years_active,6,9) AS INT)>{1}'''.format(opp_name,int(year))
        abbrs = run_query(q)['team_abbr'].values
        if len(abbrs) == 0:
            raise IndexError("no team_list entry for opponent {0!r} active after {1}".format(opp_name,int(year)))
        sos = sos + times_faced*diff[abbrs[0]]

    return sos/n_games
    
def team_stat_gen(team_value,year_value,game_value):
    """
        Generates team based statistics for the season (and the last 3 weeks) up to the selected game
        INPUT:
            team_value: The team abbr
            year_value: The season in question where year is YYYY:YYYY+1
            game_value: row label of the current game in the team's season log;
                        previous games are [0, game_value-1]. Must be at least 1
                        because the previous game is needed for rest days
                        (original author's note: "must be more than 10").
        OUTPUT:
            returns a data series of 27 team stats: season-to-date figures
            first, then rest days and the same figures for the 21 days before
            the game. Per-game recent figures are NaN when no game falls in
            that 21-day window.
        RAISES:
            KeyError: game_value or game_value-1 is not a row of the season log
    """
    #get the details on the game
    q = ("""SELECT *
            FROM team_log
            WHERE team_id=\"{0}\"
            AND (CAST(SUBSTR(date_game,1,4) AS FLOAT)+CAST(SUBSTR(date_game,6,7) AS FLOAT)/12) > {1}
            AND (CAST(SUBSTR(date_game,1,4) AS FLOAT)+CAST(SUBSTR(date_game,6,7) AS FLOAT)/12) < {2}
            """.format(team_value,int(year_value) + .66,int(year_value)+1.66))
    game_details = run_query(q)
    # pd.to_datetime instead of astype('datetime64'): the unit-less cast is
    # rejected by pandas 2.x
    game_details['date_game'] = pd.to_datetime(game_details['date_game'])

    # Fail loudly with context instead of printing and carrying on with an
    # undefined game date.
    if game_value not in game_details.index or (game_value-1) not in game_details.index:
        raise KeyError(
            "games {0} and {1} must both exist in the {2} log for season {3} "
            "({4} games found)".format(game_value-1,game_value,team_value,year_value,len(game_details)))

    game_date = game_details.at[game_value,'date_game']
    rest_days = game_date-game_details.at[game_value-1,'date_game']

    stat_cols = ['goals','opp_goals','shots','shots_against']

    #team stats up to this point of the season (basic stats)
    prior = game_details.loc[game_details.index<game_value]
    season_record = _record_summary(prior)
    games = season_record[0]
    cum_total = prior[stat_cols].sum()
    sos = sos_calc(year_value,game_value,team_value) #strength of schedule higher is tougher
    srs = (cum_total['goals']-cum_total['opp_goals'])/games+sos #simple rating system
    basic_stats = season_record+[srs,sos]
    basic_stats.extend(cum_total/games)

    #team stats last 3 weeks (basic stats)
    window = (game_details['date_game']<game_date)&(game_details['date_game']>(game_date-timedelta(21)))
    recent = game_details.loc[window]
    recent_record = _record_summary(recent)
    rgames = recent_record[0]
    rcumum = recent[stat_cols].sum()
    sos_rec = sos_calc(year_value,game_value,team_value,window) #strength of schedule higher is tougher
    if rgames:
        srs_rec = (rcumum['goals']-rcumum['opp_goals'])/rgames+sos_rec #simple rating system
    else:
        srs_rec = np.nan
    recent_stats = [rest_days.days]+recent_record+[srs_rec,sos_rec]
    recent_stats.extend(list(rcumum/rgames))

    labels = ['total_games','wins','loses','ties','otl','points','point_pct','srs','sos','ave_goals','ave_oppgoals','ave_shots','ave_oppshots','rest_days','recent_games','recent_wins','recent_loses','recent_ties','recent_otl','recent_points','recent_point_pct','recent_srs','recent_sos','recent_ave_goals','recent_ave_oppgoals','recent_ave_shots','recent_ave_oppshots']
    all_stats = basic_stats+recent_stats
    return pd.Series(all_stats,labels)


def _record_summary(played):
    """Return [games, wins, losses, ties, otl, points, points_pct] for a slice of a team's game log."""
    outcome = played['game_outcome']
    games = len(played)
    wins = (outcome=='W').sum()
    losses = (outcome=='L').sum()
    ties = (outcome=='T').sum()
    # overtime losses: a loss with a non-null 'overtimes' entry; both sides of
    # the mask come from the same slice so they share an index
    otl = ((outcome=='L')&(played['overtimes'].notnull())).sum()
    points = wins*2+ties+otl
    points_pct = np.round(points/(2*games),3) if games else np.nan
    return [games,wins,losses,ties,otl,points,points_pct]

In [72]:
# Inputs: the game to analyse
game_num = 56       # game number for the team
year_value = 2016   # season, given as its starting year
team_value = 'ANA'  # team abbr

<h2>Get record to date</h2>

In [73]:
# Locate the selected game in the team's season log
season_lo = int(year_value) + .66
season_hi = int(year_value)+1.66
q = ("""SELECT *
        FROM team_log 
        WHERE team_id=\"{0}\" 
        AND (CAST(SUBSTR(date_game,1,4) AS FLOAT)+CAST(SUBSTR(date_game,6,7) AS FLOAT)/12) > {1}
        AND (CAST(SUBSTR(date_game,1,4) AS FLOAT)+CAST(SUBSTR(date_game,6,7) AS FLOAT)/12) < {2}
        AND game_number={3}""".format(team_value,season_lo,season_hi,game_num))
game_selected = run_query(q)
game_date = game_selected['date_game'].iloc[0]
opp_name = game_selected['opp_name'].iloc[0]

# Full name of the selected team
q = """SELECT team_name
       FROM team_list 
       WHERE team_abbr=\"{0}\"""".format(team_value)
team_name = run_query(q).loc[0,'team_name']

# Abbreviation of the opponent, looked up from its full name
q = """SELECT * 
       FROM team_list 
       WHERE team_name=\"{0}\" 
       AND CAST(SUBSTR(years_active,6,9) as INT)>{1}""".format(opp_name,year_value)
opp_abbr = run_query(q).loc[0,'team_abbr']

# The opponent's own game number on that date
q = """SELECT * 
       FROM team_log 
       WHERE team_id=\"{0}\" 
       AND date_game=\"{1}\"""".format(opp_abbr,game_date)
opp_gn = run_query(q).loc[0,'game_number']

print(game_date,team_name,opp_name,opp_abbr,opp_gn)

2017-02-11 Anaheim Ducks Washington Capitals WSH 55


In [75]:
# One Series of stats per team, each computed up to its own game number
hometeamstats = team_stat_gen(team_value,year_value,game_num)
oppteamstats = team_stat_gen(opp_abbr,year_value,opp_gn)

# Side-by-side table: one column per team, one row per stat
stats = pd.concat([hometeamstats,oppteamstats],axis=1)
stats.columns = [team_name,opp_name]

# Season-to-date summary: the first nine stats, one row per team
statst = stats.iloc[:9].T
statst.columns = ['Total Games', 'Wins','Loses','Ties','OTL','Points','Point_Pct','SRS','SOS']

In [None]:
# Display the full stats table (one column per team, one row per stat)
stats