In [1]:
import pandas as pd
import numpy as np
import os
from datetime import datetime as dt, timedelta
from dateutil.relativedelta import *
import re
from matplotlib import pyplot as plt
import seaborn as sns
import sqlite3

<h3>SQLite Handling functions</h3>
<p>Helper functions that use pandas and sqlite3 to query the SQLite database</p>

In [2]:
# Location of the SQLite database, relative to the notebook's working directory.
# Built with os.path.join so the separator is right on every OS: a literal
# backslash path is a single odd file name on POSIX, and sqlite3.connect would
# silently create a new empty database under that name.
DB = os.path.join("..", "Data", "hockey_data_goalies.db")

#SQL Handle functions
def run_query(q):
    """Run the SQL query `q` against the database at DB and return a DataFrame.

    Parameters
    ----------
    q : str
        SQL text handed to pandas.read_sql.

    Returns
    -------
    pandas.DataFrame
        The result set of the query.
    """
    conn = sqlite3.connect(DB)
    try:
        # `with conn` only commits/rolls back the transaction; it does not
        # close the connection, hence the explicit close() in `finally`.
        with conn:
            return pd.read_sql(q, conn)
    finally:
        conn.close()
   
#send command
def run_command(c):
    """Execute the SQL statement `c` against the database at DB.

    Returns the cursor produced by Connection.execute. No close() is called
    on the connection here, so the returned cursor stays usable.
    """
    connection = sqlite3.connect(DB)
    with connection:
        # isolation_level = None: sqlite3 stops opening transactions implicitly.
        connection.isolation_level = None
        cursor = connection.execute(c)
    return cursor
    
#show tables
def show_tables():
    """Return the name and type of every table and view listed in sqlite_master.

    The result is whatever run_query returns for the listing query.
    """
    # String literals use single quotes: double-quoted tokens are identifiers
    # in SQL and only work as strings through SQLite's legacy fallback.
    listing_sql = """
    Select name,type
    FROM sqlite_master
    WHERE type in ('table','view');
        """
    return run_query(listing_sql)

<h2>Stat Functions</h2>
<p>Given the player statistics, calculate:</p>
<ul>
    <li>Age</li>
    <li>Total minutes this season</li>
    <li>Total minutes last 3 weeks</li>
    <li>shots_against this season</li>
    <li>shots_against last 3 weeks</li>
    <li>save_pct this season</li>
    <li>save_pct last 3 weeks</li>
    <li>rest days</li>
</ul>

In [122]:
def season_finder(date):
    """Return the season label (a year) for `date`.

    Dates in September-December map to their own calendar year; dates in
    January-August map to the previous calendar year.
    """
    after_august = date.month > 8
    return date.year if after_august else date.year - 1
def _ratio_or_nan(numerator, denominator):
    """Return numerator / denominator as a float, or NaN when the denominator is 0."""
    if denominator == 0:
        return np.nan
    return float(numerator) / float(denominator)


def season_cumul(player):
    """
    Given the player, generate the career_log_data.

    Reads every player_log row for `player` and, for each game except the
    first game of each season, emits one row describing what was known before
    that game plus the outcome of the game itself.

    Parameters
    ----------
    player : str
        Value matched against player_log.player_id.

    Returns
    -------
    pandas.DataFrame
        One row per game with the columns
        player_id, team_id, opp_id, date_game, age, injured : copied from the game row
        rest_days       : days since the previous game of the same season
        min_season      : minutes played in earlier games of the season
        shots_against   : shots faced in earlier games of the season
        save_pct        : saves / shots over those earlier games, rounded to 3 places
        min3W, sa3W, svepct3W : the same three figures restricted to same-season
                          games strictly within the 21 days before the game
                          (svepct3W is not rounded)
        future_save_pct : saves / shots of the game itself, rounded to 3 places
        Ratios with a zero denominator are NaN. The frame is empty when the
        player has no log rows.
    """
    columns = ['player_id', 'team_id', 'opp_id', 'date_game', 'age', 'rest_days', 'min_season',
               'shots_against', 'save_pct', 'min3W', 'sa3W', 'svepct3W', 'future_save_pct', 'injured']
    recent_window = timedelta(21)

    #get the gamelogs for that player
    # The id is embedded as a single-quoted SQL string literal with embedded
    # quotes doubled; a double-quoted token can be resolved as a column name.
    player_literal = str(player).replace("'", "''")
    q = "SELECT * FROM player_log WHERE player_id='{0}'".format(player_literal)
    player_logs = run_query(q)
    if player_logs.empty:
        return pd.DataFrame(columns=columns)

    #unit conversions
    # pd.to_datetime instead of astype('datetime64'): the unit-less dtype
    # string is rejected by pandas 2.x.
    player_logs['date_game'] = pd.to_datetime(player_logs['date_game'])
    # "MM:SS" -> minutes as a float
    clock = player_logs['time_on_ice'].str.extract(r'(\d+):(\d+)').astype(int)
    player_logs['time_on_ice'] = clock[0] + clock[1] / 60
    player_logs['season'] = player_logs['date_game'].map(season_finder)
    # rest_days and the "earlier games" sums are positional, so order the
    # logs by date instead of trusting the order the database returned.
    player_logs = player_logs.sort_values('date_game', kind='mergesort')

    records = []
    #for each season
    for _, season_logs in player_logs.groupby('season'):
        season_logs = season_logs.reset_index(drop=True)
        dates = season_logs['date_game']
        # the first game of a season has no earlier games, so start at 1
        for r in range(1, len(season_logs)):
            game_date = dates.iloc[r]
            prior = season_logs.iloc[:r]
            recent = season_logs[(dates < game_date) & (dates > game_date - recent_window)]
            records.append({
                'player_id': season_logs.at[r, 'player_id'],
                'team_id': season_logs.at[r, 'team_id'],
                'opp_id': season_logs.at[r, 'opp_id'],
                'date_game': game_date,
                'age': season_logs.at[r, 'age'],
                'rest_days': (game_date - dates.iloc[r - 1]).days,
                'min_season': prior['time_on_ice'].sum(),
                'shots_against': prior['shots_against'].sum(),
                'save_pct': np.round(
                    _ratio_or_nan(prior['saves'].sum(), prior['shots_against'].sum()), 3),
                'min3W': recent['time_on_ice'].sum(),
                'sa3W': recent['shots_against'].sum(),
                'svepct3W': _ratio_or_nan(recent['saves'].sum(), recent['shots_against'].sum()),
                'future_save_pct': np.round(
                    _ratio_or_nan(season_logs.at[r, 'saves'], season_logs.at[r, 'shots_against']), 3),
                'injured': season_logs.at[r, 'injured'],
            })

    # Build the frame once; growing it cell by cell re-allocates on every row.
    return pd.DataFrame(records, columns=columns)

In [123]:
#for each player in the database
players = run_query('Select unique_id FROM player_list')
feature_columns = ['player_id','team_id','opp_id','date_game','age','rest_days','min_season',
                   'shots_against','save_pct','min3W','sa3W','svepct3W','future_save_pct','injured']

# Collect one frame per player and concatenate once: DataFrame.append was
# removed in pandas 2.0 and re-copied the accumulated frame on every call.
player_frames = [season_cumul(player) for player in players['unique_id']]
if player_frames:
    # Row indexes of the per-player frames are kept, as append() kept them.
    pdf = pd.concat(player_frames)
else:
    pdf = pd.DataFrame(columns=feature_columns)

# NOTE(review): fillna is not restricted to a column here, so a NaN in ANY
# column is replaced by the mean of save_pct - confirm that is intended.
pdf = pdf.fillna(pdf['save_pct'].mean())











































In [125]:
# Write the assembled frame as CSV text to 'goalietrainingdata.txt' in the
# working directory; to_csv's default index=True also writes the row index.
pdf.to_csv('goalietrainingdata.txt')