In [6]:
import pandas as pd
from SQLCode import DatabaseConnection
from SQLCode import DatabaseCredentials as DBC
import numpy as np

In [7]:
# Opening connection
creds = DBC.DataBaseCredentials()
conn = DatabaseConnection.sql_connection(creds.server, creds.database, creds.user, creds.password)
connection = conn.open()
cursor = connection.cursor()

In [8]:
# Getting the season mappings
SEASON_MAPPING = pd.read_sql_query("""
select * from season_to_next_season_mapping
""", connection)

In [9]:
# Getting the current season
cursor.execute("select max(seasonID) from seasons")
CURRENT_SEASON = cursor.fetchall()[0][0]

In [11]:
CURRENT_SEASON

20212022

In [10]:
# Getting the previous_Season
PREVIOUS_SEASON = SEASON_MAPPING[SEASON_MAPPING['seasonID'] == CURRENT_SEASON]['previousSeasonID'].values[0]

IndexError: index 0 is out of bounds for axis 0 with size 0

In [None]:
# Selecting GIM for this and last season
GIM_VALUES = pd.read_sql_query(f"""
select GAMES.seasonID,
       GAMES.gameID,
       GAMES.gameType,
       GAMES.teamID,
       GAMES.playerID,
       IF(GIM.value is null, 0, GIM.value) as 'gimForIndvGame',
       ROW_NUMBER() over (partition by GAMES.seasonID, GAMES.playerID order by GAMES.gameID) as 'gameNumber',
       SUM(IF(GIM.value is null, 0, GIM.value)) over (partition by GAMES.seasonID, GAMES.playerID order by GAMES.gameID) as 'gimCumTotal',
       AVG(IF(GIM.value is null, 0, GIM.value)) over (partition by GAMES.seasonID, GAMES.playerID order by GAMES.gameID) as 'gimMean'
from (
         select s.seasonID,
                bs.gameID,
                bs.playerID,
                bs.teamID,
                s.gameType
         from box_scores bs
                  inner join schedules s on bs.gameID = s.gameID
         where scratched = 0 and
               seasonID >= 20102011 and
               timeOnIce is not null and
               s.gameType in ('R')
     ) GAMES
left join
    (
        select gim.gameID,
               playerID,
               sum(if(awayTeam = 1, awayProbability, homeProbability)) as 'value'
        from stage_hockey.gim_values gim
        group by gim.gameID, playerID
    ) GIM ON GAMES.gameID =  GIM.gameID and GAMES.playerID = GIM.playerID
where GAMES.seasonID >= {PREVIOUS_SEASON}
order by gameID,playerID;
""", connection)

# Updating GIM Values By Position
Adding in last seasons GIM values for each Position

In [None]:
# getting each players final gim for the regular season
GIM_BY_PLAYER_BY_SEASON = pd.merge(GIM_VALUES[['seasonID','playerID','gameNumber','gimMean']],
                                   GIM_VALUES[GIM_VALUES['gameType'] == 'R'][['seasonID','playerID','gameNumber']].groupby(['seasonID','playerID']).max('gameNumber').reset_index(),
                                   how='inner',
                                   on=['seasonID','playerID','gameNumber'])

In [None]:
# Getting each players position
POSITIONS = pd.read_sql_query("""
select playerID,
             primaryPositionCode,
             row_number() over (partition by playerID order by date desc ) as 'ROW_NUM'
      from plays_position
      where primaryPositionCode is not null
""", connection)

In [None]:
# Making sure we are using the most recent position info
POSITIONS = POSITIONS[POSITIONS['ROW_NUM'] == 1].drop(['ROW_NUM'],axis=1)

In [None]:
# Getting merging GIM to positions
POSITIONS_AVERAGES = pd.merge(GIM_VALUES[['seasonID','playerID','gimMean']],POSITIONS,how='inner')

In [None]:
# Getting the averages for eac position/season
POSITIONS_AVERAGES = POSITIONS_AVERAGES.drop('playerID',axis=1).groupby(['seasonID','primaryPositionCode']).mean('gimMean').reset_index()

In [None]:
# Inserting this seasons position averages into the table
for index, row in POSITIONS_AVERAGES[POSITIONS_AVERAGES['seasonID'] == CURRENT_SEASON].iterrows():
    if index % 1000 == 0:
        print((index/len(POSITIONS_AVERAGES))*100)
    query = f"insert into stage_hockey.gim_position_averages_per_season values({row['seasonID']},\'{row['primaryPositionCode']}\',{row['gimMean']})"
    cursor.execute(query)
    connection.commit()

# Updating GIM Values By Player for Past Season
Adding in last seasons Adjusted GIM values for Player

In [None]:
# Taking this seasons current season and determining the past season
pastGIMValues = pd.merge(GIM_BY_PLAYER_BY_SEASON,
                             SEASON_MAPPING,
                             how = 'inner',
                             left_on=['seasonID'],
                             right_on = ['seasonID'])

In [None]:
pastGIMValues = pd.merge(pastGIMValues,
                             GIM_BY_PLAYER_BY_SEASON,
                             how = 'left',
                             left_on=['previousSeasonID','playerID'],
                             right_on = ['seasonID','playerID'],
                             suffixes=('', '_previous'))

In [None]:
pastGIMValues = pd.merge(pastGIMValues,POSITIONS,how='inner')

In [None]:
pastGIMValues = pastGIMValues.drop(['seasonID_previous'],axis=1)

In [None]:
pastGIMValues = pd.merge(pastGIMValues, 
                         POSITIONS_AVERAGES,
                         how='inner',
                         left_on=['previousSeasonID','primaryPositionCode'],
                         right_on = ['seasonID','primaryPositionCode'],
                         suffixes=('', '_positionalMean'))

pastGIMValues = pastGIMValues.drop('seasonID_positionalMean',axis=1)

In [None]:
gimValuesMinus2Years = np.where(pastGIMValues['gameNumber_previous'].isna(), 
                                pastGIMValues['gimMean_positionalMean'],
                                np.where(pastGIMValues['gameNumber_previous'] >= 20,
                                         pastGIMValues['gimMean_previous'],
                                         (pastGIMValues['gameNumber_previous']/20) * pastGIMValues['gimMean_previous'] + (1-pastGIMValues['gameNumber_previous']/20)*pastGIMValues['gimMean_positionalMean']))

In [None]:
gimValues = np.where(pastGIMValues['gameNumber'] >= 20, 
         pastGIMValues['gimMean'],
         (pastGIMValues['gameNumber']/20) * pastGIMValues['gimMean'] + (1-pastGIMValues['gameNumber']/20)*gimValuesMinus2Years)

In [None]:
pastGIMValues = pastGIMValues[['seasonID','playerID']]

In [None]:
pastGIMValues['gimValueAdjusted'] = gimValues

In [None]:
for index, row in pastGIMValues.iterrows():
    if index % 100 == 0:
        print((index/len(POSITIONS_AVERAGES))*100)
    query = f"insert into stage_hockey.gim_by_player_by_season values({row['seasonID']},{row['playerID']},{row['gimValueAdjusted']})"
    cursor.execute(query)
    connection.commit()

In [None]:
conn.close()