In [1]:
import pandas as pd
from SQLCode import DatabaseConnection
from SQLCode import DatabaseCredentials as DBC
import numpy as np

In [7]:
# Opening connection
creds = DBC.DataBaseCredentials()
conn = DatabaseConnection.sql_connection(creds.server, creds.database, creds.user, creds.password)
connection = conn.open()
cursor = connection.cursor()

In [3]:
cursor.execute("select max(seasonID) from seasons")
CURRENT_SEASON = cursor.fetchall()[0][0]

In [4]:
GIM_VALUES = pd.read_sql_query("""
select GAMES.seasonID,
       GAMES.gameID,
       GAMES.gameType,
       GAMES.teamID,
       GAMES.playerID,
       IF(GIM.value is null, 0, GIM.value) as 'gimForIndvGame',
       ROW_NUMBER() over (partition by GAMES.seasonID, GAMES.playerID order by GAMES.gameID) as 'gameNumber',
       SUM(IF(GIM.value is null, 0, GIM.value)) over (partition by GAMES.seasonID, GAMES.playerID order by GAMES.gameID) as 'gimCumTotal',
       AVG(IF(GIM.value is null, 0, GIM.value)) over (partition by GAMES.seasonID, GAMES.playerID order by GAMES.gameID) as 'gimMean'
from (
         select s.seasonID,
                bs.gameID,
                bs.playerID,
                bs.teamID,
                s.gameType
         from box_scores bs
                  inner join schedules s on bs.gameID = s.gameID
         where scratched = 0 and
               seasonID >= 20102011 and
               timeOnIce is not null and
               s.gameType in ('R', 'P')
     ) GAMES
left join
    (
        select gim.gameID,
               playerID,
               sum(if(awayTeam = 1, awayProbability, homeProbability)) as 'value'
        from stage_hockey.gim_values gim
        group by gim.gameID, playerID
    ) GIM ON GAMES.gameID =  GIM.gameID and GAMES.playerID = GIM.playerID
order by gameID,playerID;
""", connection)

In [8]:
lastRegularSeasonGameNumbers = GIM_VALUES[GIM_VALUES['gameType'] == 'R'][['seasonID','playerID','gameNumber']].groupby(['seasonID','playerID']).max('gameNumber').reset_index()

In [9]:
GIM_BY_PLAYER_BY_SEASON = pd.merge(GIM_VALUES[['seasonID','playerID','gameNumber','gimMean']],
                                   lastRegularSeasonGameNumbers,
                                   how='inner',
                                   on=['seasonID','playerID','gameNumber'])

In [10]:
POSITIONS = pd.read_sql_query("""
select playerID,
             primaryPositionCode,
             row_number() over (partition by playerID order by date desc ) as 'ROW_NUM'
      from plays_position
      where primaryPositionCode is not null
""", connection)

In [11]:
POSITIONS = POSITIONS[POSITIONS['ROW_NUM'] == 1].drop(['ROW_NUM'],axis=1)

In [12]:
POSITIONS_AVERAGES = pd.merge(GIM_VALUES[['seasonID','playerID','gimMean']],POSITIONS,how='inner')

In [13]:
POSITIONS_AVERAGES = POSITIONS_AVERAGES.drop('playerID',axis=1).groupby(['seasonID','primaryPositionCode']).mean('gimMean').reset_index()

In [14]:
SEASON_MAPPING = pd.read_sql_query("""
select * from season_to_next_season_mapping
""", connection)

In [15]:
pastGIMValues = pd.merge(GIM_BY_PLAYER_BY_SEASON,
                             SEASON_MAPPING,
                             how = 'inner',
                             left_on=['seasonID'],
                             right_on = ['seasonID'])

In [16]:
pastGIMValues = pd.merge(pastGIMValues,
                             GIM_BY_PLAYER_BY_SEASON,
                             how = 'left',
                             left_on=['previousSeasonID','playerID'],
                             right_on = ['seasonID','playerID'],
                             suffixes=('', '_previous'))

In [17]:
pastGIMValues = pd.merge(pastGIMValues,POSITIONS,how='inner')

In [18]:
pastGIMValues = pastGIMValues.drop(['seasonID_previous'],axis=1)

In [19]:
pastGIMValues = pd.merge(pastGIMValues, 
                         POSITIONS_AVERAGES,
                         how='inner',
                         left_on=['previousSeasonID','primaryPositionCode'],
                         right_on = ['seasonID','primaryPositionCode'],
                         suffixes=('', '_positionalMean'))

pastGIMValues = pastGIMValues.drop('seasonID_positionalMean',axis=1)

In [20]:
gimValuesMinus2Years = np.where(pastGIMValues['gameNumber_previous'].isna(), 
                                pastGIMValues['gimMean_positionalMean'],
                                np.where(pastGIMValues['gameNumber_previous'] >= 20,
                                         pastGIMValues['gimMean_previous'],
                                         (pastGIMValues['gameNumber_previous']/20) * pastGIMValues['gimMean_previous'] + (1-pastGIMValues['gameNumber_previous']/20)*pastGIMValues['gimMean_positionalMean']))

In [21]:
gimValues = np.where(pastGIMValues['gameNumber'] >= 20, 
         pastGIMValues['gimMean'],
         (pastGIMValues['gameNumber']/20) * pastGIMValues['gimMean'] + (1-pastGIMValues['gameNumber']/20)*gimValuesMinus2Years)

In [22]:
pastGIMValues = pastGIMValues[['seasonID','playerID']]

In [23]:
pastGIMValues['gimValue'] = gimValues

In [24]:
gimValuesAdjusted = pd.merge(GIM_VALUES,
         SEASON_MAPPING,
         how='inner',on=['seasonID'])

In [25]:
gimValuesAdjusted = pd.merge(gimValuesAdjusted,
         pastGIMValues,
         how='left', 
         left_on=['previousSeasonID','playerID'],
         right_on=['seasonID','playerID'],
         suffixes=('', '_previous')).drop(['seasonID_previous'],axis=1)

In [26]:
gimValuesAdjusted = pd.merge(gimValuesAdjusted,POSITIONS,how='inner')

In [27]:
gimValuesAdjusted = pd.merge(gimValuesAdjusted,POSITIONS_AVERAGES,how='inner',  
         left_on=['previousSeasonID','primaryPositionCode'],
                         right_on = ['seasonID','primaryPositionCode'],
                         suffixes=('', '_positionalMean')).drop(['primaryPositionCode'],axis=1)

In [28]:
gimValuesAdjusted['gimMeanAdjusted'] = np.where(gimValuesAdjusted['gameNumber'] >= 20, 
         gimValuesAdjusted['gimMean'],
         np.where(gimValuesAdjusted['gimValue'].isna(),
                 (gimValuesAdjusted['gameNumber']/20)*gimValuesAdjusted['gimMean'] + (1-gimValuesAdjusted['gameNumber']/20)*gimValuesAdjusted['gimMean_positionalMean'],                 
                  (gimValuesAdjusted['gameNumber']/20)*gimValuesAdjusted['gimMean'] + (1-gimValuesAdjusted['gameNumber']/20)*gimValuesAdjusted['gimValue']))

In [29]:
gimValuesAdjusted = gimValuesAdjusted.drop(['previousSeasonID', 
                        'gimValue', 
                        'seasonID_positionalMean',
                        'gimMean_positionalMean'],axis=1)

In [30]:
gimValuesAdjusted = gimValuesAdjusted[gimValuesAdjusted['seasonID'] > 20112012]

In [31]:
gimValuesAdjusted = gimValuesAdjusted.reset_index(drop=True)

In [32]:
gimValuesAdjusted.to_csv('gimValuesAdjusted.csv')

In [34]:
cursor.execute("truncate table stage_hockey.gim_values_consolidated;")
connection.commit()

In [35]:
for index, row in gimValuesAdjusted.iterrows():
    if index % 1000 == 0:
        print((index/len(gimValuesAdjusted))*100)
    query = f"insert into stage_hockey.gim_values_consolidated values({row['seasonID']},{row['gameID']},\'{row['gameType']}\',{row['teamID']},{row['playerID']},{row['gimForIndvGame']},{row['gameNumber']},{row['gimCumTotal']},{row['gimMean']},{row['gimMeanAdjusted']})"
    cursor.execute(query)
    connection.commit()
#     break

0.0


ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "C:\Users\Aidan\anaconda3\envs\tf\lib\site-packages\IPython\core\interactiveshell.py", line 3418, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-35-ccc1617a7a21>", line 6, in <module>
    connection.commit()
  File "C:\Users\Aidan\anaconda3\envs\tf\lib\site-packages\mysql\connector\connection.py", line 1229, in commit
    self._execute_query("COMMIT")
  File "C:\Users\Aidan\anaconda3\envs\tf\lib\site-packages\mysql\connector\connection.py", line 1248, in _execute_query
    self.cmd_query(query)
  File "C:\Users\Aidan\anaconda3\envs\tf\lib\site-packages\mysql\connector\connection.py", line 846, in cmd_query
    result = self._handle_result(self._send_cmd(ServerCmd.QUERY, query))
  File "C:\Users\Aidan\anaconda3\envs\tf\lib\site-packages\mysql\connector\connection.py", line 501, in _send_cmd
    return self._socket.recv()
  File "C:\Users\Aidan\anaconda3\envs\tf\lib\site-packages\mysql\connector\network.p

TypeError: object of type 'NoneType' has no len()

In [36]:
conn.close()