In [72]:
import numpy as np
import pandas as pd
import math
from datetime import datetime

In [73]:
def getAbsolutePeriodTime(period, time):
    """Convert a period-relative clock reading into game-elapsed "MM:SS".

    Every period before ``period`` is counted as 20 minutes, so period 2 at
    "05:30" becomes "25:30" and period 4 at "03:00" becomes "63:00".

    Args:
        period: 1-based period number (anything ``int()`` accepts).
        time: Period clock as a "minutes:seconds" string. The minutes part
            may be zero-padded ("05:30") or not ("5:30").

    Returns:
        ``time`` unchanged when ``period`` is 1; otherwise a string whose
        minutes part is shifted by 20 minutes per completed period and whose
        seconds part is copied verbatim from ``time``.
    """
    if period == 1:
        return time
    # Parse the whole minutes field rather than patching only its first
    # character, so an unpadded reading such as "5:30" is still shifted
    # correctly.
    minutes_text, separator, seconds_text = time.partition(":")
    absolute_minutes = (int(period) - 1) * 20 + int(minutes_text)
    return f"{absolute_minutes}{separator}{seconds_text}"

In [74]:
def getSplitTimeLength(df):
    """Get the total elapsed game time spanned by the events in a dataframe.

    The span runs from the smallest ``periodTime`` in the lowest ``period``
    to the largest ``periodTime`` in the highest ``period``. Both endpoints
    are converted to absolute game time with ``getAbsolutePeriodTime`` and
    differenced with ``getElapsedTime``.

    Args:
        df: Event rows with ``period`` and ``periodTime`` columns. The
            min/max are taken on the raw ``periodTime`` values
            (presumably zero-padded "MM:SS" strings -- confirm against the
            input data).

    Returns:
        Elapsed time as a "minutes:seconds" string, or "0:0" when ``df`` has
        no rows.
    """
    # With no rows, min()/max() yield NaN and the period conversion raises;
    # an empty split simply spans no time.
    if len(df) == 0:
        return "0:0"
    first_period = df['period'].min()
    last_period = df['period'].max()
    earliest_time = df.loc[df['period'] == first_period, 'periodTime'].min()
    latest_time = df.loc[df['period'] == last_period, 'periodTime'].max()
    start = getAbsolutePeriodTime(first_period, earliest_time)
    end = getAbsolutePeriodTime(last_period, latest_time)
    return getElapsedTime(start, end)

In [75]:
def getDateTimeStamp(time):
    """Parse an absolute "minutes:seconds" string into a ``datetime``.

    The minutes field may exceed 59; it is folded into hours and leftover
    minutes before being handed to ``strptime``. Only the time-of-day part
    of the result is meaningful -- the date is whatever ``strptime`` fills
    in when the format has no date fields.

    Args:
        time: String of the form "<minutes>:<seconds>".

    Returns:
        ``datetime`` built from the "%H:%M:%S" rendering of the input.
    """
    parts = time.split(":")
    hours, leftover_minutes = divmod(int(parts[0]), 60)
    # The literal leading "0" is kept as-is; the seconds text is passed
    # through unparsed.
    return datetime.strptime(f"0{hours}:{leftover_minutes}:{parts[1]}", '%H:%M:%S')

In [76]:
def getElapsedTime(start, end):
    """Return the time between two absolute "minutes:seconds" readings.

    Both readings are converted to hour:minute:second datetimes with
    ``getDateTimeStamp`` so that Python's datetime arithmetic can compute
    the difference.

    Args:
        start: Earlier reading, "<minutes>:<seconds>".
        end: Later reading, "<minutes>:<seconds>".

    Returns:
        "<whole minutes>:<leftover seconds>" with no zero padding,
        e.g. "12:5".
    """
    began = getDateTimeStamp(start)
    finished = getDateTimeStamp(end)
    elapsed_seconds = (finished - began).total_seconds()
    whole_minutes = math.floor(elapsed_seconds / 60)
    leftover_seconds = math.floor(elapsed_seconds % 60)
    return f'{whole_minutes}:{leftover_seconds}'

In [77]:
def getPerSixtyValue(events, elapsedTime):
    """Scale an event count to a rate per 60 minutes.

    Args:
        events: Sized collection; only its length is used.
        elapsedTime: Elapsed time as "<minutes>:<seconds>".

    Returns:
        ``len(events)`` per 60 minutes of elapsed time, rounded to two
        decimals, or 0.0 when the elapsed time is not positive.
    """
    event_count = len(events)
    parts = elapsedTime.split(":")
    elapsed_minutes = int(parts[0]) + int(parts[1]) / 60
    # Guard the division: a zero-length window yields a zero rate.
    per_minute = event_count / elapsed_minutes if elapsed_minutes > 0 else 0
    return float(f"{per_minute * 60:.2f}")

In [78]:
def _fightSplitRates(split_df, prefix):
    """Build the per-60 shot/hit/penalty entries for one side of a fight split."""
    # A fight on the first or last row leaves this side with no events and no
    # period data to measure, so use a zero duration (which produces zero
    # rates) instead of asking getSplitTimeLength to process an empty frame.
    duration = getSplitTimeLength(split_df) if len(split_df) else "0:0"
    team_filters = (
        ('Home', split_df['teamName'] == split_df['homeTeam']),
        ('Away', split_df['teamName'] == split_df['awayTeam']),
        ('Total', None),
    )
    event_types = (('Shots', 'SHOT'), ('Hits', 'HIT'), ('Penalties', 'PENALTY'))
    rates = {}
    # Iterate team-first so keys come out as Home (shots, hits, penalties),
    # then Away, then Total -- the column order of the output table.
    for side, team_filter in team_filters:
        for label, event_type in event_types:
            matches = split_df['eventTypeId'] == event_type
            if team_filter is not None:
                matches = matches & team_filter
            rates[f'{prefix}{label}{side}'] = getPerSixtyValue(
                split_df.index[matches].tolist(), duration
            )
    return rates


def getFightInfo(fight_row, df):
    """Summarise one fight: per-60 event rates before and after it.

    The game's events are split at ``fight_row``. Rows before it form the
    pre-fight window and rows after it form the post-fight window. For each
    window, shots, hits and penalties are counted for the home team, the away
    team and in total, then scaled to a per-60-minute rate using the time
    spanned by that window.

    NOTE(review): the post-fight window starts at ``fight_row + 1``. If the
    second fighter's penalty is recorded on the very next row (the caller
    keeps only every other "Fighting" row), that row is counted as a
    post-fight penalty -- confirm whether this is intended.

    Args:
        fight_row: Positional row number of the fight within ``df``.
        df: Events of a single game. Columns read: ``eventTypeId``,
            ``teamName``, ``homeTeam``, ``awayTeam``, ``homeTeamId``,
            ``awayTeamId``, ``gameId``, ``period``, ``periodTime``,
            ``player1ID``, ``player1FullName``, ``player2ID``,
            ``player2FullName``.

    Returns:
        dict with game identifiers, the nine ``pre*`` rates, the fight time
        and fighters, and the nine ``post*`` rates, in that key order.
    """
    fightData = df.iloc[fight_row]
    fightRowDict = {
        'gameid' : df['gameId'].min(),
        'homeTeam' : df['homeTeam'].min(),
        'homeId' : df['homeTeamId'].min(),
        'awayTeam': df['awayTeam'].min(),
        'awayId' : df['awayTeamId'].min(),
    }
    fightRowDict.update(_fightSplitRates(df.iloc[:fight_row, :], 'pre'))
    fightRowDict.update({
        'fightTime' : getAbsolutePeriodTime(fightData['period'], fightData['periodTime']),
        'player1Id' : fightData['player1ID'],
        'player1Name' : fightData['player1FullName'],
        'player2Id' : fightData['player2ID'],
        'player2Name' : fightData['player2FullName'],
    })
    fightRowDict.update(_fightSplitRates(df.iloc[fight_row + 1:, :], 'post'))
    return fightRowDict

In [79]:
def getFightDataForGame(gameId, df):
    """Collect one fight summary per fight found in a single game.

    Args:
        gameId: Value to match against the ``gameId`` column.
        df: Event rows for any number of games, with ``gameId`` and
            ``secondaryType`` columns.

    Returns:
        List of ``getFightInfo`` dicts, one for every other row whose
        ``secondaryType`` is "Fighting" (the first, third, ... such row);
        empty when the game has no such rows.
    """
    events = df.loc[df['gameId'] == gameId].reset_index()
    fighting_rows = events.index[events['secondaryType'] == "Fighting"].tolist()
    # Only every second fighting row is summarised -- presumably because each
    # fight is logged once per fighter; verify against the data.
    return [getFightInfo(row, events) for row in fighting_rows[::2]]

In [82]:
# Build one summary row per fight across every game in the combined event
# file, then write the resulting table to FightAnalytics.csv.
df = pd.read_csv('AllFightGameDataCombined.csv')
gameIds = df['gameId'].unique()
outputData = []
for gameId in gameIds:
    # extend() grows the list in place; `outputData = outputData + ...`
    # re-copied everything accumulated so far on each game.
    outputData.extend(getFightDataForGame(gameId, df))
# outputData is a list of per-fight dicts (records), which the plain
# DataFrame constructor accepts directly.
output_df = pd.DataFrame(outputData)
output_df.to_csv('FightAnalytics.csv', index=False)

In [None]:
'''
Save the fight time as an 'absolute' time, i.e. total time passed in the game
Period values: 1,2,3,4,5
5 = Shootout = Ignore
1,2,3 = 20:00 each
'''

'''
Get the row where the fight happens (2 rows)
Separate into pre- and post-fight data frames
for each dataframe:
get total shots and per team
get total hits and per team
get total penalties and per team
'''

'''
If a game has multiple fights
Repeat the same process for each 'fight split'
So you can have multiple rows in the final dataframe for the same game
'''

'''
Fight in OT?
Treat as normal
'''

'''
Eventually can filter out fights
i.e. ignore fights that happen after 60:00
ignore rows where pre / post metrics are 0 or close to 0
ignore fights that happen within the first minute of a period
'''

In [None]:
# Final dataframe shape
'''
GAMEID
HOMETEAM
HOMEID
AWAYTEAM
AWAYID
PRE HOME SH/60
PRE HOME HIT/60
PRE HOME PENALTY/60
PRE AWAY SH/60
PRE AWAY HIT/60
PRE AWAY PENALTY/60
PRE TOTAL SH/60
PRE TOTAL HIT/60
PRE TOTAL PENALTY/60
FIGHT TIME
FIGHT ID1
FIGHT NAME1
FIGHT ID2
FIGHT NAME2
POST HOME SH/60
POST HOME HIT/60
POST HOME PENALTY/60
POST AWAY SH/60
POST AWAY HIT/60
POST AWAY PENALTY/60
POST TOTAL SH/60
POST TOTAL HIT/60
POST TOTAL PENALTY/60
'''