In [1]:
from ift6758.data import load_train_test_dataframes

import pandas as pd

In [2]:
# Load the train and test DataFrames through the project's data loader.
# NOTE(review): `all_types=True` presumably asks for every event type rather
# than shots only -- confirm against `load_train_test_dataframes`.
train_data, test_data = load_train_test_dataframes(all_types=True)

Found 413339 events
Found 434744 events
Found 433203 events
Found 367886 events
Found 287926 events


In [7]:
# Order both splits chronologically: by game, then period, then clock time.
train_data, test_data = (
    frame.sort_values(by=["game_id", "period_number", "time_in_period"])
    for frame in (train_data, test_data)
)


### Utils

In [8]:
# Convert time_in_period from MM:SS to seconds
def convert_to_seconds(time_str):
    """Turn an ``MM:SS`` string into the total number of seconds it represents."""
    mins, secs = (int(part) for part in time_str.split(':'))
    return 60 * mins + secs

In [12]:
import numpy as np

def add_previous_event_features(data):
    """
    Add columns describing the row that immediately precedes each event.

    The rows of ``data`` are expected to already be in chronological order;
    "previous" always means the previous row of the frame.

    Columns written on ``data`` (the frame is modified in place):
    - ``last_event_type``: ``type_desc_key`` of the previous row.
    - ``last_x`` / ``last_y``: ``x_coord`` / ``y_coord`` of the previous row.
    - ``time_since_last_event``: ``game_seconds`` minus the previous row's value.
    - ``distance_from_last_event``: Euclidean distance between the current and
      the previous coordinates.

    When a ``game_id`` column is present, a row whose predecessor belongs to a
    different game gets NaN for every one of these features, so the last event
    of one game never leaks into the first event of the next (same rule as
    ``add_last_angle``). Without a ``game_id`` column the previous row is used
    unconditionally.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain ``type_desc_key``, ``x_coord``, ``y_coord`` and
        ``game_seconds``; ``game_id`` is optional.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in, with the new columns added.
    """
    # Boolean mask: True where the previous row belongs to the same game.
    if "game_id" in data.columns:
        game_ids = data["game_id"]
        same_game = game_ids.eq(game_ids.shift(1)).to_numpy(dtype=bool)
    else:
        same_game = np.ones(len(data), dtype=bool)

    def previous_value(column):
        # Value of `column` on the previous row, NaN across a game boundary.
        return data[column].shift(1).where(same_game)

    data["last_event_type"] = previous_value("type_desc_key")
    data["last_x"] = previous_value("x_coord")
    data["last_y"] = previous_value("y_coord")
    data["time_since_last_event"] = data["game_seconds"] - previous_value("game_seconds")

    # Vectorized distance: NaN propagates whenever a coordinate is missing,
    # which matches the explicit NaN checks of a row-by-row computation and
    # also works on an empty frame.
    delta_x = data["x_coord"] - data["last_x"]
    delta_y = data["y_coord"] - data["last_y"]
    data["distance_from_last_event"] = np.sqrt(delta_x**2 + delta_y**2)

    return data

In [36]:
def add_last_angle(data):
    """
    Add a ``last_angle`` column to the DataFrame (in place) and return it.

    ``last_angle`` holds the ``goal_angle`` of the previous row, or NaN when
    the previous row belongs to a different game (or does not exist).
    """
    previous_angle = data["goal_angle"].shift(1)
    previous_game = data["game_id"].shift(1)

    # Keep the shifted angle only where the previous row is from the same game.
    data["last_angle"] = previous_angle.where(data["game_id"] == previous_game)

    return data


In [32]:
def calculate_absolute_angle_change(row):
    """
    Angle change for one event row.

    For a rebound (``row["is_rebound"]`` truthy) the result is the sum of the
    absolute values of the current ``goal_angle`` and the previous
    ``last_angle``; for any other row it is 0.
    """
    if row["is_rebound"]:
        current_magnitude = abs(row["goal_angle"])
        previous_magnitude = abs(row["last_angle"])
        return current_magnitude + previous_magnitude
    # Not a rebound: no angle change is reported.
    return 0


In [33]:
def calculate_speed(row):
    """
    Speed for one event row: distance from the last event divided by the time
    elapsed since it. Yields 0 when the elapsed time is missing, zero or negative.
    """
    elapsed = row["time_since_last_event"]
    if pd.notna(elapsed) and elapsed > 0:
        return row["distance_from_last_event"] / elapsed
    return 0



### Preprocessing function (Advanced)

In [49]:
def preprocess_advanced(data):
    """
    Advanced preprocessing of an event DataFrame.

    Steps, in order:
    - convert ``time_in_period`` (``MM:SS``) into ``time_in_period_seconds``;
    - compute ``game_seconds`` from the period number and the period clock;
    - add previous-event features via ``add_previous_event_features``;
    - flag rebounds in ``is_rebound``;
    - compute ``speed`` via ``calculate_speed``;
    - add ``last_angle`` via ``add_last_angle``;
    - compute ``absolute_angle_change`` via ``calculate_absolute_angle_change``;
    - keep only ``goal`` and ``shot-on-goal`` rows.

    Parameters
    ----------
    data : pandas.DataFrame
        Events sorted chronologically. Columns read directly by this function:
        ``time_in_period``, ``period_number``, ``game_id`` and
        ``type_desc_key``; the helpers read further columns.

    Returns
    -------
    pandas.DataFrame
        An independent copy containing only the goal / shot-on-goal rows.

    Notes
    -----
    The feature columns are written onto ``data`` itself, so the caller's frame
    gains the new columns; only the row filter is applied to the returned copy.
    """
    # 1200 s offset per elapsed period (20 minutes).
    # NOTE(review): the same offset is applied to every period number,
    # including any overtime/shootout periods -- confirm this is intended.
    period_length_seconds = 1200

    # Reuse the notebook-level helper instead of redefining it locally.
    data['time_in_period_seconds'] = data['time_in_period'].apply(convert_to_seconds)

    data['game_seconds'] = (
        (data['period_number'] - 1) * period_length_seconds
        + data['time_in_period_seconds']
    )

    data = add_previous_event_features(data)

    # A rebound requires the previous row to be a shot attempt *of the same
    # game*; otherwise the last shot of one game would mark the first event of
    # the next game as a rebound (and yield a NaN angle change for it).
    previous_is_shot = data["last_event_type"].isin(["shot-on-goal", "missed-shot", "blocked-shot"])
    same_game = data["game_id"].eq(data["game_id"].shift(1))
    data["is_rebound"] = previous_is_shot & same_game

    # Row-wise helpers only read a few columns; restricting the frame keeps
    # `apply` from materialising every column for every row.
    data["speed"] = data[["time_since_last_event", "distance_from_last_event"]].apply(
        calculate_speed, axis=1
    )

    data = add_last_angle(data)

    data["absolute_angle_change"] = data[["is_rebound", "goal_angle", "last_angle"]].apply(
        calculate_absolute_angle_change, axis=1
    )

    # Return a real copy so later column assignments on the result do not
    # raise SettingWithCopyWarning.
    return data[data["type_desc_key"].isin(["goal", "shot-on-goal"])].copy()


In [50]:
# Keep the returned frame: the goal / shot-on-goal filter is only applied to
# the return value, not to `train_data` itself.
train_data_advanced = preprocess_advanced(train_data)

train_data_advanced.head()

Unnamed: 0,game_id,season,game_type,game_date,venue,venue_location,away_team_id,away_team_abbrev,away_team_name,home_team_id,home_team_abbrev,home_team_name,event_id,event_idx,sort_order,period_number,period_type,max_regulation_periods,time_in_period,time_remaining,situation_code,is_empty_net,is_goal,type_code,type_desc_key,away_score,home_score,away_sog,home_sog,x_coord,y_coord,zone_code,shot_type,description,event_owner_team_id,scoring_player_total,assist1_player_total,assist2_player_total,goal_distance,goal_angle,goal_side,goal_x_coord,shooting_player_id,shooting_player_name,shooting_player_team_id,shooting_player_position_code,goalie_in_net_id,goalie_in_net_name,goalie_in_net_team_id,goalie_in_net_position_code,scoring_player_id,scoring_player_name,scoring_player_team_id,scoring_player_position_code,assist1_player_id,assist1_player_name,assist1_player_team_id,assist1_player_position_code,assist2_player_id,assist2_player_name,assist2_player_team_id,assist2_player_position_code,time_in_period_seconds,game_seconds,last_event_type,last_x,last_y,time_since_last_event,distance_from_last_event,is_rebound,speed,last_angle,absolute_angle_change
7,2016020001,20162017,2,2016-10-12,Canadian Tire Centre,Ottawa,10,TOR,Maple Leafs,9,OTT,Senators,8,7,16,1,REG,3,01:11,18:49,1551,False,False,506,shot-on-goal,,,1.0,0.0,-77.0,5.0,O,wrist,Craig Anderson stops a shot from Mitch Marner,10.0,,,,13.0,-22.619865,left,-89,8478483.0,Mitch Marner,10.0,C,8467950.0,Craig Anderson,9.0,G,,,,,,,,,,,,,71,71,,,,,,False,0.0,,0.0
14,2016020001,20162017,2,2016-10-12,Canadian Tire Centre,Ottawa,10,TOR,Maple Leafs,9,OTT,Senators,11,14,38,1,REG,3,02:53,17:07,1551,False,False,506,shot-on-goal,,,1.0,1.0,86.0,13.0,O,wrist,Frederik Andersen stops a shot from Chris Kelly,9.0,,,,13.341664,77.005383,right,89,8467967.0,Chris Kelly,9.0,C,8475883.0,Frederik Andersen,10.0,G,,,,,,,,,,,,,173,173,shot-on-goal,-77.0,5.0,102.0,163.196201,True,1.599963,-22.619865,99.625248
21,2016020001,20162017,2,2016-10-12,Canadian Tire Centre,Ottawa,10,TOR,Maple Leafs,9,OTT,Senators,15,21,53,1,REG,3,04:01,15:59,1551,False,False,506,shot-on-goal,,,1.0,2.0,23.0,-38.0,N,wrist,Frederik Andersen stops a shot from Cody Ceci,9.0,,,,76.157731,-29.931512,left,89,8476879.0,Cody Ceci,9.0,D,8475883.0,Frederik Andersen,10.0,G,,,,,,,,,,,,,241,241,shot-on-goal,86.0,13.0,68.0,81.055537,True,1.191993,77.005383,106.936895
23,2016020001,20162017,2,2016-10-12,Canadian Tire Centre,Ottawa,10,TOR,Maple Leafs,9,OTT,Senators,16,23,60,1,REG,3,04:46,15:14,1551,False,False,506,shot-on-goal,,,1.0,3.0,33.0,-15.0,O,slap,Frederik Andersen stops a shot from Erik Karlsson,9.0,,,,57.974132,-14.995079,left,89,8474578.0,Erik Karlsson,9.0,D,8475883.0,Frederik Andersen,10.0,G,,,,,,,,,,,,,286,286,shot-on-goal,23.0,-38.0,45.0,25.079872,True,0.55733,-29.931512,44.926591
36,2016020001,20162017,2,2016-10-12,Canadian Tire Centre,Ottawa,10,TOR,Maple Leafs,9,OTT,Senators,24,36,86,1,REG,3,06:46,13:14,1551,False,False,506,shot-on-goal,,,2.0,3.0,-34.0,28.0,O,wrist,Craig Anderson stops a shot from Martin Marincin,10.0,,,,61.717096,-26.980231,left,-89,8475716.0,Martin Marincin,10.0,D,8467950.0,Craig Anderson,9.0,G,,,,,,,,,,,,,406,406,shot-on-goal,33.0,-15.0,120.0,79.611557,True,0.66343,-14.995079,41.97531
