# IE423 Term Project – Event Detection with CUSUM Control Charts

This notebook implements a CUSUM-based system to detect goals and red cards in football matches using match statistics.


In [None]:
import pandas as pd
import numpy as np

# Configuration parameters
# Number of leading observations of each series used to estimate the
# in-control mean and standard deviation in detect_cusum_alarms.
BASELINE_WINDOW = 15
# Multiplier on the baseline standard deviation giving the CUSUM
# reference value (slack) K.
K_FACTOR = 0.5
# Multiplier on the baseline standard deviation giving the CUSUM
# decision threshold H.
H_FACTOR = 4.0

# Turn off pandas' chained-assignment (SettingWithCopy) warning for the
# whole session.
pd.options.mode.chained_assignment = None


In [None]:
# Read the project data sample into a DataFrame, report its dimensions,
# and preview the first rows.
DATA_FILE = 'fall25_ie423_project_data_sample.txt'
df = pd.read_csv(DATA_FILE)
print(f"Data shape: {df.shape}")
df.head()

In [None]:
# Build a single per-match clock: first-half rows keep their minute,
# second-half rows are shifted by however far the first half ran past
# minute 45. Rows belonging to neither half keep the initial value 0.
df['aligned_minute'] = 0
for mid, group in df.groupby('match_id'):
    # Row labels of each half, taken from the match's own rows.
    first_rows = group.index[group['halftime'] == '1st-half']
    second_rows = group.index[group['halftime'] == '2nd-half']

    # Last recorded first-half minute (NaN when the match has no
    # first-half rows, in which case no shift is applied).
    first_half_end = group.loc[first_rows, 'minute'].max()
    if pd.isna(first_half_end):
        offset = 0
    else:
        offset = max(int(first_half_end) - 45, 0)

    df.loc[first_rows, 'aligned_minute'] = (
        group.loc[first_rows, 'minute'].astype(int)
    )
    df.loc[second_rows, 'aligned_minute'] = (
        group.loc[second_rows, 'minute'].astype(int) + offset
    )

# Order rows chronologically within each match and renumber the index.
df = df.sort_values(by=['match_id', 'aligned_minute']).reset_index(drop=True)


In [None]:
# Compute first differences for key features
# Per-match first differences of the key statistics. The first row of
# every match has no predecessor, so its difference is set to 0.
DIFF_SOURCES = {
    'attack_home_diff': 'ATTACKS - home',
    'attack_away_diff': 'ATTACKS - away',
    'shot_home_diff': 'SHOTS_ON_TARGET - home',
    'shot_away_diff': 'SHOTS_ON_TARGET - away',
    'poss_home_diff': 'BALL_POSSESSION - home',
    'poss_away_diff': 'BALL_POSSESSION - away',
}
for diff_col, source_col in DIFF_SOURCES.items():
    df[diff_col] = df.groupby('match_id')[source_col].diff().fillna(0)


In [None]:
def detect_cusum_alarms(series, baseline_window=None,
                        k_factor=None, h_factor=None):
    """Return the index labels at which a one-sided (upper) CUSUM alarms.

    The first ``baseline_window`` observations estimate the in-control
    mean ``mu`` and population standard deviation ``sigma`` (``ddof=0``).
    For every later observation ``x`` the statistic is updated as
    ``S = max(0, S + (x - mu - K))`` with ``K = k_factor * sigma``; when
    ``S`` exceeds ``H = h_factor * sigma`` the observation's index label
    is recorded and ``S`` is reset to zero.

    Args:
        series: pandas Series of observations; its index supplies the
            labels reported for alarms.
        baseline_window: number of leading observations used as the
            baseline. ``None`` means the module-level ``BASELINE_WINDOW``.
        k_factor: slack multiplier. ``None`` means ``K_FACTOR``.
        h_factor: threshold multiplier. ``None`` means ``H_FACTOR``.

    Returns:
        List of index labels, in series order, where an alarm was raised.
        Empty when the series is shorter than the baseline window or the
        window is not positive.
    """
    # Resolve defaults at call time rather than at definition time, so
    # changing the configuration constants takes effect without having
    # to re-run this definition.
    if baseline_window is None:
        baseline_window = BASELINE_WINDOW
    if k_factor is None:
        k_factor = K_FACTOR
    if h_factor is None:
        h_factor = H_FACTOR

    alarms = []
    # A non-positive window leaves no baseline to estimate from (and a
    # negative one would make the monitoring loop index from the end).
    if baseline_window < 1 or len(series) < baseline_window:
        return alarms

    baseline_data = series.iloc[:baseline_window]
    mu = baseline_data.mean()
    sigma = baseline_data.std(ddof=0)
    if sigma == 0:
        # A constant baseline would give K = H = 0; use a tiny scale so
        # the thresholds stay strictly positive.
        sigma = 1e-6
    K = k_factor * sigma
    H = h_factor * sigma

    # Pull the monitored values out once instead of one .iloc lookup per
    # observation.
    values = series.to_numpy()
    S = 0.0
    for t, x in enumerate(values[baseline_window:], start=baseline_window):
        # Note: a NaN observation makes the sum NaN, and max(0, nan)
        # returns 0, so NaN effectively resets the statistic.
        S = max(0, S + (x - mu - K))
        if S > H:
            alarms.append(series.index[t])
            S = 0.0  # restart accumulation after signalling
    return alarms


In [None]:
# Run the CUSUM detector on each team's attack differences, match by
# match, keyed by aligned minute so alarms are reported as minutes.
alarms_by_match = {}
for mid, group in df.groupby('match_id'):
    by_minute = group.set_index('aligned_minute')
    alarms_by_match[mid] = {
        'home': detect_cusum_alarms(by_minute['attack_home_diff']),
        'away': detect_cusum_alarms(by_minute['attack_away_diff']),
    }


In [None]:
# Pair every goal / red-card event with at most one CUSUM alarm and
# collect the alarms that were never paired as false alarms.
#
# Each rule is:
#   (event flag column, label, side whose alarms are searched,
#    window start offset, window end offset, which candidate to take)
# Goals search the scoring side's alarms in the 5 minutes up to the
# event and take the latest; red cards search the opposing side's
# alarms in the 5 minutes from the event and take the earliest.
EVENT_RULES = (
    ('goal_home_event', 'Goal (Home)', 'home', -5, 0, max),
    ('goal_away_event', 'Goal (Away)', 'away', -5, 0, max),
    ('red_home_event', 'Red Card (Home)', 'away', 0, 5, min),
    ('red_away_event', 'Red Card (Away)', 'home', 0, 5, min),
)
EVENT_COLUMNS = [rule[0] for rule in EVENT_RULES]

event_detections = []
false_alarms = []

for mid, group in df.groupby('match_id'):
    # Work on copies so alarms_by_match itself is left untouched; an
    # alarm is removed once it has been credited to an event.
    unmatched = {
        'home': alarms_by_match[mid]['home'].copy(),
        'away': alarms_by_match[mid]['away'].copy(),
    }
    events = group[(group[EVENT_COLUMNS] == 1).any(axis=1)]
    for _, row in events.iterrows():
        minute = int(row['aligned_minute'])
        # Check every flag independently: a single row may carry more
        # than one event, and each one gets its own result record.
        for column, event_type, side, start, end, pick in EVENT_RULES:
            if row[column] != 1:
                continue
            candidates = [a for a in unmatched[side]
                          if minute + start <= a <= minute + end]
            alarm_minute = None
            if candidates:
                alarm_minute = pick(candidates)
                unmatched[side].remove(alarm_minute)
            event_detections.append({
                'match_id': mid,
                'event_type': event_type,
                'event_minute': minute,
                'alarm_minute': alarm_minute if alarm_minute is not None else np.nan,
                # Positive when the alarm preceded the event.
                'time_difference': (minute - alarm_minute) if alarm_minute is not None else np.nan
            })
    # Whatever was not credited to an event counts as a false alarm.
    false_alarms.extend((mid, 'home', a) for a in unmatched['home'])
    false_alarms.extend((mid, 'away', a) for a in unmatched['away'])

results_df = pd.DataFrame(event_detections)
results_df.head()
