In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import re

In [5]:
# Load the dataset
df = pd.read_excel('../Week2/SnD/snd.xlsx')

# Change 'Xrock' to 'XROCK' in the 'Offense','Defense', 'FBTeam', 'Winner' columns
df['Offense'] = df['Offense'].replace('Xrock', 'XROCK')
df['Defense'] = df['Defense'].replace('Xrock', 'XROCK')
df['FBTeam'] = df['FBTeam'].replace('Xrock', 'XROCK')
df['Winner'] = df['Winner'].replace('Xrock', 'XROCK')

# Convert blank strings or whitespace-only to NaN
df['PlantSite'] = df['PlantSite'].replace(r'^\s*$', np.nan, regex=True)

# week3_mask = df['Date'] >= '2025-08-14'
# df = df[week3_mask]

master_teams = ['SPG', 'Q9', 'OUG', 'Wolves']
# Filter the DataFrame to only include rows where 'Offense' and 'Defense' are in master_teams
# df = df[(df['Offense'].isin(master_teams)) | (df['Defense'].isin(master_teams))]
# date_mask = df['Date'] == '2025-08-17'
# df = df[date_mask]
df_snd = df.copy()

df_snd.head()

Unnamed: 0,Date,Map,Offense,Defense,FBTeam,FBPlayer,FBTime,FBWeapon,FBTraded?,PlantSite,PlantClock,Winner,WinType,EndClock,Clutch?,Timeout,DefenseWinner?
0,2025-08-06,Firing Range,Wolves,OUG,OUG,Solo,01:44:00,LMG,No,,,OUG,Elim,00:18:00,,,True
1,2025-08-06,Firing Range,Wolves,OUG,Wolves,Pegg,01:40:00,Oden,Yes,A,01:10:00,Wolves,Elim,00:06:00,,,False
2,2025-08-06,Firing Range,Wolves,OUG,Wolves,Pegg,01:48:00,Oden,No,,,Wolves,Elim,01:26:00,,,False
3,2025-08-06,Firing Range,Wolves,OUG,Wolves,Pegg,00:31:00,Oden,Yes,,,OUG,Time,00:00:00,,,True
4,2025-08-06,Firing Range,Wolves,OUG,Wolves,Sound,01:50:00,Nade,No,A,01:28:00,Wolves,Elim,00:16:00,,,False


In [6]:
len(df_snd)

717

## SnD Stat 1: First Blood Player Leaderboard

In [7]:
# Count total FBs per player
fb_counts = (
    df_snd.groupby(['FBPlayer', 'FBTeam'])
          .size()
          .reset_index(name='TotalFBs')
)

# Count total rounds played per team
rounds_per_team = (
    pd.concat([
        df_snd.groupby('Offense').size(),
        df_snd.groupby('Defense').size()
    ], axis=1).fillna(0).sum(axis=1).astype(int).reset_index()
)
rounds_per_team.columns = ['FBTeam', 'RoundsPlayed']

# Merge and compute FB rate
fb_leaderboard = (
    fb_counts.merge(rounds_per_team, on='FBTeam', how='left')
             .assign(FBRate=lambda x: x['TotalFBs'] * 100 / x['RoundsPlayed'])
             .sort_values(['FBRate', 'TotalFBs'], ascending=[False, False], ignore_index=True)
             .head(10)
)
display(fb_leaderboard)
# fb_leaderboard.to_csv('../Week2/SnD/fb_leaderboard.csv', index=False)

Unnamed: 0,FBPlayer,FBTeam,TotalFBs,RoundsPlayed,FBRate
0,Ryu,Xceed,10,33,30.30303
1,Six,WL,3,12,25.0
2,Marvel,Soul,15,72,20.833333
3,Sunuo,XROCK,36,180,20.0
4,Yang1,AG,6,31,19.354839
5,Liar,DVS,10,59,16.949153
6,Carl,WL,2,12,16.666667
7,Seven,OUG,36,218,16.513761
8,Abhiz,GodL,24,152,15.789474
9,Kakkii,Xceed,5,33,15.151515


In [11]:
# FBs per team
fb_per_team = (
    df_snd.groupby('FBTeam')
          .size()
          .reset_index(name='TotalFBs')
          .sort_values('TotalFBs', ascending=False)
)
fb_rate_per_team = (
    fb_per_team.merge(rounds_per_team, on='FBTeam', how='left')
    .assign(FBRate=lambda x: x['TotalFBs'] * 100 / x['RoundsPlayed'])
    .sort_values('FBRate', ascending=False, ignore_index=True)
)
display(fb_rate_per_team)

Unnamed: 0,FBTeam,TotalFBs,RoundsPlayed,FBRate
0,WL,7,12,58.333333
1,Wolves,122,218,55.963303
2,DVS,33,59,55.932203
3,Xceed,18,33,54.545455
4,XROCK,96,180,53.333333
5,SPG,126,252,50.0
6,Soul,35,72,48.611111
7,Q9,88,183,48.087432
8,OUG,101,218,46.330275
9,GodL,70,152,46.052632


In [12]:
fb_weapon_leaderboard = (
    df_snd.groupby('FBWeapon')
          .size()
          .reset_index(name='TotalFBs')
          .assign(
              FBShare=lambda d: d['TotalFBs'] * 100/ d['TotalFBs'].sum(),         # share of all FBs
          )
          .sort_values(['TotalFBs', 'FBShare'], ascending=[False, False])
          .head(10)
          .reset_index(drop=True)
)
display(fb_weapon_leaderboard)
# fb_weapon_leaderboard.to_csv('../Week2/SnD/fb_weapon_leaderboard.csv', index=False)

Unnamed: 0,FBWeapon,TotalFBs,FBShare
0,Sniper,319,44.490934
1,VMP,88,12.273361
2,USS,81,11.297071
3,Shotgun,65,9.065551
4,Oden,63,8.786611
5,Nade,59,8.228731
6,Pistol,19,2.64993
7,LMG,6,0.83682
8,Type63,6,0.83682
9,Fennec,4,0.55788


In [13]:
# Compare win rate when drawing first blood vs not drawing first blood
df['FBWin?'] = (df['FBTeam'] == df['Winner']).astype(int)

df['FBWin?'].mean()

0.6792189679218968

In [14]:
attack_mask = (df['Offense'] == df['Winner'])
attack_df = df[attack_mask]

attack_df['FBWin?'].mean()

0.5921450151057401

In [15]:
defense_mask = (df['Defense'] == df['Winner'])
defense_df = df[defense_mask]

defense_df['FBWin?'].mean()

0.7538860103626943

In [16]:
# Count plants per map/site (ignore rounds with no plant)
plants = (
    df_snd.dropna(subset=['PlantSite'])
         .groupby(['Map', 'PlantSite'])
         .size()
         .reset_index(name='Plants')
)

# Wide format: columns A and B
site_counts = (
    plants.pivot(index='Map', columns='PlantSite', values='Plants')
          .fillna(0)
          .rename(columns={'A':'Plants_A', 'B':'Plants_B'})
          .reset_index()
)

# Totals and shares
site_counts['TotalPlants'] = site_counts['Plants_A'] + site_counts['Plants_B']
site_counts['Share_A'] = np.where(site_counts['TotalPlants']>0,
                                  site_counts['Plants_A']*100/site_counts['TotalPlants'], 0.0)
site_counts['Share_B'] = np.where(site_counts['TotalPlants']>0,
                                  site_counts['Plants_B']*100/site_counts['TotalPlants'], 0.0)

# # Favored site
# def favored(row):
#     if row['Plants_A'] > row['Plants_B']:
#         return 'A'
#     elif row['Plants_B'] > row['Plants_A']:
#         return 'B'
#     else:
#         return 'Tie'

# site_counts['FavoredSite'] = site_counts.apply(favored, axis=1)

# Sort and save
site_counts = site_counts.sort_values(['Share_A'], ascending=False, ignore_index=True)
display(site_counts)
# site_counts.to_csv('../Week2/SnD/site_counts.csv', index=False)

PlantSite,Map,Plants_A,Plants_B,TotalPlants,Share_A,Share_B
0,Standoff,47,17,64,73.4375,26.5625
1,Firing Range,93,38,131,70.992366,29.007634
2,Kurohana,21,19,40,52.5,47.5
3,Tunisia,43,39,82,52.439024,47.560976
4,Coastal,30,36,66,45.454545,54.545455


In [17]:
df = df_snd.copy()

# Winner side: Attack if OffTeam == WinningTeam else Defense
df['WinnerSide'] = np.where(df['Winner'] == df['Offense'], 'Attack', 'Defense')

def classify_win(row):
    """Classify round win type with special case for 'eliminate after planting'."""
    if pd.notna(row['PlantSite']):  # Bomb was planted
        if row['WinType'] == 'Elim':
            return 'Post Plant'
        elif row['WinType'] == 'Detonate':
            return 'Post Plant'
        elif row['WinType'] == 'Defuse':
            return 'Post Plant'
    else:  # No plant happened
        if row['WinType'] == 'Elim':
            return 'Pre Plant'
        elif row['WinType'] == 'Time':
            return 'Pre Plant'
    return row['WinType']  # fallback

# Classify each round
df['WinTypeClass'] = df.apply(classify_win, axis=1)

# Aggregate per WinnerSide and WinTypeClass
win_type_stats_side = (
    df.groupby(['WinnerSide', 'WinTypeClass'])
      .size()
      .reset_index(name='Rounds')
)

# Shares within each WinnerSide
win_type_stats_side['Share'] = (
    win_type_stats_side['Rounds'] *100/
    win_type_stats_side.groupby('WinnerSide')['Rounds'].transform('sum')
)

# Sort for readability (largest first within each side)
win_type_stats_side = (
    win_type_stats_side
    .sort_values(['WinnerSide', 'Rounds'], ascending=[True, False])
    .reset_index(drop=True)
)
display(win_type_stats_side)
# win_type_stats_side.to_csv('../Week2/SnD/win_type_stats_side.csv', index=False)

Unnamed: 0,WinnerSide,WinTypeClass,Rounds,Share
0,Attack,Post Plant,270,81.570997
1,Attack,Pre Plant,61,18.429003
2,Defense,Pre Plant,273,70.725389
3,Defense,Post Plant,113,29.274611


In [18]:
win_type_team = (
    df.groupby(['Winner', 'WinnerSide', 'WinTypeClass'])
    .size()
    .reset_index(name='Rounds')
)

# Pivot the table to have WinTypeClass as columns
win_type_team_pivot = (
    win_type_team.pivot(index=['Winner', 'WinnerSide'], columns='WinTypeClass', values='Rounds')
                .fillna(0)
                .reset_index()
                .rename_axis(None, axis=1)  # Remove the MultiIndex name
)

win_type_team_pivot['Post Plant Share'] = (
    win_type_team_pivot['Post Plant'] * 100 /
    (win_type_team_pivot['Post Plant'] + win_type_team_pivot['Pre Plant'])
)

win_type_team_pivot = win_type_team_pivot.sort_values(['WinnerSide', 'Post Plant Share'], ascending=[True, False], ignore_index=True)

display(win_type_team_pivot)

Unnamed: 0,Winner,WinnerSide,Post Plant,Pre Plant,Post Plant Share
0,WL,Attack,1.0,0.0,100.0
1,XLR8,Attack,1.0,0.0,100.0
2,OUG,Attack,41.0,4.0,91.111111
3,Xceed,Attack,9.0,1.0,90.0
4,AG,Attack,7.0,1.0,87.5
5,Q9,Attack,41.0,7.0,85.416667
6,XROCK,Attack,28.0,5.0,84.848485
7,SPG,Attack,57.0,11.0,83.823529
8,DVS,Attack,11.0,4.0,73.333333
9,Wolves,Attack,38.0,14.0,73.076923


In [19]:
# Flag whether offense planted
df['Planted'] = pd.notna(df['PlantSite'])

# Flag offense win
df['OffenseWin'] = df['Winner'] == df['Offense']

# Aggregate
off_win_plant_stats = (
    df.groupby(['Offense','Planted'])['OffenseWin']
      .mean()
      .reset_index()
      .pivot(index='Offense', columns='Planted', values='OffenseWin')
      .rename(columns={False:'WinRate_NoPlant', True:'WinRate_Plant'})
      .reset_index()
      .fillna(0)
)

off_win_plant_stats['WinRate_NoPlant'] = off_win_plant_stats['WinRate_NoPlant'] * 100
off_win_plant_stats['WinRate_Plant'] = off_win_plant_stats['WinRate_Plant'] * 100
display(off_win_plant_stats)

# off_win_plant_stats.to_csv('../Week2/SnD/off_win_plant_stats.csv', index=False)

Planted,Offense,WinRate_NoPlant,WinRate_Plant
0,AG,25.0,70.0
1,DVS,44.444444,52.380952
2,GodL,24.324324,62.162162
3,OUG,9.090909,69.491525
4,Q9,17.948718,75.925926
5,SPG,22.44898,73.076923
6,Soul,25.0,61.904762
7,WL,0.0,100.0
8,Wolves,21.538462,79.166667
9,XLR8,0.0,33.333333


In [20]:
len(df[df['Planted'] == True]) / len(df) * 100

53.417015341701536

In [22]:
# Clutch Plays
mask = df_snd['Clutch?'].notna() 
clutch_df = df_snd[mask][['Winner', 'Clutch?']].copy()

# function to parse clutch players
def parse_clutch_players(clutch_str):
    player = clutch_str.split(':')[0].strip()  # Get the player name before the colon
    # if two players, split by '+' and return as a list
    if '+' in player:
        return [p.strip() for p in player.split('+')]
    return [player]

# Apply the function to create a new column with lists of clutch players
clutch_df['ClutchPlayers'] = clutch_df['Clutch?'].apply(parse_clutch_players)

# Explode the DataFrame to have one row per clutch player
clutch_df_exploded = clutch_df.explode('ClutchPlayers')

# Count clutch plays per player
clutch_counts = (
    clutch_df_exploded.groupby('ClutchPlayers')
                      .size()
                      .reset_index(name='ClutchPlays')
                      .sort_values(by='ClutchPlays', ascending=False, ignore_index=True)
)
clutch_counts[:10]

Unnamed: 0,ClutchPlayers,ClutchPlays
0,Bird,3
1,Cartels,3
2,Wind,3
3,Seven,3
4,Zai,3
5,JaBen,3
6,Nan,2
7,Ouling,2
8,Raph,2
9,Suiwan,2


In [23]:
# Aggregate plant rate
plant_rate_per_team = (
    df.groupby('Offense')['Planted']
      .mean()
      .reset_index(name='PlantRate')
)

# Aggregate offense win rate
attack_win_rate = (
    df.groupby('Offense')['OffenseWin']
      .mean()
      .reset_index(name='AttackWinRate')
)

# Merge both
plant_rate_per_team = (
    plant_rate_per_team.merge(attack_win_rate, on='Offense')
                       .sort_values('PlantRate', ascending=False)
                       .reset_index(drop=True)
)

plant_rate_per_team['PlantRate'] = plant_rate_per_team['PlantRate'] * 100
plant_rate_per_team['AttackWinRate'] = plant_rate_per_team['AttackWinRate'] * 100
display(plant_rate_per_team)
# plant_rate_per_team.to_csv('../Week2/SnD/plant_rate_per_team.csv', index=False)

Unnamed: 0,Offense,PlantRate,AttackWinRate
0,Xceed,80.0,66.666667
1,AG,71.428571,57.142857
2,DVS,70.0,50.0
3,SPG,61.417323,53.543307
4,Q9,58.064516,51.612903
5,OUG,57.281553,43.68932
6,Soul,51.219512,43.902439
7,GodL,50.0,43.243243
8,XROCK,43.820225,37.078652
9,Wolves,42.477876,46.017699


In [24]:
# Filter rounds with a plant while team is on defense
retake_df = df[pd.notna(df['PlantSite'])]

# For each defense team: total planted-against rounds
retake_stats = (
    retake_df.groupby('Defense')
             .size()
             .reset_index(name='RoundsWithPlantAgainst')
)

# For each defense team: successful retakes (win by defuse)
retake_success = (
    retake_df[retake_df['WinType'].str.lower() == 'defuse']
        .groupby('Defense')
        .size()
        .reset_index(name='SuccessfulRetakes')
)
# Defense win rate
def_win_rate = (
    df.groupby('Defense')['OffenseWin']
      .apply(lambda x: 100*(1 - x.mean()))
      .reset_index(name='DefenseWinRate')
)
# Merge and compute rate
retake_stats = retake_stats.merge(retake_success, on='Defense', how='left').fillna(0)
retake_stats['RetakeRate'] = retake_stats['SuccessfulRetakes'] *100 / retake_stats['RoundsWithPlantAgainst']

retake_stats = retake_stats.merge(def_win_rate, on='Defense', how='left')
retake_stats = retake_stats[['Defense', 'RetakeRate']]

In [25]:
# Cumulative round differential across ALL rounds
all_rounds = []

# For Offense perspective
offense_results = df[['Offense', 'Winner']].copy()
offense_results['Diff'] = np.where(offense_results['Offense'] == offense_results['Winner'], 1, -1)
offense_results = offense_results.rename(columns={'Offense': 'Team'})[['Team', 'Diff']]
all_rounds.append(offense_results)

# For Defense perspective
defense_results = df[['Defense', 'Winner']].copy()
defense_results['Diff'] = np.where(defense_results['Defense'] == defense_results['Winner'], 1, -1)
defense_results = defense_results.rename(columns={'Defense': 'Team'})[['Team', 'Diff']]
all_rounds.append(defense_results)

# Combine offense + defense
round_diff = (
    pd.concat(all_rounds)
      .groupby('Team')['Diff']
      .sum()
      .rename('RoundDiff')
)

retake_stats = retake_stats.merge(plant_rate_per_team[['Offense', 'PlantRate']], left_on='Defense', right_on='Offense', how='left')

retake_stats = retake_stats.merge(round_diff, left_on='Defense', right_index=True, how='left')


In [30]:
retake_stats = retake_stats[['Defense', 'PlantRate', 'RetakeRate', 'RoundDiff']]
retake_stats = retake_stats.merge(
    fb_rate_per_team[['FBTeam', 'TotalFBs', 'FBRate']],
    left_on='Defense',
    right_on='FBTeam',
    how='left'
)
retake_stats = retake_stats.rename(columns={
    "Defense": "Team",
})
retake_stats = retake_stats[['Team', 'PlantRate', 'RetakeRate', 'RoundDiff', 'FBRate']]
display(retake_stats)


Unnamed: 0,Team,PlantRate,RetakeRate,RoundDiff,FBRate
0,AG,71.428571,50.0,7,45.16129
1,DVS,70.0,27.272727,-5,55.932203
2,GodL,50.0,28.947368,4,46.052632
3,OUG,57.281553,30.0,-14,46.330275
4,Q9,58.064516,35.897436,25,48.087432
5,SPG,61.417323,16.666667,16,50.0
6,Soul,51.219512,31.578947,-8,48.611111
7,WL,25.0,0.0,-6,58.333333
8,Wolves,42.477876,34.482759,6,55.963303
9,XLR8,21.428571,75.0,-12,29.166667


In [31]:
retake_stats.to_csv('../rankings/snd_stats.csv', index=False)

## Tempo Stats

In [26]:
def parse_clock_to_seconds(x):
    """Parse HH:MM:SS, M:SS, or SS into integer seconds (match time remaining)."""
    if pd.isna(x):
        return np.nan
    s = str(x).strip()
    if s == "":
        return np.nan

    # Handle HH:MM:SS
    parts = s.split(":")
    if len(parts) == 3:  # HH:MM:SS → ignore hours
        mm, ss, _ = parts
        return int(mm) * 60 + int(ss)
    elif len(parts) == 2:  # M:SS
        mm, ss = parts
        return int(mm) * 60 + int(ss)
    elif s.isdigit():  # seconds only
        return int(s)

    return np.nan

def bootstrap_ci_mean(a, n_boot=1000, ci=95, rng=None):
    """Percentile bootstrap CI for the mean; ignores NaNs."""
    arr = pd.Series(a).dropna().to_numpy()
    if arr.size == 0:
        return (np.nan, np.nan)
    if rng is None:
        rng = np.random.default_rng(42)
    boot = np.empty(n_boot, dtype=float)
    n = arr.size
    for i in range(n_boot):
        boot[i] = np.mean(rng.choice(arr, size=n, replace=True))
    alpha = (100 - ci) / 2.0
    return (np.percentile(boot, alpha), np.percentile(boot, 100 - alpha))

# Harmonize FB clock column name
if 'FBClock' not in df.columns and 'FBTime' in df.columns:
    df = df.rename(columns={'FBTime': 'FBClock'})

# Parse clocks -> seconds remaining at the event
for col in ['PlantClock', 'EndClock', 'FBClock']:
    if col in df.columns:
        df[col + '_s'] = df[col].apply(parse_clock_to_seconds)
    else:
        df[col + '_s'] = np.nan  # if missing, fill with NaN

# Calculate elapsed time for planting and end
df['Planted'] = df['PlantSite'].notna()

# Round elapsed
df['RoundElapsed_s'] = np.where(
    df['Planted'],
    (120 - df['PlantClock_s']) + (45 - df['EndClock_s']),
    120 - df['EndClock_s']
)

df['FBElapsed_s'] = 120 - df['FBClock_s']

# Plant elapsed (only if planted)
df['PlantElapsed_s'] = np.where(df['Planted'], 120 - df['PlantClock_s'], np.nan)

# --- Attack-only aggregation with bootstrap CIs ---
rng = np.random.default_rng(42)

def agg_with_ci(group, col):
    mean_val = group[col].mean()
    lo, hi = bootstrap_ci_mean(group[col], n_boot=1000, ci=95, rng=rng)
    return pd.Series({f'{col}_mean': mean_val, f'{col}_CI_low': lo, f'{col}_CI_high': hi})

# Per attacking team
tempo_round = df.groupby('Offense').apply(agg_with_ci, col='RoundElapsed_s').reset_index()
tempo_fb    = df.groupby('Offense').apply(agg_with_ci, col='FBElapsed_s').reset_index()
tempo_plant = (df[df['Planted']]
               .groupby('Offense')
               .apply(agg_with_ci, col='PlantElapsed_s')
               .reset_index())

# Merge all
tempo = (tempo_round
         .merge(tempo_fb, on='Offense', how='left')
         .merge(tempo_plant, on='Offense', how='left')
         .rename(columns={
             'RoundElapsed_s_mean': 'AvgRoundLen_s',
             'FBElapsed_s_mean':    'AvgFBElapsed_s',
             'PlantElapsed_s_mean': 'AvgPlantElapsed_s'
         })
        )

# Optional: order columns nicely
cols_order = [
    'Offense',
    'AvgRoundLen_s','RoundElapsed_s_CI_low','RoundElapsed_s_CI_high',
    'AvgFBElapsed_s','FBElapsed_s_CI_low','FBElapsed_s_CI_high',
    'AvgPlantElapsed_s','PlantElapsed_s_CI_low','PlantElapsed_s_CI_high'
]
tempo = tempo.reindex(columns=[c for c in cols_order if c in tempo.columns])
display(tempo[['Offense', 'AvgRoundLen_s', 'AvgFBElapsed_s', 'AvgPlantElapsed_s']])
# tempo.to_csv('../Week2/SnD/tempo.csv', index=False)

Unnamed: 0,Offense,AvgRoundLen_s,AvgFBElapsed_s,AvgPlantElapsed_s
0,GodL,69.880952,26.452381,57.380952
1,OUG,73.555556,23.588889,48.54717
2,Q9,75.694118,28.070588,53.4375
3,SPG,87.160714,33.6875,66.265625
4,WL,58.5,22.5,31.0
5,Wolves,91.87619,32.838095,79.071429
6,XROCK,86.863014,31.520548,60.741935


## Timeout Stats

In [27]:
df_timeouts = pd.read_excel('../Week2/SnD/snd.xlsx', sheet_name='Timeouts')

# --- Total timeouts taken ---
timeouts_per_team = df_timeouts.groupby('TOTeam').size().reset_index(name='TimeoutsTaken')

# --- Unique games played ---
# Each "game" is defined by (Map + Date + Team) combo
games_played = pd.concat([
    df[['Map', 'Date', 'Offense']].rename(columns={'Offense': 'Team'}),
    df[['Map', 'Date', 'Defense']].rename(columns={'Defense': 'Team'})
])

# Drop duplicates so if a team played both O and D on the same map in the same match, it counts as one
games_played = games_played.drop_duplicates(subset=['Map', 'Date', 'Team'])

games_per_team = games_played.groupby('Team').size().reset_index(name='GamesPlayed')

# --- Merge ---
timeouts_stats = timeouts_per_team.merge(games_per_team, left_on='TOTeam', right_on='Team', how='left')
timeouts_stats.drop(columns='Team', inplace=True)
timeouts_stats['TimeoutsPerGame'] = timeouts_stats['TimeoutsTaken'] / timeouts_stats['GamesPlayed']
# Sort
timeouts_stats = timeouts_stats.sort_values(by='TimeoutsTaken', ascending=False)

display(timeouts_stats)
# timeouts_stats.to_csv('../Week2/SnD/timeouts_stats.csv', index=False)

Unnamed: 0,TOTeam,TimeoutsTaken,GamesPlayed,TimeoutsPerGame
3,OUG,9,11.0,0.818182
7,Wolves,9,11.0,0.818182
2,GodL,8,5.0,1.6
4,Q9,7,10.0,0.7
5,SPG,5,12.0,0.416667
6,Soul,4,,
9,XROCK,4,10.0,0.4
0,AG,2,,
1,DVS,1,,
8,XLR8,1,,


In [28]:
# --- Overall distribution ---
round_dist = df_timeouts.groupby('Round#').size().reset_index(name='TimeoutsCount')
round_dist = round_dist.sort_values('Round#')

# --- Per-team distribution ---
team_round_dist = df_timeouts.groupby(['TOTeam', 'Round#']).size().reset_index(name='TimeoutsCount')

# Pivot to make it easy to see patterns (teams as rows, rounds as columns)
team_round_pivot = team_round_dist.pivot(index='TOTeam', columns='Round#', values='TimeoutsCount').fillna(0).astype(int)

# # --- Save ---
# round_dist.to_csv('timeout_round_distribution_overall.csv', index=False)
# team_round_pivot.to_csv('timeout_round_distribution_by_team.csv')

print("Overall distribution:")
print(round_dist)

print("\nPer-team distribution:")
print(team_round_pivot)

Overall distribution:
    Round#  TimeoutsCount
0        1              1
1        2              3
2        3              2
3        4              6
4        5              3
5        6              6
6        7              1
7       10              5
8       11              8
9       12              6
10      13              3
11      14              1
12      17              1
13      18              2
14      25              1
15      26              1

Per-team distribution:
Round#  1   2   3   4   5   6   7   10  11  12  13  14  17  18  25  26
TOTeam                                                                
AG       0   0   0   0   0   0   0   0   0   1   0   1   0   0   0   0
DVS      0   0   0   1   0   0   0   0   0   0   0   0   0   0   0   0
GodL     0   1   1   1   0   2   0   2   1   0   0   0   0   0   0   0
OUG      0   0   1   1   1   1   1   0   0   1   1   0   0   2   0   0
Q9       1   1   0   0   1   0   0   2   1   1   0   0   0   0   0   0
SPG      0   0 

In [29]:
# --- Split post-timeout record into individual rounds ---
df_timeouts[['R1', 'R2', 'R3']] = df_timeouts['PostTOStreak'].str.split(',', expand=True)

# Convert to win indicator (1 for W, 0 for L)
for col in ['R1', 'R2', 'R3']:
    df_timeouts[col] = df_timeouts[col].map({'W': 1, 'L': 0})

# --- Bootstrap CIs function ---
def bootstrap_ci(data, n_bootstrap=1000, ci=95):
    stats = []
    for _ in range(n_bootstrap):
        sample = np.random.choice(data, size=len(data), replace=True)
        stats.append(np.mean(sample))
    lower = np.percentile(stats, (100-ci)/2)
    upper = np.percentile(stats, 100 - (100-ci)/2)
    return np.mean(data), lower, upper

# --- Compute win rates + CIs ---
results = []

for team, group in df_timeouts.groupby('TOTeam'):
    # Next round win rate
    mean_nr, low_nr, high_nr = bootstrap_ci(group['R1'].values)

    # 3-round average win rate
    avg_3rounds = group[['R1', 'R2', 'R3']].mean(axis=1).values
    mean_3r, low_3r, high_3r = bootstrap_ci(avg_3rounds)

    results.append({
        'TOTeam': team,
        'TimeoutsTaken': len(group),
        'NextRoundWinRate': mean_nr * 100,
        'NR_CI_Low': low_nr,
        'NR_CI_High': high_nr,
        'ThreeRoundAvgWinRate': mean_3r * 100,
        '3R_CI_Low': low_3r,
        '3R_CI_High': high_3r
    })

timeout_execution_ci = pd.DataFrame(results)

# Save to CSV
# timeout_execution_ci.to_csv('timeout_post_execution_ci.csv', index=False)
display(timeout_execution_ci)
# timeout_execution_ci.to_csv('../Week2/SnD/timeout_post_execution_ci.csv', index=False)

Unnamed: 0,TOTeam,TimeoutsTaken,NextRoundWinRate,NR_CI_Low,NR_CI_High,ThreeRoundAvgWinRate,3R_CI_Low,3R_CI_High
0,AG,2,100.0,1.0,1.0,100.0,1.0,1.0
1,DVS,1,0.0,0.0,0.0,66.666667,0.666667,0.666667
2,GodL,8,37.5,0.121875,0.75,54.166667,0.416667,0.666667
3,OUG,9,66.666667,0.333333,0.888889,55.555556,0.444444,0.666667
4,Q9,7,71.428571,0.428571,1.0,66.666667,0.47619,0.857143
5,SPG,5,60.0,0.2,1.0,46.666667,0.2,0.666667
6,Soul,4,25.0,0.0,0.75,33.333333,0.083333,0.583333
7,Wolves,9,44.444444,0.111111,0.777778,55.555556,0.37037,0.703704
8,XLR8,1,0.0,0.0,0.0,0.0,0.0,0.0
9,XROCK,4,25.0,0.0,0.75,50.0,0.333333,0.666667


In [30]:
# MapWin is already stored as "W" or "L" for the team taking the timeout
df_timeouts['MapWin?'] = df_timeouts['MapWin?'].map({'Yes': 1, 'No': 0})

# Group by team and calculate win rate
map_win_rate = (
    df_timeouts.groupby('TOTeam')
    .agg(
        TimeoutsTaken=('MapWin?', 'count'),
        MapWins=('MapWin?', 'sum')
    )
    .reset_index()
)

map_win_rate['MapWinRate'] = map_win_rate['MapWins'] * 100 / map_win_rate['TimeoutsTaken']

display(map_win_rate)
# map_win_rate.to_csv('../Week2/SnD/timeout_map_win_rate.csv', index=False)

Unnamed: 0,TOTeam,TimeoutsTaken,MapWins,MapWinRate
0,AG,2,2,100.0
1,DVS,1,0,0.0
2,GodL,8,3,37.5
3,OUG,9,3,33.333333
4,Q9,7,5,71.428571
5,SPG,5,1,20.0
6,Soul,4,0,0.0
7,Wolves,9,5,55.555556
8,XLR8,1,0,0.0
9,XROCK,4,1,25.0


In [31]:
# % rounds where each team draws first blood vs. total rounds played
fb_rate = (
    df_snd.groupby('FBTeam')
          .agg(
              TotalRounds=('FBPlayer', 'count'),
              FBs=('FBPlayer', lambda x: x.notna().sum())
          )
          .reset_index()
)

fb_rate


Unnamed: 0,FBTeam,TotalRounds,FBs
0,GodL,33,33
1,OUG,93,93
2,Q9,79,79
3,SPG,106,106
4,WL,7,7
5,Wolves,115,115
6,XROCK,78,78
