In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from statsmodels.stats.proportion import proportion_confint

# Disable pandas' chained-assignment (SettingWithCopy) warning for the session.
# NOTE(review): this hides the warning globally rather than fixing its cause;
# prefer explicit .copy() / .loc assignments and consider removing this line.
pd.options.mode.chained_assignment = None

In [2]:
# Read the raw Control round log from the Week 2 workbook
df = pd.read_excel('../Week2/Control/control.xlsx')

# Flag each round by whether the attacking side won (1) or not (0)
offense_won = df['Winner'].eq(df['Offense'])
df['Off_Win'] = offense_won.astype(int)

# Parse life differentials
def parse_diff(s):
    """Return offense lives minus defense lives from an ``'off/def'`` string.

    Parameters
    ----------
    s : str
        Text of the form ``'<off>/<def>'`` where both parts parse as integers.

    Returns
    -------
    int or pd.NA
        ``int(off) - int(def)``, or ``pd.NA`` when ``s`` is not a string or
        does not split into exactly two integer parts.
    """
    try:
        off, defe = s.split('/')
        return int(off) - int(defe)
    except (AttributeError, TypeError, ValueError):
        # Only parse failures are treated as missing; a bare ``except`` would
        # also swallow KeyboardInterrupt/SystemExit and hide real bugs.
        return pd.NA

def get_off_lives(s):
    """Return the offense life count (text before the first ``'/'``) as an int.

    Parameters
    ----------
    s : str
        Text whose first ``'/'``-separated part parses as an integer.

    Returns
    -------
    int or pd.NA
        The parsed integer, or ``pd.NA`` when ``s`` is not a string or the
        first part is not an integer.
    """
    try:
        return int(s.split('/')[0])
    except (AttributeError, TypeError, ValueError):
        # Limit the fallback to parse failures instead of a bare ``except``,
        # which would also swallow KeyboardInterrupt/SystemExit.
        return pd.NA

def get_def_lives(s):
    """Return the defense life count (text after the first ``'/'``) as an int.

    Parameters
    ----------
    s : str
        Text whose second ``'/'``-separated part parses as an integer.

    Returns
    -------
    int or pd.NA
        The parsed integer, or ``pd.NA`` when ``s`` is not a string, contains
        no ``'/'``, or the second part is not an integer.
    """
    try:
        return int(s.split('/')[1])
    except (AttributeError, IndexError, TypeError, ValueError):
        # IndexError covers input without a '/'. Only parse failures are
        # swallowed; a bare ``except`` would also hide KeyboardInterrupt.
        return pd.NA
    
# Derive the differential and per-side life columns from the raw "off/def" strings
two_tick_raw = df['Off/Def-2T']
round_end_raw = df['Off/Def_RoundEnd']

df['LifeDiff_2Seg'] = two_tick_raw.apply(parse_diff)
df['LifeDiff_End'] = round_end_raw.apply(parse_diff)
df['OffLivesEnd'] = round_end_raw.apply(get_off_lives)
df['DefLivesEnd'] = round_end_raw.apply(get_def_lives)

In [3]:
# Masters-only filtering is currently disabled. The two commented lines below
# would restrict the data to rounds involving a team listed in `masters`.
# masters = ['XROCK', 'SPG', 'OUG', 'Wolves', 'GodL', 'Q9']
# df_masters = df[df['Offense'].isin(masters) | df['Defense'].isin(masters)]
# As written, df_masters is an unfiltered copy of df (all teams).
df_masters = df.copy()

In [4]:
# 1. Offense vs. Defense win rates for each map
map_rounds = df_masters.groupby('Map')['Off_Win'].agg(
    OffenseWins='sum', TotalRounds='count'
)
map_rounds['DefenseWins'] = map_rounds['TotalRounds'] - map_rounds['OffenseWins']
map_rounds['OffenseWinRate'] = map_rounds['OffenseWins'] / map_rounds['TotalRounds']
map_rounds['DefenseWinRate'] = map_rounds['DefenseWins'] / map_rounds['TotalRounds']

# Only the two rates (plus the map name) are kept; the raw counts are dropped
win_split = map_rounds.reset_index()[['Map', 'OffenseWinRate', 'DefenseWinRate']]

In [6]:
# Logistic win-probability curve with bootstrapped 95% CIs
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression

# --- 0) Config ---
OUTPUT_PATH = '../Week2/Control/life_diff_curve.csv'
N_BOOT = 1000
SEED = 42

# --- 1) Prepare raw data ---
df_clean = df_masters.dropna(subset=['LifeDiff_2Seg', 'Off_Win']).copy()

# LifeDiff_2Seg is produced by .apply() with pd.NA as the missing marker, so
# the column may be object dtype. Cast explicitly so NumPy and scikit-learn
# receive a plain float array.
X = df_clean['LifeDiff_2Seg'].astype(float).to_numpy().reshape(-1, 1)
y = df_clean['Off_Win'].astype(int).to_numpy()

# --- 2) Fit logistic regression on raw rounds ---
# (scikit-learn's default regularization helps when you have near-perfect separation)
log_reg = LogisticRegression(solver='lbfgs')
log_reg.fit(X, y)

# Prediction grid (extend a bit beyond observed range to look nicer)
x_min, x_max = X.min(), X.max()
pad = max(1.0, 0.1 * (x_max - x_min))
x_grid = np.linspace(x_min - pad, x_max + pad, 201).reshape(-1, 1)

y_pred = log_reg.predict_proba(x_grid)[:, 1]

# --- 3) Bootstrap CIs ---
rng = np.random.default_rng(SEED)
boot_preds = np.full((N_BOOT, len(x_grid)), np.nan, dtype=float)

n_failed_boot = 0
for i in range(N_BOOT):
    # Resample rounds with replacement and refit
    idx = rng.integers(0, len(X), size=len(X))
    Xb, yb = X[idx], y[idx]
    try:
        m = LogisticRegression(solver='lbfgs')
        m.fit(Xb, yb)
        boot_preds[i] = m.predict_proba(x_grid)[:, 1]
    except Exception:
        # Best-effort: a failed replicate (e.g. a resample containing a single
        # class) stays NaN and is skipped by nanpercentile, but it is counted
        # so the loss is visible instead of silent.
        n_failed_boot += 1

if n_failed_boot:
    print(f"Warning: {n_failed_boot} of {N_BOOT} bootstrap fits failed and were excluded from the CI.")

ci_low = np.nanpercentile(boot_preds, 2.5, axis=0)
ci_high = np.nanpercentile(boot_preds, 97.5, axis=0)

# --- 4) Save CSV for Datawrapper ---
out = pd.DataFrame({
    'LifeDiff': x_grid.ravel(),
    'WinProb': y_pred,
    'CI_low': ci_low,
    'CI_high': ci_high,
})
out.to_csv(OUTPUT_PATH, index=False)

print(f"Saved curve with CIs to: {OUTPUT_PATH}")

Saved curve with CIs to: ../Week2/Control/life_diff_curve.csv


In [5]:
# Convert 2TickTime to seconds
def convert_to_seconds(t):
    """Convert an ``'M:SS'`` clock string to ``120 - (minutes * 60 + seconds)``.

    NOTE(review): 120 is presumably the clock's starting value in seconds,
    which would make the result elapsed seconds. Confirm against the data.

    Parameters
    ----------
    t : str
        Text of the form ``'<minutes>:<seconds>'`` with integer parts.

    Returns
    -------
    int or pd.NA
        The converted value, or ``pd.NA`` when ``t`` is not a string or does
        not split into exactly two integer parts.
    """
    try:
        minutes, seconds = map(int, t.split(':'))
        return 120 - (minutes * 60 + seconds)
    except (AttributeError, TypeError, ValueError):
        # Only parse failures count as missing; a bare ``except`` would also
        # swallow KeyboardInterrupt/SystemExit.
        return pd.NA

# Apply the clock conversion to every round's '2TickTime' value
two_tick_clock = df_masters['2TickTime']
df_masters['TimeTo2Ticks'] = two_tick_clock.apply(convert_to_seconds)

In [8]:
import numpy as np
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.linear_model import LogisticRegression

# Prepare data. A dedicated name is used so the raw `df` loaded at the top of
# the notebook is not overwritten; otherwise re-running earlier cells would
# silently start from this filtered frame.
df_poly = df_masters.dropna(subset=['LifeDiff_2Seg', 'TimeTo2Ticks', 'Off_Win']).copy()
df_poly['Off_Win'] = df_poly['Off_Win'].astype(int)

# Both features come from .apply() with pd.NA as the missing marker and may be
# object dtype; make them numeric before handing them to NumPy/scikit-learn.
for feature in ('LifeDiff_2Seg', 'TimeTo2Ticks'):
    df_poly[feature] = df_poly[feature].astype(float)

# (Optional) Winsorize extreme times to reduce leverage: clip to the 1st/99th percentiles
time_values = df_poly['TimeTo2Ticks'].to_numpy()
low, high = np.percentile(time_values, [1, 99])
df_poly['TimeTo2Ticks_clip'] = df_poly['TimeTo2Ticks'].clip(low, high)

X = df_poly[['LifeDiff_2Seg', 'TimeTo2Ticks_clip']].to_numpy()
y = df_poly['Off_Win'].to_numpy()

# Logistic with degree-2 features and no bias column: [L, T, L^2, L*T, T^2]
# (the intercept is fitted by LogisticRegression itself)
model = Pipeline([
    ('poly', PolynomialFeatures(degree=2, include_bias=False, interaction_only=False)),
    ('scaler', StandardScaler()),
    ('logreg', LogisticRegression(solver='lbfgs', max_iter=1000))
])
model.fit(X, y)

In [9]:
# Logistic win-probability curve with 2 features:
#   X1 = LifeDiff at 2 ticks
#   X2 = "2-tick time" (the TimeTo2Ticks column derived from '2TickTime')
# We output a 2D curve (LifeDiff on x-axis) by holding X2 at a reference value (median),
# plus optional comparison lines at fast/slow (25th/75th percentile). CI is bootstrapped for the median line.

import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression

# --- 0) Config ---
OUTPUT_PATH = '../Week2/Control/life_diff_curve_2feat.csv'
N_BOOT = 1000
SEED = 42

LIFE_COL = 'LifeDiff_2Seg'     # life differential feature
TIME_COL = 'TimeTo2Ticks'      # time feature (same units as convert_to_seconds returns)

# --- 1) Prepare raw data ---
df_clean = df_masters.dropna(subset=[LIFE_COL, TIME_COL, 'Off_Win']).copy()

# Both feature columns are built with .apply() and pd.NA, so they may be object
# dtype. Cast explicitly so the median/percentile calls and the model fit
# operate on plain float arrays.
X_life = df_clean[LIFE_COL].astype(float).to_numpy().reshape(-1, 1)
X_time = df_clean[TIME_COL].astype(float).to_numpy().reshape(-1, 1)
y = df_clean['Off_Win'].astype(int).to_numpy()

# Stack features: [LifeDiff, TwoTickTime]
X = np.hstack([X_life, X_time])

# --- 2) Fit logistic regression on raw rounds (2D)
log_reg = LogisticRegression(solver='lbfgs', max_iter=1000)
log_reg.fit(X, y)

# --- 3) Build prediction grid along LifeDiff only, holding time at reference(s)
x_min, x_max = X_life.min(), X_life.max()
pad = max(1.0, 0.1 * (x_max - x_min))
life_grid = np.linspace(x_min - pad, x_max + pad, 201)[:, None]

# Time references (for plotting lines): median (main), and optional fast/slow (25th/75th)
t_median = np.median(X_time)
t_p25    = np.percentile(X_time, 25)
t_p75    = np.percentile(X_time, 75)

def predict_at_time(t_fixed):
    """Predict offense win probability along ``life_grid`` with time held at ``t_fixed``.

    Uses the module-level ``log_reg`` model and ``life_grid`` column vector.
    Returns a 1-D array with the positive-class probability per grid point.
    """
    time_column = np.full(life_grid.shape, t_fixed, dtype=life_grid.dtype)
    grid_2d = np.concatenate([life_grid, time_column], axis=1)
    return log_reg.predict_proba(grid_2d)[:, 1]

# Curves at the median time and at the 25th ("faster", lower time) and
# 75th ("slower", higher time) percentiles
p_med, p_fast, p_slow = (
    predict_at_time(t_ref) for t_ref in (t_median, t_p25, t_p75)
)

# --- 4) Bootstrap CIs (for median-time curve)
rng = np.random.default_rng(SEED)
boot_preds = np.full((N_BOOT, life_grid.shape[0]), np.nan, dtype=float)

# The evaluation grid (life grid at median time) is identical for every
# replicate, so build it once instead of inside the loop.
Xg_med = np.hstack([life_grid, np.full_like(life_grid, fill_value=t_median)])

n_failed_boot = 0
for i in range(N_BOOT):
    idx = rng.integers(0, len(X), size=len(X))  # sample rows with replacement
    Xb, yb = X[idx], y[idx]
    try:
        m = LogisticRegression(solver='lbfgs', max_iter=1000)
        m.fit(Xb, yb)
        boot_preds[i, :] = m.predict_proba(Xg_med)[:, 1]
    except Exception:
        # Best-effort: a failed replicate stays NaN and is skipped by
        # nanpercentile, but it is counted so the loss is not silent.
        n_failed_boot += 1

if n_failed_boot:
    print(f"Warning: {n_failed_boot} of {N_BOOT} bootstrap fits failed and were excluded from the CI.")

ci_low  = np.nanpercentile(boot_preds,  2.5, axis=0)
ci_high = np.nanpercentile(boot_preds, 97.5, axis=0)

# --- 5) Save CSV for Datawrapper ---
curve_columns = {
    'LifeDiff': life_grid.ravel(),
    'WinProb_medTime': p_med,      # main curve (time fixed at median)
    'CI_low': ci_low,              # 95% CI around the median-time curve
    'CI_high': ci_high,
    'WinProb_fastTime': p_fast,    # comparison lines (no CI)
    'WinProb_slowTime': p_slow,
}
out = pd.DataFrame(curve_columns)

# Keep the time references as in-memory metadata on the frame.
# DataFrame.attrs is not written by to_csv, so these do not appear in the file.
time_refs = {'t_median': float(t_median), 't_p25': float(t_p25), 't_p75': float(t_p75)}
out.attrs = time_refs
out.to_csv(OUTPUT_PATH, index=False)

print(f"Saved curve (2-feature model; LifeDiff on x-axis) with CI at median {TIME_COL}: {OUTPUT_PATH}")
print(f"Time refs used — median: {t_median:.2f}, p25: {t_p25:.2f}, p75: {t_p75:.2f} (same units as {TIME_COL})")


Saved curve (2-feature model; LifeDiff on x-axis) with CI at median TimeTo2Ticks: ../Week2/Control/life_diff_curve_2feat.csv
Time refs used — median: 46.00, p25: 23.00, p75: 66.00 (same units as TimeTo2Ticks)


In [26]:
# Display the win-probability curve table held in `out` (no plot is drawn here)
out

Unnamed: 0,LifeDiff,WinProb_medTime,CI_low,CI_high,WinProb_fastTime,WinProb_slowTime
0,-16.200,0.012503,0.000938,0.069784,0.011436,0.013511
1,-16.068,0.012936,0.000986,0.071164,0.011833,0.013978
2,-15.936,0.013384,0.001041,0.072790,0.012243,0.014462
3,-15.804,0.013848,0.001102,0.074450,0.012667,0.014962
4,-15.672,0.014327,0.001167,0.076145,0.013106,0.015479
...,...,...,...,...,...,...
196,9.672,0.916171,0.789440,0.980928,0.908970,0.922009
197,9.804,0.918783,0.793354,0.981922,0.911784,0.924453
198,9.936,0.921320,0.797214,0.982864,0.914520,0.926827
199,10.068,0.923784,0.801020,0.983768,0.917178,0.929132


In [6]:
# Every team that appears on either side. Offense comes first so teams keep
# the order they had when only the Offense column was scanned; teams that only
# ever defended are appended instead of being dropped.
all_teams = pd.unique(
    pd.concat([df_masters['Offense'], df_masters['Defense']], ignore_index=True).dropna()
)

# Round-end life margin from the offense's point of view. to_numeric turns the
# pd.NA placeholders into NaN so the mean below skips them.
off_margin = (
    pd.to_numeric(df_masters['OffLivesEnd'], errors='coerce')
    - pd.to_numeric(df_masters['DefLivesEnd'], errors='coerce')
)

life_diff_records = []
for team in all_teams:
    on_offense = df_masters['Offense'] == team
    on_defense = df_masters['Defense'] == team
    # Flip the sign on rounds where the team defended so the margin is always
    # expressed from that team's perspective.
    team_margin = off_margin.where(on_offense, -off_margin)[on_offense | on_defense]
    life_diff_records.append({
        'Team': team,
        'AvgLifeDiff': team_margin.mean()
    })

# Create DataFrame of results (explicit columns keep the sort below valid even
# when there are no teams)
life_diff = pd.DataFrame(life_diff_records, columns=['Team', 'AvgLifeDiff'])

# The value shown is the mean end-of-round life margin, not a round count
print("4. Average Life Differential for Each Team:")
display(life_diff.sort_values(by='AvgLifeDiff', ascending=False, ignore_index=True))

4. Round Differential for Each Team:


Unnamed: 0,Team,AvgLifeDiff
0,DVS,2.823529
1,OUG,2.805556
2,Wolves,1.8
3,XROCK,1.545455
4,GodL,1.0
5,Q9,0.395833
6,AG,0.0
7,Soul,-0.05
8,WL,-4.0
9,Xceed,-4.4


In [7]:
# All team names seen on either side of a round
teams = pd.unique(df_masters[['Offense','Defense']].values.ravel())

records = []
for team in teams:
    # Rounds in which this team took part, on either side
    involved = df_masters['Offense'].eq(team) | df_masters['Defense'].eq(team)
    rounds_won = df_masters.loc[involved, 'Winner'].eq(team).sum()
    rounds_lost = involved.sum() - rounds_won
    records.append({'Team': team, 'RoundDiff': rounds_won - rounds_lost})

round_diff = pd.DataFrame(records)
print("Round Differential (Wins − Losses) per Team:")
display(round_diff.sort_values(by='RoundDiff', ascending=False, ignore_index=True))

Round Differential (Wins − Losses) per Team:


Unnamed: 0,Team,RoundDiff
0,OUG,10
1,Wolves,7
2,XROCK,7
3,DVS,5
4,Q9,2
5,AG,1
6,GodL,-1
7,Soul,-2
8,WL,-2
9,Xceed,-3


In [8]:
# Hard-coded win percentage per team (values are entered by hand, not computed
# from df_masters)
win_pct_records = {
    'XROCK': 1.0,
    'SPG': 6/13,
    'OUG': 10/13,
    'Wolves': 0.75,
    'GodL': 0.25,
    'Q9': 11/13
}
win_pct = pd.DataFrame({
    'Team': list(win_pct_records.keys()),
    'WinPct': list(win_pct_records.values()),
})


In [9]:
# Total ticks captured by each team on each map (rounds where the team attacked)
off_by_map = df_masters.groupby(['Map', 'Offense'], as_index=False).agg(
    TicksCaptured=('OffTicks', 'sum'),
    OffenseRounds=('OffTicks', 'size'),
)

# Divide by the round count, masking any zero count so it cannot be a divisor
off_round_counts = off_by_map['OffenseRounds'].where(off_by_map['OffenseRounds'] > 0, pd.NA)
off_by_map['AvgTicksPerOffRound'] = off_by_map['TicksCaptured'] / off_round_counts

# Sort by Map, then by TicksCaptured (highest first), with a fresh 0..n-1 index
ticks_off = (
    off_by_map
    .rename(columns={'Offense': 'Team'})
    .sort_values(by=['Map', 'TicksCaptured'], ascending=[True, False], ignore_index=True)
)

# Save to CSV
# ticks_off.to_csv('../Week2/Control/ticks_captured.csv', index=False)

In [10]:
# Ticks allowed by each team on each map: the opponent's OffTicks in rounds
# where the team defended
def_by_map = df_masters.groupby(['Map', 'Defense'], as_index=False).agg(
    TicksAllowed=('OffTicks', 'sum'),
    DefenseRounds=('OffTicks', 'size'),
)
def_round_counts = def_by_map['DefenseRounds'].where(def_by_map['DefenseRounds'] > 0, pd.NA)
def_by_map['AvgTicksAllowedPerDefRound'] = def_by_map['TicksAllowed'] / def_round_counts
ticks_def = def_by_map.rename(columns={'Defense': 'Team'})

# Outer-join offense and defense views; a side a team never played is filled with 0
profile_value_cols = [
    'TicksCaptured', 'OffenseRounds', 'AvgTicksPerOffRound',
    'TicksAllowed', 'DefenseRounds', 'AvgTicksAllowedPerDefRound',
]
ticks_profile = (
    ticks_off
    .merge(ticks_def, on=['Map', 'Team'], how='outer')
    .fillna(dict.fromkeys(profile_value_cols, 0))
    .sort_values(['Map', 'TicksCaptured'], ascending=[True, False])
)
# ticks_profile.to_csv('../Week2/Control/ticks_team_profile_by_map.csv', index=False)


In [11]:
# Offense view across all maps: ticks captured per team
off_overall = df_masters.groupby('Offense', as_index=False).agg(
    TicksCaptured=('OffTicks', 'sum'),
    OffenseRounds=('OffTicks', 'size'),
)
off_overall_rounds = off_overall['OffenseRounds'].where(off_overall['OffenseRounds'] > 0, pd.NA)
off_overall['AvgTicksPerOffRound'] = off_overall['TicksCaptured'] / off_overall_rounds
ticks_off_overall = (
    off_overall
    .rename(columns={'Offense': 'Team'})
    .sort_values('TicksCaptured', ascending=False)
)

# Defense view across all maps: opponent ticks allowed per team
def_overall = df_masters.groupby('Defense', as_index=False).agg(
    TicksAllowed=('OffTicks', 'sum'),
    DefenseRounds=('OffTicks', 'size'),
)
def_overall_rounds = def_overall['DefenseRounds'].where(def_overall['DefenseRounds'] > 0, pd.NA)
def_overall['AvgTicksAllowedPerDefRound'] = def_overall['TicksAllowed'] / def_overall_rounds
ticks_def_overall = def_overall.rename(columns={'Defense': 'Team'})

# Outer-join both views; a side a team never played is filled with 0
overall_value_cols = [
    'TicksCaptured', 'OffenseRounds', 'AvgTicksPerOffRound',
    'TicksAllowed', 'DefenseRounds', 'AvgTicksAllowedPerDefRound',
]
ticks_profile_overall = (
    ticks_off_overall
    .merge(ticks_def_overall, on='Team', how='outer')
    .fillna(dict.fromkeys(overall_value_cols, 0))
    .sort_values('TicksCaptured', ascending=False)
)

# ticks_profile_overall.to_csv('../Week2/Control/ticks_team_profile_overall.csv', index=False)

In [12]:
# Keep only the team name and the two per-round averages
profile_columns = ['Team', 'AvgTicksPerOffRound', 'AvgTicksAllowedPerDefRound']
ticks_profile_overall = ticks_profile_overall.loc[:, profile_columns]

# Disabled: restrict the table to teams listed in `masters`
# team_mask = ticks_profile_overall['Team'].isin(masters)
# ticks_profile_overall = ticks_profile_overall[team_mask].reset_index(drop=True)

ticks_profile_overall

Unnamed: 0,Team,AvgTicksPerOffRound,AvgTicksAllowedPerDefRound
0,OUG,4.055556,2.444444
1,Q9,2.958333,2.625
2,GodL,3.555556,2.789474
3,XROCK,3.6875,3.882353
4,Wolves,2.421053,2.8125
5,SPG,2.25,3.6
6,Soul,3.5,3.2
7,DVS,3.777778,3.375
8,AG,3.0,3.333333
9,Xceed,4.0,4.666667


In [13]:
# Join the per-team average life differential with the round differential
# (the win_pct join is currently disabled)
team_stats = life_diff.merge(round_diff, on='Team')
# team_stats = pd.merge(team_stats, win_pct, on='Team')
print("Team Stats (Avg Life Diff and Round Diff):")
display(team_stats)
# display(team_stats.sort_values(by='WinPct', ascending=False, ignore_index=True))
# team_stats.to_csv('../Week2/Control/team_stats.csv', index=False)

Team Stats (Avg Life Diff and Round Diff):


Unnamed: 0,Team,AvgLifeDiff,RoundDiff
0,OUG,2.805556,10
1,Wolves,1.8,7
2,GodL,1.0,-1
3,SPG,-4.975,-18
4,DVS,2.823529,5
5,Xceed,-4.4,-3
6,Q9,0.395833,2
7,XROCK,1.545455,7
8,Soul,-0.05,-2
9,AG,0.0,1


In [14]:
# Rebuild from the source tables rather than merging onto the previous
# `team_stats`. Merging a frame onto itself means a second run of this cell
# would add duplicated *_x / *_y tick columns.
team_stats = (
    life_diff
    .merge(round_diff, on='Team')
    .merge(ticks_profile_overall, on='Team')
)
display(team_stats)
# team_stats.to_csv('control_stats.csv', index=False)

Unnamed: 0,Team,AvgLifeDiff,RoundDiff,AvgTicksPerOffRound,AvgTicksAllowedPerDefRound
0,OUG,2.805556,10,4.055556,2.444444
1,Wolves,1.8,7,2.421053,2.8125
2,GodL,1.0,-1,3.555556,2.789474
3,SPG,-4.975,-18,2.25,3.6
4,DVS,2.823529,5,3.777778,3.375
5,Xceed,-4.4,-3,4.0,4.666667
6,Q9,0.395833,2,2.958333,2.625
7,XROCK,1.545455,7,3.6875,3.882353
8,Soul,-0.05,-2,3.5,3.2
9,AG,0.0,1,3.0,3.333333


In [15]:
# Write the combined per-team table to the rankings folder, without the index column
team_stats.to_csv('../rankings/control_stats.csv', index=False)

In [19]:
# Compare FB vs Non-FB win rates
# FB_Win is 1 when the team recorded in 'FBTeam' also won the round
df_masters['FB_Win'] = df_masters['FBTeam'].eq(df_masters['Winner']).astype(int)

fb_rate = df_masters['FB_Win'].mean()
n_rounds = len(df_masters)
label_for_flag = {0: 'Non-FB', 1: 'FB'}
rate_for_label = {'FB': fb_rate, 'Non-FB': 1 - fb_rate}

# value_counts supplies one row per outcome present in the data
fb_comparison = (
    df_masters['FB_Win']
    .value_counts(normalize=True)
    .rename_axis('FB_Win')
    .reset_index()
)
fb_comparison['FB_Win'] = fb_comparison['FB_Win'].map(label_for_flag)
fb_comparison['Win Rate'] = fb_comparison['FB_Win'].map(rate_for_label)
fb_comparison = fb_comparison[['FB_Win', 'Win Rate']].sort_values(by='FB_Win')

# Add CI to comparison (Wilson interval at alpha=0.05 over all rounds)
ci_low_fb, ci_upp_fb = proportion_confint(
    fb_comparison['Win Rate'] * n_rounds,
    n_rounds,
    alpha=0.05,
    method='wilson'
)
fb_comparison['CI_Low'] = ci_low_fb
fb_comparison['CI_Upp'] = ci_upp_fb
print("FB vs Non-FB Win Rates:")
display(fb_comparison)
# fb_comparison.to_csv('../Week2/Control/fb_win_rates.csv', index=False)

FB vs Non-FB Win Rates:


Unnamed: 0,FB_Win,Win Rate,CI_Low,CI_Upp
0,FB,0.609756,0.521476,0.691389
1,Non-FB,0.390244,0.308611,0.478524


In [20]:
# 5. Zone capture frequencies per map
# One (Map, Zone) pair per captured zone: a value of 'A' or 'B' counts once;
# any other non-missing value counts as both A and B; missing values are skipped.
zone_counts = [
    (map_name, zone)
    for map_name, captured in zip(df_masters['Map'], df_masters['Zone(s) Captures'])
    if not pd.isna(captured)
    for zone in ([captured] if captured in ('A', 'B') else ['A', 'B'])
]
zone_df = pd.DataFrame(zone_counts, columns=['Map', 'Zone'])

# Count pairs, then spread zones into columns (0 where a map has no captures of a zone)
pair_counts = zone_df.groupby(['Map', 'Zone']).size().reset_index(name='Count')
zone_freq = (
    pair_counts
    .pivot(index='Map', columns='Zone', values='Count')
    .fillna(0)
    .reset_index()
    .rename(columns={'A': 'A Captures', 'B': 'B Captures'})
)
print("5. Zone Capture Frequencies:")
display(zone_freq)

5. Zone Capture Frequencies:


Zone,Map,A Captures,B Captures
0,Crossfire,45,5
1,Raid,4,13
2,Takeoff,25,3


In [21]:
# Total rounds played per map (one row of df_masters per round)
total_games = df_masters['Map'].value_counts().reset_index()
total_games.columns = ['Map', 'TotalRounds']

# Merge zone frequencies with the round totals. Any TotalRounds column left by
# a previous run of this cell is dropped first, so re-running does not create
# TotalRounds_x / TotalRounds_y duplicates.
zone_freq = (
    zone_freq
    .drop(columns='TotalRounds', errors='ignore')
    .merge(total_games, on='Map')
)

zone_freq

Unnamed: 0,Map,A Captures,B Captures,TotalRounds
0,Crossfire,45,5,53
1,Raid,4,13,21
2,Takeoff,25,3,49


In [22]:
# Join the per-map win rates with the zone-capture counts on the map name
map_summary = pd.merge(win_split, zone_freq, on='Map')
print("Map Summary with Win Splits and Zone Captures:")
display(map_summary)
# map_summary.to_csv('../Week2/Control/map_stats.csv', index=False)

Map Summary with Win Splits and Zone Captures:


Unnamed: 0,Map,OffenseWinRate,DefenseWinRate,A Captures,B Captures,TotalRounds
0,Crossfire,0.433962,0.566038,45,5,53
1,Raid,0.428571,0.571429,4,13,21
2,Takeoff,0.306122,0.693878,25,3,49
