In [82]:
### SETUP ###
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import joblib
from xgboost import XGBClassifier
from sklearn.model_selection import GridSearchCV, StratifiedKFold
import random
import matplotlib.pyplot as plt

# Opt in to pandas' future "no silent downcasting" behaviour for the whole session
pd.set_option('future.no_silent_downcasting', True)

# NOTE(review): joblib.load unpickles the file — only load artifacts from a trusted source.
data = joblib.load("data/pkls/rgl_df_dict.pkl")

# Pull the frames used below out of the loaded object
players, teams, team_medic_stats = (
    data['players'],
    data['team_stats'],
    data['team_medic_stats'],
)

# Suicides as a share of deaths, rounded to 4 decimals.
# `players` is the same object as data['players'], so the new column is added there too.
players['suicide_rate'] = (players['suicides'] / players['deaths']).astype(float).round(4)



In [83]:
# Medic statistics table from the loaded data (displayed in the next cell:
# rows labelled by steamid, with `id`, `medicstats.*`, `ubers` and `ubertypes.*` columns)
medic_stats = data['medic_stats']


In [84]:
medic_stats

metric,id,medicstats.advantages_lost,medicstats.avg_time_before_healing,medicstats.avg_time_before_using,medicstats.avg_time_to_build,medicstats.avg_uber_length,medicstats.biggest_advantage_lost,medicstats.deaths_with_95_99_uber,medicstats.deaths_within_20s_after_uber,ubers,ubertypes.kritzkrieg,ubertypes.medigun,ubertypes.unknown,ubertypes.vaccinator,ubertypes.quickfix
steamid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
[U:1:1026268791],3868244,,,,,,,,,0,,,,,
[U:1:119182029],3868244,,,,,,,,,0,,,,,
[U:1:1233706064],3868244,2,4.713636,13.047619,52.652174,7.830769,14,2,12,21,18,3,,,
[U:1:124829275],3868244,,,,,,,,,0,,,,,
[U:1:127206357],3868244,,,,,,,,,0,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
[U:1:308086877],1864908,,,,,,,,,0,,,,,
[U:1:443810853],1864908,,,,,,,,,0,,,,,
[U:1:62347347],1864908,,,,,,,,,0,,,,,
[U:1:78753744],1864908,,24.75,,,,,0,,0,,,,,


In [85]:
team_medic_stats

Unnamed: 0,id,team,medicstats.advantages_lost,medicstats.avg_time_before_healing,medicstats.avg_time_before_using,medicstats.avg_time_to_build,medicstats.avg_uber_length,medicstats.biggest_advantage_lost,medicstats.deaths_with_95_99_uber,medicstats.deaths_within_20s_after_uber,...,medic_deaths_forced,forced,successful_uber_rate,forced_uber_rate,forced_medic_death_rate,forced_drop_rate,medic_deaths_capitalized,round_losing_medic_deaths,round_losing_medic_death_rate,medic_death_capitalization_rate
0,3868244,Blue,2,4.713636,13.047619,52.652174,7.830769,14,2,12,...,7.0,3.0,0.4286,0.1429,0.2222,,8,2,0.0741,0.3636
1,3868244,Red,,4.384211,10.294118,66.588235,6.56875,,1,4,...,6.0,,0.1176,,0.3182,,10,0,0.0000,0.3704
0,3868241,Blue,4,2.081818,17.481481,53.655172,6.911111,40,,3,...,8.0,5.0,0.3333,0.1852,0.3077,,4,1,0.0769,0.2667
1,3868241,Red,1,3.530769,14.923077,56.518519,6.434615,11,3,5,...,4.0,12.0,0.3846,0.4615,0.5333,,7,1,0.0667,0.5385
0,3868230,Blue,,5.6,19.5,37.8,6.3,,1,,...,,,0.5000,,,,0,0,0.0000,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1,1871769,Red,,9.04,,,,,,,...,,,,,,,1,0,0.0000,0.2000
0,1871754,Blue,,3.042857,7.666667,34.0,6.8,,1,1,...,1.0,,0.6667,,0.1250,,0,0,0.0000,0.0000
1,1871754,Red,,6.575,20.333333,57.666667,6.266667,,1,,...,1.0,,0.3333,,0.1667,,2,0,0.0000,0.2500
0,1864908,Blue,,24.75,,,,,,,...,,,,,,,2,0,0.0000,0.1818


## Data Manips
- Detect whether each team follows the expected composition (1 medic, 2 scouts, 2 soldiers, 1 demoman) and keep only the matches where both teams do

In [72]:
# The composition every team must field: 1 medic, 2 scouts, 2 soldiers, 1 demoman.
# Stored sorted so it can be compared directly against a sorted list of class names.
EXPECTED_COMP = sorted(['medic', 'scout', 'scout', 'soldier', 'soldier', 'demoman'])


def has_expected_comp(classes):
    """Return True when `classes` is exactly the expected team composition.

    Parameters
    ----------
    classes : iterable
        The `primary_class` values of one team in one match.

    Returns
    -------
    bool
        True only if there are exactly six entries made up of one demoman,
        one medic, two scouts and two soldiers (in any order).
    """
    # str() keeps missing values sortable next to real class names; the
    # resulting 'nan'/'None' can never match, so such teams are rejected
    # instead of raising.
    return sorted(map(str, classes)) == EXPECTED_COMP


# One row per (match id, team) with a 0/1 flag for a valid composition
team_comp = (
    players.groupby(['id', 'team'])['primary_class']
    .agg(has_expected_comp)
    .astype(int)
    .reset_index(name='correct')
)

# Count the valid teams per match and keep the matches where both teams qualify
team_comp = team_comp.groupby('id').agg(correct_team_comp=('correct', 'sum'))

team_comp = team_comp[team_comp['correct_team_comp'] == 2]

# `team_comp` is indexed by match id after the aggregation above
players = players[players['id'].isin(team_comp.index)]

- Number the duplicated classes within each team so they become `scout_1`/`scout_2` and `soldier_1`/`soldier_2`

In [60]:
# Work on a copy so that `players` itself is never overwritten
players_fixed = players.copy()

# One group per team per match (match id + team)
grouped = players_fixed.groupby(by=['id', 'team'])

# Function to rename classes within each group
def rename_classes(df, numbered_classes=('scout', 'soldier')):
    """Number the repeated classes of one group, e.g. ``scout`` -> ``scout_1``, ``scout_2``.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows of a single group; must contain a ``primary_class`` column.
    numbered_classes : iterable of str, optional
        Class names that receive a running ``_<n>`` suffix (starting at 1).
        Every other value is left unchanged.

    Returns
    -------
    pandas.DataFrame
        A new frame with the same index and columns as ``df`` in which
        ``primary_class`` has been relabelled. ``df`` itself is not modified.

    Notes
    -----
    Suffixes are handed out in row order, so which player becomes ``_1`` and
    which ``_2`` depends on the order of the rows in ``df``.
    """
    next_suffix = dict.fromkeys(numbered_classes, 1)
    relabelled = []
    for cls in df['primary_class']:
        if cls in next_suffix:
            relabelled.append(f"{cls}_{next_suffix[cls]}")
            next_suffix[cls] += 1
        else:
            relabelled.append(cls)
    # Build a new frame rather than assigning into `df`: functions passed to
    # groupby.apply must not mutate the object they are given.
    return df.assign(primary_class=relabelled)

# Apply the renaming to each (id, team) group. The group keys are excluded from
# the frames handed to `rename_classes` and come back as the outer index levels;
# the innermost level holds the original row labels. Drop that level by position
# (instead of relying on the auto-generated column name 'level_2') and turn the
# group keys back into ordinary columns.
players_fixed = (
    grouped.apply(rename_classes, include_groups=False)
    .droplevel(-1)
    .reset_index()
)

- Drop Appropriate Columns

In [78]:
team_medic_stats

Unnamed: 0,id,team,medicstats.advantages_lost,medicstats.avg_time_before_healing,medicstats.avg_time_before_using,medicstats.avg_time_to_build,medicstats.avg_uber_length,medicstats.biggest_advantage_lost,medicstats.deaths_with_95_99_uber,medicstats.deaths_within_20s_after_uber,...,medic_deaths_forced,forced,successful_uber_rate,forced_uber_rate,forced_medic_death_rate,forced_drop_rate,medic_deaths_capitalized,round_losing_medic_deaths,round_losing_medic_death_rate,medic_death_capitalization_rate
0,3868244,Blue,2,4.713636,13.047619,52.652174,7.830769,14,2,12,...,7.0,3.0,0.4286,0.1429,0.2593,,8,2,0.0741,0.2963
0,3868241,Blue,4,2.081818,17.481481,53.655172,6.911111,40,,3,...,8.0,5.0,0.3333,0.1852,0.3077,,4,1,0.0769,0.2667
1,3868241,Red,1,3.530769,14.923077,56.518519,6.434615,11,3,5,...,4.0,12.0,0.3846,0.4615,0.5333,,7,1,0.0667,0.5385
0,3868230,Blue,,5.6,19.5,37.8,6.3,,1,,...,,,0.5000,,,,0,0,0.0000,0.0000
0,3868229,Blue,1,2.7,14.875,58.588235,6.8,22,,1,...,3.0,1.0,0.3750,0.0625,0.1000,,3,3,0.3000,0.2143
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0,2081874,Blue,1,4.7,9.166667,54.333333,7.1,10,,2,...,2.0,,0.1667,,0.5000,,3,0,0.0000,0.7500
0,1910952,Blue,,4.68,6.0,10.0,7.275,,,1,...,,,0.3333,,,,2,0,0.0000,0.2000
0,1871769,Blue,,1.125,,,,,,,...,,,,,,,1,0,0.0000,0.2000
0,1871754,Blue,,3.042857,7.666667,34.0,6.8,,1,1,...,1.0,,0.6667,,0.1250,,0,0,0.0000,0.0000


In [76]:
players.columns

Index(['id', 'steamid', 'primary_class', 'primary_class_time', 'assists',
       'cpc', 'dapd', 'dapm', 'deaths', 'dmg', 'dmg_real', 'drops', 'dt',
       'dt_real', 'heal', 'hr', 'kapd', 'kills', 'kpd', 'medkits',
       'medkits_hp', 'sentries', 'suicides', 'team', 'ka', 'name',
       'offclass_time', 'offclass_pct', 'total_time', 'kill_pct', 'deaths_pct',
       'dmg_pct', 'dmg_real_pct', 'cpc_pct', 'ka_pct', 'assists_pct', 'dt_pct',
       'dt_real_pct', 'hr_pct', 'hroi', 'hroi_real', 'healps', 'suicide_rate'],
      dtype='object')

In [None]:
# Player columns earmarked for removal; every name appears in `players.columns`.
# Each column is listed exactly once.
drop_cols = [
    'assists', 'deaths', 'dmg', 'dmg_real', 'drops', 'dt',
    'dt_real', 'heal', 'hr', 'kills', 'medkits', 'sentries',
    'ka', 'name', 'offclass_time', 'kapd', 'kpd', 'kill_pct',
    'total_time', 'healps', 'suicide_rate',
]

# Still-empty placeholders — presumably class-specific drop lists; TODO confirm and fill in
drop_medic = []

drop_combat = []

In [66]:
# NOTE(review): `stat_cols` is defined but not applied below — every non-key
# column of `players_fixed` is pivoted. Confirm whether a subset was intended.
stat_cols = ['ka', 'deaths', 'dmg']  # etc.

# Reshape to one row per (id, team): index on match, team and class, spread the
# class level across the columns, flatten each (stat, class) column pair into a
# single "<class>_<stat>" name, then restore id/team as ordinary columns.
df_wide = (
    players_fixed
    .set_index(['id', 'team', 'primary_class'])
    .unstack('primary_class')
    .pipe(
        lambda wide: wide.set_axis(
            ["{}_{}".format(cls, stat) for stat, cls in wide.columns],
            axis=1,
        )
    )
    .reset_index()
)


In [None]:
# NOTE(review): this rebinds `drop_cols`, replacing the longer list assigned in an
# earlier cell — confirm which of the two lists is the intended one.
drop_cols = ['primary_class_time']

Index(['id', 'steamid', 'primary_class', 'primary_class_time', 'assists',
       'cpc', 'dapd', 'dapm', 'deaths', 'dmg', 'dmg_real', 'drops', 'dt',
       'dt_real', 'heal', 'hr', 'kapd', 'kills', 'kpd', 'medkits',
       'medkits_hp', 'sentries', 'suicides', 'team', 'ka', 'name',
       'offclass_time', 'offclass_pct', 'total_time', 'kill_pct', 'deaths_pct',
       'dmg_pct', 'dmg_real_pct', 'cpc_pct', 'ka_pct', 'assists_pct', 'dt_pct',
       'dt_real_pct', 'hr_pct', 'hroi', 'hroi_real', 'healps'],
      dtype='object')

In [33]:
b

Unnamed: 0,id,steamid,primary_class,primary_class_time,assists,cpc,dapd,dapm,deaths,dmg,...,dmg_real_pct,cpc_pct,ka_pct,assists_pct,dt_pct,dt_real_pct,hr_pct,hroi,hroi_real,healps
0,3868241,[U:1:102169455],medic,2808,17,3,56,15,13,733,...,0.0038,0.1154,0.0826,0.1932,0.1086,0.1606,,,,18.2172
3,3868241,[U:1:151151597],soldier_1,2808,14,2,455,311,32,14563,...,0.2182,0.0769,0.1826,0.1591,0.1747,0.1736,0.0865,3.292562,0.231065,
4,3868241,[U:1:160320827],soldier_2,2808,12,2,706,347,23,16256,...,0.1593,0.0769,0.1565,0.1364,0.1671,0.1727,0.1576,2.016623,0.092544,
7,3868241,[U:1:252600561],scout_1,2801,22,20,566,266,22,12466,...,0.2296,0.7692,0.2391,0.25,0.2358,0.197,0.3468,0.702706,0.060598,
10,3868241,[U:1:872819857],demoman,2808,8,5,766,344,21,16100,...,0.1719,0.1923,0.1261,0.0909,0.1849,0.155,0.2713,1.16011,0.058005,
11,3868241,[U:1:925262370],scout_2,2793,15,6,569,255,21,11959,...,0.2172,0.2308,0.213,0.1705,0.1288,0.1411,0.1379,1.695831,0.144214,


In [39]:
players.columns

Index(['id', 'steamid', 'primary_class', 'primary_class_time', 'assists',
       'cpc', 'dapd', 'dapm', 'deaths', 'dmg', 'dmg_real', 'drops', 'dt',
       'dt_real', 'heal', 'hr', 'kapd', 'kills', 'kpd', 'medkits',
       'medkits_hp', 'sentries', 'suicides', 'team', 'ka', 'name',
       'offclass_time', 'offclass_pct', 'total_time', 'kill_pct', 'deaths_pct',
       'dmg_pct', 'dmg_real_pct', 'cpc_pct', 'ka_pct', 'assists_pct', 'dt_pct',
       'dt_real_pct', 'hr_pct', 'hroi', 'hroi_real', 'healps'],
      dtype='object')

In [34]:
# Identifier columns (match, team, class) and the stat columns to spread out
key_cols = ['id', 'team', 'primary_class']
stat_cols = ['ka', 'deaths', 'dmg']  # add any others

# NOTE(review): `b` is not defined in any cell visible here — it must already
# exist in the kernel for this cell to run.
b_subset = b[key_cols + stat_cols]

# Index on the key columns, move the class level into the columns, flatten each
# (stat, class) pair into "<class>_<stat>", then bring id/team back as columns.
b_wide = (
    b_subset
    .set_index(key_cols)
    .unstack('primary_class')
    .pipe(
        lambda wide: wide.set_axis(
            ["{}_{}".format(cls, stat) for stat, cls in wide.columns],
            axis=1,
        )
    )
    .reset_index()
)


In [35]:
b_wide

Unnamed: 0,id,team,demoman_ka,medic_ka,scout_1_ka,scout_2_ka,soldier_1_ka,soldier_2_ka,demoman_deaths,medic_deaths,scout_1_deaths,scout_2_deaths,soldier_1_deaths,soldier_2_deaths,demoman_dmg,medic_dmg,scout_1_dmg,scout_2_dmg,soldier_1_dmg,soldier_2_dmg
0,3868241,Blue,29,19,55,49,42,36,21,13,22,21,32,23,16100,733,12466,11959,14563,16256
