In [39]:
import joblib
import pandas as pd
from better_profanity import profanity
import numpy as np

players = pd.read_csv("../data/players.csv")

player_rounds = pd.read_csv("../data/player_rounds.csv")

rounds = pd.read_csv("../data/rounds.csv")


valid_ids = pd.read_csv("../data/valid_ids.csv")

push_stats = pd.read_csv("../data/push_stats.csv")

teams = pd.read_csv("../data/teams.csv")

healspread = pd.read_csv("../data/healspread.csv")

info = pd.read_csv("../data/info.csv")

round_events = pd.read_csv('../data/round_events.csv')

model_ready_data = joblib.load("../data/pkls/model_ready_data_dict.pkl")
X = model_ready_data['X']

* Valid map names

In [2]:
# Find valid map names
info_correct = info[info['id'].isin(model_ready_data['ids'])].copy()
maps = info_correct['map'].str.lower().str.split("_")
map_counts = pd.Series(maps.str[1].value_counts())


valid_maps = map_counts[map_counts > 50]
valid_map_names = valid_maps.index

In [3]:
valid_info = info[info['id'].isin(valid_ids['id'])].copy()

map_names = list(valid_info['map'].str.lower().str.split("_").str[1].value_counts().index)

map_counts = []
for map in info['map'].str.lower().values:
    i = 0
    for map_name in map_names:
        if i > 0:
            continue
        if  type(map) == type(np.nan):
            map_counts.append(np.nan)
            i +=1
        elif map_name in map:
            map_counts.append(map_name)
            i += 1
        

# Define a function to find the first match
def find_match(text):
    if type(text) == type(np.nan):
        return np.nan
    for keyword in map_names:
        if keyword in text.lower():
            return keyword
    return np.nan  # if no match found

# Apply the function row by row
info['clean_map_name'] = info['map'].apply(find_match)
valid_info['clean_map_name'] = valid_info['map'].apply(find_match)

* Koth stats

In [4]:
valid_info = info[info['id'].isin(valid_ids['id'].values)]
koth = valid_info.copy()
koth = valid_info[valid_info['map'].str.split("_").str[0].str.contains("koth")]
koth_count = koth['map'].str.lower().str.split("_")
koth_count = pd.Series(koth_count.str[1].value_counts())
koth_count = koth_count[koth_count > 50]
koth_maps = koth_count.index.values
koth_matches = info[info['clean_map_name'].isin(koth_maps)].copy()

In [5]:
point_caps = round_events[
    (round_events['type'] == 'pointcap') | (round_events['type'] == 'round_win')
].copy()

match_round_list = []

for i,match_id in enumerate(koth_matches['id'].unique()):
    #if i % 100 == 0 and i != 0:
        #print(f'{i} / {koth_matches['id'].nunique()}')
    df = point_caps[point_caps['id'] == match_id].copy()

    for round_num in df['round'].unique():
        match_round = df[df['round'] == round_num].copy()

        # Compute lag_time and time_elapsed per group (team) using shift(-1)
        match_round['lag_time'] = match_round.groupby('team')['time'].shift(-1)
        match_round['time_elapsed'] = match_round['lag_time'] - match_round['time']
        match_round['time_elapsed'] = match_round['time_elapsed'].fillna(0)

        # Calculate total cap time per round and team
        cap_time_df = match_round.groupby(['round', 'team'])['time_elapsed'].sum().reset_index()
        cap_time_df.rename(columns={'time_elapsed': 'cap_time'}, inplace=True)
        match_round = match_round.merge(cap_time_df, on=['round', 'team'], how='left')

        # Number of caps per round and team
        num_caps = match_round[match_round['type'] == 'pointcap'].groupby(['round', 'team']).size().reset_index(name='num_caps')
        match_round = match_round.merge(num_caps, on=['round', 'team'], how='left')
        match_round['num_caps'] = match_round['num_caps'].fillna(0).astype(int)

        # Rolling cap times per team (cumulative sum of time_elapsed per team)
        match_round['blue_cap_time'] = match_round.apply(lambda row: row['time_elapsed'] if row['team'] == 'Blue' else 0, axis=1).cumsum()
        match_round['red_cap_time'] = match_round.apply(lambda row: row['time_elapsed'] if row['team'] == 'Red' else 0, axis=1).cumsum()

        # Winner for the round
        winners = match_round.loc[match_round['type'] == 'round_win', 'team']
        winner = winners.values[0] if not winners.empty else np.nan
        match_round['winner'] = winner

        # Roll condition
        red_cap_sum = match_round['red_cap_time'].sum()
        blue_cap_sum = match_round['blue_cap_time'].sum()
        if red_cap_sum == 0 and winner == 'Blue':
            match_round['roll'] = 1
        elif blue_cap_sum == 0 and winner == 'Red':
            match_round['roll'] = 1
        else:
            match_round['roll'] = 0

        # Comeback condition
        cond1 = (match_round['winner'] == 'Red') & (match_round['red_cap_time'] == 0) & (match_round['blue_cap_time'] >= 150)
        cond2 = (match_round['winner'] == 'Blue') & (match_round['blue_cap_time'] == 0) & (match_round['red_cap_time'] >= 150)
        if (cond1 | cond2).any():
            match_round['comeback'] = 1
        else:
          match_round['comeback'] = 0

        match_round_list.append(match_round)

koth_stats = pd.concat(match_round_list)



* Make PIM bindable

* Clean map name

### Player Overview Tables ###

* Basic Stats Long

In [28]:
normal_cols = ['kills','deaths','assists','dmg','dmg_real','dt','dt_real','hr']
pct_cols = ['kill_pct','deaths_pct','assists_pct','dmg_pct','dmg_real_pct','dt_pct','dt_real_pct','hr_pct']

both_cols = ['id','team','steamid','name','primary_class']

normal_df = players[both_cols + normal_cols].copy()
pct_df = players[both_cols + pct_cols].copy()

pct_df.columns = normal_df.columns

normal_df['coltype'] = "Raw"
pct_df['coltype'] = "Pct of Team"


long_player = pd.concat([normal_df,pct_df])

long_player = long_player.fillna(0)

long_player.to_csv("../data/long_player_stats.csv")

* Class KDA Long

In [29]:
class_cols = [col for col in players.columns if "class_kda" in col]

class_kill = [col for col in class_cols if "kills" in col and "kdapd" not in col]

class_deaths= [col for col in class_cols if "deaths" in col and "kdapd" not in col]

class_assists = [col for col in class_cols if "assists" in col and "kdapd" not in col]

class_kill_pd = [col for col in class_cols if "kills" in col and "kdapd" in col]

class_deaths_pd = [col for col in class_cols if "deaths" in col and "kdapd" in col]

class_assists_pd = [col for col in class_cols if "assists" in col and "kdapd" in col]

keep_cols = ['id','team','steamid','name','primary_class']



class_dict = {
    "Kills" : class_kill,
    "Deaths" : class_deaths,
    "Assists" : class_assists,
    "K/D" : class_kill_pd,
    "A/D" : class_assists_pd,
    "Death Rate" : class_deaths_pd
}

long_class = pd.DataFrame()
for key in class_dict.keys():
    col_type = key
    l = class_dict[key]
    df = players[keep_cols + l].copy()
    df.columns = keep_cols + [col.split("_")[0] for col in df.columns if col not in keep_cols] 
    df['col_type'] = col_type
    long_class = pd.concat([long_class,df])

long_class = long_class.fillna(0)

long_class.to_csv("../data/class_kda_long.csv")

In [90]:
# Lead changes
koth_stats['blue_cap_time_lag'] = koth_stats['blue_cap_time'].shift(1).fillna(0)
koth_stats['red_cap_time_lag'] = koth_stats['red_cap_time'].shift(1).fillna(0)


koth_stats['leader'] = np.where(
    koth_stats['blue_cap_time'] > koth_stats['red_cap_time'], 'Blue',
    np.where(koth_stats['blue_cap_time'] < koth_stats['red_cap_time'], 'Red', "")
)

koth_stats['leader_lag'] = np.where(
    koth_stats['blue_cap_time_lag'] > koth_stats['red_cap_time_lag'], 'Blue',
    np.where(koth_stats['blue_cap_time_lag'] < koth_stats['red_cap_time_lag'], 'Red', "")
)

lead_changes_long = koth_stats[(koth_stats['leader'] != koth_stats['leader_lag'])
            & (koth_stats['leader'] != "")
              & (koth_stats['leader_lag'] != "")].copy()

lead_changes_long['blue_lead_change'] = np.where(lead_changes_long['leader'] == 'Red',1,0)
lead_changes_long['red_lead_change'] = np.where(lead_changes_long['leader'] == 'Blue',1,0)

lead_changes = lead_changes_long.groupby(['id','round'])[['blue_lead_change','red_lead_change']].sum().reset_index()


In [None]:
koth_group = koth_stats.groupby(['id','round'])

comeback = koth_group.first()['comeback'].reset_index()

roll = koth_group.first()['roll'].reset_index().drop(['id','round'],axis = 1)

num_caps = koth_group.first()['num_caps'].reset_index().drop(['id','round'],axis = 1)


koth_rounds = pd.concat([comeback,roll,num_caps],axis =1)
koth_rounds = koth_rounds.merge(rounds[['id','round','firstcap','length','winner']],on = ['id','round'])
koth_rounds = koth_rounds.merge(lead_changes,on = ['id','round'],how = 'left')

NameError: name 'left' is not defined

In [92]:
koth_rounds

Unnamed: 0,id,round,comeback,roll,num_caps,firstcap,length,winner,blue_lead_change,red_lead_change
0,1460897,1,0,0,4,Red,363,Red,3,2
1,1460897,2,0,0,3,Red,361,Red,1,1
2,1460897,3,0,1,2,Blue,267,Blue,0,1
3,1466571,0,0,1,2,Blue,229,Blue,0,1
4,1466571,2,0,0,3,Red,296,Blue,1,1
...,...,...,...,...,...,...,...,...,...,...
7506,3897182,1,0,0,3,Red,440,Blue,2,2
7507,3897182,2,0,0,4,Blue,271,Blue,1,1
7508,3897222,0,0,0,3,Blue,363,Blue,1,1
7509,3897222,2,0,0,4,Red,321,Red,2,1


In [11]:
# Shortest / Long Win
koth_stats['total_time'] = koth_stats['blue_cap_time'] + koth_stats['red_cap_time']
koth_stats.groupby(['id','winner'])['total_time'].min()

id       winner
1460897  Blue      152.0
         Red       110.0
1466571  Blue       44.0
1467241  Red         0.0
1472420  Blue       54.0
                   ...  
3897174  Blue       55.0
         Red        48.0
3897182  Blue       68.0
3897222  Blue      105.0
         Red        61.0
Name: total_time, Length: 4497, dtype: float64

* Quantile Columns

In [30]:
# List out columns
quant_cols = [
    "kills",
    "assists",
    "dmg",
   'dapm',
   'kpd',
   'offclass_pct',
   'kill_pct',
   'deaths_pct',
   'dmg_pct',
   'dmg_real_pct',
   'cpc_pct',
   'ka_pct',
   'assists_pct',
   'dt_pct',
   'dt_real_pct',
   'hroi',
   'assistspd',
   'demoman_kills_class_kdapd',
   'scout_kills_class_kdapd',
   'soldier_kills_class_kdapd',
   'medic_kills_class_kdapd',
    'demoman_deaths_class_kdapd',
   'scout_deaths_class_kdapd',
   'soldier_deaths_class_kdapd',
    'medic_deaths_class_kdapd',
   'dtpm',
   'dt_realpm',
   'healpm',
   'medkits_hppm', 
   'hrpm', 
   'deathspm'

]

# Only grab valid ids to make quanitles on
sub_players = players[players['id'].isin(valid_ids['id'])].copy()
sub_players = sub_players[['id',"primary_class",'steamid']+ quant_cols]

# loop through classes and construct the quantile sets
ranked_df = pd.DataFrame()
for class_name in sub_players['primary_class'].unique():
    sub_class = sub_players[sub_players['primary_class'] == class_name].copy()
    
    binding_df = sub_class[['id',"primary_class",'steamid']].copy()
    sub_class.drop(['id',"primary_class",'steamid'],axis = 1,inplace= True)
    
    sub_class = sub_class.rank(pct = True)
    sub_class = pd.concat([binding_df,sub_class],axis = 1)
    
    ranked_df = pd.concat([ranked_df,sub_class])
    
# Rename columns
ranked_df.columns = ['id','primary_class','steamid'] + [col + '_quantile' for 
                                                        col in ranked_df.columns if
                                                        col not in ['id','steamid','primary_class']]

ranked_df.to_csv("../data/players_quantile.csv")
