In [24]:
import pandas as pd
import numpy as np
import copy

from scipy.stats import chi2_contingency
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

demos = pd.read_csv("demos.csv")

In [25]:
demos['player_loadout_array'] = demos['player_loadout'].str.split(', ')
unique_items = demos['player_loadout_array'].explode().unique().tolist()
knives = [i for i in unique_items if
          "knife" in i.lower() or
          "daggers" in i.lower() or
          "bayonet" in i.lower() or
          "karambit" in i.lower()]

knives

['Butterfly Knife',
 'Talon Knife',
 'M9 Bayonet',
 'Karambit',
 'Shadow Daggers',
 'Skeleton Knife',
 'Flip Knife',
 'Bayonet',
 'Survival Knife',
 'Nomad Knife',
 'Stiletto Knife',
 'knife_t',
 'knife',
 'Ursus Knife']

In [26]:
demos['player_loadout_array'] = demos['player_loadout_array'].apply(lambda l: [i for i in l if i not in knives])
demos['player_loadout_array'].head(10)


0              [Glock-18, C4 Explosive]
1    [Glock-18, Smoke Grenade, Molotov]
2                            [Glock-18]
3      [P250, Smoke Grenade, Flashbang]
4                               [USP-S]
5                [P2000, Smoke Grenade]
6                       [Dual Berettas]
7                               [USP-S]
8                    [USP-S, Flashbang]
9                            [Glock-18]
Name: player_loadout_array, dtype: object

In [27]:
outliar_rounds = demos.loc[demos['player_loadout_array'].str.len() == 0]['round_id'].values
print(outliar_rounds)

demos_inventory = demos.loc[demos['round_id'].isin(outliar_rounds) == False]

[ 258  454  701 1265]


In [28]:
item_types = demos_inventory['player_loadout_array'].explode().unique()
item_types

array(['Glock-18', 'C4 Explosive', 'Smoke Grenade', 'Molotov', 'P250',
       'Flashbang', 'USP-S', 'P2000', 'Dual Berettas', 'Tec-9',
       'Desert Eagle', 'MP9', 'Incendiary Grenade',
       'High Explosive Grenade', 'FAMAS', 'AK-47', 'M4A1-S', 'M4A4',
       'AWP', 'Galil AR', 'Five-SeveN', 'Decoy Grenade', 'MAC-10', 'AUG',
       'Zeus x27', 'SSG 08', 'XM1014', 'MAG-7', 'SG 553', 'CZ75-Auto',
       'MP7', 'MP5-SD', 'UMP-45'], dtype=object)

In [29]:
secondaries = ['Glock-18', 'P250', 'USP-S', 'P2000', 'Dual Berettas', 'Tec-9',
       'Desert Eagle', 'CZ75-Auto', 'Five-SeveN']

primaries = ['MP9', 'FAMAS', 'AK-47', 'M4A1-S', 'M4A4',
       'AWP', 'Galil AR', 'MAC-10', 'AUG',
       'SSG 08', 'XM1014', 'MAG-7', 'SG 553', 
       'MP7', 'MP5-SD', 'UMP-45']

primary_ser = demos_inventory['player_loadout_array'].apply(lambda x: list(filter(lambda y: y is not None, [(item if item in primaries else None) for item in x]))).apply(lambda x: x[0] if len(x) > 0 else None)
demos_inventory = demos_inventory.assign(primary_weapon=primary_ser)

secondary_ser = demos_inventory['player_loadout_array'].apply(lambda x: list(filter(lambda y: y is not None, [(item if item in secondaries else None) for item in x]))).apply(lambda x: x[0] if len(x) > 0 else None)
demos_inventory = demos_inventory.assign(secondary_weapon=secondary_ser)

weapon_ser = np.where(demos_inventory['primary_weapon'].notna(), demos_inventory['primary_weapon'], demos_inventory['secondary_weapon'])
demos_inventory = demos_inventory.assign(weapon=weapon_ser)


demos_inventory.loc[demos_inventory['weapon'].isna()]

Unnamed: 0,match_id,map_id,round_id,team_name,map_name,round_number,round_ct_team,round_first_site_hit,round_site_hit_time,round_bomb_plant_site,...,round_first_killer,round_first_death,player_headshots,player_upperbodyshots,player_stomachshots,player_legshots,player_loadout_array,primary_weapon,secondary_weapon,weapon
5694,13,29,569,Team Falcons,de_ancient,15,HEROIC,A,67.3125,A,...,False,False,0,0,0,0,[C4 Explosive],,,
6114,13,31,611,Team Falcons,de_dust2,6,HEROIC,A,51.78125,,...,False,False,0,0,0,0,"[Smoke Grenade, Flashbang]",,,
6559,14,33,655,Lynn Vision Gaming,de_anubis,11,FURIA,B,34.828125,B,...,False,False,0,1,0,0,[C4 Explosive],,,
8476,18,43,847,FURIA,de_nuke,12,FURIA,B,34.359375,A,...,False,False,0,0,0,0,"[Flashbang, Smoke Grenade]",,,
9260,20,47,926,Team Falcons,de_dust2,9,MOUZ,B,86.03125,B,...,False,False,0,0,0,0,[Flashbang],,,
12249,25,61,1224,GamerLegion,de_nuke,18,Team Falcons,,,A,...,False,False,0,10,2,0,[Smoke Grenade],,,
13375,27,67,1337,Team Vitality,de_inferno,1,Team Vitality,A,44.703125,,...,False,False,0,0,0,0,[Flashbang],,,


In [30]:
demos_inventory.loc[demos_inventory['weapon'].isna()]['round_id'].values

array([ 569,  611,  655,  847,  926, 1224, 1337])

In [31]:
more_outliar_rounds = demos_inventory.loc[demos_inventory['weapon'].isna()]['round_id'].values
demos_inventory = demos_inventory.loc[demos_inventory['round_id'].isin(more_outliar_rounds) == False]
demos_inventory.loc[demos_inventory['weapon'].isna()]['round_id'].values

array([], dtype=int64)

In [32]:
result = (demos_inventory['round_result'] == 'CT') & (demos_inventory['round_ct_team'] == demos_inventory['team_name']) | (demos_inventory['round_result'] == 'T') & (demos_inventory['round_ct_team'] != demos_inventory['team_name'])
demos_inventory = demos_inventory.assign(result=result) # makes it easier for us to do ARM
demos_inventory

Unnamed: 0,match_id,map_id,round_id,team_name,map_name,round_number,round_ct_team,round_first_site_hit,round_site_hit_time,round_bomb_plant_site,...,round_first_death,player_headshots,player_upperbodyshots,player_stomachshots,player_legshots,player_loadout_array,primary_weapon,secondary_weapon,weapon,result
0,0,0,0,Team Liquid,de_ancient,1,FaZe Clan,A,23.078125,,...,False,1,1,1,0,"[Glock-18, C4 Explosive]",,Glock-18,Glock-18,False
1,0,0,0,Team Liquid,de_ancient,1,FaZe Clan,A,23.078125,,...,False,1,3,2,0,"[Glock-18, Smoke Grenade, Molotov]",,Glock-18,Glock-18,False
2,0,0,0,Team Liquid,de_ancient,1,FaZe Clan,A,23.078125,,...,False,1,1,0,0,[Glock-18],,Glock-18,Glock-18,False
3,0,0,0,Team Liquid,de_ancient,1,FaZe Clan,A,23.078125,,...,False,0,0,0,0,"[P250, Smoke Grenade, Flashbang]",,P250,P250,False
4,0,0,0,FaZe Clan,de_ancient,1,FaZe Clan,A,23.078125,,...,False,1,5,0,0,[USP-S],,USP-S,USP-S,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14275,28,70,1427,MOUZ,de_inferno,21,Team Vitality,,,,...,False,0,0,0,0,"[Glock-18, Galil AR, Smoke Grenade, Molotov, F...",Galil AR,Glock-18,Galil AR,False
14276,28,70,1427,Team Vitality,de_inferno,21,Team Vitality,,,,...,False,1,1,0,0,"[Smoke Grenade, AK-47, Flashbang, Incendiary G...",AK-47,Five-SeveN,AK-47,True
14277,28,70,1427,Team Vitality,de_inferno,21,Team Vitality,,,,...,False,0,5,1,1,"[Five-SeveN, AK-47, Smoke Grenade, High Explos...",AK-47,Five-SeveN,AK-47,True
14278,28,70,1427,MOUZ,de_inferno,21,Team Vitality,,,,...,False,0,0,0,0,"[Glock-18, AK-47, Molotov, High Explosive Gren...",AK-47,Glock-18,AK-47,False


In [43]:
def create_baskets_df(demos_inventory: pd.DataFrame):
    baskets_df = demos_inventory.groupby(
        ['round_id', 'team_name']
        )['weapon'].apply(
            lambda x : [a for a in x] + ["win" if demos_inventory.loc[
                (demos_inventory['round_id'] == x.name[0]) & (demos_inventory['team_name'] == x.name[1])
            ]['result'].iloc[0] == True else "loss"]
        ).reset_index().rename(columns={"weapon": "basket"})
    baskets_df = pd.merge(baskets_df, demos_inventory, how="inner", on=["team_name", "round_id"])[['round_id', 'team_name', 'basket', 'result']]
    baskets_df = baskets_df.drop_duplicates(subset=['round_id', 'team_name']).reset_index()
    baskets_df = baskets_df.assign(result=baskets_df['result'].apply(lambda x: "win" if x else "loss"))
    return baskets_df

In [34]:
def mark_duplicates(basket: list):
    new_list = []
    counts = {}
    basket_clone = copy.deepcopy(basket)
    while len(basket_clone) != 0:
        item = basket_clone.pop(0)
        if item not in new_list:
            counts[item] = 1 if item not in counts else counts[item] + 1
            new_list += [item + "_" + str(counts[item]) if item != 'win' and item != 'loss' else item]
        else:
            new_list += [item + "_1" if item != 'win' and item != 'loss' else item]
    return new_list


In [35]:
def mark_duplicates_in_df(baskets_df: pd.DataFrame):
    baskets_df = baskets_df.assign(marked_basket=baskets_df['basket'].apply(mark_duplicates))
    return baskets_df

In [56]:
def generate_rules_df(baskets_df: pd.DataFrame, min_rounds: int):
    te = TransactionEncoder()
    te_ary = te.fit(baskets_df['marked_basket']).transform(baskets_df['marked_basket'])
    basket_df = pd.DataFrame(te_ary, columns=te.columns_)

    min_support = min_rounds / (len(baskets_df) / 2)

    frequent_itemsets = apriori(basket_df, min_support=min_support, use_colnames=True)
    rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.6)

    # Filtering for rules where 'win' or 'loss' is the consequence
    winloss_rules = rules[(rules['consequents'] == {'loss'}) | (rules['consequents'] == {'win'})]
    return winloss_rules

In [37]:
def extract_highest(set: frozenset[str]):
    outset = {}
    for item in set:
        split = item.split('_')
        if split[0] not in outset or outset[split[0]] < split[1]:
            outset[split[0]] = split[1]
    items = []
    for item in outset:
        items += [item + "_" + str(outset[item])]
    return frozenset(items)

def process_winloss_df(winloss_rules: pd.DataFrame):
    winloss_rules = winloss_rules.assign(antecedents=winloss_rules['antecedents'].apply(extract_highest))
    winloss_rules = winloss_rules.drop_duplicates()
    winloss_rules = winloss_rules.reset_index().drop(columns=['index'])
    winloss_rules = winloss_rules[['antecedents', 'consequents', 'support', 'confidence', 'lift']]
    return winloss_rules

In [53]:
def baskets_with_rule(baskets_df: pd.DataFrame, rule: frozenset[str]):
    return baskets_df.loc[baskets_df['marked_basket'].apply(lambda x: [item in x for item in rule]).apply(all)]

def baskets_without_rule(baskets_df: pd.DataFrame, rule: frozenset[str]):
    return baskets_df.loc[baskets_df['marked_basket'].apply(lambda x: [item in x for item in rule]).apply(all).apply(lambda x : not x)]


def count_win_with(baskets_df: pd.DataFrame, rule: frozenset[str]):
    valid_baskets_df = baskets_with_rule(baskets_df, rule)
    return len(valid_baskets_df.loc[valid_baskets_df['result'] == 'win'])

def count_win_without(baskets_df: pd.DataFrame, rule: frozenset[str]):
    valid_baskets_df = baskets_without_rule(baskets_df, rule)
    return len(valid_baskets_df.loc[valid_baskets_df['result'] == 'win'])

def count_loss_with(baskets_df: pd.DataFrame, rule: frozenset[str]):
    valid_baskets_df = baskets_with_rule(baskets_df, rule)
    return len(valid_baskets_df.loc[valid_baskets_df['result'] == 'loss'])

def count_loss_without(baskets_df: pd.DataFrame, rule: frozenset[str]):
    valid_baskets_df = baskets_without_rule(baskets_df, rule)
    return len(valid_baskets_df.loc[valid_baskets_df['result'] == 'loss'])


def get_chi_square_counts(baskets_df: pd.DataFrame, rules_df: pd.DataFrame):
    rules_df = rules_df.assign(win_with=rules_df['antecedents'].apply(lambda x: count_win_with(baskets_df, x)))
    rules_df = rules_df.assign(win_without=rules_df['antecedents'].apply(lambda x: count_win_without(baskets_df, x)))
    rules_df = rules_df.assign(loss_with=rules_df['antecedents'].apply(lambda x: count_loss_with(baskets_df, x)))
    rules_df = rules_df.assign(loss_without=rules_df['antecedents'].apply(lambda x: count_loss_without(baskets_df, x)))

    return rules_df

In [39]:
def get_chisquare(row: pd.Series):
    chi2_table = [[row['win_with'], row['win_without']], [row['loss_with'], row['loss_without']]]
    use_correction = (row['win_with'] < 10) or (row['win_without'] < 10) or (row['loss_with'] < 10) or (row['loss_without'] < 10)
    chi2 = chi2_contingency(chi2_table, correction=use_correction)
    return (chi2.statistic, chi2.pvalue)

def get_chisquare_table(rules_df: pd.DataFrame, alpha = 0.05):
    chi2 = rules_df.apply(get_chisquare, axis=1)
    rules_df = rules_df.assign(chi2=chi2.apply(lambda x: x[0]), pvalue=chi2.apply(lambda x: x[1]))
    rules_df = rules_df.assign(chi2_result=rules_df.apply(lambda x: "dependent" if x['pvalue'] <= alpha else "independent", axis=1))
    return rules_df
    


In [55]:
def get_rules_chisquare_table(demos_df: pd.DataFrame, min_rounds, alpha):
    baskets_df = create_baskets_df(demos_df)
    baskets_df = mark_duplicates_in_df(baskets_df)

    rules_df = generate_rules_df(baskets_df, min_rounds)
    rules_df = process_winloss_df(rules_df)
    rules_df = get_chi_square_counts(baskets_df, rules_df)
    rules_df = get_chisquare_table(rules_df, alpha)
    return rules_df

In [66]:
get_rules_chisquare_table(demos_inventory, 20, 0.05)

Unnamed: 0,antecedents,consequents,support,confidence,lift,win_with,win_without,loss_with,loss_without,chi2,pvalue,chi2_result
0,(Desert Eagle_1),(loss),0.094566,0.788235,1.575359,72,1344,268,1150,128.077505,1.079443e-29,dependent
1,(Desert Eagle_2),(loss),0.033169,0.817391,1.633630,21,1395,94,1324,48.191733,3.865144e-12,dependent
2,(Dual Berettas_1),(win),0.021524,0.648936,1.298789,61,1355,33,1385,8.667486,3.239379e-03,dependent
3,(Five-SeveN_1),(loss),0.051517,0.789189,1.577265,39,1377,146,1272,66.047056,4.402850e-16,dependent
4,(Five-SeveN_2),(loss),0.027170,0.810526,1.619909,18,1398,77,1341,37.826912,7.730791e-10,dependent
...,...,...,...,...,...,...,...,...,...,...,...,...
63,"(AWP_1, M4A1-S_1, AK-47_2)",(win),0.038462,0.685535,1.372037,109,1307,50,1368,23.282710,1.398516e-06,dependent
64,"(AWP_1, AK-47_2, M4A4_1)",(win),0.021524,0.701149,1.403289,61,1355,26,1392,14.577417,1.345170e-04,dependent
65,"(AK-47_2, Galil AR_1, MAC-10_1)",(win),0.015526,0.698413,1.397812,44,1372,19,1399,10.182310,1.417944e-03,dependent
66,"(AK-47_2, M4A1-S_1, M4A4_1)",(win),0.015526,0.785714,1.572538,44,1372,12,1406,18.700439,1.529473e-05,dependent


In [76]:
# pistol rounds
pistol_round_rules = get_rules_chisquare_table(
    demos_inventory.loc[
        (demos_inventory['round_number'] == 1) 
        | (demos_inventory['round_number'] == 13)
        ], 2, 0.05)

pistol_round_rules.loc[pistol_round_rules['chi2_result'] == "dependent"]

Unnamed: 0,antecedents,consequents,support,confidence,lift,win_with,win_without,loss_with,loss_without,chi2,pvalue,chi2_result
0,(Dual Berettas_1),(win),0.224806,0.659091,1.32848,58,70,30,100,14.188557,0.000165,dependent
2,(Glock-18_1),(loss),0.306202,0.612403,1.215385,50,78,79,51,12.155769,0.000489,dependent
3,(Glock-18_2),(loss),0.306202,0.612403,1.215385,50,78,79,51,12.155769,0.000489,dependent
4,(Glock-18_3),(loss),0.306202,0.612403,1.215385,50,78,79,51,12.155769,0.000489,dependent
5,(Glock-18_4),(loss),0.302326,0.609375,1.209375,50,78,78,52,11.310176,0.000771,dependent
7,(P250_1),(loss),0.131783,0.708333,1.405769,14,114,34,96,9.861279,0.001688,dependent
8,(USP-S_1),(win),0.302326,0.604651,1.21875,78,50,51,79,12.155769,0.000489,dependent
9,(USP-S_2),(win),0.302326,0.604651,1.21875,78,50,51,79,12.155769,0.000489,dependent
10,"(Dual Berettas_1, P2000_1)",(win),0.069767,0.782609,1.577446,18,110,5,125,7.079834,0.007796,dependent
11,"(Dual Berettas_1, USP-S_1)",(win),0.224806,0.659091,1.32848,58,70,30,100,14.188557,0.000165,dependent


In [82]:
# normal rounds, T
t_normal_round_rules = get_rules_chisquare_table(
    demos_inventory.loc[
        ((demos_inventory['round_number'] != 1) 
        | (demos_inventory['round_number'] != 13)) &
        (demos_inventory['round_ct_team'] != demos_inventory['team_name'])
        ], 10, 0.05)

t_normal_round_rules.loc[t_normal_round_rules['chi2_result'] == "dependent"]

Unnamed: 0,antecedents,consequents,support,confidence,lift,win_with,win_without,loss_with,loss_without,chi2,pvalue,chi2_result
0,(Desert Eagle_1),(loss),0.101623,0.761905,1.503648,45,654,144,574,56.824322,4.765359e-14,dependent
1,(Desert Eagle_2),(loss),0.038814,0.820896,1.620068,12,687,55,663,27.773912,1.363524e-07,dependent
2,(Desert Eagle_3),(loss),0.016937,0.774194,1.5279,7,692,24,694,8.011235,0.004648803,dependent
3,(Glock-18_1),(loss),0.1482,0.755396,1.490802,68,631,210,508,85.5757,2.22999e-20,dependent
4,(Glock-18_2),(loss),0.109386,0.767327,1.514348,47,652,155,563,64.018662,1.232462e-15,dependent
5,(Glock-18_3),(loss),0.09386,0.76,1.499889,42,657,133,585,51.248363,8.138856e-13,dependent
6,(Glock-18_4),(loss),0.082569,0.754839,1.489703,38,661,117,601,42.870075,5.849869e-11,dependent
7,(Glock-18_5),(loss),0.03458,0.644737,1.272412,27,672,49,669,6.121469,0.01335497,dependent
8,(M4A1-S_1),(win),0.046577,0.725275,1.470264,66,633,25,693,20.93646,4.747716e-06,dependent
9,(MAC-10_1),(win),0.102329,0.644444,1.306406,145,554,80,638,24.446873,7.638741e-07,dependent


In [93]:
# normal rounds, CT
ct_normal_round_rules = get_rules_chisquare_table(
    demos_inventory.loc[
        ((demos_inventory['round_number'] != 1) 
        | (demos_inventory['round_number'] != 13)) &
        (demos_inventory['round_ct_team'] == demos_inventory['team_name'])
        ], 10, 0.05)

ct_normal_round_rules = ct_normal_round_rules.loc[ct_normal_round_rules['chi2_result'] == "dependent"].sort_values(by='lift', ascending=False)
ct_normal_round_rules.loc[ct_normal_round_rules['consequents'] == {'win'}]

Unnamed: 0,antecedents,consequents,support,confidence,lift,win_with,win_without,loss_with,loss_without,chi2,pvalue,chi2_result
73,"(AK-47_2, M4A1-S_1, M4A4_1)",(win),0.026817,0.76,1.50198,38,679,12,688,13.377227,0.000254698,dependent
28,"(AUG_1, AWP_1)",(win),0.021171,0.75,1.482218,30,687,10,690,9.803995,0.001741332,dependent
74,"(AUG_1, AWP_1, M4A1-S_1)",(win),0.016937,0.75,1.482218,24,693,8,692,6.831169,0.00895806,dependent
3,(AUG_1),(win),0.026817,0.745098,1.47253,38,679,13,687,12.099544,0.0005043417,dependent
15,"(AK-47_1, AUG_1)",(win),0.014114,0.740741,1.463919,20,697,7,693,5.148108,0.02327172,dependent
29,"(AUG_1, M4A1-S_1)",(win),0.021877,0.738095,1.45869,31,686,11,689,9.327746,0.002257099,dependent
25,"(AK-47_2, M4A4_1)",(win),0.0494,0.729167,1.441045,70,647,26,674,20.517522,5.908786e-06,dependent
69,"(AK-47_1, MP9_1, M4A1-S_2)",(win),0.01482,0.724138,1.431107,21,696,8,692,4.780265,0.02878769,dependent
68,"(AK-47_1, MP9_1, M4A1-S_1)",(win),0.023994,0.723404,1.429657,34,683,13,687,9.192029,0.002430714,dependent
17,"(AK-47_1, AWP_2)",(win),0.014114,0.714286,1.411636,20,697,8,692,4.143991,0.04178255,dependent
