In [1]:
import pandas as pd
import numpy as np
import copy

from scipy.stats import chi2_contingency
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

# Load the demo data from demos.csv (path is relative to the working directory).
# Later cells read its 'player_loadout', 'round_id', 'team_name',
# 'round_ct_team' and 'round_result' columns.
demos = pd.read_csv("demos.csv")

In [2]:
# Turn each comma-separated loadout string into a list of item names.
demos['player_loadout_array'] = demos['player_loadout'].str.split(', ')

# Every distinct item name that occurs in any loadout, in order of first appearance.
unique_items = demos['player_loadout_array'].explode().unique().tolist()

# An item is treated as a knife when its lower-cased name contains one of these substrings.
knives = [
    name
    for name in unique_items
    if any(keyword in name.lower() for keyword in ("knife", "daggers", "bayonet", "karambit"))
]

knives

['Butterfly Knife',
 'Talon Knife',
 'M9 Bayonet',
 'Karambit',
 'Shadow Daggers',
 'Skeleton Knife',
 'Flip Knife',
 'Bayonet',
 'Survival Knife',
 'Nomad Knife',
 'Stiletto Knife',
 'knife_t',
 'knife',
 'Ursus Knife']

In [3]:
def _without_knives(loadout):
    """Return the loadout's items in their original order, skipping names listed in `knives`."""
    return [name for name in loadout if name not in knives]


demos['player_loadout_array'] = demos['player_loadout_array'].apply(_without_knives)
demos['player_loadout_array'].head(10)


0              [Glock-18, C4 Explosive]
1    [Glock-18, Smoke Grenade, Molotov]
2                            [Glock-18]
3      [P250, Smoke Grenade, Flashbang]
4                               [USP-S]
5                [P2000, Smoke Grenade]
6                       [Dual Berettas]
7                               [USP-S]
8                    [USP-S, Flashbang]
9                            [Glock-18]
Name: player_loadout_array, dtype: object

In [4]:
# Rounds in which at least one row has an empty loadout list.
has_empty_loadout = demos['player_loadout_array'].str.len() == 0
outliar_rounds = demos.loc[has_empty_loadout, 'round_id'].values
print(outliar_rounds)

# Drop every row of those rounds, not only the rows with the empty loadout.
demos_inventory = demos.loc[~demos['round_id'].isin(outliar_rounds)]

[ 258  454  701 1265]


In [5]:
# Distinct item names that remain in the filtered loadouts, in order of first appearance.
item_types = pd.unique(demos_inventory['player_loadout_array'].explode())
item_types

array(['Glock-18', 'C4 Explosive', 'Smoke Grenade', 'Molotov', 'P250',
       'Flashbang', 'USP-S', 'P2000', 'Dual Berettas', 'Tec-9',
       'Desert Eagle', 'MP9', 'Incendiary Grenade',
       'High Explosive Grenade', 'FAMAS', 'AK-47', 'M4A1-S', 'M4A4',
       'AWP', 'Galil AR', 'Five-SeveN', 'Decoy Grenade', 'MAC-10', 'AUG',
       'Zeus x27', 'SSG 08', 'XM1014', 'MAG-7', 'SG 553', 'CZ75-Auto',
       'MP7', 'MP5-SD', 'UMP-45'], dtype=object)

In [6]:
# Item names treated as secondary weapons.
secondaries = ['Glock-18', 'P250', 'USP-S', 'P2000', 'Dual Berettas', 'Tec-9',
               'Desert Eagle', 'CZ75-Auto', 'Five-SeveN']

# Item names treated as primary weapons. Items in neither list (grenades,
# 'C4 Explosive', 'Zeus x27') are ignored when picking a weapon.
primaries = ['MP9', 'FAMAS', 'AK-47', 'M4A1-S', 'M4A4',
             'AWP', 'Galil AR', 'MAC-10', 'AUG',
             'SSG 08', 'XM1014', 'MAG-7', 'SG 553',
             'MP7', 'MP5-SD', 'UMP-45']


def _first_listed_item(loadout, candidates):
    """Return the first item of `loadout` that appears in `candidates`, or None if none does."""
    return next((item for item in loadout if item in candidates), None)


# First primary / secondary weapon found in each loadout (None when the loadout has none).
primary_ser = demos_inventory['player_loadout_array'].apply(
    lambda loadout: _first_listed_item(loadout, primaries)
)
demos_inventory = demos_inventory.assign(primary_weapon=primary_ser)

secondary_ser = demos_inventory['player_loadout_array'].apply(
    lambda loadout: _first_listed_item(loadout, secondaries)
)
demos_inventory = demos_inventory.assign(secondary_weapon=secondary_ser)

# The row's weapon is its primary when it has one, otherwise its secondary.
weapon_ser = np.where(
    demos_inventory['primary_weapon'].notna(),
    demos_inventory['primary_weapon'],
    demos_inventory['secondary_weapon'],
)
demos_inventory = demos_inventory.assign(weapon=weapon_ser)

# Rows that ended up with neither a primary nor a secondary weapon.
demos_inventory.loc[demos_inventory['weapon'].isna()]

Unnamed: 0,match_id,map_id,round_id,team_name,map_name,round_number,round_ct_team,round_first_site_hit,round_site_hit_time,round_bomb_plant_site,...,round_first_killer,round_first_death,player_headshots,player_upperbodyshots,player_stomachshots,player_legshots,player_loadout_array,primary_weapon,secondary_weapon,weapon
5694,13,29,569,Team Falcons,de_ancient,15,HEROIC,A,67.3125,A,...,False,False,0,0,0,0,[C4 Explosive],,,
6114,13,31,611,Team Falcons,de_dust2,6,HEROIC,A,51.78125,,...,False,False,0,0,0,0,"[Smoke Grenade, Flashbang]",,,
6559,14,33,655,Lynn Vision Gaming,de_anubis,11,FURIA,B,34.828125,B,...,False,False,0,1,0,0,[C4 Explosive],,,
8476,18,43,847,FURIA,de_nuke,12,FURIA,B,34.359375,A,...,False,False,0,0,0,0,"[Flashbang, Smoke Grenade]",,,
9260,20,47,926,Team Falcons,de_dust2,9,MOUZ,B,86.03125,B,...,False,False,0,0,0,0,[Flashbang],,,
12249,25,61,1224,GamerLegion,de_nuke,18,Team Falcons,,,A,...,False,False,0,10,2,0,[Smoke Grenade],,,
13375,27,67,1337,Team Vitality,de_inferno,1,Team Vitality,A,44.703125,,...,False,False,0,0,0,0,[Flashbang],,,


In [7]:
# Round ids that contain at least one row without any weapon.
demos_inventory.loc[demos_inventory['weapon'].isna(), 'round_id'].values

array([ 569,  611,  655,  847,  926, 1224, 1337])

In [8]:
# Collect the rounds that have a weapon-less row, then drop those rounds entirely.
lacks_weapon = demos_inventory['weapon'].isna()
more_outliar_rounds = demos_inventory.loc[lacks_weapon, 'round_id'].values
demos_inventory = demos_inventory.loc[~demos_inventory['round_id'].isin(more_outliar_rounds)]

# Sanity check: no weapon-less rows should be left, so this displays an empty array.
demos_inventory.loc[demos_inventory['weapon'].isna(), 'round_id'].values

array([], dtype=int64)

In [9]:
# A row is a win when the round result names the side its team was on:
# 'CT' while the team is the round's CT team, or 'T' while it is not.
on_ct_side = demos_inventory['round_ct_team'] == demos_inventory['team_name']
ct_won = demos_inventory['round_result'] == 'CT'
t_won = demos_inventory['round_result'] == 'T'
result = (ct_won & on_ct_side) | (t_won & ~on_ct_side)

# A per-row boolean outcome is easier to feed into association rule mining.
demos_inventory = demos_inventory.assign(result=result)
demos_inventory

Unnamed: 0,match_id,map_id,round_id,team_name,map_name,round_number,round_ct_team,round_first_site_hit,round_site_hit_time,round_bomb_plant_site,...,round_first_death,player_headshots,player_upperbodyshots,player_stomachshots,player_legshots,player_loadout_array,primary_weapon,secondary_weapon,weapon,result
0,0,0,0,Team Liquid,de_ancient,1,FaZe Clan,A,23.078125,,...,False,1,1,1,0,"[Glock-18, C4 Explosive]",,Glock-18,Glock-18,False
1,0,0,0,Team Liquid,de_ancient,1,FaZe Clan,A,23.078125,,...,False,1,3,2,0,"[Glock-18, Smoke Grenade, Molotov]",,Glock-18,Glock-18,False
2,0,0,0,Team Liquid,de_ancient,1,FaZe Clan,A,23.078125,,...,False,1,1,0,0,[Glock-18],,Glock-18,Glock-18,False
3,0,0,0,Team Liquid,de_ancient,1,FaZe Clan,A,23.078125,,...,False,0,0,0,0,"[P250, Smoke Grenade, Flashbang]",,P250,P250,False
4,0,0,0,FaZe Clan,de_ancient,1,FaZe Clan,A,23.078125,,...,False,1,5,0,0,[USP-S],,USP-S,USP-S,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14275,28,70,1427,MOUZ,de_inferno,21,Team Vitality,,,,...,False,0,0,0,0,"[Glock-18, Galil AR, Smoke Grenade, Molotov, F...",Galil AR,Glock-18,Galil AR,False
14276,28,70,1427,Team Vitality,de_inferno,21,Team Vitality,,,,...,False,1,1,0,0,"[Smoke Grenade, AK-47, Flashbang, Incendiary G...",AK-47,Five-SeveN,AK-47,True
14277,28,70,1427,Team Vitality,de_inferno,21,Team Vitality,,,,...,False,0,5,1,1,"[Five-SeveN, AK-47, Smoke Grenade, High Explos...",AK-47,Five-SeveN,AK-47,True
14278,28,70,1427,MOUZ,de_inferno,21,Team Vitality,,,,...,False,0,0,0,0,"[Glock-18, AK-47, Molotov, High Explosive Gren...",AK-47,Glock-18,AK-47,False


In [10]:
# One basket per (round, team): the weapons of that team's rows plus a trailing
# "win"/"loss" label.
team_round_keys = ['round_id', 'team_name']

# Outcome of each (round, team), computed once up front. The previous version
# re-filtered the whole of demos_inventory inside the per-group lambda.
outcome_by_team_round = demos_inventory.groupby(team_round_keys)['result'].first()

baskets_df = (
    demos_inventory.groupby(team_round_keys)['weapon']
    # `weapons.name` is the (round_id, team_name) key of the current group.
    .apply(
        lambda weapons: list(weapons)
        + ["win" if outcome_by_team_round.loc[weapons.name] else "loss"]
    )
    .reset_index()
    .rename(columns={"weapon": "basket"})
)

# Bring the boolean `result` back next to each basket. The merge yields one row per
# player row, so keep only the first row of every (round, team).
baskets_df = pd.merge(baskets_df, demos_inventory, how="inner", on=["team_name", "round_id"])[
    ['round_id', 'team_name', 'basket', 'result']
]
# reset_index() without drop=True keeps the merged-row position as an 'index' column;
# it is retained so the frame has the same columns as before.
baskets_df = baskets_df.drop_duplicates(subset=team_round_keys).reset_index()
baskets_df = baskets_df.assign(
    result=baskets_df['result'].apply(lambda won: "win" if won else "loss")
)
baskets_df

Unnamed: 0,index,round_id,team_name,basket,result
0,0,0,FaZe Clan,"[USP-S, P2000, Dual Berettas, USP-S, USP-S, win]",win
1,5,0,Team Liquid,"[Glock-18, Glock-18, Glock-18, P250, Glock-18,...",loss
2,10,1,FaZe Clan,"[MP9, MP9, FAMAS, MP9, MP9, win]",win
3,15,1,Team Liquid,"[Tec-9, Tec-9, Desert Eagle, Tec-9, Tec-9, loss]",loss
4,20,2,FaZe Clan,"[MP9, MP9, FAMAS, MP9, MP9, loss]",loss
...,...,...,...,...,...
2829,14145,1425,Team Vitality,"[AK-47, AWP, M4A1-S, M4A4, M4A1-S, loss]",loss
2830,14150,1426,MOUZ,"[AK-47, AK-47, AWP, AK-47, AK-47, loss]",loss
2831,14155,1426,Team Vitality,"[MAG-7, MP9, MP9, Five-SeveN, Five-SeveN, win]",win
2832,14160,1427,MOUZ,"[AK-47, Galil AR, Galil AR, AK-47, AK-47, loss]",loss


In [11]:
def mark_duplicates(basket: list):
    """Number repeated items so every occurrence in a basket gets a distinct label.

    The k-th occurrence of an item becomes ``"<item>_<k>"`` (counting from 1),
    e.g. ``["MP9", "MP9", "AWP"]`` -> ``["MP9_1", "MP9_2", "AWP_1"]``.
    The outcome labels ``'win'`` and ``'loss'`` are passed through unchanged.

    Args:
        basket: list of item-name strings, optionally containing 'win'/'loss'.

    Returns:
        A new list of the same length and order; `basket` itself is not modified.
    """
    outcome_labels = ('win', 'loss')
    occurrences = {}
    marked = []
    for item in basket:
        if item in outcome_labels:
            marked.append(item)
            continue
        occurrences[item] = occurrences.get(item, 0) + 1
        # String concatenation (not formatting) so a non-string item still raises.
        marked.append(item + "_" + str(occurrences[item]))
    return marked

# Add the occurrence-numbered version of every basket as a new column.
marked_baskets = baskets_df['basket'].apply(mark_duplicates)
baskets_df = baskets_df.assign(marked_basket=marked_baskets)
baskets_df


Unnamed: 0,index,round_id,team_name,basket,result,marked_basket
0,0,0,FaZe Clan,"[USP-S, P2000, Dual Berettas, USP-S, USP-S, win]",win,"[USP-S_1, P2000_1, Dual Berettas_1, USP-S_2, U..."
1,5,0,Team Liquid,"[Glock-18, Glock-18, Glock-18, P250, Glock-18,...",loss,"[Glock-18_1, Glock-18_2, Glock-18_3, P250_1, G..."
2,10,1,FaZe Clan,"[MP9, MP9, FAMAS, MP9, MP9, win]",win,"[MP9_1, MP9_2, FAMAS_1, MP9_3, MP9_4, win]"
3,15,1,Team Liquid,"[Tec-9, Tec-9, Desert Eagle, Tec-9, Tec-9, loss]",loss,"[Tec-9_1, Tec-9_2, Desert Eagle_1, Tec-9_3, Te..."
4,20,2,FaZe Clan,"[MP9, MP9, FAMAS, MP9, MP9, loss]",loss,"[MP9_1, MP9_2, FAMAS_1, MP9_3, MP9_4, loss]"
...,...,...,...,...,...,...
2829,14145,1425,Team Vitality,"[AK-47, AWP, M4A1-S, M4A4, M4A1-S, loss]",loss,"[AK-47_1, AWP_1, M4A1-S_1, M4A4_1, M4A1-S_2, l..."
2830,14150,1426,MOUZ,"[AK-47, AK-47, AWP, AK-47, AK-47, loss]",loss,"[AK-47_1, AK-47_2, AWP_1, AK-47_3, AK-47_4, loss]"
2831,14155,1426,Team Vitality,"[MAG-7, MP9, MP9, Five-SeveN, Five-SeveN, win]",win,"[MAG-7_1, MP9_1, MP9_2, Five-SeveN_1, Five-Sev..."
2832,14160,1427,MOUZ,"[AK-47, Galil AR, Galil AR, AK-47, AK-47, loss]",loss,"[AK-47_1, Galil AR_1, Galil AR_2, AK-47_2, AK-..."


In [12]:
# Encode the marked baskets as a table with one row per basket and one column per item.
te = TransactionEncoder()
te.fit(baskets_df['marked_basket'])
te_ary = te.transform(baskets_df['marked_basket'])
basket_df = pd.DataFrame(te_ary, columns=te.columns_)

# len(demos_inventory) / 10 estimates the number of rounds, assuming 10 rows per round.
# NOTE(review): basket_df has one row per (round, team) basket, not one per round, so
# this cut-off appears to correspond to about 2 * min_rounds baskets. Confirm this is intended.
min_rounds = 20
estimated_rounds = len(demos_inventory) / 10
min_support = min_rounds / estimated_rounds

frequent_itemsets = apriori(basket_df, min_support=min_support, use_colnames=True)
rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.6)

# Keep only the rules whose consequent is exactly the 'loss' or the 'win' label.
predicts_loss = rules['consequents'] == {'loss'}
predicts_win = rules['consequents'] == {'win'}
winloss_rules = rules[predicts_loss | predicts_win]
winloss_rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
31,(Desert Eagle_1),(loss),0.119972,0.500353,0.094566,0.788235,1.575359,1.0,0.034538,2.359445,0.415014,0.179866,0.576171,0.488617
33,(Desert Eagle_2),(loss),0.040579,0.500353,0.033169,0.817391,1.633630,1.0,0.012865,2.736163,0.404271,0.065323,0.634525,0.441841
37,(Dual Berettas_1),(win),0.033169,0.499647,0.021524,0.648936,1.298789,1.0,0.004952,1.425248,0.237944,0.042098,0.298367,0.346008
42,(Five-SeveN_1),(loss),0.065279,0.500353,0.051517,0.789189,1.577265,1.0,0.018855,2.370121,0.391551,0.100206,0.578081,0.446076
43,(Five-SeveN_2),(loss),0.033522,0.500353,0.027170,0.810526,1.619909,1.0,0.010397,2.637027,0.395955,0.053621,0.620785,0.432414
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1807,"(Glock-18_5, Glock-18_4, Glock-18_3, Glock-18_2)",(loss),0.033522,0.500353,0.020466,0.610526,1.220192,1.0,0.003693,1.282878,0.186716,0.039863,0.220503,0.325714
1822,"(Glock-18_2, Glock-18_4, Glock-18_3, P250_1)",(loss),0.020113,0.500353,0.014820,0.736842,1.472645,1.0,0.004756,1.898659,0.327537,0.029309,0.473313,0.383231
1934,"(M4A1-S_3, M4A1-S_2, M4A1-S_4, AWP_1, M4A1-S_1)",(win),0.025406,0.499647,0.015526,0.611111,1.223085,1.0,0.002832,1.286622,0.187150,0.030471,0.222771,0.321092
1950,"(Glock-18_3, Glock-18_2, Glock-18_5, Glock-18_...",(loss),0.033522,0.500353,0.020466,0.610526,1.220192,1.0,0.003693,1.282878,0.186716,0.039863,0.220503,0.325714


In [13]:
def extract_highest(set: frozenset[str]):
    """Collapse numbered items to the highest-numbered label per item name.

    Items look like ``"<name>_<k>"``. For every name only the element with the
    largest k is kept, e.g. ``{"AK-47_1", "AK-47_2", "AWP_1"}`` ->
    ``{"AK-47_2", "AWP_1"}``.

    Counts are compared numerically (so 10 ranks above 9), and the name is split
    from the count at the LAST underscore, so names that contain '_' stay intact.
    Elements without a numeric suffix are returned unchanged.

    Args:
        set: the item labels of one rule side. The parameter name shadows the
            builtin and is kept for backward compatibility.

    Returns:
        A frozenset holding a subset of the input elements.
    """
    best = {}        # item name -> (count, original label)
    unnumbered = []  # labels that carry no "_<k>" suffix
    for item in set:
        name, sep, suffix = item.rpartition('_')
        if not sep or not suffix.isdecimal():
            unnumbered.append(item)
            continue
        count = int(suffix)
        if name not in best or best[name][0] < count:
            best[name] = (count, item)
    return frozenset([label for _, label in best.values()] + unnumbered)

# Reduce every antecedent to its highest-numbered items, drop rows that became
# exact duplicates, renumber the rows, and keep only the columns used below.
reduced_antecedents = winloss_rules['antecedents'].apply(extract_highest)
winloss_rules = (
    winloss_rules
    .assign(antecedents=reduced_antecedents)
    .drop_duplicates()
    .reset_index(drop=True)
)
winloss_rules = winloss_rules[['antecedents', 'consequents', 'support', 'confidence', 'lift']]
winloss_rules

Unnamed: 0,antecedents,consequents,support,confidence,lift
0,(Desert Eagle_1),(loss),0.094566,0.788235,1.575359
1,(Desert Eagle_2),(loss),0.033169,0.817391,1.633630
2,(Dual Berettas_1),(win),0.021524,0.648936,1.298789
3,(Five-SeveN_1),(loss),0.051517,0.789189,1.577265
4,(Five-SeveN_2),(loss),0.027170,0.810526,1.619909
...,...,...,...,...,...
63,"(AK-47_2, AWP_1, M4A1-S_1)",(win),0.038462,0.685535,1.372037
64,"(AK-47_2, AWP_1, M4A4_1)",(win),0.021524,0.701149,1.403289
65,"(AK-47_2, Galil AR_1, MAC-10_1)",(win),0.015526,0.698413,1.397812
66,"(AK-47_2, M4A4_1, M4A1-S_1)",(win),0.015526,0.785714,1.572538


In [14]:
def _contains_rule(rule, baskets):
    """Boolean Series aligned with `baskets`: True where 'marked_basket' holds every item of `rule`."""
    return baskets['marked_basket'].apply(
        lambda basket: all(item in basket for item in rule)
    ).astype(bool)


def _count_result(selected, outcome):
    """Number of rows of `selected` whose 'result' equals `outcome`."""
    return int((selected['result'] == outcome).sum())


def baskets_with_rule(rule: frozenset[str], baskets=None):
    """Return the baskets whose 'marked_basket' contains every item of `rule`.

    Args:
        rule: item labels that must all be present (an empty rule matches every basket).
        baskets: DataFrame with 'marked_basket' and 'result' columns; defaults to
            the notebook-level `baskets_df`.
    """
    if baskets is None:
        baskets = baskets_df
    return baskets.loc[_contains_rule(rule, baskets)]


def baskets_without_rule(rule: frozenset[str], baskets=None):
    """Return the baskets that are missing at least one item of `rule`.

    Takes the same arguments as `baskets_with_rule` and returns its complement.
    """
    if baskets is None:
        baskets = baskets_df
    return baskets.loc[~_contains_rule(rule, baskets)]


def count_win_with(rule: frozenset[str], baskets=None):
    """Count baskets that contain `rule` and have result 'win'."""
    return _count_result(baskets_with_rule(rule, baskets), 'win')


def count_win_without(rule: frozenset[str], baskets=None):
    """Count baskets that do not contain `rule` and have result 'win'."""
    return _count_result(baskets_without_rule(rule, baskets), 'win')


def count_loss_with(rule: frozenset[str], baskets=None):
    """Count baskets that contain `rule` and have result 'loss'."""
    return _count_result(baskets_with_rule(rule, baskets), 'loss')


def count_loss_without(rule: frozenset[str], baskets=None):
    """Count baskets that do not contain `rule` and have result 'loss'."""
    return _count_result(baskets_without_rule(rule, baskets), 'loss')

# For every rule, count the baskets in each cell of the
# (antecedent present / absent) x (win / loss) table.
antecedent_sets = winloss_rules['antecedents']
winloss_rules = winloss_rules.assign(
    win_with=antecedent_sets.apply(count_win_with),
    win_without=antecedent_sets.apply(count_win_without),
    loss_with=antecedent_sets.apply(count_loss_with),
    loss_without=antecedent_sets.apply(count_loss_without),
)

winloss_rules

Unnamed: 0,antecedents,consequents,support,confidence,lift,win_with,win_without,loss_with,loss_without
0,(Desert Eagle_1),(loss),0.094566,0.788235,1.575359,72,1344,268,1150
1,(Desert Eagle_2),(loss),0.033169,0.817391,1.633630,21,1395,94,1324
2,(Dual Berettas_1),(win),0.021524,0.648936,1.298789,61,1355,33,1385
3,(Five-SeveN_1),(loss),0.051517,0.789189,1.577265,39,1377,146,1272
4,(Five-SeveN_2),(loss),0.027170,0.810526,1.619909,18,1398,77,1341
...,...,...,...,...,...,...,...,...,...
63,"(AK-47_2, AWP_1, M4A1-S_1)",(win),0.038462,0.685535,1.372037,109,1307,50,1368
64,"(AK-47_2, AWP_1, M4A4_1)",(win),0.021524,0.701149,1.403289,61,1355,26,1392
65,"(AK-47_2, Galil AR_1, MAC-10_1)",(win),0.015526,0.698413,1.397812,44,1372,19,1399
66,"(AK-47_2, M4A4_1, M4A1-S_1)",(win),0.015526,0.785714,1.572538,44,1372,12,1406


In [15]:
# Show the rules ordered from lowest to highest confidence.
winloss_rules.sort_values('confidence')

Unnamed: 0,antecedents,consequents,support,confidence,lift,win_with,win_without,loss_with,loss_without
10,(M4A4_1),(win),0.105857,0.601202,1.203254,300,1116,199,1219
27,"(AK-47_2, AWP_1)",(win),0.125970,0.602024,1.204898,357,1059,236,1182
11,(M4A4_2),(win),0.038109,0.603352,1.207556,108,1308,71,1347
21,"(M4A1-S_2, AK-47_1)",(win),0.052576,0.605691,1.212238,149,1267,97,1321
58,"(M4A1-S_1, AWP_1, AK-47_1)",(win),0.070572,0.609756,1.220373,200,1216,128,1290
...,...,...,...,...,...,...,...,...,...
44,"(Desert Eagle_1, USP-S_1)",(loss),0.019054,0.870968,1.740707,8,1408,54,1364
36,"(Five-SeveN_2, Desert Eagle_1)",(loss),0.014820,0.875000,1.748766,6,1410,42,1376
37,"(Desert Eagle_1, Glock-18_1)",(loss),0.029287,0.892473,1.783687,10,1406,83,1335
40,"(Desert Eagle_1, P250_1)",(loss),0.017996,0.944444,1.887557,3,1413,51,1367
