### Library

In [1]:
import pandas as pd
import numpy as np
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, fpgrowth
import time

### Membaca data

In [2]:
df = pd.read_csv('../data/raw_data.csv')
df

Unnamed: 0,match_id,match_seq_num,radiant_win,start_time,duration,avg_mmr,num_mmr,lobby_type,game_mode,avg_rank_tier,num_rank_tier,cluster,radiant_team,dire_team
0,5937688909,4978118542,False,1618056811,947,5384.0,2.0,7,22,80,5,155,191201077994,31497011025
1,5937680202,4978115238,True,1618056560,1116,3938.0,3.0,7,22,64,8,171,1285859112,856564245
2,5937677706,4978115412,True,1618056484,1245,4343.0,2.0,7,22,52,5,151,135701045931,58441074687
3,5937677414,4978110742,False,1618056474,1019,3626.0,3.0,7,22,57,7,274,20718211466,26468912914
4,5937677106,4978110923,True,1618056464,1082,3216.0,3.0,7,22,43,7,227,2681048711,73701354267
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
622165,6086174414,5094955187,False,1626151320,1882,3011.0,2.0,7,22,41,6,153,30761234741,8848586984
622166,6086173100,5094955069,True,1626151212,1977,2173.0,1.0,7,22,34,3,251,25425398,109391292714
622167,6086172017,5094955078,False,1626151108,2064,3918.0,3.0,7,22,54,4,251,2614485219,107101128111
622168,6086170106,5094954883,False,1626150969,2003,4334.0,5.0,7,22,77,6,241,46611357914,27861045244


### Preprocessing

#### reset index

In [3]:
df = df[(df['avg_mmr'] > 1999) & (df['avg_mmr'] < 7001)].reset_index()
df.shape[0]

621064

#### Memisahkan daftar hero menang

In [4]:
rows_len = df.shape[0]
heroes_win_arr = []

for i in range(0, rows_len):
    if df['radiant_win'][i] == True:
        heroes_win_arr.append(df['radiant_team'][i])
    else:
        heroes_win_arr.append(df['dire_team'][i])

In [5]:
heroes_win = pd.DataFrame(data=heroes_win_arr, columns=['hero_menang'])
heroes_win

Unnamed: 0,hero_menang
0,31497011025
1,1285859112
2,135701045931
3,26468912914
4,2681048711
...,...
621059,8848586984
621060,25425398
621061,107101128111
621062,27861045244


In [6]:
df_win = list(heroes_win["hero_menang"].apply(lambda x:x.split(',')))
# df_win

In [7]:
# pd.set_option('display.max_columns', None)
# pd.set_option('display.max_rows', None)

#### Transformasi data ke dalam bentuk data transaksi

In [8]:
TE = TransactionEncoder()
win_te_data = TE.fit(df_win).transform(df_win)
df_win_ar = pd.DataFrame(win_te_data, columns=TE.columns_)
# df_win_ar

### Penggalian Association Rules

In [9]:
# minimum support
support = 0.0006
print("itemset appear min %d times out of %s sample" % (support*df.shape[0], df.shape[0]))

itemset appear min 372 times out of 621064 sample


#### FP growth

In [10]:
start_time = time.time()
win_frequent = fpgrowth(df_win_ar, min_support=support, use_colnames=True)
ex_time = time.time() - start_time

print("Penggalian frequent pattern dengan fp-growth")
print("Jumlah data   : %s baris" % (heroes_win.shape[0]))
print("Waktu         : %s detik" % (int(ex_time)))

Penggalian frequent pattern dengan fp-growth
Jumlah data   : 621064 baris
Waktu         : 35 detik


In [11]:
win_frequent['length'] = win_frequent['itemsets'].apply(lambda x:len(x))
win_frequent

Unnamed: 0,support,itemsets,length
0,0.063852,(25),1
1,0.063820,(70),1
2,0.037476,(49),1
3,0.037244,(31),1
4,0.024819,(110),1
...,...,...,...
5177,0.000900,"(112, 113)",2
5178,0.000852,"(16, 113)",2
5179,0.000884,"(48, 113)",2
5180,0.000820,"(6, 113)",2


#### Daftar 3 hero paling sering menang ketika dipilih bersama

In [12]:
win_frequent[(win_frequent['length']>=3) & (win_frequent['support']>=support)]

Unnamed: 0,support,itemsets,length
142,0.000636,"(8, 25, 26)",3
143,0.000617,"(84, 25, 8)",3
144,0.000678,"(14, 25, 8)",3
145,0.000609,"(2, 25, 8)",3
168,0.000638,"(135, 70, 26)",3
...,...,...,...
3427,0.000655,"(14, 86, 8)",3
3428,0.000601,"(86, 84, 8)",3
3429,0.000610,"(86, 99, 8)",3
3430,0.000604,"(2, 86, 8)",3


In [13]:
from mlxtend.frequent_patterns import association_rules

win_rule = association_rules(win_frequent, metric="confidence", min_threshold=0.10)  # confidence 10%

pd.set_option('display.max_rows', None)
win_rule[['antecedents','consequents','antecedent support','support','confidence']]

Unnamed: 0,antecedents,consequents,antecedent support,support,confidence
0,(25),(14),0.063852,0.006391,0.100086
1,(25),(8),0.063852,0.007345,0.115039
2,"(25, 26)",(8),0.005408,0.000636,0.117595
3,"(84, 25)",(8),0.004801,0.000617,0.128437
4,"(14, 25)",(8),0.006391,0.000678,0.106072
5,"(2, 25)",(8),0.005201,0.000609,0.117028
6,(70),(74),0.06382,0.006544,0.102533
7,(70),(14),0.06382,0.007345,0.115097
8,(70),(27),0.06382,0.00694,0.10874
9,(70),(26),0.06382,0.008009,0.125492


In [14]:
print("Total rule dihasilkan : {} rule".format(len(win_rule)))

Total rule dihasilkan : 429 rule


### Mengecek total hero yang memilih rekomendasi

In [15]:
assc_rules = win_rule[['antecedents','consequents','confidence']]

antecedents = assc_rules['antecedents']
antecedents = antecedents.apply(lambda x : list(x))

consequents = assc_rules['consequents']
consequents = consequents.apply(lambda x : list(x))

confidence = assc_rules['confidence']

In [16]:
rules = [2,3,4,5,6,6]

rule_count = len(antecedents)
for i in range(rule_count):
    rule_per_row = antecedents[i]
    for j in range(len(rule_per_row)):
        hero_id = antecedents[i][j]
        if hero_id not in rules:
            rules.append(hero_id)
    
print("Total hero yang memiliki rekomendasi : %s hero" % (len(rules)))

Total hero yang memiliki rekomendasi : 121 hero


### Menyimpan association rule dalam format array list

In [17]:
ant_rule = []
for i in range(len(antecedents)):
    id_hero = []
    for j in range(len(antecedents[i])):
        id_hero.append(int(antecedents[i][j]))
    ant_rule.append(id_hero)    
# ant_rule        

In [18]:
con_rule = []
for i in range(len(consequents)):
    id_hero = []
    for j in range(len(consequents[i])):
        id_hero.append(int(consequents[i][j]))
    con_rule.append(id_hero)    
# con_rule    

In [19]:
conf = confidence.tolist()
# conf

In [20]:
# saving association rules as pickle
import pickle

pickle.dump(ant_rule, open('antecedents.sav', 'wb'))
pickle.dump(con_rule, open('consequents.sav', 'wb'))
pickle.dump(conf, open('confidence.sav', 'wb'))