In [44]:
from pprint import pprint as pp
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules

In [45]:
def get_item_set(tx_dict: dict) -> list:
    """Collect the distinct items across all transactions.

    Args:
        tx_dict: mapping whose values are iterables of items (one iterable
            per transaction); the keys are ignored.

    Returns:
        A sorted list containing each item that occurs in at least one
        transaction exactly once.
    """
    # set().union() with no arguments is just the empty set, so an empty
    # mapping yields an empty list.
    return sorted(set().union(*tx_dict.values()))

In [46]:
def get_empty_labels_dict() -> dict:
    """Build a fresh ``{item: 0}`` mapping covering every known item.

    The items come from the module-level ``ITEM_SET``, which therefore has to
    be assigned before this function is called. A new dict is returned on
    every call, so callers may mutate the result freely.
    """
    return dict.fromkeys(ITEM_SET, 0)

In [47]:
def tx_dict_to_ohe_df(tx_dict: dict) -> pd.DataFrame:
    """One-hot encode a transaction mapping into a DataFrame.

    Args:
        tx_dict: mapping of transaction id -> iterable of items. Only the
            values are used; rows follow the mapping's iteration order.

    Returns:
        A DataFrame with one row per transaction. Each row starts from the
        all-zero template produced by ``get_empty_labels_dict()`` and holds 1
        in the column of every item the transaction contains. An item that is
        missing from the template still gets a column, but rows that do not
        contain it are left without a value for it (NaN in the frame).
    """
    encoded_val_ls = []
    for items in tx_dict.values():
        labels = get_empty_labels_dict()
        for item in items:
            # Mark presence instead of counting occurrences: a transaction
            # given as a list with a repeated item must still encode as 1,
            # otherwise the frame is no longer a one-hot (0/1) encoding.
            labels[item] = 1
        encoded_val_ls.append(labels)

    return pd.DataFrame(encoded_val_ls)

In [48]:
def print_freq_items(freq_items: pd.DataFrame):
    """Print each frequent itemset with its support, one per line.

    Lines have the form ``{a, b} : 0.3``. The items inside the braces are
    sorted so the printed text is the same on every run; iterating the raw
    set would expose hash order, which is not stable for strings across
    interpreter sessions.

    Args:
        freq_items: frame with a ``support`` column and an ``itemsets``
            column holding an iterable of mutually comparable items per row.
    """
    for _, row in freq_items.iterrows():
        sup = row['support']
        # Sort first, then turn the list repr into brace notation.
        ordered_items = sorted(row['itemsets'])
        i_set_str = (
            str(ordered_items)
            .replace("'", '')
            .replace('[', '{')
            .replace(']', '}')
        )
        print(f"{i_set_str} : {sup}")

In [56]:
def sort_set(s) -> set:
    """Return ``s`` itself, unchanged.

    NOTE(review): despite the name, nothing inside ``s`` is reordered. The
    original expression wrapped ``s`` in a one-element list, sorted that list
    and took its only element back out, which is always ``s``. Use
    ``sorted(s)`` directly when an ordered list of the items is needed.
    """
    return s

def get_item_set_str(s) -> str:
    """Render a collection of items as sorted brace notation.

    Args:
        s: iterable of mutually comparable items.

    Returns:
        The ``str()`` of the sorted item list with single quotes removed and
        the square brackets swapped for braces, e.g. ``{a, b, d}``.
    """
    # One translation pass: drop "'" and map "[" -> "{", "]" -> "}".
    brace_table = {ord("'"): None, ord('['): '{', ord(']'): '}'}
    ordered = sorted(s)
    return str(ordered).translate(brace_table)

In [50]:
# Toy transaction database: transaction id -> set of single-letter items.
tx_dict = {
    '1': set('abde'),
    '2': set('bcd'),
    '3': set('abde'),
    '4': set('acde'),
    '5': set('bcde'),
    '6': set('bde'),
    '7': set('cd'),
    '8': set('abc'),
    '9': set('ade'),
    '10': set('bd'),
}

In [51]:
# ITEM_SET is read as a global by get_empty_labels_dict(), so it has to be
# assigned before tx_dict_to_ohe_df() is called.
ITEM_SET = get_item_set(tx_dict)
# One row per transaction, one column per item.
ohe_df = tx_dict_to_ohe_df(tx_dict)
# Mine frequent itemsets with a minimum support of 0.2. use_colnames=True
# reports itemsets by column (item) name; verbose=1 prints progress.
freq_items = apriori(ohe_df, min_support=0.2, use_colnames=True, verbose=1)
print_freq_items(freq_items)

Processing 8 combinations | Sampling itemset size 43
{a} : 0.5
{b} : 0.7
{c} : 0.5
{d} : 0.9
{e} : 0.6
{a, b} : 0.3
{a, c} : 0.2
{a, d} : 0.4
{a, e} : 0.4
{c, b} : 0.3
{b, d} : 0.6
{b, e} : 0.4
{c, d} : 0.4
{c, e} : 0.2
{e, d} : 0.6
{a, b, d} : 0.2
{a, b, e} : 0.2
{e, a, d} : 0.4
{c, b, d} : 0.2
{e, b, d} : 0.4
{e, c, d} : 0.2
{e, a, b, d} : 0.2


In [52]:
# Derive association rules from the frequent itemsets, keeping only rules
# whose confidence reaches the 0.6 threshold.
rules = association_rules(freq_items, metric="confidence", min_threshold=0.6)

In [53]:
# Preview the first few rules together with their metrics.
rules.head()

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(a),(b),0.5,0.7,0.3,0.6,0.857143,-0.05,0.75
1,(a),(d),0.5,0.9,0.4,0.8,0.888889,-0.05,0.5
2,(a),(e),0.5,0.6,0.4,0.8,1.333333,0.1,2.0
3,(e),(a),0.6,0.5,0.4,0.666667,1.333333,0.1,1.5
4,(c),(b),0.5,0.7,0.3,0.6,0.857143,-0.05,0.75


In [59]:
# Report support and confidence for every rule in `rules` whose antecedent
# and consequent together contain exactly three items.
for i, row in rules.iterrows():
    acd, csq = row['antecedents'], row['consequents']
    if len(acd) + len(csq) != 3:
        continue

    # Recombine both sides of the rule into a single itemset for display.
    item_set = set(acd).union(csq)
    item_set_str = get_item_set_str(item_set)
    sup = round(row['support'], 2)
    conf = round(row['confidence'], 2)

    print(f"{item_set_str} : min-support = {sup} min-confidence = {conf}")
    

{a, b, d} : min-support = 0.2 min-confidence = 0.67
{a, b, e} : min-support = 0.2 min-confidence = 0.67
{a, d, e} : min-support = 0.4 min-confidence = 1.0
{a, d, e} : min-support = 0.4 min-confidence = 0.67
{a, d, e} : min-support = 0.4 min-confidence = 1.0
{a, d, e} : min-support = 0.4 min-confidence = 0.67
{a, d, e} : min-support = 0.4 min-confidence = 0.8
{b, c, d} : min-support = 0.2 min-confidence = 0.67
{b, d, e} : min-support = 0.4 min-confidence = 1.0
{b, d, e} : min-support = 0.4 min-confidence = 0.67
{b, d, e} : min-support = 0.4 min-confidence = 0.67
{b, d, e} : min-support = 0.4 min-confidence = 0.67
{c, d, e} : min-support = 0.2 min-confidence = 1.0


In [60]:
# Same rule derivation with the confidence threshold set to 0, so no rule is
# filtered out on confidence.
rules_2 = association_rules(freq_items, metric="confidence", min_threshold=0)

In [61]:
# Report support and confidence for every rule in `rules_2` whose antecedent
# and consequent together contain exactly three items.
for i, row in rules_2.iterrows():
    acd, csq = row['antecedents'], row['consequents']
    if len(acd) + len(csq) != 3:
        continue

    # Recombine both sides of the rule into a single itemset for display.
    item_set = set(acd).union(csq)
    item_set_str = get_item_set_str(item_set)
    sup = round(row['support'], 2)
    conf = round(row['confidence'], 2)

    print(f"{item_set_str} : min-support = {sup} min-confidence = {conf}")

{a, b, d} : min-support = 0.2 min-confidence = 0.67
{a, b, d} : min-support = 0.2 min-confidence = 0.5
{a, b, d} : min-support = 0.2 min-confidence = 0.33
{a, b, d} : min-support = 0.2 min-confidence = 0.4
{a, b, d} : min-support = 0.2 min-confidence = 0.29
{a, b, d} : min-support = 0.2 min-confidence = 0.22
{a, b, e} : min-support = 0.2 min-confidence = 0.67
{a, b, e} : min-support = 0.2 min-confidence = 0.5
{a, b, e} : min-support = 0.2 min-confidence = 0.5
{a, b, e} : min-support = 0.2 min-confidence = 0.4
{a, b, e} : min-support = 0.2 min-confidence = 0.29
{a, b, e} : min-support = 0.2 min-confidence = 0.33
{a, d, e} : min-support = 0.4 min-confidence = 1.0
{a, d, e} : min-support = 0.4 min-confidence = 0.67
{a, d, e} : min-support = 0.4 min-confidence = 1.0
{a, d, e} : min-support = 0.4 min-confidence = 0.67
{a, d, e} : min-support = 0.4 min-confidence = 0.8
{a, d, e} : min-support = 0.4 min-confidence = 0.44
{b, c, d} : min-support = 0.2 min-confidence = 0.67
{b, c, d} : min-supp