In [1]:
import pandas as pd

In [2]:
from mlxtend.preprocessing import TransactionEncoder

In [5]:
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules

In [7]:
# Toy transaction database: each inner list is one transaction (basket) of item labels.
dataset = [
    ['A', 'B', 'C'],
    ['A', 'C'],
    ['A', 'D'],
    ['B', 'E', 'F'],
]

In [8]:
# Convert the transaction lists into a boolean item matrix (the format passed to
# apriori further down) using TransactionEncoder.

te = TransactionEncoder()

# One-call alternative to the separate fit / transform steps used in the next cells:
#te_ary = te.fit(dataset).transform(dataset)
#te_ary

In [9]:
# Step 1: fit the encoder on the transactions so it records the distinct item labels
# (available afterwards as te.columns_).
te.fit(dataset)

TransactionEncoder()

In [10]:
# NOTE(review): this builds a second, unfitted TransactionEncoder and discards it;
# it has no effect on `te` and looks like a leftover cell - consider removing.
TransactionEncoder()

TransactionEncoder()

In [11]:
# Item labels recorded by the fitted encoder; used as the DataFrame column names below.
te.columns_

['A', 'B', 'C', 'D', 'E', 'F']

In [12]:
# Step 2: encode each transaction as one boolean row with one column per item label
# (True where the transaction contains that item).
te_ary = te.transform(dataset)
te_ary

array([[ True,  True,  True, False, False, False],
       [ True, False,  True, False, False, False],
       [ True, False, False,  True, False, False],
       [False,  True, False, False,  True,  True]])

In [13]:
# Wrap the boolean array in a DataFrame, labelling each column with its item name.
df = pd.DataFrame(te_ary, columns=te.columns_)
df

Unnamed: 0,A,B,C,D,E,F
0,True,True,True,False,False,False
1,True,False,True,False,False,False
2,True,False,False,True,False,False
3,False,True,False,False,True,True


In [14]:
# Return the items and itemsets with at least 50% support. Without use_colnames
# the itemsets are reported as column indices rather than item labels.
apriori(df, min_support=0.5)

Unnamed: 0,support,itemsets
0,0.75,(0)
1,0.5,(1)
2,0.5,(2)
3,0.5,"(0, 2)"


In [15]:
#By default, apriori returns the column indices of the items, which may be useful in downstream operations such as association rule mining.


In [16]:
# use_colnames=True reports each itemset with the DataFrame's column names (item labels)
# instead of integer column indices.
apriori(df, min_support=0.5, use_colnames=True)

Unnamed: 0,support,itemsets
0,0.75,(A)
1,0.5,(B)
2,0.5,(C)
3,0.5,"(A, C)"


In [17]:
#Suppose we are only interested in itemsets of length 2 that have a support of at least 50 percent.


In [18]:
# Mine the frequent itemsets (support >= 0.5, reported by item label) and record the
# number of items in each itemset in a new 'length' column.
frequent_itemsets = apriori(df, min_support=0.5, use_colnames=True)

# The built-in len is applied to each itemset directly; wrapping it in a lambda
# adds nothing.
frequent_itemsets['length'] = frequent_itemsets['itemsets'].apply(len)
frequent_itemsets

Unnamed: 0,support,itemsets,length
0,0.75,(A),1
1,0.5,(B),1
2,0.5,(C),1
3,0.5,"(A, C)",2


In [20]:
#The association_rules function lets you specify the metric of interest and the corresponding threshold; available metrics include confidence and lift.
#Here we keep only rules derived from the frequent itemsets whose confidence meets the 50% threshold (min_threshold=0.50).

In [21]:
# Derive association rules from the frequent itemsets, filtering on the
# confidence metric with a threshold of 0.50.
association_rules(
    frequent_itemsets,
    metric="confidence",
    min_threshold=0.50,
)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(A),(C),0.75,0.5,0.5,0.666667,1.333333,0.125,1.5
1,(C),(A),0.5,0.75,0.5,1.0,1.333333,0.125,inf


In [23]:
# Generate rules again, this time filtering on the lift metric (threshold 1.2),
# and keep the result in `rules`.
rules = association_rules(frequent_itemsets, metric="lift", min_threshold=1.2)
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(A),(C),0.75,0.5,0.5,0.666667,1.333333,0.125,1.5
1,(C),(A),0.5,0.75,0.5,1.0,1.333333,0.125,inf


In [24]:
# Record how many items each rule's antecedent contains. The built-in len is
# applied directly; wrapping it in a lambda adds nothing.
rules["antecedent_len"] = rules["antecedents"].apply(len)

In [25]:
# Show the rules table, which now includes the antecedent_len column.
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,antecedent_len
0,(A),(C),0.75,0.5,0.5,0.666667,1.333333,0.125,1.5,1
1,(C),(A),0.5,0.75,0.5,1.0,1.333333,0.125,inf,1
