In [1]:
import pprint

import numpy as np
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules
from mlxtend.preprocessing import TransactionEncoder
from pyfpgrowth import find_frequent_patterns,generate_association_rules



In [2]:
# Toy market-basket dataset: nine transactions (T100 .. T900), stored column-wise
# so it can be handed straight to pd.DataFrame.
data = {
    'TID': [f'T{n}00' for n in range(1, 10)],
    'List of Items': [
        basket.split(',')
        for basket in (
            'I1,I2,I3',
            'I2,I4',
            'I2,I3',
            'I1,I2,I4',
            'I1,I3',
            'I2,I3',
            'I1,I3',
            'I1,I2,I3,I5',
            'I1,I2,I3',
        )
    ],
}

In [3]:
# One row per transaction: its TID and the list of items bought.
df = pd.DataFrame(data)

In [4]:
# Preview the table; the dataset has nine transactions, so this shows every row.
df.head(n=9)

Unnamed: 0,TID,List of Items
0,T100,"[I1, I2, I3]"
1,T200,"[I2, I4]"
2,T300,"[I2, I3]"
3,T400,"[I1, I2, I4]"
4,T500,"[I1, I3]"
5,T600,"[I2, I3]"
6,T700,"[I1, I3]"
7,T800,"[I1, I2, I3, I5]"
8,T900,"[I1, I2, I3]"


In [7]:
# Encode the item lists into a table with one column per distinct item
# (column labels come from the fitted encoder's `columns_`), which is the
# input layout `apriori` is given below.
te = TransactionEncoder()
te_ary = te.fit_transform(df['List of Items'])
df_onehot = pd.DataFrame(data=te_ary, columns=te.columns_)

In [8]:
# Thresholds for the mining steps: min_support is a relative support
# (fraction of transactions), min_confidence is the rule-confidence cut-off.
min_support = 0.2
min_confidence = 0.7

# Frequent itemsets via Apriori, reported with item names instead of column indices.
patterns_from_apriori = apriori(
    df_onehot,
    min_support=min_support,
    use_colnames=True,
)
print(patterns_from_apriori)

    support      itemsets
0  0.666667          (I1)
1  0.777778          (I2)
2  0.777778          (I3)
3  0.222222          (I4)
4  0.444444      (I1, I2)
5  0.555556      (I3, I1)
6  0.555556      (I3, I2)
7  0.222222      (I4, I2)
8  0.333333  (I3, I1, I2)


In [10]:
# Turn the Apriori itemsets into association rules, using min_confidence
# as the threshold on the confidence metric.
rules_from_apriori = association_rules(
    patterns_from_apriori,
    metric="confidence",
    min_threshold=min_confidence,
)
# Only the rule sides and their confidence are of interest here.
print(rules_from_apriori.loc[:, ['antecedents', 'consequents', 'confidence']])

  antecedents consequents  confidence
0        (I3)        (I1)    0.714286
1        (I1)        (I3)    0.833333
2        (I3)        (I2)    0.714286
3        (I2)        (I3)    0.714286
4        (I4)        (I2)    1.000000
5    (I1, I2)        (I3)    0.750000


In [17]:
# Frequent itemsets via FP-Growth (pyfpgrowth), for comparison with Apriori.
# min_support / min_confidence are deliberately NOT redefined here: reusing the
# values set in the Apriori cell keeps both miners on identical thresholds.
transactions = df['List of Items'].tolist()

# pyfpgrowth takes the support threshold as an absolute occurrence count,
# so the relative min_support is scaled by the number of transactions.
patterns_from_fpgrowth = find_frequent_patterns(
    transactions,
    min_support * len(transactions),
)

# NOTE(review): the recorded output of this cell has no ('I1',) entry even
# though Apriori reports I1 with support 0.667 -- cross-check pyfpgrowth's
# result before relying on it.
pprint.pprint(patterns_from_fpgrowth)

{('I1', 'I2'): 4,
 ('I1', 'I2', 'I3'): 3,
 ('I1', 'I3'): 5,
 ('I2',): 7,
 ('I2', 'I3'): 5,
 ('I2', 'I4'): 2,
 ('I3',): 7,
 ('I4',): 2}


In [18]:
# Association rules from the FP-Growth patterns, filtered by min_confidence.
# NOTE(review): the result is a dict keyed by antecedent, so it can hold at
# most one rule per antecedent; the recorded output lists fewer rules than
# the Apriori run (no I1 -> I3 / I3 -> I1) -- confirm this is acceptable.
rules_from_fpgrowth = generate_association_rules(
    patterns_from_fpgrowth,
    confidence_threshold=min_confidence,
)
pprint.pprint(rules_from_fpgrowth)

{('I1', 'I2'): (('I3',), 0.75),
 ('I2',): (('I3',), 0.7142857142857143),
 ('I3',): (('I2',), 0.7142857142857143),
 ('I4',): (('I2',), 1.0)}
