In [1]:
!pip install pandas
!pip install mlxtend

Defaulting to user installation because normal site-packages is not writeable
Defaulting to user installation because normal site-packages is not writeable


In [20]:
import pandas as pd
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules

transactions = [
    [1, (1, 2, 5)],
    [2, (2, 4)],
    [3, (2, 3)],
    [4, (1, 2, 4)],
    [5, (1, 3)],
    [6, (2, 3)],
    [7, (1, 3)],
    [8, (1, 2, 3, 5)],
    [9, (1, 2, 3)],
]

transactions

[[1, (1, 2, 5)],
 [2, (2, 4)],
 [3, (2, 3)],
 [4, (1, 2, 4)],
 [5, (1, 3)],
 [6, (2, 3)],
 [7, (1, 3)],
 [8, (1, 2, 3, 5)],
 [9, (1, 2, 3)]]

In [19]:
# Convert tranactional data into individual rows
normalizedTransactions = []
for t in transactions:
    for i in t[1]:
        normalizedTransactions.append([t[0], i, 1])

normalizedTransactions

[[1, 1, 1],
 [1, 2, 1],
 [1, 5, 1],
 [2, 2, 1],
 [2, 4, 1],
 [3, 2, 1],
 [3, 3, 1],
 [4, 1, 1],
 [4, 2, 1],
 [4, 4, 1],
 [5, 1, 1],
 [5, 3, 1],
 [6, 2, 1],
 [6, 3, 1],
 [7, 1, 1],
 [7, 3, 1],
 [8, 1, 1],
 [8, 2, 1],
 [8, 3, 1],
 [8, 5, 1],
 [9, 1, 1],
 [9, 2, 1],
 [9, 3, 1]]

In [15]:
df = pd.DataFrame(normalizedTransactions, columns=['transactionId', 'itemId', 'quantity'])
#print(df)
df.head()

Unnamed: 0,transactionId,itemId,quantity
0,1,1,1
1,1,2,1
2,1,5,1
3,2,2,1
4,2,4,1


In [14]:
# Build basket data
basket = (df.groupby(['transactionId', 'itemId'])['quantity'].sum().unstack().reset_index().fillna(0).set_index('transactionId'))
basket

itemId,1,2,3,4,5
transactionId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,1.0,1.0,0.0,0.0,1.0
2,0.0,1.0,0.0,1.0,0.0
3,0.0,1.0,1.0,0.0,0.0
4,1.0,1.0,0.0,1.0,0.0
5,1.0,0.0,1.0,0.0,0.0
6,0.0,1.0,1.0,0.0,0.0
7,1.0,0.0,1.0,0.0,0.0
8,1.0,1.0,1.0,0.0,1.0
9,1.0,1.0,1.0,0.0,0.0


In [31]:
# Build up the frequent items with minsup of 0.2
frequent_itemsets = apriori(basket, min_support=0.2, use_colnames=True)
frequent_itemsets

Unnamed: 0,support,itemsets
0,0.666667,(1)
1,0.777778,(2)
2,0.666667,(3)
3,0.222222,(4)
4,0.222222,(5)
5,0.444444,"(1, 2)"
6,0.444444,"(1, 3)"
7,0.222222,"(1, 5)"
8,0.444444,"(2, 3)"
9,0.222222,"(2, 4)"


In [32]:
# Create the rules
rules = association_rules(frequent_itemsets, metric="lift", min_threshold=1)
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(1),(3),0.666667,0.666667,0.444444,0.666667,1.0,0.0,1.0
1,(3),(1),0.666667,0.666667,0.444444,0.666667,1.0,0.0,1.0
2,(1),(5),0.666667,0.222222,0.222222,0.333333,1.5,0.074074,1.166667
3,(5),(1),0.222222,0.666667,0.222222,1.0,1.5,0.074074,inf
4,(2),(4),0.777778,0.222222,0.222222,0.285714,1.285714,0.049383,1.088889
5,(4),(2),0.222222,0.777778,0.222222,1.0,1.285714,0.049383,inf
6,(2),(5),0.777778,0.222222,0.222222,0.285714,1.285714,0.049383,1.088889
7,(5),(2),0.222222,0.777778,0.222222,1.0,1.285714,0.049383,inf
8,"(1, 2)",(5),0.444444,0.222222,0.222222,0.5,2.25,0.123457,1.555556
9,"(1, 5)",(2),0.222222,0.777778,0.222222,1.0,1.285714,0.049383,inf


In [33]:
# Filter rule for minconf and lift
rules[ (rules['lift'] > 1) &
       (rules['confidence'] >= 0.66) ]

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
3,(5),(1),0.222222,0.666667,0.222222,1.0,1.5,0.074074,inf
5,(4),(2),0.222222,0.777778,0.222222,1.0,1.285714,0.049383,inf
7,(5),(2),0.222222,0.777778,0.222222,1.0,1.285714,0.049383,inf
9,"(1, 5)",(2),0.222222,0.777778,0.222222,1.0,1.285714,0.049383,inf
10,"(2, 5)",(1),0.222222,0.666667,0.222222,1.0,1.5,0.074074,inf
13,(5),"(1, 2)",0.222222,0.444444,0.222222,1.0,2.25,0.123457,inf
