# Association Rule

## Example 1: Faceplates Promotion
A store that sells accessories for cellular phones runs a promotion on faceplates. The store managers would like
to know what colors of faceplates customers are likely to purchase together.

In [1]:
# import libraries for data management
import numpy as np
import pandas as pd

In [2]:
# Load the faceplate transactions, using the 'Transaction' column as the index.
# Each remaining column is a faceplate colour holding 0/1 flags
# (presumably 1 = that colour was bought in the transaction).
data = pd.read_csv('Faceplate.csv', index_col='Transaction')
data

Unnamed: 0_level_0,Red,White,Blue,Orange,Green,Yellow
Transaction,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,1,1,0,0,1,0
2,0,1,0,1,0,0
3,0,1,1,0,0,0
4,1,1,0,1,0,0
5,1,0,1,0,0,0
6,0,1,1,0,0,0
7,1,0,1,0,0,0
8,1,1,1,0,1,0
9,1,1,1,0,0,0
10,0,0,0,0,0,1


In [3]:
# mlxtend provides the association-rule tools used below.
# If it is not installed on your local computer, install it with:
    # conda config --add channels conda-forge
    # conda install mlxtend
        # documentation: http://rasbt.github.io/mlxtend/
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules

In [4]:
# Optional: mlxtend's apriori is designed for boolean one-hot data; uncomment the
# lines below to convert the 0/1 columns of `data` to True/False in place.
# data = data.astype('bool')
# data

In [5]:
# Create frequent itemsets: keep every itemset that appears in at least 20% of
# the transactions (min_support=0.2) and label items by column name.
# The CSV is loaded as 0/1 integers, while apriori expects boolean one-hot data
# (non-bool input is deprecated in mlxtend), so cast at the call site.
# `data` itself is left unchanged; the resulting supports are identical.
itemsets = apriori(data.astype(bool), min_support=0.2, use_colnames=True)



In [6]:
# Show the frequent itemsets together with their support
itemsets

Unnamed: 0,support,itemsets
0,0.6,(Red)
1,0.7,(White)
2,0.6,(Blue)
3,0.2,(Orange)
4,0.2,(Green)
5,0.4,"(Red, White)"
6,0.4,"(Red, Blue)"
7,0.2,"(Red, Green)"
8,0.4,"(Blue, White)"
9,0.2,"(Orange, White)"


In [7]:
# Turn the frequent itemsets into association rules.
# First pass: keep only rules whose confidence is at least 0.5.

rules = association_rules(
    itemsets,
    metric='confidence',
    min_threshold=0.5,
)
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(Red),(White),0.6,0.7,0.4,0.666667,0.952381,-0.02,0.9,-0.111111
1,(White),(Red),0.7,0.6,0.4,0.571429,0.952381,-0.02,0.933333,-0.142857
2,(Red),(Blue),0.6,0.6,0.4,0.666667,1.111111,0.04,1.2,0.25
3,(Blue),(Red),0.6,0.6,0.4,0.666667,1.111111,0.04,1.2,0.25
4,(Green),(Red),0.2,0.6,0.2,1.0,1.666667,0.08,inf,0.5
5,(Blue),(White),0.6,0.7,0.4,0.666667,0.952381,-0.02,0.9,-0.111111
6,(White),(Blue),0.7,0.6,0.4,0.571429,0.952381,-0.02,0.933333,-0.142857
7,(Orange),(White),0.2,0.7,0.2,1.0,1.428571,0.06,inf,0.375
8,(Green),(White),0.2,0.7,0.2,1.0,1.428571,0.06,inf,0.375
9,"(Red, Blue)",(White),0.4,0.7,0.2,0.5,0.714286,-0.08,0.6,-0.4


In [8]:
# Display all rules ranked by lift (highest first), leaving out the conviction column
(
    rules
    .sort_values(by='lift', ascending=False)
    .drop(columns='conviction')
)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,zhangs_metric
13,"(Red, White)",(Green),0.4,0.2,0.2,0.5,2.5,0.12,1.0
15,(Green),"(Red, White)",0.2,0.4,0.2,1.0,2.5,0.12,0.75
4,(Green),(Red),0.2,0.6,0.2,1.0,1.666667,0.08,0.5
14,"(Green, White)",(Red),0.2,0.6,0.2,1.0,1.666667,0.08,0.5
7,(Orange),(White),0.2,0.7,0.2,1.0,1.428571,0.06,0.375
8,(Green),(White),0.2,0.7,0.2,1.0,1.428571,0.06,0.375
12,"(Red, Green)",(White),0.2,0.7,0.2,1.0,1.428571,0.06,0.375
2,(Red),(Blue),0.6,0.6,0.4,0.666667,1.111111,0.04,0.25
3,(Blue),(Red),0.6,0.6,0.4,0.666667,1.111111,0.04,0.25
0,(Red),(White),0.6,0.7,0.4,0.666667,0.952381,-0.02,-0.111111


In [9]:
# Second pass: of the confident rules, keep those with lift > 1,
# rank them by descending lift and hide the conviction column.

interested_rules = (
    rules
    .loc[rules['lift'] > 1]
    .sort_values(by='lift', ascending=False)
    .drop(columns='conviction')
)
interested_rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,zhangs_metric
13,"(Red, White)",(Green),0.4,0.2,0.2,0.5,2.5,0.12,1.0
15,(Green),"(Red, White)",0.2,0.4,0.2,1.0,2.5,0.12,0.75
4,(Green),(Red),0.2,0.6,0.2,1.0,1.666667,0.08,0.5
14,"(Green, White)",(Red),0.2,0.6,0.2,1.0,1.666667,0.08,0.5
7,(Orange),(White),0.2,0.7,0.2,1.0,1.428571,0.06,0.375
8,(Green),(White),0.2,0.7,0.2,1.0,1.428571,0.06,0.375
12,"(Red, Green)",(White),0.2,0.7,0.2,1.0,1.428571,0.06,0.375
2,(Red),(Blue),0.6,0.6,0.4,0.666667,1.111111,0.04,0.25
3,(Blue),(Red),0.6,0.6,0.4,0.666667,1.111111,0.04,0.25


## Example 2: Grocery Store Promotion
- A grocery store runs a promotion on its food items. The store managers would like to know which combinations of food items customers are likely to purchase together.
- This example also shows how to handle transaction data stored in a different format: each row lists one transaction's items as a single comma-separated string.

In [10]:
# Load the grocery transactions, using the 'itemId' column as the index.
# Each row keeps one transaction as a single comma-separated string in the 'items' column.
items = pd.read_csv("items.csv", index_col='itemId')
items

Unnamed: 0_level_0,items
itemId,Unnamed: 1_level_1
1,"bread,butter,milk"
2,"bread,butter"
3,"bread,butter,beer"
4,"milk,wine"
5,"bread,butter,milk,wine"


In [11]:
# First row (position 0) returned as a Series
items.iloc[0]

items    bread,butter,milk
Name: 1, dtype: object

In [12]:
# First row, first column: the raw comma-separated string of one transaction
items.iloc[0,0]

'bread,butter,milk'

In [13]:
# Splitting the string on "," gives a list of item names
items.iloc[0,0].split(",")

['bread', 'butter', 'milk']

In [14]:
# Number of rows, i.e. number of transactions
len(items)

5

In [15]:
# A range object covering the row positions 0 .. len(items) - 1
range(len(items))

range(0, 5)

In [16]:
# Expand the range into a list to see the individual positions (5 == len(items) here)
list(range(0, 5))

[0, 1, 2, 3, 4]

In [17]:
# Split each transaction string on "," to get a list of item names.
# The whole dataset becomes one outer list; each transaction is an inner list.
# Iterate over the first column directly instead of indexing row by row with
# iloc, and strip surrounding whitespace so that "bread, butter" and
# "bread,butter" produce the same item names.
records = [
    [name.strip() for name in basket.split(",")]
    for basket in items.iloc[:, 0]
]

In [18]:
# Inspect the list of lists: one inner list of item names per transaction
records

[['bread', 'butter', 'milk'],
 ['bread', 'butter'],
 ['bread', 'butter', 'beer'],
 ['milk', 'wine'],
 ['bread', 'butter', 'milk', 'wine']]

In [19]:
# convert the raw data into the data structure required by apriori function
from mlxtend.preprocessing import TransactionEncoder

In [20]:
# Fit the encoder on the transactions, then encode those same transactions
te = TransactionEncoder()
te.fit(records)
te_ary = te.transform(records)

In [21]:
# The encoded data is a boolean array: one row per transaction, one column per
# distinct item, True where the transaction contains that item
te_ary

array([[False,  True,  True,  True, False],
       [False,  True,  True, False, False],
       [ True,  True,  True, False, False],
       [False, False, False,  True,  True],
       [False,  True,  True,  True,  True]])

In [22]:
# Wrap the encoded array in a DataFrame whose columns carry the item names
# recorded by the encoder
df = pd.DataFrame(data=te_ary, columns=te.columns_)
df

Unnamed: 0,beer,bread,butter,milk,wine
0,False,True,True,True,False
1,False,True,True,False,False
2,True,True,True,False,False
3,False,False,False,True,True
4,False,True,True,True,True


## Task: Find the interesting/meaningful association rules in the "items.csv" data.

In [23]:
# Mine frequent itemsets: keep those found in at least 40% of the transactions
itemsets2 = apriori(
    df,
    min_support=0.4,
    use_colnames=True,
)

In [24]:
# Show the frequent grocery itemsets together with their support
itemsets2

Unnamed: 0,support,itemsets
0,0.8,(bread)
1,0.8,(butter)
2,0.6,(milk)
3,0.4,(wine)
4,0.8,"(bread, butter)"
5,0.4,"(bread, milk)"
6,0.4,"(milk, butter)"
7,0.4,"(wine, milk)"
8,0.4,"(bread, butter, milk)"


In [25]:
# Derive association rules, keeping those with confidence of at least 0.5
rules2 = association_rules(
    itemsets2,
    metric='confidence',
    min_threshold=0.5,
)

In [26]:
# Display the grocery rules ranked by lift (highest first), without the conviction column
(
    rules2
    .sort_values(by='lift', ascending=False)
    .drop(columns='conviction')
)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,zhangs_metric
6,(wine),(milk),0.4,0.6,0.4,1.0,1.666667,0.16,0.666667
7,(milk),(wine),0.6,0.4,0.4,0.666667,1.666667,0.16,1.0
0,(bread),(butter),0.8,0.8,0.8,1.0,1.25,0.16,1.0
1,(butter),(bread),0.8,0.8,0.8,1.0,1.25,0.16,1.0
9,"(bread, milk)",(butter),0.4,0.8,0.4,1.0,1.25,0.08,0.333333
10,"(milk, butter)",(bread),0.4,0.8,0.4,1.0,1.25,0.08,0.333333
11,(bread),"(milk, butter)",0.8,0.4,0.4,0.5,1.25,0.08,1.0
12,(butter),"(bread, milk)",0.8,0.4,0.4,0.5,1.25,0.08,1.0
2,(bread),(milk),0.8,0.6,0.4,0.5,0.833333,-0.08,-0.5
3,(milk),(bread),0.6,0.8,0.4,0.666667,0.833333,-0.08,-0.333333


In [27]:
# Keep the meaningful rules: lift > 1, ranked by descending lift,
# with the conviction column hidden
interested_rules2 = (
    rules2
    .loc[rules2['lift'] > 1]
    .sort_values(by='lift', ascending=False)
    .drop(columns='conviction')
)
interested_rules2

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,zhangs_metric
6,(wine),(milk),0.4,0.6,0.4,1.0,1.666667,0.16,0.666667
7,(milk),(wine),0.6,0.4,0.4,0.666667,1.666667,0.16,1.0
0,(bread),(butter),0.8,0.8,0.8,1.0,1.25,0.16,1.0
1,(butter),(bread),0.8,0.8,0.8,1.0,1.25,0.16,1.0
9,"(bread, milk)",(butter),0.4,0.8,0.4,1.0,1.25,0.08,0.333333
10,"(milk, butter)",(bread),0.4,0.8,0.4,1.0,1.25,0.08,0.333333
11,(bread),"(milk, butter)",0.8,0.4,0.4,0.5,1.25,0.08,1.0
12,(butter),"(bread, milk)",0.8,0.4,0.4,0.5,1.25,0.08,1.0


In [27]:
# Recommendation 1: offer a promotional discount for buying wine and milk together (highest lift, about 1.67)
# Recommendation 2: offer a promotional discount for buying bread, butter and milk together (lift 1.25)