# Association Rule Learning

### Installing Apriori

In [2]:
pip install mlxtend # installs the mlxtend library



### Importing Required Modules

In [3]:
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules
from mlxtend.preprocessing import TransactionEncoder
import pandas as pd

### Import Dataset

In [4]:
data = pd.read_csv('groceries.csv', header=None)

### Creating the DataFrame of Frequent Itemsets

In [23]:
transactions = data.apply(lambda row: row.dropna().tolist(), axis=1).tolist()
te = TransactionEncoder()
te_data = te.fit(transactions).transform(transactions)
df = pd.DataFrame(te_data, columns=te.columns_)
df.head()

Unnamed: 0,Instant food products,UHT-milk,abrasive cleaner,artif. sweetener,baby cosmetics,baby food,bags,baking powder,bathroom cleaner,beef,...,turkey,vinegar,waffles,whipped/sour cream,whisky,white bread,white wine,whole milk,yogurt,zwieback
0,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,True,False
2,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,True,False,False
3,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,True,False
4,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,True,False,False


### Applying Apriori Algorithm and Finding Association Rules

In [24]:
import warnings
warnings.filterwarnings("ignore", category=RuntimeWarning)
freq = apriori(df, min_support=0.08, use_colnames=True)
freq.head()

Unnamed: 0,support,itemsets
0,0.080529,(bottled beer)
1,0.110524,(bottled water)
2,0.082766,(citrus fruit)
3,0.193493,(other vegetables)
4,0.088968,(pastry)


### Generate association rules using the lift metric

In [25]:
rules = association_rules(freq, metric="lift", min_threshold=1)
rules.head()

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski


### Interpret One Rule

In [26]:
# Pick the first rule and interpret
rules.iloc[0]

IndexError: single positional indexer is out-of-bounds

In [17]:
strong_rules = rules[(rules['lift'] > 4) & (rules['confidence'] > 0.8)]
print(f"Number of strong rules: {len(strong_rules)}")
strong_rules

Number of strong rules: 0


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski


### Generate association rules using the lift metric with 0.07 support

In [29]:
import warnings
warnings.filterwarnings("ignore", category=RuntimeWarning)
freq = apriori(df, min_support=0.070, use_colnames=True)
rules = association_rules(freq, metric="lift", min_threshold=1)
rules.head()

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
0,(other vegetables),(whole milk),0.193493,0.255516,0.074835,0.386758,1.513634,1.0,0.025394,1.214013,0.42075,0.2,0.176286,0.339817
1,(whole milk),(other vegetables),0.255516,0.193493,0.074835,0.292877,1.513634,1.0,0.025394,1.140548,0.455803,0.2,0.123228,0.339817


In [30]:
rules.iloc[0]

Unnamed: 0,0
antecedents,(other vegetables)
consequents,(whole milk)
antecedent support,0.193493
consequent support,0.255516
support,0.074835
confidence,0.386758
lift,1.513634
representativity,1.0
leverage,0.025394
conviction,1.214013
