In [50]:
# Mount Google Drive so the dataset under /content/gdrive/MyDrive can be read.
from google.colab import drive
drive.mount("/content/gdrive")

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


# Apriori

In [51]:
import numpy as np
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules
from mlxtend.preprocessing import TransactionEncoder

In [52]:
# Load the grocery transactions into a single 'products' column; each value is
# one basket stored as a comma-joined string (split into items further below).
df = pd.read_csv(
    '/content/gdrive/MyDrive/DMW/datasets/GroceryStoreDataSet.csv',
    sep=',',
    names=['products'],
)
df.head()

Unnamed: 0,products
0,"MILK,BREAD,BISCUIT"
1,"BREAD,MILK,BISCUIT,CORNFLAKES"
2,"BREAD,TEA,BOURNVITA"
3,"JAM,MAGGI,BREAD,MILK"
4,"MAGGI,TEA,BISCUIT"


In [53]:
# One-hot encoding: split each comma-joined basket string into its items, then
# let TransactionEncoder build one boolean column per distinct item.
data = [basket.split(',') for basket in df['products']]
encoder = TransactionEncoder()
encoded_data = encoder.fit_transform(data)
# Keep the boolean dtype: mlxtend's frequent-pattern functions accept
# True/False frames directly (and warn about non-bool input), so converting to
# 1/0 with in-place replace() calls is unnecessary.
df2 = pd.DataFrame(encoded_data, columns=encoder.columns_)

In [55]:
# Mine frequent itemsets with Apriori, then keep the association rules whose
# confidence reaches min_conf.
min_support = 0.2  # threshold passed to apriori as min_support
min_conf = 0.6     # threshold passed to association_rules for 'confidence'
frq_items = apriori(df2, min_support=min_support, use_colnames=True)
rules = association_rules(frq_items, metric='confidence', min_threshold=min_conf)
# Report the thresholds actually used rather than hard-coded copies of them,
# so the printed values cannot drift from the variables above.
print(f"Enter minimum support : {min_support}")
print(f"Enter minimum confidence : {min_conf}")
rules

Enter minimum support : 0.2
Enter minimum confidence : 0.6


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(MILK),(BREAD),0.25,0.65,0.2,0.8,1.230769,0.0375,1.75,0.25
1,(SUGER),(BREAD),0.3,0.65,0.2,0.666667,1.025641,0.005,1.05,0.035714
2,(CORNFLAKES),(COFFEE),0.3,0.4,0.2,0.666667,1.666667,0.08,1.8,0.571429
3,(SUGER),(COFFEE),0.3,0.4,0.2,0.666667,1.666667,0.08,1.8,0.571429
4,(MAGGI),(TEA),0.25,0.35,0.2,0.8,2.285714,0.1125,3.25,0.75


# FP-Growth (FP-Tree)

In [56]:
from mlxtend.frequent_patterns.fpgrowth import fpgrowth

In [57]:
# Toy transaction database for the FP-growth example: five transactions, each
# a list of single-letter item labels.
dataset = [
    ['f', 'a', 'c', 'd', 'g', 'i', 'm', 'p'],
    ['a', 'b', 'c', 'f', 'l', 'm', 'o'],
    ['b', 'f', 'h', 'j', 'o', 'w'],
    ['b', 'c', 'k', 's', 'p'],
    ['a', 'f', 'c', 'e', 'l', 'p', 'm', 'n'],
]

In [58]:
# One-hot encode the toy transactions: one column per distinct item, one row
# per transaction.
encoder = TransactionEncoder()
encoded_data = encoder.fit(dataset).transform(dataset)
fp_df = pd.DataFrame(data=encoded_data, columns=encoder.columns_)

In [59]:
# Threshold shared by the FP-growth cells below: it is passed to fpgrowth as
# min_support and to association_rules as the 'confidence' min_threshold.
min_conf_fp = 0.6

In [60]:
# FP-growth filters itemsets by support only, so the value handed to it here
# acts as a minimum support (the notebook reuses min_conf_fp for it).
pattern = fpgrowth(fp_df, min_support=min_conf_fp, use_colnames=True)
# Label the threshold by how it is used in the call above: minimum support.
print(f"FPTree with minimum support = {min_conf_fp*100}%")
pattern

FPTree with minimum confidence = 60.0%


Unnamed: 0,support,itemsets
0,0.8,(f)
1,0.8,(c)
2,0.6,(p)
3,0.6,(m)
4,0.6,(a)
5,0.6,(b)
6,0.6,"(c, f)"
7,0.6,"(p, c)"
8,0.6,"(c, m)"
9,0.6,"(f, m)"


In [62]:
# Derive association rules from the FP-growth itemsets, keeping the rules
# whose confidence reaches min_conf_fp.
rules = association_rules(
    pattern,
    metric='confidence',
    min_threshold=min_conf_fp,
)
print("Association rules are as follows")
rules


Association rules are as follows


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(c),(f),0.8,0.8,0.6,0.75,0.9375,-0.04,0.8,-0.25
1,(f),(c),0.8,0.8,0.6,0.75,0.9375,-0.04,0.8,-0.25
2,(p),(c),0.6,0.8,0.6,1.0,1.25,0.12,inf,0.5
3,(c),(p),0.8,0.6,0.6,0.75,1.25,0.12,1.6,1.0
4,(c),(m),0.8,0.6,0.6,0.75,1.25,0.12,1.6,1.0
5,(m),(c),0.6,0.8,0.6,1.0,1.25,0.12,inf,0.5
6,(f),(m),0.8,0.6,0.6,0.75,1.25,0.12,1.6,1.0
7,(m),(f),0.6,0.8,0.6,1.0,1.25,0.12,inf,0.5
8,"(c, f)",(m),0.6,0.6,0.6,1.0,1.666667,0.24,inf,1.0
9,"(c, m)",(f),0.6,0.8,0.6,1.0,1.25,0.12,inf,0.5
