## Algoritmo Apriori e sue varianti

Occorre installare una libreria di machine learning che implementi l'algoritmo Apriori. Nella fattispecie usiamo `mlxtend`, che installiamo con `conda` (ad esempio `conda install -c conda-forge mlxtend`).

In [1]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

# The dataset is a list of transactions; each transaction is itself a list
# of item names.
dataset = [
    ['Milk', 'Onion', 'Nutmeg', 'Kidney Beans', 'Eggs', 'Yogurt'],
    ['Dill', 'Onion', 'Nutmeg', 'Kidney Beans', 'Eggs', 'Yogurt'],
    ['Milk', 'Apple', 'Kidney Beans', 'Eggs'],
    ['Milk', 'Unicorn', 'Corn', 'Kidney Beans', 'Yogurt'],
    # 'Onion' appears twice here; the encoding below still yields a single
    # 'Onion' column for this transaction.
    ['Corn', 'Onion', 'Onion', 'Kidney Beans', 'Ice cream', 'Eggs'],
]

# TransactionEncoder scans the data (any iterable form is accepted) and builds
# an intermediate boolean array -- one row per transaction, one column per
# distinct item -- from which the DataFrame is obtained.
te = TransactionEncoder()
te_ary = te.fit_transform(dataset)
df = pd.DataFrame(data=te_ary, columns=te.columns_)

df

Unnamed: 0,Apple,Corn,Dill,Eggs,Ice cream,Kidney Beans,Milk,Nutmeg,Onion,Unicorn,Yogurt
0,False,False,False,True,False,True,True,True,True,False,True
1,False,False,True,True,False,True,False,True,True,False,True
2,True,False,False,True,False,True,True,False,False,False,False
3,False,True,False,False,False,True,True,False,False,True,True
4,False,True,False,True,True,True,False,False,True,False,False


In [2]:
# The vertical database falls straight out of a transpose: each row becomes an
# item and each column a transaction id, so the True cells of a row mark the
# transactions containing that item (its tid-list, in boolean form).

df.T

Unnamed: 0,0,1,2,3,4
Apple,False,False,True,False,False
Corn,False,False,False,True,True
Dill,False,True,False,False,False
Eggs,True,True,True,False,True
Ice cream,False,False,False,False,True
Kidney Beans,True,True,True,True,True
Milk,True,False,True,True,False
Nutmeg,True,True,False,False,False
Onion,True,True,False,False,True
Unicorn,False,False,False,True,False
Yogurt,True,True,False,True,False


In [3]:
# Run Apriori, keeping only the itemsets whose support is at least 0.6.

frequent_itemsets = apriori(
    df,
    min_support=0.6,
    use_colnames=True,  # report itemsets by item name rather than column index
)

frequent_itemsets

Unnamed: 0,support,itemsets
0,0.8,(Eggs)
1,1.0,(Kidney Beans)
2,0.6,(Milk)
3,0.6,(Onion)
4,0.6,(Yogurt)
5,0.8,"(Eggs, Kidney Beans)"
6,0.6,"(Onion, Eggs)"
7,0.6,"(Milk, Kidney Beans)"
8,0.6,"(Onion, Kidney Beans)"
9,0.6,"(Yogurt, Kidney Beans)"
10,0.6,"(Onion, Eggs, Kidney Beans)"


In [4]:
# Derive the association rules from the itemsets mined at minimum support 0.6,
# keeping only the rules whose confidence is at least 0.8.

association_rules(
    frequent_itemsets,
    metric="confidence",
    min_threshold=0.8,
)


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(Eggs),(Kidney Beans),0.8,1.0,0.8,1.0,1.0,0.0,inf
1,(Kidney Beans),(Eggs),1.0,0.8,0.8,0.8,1.0,0.0,1.0
2,(Onion),(Eggs),0.6,0.8,0.6,1.0,1.25,0.12,inf
3,(Milk),(Kidney Beans),0.6,1.0,0.6,1.0,1.0,0.0,inf
4,(Onion),(Kidney Beans),0.6,1.0,0.6,1.0,1.0,0.0,inf
5,(Yogurt),(Kidney Beans),0.6,1.0,0.6,1.0,1.0,0.0,inf
6,"(Onion, Eggs)",(Kidney Beans),0.6,1.0,0.6,1.0,1.0,0.0,inf
7,"(Onion, Kidney Beans)",(Eggs),0.6,0.8,0.6,1.0,1.25,0.12,inf
8,(Onion),"(Eggs, Kidney Beans)",0.6,0.8,0.6,1.0,1.25,0.12,inf
