In [3]:
import numpy as np
import pandas as pd

In [4]:
# lendo arquivo csv, substituindo os NaN por 0 e gravando no mesmo dataframe

In [5]:
# Read the header-less CSV and put 0 in every missing cell; the cleaned frame
# is bound to `df` in a single chained expression.
df = pd.read_csv("groceries.csv", header=None).replace(np.nan, 0)

# Preview the first 10 rows.
df.head(10)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,22,23,24,25,26,27,28,29,30,31
0,citrus fruit,semi-finished bread,margarine,ready soups,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,tropical fruit,yogurt,coffee,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,whole milk,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,pip fruit,yogurt,cream cheese,meat spreads,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,other vegetables,whole milk,condensed milk,long life bakery product,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,whole milk,butter,yogurt,rice,abrasive cleaner,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6,rolls/buns,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
7,other vegetables,UHT-milk,rolls/buns,bottled beer,liquor (appetizer),0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
8,potted plants,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9,whole milk,cereals,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [6]:
def remove_zeros(aux):
    """Return a new list holding the elements of ``aux`` that are not equal to 0.

    The original order is preserved. The test is ``element != 0``, so values
    that compare equal to 0 (``0.0``, ``False``) are removed as well, while the
    string ``"0"`` is kept.
    """
    return [item for item in aux if item != 0]

In [7]:
# Walk the rows of df, turn each one into a plain list and strip the 0
# entries, collecting one list per row.
finalList = [remove_zeros(row.values.tolist()) for _, row in df.iterrows()]

In [8]:
# Data pre-processing with mlxtend

from mlxtend.preprocessing import TransactionEncoder

te = TransactionEncoder()

# Fit the encoder on the transactions and encode them in one call: the result
# holds True or False depending on whether a given product is in the list.
teArray = te.fit_transform(finalList)

# NOTE: `df` is rebound here; from this point on it refers to the encoded
# frame, whose columns are the labels learned by the encoder.
df = pd.DataFrame(teArray, columns=te.columns_)

df

Unnamed: 0,Instant food products,UHT-milk,abrasive cleaner,artif. sweetener,baby cosmetics,baby food,bags,baking powder,bathroom cleaner,beef,...,turkey,vinegar,waffles,whipped/sour cream,whisky,white bread,white wine,whole milk,yogurt,zwieback
0,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,True,False
2,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,True,False,False
3,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,True,False
4,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,True,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9830,False,False,False,False,False,False,False,False,False,True,...,False,False,False,True,False,False,False,True,False,False
9831,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
9832,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,True,False
9833,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False


In [9]:
from mlxtend.frequent_patterns import apriori, association_rules
from IPython.core.display import HTML


# Extract the frequent itemsets (minimum support 0.01), labelled with the
# dataframe's column names.
frequent_itens = apriori(df, use_colnames=True, min_support=0.01)

# Display the frequent itemsets ordered from highest to lowest support.
frequent_itens.sort_values(by="support", ascending=False)


Unnamed: 0,support,itemsets
86,0.255516,(whole milk)
55,0.193493,(other vegetables)
66,0.183935,(rolls/buns)
75,0.174377,(soda)
87,0.139502,(yogurt)
...,...,...
178,0.010066,"(sausage, frankfurter)"
306,0.010066,"(yogurt, curd, whole milk)"
160,0.010066,"(curd, rolls/buns)"
212,0.010066,"(napkins, tropical fruit)"


In [10]:
# Build association rules from the frequent-itemsets dataframe, using
# confidence as the metric with a 0.3 threshold.
rules = association_rules(frequent_itens, metric="confidence", min_threshold=0.3)

# Display the rules ordered by confidence (highest first), hiding the metric
# columns that are not analysed here.
(
    rules
    .sort_values(by="confidence", ascending=False)
    .drop(columns=["antecedent support", "consequent support", "leverage", "conviction"])
)

Unnamed: 0,antecedents,consequents,support,confidence,lift
74,"(root vegetables, citrus fruit)",(other vegetables),0.010371,0.586207,3.029608
96,"(tropical fruit, root vegetables)",(other vegetables),0.012303,0.584541,3.020999
79,"(yogurt, curd)",(whole milk),0.010066,0.582353,2.279125
71,"(other vegetables, butter)",(whole milk),0.011490,0.573604,2.244885
118,"(tropical fruit, root vegetables)",(whole milk),0.011998,0.570048,2.230969
...,...,...,...,...,...
54,(yogurt),(other vegetables),0.043416,0.311224,1.608457
6,(bottled water),(whole milk),0.034367,0.310948,1.216940
98,"(other vegetables, whole milk)",(root vegetables),0.023183,0.309783,2.842082
3,(berries),(other vegetables),0.010269,0.308869,1.596280


In [11]:
# Same rule extraction with the confidence threshold raised to 0.5.
rules5 = association_rules(frequent_itens, min_threshold=0.5, metric="confidence")

# Highest-confidence rules first; the unused metric columns are hidden.
(
    rules5
    .sort_values(by="confidence", ascending=False)
    .drop(columns=["antecedent support", "consequent support", "leverage", "conviction"])
)

Unnamed: 0,antecedents,consequents,support,confidence,lift
1,"(root vegetables, citrus fruit)",(other vegetables),0.010371,0.586207,3.029608
6,"(tropical fruit, root vegetables)",(other vegetables),0.012303,0.584541,3.020999
2,"(yogurt, curd)",(whole milk),0.010066,0.582353,2.279125
0,"(other vegetables, butter)",(whole milk),0.01149,0.573604,2.244885
11,"(tropical fruit, root vegetables)",(whole milk),0.011998,0.570048,2.230969
12,"(yogurt, root vegetables)",(whole milk),0.01454,0.562992,2.203354
3,"(domestic eggs, other vegetables)",(whole milk),0.012303,0.552511,2.162336
14,"(yogurt, whipped/sour cream)",(whole milk),0.01088,0.52451,2.052747
10,"(root vegetables, rolls/buns)",(whole milk),0.01271,0.523013,2.046888
4,"(pip fruit, other vegetables)",(whole milk),0.013523,0.51751,2.025351


In [12]:
# Rule extraction with the confidence threshold set to 0.6.
# NOTE(review): the variable name suggests a 0.7 threshold, but 0.6 is the
# value actually used — confirm which one is intended.
rules7 = association_rules(frequent_itens, min_threshold=0.6, metric="confidence")

# Ordered by confidence (highest first) with the unused metric columns hidden.
(
    rules7
    .sort_values(by="confidence", ascending=False)
    .drop(columns=["antecedent support", "consequent support", "leverage", "conviction"])
)

Unnamed: 0,antecedents,consequents,support,confidence,lift


In [13]:
# Ordering the dataframes by lift

# Rules are still selected by confidence (threshold 0.2); only the display
# order changes to lift, highest first.
rules_elem = association_rules(frequent_itens, min_threshold=0.2, metric="confidence")
(
    rules_elem
    .sort_values(by="lift", ascending=False)
    .drop(columns=["antecedent support", "consequent support", "leverage", "conviction"])
)

Unnamed: 0,antecedents,consequents,support,confidence,lift
157,"(other vegetables, citrus fruit)",(root vegetables),0.010371,0.359155,3.295045
207,"(yogurt, other vegetables)",(whipped/sour cream),0.010168,0.234192,3.267062
184,"(tropical fruit, other vegetables)",(root vegetables),0.012303,0.342776,3.144780
2,(beef),(root vegetables),0.017387,0.331395,3.040367
158,"(root vegetables, citrus fruit)",(other vegetables),0.010371,0.586207,3.029608
...,...,...,...,...,...
64,(fruit/vegetable juice),(rolls/buns),0.014540,0.201125,1.093458
8,(bottled beer),(other vegetables),0.016167,0.200758,1.037546
10,(bottled beer),(whole milk),0.020437,0.253788,0.993237
137,(shopping bags),(whole milk),0.024504,0.248710,0.973364


In [14]:
# Rules selected with a 0.3 confidence threshold.
rules_elem3 = association_rules(frequent_itens, min_threshold=0.3, metric="confidence")

# Show the 10 rules with the highest lift, without the unused metric columns.
(
    rules_elem3
    .sort_values(by="lift", ascending=False)
    .drop(columns=["antecedent support", "consequent support", "leverage", "conviction"])
    .head(10)
)

Unnamed: 0,antecedents,consequents,support,confidence,lift
73,"(other vegetables, citrus fruit)",(root vegetables),0.010371,0.359155,3.295045
95,"(tropical fruit, other vegetables)",(root vegetables),0.012303,0.342776,3.14478
1,(beef),(root vegetables),0.017387,0.331395,3.040367
74,"(root vegetables, citrus fruit)",(other vegetables),0.010371,0.586207,3.029608
96,"(tropical fruit, root vegetables)",(other vegetables),0.012303,0.584541,3.020999
98,"(other vegetables, whole milk)",(root vegetables),0.023183,0.309783,2.842082
80,"(curd, whole milk)",(yogurt),0.010066,0.385214,2.761356
91,"(root vegetables, rolls/buns)",(other vegetables),0.012201,0.502092,2.59489
100,"(yogurt, root vegetables)",(other vegetables),0.012913,0.5,2.584078
122,"(tropical fruit, whole milk)",(yogurt),0.01515,0.358173,2.567516


In [15]:
# Rules selected with a 0.5 confidence threshold.
rules_elem5 = association_rules(frequent_itens, min_threshold=0.5, metric="confidence")

# Show the 10 rules with the highest lift, without the unused metric columns.
(
    rules_elem5
    .sort_values(by="lift", ascending=False)
    .drop(columns=["antecedent support", "consequent support", "leverage", "conviction"])
    .head(10)
)

Unnamed: 0,antecedents,consequents,support,confidence,lift
1,"(root vegetables, citrus fruit)",(other vegetables),0.010371,0.586207,3.029608
6,"(tropical fruit, root vegetables)",(other vegetables),0.012303,0.584541,3.020999
5,"(root vegetables, rolls/buns)",(other vegetables),0.012201,0.502092,2.59489
7,"(yogurt, root vegetables)",(other vegetables),0.012913,0.5,2.584078
2,"(yogurt, curd)",(whole milk),0.010066,0.582353,2.279125
0,"(other vegetables, butter)",(whole milk),0.01149,0.573604,2.244885
11,"(tropical fruit, root vegetables)",(whole milk),0.011998,0.570048,2.230969
12,"(yogurt, root vegetables)",(whole milk),0.01454,0.562992,2.203354
3,"(domestic eggs, other vegetables)",(whole milk),0.012303,0.552511,2.162336
14,"(yogurt, whipped/sour cream)",(whole milk),0.01088,0.52451,2.052747


In [16]:
# NOTE(review): this cell rebuilds `rules_elem` with the same arguments
# (confidence metric, 0.2 threshold) as an earlier cell of this notebook —
# confirm whether the repetition is intentional.
rules_elem = association_rules(frequent_itens, min_threshold=0.2, metric="confidence")

# Ordered by lift (highest first) with the unused metric columns hidden.
(
    rules_elem
    .sort_values(by="lift", ascending=False)
    .drop(columns=["antecedent support", "consequent support", "leverage", "conviction"])
)

Unnamed: 0,antecedents,consequents,support,confidence,lift
157,"(other vegetables, citrus fruit)",(root vegetables),0.010371,0.359155,3.295045
207,"(yogurt, other vegetables)",(whipped/sour cream),0.010168,0.234192,3.267062
184,"(tropical fruit, other vegetables)",(root vegetables),0.012303,0.342776,3.144780
2,(beef),(root vegetables),0.017387,0.331395,3.040367
158,"(root vegetables, citrus fruit)",(other vegetables),0.010371,0.586207,3.029608
...,...,...,...,...,...
64,(fruit/vegetable juice),(rolls/buns),0.014540,0.201125,1.093458
8,(bottled beer),(other vegetables),0.016167,0.200758,1.037546
10,(bottled beer),(whole milk),0.020437,0.253788,0.993237
137,(shopping bags),(whole milk),0.024504,0.248710,0.973364
