# Regras de associação - Apriori

## 1. Introdução

### 1.1 Importação e carga do dataset

In [2]:
import sklearn
import pandas as pd
import numpy as np

In [3]:
dfs = pd.read_csv('mercado2.csv', sep=';'  , engine='python', header=None)

### 1.2 Análise das compras por transação

In [4]:
dfs['qt_itens']  = len(dfs[0].str.split(",").tolist()[0])

In [5]:
for i in range(0,len(dfs)):
    dfs['qt_itens'].iloc[i] = len(dfs.iloc[i].str.split(",").tolist()[0])

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)


In [6]:
dfs.head()

Unnamed: 0,0,qt_itens
0,"shrimp,almonds,avocado,vegetables mix,green gr...",20
1,"burgers,meatballs,eggs",3
2,chutney,1
3,"turkey,avocado",2
4,"mineral water,milk,energy bar,whole wheat rice...",5


In [7]:
dfs.describe()

Unnamed: 0,qt_itens
count,7501.0
mean,3.914545
std,2.90554
min,1.0
25%,2.0
50%,3.0
75%,5.0
max,20.0


## 2. Regras de associação  - algoritmo Apriori

In [8]:
from apyori import apriori

### 2.1 Preparação dos dados para o algoritmo

In [9]:
transactions = dfs.values.tolist()

In [10]:
dfs = dfs[0].str.split(",", expand = True)

In [11]:
dfs.fillna(0,inplace=True)

In [12]:
dfs.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
0,shrimp,almonds,avocado,vegetables mix,green grapes,whole weat flour,yams,cottage cheese,energy drink,tomato juice,low fat yogurt,green tea,honey,salad,mineral water,salmon,antioxydant juice,frozen smoothie,spinach,olive oil
1,burgers,meatballs,eggs,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,chutney,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,turkey,avocado,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,mineral water,milk,energy bar,whole wheat rice,green tea,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [13]:
transactions = []
for i in range(0,len(dfs)):
    transactions.append([str(dfs.values[i,j]) for j in range(0,20) if str(dfs.values[i,j])!='0'])

### 2.2 Execução do algoritmo Apriori

#### 2.2.1 Iteração 1 - parâmetros: suporte mínimo - 0.01 e confiança mínima - 0.05

In [44]:
rules = apriori(transactions, min_support = 0.01, min_confidence = 0.05)

In [45]:
#Support degree (support), first enter an empty list, and then assign
supports=[]
#Confidence
confidences=[]
#Lift
lifts=[]
#Based on items_base
bases=[]
#Derivation items items_add
adds=[]

In [46]:
for r in rules:
    for x in r.ordered_statistics:
        supports.append(r.support)
        confidences.append(x.confidence)
        lifts.append(x.lift)
        bases.append(list(x.items_base))
        adds.append(list(x.items_add))

In [47]:
df_result = pd.DataFrame({
    'support':supports,
    'confidence':confidences,
    'lift':lifts,
    'base':bases,
    'adds':adds
})

##### OBS.: Excluindo os resultados com apenas 1 item (compra de apenas 1 item)

In [48]:
df_result = df_result[(df_result.base.str.len() >= 1)]

#### 2.2.1.1 Filtro dos primeiros 30 resultados, ordenados em ordem decrescente de lift, confidence, support

##### // (df_result.support > 0.015) & (df_result.confidence > 0.28) & (df_result.lift > 1.7)

In [49]:
df_result.sort_values(by=['lift','confidence','support'], ascending=False).head(30)

Unnamed: 0,support,confidence,lift,base,adds
264,0.015998,0.32345,3.291994,[herb & pepper],[ground beef]
263,0.015998,0.162822,3.291994,[ground beef],[herb & pepper]
433,0.017064,0.285714,2.907928,"[mineral water, spaghetti]",[ground beef]
428,0.017064,0.173677,2.907928,[ground beef],"[mineral water, spaghetti]"
443,0.010265,0.171875,2.609786,"[mineral water, spaghetti]",[olive oil]
440,0.010265,0.15587,2.609786,[olive oil],"[mineral water, spaghetti]"
240,0.016131,0.235867,2.474464,[tomatoes],[frozen vegetables]
239,0.016131,0.169231,2.474464,[frozen vegetables],[tomatoes]
236,0.016664,0.233209,2.446574,[shrimp],[frozen vegetables]
235,0.016664,0.174825,2.446574,[frozen vegetables],[shrimp]


#### 2.2.1.2 Filtro dos primeiros 30 resultados, ordenados em ordem crescente de lift, confidence, support

In [53]:
df_result.sort_values(by=['lift','confidence'], ascending=[True,False]).head(30)

Unnamed: 0,support,confidence,lift,base,adds
130,0.010532,0.131012,0.729019,[cookies],[eggs]
131,0.010532,0.058605,0.729019,[eggs],[cookies]
144,0.011065,0.061573,0.77623,[eggs],[escalope]
145,0.011065,0.139496,0.77623,[escalope],[eggs]
86,0.010399,0.129353,0.789486,[cookies],[chocolate]
85,0.010399,0.063466,0.789486,[chocolate],[cookies]
194,0.013865,0.141113,0.825652,[ground beef],[french fries]
193,0.013865,0.081123,0.825652,[french fries],[ground beef]
199,0.033729,0.197348,0.827912,[french fries],[mineral water]
200,0.033729,0.141499,0.827912,[mineral water],[french fries]


#### 2.2.2 Iteração 2 - parâmetros: suporte mínimo - 0.015, confiança mínima - 0.30 e lift mínimo - 1.8

In [54]:
rules = apriori(transactions, min_support = 0.015, min_confidence = 0.30, min_lift = 1.8)

In [55]:
#Support degree (support), first enter an empty list, and then assign
supports=[]
#Confidence
confidences=[]
#Lift
lifts=[]
#Based on items_base
bases=[]
#Derivation items items_add
adds=[]

In [56]:
for r in rules:
    for x in r.ordered_statistics:
        supports.append(r.support)
        confidences.append(x.confidence)
        lifts.append(x.lift)
        bases.append(list(x.items_base))
        adds.append(list(x.items_add))

In [57]:
df_result = pd.DataFrame({
    'support':supports,
    'confidence':confidences,
    'lift':lifts,
    'base':bases,
    'adds':adds
})

##### OBS.: Excluindo os resultados com apenas 1 item (compra de apenas 1 item)

In [58]:
df_result = df_result[(df_result.base.str.len() >= 1) | (df_result.adds.str.len() >= 1)]

#### Filtro dos primeiros 30 resultados, ordenados em ordem decrescente de lift, confidence, support

In [59]:
df_result.sort_values(by=['lift','confidence','support'], ascending=False).head(30)

Unnamed: 0,support,confidence,lift,base,adds
2,0.015998,0.32345,3.291994,[herb & pepper],[ground beef]
8,0.017064,0.416938,2.394681,"[mineral water, ground beef]",[spaghetti]
5,0.015198,0.300792,2.321232,[soup],[milk]
3,0.039195,0.398915,2.291162,[ground beef],[spaghetti]
7,0.02293,0.348178,1.999758,[olive oil],[spaghetti]
6,0.023064,0.456464,1.914955,[soup],[mineral water]
4,0.016264,0.328841,1.888695,[herb & pepper],[spaghetti]
10,0.015731,0.327778,1.882589,"[mineral water, milk]",[spaghetti]
11,0.015731,0.443609,1.861024,"[milk, spaghetti]",[mineral water]
0,0.028796,0.330275,1.83783,[burgers],[eggs]
