In [108]:
import pandas as pd
import numpy as np
from mlxtend.frequent_patterns import apriori, association_rules, fpgrowth


#Suppress warnings
import warnings
warnings.filterwarnings('ignore')


In [104]:
# Definindo as colunas
columns = ["Halteres", "Bolsa", "Garrafa", "Creatina", "Whey Protein"]

# Inicializando a lista de dados
data = []

# Definindo a quantidade de transações
num_transactions = 100

# Gerando dados
for i in range(0, num_transactions):
    # Criando uma transação com valores binários
    halteres = np.random.randint(2)
    bolsa = np.random.randint(2)
    garrafa = np.random.randint(2)
    
    # Garantindo que Creatina e Whey Protein estejam juntos na maioria das vezes
    creatina = np.random.randint(2)
    whey_protein = creatina if np.random.random() < 0.8 else np.random.randint(2)
    
    # Adicionando a transação
    data.append([halteres, bolsa, garrafa, creatina, whey_protein])

# Criando o DataFrame
df = pd.DataFrame(data, columns=columns)

df.head(10)

Unnamed: 0,Halteres,Bolsa,Garrafa,Creatina,Whey Protein
0,0,0,1,1,1
1,1,1,1,0,0
2,0,1,1,1,1
3,1,1,1,1,1
4,1,1,0,0,0
5,1,1,0,0,0
6,1,1,0,0,0
7,1,1,0,1,1
8,0,1,0,0,0
9,1,0,0,0,0


In [105]:
frequent_itemsets = apriori(df, min_support=0.25, use_colnames=True)
frequent_itemsets


Unnamed: 0,support,itemsets
0,0.46,(Halteres)
1,0.57,(Bolsa)
2,0.47,(Garrafa)
3,0.51,(Creatina)
4,0.53,(Whey Protein)
5,0.26,"(Halteres, Bolsa)"
6,0.26,"(Garrafa, Bolsa)"
7,0.25,"(Creatina, Bolsa)"
8,0.26,"(Whey Protein, Bolsa)"
9,0.26,"(Creatina, Garrafa)"


In [106]:
# Separando as associações baseadas no algoritmo apriori, usando Lift como um limiar
rules = association_rules(frequent_itemsets, metric="lift", min_threshold=1)

rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(Creatina),(Garrafa),0.51,0.47,0.26,0.509804,1.084689,0.0203,1.0812,0.159341
1,(Garrafa),(Creatina),0.47,0.51,0.26,0.553191,1.084689,0.0203,1.096667,0.147315
2,(Whey Protein),(Garrafa),0.53,0.47,0.27,0.509434,1.083902,0.0209,1.080385,0.164697
3,(Garrafa),(Whey Protein),0.47,0.53,0.27,0.574468,1.083902,0.0209,1.1045,0.146052
4,(Creatina),(Whey Protein),0.51,0.53,0.5,0.980392,1.849797,0.2297,23.97,0.937551
5,(Whey Protein),(Creatina),0.53,0.51,0.5,0.943396,1.849797,0.2297,8.656667,0.977447
6,"(Creatina, Whey Protein)",(Garrafa),0.5,0.47,0.26,0.52,1.106383,0.025,1.104167,0.192308
7,"(Creatina, Garrafa)",(Whey Protein),0.26,0.53,0.26,1.0,1.886792,0.1222,inf,0.635135
8,"(Whey Protein, Garrafa)",(Creatina),0.27,0.51,0.26,0.962963,1.888163,0.1223,13.23,0.644362
9,(Creatina),"(Whey Protein, Garrafa)",0.51,0.27,0.26,0.509804,1.888163,0.1223,1.4892,0.959969


In [107]:
# Salvando as 10 maiores associações (em ordem de lift)
top_rules = rules.sort_values(by='lift', ascending=False).head(10)

# Convertendo antecedentes e consequentes em strings para usar como tabela
top_rules['antecedents'] = top_rules['antecedents'].apply(lambda x: ', '.join(list(x)))
top_rules['consequents'] = top_rules['consequents'].apply(lambda x: ', '.join(list(x)))

# Creating a table displaying the top 10 rules
top_rules_table = top_rules[['antecedents', 'consequents', 'support', 'confidence', 'lift']].round(2)


# Renomeando as colunas
top_rules_table.rename(columns={
    'antecedents': 'Antecedentes',
    'consequents': 'Consequentes',
    'support': 'Suporte',
    'confidence': 'Confiança',
    'lift': 'Lift'
}, inplace=True)


top_rules_table.reset_index().drop(columns='index', axis =1)

Unnamed: 0,Antecedentes,Consequentes,Suporte,Confiança,Lift
0,"Whey Protein, Garrafa",Creatina,0.26,0.96,1.89
1,Creatina,"Whey Protein, Garrafa",0.26,0.51,1.89
2,Whey Protein,"Creatina, Garrafa",0.26,0.49,1.89
3,"Creatina, Garrafa",Whey Protein,0.26,1.0,1.89
4,Creatina,Whey Protein,0.5,0.98,1.85
5,Whey Protein,Creatina,0.5,0.94,1.85
6,"Creatina, Whey Protein",Garrafa,0.26,0.52,1.11
7,Garrafa,"Creatina, Whey Protein",0.26,0.55,1.11
8,Creatina,Garrafa,0.26,0.51,1.08
9,Garrafa,Creatina,0.26,0.55,1.08


In [121]:
# Aplicando o algoritmo FP-Growth
frequent_itemsets = fpgrowth(df, min_support=0.25, use_colnames=True)

# Gerando as regras de associação
rules = association_rules(frequent_itemsets, metric="lift", min_threshold=1)

# Renomeando colunas
rules = rules.rename(columns={
    'antecedents': 'Antecedentes',
    'consequents': 'Consequentes',
    'support': 'Suporte',
    'confidence': 'Confiança',
    'lift': 'Lift'
})

# Salvando as 10 maiores associações (em ordem de lift)
top_rules = rules.sort_values(by='Lift', ascending=False).head(10)

# Convertendo antecedentes e consequentes em strings para usar como tabela
top_rules['Antecedentes'] = top_rules['Antecedentes'].apply(lambda x: ', '.join(list(x)))
top_rules['Consequentes'] = top_rules['Consequentes'].apply(lambda x: ', '.join(list(x)))

# Creating a table displaying the top 10 rules
top_rules_table = top_rules[['Antecedentes', 'Consequentes', 'Suporte', 'Confiança', 'Lift']].round(2)


top_rules_table.reset_index().drop(columns='index', axis =1)

Unnamed: 0,Antecedentes,Consequentes,Suporte,Confiança,Lift
0,"Whey Protein, Garrafa",Creatina,0.26,0.96,1.89
1,Creatina,"Whey Protein, Garrafa",0.26,0.51,1.89
2,Whey Protein,"Creatina, Garrafa",0.26,0.49,1.89
3,"Creatina, Garrafa",Whey Protein,0.26,1.0,1.89
4,Creatina,Whey Protein,0.5,0.98,1.85
5,Whey Protein,Creatina,0.5,0.94,1.85
6,"Creatina, Whey Protein",Garrafa,0.26,0.52,1.11
7,Garrafa,"Creatina, Whey Protein",0.26,0.55,1.11
8,Creatina,Garrafa,0.26,0.51,1.08
9,Garrafa,Creatina,0.26,0.55,1.08
