# Projeto 3 - Sistema de Recomendação de Produtos

### Importações

In [None]:
import pandas as pd  # Manipulação e análise de dados com DataFrames (estrutura de tabela)

from mlxtend.preprocessing import TransactionEncoder  # Converte listas de transações em formato binário (necessário para algoritmos como Apriori)

from mlxtend.frequent_patterns import apriori, association_rules
# apriori: Algoritmo para encontrar conjuntos frequentes de itens (itemsets)
# association_rules: Gera regras de associação a partir dos conjuntos frequentes

from io import StringIO  # Permite tratar strings como arquivos, útil para simular leitura de dados em texto


### Parâmetros para Geração de Regras

In [None]:
# --- Frequent-itemset mining ---
# Passed to apriori as min_support.
suporte_minimo = 0.01

# --- Rule generation ---
# Passed to association_rules as the minimum confidence threshold.
confianca_minima = 0.2

# --- Filters applied after the rules have been generated ---
# Minimum lift a rule must reach to be kept.
lift_minimo = 1.5
# Minimum number of items in the rule's antecedent (LHS).
tamanho_minimo = 1

### Importa Dados

In [None]:
# Load the file as a list of transactions: one line per transaction, items
# separated by commas.
# Blank lines are skipped: ''.split(',') returns [''], which would otherwise
# become a transaction containing a single empty-string "product".
# Items are kept as raw text here (quote characters are not removed).
with open("transacoes.csv", 'r', encoding='utf-8') as f:
  linhas = (line.strip() for line in f)
  transactions = [linha.split(',') for linha in linhas if linha]
transactions

[['"Macarrão', 'Ovos', 'Manteiga', 'Queijo', 'Leite', 'Biscoitos"'],
 ['"Banana', 'Café', 'Frango', 'Pão', 'Biscoitos', 'Arroz"'],
 ['"Frango', 'Açúcar', 'Ovos', 'Manteiga', 'Pão', 'Leite"'],
 ['"Macarrão', 'Pão', 'Cerveja"'],
 ['"Açúcar', 'Banana"'],
 ['"Ovos', 'Leite', 'Queijo', 'Manteiga"'],
 ['"Banana', 'Frango"'],
 ['"Leite', 'Pão"'],
 ['"Arroz', 'Feijão', 'Açúcar', 'Manteiga', 'Banana"'],
 ['"Frango', 'Macarrão', 'Ovos', 'Café"'],
 ['"Açúcar', 'Frango', 'Cerveja', 'Manteiga', 'Biscoitos', 'Café"'],
 ['"Cerveja', 'Manteiga"'],
 ['"Arroz', 'Frango"'],
 ['"Cerveja', 'Macarrão', 'Frango', 'Café"'],
 ['"Biscoitos', 'Cerveja', 'Banana', 'Arroz', 'Feijão"'],
 ['"Banana', 'Feijão', 'Cerveja', 'Biscoitos', 'Pão"'],
 ['"Frango', 'Macarrão"'],
 ['"Feijão', 'Cerveja', 'Macarrão"'],
 ['"Manteiga', 'Ovos', 'Arroz"'],
 ['"Biscoitos', 'Leite"'],
 ['"Biscoitos', 'Leite"'],
 ['"Banana', 'Leite', 'Manteiga"'],
 ['"Leite', 'Açúcar', 'Biscoitos', 'Arroz', 'Café"'],
 ['"Cerveja', 'Ovos', 'Açúcar', 'Ma

In [None]:
def _limpa_item(item):
  """Return the item text without double quotes or surrounding whitespace.

  Quotes are removed BEFORE stripping: stripping first leaves whitespace that
  sits between the text and a quote (e.g. ' Ovos "' would become 'Ovos '),
  which would make the same product appear under two different labels.
  """
  return item.replace('"', '').strip()


# Normalise every item and drop items that end up empty (e.g. produced by a
# trailing comma), so no empty-string "product" reaches the encoder.
transactions = [
  [limpo for limpo in map(_limpa_item, trans) if limpo]
  for trans in transactions
]

### Transforma em formato apropriado de transações


In [None]:
# One-hot encode the transactions: one row per transaction, one boolean
# column per product.
te = TransactionEncoder()
te.fit(transactions)  # learns the product labels, exposed as te.columns_
te_ary = te.transform(transactions)
df = pd.DataFrame(data=te_ary, columns=te.columns_)
df.head()

Unnamed: 0,Arroz,Açúcar,Banana,Biscoitos,Café,Cerveja,Feijão,Frango,Leite,Macarrão,Manteiga,Ovos,Pão,Queijo
0,False,False,False,True,False,False,False,False,True,True,True,True,False,True
1,True,False,True,True,True,False,False,True,False,False,False,False,True,False
2,False,True,False,False,False,False,False,True,True,False,True,True,True,False
3,False,False,False,False,False,True,False,False,False,True,False,False,True,False
4,False,True,True,False,False,False,False,False,False,False,False,False,False,False


### Minera Itens Frequentes

In [None]:
# Mine the itemsets whose support reaches suporte_minimo; use_colnames=True
# labels each itemset with the product names (the DataFrame column names).
frequent_itemsets = apriori(
    df,
    min_support=suporte_minimo,
    use_colnames=True,
)
frequent_itemsets

Unnamed: 0,support,itemsets
0,0.278,(Arroz)
1,0.298,(Açúcar)
2,0.290,(Banana)
3,0.282,(Biscoitos)
4,0.290,(Café)
...,...,...
503,0.010,"(Queijo, Leite, Café, Macarrão)"
504,0.010,"(Queijo, Leite, Cerveja, Macarrão)"
505,0.010,"(Feijão, Manteiga, Macarrão, Pão)"
506,0.010,"(Frango, Leite, Pão, Macarrão)"


### Minera Regras

In [None]:
# Derive association rules from the frequent itemsets, using confidence as
# the selection metric with confianca_minima as its threshold.
regras = association_rules(
    frequent_itemsets,
    metric='confidence',
    min_threshold=confianca_minima,
)
regras

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
0,(Arroz),(Açúcar),0.278,0.298,0.100,0.359712,1.207088,1.0,0.017156,1.096382,0.237618,0.210084,0.087909,0.347641
1,(Açúcar),(Arroz),0.298,0.278,0.100,0.335570,1.207088,1.0,0.017156,1.086646,0.244387,0.210084,0.079737,0.347641
2,(Arroz),(Banana),0.278,0.290,0.088,0.316547,1.091541,1.0,0.007380,1.038842,0.116155,0.183333,0.037390,0.309998
3,(Banana),(Arroz),0.290,0.278,0.088,0.303448,1.091541,1.0,0.007380,1.036535,0.118118,0.183333,0.035247,0.309998
4,(Arroz),(Biscoitos),0.278,0.282,0.066,0.237410,0.841880,1.0,-0.012396,0.941528,-0.206435,0.133603,-0.062103,0.235726
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1213,"(Leite, Pão, Macarrão)",(Frango),0.028,0.296,0.010,0.357143,1.206564,1.0,0.001712,1.095111,0.176132,0.031847,0.086851,0.195463
1214,"(Frango, Queijo, Leite)",(Macarrão),0.028,0.278,0.012,0.428571,1.541624,1.0,0.004216,1.263500,0.361454,0.040816,0.208548,0.235868
1215,"(Frango, Queijo, Macarrão)",(Leite),0.018,0.290,0.012,0.666667,2.298851,1.0,0.006780,2.130000,0.575356,0.040541,0.530516,0.354023
1216,"(Frango, Leite, Macarrão)",(Queijo),0.030,0.266,0.012,0.400000,1.503759,1.0,0.004020,1.223333,0.345361,0.042254,0.182561,0.222556


### Filtra Regras

In [None]:
# Keep only the rules that reach the minimum lift and whose antecedent holds
# at least tamanho_minimo items.
lift_suficiente = regras['lift'] >= lift_minimo
antecedente_suficiente = regras['antecedents'].apply(len) >= tamanho_minimo
regras_filtradas = regras[lift_suficiente & antecedente_suficiente]
regras_filtradas[['antecedents', 'consequents', 'support', 'confidence', 'lift']]

Unnamed: 0,antecedents,consequents,support,confidence,lift
201,"(Arroz, Macarrão)",(Açúcar),0.034,0.472222,1.584638
202,"(Açúcar, Macarrão)",(Arroz),0.034,0.425000,1.528777
222,"(Arroz, Feijão)",(Banana),0.036,0.461538,1.591512
267,"(Arroz, Macarrão)",(Café),0.032,0.444444,1.532567
854,"(Leite, Cerveja)",(Macarrão),0.026,0.419355,1.508471
...,...,...,...,...,...
1208,"(Feijão, Pão, Macarrão)",(Manteiga),0.010,0.500000,1.724138
1210,"(Frango, Leite, Pão)",(Macarrão),0.010,0.500000,1.798561
1214,"(Frango, Queijo, Leite)",(Macarrão),0.012,0.428571,1.541624
1215,"(Frango, Queijo, Macarrão)",(Leite),0.012,0.666667,2.298851


### Estatísticas

In [None]:
# Summary: rule counts before and after filtering, plus the mean support,
# confidence and lift of the filtered rules.
total_geradas = len(regras)
total_filtradas = len(regras_filtradas)
suporte_medio = regras_filtradas['support'].mean()
confianca_media = regras_filtradas['confidence'].mean()
lift_medio = regras_filtradas['lift'].mean()

print(f"Total de Regras Geradas: {total_geradas}")
print(f"Total de Regras Filtradas: {total_filtradas}")
print(f"Suporte Médio: {suporte_medio:.4f}")
print(f"Confiança Média: {confianca_media:.4f}")
print(f"Lift Médio: {lift_medio:.4f}")

Total de Regras Geradas: 1218
Total de Regras Filtradas: 110
Suporte Médio: 0.0126
Confiança Média: 0.4980
Lift Médio: 1.8125
