# Project 3 - Product Recommender System

### Importing libraries

In [24]:
import pandas as pd  # Data manipulation and analysis with DataFrames (table structure)

from mlxtend.preprocessing import TransactionEncoder  # Converts transaction lists to binary format (needed for algorithms like Apriori)

from mlxtend.frequent_patterns import apriori, association_rules
# apriori: Algorithm to find frequent itemsets
# association_rules: Generates association rules from frequent itemsets

from io import StringIO  # Allows treating strings as files, useful for simulating data reading from text


### Parameters for generating rules

In [25]:
# --- Frequent-itemset mining (passed to apriori as min_support) ---
suporte_minimo: float = 0.01  # minimum support

# --- Rule generation (passed to association_rules as min_threshold) ---
confianca_minima: float = 0.2  # minimum confidence

# --- Filters applied after the rules have been generated ---
lift_minimo: float = 1.5  # minimum lift
tamanho_minimo: int = 1  # minimum number of items in the antecedent (LHS)

### Loading data

In [26]:
# Read the raw transactions file into a list of item lists (one inner list per line).
# Each line is split on commas; the double quotes that wrap a line stay attached to
# its first and last item here (see the cell output).
# Blank / whitespace-only lines (e.g. a trailing newline at the end of the file) are
# skipped: otherwise each one would become a [''] transaction, adding a phantom
# empty-string item and inflating the transaction count used for support.
with open("transactions.csv", 'r', encoding='utf-8') as f:
  transactions = [line.strip().split(',') for line in f if line.strip()]
transactions

[['"Pasta', 'Eggs', 'Butter', 'Cheese', 'Milk', 'Cookies"'],
 ['"Banana', 'Coffee', 'Chicken', 'Bread', 'Cookies', 'Rice"'],
 ['"Chicken', 'Sugar', 'Eggs', 'Butter', 'Bread', 'Milk"'],
 ['"Pasta', 'Bread', 'Beer"'],
 ['"Sugar', 'Banana"'],
 ['"Eggs', 'Milk', 'Cheese', 'Butter"'],
 ['"Banana', 'Chicken"'],
 ['"Milk', 'Bread"'],
 ['"Rice', 'Beans', 'Sugar', 'Butter', 'Banana"'],
 ['"Chicken', 'Pasta', 'Eggs', 'Coffee"'],
 ['"Sugar', 'Chicken', 'Beer', 'Butter', 'Cookies', 'Coffee"'],
 ['"Beer', 'Butter"'],
 ['"Rice', 'Chicken"'],
 ['"Beer', 'Pasta', 'Chicken', 'Coffee"'],
 ['"Cookies', 'Beer', 'Banana', 'Rice', 'Beans"'],
 ['"Banana', 'Beans', 'Beer', 'Cookies', 'Bread"'],
 ['"Chicken', 'Pasta"'],
 ['"Beans', 'Beer', 'Pasta"'],
 ['"Butter', 'Eggs', 'Rice"'],
 ['"Cookies', 'Milk"'],
 ['"Cookies', 'Milk"'],
 ['"Banana', 'Milk', 'Butter"'],
 ['"Milk', 'Sugar', 'Cookies', 'Rice', 'Coffee"'],
 ['"Beer', 'Eggs', 'Sugar', 'Pasta', 'Chicken', 'Milk"'],
 ['"Beer', 'Beans', 'Milk', 'Eggs', 'Cookie

In [27]:
# Normalise item names: remove every double quote, then trim surrounding whitespace.
# Quotes are removed *before* trimming so that whitespace sitting just inside a quote
# (e.g. '" Milk') is trimmed too; trimming first would leave that whitespace behind
# and produce a distinct ' Milk' item.
# Names that end up empty (e.g. from a doubled or trailing comma) are discarded so
# they do not become an empty-string product.
transactions = [
    [name for name in (item.replace('"', '').strip() for item in trans) if name]
    for trans in transactions
]

### Converting the dataset into transaction (one-hot) format


In [28]:
# One-hot encode the transactions: one row per transaction, one boolean column per product
te = TransactionEncoder()
te.fit(transactions)                 # learn the set of distinct products
te_ary = te.transform(transactions)  # boolean matrix: transaction x product
df = pd.DataFrame(data=te_ary, columns=te.columns_)
display(df.head())

Unnamed: 0,Banana,Beans,Beer,Bread,Butter,Cheese,Chicken,Coffee,Cooki,Cookies,Eggs,Milk,Pasta,Rice,Sugar
0,False,False,False,False,True,True,False,False,False,True,True,True,True,False,False
1,True,False,False,True,False,False,True,True,False,True,False,False,False,True,False
2,False,False,False,True,True,False,True,False,False,False,True,True,False,False,True
3,False,False,True,True,False,False,False,False,False,False,False,False,True,False,False
4,True,False,False,False,False,False,False,False,False,False,False,False,False,False,True


### Mining frequent itemsets

In [29]:
# Mine the frequent itemsets from the one-hot table, using `suporte_minimo` as the
# minimum support. use_colnames=True makes the itemsets hold product names
# (as seen in the displayed output).
frequent_itemsets = apriori(
    df,
    min_support=suporte_minimo,
    use_colnames=True,
)
display(frequent_itemsets)

Unnamed: 0,support,itemsets
0,0.303448,(Banana)
1,0.248276,(Beans)
2,0.272414,(Beer)
3,0.300000,(Bread)
4,0.272414,(Butter)
...,...,...
564,0.010345,"(Milk, Rice, Pasta, Cookies)"
565,0.013793,"(Milk, Sugar, Rice, Cookies)"
566,0.013793,"(Milk, Rice, Eggs, Sugar)"
567,0.010345,"(Bread, Chicken, Pasta, Coffee, Beer)"


### Mining rules

In [30]:
# Derive association rules from the frequent itemsets, thresholding on the
# confidence metric with `confianca_minima`.
regras = association_rules(
    frequent_itemsets,
    metric='confidence',
    min_threshold=confianca_minima,
)
display(regras)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
0,(Banana),(Beans),0.303448,0.248276,0.079310,0.261364,1.052715,1.0,0.003971,1.017719,0.071890,0.167883,0.017410,0.290404
1,(Beans),(Banana),0.248276,0.303448,0.079310,0.319444,1.052715,1.0,0.003971,1.023505,0.066613,0.167883,0.022965,0.290404
2,(Banana),(Beer),0.303448,0.272414,0.093103,0.306818,1.126295,1.0,0.010440,1.049633,0.160983,0.192857,0.047286,0.324295
3,(Beer),(Banana),0.272414,0.303448,0.093103,0.341772,1.126295,1.0,0.010440,1.058223,0.154116,0.192857,0.055019,0.324295
4,(Banana),(Bread),0.303448,0.300000,0.072414,0.238636,0.795455,1.0,-0.018621,0.919403,-0.269628,0.136364,-0.087662,0.240008
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1491,"(Milk, Coffee, Chicken)","(Pasta, Cheese)",0.034483,0.058621,0.010345,0.300000,5.117647,1.0,0.008323,1.344828,0.833333,0.125000,0.256410,0.238235
1492,"(Milk, Pasta, Cheese)","(Coffee, Chicken)",0.037931,0.117241,0.010345,0.272727,2.326203,1.0,0.005898,1.213793,0.592593,0.071429,0.176136,0.180481
1493,"(Pasta, Coffee, Cheese)","(Milk, Chicken)",0.017241,0.100000,0.010345,0.600000,6.000000,1.0,0.008621,2.250000,0.847953,0.096774,0.555556,0.351724
1494,"(Milk, Pasta, Coffee)","(Cheese, Chicken)",0.037931,0.075862,0.010345,0.272727,3.595041,1.0,0.007467,1.270690,0.750299,0.100000,0.213026,0.204545


### Rules filtering

In [31]:
# Keep only the rules that satisfy both post-generation filters:
#   * lift of at least `lift_minimo`
#   * antecedent (LHS) containing at least `tamanho_minimo` items
regras_filtradas = regras.loc[
    (regras['lift'] >= lift_minimo)
    & (regras['antecedents'].apply(len) >= tamanho_minimo)
]
# Show only the columns relevant for reading the rules
display(
    regras_filtradas[
        ['antecedents', 'consequents', 'support', 'confidence', 'lift']
    ]
)

Unnamed: 0,antecedents,consequents,support,confidence,lift
196,"(Banana, Cheese)",(Beer),0.031034,0.450000,1.651899
225,"(Banana, Bread)",(Eggs),0.027586,0.380952,1.534392
396,"(Beans, Chicken)",(Coffee),0.037931,0.478261,1.541063
422,"(Cookies, Beans)",(Eggs),0.027586,0.380952,1.534392
433,"(Milk, Beans)",(Eggs),0.027586,0.444444,1.790123
...,...,...,...,...,...
1491,"(Milk, Coffee, Chicken)","(Pasta, Cheese)",0.010345,0.300000,5.117647
1492,"(Milk, Pasta, Cheese)","(Coffee, Chicken)",0.010345,0.272727,2.326203
1493,"(Pasta, Coffee, Cheese)","(Milk, Chicken)",0.010345,0.600000,6.000000
1494,"(Milk, Pasta, Coffee)","(Cheese, Chicken)",0.010345,0.272727,3.595041


### Statistics

In [32]:
# Summary: rule counts before and after filtering, followed by the mean
# support / confidence / lift of the filtered rules (4 decimal places).
print(
    f"Total Rules Generated: {len(regras)}",
    f"Total Filtered Rules: {len(regras_filtradas)}",
    f"Average Support: {regras_filtradas['support'].mean():.4f}",
    f"Average Confidence: {regras_filtradas['confidence'].mean():.4f}",
    f"Average Lift: {regras_filtradas['lift'].mean():.4f}",
    sep="\n",
)

Total Rules Generated: 1496
Total Filtered Rules: 360
Average Support: 0.0139
Average Confidence: 0.5036
Average Lift: 2.1564


### Financial Impact of the Recommender System (simulation)

To estimate the financial impact, we need to make a few assumptions about:

1. **Average profit per transaction:** The profit you earn on average from each sale.
2. **Increase in conversion rate due to recommendations:** How much the likelihood of a customer making a purchase increases when they receive a relevant recommendation.
3. **Number of transactions:** The total number of transactions during the analysis period.

With these assumptions, we can calculate the estimated financial impact.

In [34]:
# --- Assumptions ---
# Average profit earned per transaction, in local currency (example: R$ 5.00)
lucro_medio_por_transacao = 5.00

# Relative uplift attributed to recommendations, expressed as a fraction
# (0.10 means a 10% increase)
aumento_conversao_percentual = 0.10

# Number of transactions in the analysis period.
# This is a fixed figure ("updated based on user input"); it is not computed
# from the transactions loaded earlier in the notebook.
num_transacoes = 500

# --- Financial Impact Calculation ---
# Additional transactions attributed to recommendations = transactions * uplift
transacoes_adicionais_estimadas = aumento_conversao_percentual * num_transacoes

# Estimated impact = additional transactions * average profit per transaction
impacto_financeiro_estimado = lucro_medio_por_transacao * transacoes_adicionais_estimadas

# --- Displaying the Result ---
print(
    f"Total number of transactions: {num_transacoes}",
    f"Estimated number of additional transactions from recommendations: {transacoes_adicionais_estimadas:.2f}",
    f"Estimated financial impact of recommendations: R$ {impacto_financeiro_estimado:.2f}",
    sep="\n",
)

Total number of transactions: 500
Estimated number of additional transactions from recommendations: 50.00
Estimated financial impact of recommendations: R$ 250.00
