In [None]:
import os
os.environ['IPYTHON_SUPPRESS_DEPRECATED_CONFIGS'] = '1'
from google.colab import drive
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules

# Mount Google Drive inside the Colab runtime so the shared-drive CSV below is
# reachable as a regular file path. force_remount=True asks for a fresh mount
# even if the mount point is already in use.
drive.mount('/content/drive', force_remount=True)

# Step 1: Load the cleaned/merged orders dataset from the shared drive.
# The code below reads the columns 'cluster2', 'id_orden' and 'nombre_producto'.
route = '/content/drive/Shared drives/Capstone/Dataset_cleaned_merged/df_final_version.csv'
orders = pd.read_csv(route)

# Step 2: Collect the distinct cluster labels (no groupby happens here; each
# label is later used to filter `orders` down to a single cluster).
clusters = orders['cluster2'].unique()

# Step 3: Define the function to process each cluster
def process_cluster(
    cluster,
    orders,
    *,
    output_dir='/content/drive/Shared drives/Capstone/Dataset_cleaned_merged/Apriori_rules/5_cluster',
    min_support=0.0035,
    min_lift=0.8,
    min_confidence=0.4,
):
    """Mine association rules for one cluster, save them as CSV and print a summary.

    The rows of ``orders`` whose ``'cluster2'`` value equals ``cluster`` are
    grouped by ``'id_orden'`` into baskets of ``'nombre_producto'`` values,
    one-hot encoded, and passed to ``apriori`` / ``association_rules``.
    Two files are written to ``output_dir``:

    * ``cluster_2_<cluster>_rules.csv`` -- every generated rule
    * ``cluster_2_<cluster>_filtered_rules.csv`` -- rules with
      ``lift > min_lift`` and ``confidence > min_confidence``

    Args:
        cluster: Cluster label to select in ``orders['cluster2']``.
        orders: DataFrame containing at least the columns ``'cluster2'``,
            ``'id_orden'`` and ``'nombre_producto'``.
        output_dir: Directory receiving the two CSV files; created if missing.
        min_support: Minimum support passed to ``apriori``.
        min_lift: Exclusive lower bound on lift for the filtered rules.
        min_confidence: Exclusive lower bound on confidence for the filtered
            rules.

    Returns:
        None. Results are written to disk and printed.
    """
    cluster_orders = orders[orders['cluster2'] == cluster]

    # Guard: without any order there is nothing to mine, and the downstream
    # mlxtend calls are not meant to be fed empty input.
    if cluster_orders.empty:
        print(f"Cluster {cluster}: no orders found, skipping.")
        print()
        return

    # One basket (list of product names) per order id.
    transactions = (
        cluster_orders.groupby('id_orden')['nombre_producto'].apply(list).tolist()
    )

    # Local import: only needed once there is data to encode.
    from mlxtend.preprocessing import TransactionEncoder

    # One-hot encode the baskets: one boolean column per distinct product.
    encoder = TransactionEncoder()
    encoded = encoder.fit(transactions).transform(transactions)
    df_onehot = pd.DataFrame(encoded, columns=encoder.columns_)

    frequent_itemsets = apriori(df_onehot, min_support=min_support, use_colnames=True)

    # association_rules rejects an empty itemset table with a ValueError, which
    # would abort processing of every remaining cluster; skip this one instead.
    if frequent_itemsets.empty:
        print(f"Cluster {cluster}: no frequent itemsets at min_support={min_support}, skipping.")
        print()
        return

    # The very low lift threshold keeps practically every candidate rule; the
    # meaningful filtering happens right below.
    rules = association_rules(frequent_itemsets, metric="lift", min_threshold=0.0015)

    keep = (rules['lift'] > min_lift) & (rules['confidence'] > min_confidence)
    filtered_rules = rules[keep]

    # Save the rules to CSV files (make sure the target folder exists first).
    os.makedirs(output_dir, exist_ok=True)
    rules.to_csv(os.path.join(output_dir, f'cluster_2_{cluster}_rules.csv'), index=False)
    filtered_rules.to_csv(
        os.path.join(output_dir, f'cluster_2_{cluster}_filtered_rules.csv'),
        index=False,
    )

    # Print the number of rules and filtered rules
    print(f"Cluster {cluster}:")
    print(f"Number of rules: {rules.shape[0]}")
    print(f"Number of filtered rules: {filtered_rules.shape[0]}")
    print()

    # With zero rules every aggregate below would be NaN and the sentences
    # would be misleading, so stop after reporting the counts.
    if rules.empty:
        print(f"Metrics for Cluster {cluster}: no rules were generated, nothing to summarize.")
        print()
        return

    # Aggregate metrics used in the interpretation sentences
    # (support/confidence are converted to percentages).
    support_mean = rules['support'].mean() * 100
    confidence_min = rules['confidence'].min() * 100
    confidence_max = rules['confidence'].max() * 100
    confidence_mean = rules['confidence'].mean() * 100
    lift_min = rules['lift'].min()
    lift_max = rules['lift'].max()
    lift_mean = rules['lift'].mean()

    print(f"Metrics for Cluster {cluster}:")
    print(f"Support: Each rule applies to around {support_mean:.2f}% of the transactions.")
    print(f"Confidence: The lowest confidence means that {confidence_min:.2f}% of these transactions containing the antecedent also contain the consequent while the highest confidence implies that {confidence_max:.2f}% of the transactions containing the antecedent also contain the consequent. The average is {confidence_mean:.2f}%.")
    print(f"Lift: A lift value greater than 1 indicates a positive correlation between the antecedent and the consequent. The minimum lift value suggests that the items in the rule appear together almost {lift_min:.2f} times as often as expected if they were independent, while the highest means that the items appear together {lift_max:.2f} times more often than expected. The average is {lift_mean:.2f} times.")
    print("Leverage: Positive leverage values indicate that the items in the rule appear together more frequently than expected by chance.")
    print()

# Step 4: Mine, save and summarize the rules for every cluster label in turn
for cluster in clusters:
    process_cluster(cluster=cluster, orders=orders)

  and should_run_async(code)


Mounted at /content/drive
Cluster 1:
Number of rules: 142
Number of filtered rules: 0

Metrics for Cluster 1:
Support: Each rule applies to around 0.56% of the transactions.
Confidence: The lowest confidence means that 2.63% of these transactions containing the antecedent also contain the consequent while the highest confidence implies that 34.27% of the transactions containing the antecedent also contain the consequent. The average is 12.91%.
Lift: A lift value greater than 1 indicates a positive correlation between the antecedent and the consequent. The minimum lift value suggests that the items in the rule appear together almost 0.94 times as often as expected if they were independent, while the highest means that the items appear together 11.90 times more often than expected. The average is 2.28 times.
Leverage: Positive leverage values indicate that the items in the rule appear together more frequently than expected by chance.

Cluster 0:
Number of rules: 1086
Number of filtered r