In [None]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

In [None]:
# Load the stationery export, then normalise every header: trim surrounding
# whitespace, turn spaces into underscores and upper-case the result.
df = pd.read_csv('EXPORT_PAPELERIA.csv')
df.columns = [label.strip().replace(" ", "_").upper() for label in df.columns]

In [None]:
# Keep only the order identifier and the product sub-category, and discard
# rows where either is missing: a NaN item would otherwise be mixed in with
# the string labels, which TransactionEncoder cannot order when it builds
# its column list.
basket_data = df[['ORDER_ID', 'SUB-CATEGORY']].dropna()

# Collect the sub-categories bought in each order (one list per ORDER_ID)
grouped_basket = basket_data.groupby('ORDER_ID')['SUB-CATEGORY'].apply(list)

# Plain list-of-lists transaction format expected by TransactionEncoder
transactions = grouped_basket.tolist()

# One-hot encode: one row per order, one boolean column per sub-category
te = TransactionEncoder()
te_ary = te.fit_transform(transactions)
df_encoded = pd.DataFrame(te_ary, columns=te.columns_)

# Frequent itemsets: combinations present in at least 2% of the orders
frequent_itemsets = apriori(df_encoded, min_support=0.02, use_colnames=True)

# Association rules scored by lift; the very low threshold keeps
# practically every rule derivable from the frequent itemsets
rules = association_rules(frequent_itemsets, metric='lift', min_threshold=0.001)

In [None]:
import networkx as nx
import matplotlib.pyplot as plt
import matplotlib.cm as cm

# Rule metric shown as edge colour (choose one: 'support', 'confidence', 'lift')
metric_to_display = 'confidence'

# Directed graph with one edge per rule: antecedent itemset -> consequent itemset
G = nx.DiGraph()
for _, rule in rules.iterrows():
    # Itemsets are frozensets; sort the items so the same itemset always
    # produces the same node label regardless of set iteration order.
    antecedent = ', '.join(sorted(rule['antecedents']))
    consequent = ', '.join(sorted(rule['consequents']))
    G.add_edge(antecedent, consequent, **{metric_to_display: rule[metric_to_display]})

# Metric value of every edge, in the same order as G.edges()
edge_attrs = nx.get_edge_attributes(G, metric_to_display)
edge_values = list(edge_attrs.values())

# Values rescaled to [0, 1]; kept for any later cell that wants them
norm_edge_values = []

if not edge_values:
    # min()/max() of an empty list would raise, and there is nothing to draw
    print('No association rules to plot; try lowering min_support or min_threshold.')
else:
    vmin, vmax = min(edge_values), max(edge_values)
    if vmin == vmax:
        # A single distinct value would make the range zero and the
        # normalisation divide by zero, so widen the range artificially.
        vmin, vmax = vmin - 0.5, vmax + 0.5
    norm_edge_values = [(value - vmin) / (vmax - vmin) for value in edge_values]

    # Mappable carrying the real metric range, so the colorbar ticks show
    # actual metric values instead of a bare 0-1 scale
    sm = cm.ScalarMappable(norm=plt.Normalize(vmin=vmin, vmax=vmax), cmap='viridis')
    sm.set_array([])

    # Fixed seed so the layout (and therefore the figure) is reproducible
    pos = nx.spring_layout(G, k=0.5, seed=42)

    fig, ax = plt.subplots(figsize=(12, 8))
    # Edge colours are controlled by edge_cmap / edge_vmin / edge_vmax;
    # the plain cmap argument only applies to nodes.
    nx.draw_networkx(G, pos, ax=ax, with_labels=True, node_size=800, node_color='lightblue',
                     edge_color=edge_values, edge_cmap=sm.get_cmap(),
                     edge_vmin=vmin, edge_vmax=vmax,
                     font_size=10, font_weight='bold', width=2)
    ax.set_title(f'Association Rule Network Graph ({metric_to_display.capitalize()})')
    # The mappable is not attached to any Axes, so tell the colorbar
    # explicitly which Axes to take its space from.
    fig.colorbar(sm, ax=ax, label=metric_to_display.capitalize())
    plt.show()

In [None]:
import seaborn as sns

# Turn the frozenset itemsets into sorted, comma-separated strings so the
# axis labels are readable and the pivot can order rows/columns
# deterministically. `rules` itself is left untouched.
heatmap_rules = rules.assign(
    antecedents=rules['antecedents'].map(lambda items: ', '.join(sorted(items))),
    consequents=rules['consequents'].map(lambda items: ', '.join(sorted(items))),
)

# Pivot table with rule support as values (0 where no rule links the pair)
pivot_table = heatmap_rules.pivot_table(values='support', index='antecedents',
                                        columns='consequents', fill_value=0)

if pivot_table.empty:
    # seaborn cannot draw a heatmap from a zero-size table
    print('No association rules to plot; try lowering min_support or min_threshold.')
else:
    # Generate the heatmap
    fig, ax = plt.subplots(figsize=(10, 8))
    sns.heatmap(pivot_table, annot=True, cmap='Blues', fmt=".2f", linewidths=0.5, ax=ax)
    ax.set_title('Association Rule Heatmap')
    ax.set_xlabel('Consequents')
    ax.set_ylabel('Antecedents')
    plt.show()