In [1]:
import pandas as pd

In [2]:
# Load the raw ERP sales register export.
# low_memory=False makes pandas infer each column's dtype from the whole file
# instead of chunk by chunk, so ID-like columns are not parsed as a mix of
# numbers and strings (the usual cause of a DtypeWarning on read_csv).
df = pd.read_csv('Data/ERP Sales Register.csv', low_memory=False)

  df = pd.read_csv('Data/ERP Sales Register.csv')


In [3]:
# Restrict the register to the invoice-number and item-name columns, then
# discard every row where either of the two is missing.
basket_columns = ["Invoice/ Sales CN No.", "Item Name"]
basket_df = df.loc[:, basket_columns].dropna(how="any")

In [4]:
# Drop system/non-product rows: any item name containing "Rounding Difference"
# (case-insensitive). Missing names count as "no match" and are kept.
is_rounding_row = basket_df["Item Name"].str.contains(
    "Rounding Difference", case=False, na=False
)
basket_df = basket_df.loc[~is_rounding_row]

In [5]:
# Keep only rows whose item is one of the 100 most frequent item names
# (frequency = number of rows on which the name occurs).
item_frequency = basket_df["Item Name"].value_counts()
top_items = item_frequency.nlargest(100).index
is_top_item = basket_df["Item Name"].isin(top_items)
filtered_df = basket_df.loc[is_top_item]

In [6]:
# Collapse repeated lines of the same item on the same invoice into one row.
basket_key = ["Invoice/ Sales CN No.", "Item Name"]
filtered_df = filtered_df.drop_duplicates(subset=basket_key, keep="first")

In [7]:
# Pivot to one row per invoice and one column per item. Each cell holds the
# number of rows for that (invoice, item) pair, with 0 filled in where the
# item is absent; later cells turn these counts into indicators.
invoice_col = "Invoice/ Sales CN No."
item_counts = filtered_df.groupby([invoice_col, "Item Name"])["Item Name"].count()
basket = (
    item_counts.unstack()
    .reset_index()
    .fillna(0)
    .set_index(invoice_col)
)

In [8]:
# Binarise the counts: 1 where the item appears on the invoice, otherwise 0.
# A vectorised comparison replaces the element-wise DataFrame.applymap call,
# which is deprecated in recent pandas releases and runs a Python lambda per
# cell. Missing values compare as False and therefore become 0, as before.
basket = basket.gt(0).astype("int64")

  basket = basket.applymap(lambda x: 1 if x > 0 else 0)


In [9]:
# Cast the 0/1 indicators to booleans before the frame is passed to fpgrowth.
basket = basket.astype("bool")

In [10]:
from mlxtend.frequent_patterns import association_rules, fpgrowth

In [11]:
# Mine frequent itemsets with mlxtend's fpgrowth, using a minimum support of
# 0.02; use_colnames=True labels itemsets with the basket's column names.
frequent_items = fpgrowth(df=basket, min_support=0.02, use_colnames=True)

In [12]:
# Derive association rules from the frequent itemsets, filtering on the
# lift metric with a threshold of 1.0.
rules = association_rules(frequent_items, min_threshold=1.0, metric="lift")

In [13]:
# Order the rules from highest to lowest lift.
rules = rules.sort_values("lift", ascending=False)

In [14]:
# Preview the first 10 rules; the rules frame was sorted by descending lift
# in the previous cell, so these are the 10 highest-lift rules.
rules.head(10)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
902,(200 GM STD MIXED PICKLE M/O BTL (1*60)),(200 GM STD MANGO PICKLE M/O BTL (1*60)),0.035125,0.02684,0.022631,0.644284,24.004413,1.0,0.021688,2.735777,0.993228,0.575332,0.634473,0.743721
903,(200 GM STD MANGO PICKLE M/O BTL (1*60)),(200 GM STD MIXED PICKLE M/O BTL (1*60)),0.02684,0.035125,0.022631,0.843159,24.004413,1.0,0.021688,6.151932,0.984773,0.575332,0.837449,0.743721
75,(325 GM NILONS CLASSIC MIXED PICKLE PET M/O (1...,(325 GM NILONS CLASSIC MANGO PICKLE PET M/O (1...,0.038439,0.031781,0.027482,0.714951,22.495953,1.0,0.02626,3.39668,0.993746,0.643032,0.705595,0.789838
74,(325 GM NILONS CLASSIC MANGO PICKLE PET M/O (1...,(325 GM NILONS CLASSIC MIXED PICKLE PET M/O (1...,0.031781,0.038439,0.027482,0.864725,22.495953,1.0,0.02626,7.108205,0.986913,0.643032,0.859318,0.789838
847,(500 GM NILONS CLASSIC MIXED PICKLE PET M/O (1...,(500 GM NILONS CLASSIC MANGO PICKLE PET M/O (1...,0.034976,0.035812,0.025661,0.733675,20.486928,1.0,0.024408,3.620341,0.985663,0.56864,0.723783,0.725112
842,(500 GM NILONS CLASSIC MANGO PICKLE PET M/O (1...,(500 GM NILONS CLASSIC MIXED PICKLE PET M/O (1...,0.035812,0.034976,0.025661,0.716549,20.486928,1.0,0.024408,3.404548,0.986517,0.56864,0.706275,0.725112
362,(RS 10/- NILONS FUSILLI PASTA POU (1*160)),(RS 10/- NILONS MACARONI PASTA POU (1*160)),0.03202,0.037618,0.022616,0.706294,18.775361,1.0,0.021411,3.276681,0.978056,0.480952,0.694813,0.653742
363,(RS 10/- NILONS MACARONI PASTA POU (1*160)),(RS 10/- NILONS FUSILLI PASTA POU (1*160)),0.037618,0.03202,0.022616,0.60119,18.775361,1.0,0.021411,2.427173,0.983745,0.480952,0.587998,0.653742
844,(500 GM NILONS CLASSIC MANGO PICKLE PET M/O (1...,(900 GM NILONS CLASSIC MIXED PICKLE PET M/O (1...,0.032513,0.045157,0.025661,0.789256,17.478176,1.0,0.024193,4.530825,0.974468,0.493398,0.77929,0.67876
845,(900 GM NILONS CLASSIC MIXED PICKLE PET M/O (1...,(500 GM NILONS CLASSIC MANGO PICKLE PET M/O (1...,0.045157,0.032513,0.025661,0.568264,17.478176,1.0,0.024193,2.240926,0.987372,0.493398,0.553756,0.67876


In [15]:
# (number of rows, number of columns) of the rules table.
rules.shape

(918, 14)

In [16]:
def recommend_products(purchased_item, rules_df, top_n=5):
    """Recommend products that are associated with a single purchased item.

    Only rules whose antecedent is exactly ``{purchased_item}`` are used.
    The matching rules are walked from highest to lowest lift, and every item
    in each rule's consequent is collected once, so the result contains no
    duplicates and multi-item consequents contribute all of their items.

    Parameters
    ----------
    purchased_item : hashable
        Item name as it appears inside the ``antecedents`` sets.
    rules_df : pandas.DataFrame
        Rules table with ``antecedents`` and ``consequents`` columns holding
        set-like values and a numeric ``lift`` column.
    top_n : int, default 5
        Maximum number of distinct items to return. Values <= 0 yield ``[]``.

    Returns
    -------
    list
        Up to ``top_n`` distinct item names, ordered by the lift of the rule
        that first introduced them. Empty when no rule matches.
    """
    if top_n <= 0:
        return []

    matches = rules_df[rules_df["antecedents"] == frozenset([purchased_item])]
    matches = matches.sort_values(by="lift", ascending=False)

    recommendations = []
    for consequent in matches["consequents"]:
        # Set iteration order is arbitrary; sort so the output is deterministic.
        for item in sorted(consequent, key=str):
            if item in recommendations:
                continue
            recommendations.append(item)
            if len(recommendations) == top_n:
                return recommendations
    return recommendations

In [17]:
# Write the boolean basket matrix to basket.csv in the working directory.
basket.to_csv(path_or_buf="basket.csv")

In [18]:
# Persist the sorted rules table as a pickle file in the working directory.
# NOTE(review): presumably pickle is chosen so the set-valued antecedents and
# consequents survive the round trip unchanged; confirm with the consumer.
# Only unpickle files that come from a trusted source.
rules.to_pickle(path="rules.pkl")