In [5]:
!pip install mlxtend


Collecting mlxtend
  Downloading mlxtend-0.23.4-py3-none-any.whl.metadata (7.3 kB)
Downloading mlxtend-0.23.4-py3-none-any.whl (1.4 MB)
   ---------------------------------------- 0.0/1.4 MB ? eta -:--:--
   ---------------------------------------- 1.4/1.4 MB 8.7 MB/s eta 0:00:00
Installing collected packages: mlxtend
Successfully installed mlxtend-0.23.4


In [11]:
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules

In [13]:
# Load dataset
df = pd.read_excel("PPA Excel.xlsx", sheet_name="PPA XL files")


In [45]:
df = df[['BillNo', 'Itemname']].dropna()

In [47]:
df.head()

Unnamed: 0,BillNo,Itemname
0,536365,WHITE HANGING HEART T-LIGHT HOLDER
1,536365,WHITE METAL LANTERN
2,536365,CREAM CUPID HEARTS COAT HANGER
3,536365,KNITTED UNION FLAG HOT WATER BOTTLE
4,536365,RED WOOLLY HOTTIE WHITE HEART.


In [49]:
item_freq = df['Itemname'].value_counts()
common_items = item_freq[item_freq > 100].index
df = df[df['Itemname'].isin(common_items)]


In [51]:
basket = (df.groupby(['BillNo', 'Itemname'])['Itemname']
          .count().unstack().reset_index().fillna(0)
          .set_index('BillNo'))

In [53]:
basket = basket.astype(bool)

In [55]:
frequent_items = apriori(
    basket,
    min_support=0.05,   # Only keep itemsets bought in >= 5% of transactions
    use_colnames=True,
    max_len=3           # Restrict itemsets to 2-3 items
)

In [57]:
rules = association_rules(frequent_items, metric="lift", min_threshold=1)

In [59]:
# Show results
print("Frequent Itemsets:")
print(frequent_items.head())

print("\nAssociation Rules:")
print(rules[['antecedents', 'consequents', 'support', 'confidence', 'lift']].head())


Frequent Itemsets:
    support                           itemsets
0  0.053083         (ALARM CLOCK BAKELIKE RED)
1  0.073965    (ASSORTED COLOUR BIRD ORNAMENT)
2  0.050395  (GREEN REGENCY TEACUP AND SAUCER)
3  0.061043            (HEART OF WICKER SMALL)
4  0.057683           (JAM MAKING SET PRINTED)

Association Rules:
Empty DataFrame
Columns: [antecedents, consequents, support, confidence, lift]
Index: []


In [61]:
# Keep only strong, balanced rules
balanced_rules = rules[
    (rules['support'] >= 0.05) &       # at least 5% transactions
    (rules['confidence'] >= 0.6) &     # at least 60% reliable
    (rules['lift'] > 1.2)              # not random, but meaningful
]

print(balanced_rules.head())


Empty DataFrame
Columns: [antecedents, consequents, antecedent support, consequent support, support, confidence, lift, representativity, leverage, conviction, zhangs_metric, jaccard, certainty, kulczynski]
Index: []
