In [2]:
import pandas as pd

In [3]:
# Load retail dataset (shortened version for demo)
df_mba = pd.read_excel(r"E:\Data Sets\online_retail_II.xlsx")


In [4]:
# Filter for UK customers only (optional but common)
df_mba = df_mba[df_mba['Country'] == 'United Kingdom']

# Drop rows with missing InvoiceNo or Description
df_mba.dropna(subset=['Invoice', 'Description'], inplace=True)

In [5]:
# Keep only positive quantities (purchases)
df_mba = df_mba[df_mba['Quantity'] > 0]

# Convert Invoice to string (to treat as categorical transaction IDs)
df_mba['Invoice'] = df_mba['Invoice'].astype(str)

print(df_mba.head())


  Invoice StockCode                          Description  Quantity  \
0  489434     85048  15CM CHRISTMAS GLASS BALL 20 LIGHTS        12   
1  489434    79323P                   PINK CHERRY LIGHTS        12   
2  489434    79323W                  WHITE CHERRY LIGHTS        12   
3  489434     22041         RECORD FRAME 7" SINGLE SIZE         48   
4  489434     21232       STRAWBERRY CERAMIC TRINKET BOX        24   

          InvoiceDate  Price  Customer ID         Country  
0 2009-12-01 07:45:00   6.95      13085.0  United Kingdom  
1 2009-12-01 07:45:00   6.75      13085.0  United Kingdom  
2 2009-12-01 07:45:00   6.75      13085.0  United Kingdom  
3 2009-12-01 07:45:00   2.10      13085.0  United Kingdom  
4 2009-12-01 07:45:00   1.25      13085.0  United Kingdom  


In [6]:
# Step 2: Create Transaction Matrix
# Create basket matrix: Rows = Invoice, Columns = Items, values = Quantity sum
basket = df_mba.groupby(['Invoice', 'Description'])['Quantity'].sum().unstack().fillna(0)

# Convert quantities to 1/0 (presence/absence)
def encode_units(x):
    return 1 if x > 0 else 0

basket_sets = basket.apply(lambda col: col.map(encode_units))


print(basket_sets.head())

Description  22719    DOORMAT UNION JACK GUNS AND ROSES  \
Invoice                                                   
489434           0                                    0   
489435           0                                    0   
489436           0                                    0   
489437           0                                    0   
489438           0                                    0   

Description   3 STRIPEY MICE FELTCRAFT   4 PURPLE FLOCK DINNER CANDLES  \
Invoice                                                                  
489434                               0                               0   
489435                               0                               0   
489436                               0                               0   
489437                               0                               0   
489438                               0                               0   

Description   ANIMAL STICKERS   BLACK PIRATE TREASURE CHEST  \
Invo

In [None]:
  #Step 3: Apply Apriori Algorithm
from mlxtend.frequent_patterns import apriori

# Generate frequent itemsets with min support of 1%
frequent_itemsets = apriori(basket_sets, min_support=0.01, use_colnames=True)

print(frequent_itemsets.head())



In [None]:
print(frequent_itemsets.head())