# Importing Libraries 

In [205]:
import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings('ignore')
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules

# reading data from internet

In [206]:
data = pd.read_excel("http://archive.ics.uci.edu/ml/machine-learning-databases/00352/Online%20Retail.xlsx")
data.head(3)

Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country
0,536365,85123A,WHITE HANGING HEART T-LIGHT HOLDER,6,2010-12-01 08:26:00,2.55,17850.0,United Kingdom
1,536365,71053,WHITE METAL LANTERN,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom
2,536365,84406B,CREAM CUPID HEARTS COAT HANGER,8,2010-12-01 08:26:00,2.75,17850.0,United Kingdom


In [94]:
data["Country"].value_counts().head(10)

United Kingdom    495478
Germany             9495
France              8557
EIRE                8196
Spain               2533
Netherlands         2371
Belgium             2069
Switzerland         2002
Portugal            1519
Australia           1259
Name: Country, dtype: int64

# Data Preprocessig

In [216]:
# let me apply apriori algo only on dataset which belongs Germany country .

germany = data[data["Country"] == "Germany"]
germany.head(2)

Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country
1109,536527,22809,SET OF 6 T-LIGHTS SANTA,6,2010-12-01 13:04:00,2.95,12662.0,Germany
1110,536527,84347,ROTATING SILVER ANGELS T-LIGHT HLDR,6,2010-12-01 13:04:00,2.55,12662.0,Germany


In [217]:
germany.drop(["StockCode","InvoiceDate","UnitPrice","CustomerID","Country"],axis = 1,inplace = True)

In [218]:
# check for types of variables
germany.reset_index(drop = True,inplace = True)
germany.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9495 entries, 0 to 9494
Data columns (total 3 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   InvoiceNo    9495 non-null   object
 1   Description  9495 non-null   object
 2   Quantity     9495 non-null   int64 
dtypes: int64(1), object(2)
memory usage: 222.7+ KB


In [219]:
print(germany.shape)
germany.head(2)

(9495, 3)


Unnamed: 0,InvoiceNo,Description,Quantity
0,536527,SET OF 6 T-LIGHTS SANTA,6
1,536527,ROTATING SILVER ANGELS T-LIGHT HLDR,6


In [220]:
germany["Description"].value_counts().head(8)

POSTAGE                                383
ROUND SNACK BOXES SET OF4 WOODLAND     120
REGENCY CAKESTAND 3 TIER                81
ROUND SNACK BOXES SET OF 4 FRUITS       78
PLASTERS IN TIN WOODLAND ANIMALS        67
WOODLAND CHARLOTTE BAG                  59
PLASTERS IN TIN CIRCUS PARADE           57
PLASTERS IN TIN SPACEBOY                51
Name: Description, dtype: int64

In [221]:
germany = germany[germany["Description"]!="POSTAGE"]
germany["Description"].value_counts().head(8)

ROUND SNACK BOXES SET OF4 WOODLAND     120
REGENCY CAKESTAND 3 TIER                81
ROUND SNACK BOXES SET OF 4 FRUITS       78
PLASTERS IN TIN WOODLAND ANIMALS        67
WOODLAND CHARLOTTE BAG                  59
PLASTERS IN TIN CIRCUS PARADE           57
PLASTERS IN TIN SPACEBOY                51
6 RIBBONS RUSTIC CHARM                  50
Name: Description, dtype: int64

In [222]:
germany["Description"] = germany["Description"].str.strip("")
germany.dropna(axis = 0,inplace = True)
print(germany.shape)

(9112, 3)


In [223]:
germany.head(2)

Unnamed: 0,InvoiceNo,Description,Quantity
0,536527,SET OF 6 T-LIGHTS SANTA,6
1,536527,ROTATING SILVER ANGELS T-LIGHT HLDR,6


In [224]:
market_bastket_data = germany.groupby(["InvoiceNo","Description"])["Quantity"].sum().unstack().reset_index().fillna(0).set_index(["InvoiceNo"])

In [225]:
market_bastket_data.head(2)

Description,50'S CHRISTMAS GIFT BAG LARGE,DOLLY GIRL BEAKER,I LOVE LONDON MINI BACKPACK,RED SPOT GIFT BAG LARGE,SET 2 TEA TOWELS I LOVE LONDON,SPACEBOY BABY GIFT SET,10 COLOUR SPACEBOY PEN,12 COLOURED PARTY BALLOONS,12 IVORY ROSE PEG PLACE SETTINGS,12 MESSAGE CARDS WITH ENVELOPES,...,YULETIDE IMAGES GIFT WRAP SET,ZINC HEART T-LIGHT HOLDER,ZINC STAR T-LIGHT HOLDER,ZINC BOX SIGN HOME,ZINC FOLKART SLEIGH BELLS,ZINC HEART LATTICE T-LIGHT HOLDER,ZINC METAL HEART DECORATION,ZINC T-LIGHT HOLDER STAR LARGE,ZINC T-LIGHT HOLDER STARS SMALL,ZINC WILLIE WINKIE CANDLE STICK
InvoiceNo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
536527,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
536840,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [226]:

def enocode(x):
    if(x<=0):
        return 0
    elif(x>=1):
        return 1

market_bastket_data = market_bastket_data.applymap(enocode)

In [227]:
market_bastket_data.head(2)

Description,50'S CHRISTMAS GIFT BAG LARGE,DOLLY GIRL BEAKER,I LOVE LONDON MINI BACKPACK,RED SPOT GIFT BAG LARGE,SET 2 TEA TOWELS I LOVE LONDON,SPACEBOY BABY GIFT SET,10 COLOUR SPACEBOY PEN,12 COLOURED PARTY BALLOONS,12 IVORY ROSE PEG PLACE SETTINGS,12 MESSAGE CARDS WITH ENVELOPES,...,YULETIDE IMAGES GIFT WRAP SET,ZINC HEART T-LIGHT HOLDER,ZINC STAR T-LIGHT HOLDER,ZINC BOX SIGN HOME,ZINC FOLKART SLEIGH BELLS,ZINC HEART LATTICE T-LIGHT HOLDER,ZINC METAL HEART DECORATION,ZINC T-LIGHT HOLDER STAR LARGE,ZINC T-LIGHT HOLDER STARS SMALL,ZINC WILLIE WINKIE CANDLE STICK
InvoiceNo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
536527,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
536840,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


# model building using apriori algo

In [228]:
# generate frequent itemset
# support is 5%
frequent_items = apriori(market_bastket_data,min_support = 0.05,use_colnames = True)

In [229]:
frequent_items

Unnamed: 0,support,itemsets
0,0.079796,(6 RIBBONS RUSTIC CHARM)
1,0.054329,(ALARM CLOCK BAKELIKE PINK)
2,0.050934,(CHARLOTTE BAG APPLES DESIGN)
3,0.056027,(GUMBALL COAT RACK)
4,0.071307,(JAM MAKING SET PRINTED)
5,0.061121,(JUMBO BAG RED RETROSPOT)
6,0.078098,(JUMBO BAG WOODLAND ANIMALS)
7,0.061121,(LUNCH BAG WOODLAND)
8,0.066214,(PACK OF 72 RETROSPOT CAKE CASES)
9,0.089983,(PLASTERS IN TIN CIRCUS PARADE )


In [230]:
# generate association rules 
# considering lift as metric instead of confidence .

rules = association_rules(frequent_items,metric = 'lift',min_threshold =1)

In [231]:
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(PLASTERS IN TIN CIRCUS PARADE ),(PLASTERS IN TIN WOODLAND ANIMALS),0.089983,0.106961,0.052632,0.584906,5.468404,0.043007,2.151412
1,(PLASTERS IN TIN WOODLAND ANIMALS),(PLASTERS IN TIN CIRCUS PARADE ),0.106961,0.089983,0.052632,0.492063,5.468404,0.043007,1.791596
2,(PLASTERS IN TIN WOODLAND ANIMALS),(ROUND SNACK BOXES SET OF4 WOODLAND ),0.106961,0.190153,0.057725,0.539683,2.838152,0.037386,1.759323
3,(ROUND SNACK BOXES SET OF4 WOODLAND ),(PLASTERS IN TIN WOODLAND ANIMALS),0.190153,0.106961,0.057725,0.303571,2.838152,0.037386,1.282312
4,(ROUND SNACK BOXES SET OF4 WOODLAND ),(ROUND SNACK BOXES SET OF 4 FRUITS ),0.190153,0.122241,0.101868,0.535714,4.38244,0.078623,1.890558
5,(ROUND SNACK BOXES SET OF 4 FRUITS ),(ROUND SNACK BOXES SET OF4 WOODLAND ),0.122241,0.190153,0.101868,0.833333,4.38244,0.078623,4.859083
6,(ROUND SNACK BOXES SET OF4 WOODLAND ),(SPACEBOY LUNCH BOX ),0.190153,0.079796,0.054329,0.285714,3.580547,0.039156,1.288285
7,(SPACEBOY LUNCH BOX ),(ROUND SNACK BOXES SET OF4 WOODLAND ),0.079796,0.190153,0.054329,0.680851,3.580547,0.039156,2.537521


# Make recommendations 

In [232]:
rules[(rules["lift"]>1) & (rules["confidence"]>0.5)]

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(PLASTERS IN TIN CIRCUS PARADE ),(PLASTERS IN TIN WOODLAND ANIMALS),0.089983,0.106961,0.052632,0.584906,5.468404,0.043007,2.151412
2,(PLASTERS IN TIN WOODLAND ANIMALS),(ROUND SNACK BOXES SET OF4 WOODLAND ),0.106961,0.190153,0.057725,0.539683,2.838152,0.037386,1.759323
4,(ROUND SNACK BOXES SET OF4 WOODLAND ),(ROUND SNACK BOXES SET OF 4 FRUITS ),0.190153,0.122241,0.101868,0.535714,4.38244,0.078623,1.890558
5,(ROUND SNACK BOXES SET OF 4 FRUITS ),(ROUND SNACK BOXES SET OF4 WOODLAND ),0.122241,0.190153,0.101868,0.833333,4.38244,0.078623,4.859083
7,(SPACEBOY LUNCH BOX ),(ROUND SNACK BOXES SET OF4 WOODLAND ),0.079796,0.190153,0.054329,0.680851,3.580547,0.039156,2.537521
