In [1]:
# Import apriori and association class from mlxtend library to find frequency patterns
from mlxtend.frequent_patterns import apriori as mlx_apriori, association_rules

In [2]:
# Import pandas and numpy libary
import pandas as pd
import numpy as np

In [3]:
# Read the file using read_excel function
trn = pd.read_excel("product_final.xlsx")

In [4]:
# Reset index
trn.reset_index

<bound method DataFrame.reset_index of     10 COLOUR SPACEBOY PEN  12 PENCILS TALL TUBE SKULLS  \
0                        0                            0   
1                        0                            0   
2                        0                            0   
3                        0                            0   
4                        0                            0   
5                        0                            0   
6                        0                            0   
7                        0                            0   
8                        0                            0   
9                        0                            0   
10                       0                            0   
11                       0                            0   
12                       0                            0   
13                       0                            0   
14                       0                            0   
15               

In [5]:
# check and count if na int the dataset
trn.isna().count()

10 COLOUR SPACEBOY PEN                42
12 PENCILS TALL TUBE SKULLS           42
3 PIECE SPACEBOY COOKIE CUTTER SET    42
3 STRIPEY MICE FELTCRAFT              42
3 TIER CAKE TIN GREEN AND CREAM       42
                                      ..
WRAP COWBOYS                          42
YELLOW BREAKFAST CUP AND SAUCER       42
YELLOW COAT RACK PARIS FASHION        42
YOU'RE CONFUSING ME METAL SIGN        42
ZINC WILLIE WINKIE  CANDLE STICK      42
Length: 356, dtype: int64

In [6]:
# check the data
trn.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 42 entries, 0 to 41
Columns: 356 entries, 10 COLOUR SPACEBOY PEN to ZINC WILLIE WINKIE  CANDLE STICK
dtypes: int64(356)
memory usage: 116.9 KB


In [7]:
# View records
trn.head()

Unnamed: 0,10 COLOUR SPACEBOY PEN,12 PENCILS TALL TUBE SKULLS,3 PIECE SPACEBOY COOKIE CUTTER SET,3 STRIPEY MICE FELTCRAFT,3 TIER CAKE TIN GREEN AND CREAM,3 TIER CAKE TIN RED AND CREAM,4 TRADITIONAL SPINNING TOPS,5 HOOK HANGER MAGIC TOADSTOOL,5 STRAND GLASS NECKLACE CRYSTAL,6 RIBBONS ELEGANT CHRISTMAS,...,WOOD S/3 CABINET ANT WHITE FINISH,WOODEN BOX OF DOMINOES,WOODEN FRAME ANTIQUE WHITE,WOODEN OWLS LIGHT GARLAND,WOODEN PICTURE FRAME WHITE FINISH,WRAP COWBOYS,YELLOW BREAKFAST CUP AND SAUCER,YELLOW COAT RACK PARIS FASHION,YOU'RE CONFUSING ME METAL SIGN,ZINC WILLIE WINKIE CANDLE STICK
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,3,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [8]:
# Transform the data into True or Falase: True represent presence of product in the transanction and 
# False represents the absence of the product during the transaction.

df = trn.applymap(lambda x: True if x >= 1 else False)

In [9]:
# check the data
df.head()

Unnamed: 0,10 COLOUR SPACEBOY PEN,12 PENCILS TALL TUBE SKULLS,3 PIECE SPACEBOY COOKIE CUTTER SET,3 STRIPEY MICE FELTCRAFT,3 TIER CAKE TIN GREEN AND CREAM,3 TIER CAKE TIN RED AND CREAM,4 TRADITIONAL SPINNING TOPS,5 HOOK HANGER MAGIC TOADSTOOL,5 STRAND GLASS NECKLACE CRYSTAL,6 RIBBONS ELEGANT CHRISTMAS,...,WOOD S/3 CABINET ANT WHITE FINISH,WOODEN BOX OF DOMINOES,WOODEN FRAME ANTIQUE WHITE,WOODEN OWLS LIGHT GARLAND,WOODEN PICTURE FRAME WHITE FINISH,WRAP COWBOYS,YELLOW BREAKFAST CUP AND SAUCER,YELLOW COAT RACK PARIS FASHION,YOU'RE CONFUSING ME METAL SIGN,ZINC WILLIE WINKIE CANDLE STICK
0,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,True,False,False
4,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False


#### FP Growth generates an FP-Tree for making frequent patterns. 
The FP-Growth Algorithm is an effective and scalable approach for mining the whole set of frequent patterns through pattern fragment growth, which includes an extended prefix-tree structure dubbed frequent-pattern tree (FP-tree) for storing compressed and critical information about frequent patterns.

In [10]:
#Function implementing FP-Growth to extract frequent itemsets for association rule mining-
#from mlxtend.frequent_patterns import fpgrowth 
from mlxtend.frequent_patterns import fpgrowth

frequent_itemsets = fpgrowth(df, min_support=0.10,use_colnames=True)
pd.set_option('display.max_colwidth', 100)
print(frequent_itemsets)

      support  \
0    0.190476   
1    0.142857   
2    0.142857   
3    0.142857   
4    0.119048   
..        ...   
127  0.119048   
128  0.119048   
129  0.119048   
130  0.119048   
131  0.119048   

                                                                                                itemsets  
0                                                                   (WHITE HANGING HEART T-LIGHT HOLDER)  
1                                                                         (SET 7 BABUSHKA NESTING BOXES)  
2                                                                       (RED WOOLLY HOTTIE WHITE HEART.)  
3                                                                  (KNITTED UNION FLAG HOT WATER BOTTLE)  
4                                                                                  (WHITE METAL LANTERN)  
..                                                                                                   ...  
127  (CREAM CUPID HEARTS COAT HANGER, RED WOOL

In [11]:
# A lambda function you can calculate how many items are present in each itemset and pass through len() function. 
frequent_itemsets['length'] = frequent_itemsets['itemsets'].apply(lambda x: len(x))
frequent_itemsets.style

Unnamed: 0,support,itemsets,length
0,0.190476,frozenset({'WHITE HANGING HEART T-LIGHT HOLDER'}),1
1,0.142857,frozenset({'SET 7 BABUSHKA NESTING BOXES'}),1
2,0.142857,frozenset({'RED WOOLLY HOTTIE WHITE HEART.'}),1
3,0.142857,frozenset({'KNITTED UNION FLAG HOT WATER BOTTLE'}),1
4,0.119048,frozenset({'WHITE METAL LANTERN'}),1
5,0.119048,frozenset({'GLASS STAR FROSTED T-LIGHT HOLDER'}),1
6,0.119048,frozenset({'CREAM CUPID HEARTS COAT HANGER'}),1
7,0.166667,frozenset({'HAND WARMER UNION JACK'}),1
8,0.119048,frozenset({'HAND WARMER RED POLKA DOT'}),1
9,0.119048,frozenset({'WOOD 2 DRAWER CABINET WHITE FINISH'}),1


> * Above resulst shows the single item entries are part of a product pair, while those with higher numbers of items represent groups of multiple products that frequently appear together in baskets.

In [12]:
# To Calculate number of itemsets that contain multiple items and which contain single items.
frequent_itemsets['length'].value_counts()

3    35
4    35
2    23
5    21
1    10
6     7
7     1
Name: length, dtype: int64

> * Above resulte shows, three or four items, generally buy together in the basket and repeated 35 times each followed by two and five items buy together with 23 and 21 times respectively.

### Apriori generates the frequent patterns by making the itemsets using pairing such as single item set, double itemset, triple itemset.  
It moves on to find the most often occurring individual items in the database and extends them to wider and wider item sets as long as those item sets appear frequently enough in the database. Apriori's frequent item sets may be used to generate association rules that highlight broad patterns in the database, which has applications in areas such as market basket analysis.

In [13]:
#Apriori function to extract frequent itemsets for association rule mining
rules = mlx_apriori(df, min_support=0.10,use_colnames=True) 

In [14]:
rules['length'] = rules['itemsets'].apply(lambda x: len(x))
rules.style

Unnamed: 0,support,itemsets,length
0,0.119048,frozenset({'CREAM CUPID HEARTS COAT HANGER'}),1
1,0.119048,frozenset({'GLASS STAR FROSTED T-LIGHT HOLDER'}),1
2,0.119048,frozenset({'HAND WARMER RED POLKA DOT'}),1
3,0.166667,frozenset({'HAND WARMER UNION JACK'}),1
4,0.142857,frozenset({'KNITTED UNION FLAG HOT WATER BOTTLE'}),1
5,0.142857,frozenset({'RED WOOLLY HOTTIE WHITE HEART.'}),1
6,0.142857,frozenset({'SET 7 BABUSHKA NESTING BOXES'}),1
7,0.190476,frozenset({'WHITE HANGING HEART T-LIGHT HOLDER'}),1
8,0.119048,frozenset({'WHITE METAL LANTERN'}),1
9,0.119048,frozenset({'WOOD 2 DRAWER CABINET WHITE FINISH'}),1


In [15]:
rules['length'].value_counts()

3    35
4    35
2    23
5    21
1    10
6     7
7     1
Name: length, dtype: int64

### The association rule describes how two or more objects are related to one another. 
Apriori algorithm is also called frequent pattern mining. For Example, if someone buys CREAM CUPID HEARTS COAT HANGER), they’ll often buy GLASS STAR FROSTED T-LIGHT HOLDER too and confidence shows 100 % of customers buy together. Secondly,83.33% of customers if they buys KNITTED UNION FLAG HOT WATER BOTTLE, will buy CREAM CUPID HEARTS COAT HANGER too.
* The support metric indicates how frequently the itemset occurs within the dataset.

* lift of over 1 is a strong sign that sales of the antecedent influence the consequent, i.e. a promotion that increases sales of {GLASS STAR FROSTED T-LIGHT HOLDER} will also aid sales of {WHITE METAL LANTERN, RED WOOLLY HOTTIE WHITE HEART, KNITTED UNION FLAG HOT WATER BOTTLE, CREAM CUPID HEARTS COAT HANG}. Conversely, running out of one of these or setting the price too high could impact sales of the others. In other words,If you run a promotion on the GLASS STAR FROSTED T-LIGHT HOLDER, you could be expected to see an uplift in the sales of your WHITE METAL LANTERN, RED WOOLLY HOTTIE WHITE HEART, KNITTED UNION FLAG HOT WATER BOTTLE, CREAM CUPID HEARTS COAT HANG.

In [16]:
association_rules(rules)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(CREAM CUPID HEARTS COAT HANGER),(GLASS STAR FROSTED T-LIGHT HOLDER),0.119048,0.119048,0.119048,1.000000,8.4,0.104875,inf,1.000000
1,(GLASS STAR FROSTED T-LIGHT HOLDER),(CREAM CUPID HEARTS COAT HANGER),0.119048,0.119048,0.119048,1.000000,8.4,0.104875,inf,1.000000
2,(KNITTED UNION FLAG HOT WATER BOTTLE),(CREAM CUPID HEARTS COAT HANGER),0.142857,0.119048,0.119048,0.833333,7.0,0.102041,5.285714,1.000000
3,(CREAM CUPID HEARTS COAT HANGER),(KNITTED UNION FLAG HOT WATER BOTTLE),0.119048,0.142857,0.119048,1.000000,7.0,0.102041,inf,0.972973
4,(CREAM CUPID HEARTS COAT HANGER),(RED WOOLLY HOTTIE WHITE HEART.),0.119048,0.142857,0.119048,1.000000,7.0,0.102041,inf,0.972973
...,...,...,...,...,...,...,...,...,...,...
1866,(CREAM CUPID HEARTS COAT HANGER),"(WHITE METAL LANTERN, RED WOOLLY HOTTIE WHITE HEART., KNITTED UNION FLAG HOT WATER BOTTLE, GLASS...",0.119048,0.119048,0.119048,1.000000,8.4,0.104875,inf,1.000000
1867,(RED WOOLLY HOTTIE WHITE HEART.),"(WHITE METAL LANTERN, CREAM CUPID HEARTS COAT HANGER, KNITTED UNION FLAG HOT WATER BOTTLE, GLASS...",0.142857,0.119048,0.119048,0.833333,7.0,0.102041,5.285714,1.000000
1868,(KNITTED UNION FLAG HOT WATER BOTTLE),"(WHITE METAL LANTERN, CREAM CUPID HEARTS COAT HANGER, RED WOOLLY HOTTIE WHITE HEART., GLASS STAR...",0.142857,0.119048,0.119048,0.833333,7.0,0.102041,5.285714,1.000000
1869,(GLASS STAR FROSTED T-LIGHT HOLDER),"(WHITE METAL LANTERN, CREAM CUPID HEARTS COAT HANGER, RED WOOLLY HOTTIE WHITE HEART., KNITTED UN...",0.119048,0.119048,0.119048,1.000000,8.4,0.104875,inf,1.000000


:

### Advantages of FP Growth Algorithm

* This algorithm needs to scan the database twice when compared to Apriori, which scans the transactions for each iteration.

* The pairing of items is not done in this algorithm, making it faster.

* The database is stored in a compact version in memory.

* It is efficient and scalable for mining both long and short frequent patterns.

### Disadvantages of FP-Growth Algorithm

* FP Tree is more cumbersome and difficult to build than Apriori
* It may be expensive.
* The algorithm may not fit in the shared memory when the database is large.

### Advantages of Apriori Algorithm

* It is used to calculate large itemsets.
* Simple to understand and apply.

### Disadvantages of Apriori Algorithms

* Apriori algorithm is an expensive method to find support since the calculation has to pass through the whole database.
* Sometimes, you need a huge number of candidate rules, so it becomes computationally more expensive.