In [5]:
! pip install numpy pandas mlxtend xlrd

Defaulting to user installation because normal site-packages is not writeable
Collecting xlrd
  Downloading xlrd-1.2.0-py2.py3-none-any.whl (103 kB)
[K     |████████████████████████████████| 103 kB 4.9 MB/s 
Installing collected packages: xlrd
Successfully installed xlrd-1.2.0


## Install and import the required libraries: numpy, pandas, and mlxtend (for the Apriori algorithm)

In [1]:
import numpy as np
import pandas as pd
from mlxtend.frequent_patterns import apriori , association_rules

***Reading the data from the dataset***

In [2]:
# Load the transaction workbook (expected next to this notebook) into a DataFrame.
data = pd.read_excel(io="Online Retail.xlsx")

In [3]:
data.head() # preview the first 5 rows of the loaded dataset

Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country
0,536365,85123A,WHITE HANGING HEART T-LIGHT HOLDER,6,2010-12-01 08:26:00,2.55,17850.0,United Kingdom
1,536365,71053,WHITE METAL LANTERN,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom
2,536365,84406B,CREAM CUPID HEARTS COAT HANGER,8,2010-12-01 08:26:00,2.75,17850.0,United Kingdom
3,536365,84029G,KNITTED UNION FLAG HOT WATER BOTTLE,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom
4,536365,84029E,RED WOOLLY HOTTIE WHITE HEART.,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom


In [4]:
data.columns # list the column labels of the loaded dataset

Index(['InvoiceNo', 'StockCode', 'Description', 'Quantity', 'InvoiceDate',
       'UnitPrice', 'CustomerID', 'Country'],
      dtype='object')

## Cleaning Data

In [5]:
# Trim leading/trailing whitespace from product descriptions so that the same
# item is not split across several differently-padded labels.
data['Description'] = data['Description'].str.strip(to_strip=None)

In [6]:
# Spot-check the first few values of the Description column.
data['Description'].head()

0     WHITE HANGING HEART T-LIGHT HOLDER
1                    WHITE METAL LANTERN
2         CREAM CUPID HEARTS COAT HANGER
3    KNITTED UNION FLAG HOT WATER BOTTLE
4         RED WOOLLY HOTTIE WHITE HEART.
Name: Description, dtype: object

In [7]:
# Drop rows that have no invoice number and store the invoice number as text.
# Written as one chained expression that re-binds `data` instead of mutating it
# with inplace=True; `.assign` avoids a chained-assignment warning on the
# filtered frame.
data = (
    data
    .dropna(axis=0, subset=['InvoiceNo'])
    .assign(InvoiceNo=lambda frame: frame['InvoiceNo'].astype('str'))
)

In [8]:
def _basket_for_country(transactions, country):
    """Build the invoice-by-product quantity matrix for a single country.

    Parameters
    ----------
    transactions : pandas.DataFrame
        Frame with at least the columns 'Country', 'InvoiceNo',
        'Description' and 'Quantity'.
    country : str
        Value of the 'Country' column to keep.

    Returns
    -------
    pandas.DataFrame
        One row per 'InvoiceNo', one column per 'Description'; each cell is
        the summed 'Quantity' of that product on that invoice, with 0 where
        the product does not appear on the invoice.
    """
    return (transactions[transactions['Country'] == country]
            .groupby(['InvoiceNo', 'Description'])['Quantity']
            .sum()
            .unstack()   # products become columns, invoices stay as the index
            .fillna(0))  # product absent from the invoice -> quantity 0


# Transactions done in France
basket_France = _basket_for_country(data, "France")

# Transactions done in the United Kingdom
basket_UK = _basket_for_country(data, "United Kingdom")

# Transactions done in Portugal
basket_Por = _basket_for_country(data, "Portugal")

# Transactions done in Sweden
basket_Sweden = _basket_for_country(data, "Sweden")

In [9]:
def hot_encode(x):
    """Map a basket quantity to a 0/1 purchase flag.

    Parameters
    ----------
    x : int or float
        Summed quantity of one product on one invoice.

    Returns
    -------
    int
        1 when ``x >= 1``; 0 for every other value. This includes zero,
        negative quantities, values strictly between 0 and 1, and NaN, so the
        function never falls through and returns ``None``.
    """
    return 1 if x >= 1 else 0
  
# Encoding the datasets 
# Encoding the datasets: turn summed quantities into 0/1 purchase flags
# (1 when the quantity is at least 1, otherwise 0).
# A single vectorised comparison per basket replaces the element-by-element
# applymap call, which is slow on large invoice-by-product tables.
basket_France = (basket_France >= 1).astype(int)

basket_UK = (basket_UK >= 1).astype(int)

basket_Por = (basket_Por >= 1).astype(int)

basket_Sweden = (basket_Sweden >= 1).astype(int)

# The original cell left `basket_encoded` bound to the encoded Sweden basket;
# keep that binding so any later cell relying on it still works.
basket_encoded = basket_Sweden

In [10]:
# Frequent itemsets for the French baskets, using a minimum support of 0.05.
frq_items = apriori(basket_France, use_colnames=True, min_support=0.05)

# Derive association rules with a lift threshold of 1, then rank them by
# confidence and break ties by lift (both descending).
rules = (
    association_rules(frq_items, metric="lift", min_threshold=1)
    .sort_values(by=['confidence', 'lift'], ascending=False)
)
print(rules.head())

                                           antecedents  \
24                        (JUMBO BAG WOODLAND ANIMALS)   
186  (SET/20 RED RETROSPOT PAPER NAPKINS, SET/6 RED...   
184  (SET/20 RED RETROSPOT PAPER NAPKINS, SET/6 RED...   
192  (SET/20 RED RETROSPOT PAPER NAPKINS, POSTAGE, ...   
190  (SET/6 RED SPOTTY PAPER PLATES, SET/20 RED RET...   

                         consequents  antecedent support  consequent support  \
24                         (POSTAGE)            0.065076            0.650759   
186  (SET/6 RED SPOTTY PAPER PLATES)            0.086768            0.108460   
184    (SET/6 RED SPOTTY PAPER CUPS)            0.086768            0.117137   
192  (SET/6 RED SPOTTY PAPER PLATES)            0.071584            0.108460   
190    (SET/6 RED SPOTTY PAPER CUPS)            0.071584            0.117137   

      support  confidence      lift  leverage  conviction  
24   0.065076    1.000000  1.536667  0.022727         inf  
186  0.084599    0.975000  8.989500  0.075188   35

In [11]:

# Frequent itemsets for the Swedish baskets, using a minimum support of 0.05.
frq_items = apriori(basket_Sweden, use_colnames=True, min_support=0.05)

# Derive association rules with a lift threshold of 1, then rank them by
# confidence and break ties by lift (both descending).
rules = (
    association_rules(frq_items, metric="lift", min_threshold=1)
    .sort_values(by=['confidence', 'lift'], ascending=False)
)
print(rules.head())


                         antecedents                     consequents  \
12         (BUBBLEGUM RING ASSORTED)     (TREASURE TIN BUFFALO BILL)   
13       (TREASURE TIN BUFFALO BILL)       (BUBBLEGUM RING ASSORTED)   
14    (TREASURE TIN GYMKHANA DESIGN)       (BUBBLEGUM RING ASSORTED)   
15         (BUBBLEGUM RING ASSORTED)  (TREASURE TIN GYMKHANA DESIGN)   
32  (MAGIC DRAWING SLATE DOLLY GIRL)    (MAGIC DRAWING SLATE PURDEY)   

    antecedent support  consequent support   support  confidence       lift  \
12            0.065217            0.065217  0.065217         1.0  15.333333   
13            0.065217            0.065217  0.065217         1.0  15.333333   
14            0.065217            0.065217  0.065217         1.0  15.333333   
15            0.065217            0.065217  0.065217         1.0  15.333333   
32            0.065217            0.065217  0.065217         1.0  15.333333   

    leverage  conviction  
12  0.060964         inf  
13  0.060964         inf  
14  0.06096

Empty DataFrame
Columns: [antecedents, consequents, antecedent support, consequent support, support, confidence, lift, leverage, conviction]
Index: []
