In [1]:
# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules 


In [2]:
# Load the raw transaction records; the cleaning steps happen in later cells.
# The path is relative, so the CSV must be in the notebook's working directory.
data = pd.read_csv('transaction_data.csv')

In [3]:
# Preview the first rows to sanity-check column names and value formats
data.head()

Unnamed: 0,UserId,TransactionId,TransactionTime,ItemCode,ItemDescription,NumberOfItemsPurchased,CostPerItem,Country
0,278166,6355745,Sat Feb 02 12:50:00 IST 2019,465549,FAMILY ALBUM WHITE PICTURE FRAME,6,11.73,United Kingdom
1,337701,6283376,Wed Dec 26 09:06:00 IST 2018,482370,LONDON BUS COFFEE MUG,3,3.52,United Kingdom
2,267099,6385599,Fri Feb 15 09:45:00 IST 2019,490728,SET 12 COLOUR PENCILS DOLLY GIRL,72,0.9,France
3,380478,6044973,Fri Jun 22 07:14:00 IST 2018,459186,UNION JACK FLAG LUGGAGE TAG,3,1.73,United Kingdom
4,-1,6143225,Mon Sep 10 11:58:00 IST 2018,1733592,WASHROOM METAL SIGN,3,3.4,United Kingdom


In [4]:
# List the distinct labels in the Country column.
# Note: the recorded output includes non-country labels such as
# 'Unspecified' and 'European Community'.
data.Country.unique() 

array(['United Kingdom', 'France', 'Austria', 'Australia', 'EIRE',
       'Portugal', 'Germany', 'Sweden', 'Cyprus', 'Spain', 'Japan',
       'Belgium', 'Singapore', 'Denmark', 'Netherlands', 'Norway',
       'Canada', 'Iceland', 'Switzerland', 'Poland', 'Finland',
       'Hong Kong', 'Italy', 'Malta', 'Israel', 'Channel Islands',
       'Unspecified', 'USA', 'Czech Republic', 'Lebanon', 'Brazil',
       'European Community', 'Greece', 'RSA', 'Bahrain', 'Lithuania',
       'United Arab Emirates', 'Saudi Arabia'], dtype=object)

In [5]:
# Clean the raw transactions in one non-mutating chain, so this cell can be
# re-run without writing into a filtered slice of `data`:
#   1. strip leading/trailing whitespace from ItemDescription
#   2. drop rows with a missing TransactionId
#   3. cast TransactionId to str so the substring test below works
#   4. drop rows whose TransactionId contains the letter 'C'
#      (the original notebook describes these as credit transactions)
data = (
    data
    .assign(ItemDescription=lambda df: df['ItemDescription'].str.strip())
    .dropna(axis=0, subset=['TransactionId'])
    .assign(TransactionId=lambda df: df['TransactionId'].astype('str'))
    .loc[lambda df: ~df['TransactionId'].str.contains('C')]
)

In [6]:
# Number of rows per country in the cleaned data, largest first
data['Country'].value_counts()

United Kingdom          990956
Germany                  18990
France                   17114
EIRE                     16392
Spain                     5066
Netherlands               4742
Belgium                   4138
Switzerland               4004
Portugal                  3038
Australia                 2518
Norway                    2172
Italy                     1606
Channel Islands           1516
Finland                   1390
Cyprus                    1244
Sweden                     924
Unspecified                892
Austria                    802
Denmark                    778
Japan                      716
Poland                     682
Israel                     594
USA                        582
Hong Kong                  576
Singapore                  458
Iceland                    364
Canada                     302
Greece                     292
Malta                      254
United Arab Emirates       136
European Community         122
RSA                        116
Lebanon 

In [7]:
# Build the France basket matrix: one row per TransactionId, one column per
# ItemDescription, each cell holding the summed NumberOfItemsPurchased
# (0 where the item does not appear in the transaction).
basket_France = (
    data.loc[data['Country'].eq("France")]
    .groupby(['TransactionId', 'ItemDescription'])['NumberOfItemsPurchased']
    .sum()
    .unstack()
    .reset_index()
    .fillna(0)
    .set_index('TransactionId')
)

In [8]:
# One Hot Encoding 
def hot_encode(x):
    """Map a purchased quantity to a 0/1 presence flag.

    Parameters
    ----------
    x : int or float
        Quantity of an item within one transaction.

    Returns
    -------
    int
        1 when ``x >= 1``, otherwise 0.

    Notes
    -----
    Every input yields 0 or 1. Values strictly between 0 and 1, and NaN
    (for which ``x >= 1`` is False), map to 0 instead of falling through
    both branches and returning ``None``.
    """
    return 1 if x >= 1 else 0
  

In [9]:
# Encoded basket for France: 1 where the summed quantity is at least 1, else 0.
# A vectorised comparison replaces the element-wise DataFrame.applymap call
# (deprecated since pandas 2.1) and can never leave None in a cell.
basket_encoded = basket_France.ge(1).astype('int64')
basket_France = basket_encoded


In [10]:
# Mine itemsets that occur in at least 5% of the France transactions
frq_items = apriori(basket_France, min_support=0.05, use_colnames=True)

# Derive association rules with lift >= 1, ordered by confidence and then
# lift, both descending
rules = (
    association_rules(frq_items, metric="lift", min_threshold=1)
    .sort_values(by=['confidence', 'lift'], ascending=False)
)
print(rules.head())

                                           antecedents  \
25                        (JUMBO BAG WOODLAND ANIMALS)   
186  (SET/20 RED RETROSPOT PAPER NAPKINS, SET/6 RED...   
184  (SET/6 RED SPOTTY PAPER PLATES, SET/20 RED RET...   
193  (POSTAGE, SET/20 RED RETROSPOT PAPER NAPKINS, ...   
190  (SET/6 RED SPOTTY PAPER PLATES, POSTAGE, SET/2...   

                         consequents  antecedent support  consequent support  \
25                         (POSTAGE)            0.065076            0.650759   
186  (SET/6 RED SPOTTY PAPER PLATES)            0.086768            0.108460   
184    (SET/6 RED SPOTTY PAPER CUPS)            0.086768            0.117137   
193  (SET/6 RED SPOTTY PAPER PLATES)            0.071584            0.108460   
190    (SET/6 RED SPOTTY PAPER CUPS)            0.071584            0.117137   

      support  confidence      lift  leverage  conviction  
25   0.065076    1.000000  1.536667  0.022727         inf  
186  0.084599    0.975000  8.989500  0.075188   35

In [11]:
# Record how many items each rule has in its antecedent and consequent itemsets
rules["antecedent_len"] = rules["antecedents"].map(len)
rules["consequents_len"] = rules["consequents"].map(len)
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,antecedent_len,consequents_len
25,(JUMBO BAG WOODLAND ANIMALS),(POSTAGE),0.065076,0.650759,0.065076,1.000000,1.536667,0.022727,inf,1,1
186,"(SET/20 RED RETROSPOT PAPER NAPKINS, SET/6 RED...",(SET/6 RED SPOTTY PAPER PLATES),0.086768,0.108460,0.084599,0.975000,8.989500,0.075188,35.661605,2,1
184,"(SET/6 RED SPOTTY PAPER PLATES, SET/20 RED RET...",(SET/6 RED SPOTTY PAPER CUPS),0.086768,0.117137,0.084599,0.975000,8.323611,0.074435,35.314534,2,1
193,"(POSTAGE, SET/20 RED RETROSPOT PAPER NAPKINS, ...",(SET/6 RED SPOTTY PAPER PLATES),0.071584,0.108460,0.069414,0.969697,8.940606,0.061650,29.420824,3,1
190,"(SET/6 RED SPOTTY PAPER PLATES, POSTAGE, SET/2...",(SET/6 RED SPOTTY PAPER CUPS),0.071584,0.117137,0.069414,0.969697,8.278339,0.061029,29.134490,3,1
74,(RED RETROSPOT PICNIC BAG),(POSTAGE),0.060738,0.650759,0.058568,0.964286,1.481786,0.019043,9.778742,1,1
87,(SET OF 9 BLACK SKULL BALLOONS),(POSTAGE),0.056399,0.650759,0.054230,0.961538,1.477564,0.017528,9.080260,1,1
110,(SET/6 RED SPOTTY PAPER PLATES),(SET/6 RED SPOTTY PAPER CUPS),0.108460,0.117137,0.104121,0.960000,8.195556,0.091417,22.071584,1,1
48,(PACK OF 6 SKULL PAPER CUPS),(POSTAGE),0.054230,0.650759,0.052061,0.960000,1.475200,0.016770,8.731020,1,1
178,"(SET/6 RED SPOTTY PAPER PLATES, POSTAGE)",(SET/6 RED SPOTTY PAPER CUPS),0.091106,0.117137,0.086768,0.952381,8.130511,0.076096,18.540130,2,1


In [12]:
# The assignment asks for two consequents: keep rules whose consequent
# itemset holds at least two items
rules = rules.loc[rules['consequents_len'].ge(2)]
rules.head()

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,antecedent_len,consequents_len
199,"(SET/20 RED RETROSPOT PAPER NAPKINS, SET/6 RED...","(SET/6 RED SPOTTY PAPER PLATES, POSTAGE)",0.086768,0.091106,0.069414,0.8,8.780952,0.061509,4.544469,2,2
195,"(SET/6 RED SPOTTY PAPER PLATES, SET/20 RED RET...","(POSTAGE, SET/6 RED SPOTTY PAPER CUPS)",0.086768,0.099783,0.069414,0.8,8.017391,0.060756,4.501085,2,2
181,(SET/6 RED SPOTTY PAPER PLATES),"(POSTAGE, SET/6 RED SPOTTY PAPER CUPS)",0.10846,0.099783,0.086768,0.8,8.017391,0.075945,4.501085,1,2
187,(SET/6 RED SPOTTY PAPER PLATES),"(SET/20 RED RETROSPOT PAPER NAPKINS, SET/6 RED...",0.10846,0.086768,0.084599,0.78,8.9895,0.075188,4.151055,1,2
194,"(SET/6 RED SPOTTY PAPER PLATES, POSTAGE)","(SET/20 RED RETROSPOT PAPER NAPKINS, SET/6 RED...",0.091106,0.086768,0.069414,0.761905,8.780952,0.061509,3.835575,2,2


In [13]:
# Non-null count per column, i.e. rules left after the consequent-length filter
rules.count()

antecedents           49
consequents           49
antecedent support    49
consequent support    49
support               49
confidence            49
lift                  49
leverage              49
conviction            49
antecedent_len        49
consequents_len       49
dtype: int64

In [14]:
# Keep rules whose support exceeds 0.007.
# Note: apriori was already run with min_support = 0.05, so this threshold
# is looser than the one the itemsets were mined with.
rules = rules.loc[rules['support'].gt(0.007)]
rules.head()

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,antecedent_len,consequents_len
199,"(SET/20 RED RETROSPOT PAPER NAPKINS, SET/6 RED...","(SET/6 RED SPOTTY PAPER PLATES, POSTAGE)",0.086768,0.091106,0.069414,0.8,8.780952,0.061509,4.544469,2,2
195,"(SET/6 RED SPOTTY PAPER PLATES, SET/20 RED RET...","(POSTAGE, SET/6 RED SPOTTY PAPER CUPS)",0.086768,0.099783,0.069414,0.8,8.017391,0.060756,4.501085,2,2
181,(SET/6 RED SPOTTY PAPER PLATES),"(POSTAGE, SET/6 RED SPOTTY PAPER CUPS)",0.10846,0.099783,0.086768,0.8,8.017391,0.075945,4.501085,1,2
187,(SET/6 RED SPOTTY PAPER PLATES),"(SET/20 RED RETROSPOT PAPER NAPKINS, SET/6 RED...",0.10846,0.086768,0.084599,0.78,8.9895,0.075188,4.151055,1,2
194,"(SET/6 RED SPOTTY PAPER PLATES, POSTAGE)","(SET/20 RED RETROSPOT PAPER NAPKINS, SET/6 RED...",0.091106,0.086768,0.069414,0.761905,8.780952,0.061509,3.835575,2,2


In [15]:
# Non-null count per column, i.e. rules left after the support filter
rules.count()

antecedents           49
consequents           49
antecedent support    49
consequent support    49
support               49
confidence            49
lift                  49
leverage              49
conviction            49
antecedent_len        49
consequents_len       49
dtype: int64

In [16]:
# Keep rules whose consequent itemset has support above 0.005
rules = rules.loc[rules['consequent support'].gt(0.005)]
rules.head()

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,antecedent_len,consequents_len
199,"(SET/20 RED RETROSPOT PAPER NAPKINS, SET/6 RED...","(SET/6 RED SPOTTY PAPER PLATES, POSTAGE)",0.086768,0.091106,0.069414,0.8,8.780952,0.061509,4.544469,2,2
195,"(SET/6 RED SPOTTY PAPER PLATES, SET/20 RED RET...","(POSTAGE, SET/6 RED SPOTTY PAPER CUPS)",0.086768,0.099783,0.069414,0.8,8.017391,0.060756,4.501085,2,2
181,(SET/6 RED SPOTTY PAPER PLATES),"(POSTAGE, SET/6 RED SPOTTY PAPER CUPS)",0.10846,0.099783,0.086768,0.8,8.017391,0.075945,4.501085,1,2
187,(SET/6 RED SPOTTY PAPER PLATES),"(SET/20 RED RETROSPOT PAPER NAPKINS, SET/6 RED...",0.10846,0.086768,0.084599,0.78,8.9895,0.075188,4.151055,1,2
194,"(SET/6 RED SPOTTY PAPER PLATES, POSTAGE)","(SET/20 RED RETROSPOT PAPER NAPKINS, SET/6 RED...",0.091106,0.086768,0.069414,0.761905,8.780952,0.061509,3.835575,2,2


In [17]:
# Non-null count per column, i.e. rules left after the consequent-support filter
rules.count()

antecedents           49
consequents           49
antecedent support    49
consequent support    49
support               49
confidence            49
lift                  49
leverage              49
conviction            49
antecedent_len        49
consequents_len       49
dtype: int64

In [18]:
# Prune low-confidence rules: keep only those with confidence above 0.5
rules = rules.loc[rules['confidence'].gt(0.5)]
rules.head()

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,antecedent_len,consequents_len
199,"(SET/20 RED RETROSPOT PAPER NAPKINS, SET/6 RED...","(SET/6 RED SPOTTY PAPER PLATES, POSTAGE)",0.086768,0.091106,0.069414,0.8,8.780952,0.061509,4.544469,2,2
195,"(SET/6 RED SPOTTY PAPER PLATES, SET/20 RED RET...","(POSTAGE, SET/6 RED SPOTTY PAPER CUPS)",0.086768,0.099783,0.069414,0.8,8.017391,0.060756,4.501085,2,2
181,(SET/6 RED SPOTTY PAPER PLATES),"(POSTAGE, SET/6 RED SPOTTY PAPER CUPS)",0.10846,0.099783,0.086768,0.8,8.017391,0.075945,4.501085,1,2
187,(SET/6 RED SPOTTY PAPER PLATES),"(SET/20 RED RETROSPOT PAPER NAPKINS, SET/6 RED...",0.10846,0.086768,0.084599,0.78,8.9895,0.075188,4.151055,1,2
194,"(SET/6 RED SPOTTY PAPER PLATES, POSTAGE)","(SET/20 RED RETROSPOT PAPER NAPKINS, SET/6 RED...",0.091106,0.086768,0.069414,0.761905,8.780952,0.061509,3.835575,2,2


In [19]:
# Non-null count per column, i.e. rules left after the confidence filter
rules.count()

antecedents           29
consequents           29
antecedent support    29
consequent support    29
support               29
confidence            29
lift                  29
leverage              29
conviction            29
antecedent_len        29
consequents_len       29
dtype: int64

In [20]:
# Keep rules with a lift of at least 3
rules = rules.loc[rules['lift'].ge(3)]
rules.head()

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,antecedent_len,consequents_len
199,"(SET/20 RED RETROSPOT PAPER NAPKINS, SET/6 RED...","(SET/6 RED SPOTTY PAPER PLATES, POSTAGE)",0.086768,0.091106,0.069414,0.8,8.780952,0.061509,4.544469,2,2
195,"(SET/6 RED SPOTTY PAPER PLATES, SET/20 RED RET...","(POSTAGE, SET/6 RED SPOTTY PAPER CUPS)",0.086768,0.099783,0.069414,0.8,8.017391,0.060756,4.501085,2,2
181,(SET/6 RED SPOTTY PAPER PLATES),"(POSTAGE, SET/6 RED SPOTTY PAPER CUPS)",0.10846,0.099783,0.086768,0.8,8.017391,0.075945,4.501085,1,2
187,(SET/6 RED SPOTTY PAPER PLATES),"(SET/20 RED RETROSPOT PAPER NAPKINS, SET/6 RED...",0.10846,0.086768,0.084599,0.78,8.9895,0.075188,4.151055,1,2
194,"(SET/6 RED SPOTTY PAPER PLATES, POSTAGE)","(SET/20 RED RETROSPOT PAPER NAPKINS, SET/6 RED...",0.091106,0.086768,0.069414,0.761905,8.780952,0.061509,3.835575,2,2


In [21]:
# Non-null count per column, i.e. rules left after the lift filter
rules.count()

antecedents           29
consequents           29
antecedent support    29
consequent support    29
support               29
confidence            29
lift                  29
leverage              29
conviction            29
antecedent_len        29
consequents_len       29
dtype: int64

In [22]:
# Drop weaker rules: keep only those with a conviction of at least 3
rules = rules.loc[rules['conviction'].ge(3)]
rules.head()

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,antecedent_len,consequents_len
199,"(SET/20 RED RETROSPOT PAPER NAPKINS, SET/6 RED...","(SET/6 RED SPOTTY PAPER PLATES, POSTAGE)",0.086768,0.091106,0.069414,0.8,8.780952,0.061509,4.544469,2,2
195,"(SET/6 RED SPOTTY PAPER PLATES, SET/20 RED RET...","(POSTAGE, SET/6 RED SPOTTY PAPER CUPS)",0.086768,0.099783,0.069414,0.8,8.017391,0.060756,4.501085,2,2
181,(SET/6 RED SPOTTY PAPER PLATES),"(POSTAGE, SET/6 RED SPOTTY PAPER CUPS)",0.10846,0.099783,0.086768,0.8,8.017391,0.075945,4.501085,1,2
187,(SET/6 RED SPOTTY PAPER PLATES),"(SET/20 RED RETROSPOT PAPER NAPKINS, SET/6 RED...",0.10846,0.086768,0.084599,0.78,8.9895,0.075188,4.151055,1,2
194,"(SET/6 RED SPOTTY PAPER PLATES, POSTAGE)","(SET/20 RED RETROSPOT PAPER NAPKINS, SET/6 RED...",0.091106,0.086768,0.069414,0.761905,8.780952,0.061509,3.835575,2,2


In [23]:
# Non-null count per column, i.e. rules left after the conviction filter
rules.count()

antecedents           13
consequents           13
antecedent support    13
consequent support    13
support               13
confidence            13
lift                  13
leverage              13
conviction            13
antecedent_len        13
consequents_len       13
dtype: int64

In [24]:
# Display the rules that passed every filter applied so far
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,antecedent_len,consequents_len
199,"(SET/20 RED RETROSPOT PAPER NAPKINS, SET/6 RED...","(SET/6 RED SPOTTY PAPER PLATES, POSTAGE)",0.086768,0.091106,0.069414,0.8,8.780952,0.061509,4.544469,2,2
195,"(SET/6 RED SPOTTY PAPER PLATES, SET/20 RED RET...","(POSTAGE, SET/6 RED SPOTTY PAPER CUPS)",0.086768,0.099783,0.069414,0.8,8.017391,0.060756,4.501085,2,2
181,(SET/6 RED SPOTTY PAPER PLATES),"(POSTAGE, SET/6 RED SPOTTY PAPER CUPS)",0.10846,0.099783,0.086768,0.8,8.017391,0.075945,4.501085,1,2
187,(SET/6 RED SPOTTY PAPER PLATES),"(SET/20 RED RETROSPOT PAPER NAPKINS, SET/6 RED...",0.10846,0.086768,0.084599,0.78,8.9895,0.075188,4.151055,1,2
194,"(SET/6 RED SPOTTY PAPER PLATES, POSTAGE)","(SET/20 RED RETROSPOT PAPER NAPKINS, SET/6 RED...",0.091106,0.086768,0.069414,0.761905,8.780952,0.061509,3.835575,2,2
127,(ALARM CLOCK BAKELIKE RED),"(POSTAGE, ALARM CLOCK BAKELIKE GREEN)",0.08026,0.071584,0.060738,0.756757,10.571663,0.054992,3.816823,1,2
188,(SET/20 RED RETROSPOT PAPER NAPKINS),"(SET/6 RED SPOTTY PAPER PLATES, SET/6 RED SPOT...",0.112798,0.104121,0.084599,0.75,7.203125,0.072854,3.583514,1,2
197,"(POSTAGE, SET/20 RED RETROSPOT PAPER NAPKINS)","(SET/6 RED SPOTTY PAPER PLATES, SET/6 RED SPOT...",0.093275,0.104121,0.069414,0.744186,7.147287,0.059702,3.502071,2,2
183,(SET/6 RED SPOTTY PAPER CUPS),"(SET/6 RED SPOTTY PAPER PLATES, POSTAGE)",0.117137,0.091106,0.086768,0.740741,8.130511,0.076096,3.505733,1,2
129,(ALARM CLOCK BAKELIKE GREEN),"(ALARM CLOCK BAKELIKE RED, POSTAGE)",0.08243,0.073753,0.060738,0.736842,9.990712,0.054658,3.51974,1,2


In [25]:
# The assignment asks for a single antecedent: keep rules whose antecedent
# itemset holds exactly one item
rules = rules.loc[rules['antecedent_len'].eq(1)]
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,antecedent_len,consequents_len
181,(SET/6 RED SPOTTY PAPER PLATES),"(POSTAGE, SET/6 RED SPOTTY PAPER CUPS)",0.10846,0.099783,0.086768,0.8,8.017391,0.075945,4.501085,1,2
187,(SET/6 RED SPOTTY PAPER PLATES),"(SET/20 RED RETROSPOT PAPER NAPKINS, SET/6 RED...",0.10846,0.086768,0.084599,0.78,8.9895,0.075188,4.151055,1,2
127,(ALARM CLOCK BAKELIKE RED),"(POSTAGE, ALARM CLOCK BAKELIKE GREEN)",0.08026,0.071584,0.060738,0.756757,10.571663,0.054992,3.816823,1,2
188,(SET/20 RED RETROSPOT PAPER NAPKINS),"(SET/6 RED SPOTTY PAPER PLATES, SET/6 RED SPOT...",0.112798,0.104121,0.084599,0.75,7.203125,0.072854,3.583514,1,2
183,(SET/6 RED SPOTTY PAPER CUPS),"(SET/6 RED SPOTTY PAPER PLATES, POSTAGE)",0.117137,0.091106,0.086768,0.740741,8.130511,0.076096,3.505733,1,2
129,(ALARM CLOCK BAKELIKE GREEN),"(ALARM CLOCK BAKELIKE RED, POSTAGE)",0.08243,0.073753,0.060738,0.736842,9.990712,0.054658,3.51974,1,2
189,(SET/6 RED SPOTTY PAPER CUPS),"(SET/6 RED SPOTTY PAPER PLATES, SET/20 RED RET...",0.117137,0.086768,0.084599,0.722222,8.323611,0.074435,3.287636,1,2
133,(ALARM CLOCK BAKELIKE RED),"(POSTAGE, ALARM CLOCK BAKELIKE PINK)",0.08026,0.075922,0.056399,0.702703,9.255598,0.050306,3.108263,1,2


KeyError: 'SET/6 RED SPOTTY PAPER PLATES'

In [28]:
# Write the final rule table to rules.csv in the working directory
# (the DataFrame index is written as the first column by default)
rules.to_csv("rules.csv")