In [1]:
import pandas as pd
import regex as re
import numpy as np
from mlxtend.frequent_patterns import apriori, association_rules
import plotly.express as px
import networkx as nx
import plotly.graph_objs as go
from plotly.offline import init_notebook_mode, iplot
import matplotlib.pyplot as plt
import plotly.io as pio

  duck_array_version = LooseVersion(duck_array_module.__version__)
  duck_array_version = LooseVersion("0.0.0")
  duck_array_version = LooseVersion("0.0.0")
  other = LooseVersion(other)
  other = LooseVersion(other)
  if LooseVersion(np.__version__) >= "1.20.0":
  other = LooseVersion(other)
  if LooseVersion(pd.__version__) < "0.25.0":
  other = LooseVersion(other)


Support: Measures the frequency of an item or a combination of items being present in the data.
$$Support(A, B) = \frac{Frequency(A \cup B)}{Sum(Transactions)}$$


Lift: Measures the confidence that buying product A or a combination of products will improve the probability of buying product B.
$$Lift(A \rightarrow B) = \frac{Confidence(A \rightarrow B)}{Support(B)}$$


Confidence: Measures the frequency of both the antecedent and consequent items in an assosiaction rule, being present in a transaction.
$$Confidence(A \rightarrow B) = \frac{Support(A \cup B)}{Support(A)}$$

Leverage: Measures the degree that buying product A or a combination of products will result in also buying another product B.

$$Leverage(A \rightarrow B) = \frac{\text{Support}(A \cup B)}{\text{Support}(A)\text{Support}(B)}$$

Conviction: Compares the probability of buying a product A or a combination of products without product B, assuming their independence, against the frequency of product A or a combination of products being present in a transaction without product B.
$$Conviction(A \rightarrow B) = \frac{1 - Support(B)}{1 - Confidence(A \rightarrow B)}$$





Firstly we will read the data and see their format 

In [2]:
df_retail = pd.read_csv('online_retail.csv')
df_retail

Unnamed: 0,InvoiceNo,StockCode,Description
0,562583,35637A,IVORY STRING CURTAIN WITH POLE
1,562583,35638A,PINK AND BLACK STRING CURTAIN
2,562583,84927F,PSYCHEDELIC TILE HOOK
3,562583,22425,ENAMEL COLANDER CREAM
4,562583,16008,SMALL FOLDING SCISSOR(POINTED EDGE)
...,...,...,...
227755,C581229,23158,SET OF 5 LUCKY CAT MAGNETS
227756,C581229,22712,CARD DOLLY GIRL
227757,C581229,22027,TEA PARTY BIRTHDAY CARD
227758,C581229,21508,VINTAGE KID DOLLY CARD


In [3]:
df_retail['Description'] = df_retail['Description'].apply(lambda x: re.sub(r'[^a-zA-Z\s]', '', str(x)))
df_retail

Unnamed: 0,InvoiceNo,StockCode,Description
0,562583,35637A,IVORY STRING CURTAIN WITH POLE
1,562583,35638A,PINK AND BLACK STRING CURTAIN
2,562583,84927F,PSYCHEDELIC TILE HOOK
3,562583,22425,ENAMEL COLANDER CREAM
4,562583,16008,SMALL FOLDING SCISSORPOINTED EDGE
...,...,...,...
227755,C581229,23158,SET OF LUCKY CAT MAGNETS
227756,C581229,22712,CARD DOLLY GIRL
227757,C581229,22027,TEA PARTY BIRTHDAY CARD
227758,C581229,21508,VINTAGE KID DOLLY CARD


We will create a new dataframe that is the product of grouping by each item and counting the number of times each item appears in the other two columns.

In [4]:
df_grouped = df_retail.groupby(by="Description").count().sort_values(by='InvoiceNo', ascending=False)
df_grouped.reset_index()
df_grouped.rename(columns={'InvoiceNo' : 'Times Purchased'}, inplace=True)
df_grouped

Unnamed: 0_level_0,Times Purchased,StockCode
Description,Unnamed: 1_level_1,Unnamed: 2_level_1
PAPER CHAIN KIT S CHRISTMAS,927,927
JUMBO BAG RED RETROSPOT,810,810
HOT WATER BOTTLE KEEP CALM,786,786
WHITE HANGING HEART TLIGHT HOLDER,763,763
RABBIT NIGHT LIGHT,702,702
...,...,...
WHITE FRAME BIRDS AND TREE,1,1
WETMOULDY,1,1
FOLDING SHOE TIDY,1,1
SET PAPER VINTAGE CHICK PAPER EGG,1,1


In [5]:
import plotly.graph_objs as go
import plotly.express as px

fig = px.bar(df_grouped.head(30), x=df_grouped.head(30).index, y='Times Purchased', title='30 Most Frequently Purchased Items')
fig.update_layout(xaxis_title='Item', yaxis_title='Times Purchased')
fig.show()


distutils Version classes are deprecated. Use packaging.version instead.


distutils Version classes are deprecated. Use packaging.version instead.



In [6]:
pio.write_html(fig, 'figures/most_puchased_items.html', auto_open=False)

We detect that the dataframe is formated based on each individual item description. Consequently we create a list that holds all the unique invoices

In [7]:
unique_transactions = df_retail['InvoiceNo'].unique()
print(len(unique_transactions))

9709


Then we create a list (transactions) that holds the items of each invoice inside a list of strings.

In [8]:
transactions = []
for invoice in unique_transactions:
    indexes = list(df_retail[df_retail['InvoiceNo'] == invoice].index)
    items = []
    for index in indexes:
        items.append(df_retail.at[index, 'Description'])
    transactions.append(items)
len(transactions)
    

9709

In [9]:
df_retail[df_retail['InvoiceNo'] == unique_transactions[0]]

Unnamed: 0,InvoiceNo,StockCode,Description
0,562583,35637A,IVORY STRING CURTAIN WITH POLE
1,562583,35638A,PINK AND BLACK STRING CURTAIN
2,562583,84927F,PSYCHEDELIC TILE HOOK
3,562583,22425,ENAMEL COLANDER CREAM
4,562583,16008,SMALL FOLDING SCISSORPOINTED EDGE
5,562583,22232,JIGSAW TOADSTOOLS PIECE


In [10]:
transactions[0]

['IVORY STRING CURTAIN WITH POLE ',
 'PINK AND BLACK STRING CURTAIN',
 'PSYCHEDELIC TILE HOOK',
 'ENAMEL COLANDER CREAM',
 'SMALL FOLDING SCISSORPOINTED EDGE',
 'JIGSAW TOADSTOOLS  PIECE']

No we create an instance of the TransactionEncoder object that will help us identify the unique items of our data with it's fit() function

In [11]:
from mlxtend.preprocessing import TransactionEncoder
encoder = TransactionEncoder().fit(transactions)

We also use the transform() method to create an array of onehot encoded transactions

In [12]:
one_hot = encoder.transform(transactions)
one_hot = pd.DataFrame(one_hot, columns=encoder.columns_)
one_hot.columns

Index(['', '  PURPLE FLOCK DINNER CANDLES', ' BENDY SKULL STRAWS',
       ' BLACK CATS W HEARTS BLANK CARD', ' BLACK PETIT FOUR CASES',
       ' BLUE DINNER CANDLES SILVER FLOCK', ' BURGUNDY WINE DINNER CANDLES',
       ' CAKE CASES DOLLY GIRL DESIGN', ' CAKE CASES VINTAGE CHRISTMAS',
       ' CHOCOLATE LOVE HEART TLIGHTS',
       ...
       'website fixed', 'wet', 'wet boxes', 'wet pallet', 'wet rusty',
       'wrongly coded', 'wrongly coded ', 'wrongly marked', 'wrongly marked ',
       'wrongly marked carton '],
      dtype='object', length=3399)

In [13]:
one_hot

Unnamed: 0,Unnamed: 1,PURPLE FLOCK DINNER CANDLES,BENDY SKULL STRAWS,BLACK CATS W HEARTS BLANK CARD,BLACK PETIT FOUR CASES,BLUE DINNER CANDLES SILVER FLOCK,BURGUNDY WINE DINNER CANDLES,CAKE CASES DOLLY GIRL DESIGN,CAKE CASES VINTAGE CHRISTMAS,CHOCOLATE LOVE HEART TLIGHTS,...,website fixed,wet,wet boxes,wet pallet,wet rusty,wrongly coded,wrongly coded.1,wrongly marked,wrongly marked.1,wrongly marked carton
0,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9704,False,False,False,False,False,False,False,True,False,False,...,False,False,False,False,False,False,False,False,False,False
9705,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
9706,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
9707,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False


In [14]:
freq_itemsets = apriori(one_hot, min_support=0.01, max_len=4, use_colnames=True)
freq_itemsets

Unnamed: 0,support,itemsets
0,0.016892,( CAKE CASES DOLLY GIRL DESIGN)
1,0.047482,( CAKE CASES VINTAGE CHRISTMAS)
2,0.013596,( COLOUR SPACEBOY PEN)
3,0.015553,( DOILIES VINTAGE CHRISTMAS)
4,0.012772,( DOLLY GIRL BEAKER)
...,...,...
2187,0.010403,"(ROSES REGENCY TEACUP AND SAUCER , GREEN REGEN..."
2188,0.010197,"(LUNCH BAG RED RETROSPOT, LUNCH BAG VINTAGE DO..."
2189,0.010815,"(LUNCH BAG RED RETROSPOT, LUNCH BAG APPLE DESI..."
2190,0.010094,"(LUNCH BAG SPACEBOY DESIGN , LUNCH BAG APPLE D..."


Without pruning we get 4360 association rules

In [15]:
rules = association_rules(freq_itemsets, metric='support', min_threshold=0)
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,( CAKE CASES VINTAGE CHRISTMAS),( GIFT TAGS VINTAGE CHRISTMAS ),0.047482,0.029869,0.010094,0.212581,7.117077,0.008675,1.232039,0.902337
1,( GIFT TAGS VINTAGE CHRISTMAS ),( CAKE CASES VINTAGE CHRISTMAS),0.029869,0.047482,0.010094,0.337931,7.117077,0.008675,1.438699,0.885956
2,(BOX OF MINI VINTAGE CRACKERS),( CAKE CASES VINTAGE CHRISTMAS),0.035637,0.047482,0.011330,0.317919,6.695610,0.009638,1.396489,0.882083
3,( CAKE CASES VINTAGE CHRISTMAS),(BOX OF MINI VINTAGE CRACKERS),0.047482,0.035637,0.011330,0.238612,6.695610,0.009638,1.266585,0.893052
4,(CHRISTMAS CRAFT LITTLE FRIENDS),( CAKE CASES VINTAGE CHRISTMAS),0.041920,0.047482,0.010197,0.243243,5.122882,0.008206,1.258685,0.840011
...,...,...,...,...,...,...,...,...,...,...
4355,"(SET OF WOODEN HEART DECORATIONS, SET OF WOO...","(SET OF WOODEN SLEIGH DECORATIONS, SET OF WO...",0.020290,0.018231,0.012154,0.598985,32.856176,0.011784,2.448210,0.989645
4356,(SET OF WOODEN SLEIGH DECORATIONS),"(SET OF WOODEN STOCKING DECORATION, SET OF W...",0.026676,0.016377,0.012154,0.455598,27.820160,0.011717,1.806798,0.990477
4357,(SET OF WOODEN TREE DECORATIONS),"(SET OF WOODEN HEART DECORATIONS, SET OF WOO...",0.026779,0.013905,0.012154,0.453846,32.639943,0.011781,1.805527,0.996036
4358,(SET OF WOODEN HEART DECORATIONS),"(SET OF WOODEN STOCKING DECORATION, SET OF W...",0.037697,0.014626,0.012154,0.322404,22.043831,0.011602,1.454222,0.992032


With pruning we get

In [16]:
rules_pruned_support = association_rules(freq_itemsets, metric='support', min_threshold=0.02)
rules_pruned_support

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(PAPER CHAIN KIT S CHRISTMAS ),( CAKE CASES VINTAGE CHRISTMAS),0.092697,0.047482,0.021011,0.226667,4.773767,0.016610,1.231705,0.871288
1,( CAKE CASES VINTAGE CHRISTMAS),(PAPER CHAIN KIT S CHRISTMAS ),0.047482,0.092697,0.021011,0.442516,4.773767,0.016610,1.627496,0.829928
2,(SET OF VINTAGE CHRISTMAS NAPKINS),( CAKE CASES VINTAGE CHRISTMAS),0.046349,0.047482,0.020393,0.440000,9.266725,0.018193,1.700926,0.935444
3,( CAKE CASES VINTAGE CHRISTMAS),(SET OF VINTAGE CHRISTMAS NAPKINS),0.047482,0.046349,0.020393,0.429501,9.266725,0.018193,1.671609,0.936556
4,(ALARM CLOCK BAKELIKE RED ),(ALARM CLOCK BAKELIKE GREEN),0.044701,0.048306,0.031826,0.711982,14.739081,0.029667,3.304283,0.975771
...,...,...,...,...,...,...,...,...,...,...
149,"(WOODEN TREE CHRISTMAS SCANDINAVIAN, WOODEN ST...",(WOODEN HEART CHRISTMAS SCANDINAVIAN),0.025028,0.045628,0.021011,0.839506,18.399019,0.019869,5.946473,0.969925
150,"(WOODEN HEART CHRISTMAS SCANDINAVIAN, WOODEN S...",(WOODEN TREE CHRISTMAS SCANDINAVIAN),0.035843,0.028633,0.021011,0.586207,20.472960,0.019985,2.347470,0.986515
151,(WOODEN TREE CHRISTMAS SCANDINAVIAN),"(WOODEN HEART CHRISTMAS SCANDINAVIAN, WOODEN S...",0.028633,0.035843,0.021011,0.733813,20.472960,0.019985,3.622103,0.979193
152,(WOODEN HEART CHRISTMAS SCANDINAVIAN),"(WOODEN TREE CHRISTMAS SCANDINAVIAN, WOODEN ST...",0.045628,0.025028,0.021011,0.460497,18.399019,0.019869,1.807165,0.990860


In [17]:
rules_pruned_conf = association_rules(freq_itemsets, metric='confidence', min_threshold=0.85)
rules_pruned_conf

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(REGENCY TEA PLATE PINK),(REGENCY TEA PLATE GREEN ),0.014214,0.018025,0.012772,0.898551,49.851594,0.012515,9.679473,0.994070
1,(REGENCY TEA PLATE PINK),(REGENCY TEA PLATE ROSES ),0.014214,0.020702,0.012154,0.855072,41.302978,0.011859,6.757153,0.989858
2,(WOODEN TREE CHRISTMAS SCANDINAVIAN),(WOODEN STAR CHRISTMAS SCANDINAVIAN),0.028633,0.045628,0.025028,0.874101,19.157210,0.023722,7.580442,0.975739
3,"(ALARM CLOCK BAKELIKE RED , ALARM CLOCK BAKELI...",(ALARM CLOCK BAKELIKE GREEN),0.013802,0.048306,0.012463,0.902985,18.693139,0.011796,9.809772,0.959751
4,"(PAPER CHAIN KIT VINTAGE CHRISTMAS, BOX OF MI...",(PAPER CHAIN KIT S CHRISTMAS ),0.013081,0.092697,0.011639,0.889764,9.598574,0.010426,8.230530,0.907691
...,...,...,...,...,...,...,...,...,...,...
81,"(ROSES REGENCY TEACUP AND SAUCER , REGENCY TEA...",(GREEN REGENCY TEACUP AND SAUCER),0.011330,0.032650,0.010403,0.918182,28.121853,0.010033,11.823165,0.975492
82,"(GREEN REGENCY TEACUP AND SAUCER, REGENCY TEA ...",(ROSES REGENCY TEACUP AND SAUCER ),0.011742,0.035946,0.010403,0.885965,24.647087,0.009981,8.454012,0.970826
83,"(LUNCH BAG RED RETROSPOT, LUNCH BAG VINTAGE DO...",(JUMBO BAG VINTAGE DOILY ),0.011639,0.068493,0.010197,0.876106,12.791150,0.009400,7.518591,0.932676
84,"(SET OF WOODEN HEART DECORATIONS, SET OF WOO...",(SET OF WOODEN STOCKING DECORATION),0.014008,0.028736,0.012154,0.867647,30.193496,0.011751,7.338437,0.980616


In [18]:
rules_pruned_lift = association_rules(freq_itemsets, metric='lift', min_threshold=50)
rules_pruned_lift

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,"(BUTTON BOX , SUKI SHOULDER BAG)",(DOTCOM POSTAGE),0.0103,0.018848,0.010094,0.98,51.993552,0.0099,49.057575,0.990974
1,(DOTCOM POSTAGE),"(BUTTON BOX , SUKI SHOULDER BAG)",0.018848,0.0103,0.010094,0.535519,51.993552,0.0099,2.130766,0.999608
2,"(CHARLIELOLA PINK HOT WATER BOTTLE, SUKI SHOU...",(DOTCOM POSTAGE),0.010815,0.018848,0.010712,0.990476,52.549362,0.010508,103.020908,0.991695
3,(DOTCOM POSTAGE),"(CHARLIELOLA PINK HOT WATER BOTTLE, SUKI SHOU...",0.018848,0.010815,0.010712,0.568306,52.549362,0.010508,2.291404,0.999815
4,"(GREEN REGENCY TEACUP AND SAUCER, SUKI SHOULD...",(DOTCOM POSTAGE),0.010918,0.018848,0.010609,0.971698,51.553098,0.010403,34.667353,0.991427
5,(DOTCOM POSTAGE),"(GREEN REGENCY TEACUP AND SAUCER, SUKI SHOULD...",0.018848,0.010918,0.010609,0.562842,51.553098,0.010403,2.262526,0.99944
6,"(JAM MAKING SET PRINTED, SKULL SHOULDER BAG)",(DOTCOM POSTAGE),0.010506,0.018848,0.010506,1.0,53.054645,0.010308,inf,0.991569
7,(DOTCOM POSTAGE),"(JAM MAKING SET PRINTED, SKULL SHOULDER BAG)",0.018848,0.010506,0.010506,0.557377,53.054645,0.010308,2.235524,1.0
8,"(JAM MAKING SET PRINTED, SUKI SHOULDER BAG)",(DOTCOM POSTAGE),0.012978,0.018848,0.012772,0.984127,52.212508,0.012527,61.812545,0.993744
9,(DOTCOM POSTAGE),"(JAM MAKING SET PRINTED, SUKI SHOULDER BAG)",0.018848,0.012978,0.012772,0.677596,52.212508,0.012527,3.061442,0.99969


### The Association Rules Scatter Plot shows the relationships among the metrics of the assocation rules.

In [19]:
fig = px.scatter(rules, x="confidence", y="support", color="lift", size="confidence", title='All Assocation Rules',
                 hover_data=["support", "confidence", "lift", "antecedent support", "consequent support"],
                 hover_name=rules["antecedents"].apply(lambda x: ", ".join(list(x))) + 
                            " -> " + 
                            rules["consequents"].apply(lambda x: ", ".join(list(x))) + 
                            rules["antecedent support"].apply(lambda x: "{:.3f}".format(x)) + 
                            rules["consequent support"].apply(lambda x: "{:.3f}".format(x)))
fig.show()


distutils Version classes are deprecated. Use packaging.version instead.


distutils Version classes are deprecated. Use packaging.version instead.



In [20]:
pio.write_html(fig, 'figures/all_supp_conf.html', auto_open=False)

In [21]:
fig = px.scatter(rules_pruned_support, x="confidence", y="support", color="lift", size="confidence", title='Support Pruned Association Rules',
                 hover_data=["support", "confidence", "lift", "antecedent support", "consequent support"],
                 hover_name=rules_pruned_support["antecedents"].apply(lambda x: ", ".join(list(x))) + 
                            " -> " + 
                            rules_pruned_support["consequents"].apply(lambda x: ", ".join(list(x))) + 
                            rules_pruned_support["antecedent support"].apply(lambda x: "{:.3f}".format(x)) + 
                            rules_pruned_support["consequent support"].apply(lambda x: "{:.3f}".format(x)))
fig.show()


distutils Version classes are deprecated. Use packaging.version instead.


distutils Version classes are deprecated. Use packaging.version instead.



In [22]:
pio.write_html(fig, 'figures/supp_pruned_supp_conf.html', auto_open=False)

In [23]:
fig = px.scatter(rules_pruned_conf, x="confidence", y="support", color="lift", size="confidence", title='Confidence Pruned Association Rules',
                 hover_data=["support", "confidence", "lift", "antecedent support", "consequent support"],
                 hover_name=rules_pruned_conf["antecedents"].apply(lambda x: ", ".join(list(x))) + 
                            " -> " + 
                            rules_pruned_conf["consequents"].apply(lambda x: ", ".join(list(x))) + 
                            rules_pruned_conf["antecedent support"].apply(lambda x: "{:.3f}".format(x)) + 
                            rules_pruned_conf["consequent support"].apply(lambda x: "{:.3f}".format(x)))
fig.show()


distutils Version classes are deprecated. Use packaging.version instead.


distutils Version classes are deprecated. Use packaging.version instead.



In [24]:
pio.write_html(fig, 'figures/conf_pruned_supp_conf.html', auto_open=False)

In [25]:
fig = px.scatter(rules_pruned_lift, x="confidence", y="support", color="lift", size="confidence", title='Lift Pruned Association Rules',
                 hover_data=["support", "confidence", "lift", "antecedent support", "consequent support"],
                 hover_name=rules_pruned_lift["antecedents"].apply(lambda x: ", ".join(list(x))) + 
                            " -> " + 
                            rules_pruned_lift["consequents"].apply(lambda x: ", ".join(list(x))) + 
                            rules_pruned_lift["antecedent support"].apply(lambda x: "{:.3f}".format(x)) + 
                            rules_pruned_lift["consequent support"].apply(lambda x: "{:.3f}".format(x)))
fig.show()


distutils Version classes are deprecated. Use packaging.version instead.


distutils Version classes are deprecated. Use packaging.version instead.



In [26]:
pio.write_html(fig, 'figures/lit_pruned_supp_conf.html', auto_open=False)

In [27]:
fig = px.scatter(rules, x="lift", y="confidence", color="support", size="confidence", title='All Assocation Rules',
                 hover_data=["support", "confidence", "lift", "antecedent support", "consequent support"],
                 hover_name=rules["antecedents"].apply(lambda x: ", ".join(list(x))) + 
                            " -> " + 
                            rules["consequents"].apply(lambda x: ", ".join(list(x))) + 
                            rules["antecedent support"].apply(lambda x: "{:.3f}".format(x)) + 
                            rules["consequent support"].apply(lambda x: "{:.3f}".format(x)))
fig.show()


distutils Version classes are deprecated. Use packaging.version instead.


distutils Version classes are deprecated. Use packaging.version instead.



In [28]:
pio.write_html(fig, 'figures/all_conf_lift.html', auto_open=False)

From this figure we can detect a linear pattern in the points such that straight lines with a 15 - 45 degree are emerging from the x axis. This indicates a strong positive corellation between lift and confidence.

A positive correlation means that as one measure (in this case, lift) increases, the other measure (in this case, confidence) also increases. Therefore, the linear pattern indicates that the association rules with higher lift tend to have higher confidence as well.

This can be a useful insight for market basket analysis, as it suggests that if a product has a high lift with another product, it is also likely to have a high confidence level, indicating a strong association between the two products.

In [29]:
fig = px.scatter(rules_pruned_support, x="lift", y="confidence", color="support", size="confidence", title='Support Pruned Rules',
                 hover_data=["support", "confidence", "lift", "antecedent support", "consequent support"],
                 hover_name=rules_pruned_support["antecedents"].apply(lambda x: ", ".join(list(x))) + 
                            " -> " + 
                            rules_pruned_support["consequents"].apply(lambda x: ", ".join(list(x))) + 
                            rules_pruned_support["antecedent support"].apply(lambda x: "{:.3f}".format(x)) + 
                            rules_pruned_support["consequent support"].apply(lambda x: "{:.3f}".format(x)))
fig.show()


distutils Version classes are deprecated. Use packaging.version instead.


distutils Version classes are deprecated. Use packaging.version instead.



In [30]:
pio.write_html(fig, 'figures/supp_pruned_conf_lift.html', auto_open=False)

In [31]:
fig = px.scatter(rules_pruned_conf, x="lift", y="confidence", color="support", size="confidence", title='Confidence Pruned Rules',
                 hover_data=["support", "confidence", "lift", "antecedent support", "consequent support"],
                 hover_name=rules_pruned_conf["antecedents"].apply(lambda x: ", ".join(list(x))) + 
                            " -> " + 
                            rules_pruned_conf["consequents"].apply(lambda x: ", ".join(list(x))) + 
                            rules_pruned_conf["antecedent support"].apply(lambda x: "{:.3f}".format(x)) + 
                            rules_pruned_conf["consequent support"].apply(lambda x: "{:.3f}".format(x)))
fig.show()


distutils Version classes are deprecated. Use packaging.version instead.


distutils Version classes are deprecated. Use packaging.version instead.



In [32]:
pio.write_html(fig, 'figures/conf_pruned_conf_lift.html', auto_open=False)

In [33]:
fig = px.scatter(rules_pruned_lift, x="lift", y="confidence", color="support", size="confidence", title='Lift Pruned Rules',
                 hover_data=["support", "confidence", "lift", "antecedent support", "consequent support"],
                 hover_name=rules_pruned_lift["antecedents"].apply(lambda x: ", ".join(list(x))) + 
                            " -> " + 
                            rules_pruned_lift["consequents"].apply(lambda x: ", ".join(list(x))) + 
                            rules_pruned_lift["antecedent support"].apply(lambda x: "{:.3f}".format(x)) + 
                            rules_pruned_lift["consequent support"].apply(lambda x: "{:.3f}".format(x)))
fig.show()


distutils Version classes are deprecated. Use packaging.version instead.


distutils Version classes are deprecated. Use packaging.version instead.



In [34]:
pio.write_html(fig, 'figures/lit_pruned_conf_lift.html', auto_open=False)

In [35]:
fig = px.scatter(rules, x="lift", y="support", color="confidence", size="support", title='All Assocation Rules',
                 hover_data=["support", "confidence", "lift", "antecedent support", "consequent support"],
                 hover_name=rules["antecedents"].apply(lambda x: ", ".join(list(x))) + 
                            " -> " + 
                            rules["consequents"].apply(lambda x: ", ".join(list(x))) + 
                            rules["antecedent support"].apply(lambda x: "{:.3f}".format(x)) + 
                            rules["consequent support"].apply(lambda x: "{:.3f}".format(x)))
fig.show()


distutils Version classes are deprecated. Use packaging.version instead.


distutils Version classes are deprecated. Use packaging.version instead.



In [36]:
pio.write_html(fig, 'figures/all_supp_lift.html', auto_open=False)

We detect that all the high lift points have low support. This means that there are few transactions that contain both the antecedent and the consequent items. High lift values indicate a strong association between the antecedent and consequent items, while high support values indicate that the items are frequently occurring in transactions. Hence, a high lift value with a low support value means that although the association rule is strong, it is not very common.

In [37]:
fig = px.scatter(rules_pruned_support, x="lift", y="support", color="confidence", size="support", title='Support Pruned Assocation Rules',
                 hover_data=["support", "confidence", "lift", "antecedent support", "consequent support"],
                 hover_name=rules_pruned_support["antecedents"].apply(lambda x: ", ".join(list(x))) + 
                            " -> " + 
                            rules_pruned_support["consequents"].apply(lambda x: ", ".join(list(x))) + 
                            rules_pruned_support["antecedent support"].apply(lambda x: "{:.3f}".format(x)) + 
                            rules_pruned_support["consequent support"].apply(lambda x: "{:.3f}".format(x)))
fig.show()


distutils Version classes are deprecated. Use packaging.version instead.


distutils Version classes are deprecated. Use packaging.version instead.



In [38]:
pio.write_html(fig, 'figures/supp_pruned_supp_lift.html', auto_open=False)

In [39]:
fig = px.scatter(rules_pruned_conf, x="lift", y="support", color="confidence", size="support", title='Confidence Pruned Assocation Rules',
                 hover_data=["support", "confidence", "lift", "antecedent support", "consequent support"],
                 hover_name=rules_pruned_conf["antecedents"].apply(lambda x: ", ".join(list(x))) + 
                            " -> " + 
                            rules_pruned_conf["consequents"].apply(lambda x: ", ".join(list(x))) + 
                            rules_pruned_conf["antecedent support"].apply(lambda x: "{:.3f}".format(x)) + 
                            rules_pruned_conf["consequent support"].apply(lambda x: "{:.3f}".format(x)))
fig.show()


distutils Version classes are deprecated. Use packaging.version instead.


distutils Version classes are deprecated. Use packaging.version instead.



In [40]:
pio.write_html(fig, 'figures/conf_pruned_supp_lift.html', auto_open=False)

In [41]:
fig = px.scatter(rules_pruned_lift, x="lift", y="support", color="confidence", size="support", title='Lift Pruned Assocation Rules',
                 hover_data=["support", "confidence", "lift", "antecedent support", "consequent support"],
                 hover_name=rules_pruned_lift["antecedents"].apply(lambda x: ", ".join(list(x))) + 
                            " -> " + 
                            rules_pruned_lift["consequents"].apply(lambda x: ", ".join(list(x))) + 
                            rules_pruned_lift["antecedent support"].apply(lambda x: "{:.3f}".format(x)) + 
                            rules_pruned_lift["consequent support"].apply(lambda x: "{:.3f}".format(x)))
fig.show()


distutils Version classes are deprecated. Use packaging.version instead.


distutils Version classes are deprecated. Use packaging.version instead.



In [42]:
pio.write_html(fig, 'figures/lift_pruned_supp_lift.html', auto_open=False)

### The Association Rule Directed Graph 

In [43]:
# Create directed graph
G = nx.DiGraph()
for i, row in rules_pruned_support.iterrows():
    G.add_edge(row['antecedents'], row['consequents'], weight=row['confidence'])
    nx.set_node_attributes(G, {row['antecedents']: {'lift': row['lift']}})
    nx.set_node_attributes(G, {row['consequents']: {'lift': row['lift']}})
    nx.set_node_attributes(G, {row['antecedents']: {'support': row['support']}})
    nx.set_node_attributes(G, {row['consequents']: {'support': row['support']}})
# Create plotly figure
edge_trace = go.Scatter(
    x=[],
    y=[],
    line=dict(width=0.5,color='#888'),
    hoverinfo='none',
    mode='lines')

node_trace = go.Scatter(
    x=[],
    y=[],
    text=[],
    mode='markers',
    hoverinfo='text',
    marker=dict(
        showscale=True,
        colorscale=[[0, 'red'], [1, 'white']],
        reversescale=True,
        color=[],
        size=[],
        colorbar=dict(
            thickness=15,
            title='Lift',
            xanchor='left',
            titleside='right'
        ),
        line_width=2))
pos = nx.spring_layout(G, k=0.15, seed=42) # replace with your preferred layout
nx.set_node_attributes(G, pos, 'pos')

for node in G.nodes():
    x, y = G.nodes[node]['pos']
    node_trace['x'] += tuple([x])
    node_trace['y'] += tuple([y])
    text = next(iter(node)) if isinstance(node, frozenset) else node
    text = f"{text}<br>Support: {G.nodes[node]['support']:.4f}<br> Lift: {G.nodes[node]['lift']:.4f}"
    node_trace['text'] += tuple([text])
    node_trace['marker']['color'] += tuple([G.nodes[node]['lift']])
    node_trace['marker']['size'] += tuple([1000*G.nodes[node]['support']])

for edge in G.edges():
    x0, y0 = G.nodes[edge[0]]['pos']
    x1, y1 = G.nodes[edge[1]]['pos']
    edge_trace['x'] += tuple([x0, x1, None])
    edge_trace['y'] += tuple([y0, y1, None])

fig = go.Figure(data=[edge_trace, node_trace],
             layout=go.Layout(
                title='<br>Association Rules Directed Graph',
                titlefont_size=16,
                showlegend=False,
                hovermode='closest',
                margin=dict(b=20,l=5,r=5,t=40),
                annotations=[ dict(
                    text="Market Basket Analysis Retail Data",
                    showarrow=False,
                    xref="paper", yref="paper",
                    x=0.005, y=-0.002 ) ],
                xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
                yaxis=dict(showgrid=False, zeroline=False, showticklabels=False)))

fig.update_layout(
    coloraxis=dict(
        colorbar=dict(
            title="Lift",
            thickness=15,
            xanchor="left",
            titleside="right"
        )
    ),
    plot_bgcolor='white'
)

fig.show()


distutils Version classes are deprecated. Use packaging.version instead.


distutils Version classes are deprecated. Use packaging.version instead.



The Association Rule Directed Graph shows the relationships between the *antecedents* and *consequents* of the association rules. The weight of the edges represents the confidence, their color represents the lift and their size represents the support. This graph can give insights into the strongest associations between products, which products are often purchased together and which seperately.

In [44]:
pio.write_html(fig, 'rule_graph.html', auto_open=False)

Parrallel Coordinates Plot

In [45]:
fig = px.parallel_coordinates(rules_pruned_support, dimensions=["antecedent support", "consequent support", "support", "confidence", "lift"],
                            color="lift", color_continuous_scale=px.colors.sequential.Bluyl,
                            title='',
)
fig.show()


distutils Version classes are deprecated. Use packaging.version instead.


distutils Version classes are deprecated. Use packaging.version instead.



The Parallel Coordinates Plot shows the relationship between the antecedent support, consequent support, support, confidence, and lift of the association rules. The color of the lines represents the lift. This plot can give insights into the patterns and relationships between the different measures of the association rules.

In [46]:
pio.write_html(fig, 'paral_coord.html', auto_open=False)