In [12]:
import pandas as pd
import numpy as np
from mlxtend.frequent_patterns import fpgrowth
from mlxtend.frequent_patterns import association_rules

In [14]:
# Read the transactions workbook and tidy it in a single pass:
#   1. trim surrounding whitespace from product descriptions,
#   2. discard rows that carry no invoice number,
#   3. represent invoice numbers as text so string matching works on them.
df = (
    pd.read_excel('OnlineRetail.xlsx', engine='openpyxl')
    .assign(Description=lambda frame: frame['Description'].str.strip())
    .dropna(axis=0, subset=['InvoiceNo'])
    .assign(InvoiceNo=lambda frame: frame['InvoiceNo'].astype('str'))
)

# Keep only invoices whose number has no 'C' in it.
# NOTE(review): presumably 'C' marks cancelled/credited invoices -- confirm
# against the dataset documentation.
has_c_marker = df['InvoiceNo'].str.contains('C')
df = df[~has_c_marker]

In [15]:
# Rows belonging to French orders only.
france_rows = df[df['Country'] == "France"]

# Total quantity of every product within every invoice.
quantity_per_item = (
    france_rows
    .groupby(['InvoiceNo', 'Description'])['Quantity']
    .sum()
)

# Pivot to an invoice x product table; a product absent from an invoice
# shows up as NaN after the unstack and is filled with 0.
basket = (
    quantity_per_item
    .unstack()
    .reset_index()
    .fillna(0)
    .set_index('InvoiceNo')
)

In [16]:
def encode_units(x):
    """Binarise a purchased quantity into a presence flag.

    Parameters
    ----------
    x : number
        Quantity of one product within one invoice.

    Returns
    -------
    int
        1 when ``x >= 1``, otherwise 0.

    Notes
    -----
    The result is always 0 or 1. Values strictly between 0 and 1, and NaN
    (for which every comparison is False), are mapped to 0 rather than
    falling through and implicitly returning ``None``.
    """
    return 1 if x >= 1 else 0

In [19]:
from pprint import pprint

# Turn quantities into a boolean "was this product on the invoice" matrix.
# The vectorized comparison gives the same flags as applying encode_units to
# every cell (True where quantity >= 1), but avoids the per-element Python
# call of DataFrame.applymap (deprecated in pandas 2.1+) and yields the bool
# dtype that mlxtend recommends for its frequent-pattern functions.
# The 'POSTAGE' column is removed before mining.
# NOTE(review): presumably because it is a shipping charge rather than a
# product -- confirm.
basket_sets = (basket >= 1).drop(columns='POSTAGE')

# Mine itemsets that occur in at least 7% of the invoices, labelled with the
# product names instead of column positions.
frequent_itemsets = fpgrowth(basket_sets, min_support=0.07, use_colnames=True)
pprint(frequent_itemsets)

     support                                           itemsets
0   0.181122                    (RED TOADSTOOL LED NIGHT LIGHT)
1   0.158163               (ROUND SNACK BOXES SET OF4 WOODLAND)
2   0.125000                               (SPACEBOY LUNCH BOX)
3   0.104592                           (MINI PAINT SET VINTAGE)
4   0.102041                        (ALARM CLOCK BAKELIKE PINK)
5   0.096939                       (ALARM CLOCK BAKELIKE GREEN)
6   0.094388                         (ALARM CLOCK BAKELIKE RED)
7   0.153061                          (LUNCH BAG RED RETROSPOT)
8   0.142857                 (LUNCH BOX WITH CUTLERY RETROSPOT)
9   0.137755                         (RED RETROSPOT MINI CASES)
10  0.117347                               (LUNCH BAG WOODLAND)
11  0.094388                          (TEA PARTY BIRTHDAY CARD)
12  0.170918                 (PLASTERS IN TIN WOODLAND ANIMALS)
13  0.137755                         (PLASTERS IN TIN SPACEBOY)
14  0.125000                         (RE

In [18]:
# Derive association rules from the frequent itemsets, keeping those with
# confidence of at least 0.7.
rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.7)

# Show only the rules with lift >= 6 and confidence >= 0.8.
strong_rule_mask = rules['lift'].ge(6) & rules['confidence'].ge(0.8)
rules.loc[strong_rule_mask]

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
2,(ALARM CLOCK BAKELIKE GREEN),(ALARM CLOCK BAKELIKE RED),0.096939,0.094388,0.079082,0.815789,8.642959,0.069932,4.916181
3,(ALARM CLOCK BAKELIKE RED),(ALARM CLOCK BAKELIKE GREEN),0.094388,0.096939,0.079082,0.837838,8.642959,0.069932,5.568878
7,(SET/6 RED SPOTTY PAPER CUPS),(SET/6 RED SPOTTY PAPER PLATES),0.137755,0.127551,0.122449,0.888889,6.968889,0.104878,7.852041
8,(SET/6 RED SPOTTY PAPER PLATES),(SET/6 RED SPOTTY PAPER CUPS),0.127551,0.137755,0.122449,0.96,6.968889,0.104878,21.556122
9,(SET/6 RED SPOTTY PAPER PLATES),(SET/20 RED RETROSPOT PAPER NAPKINS),0.127551,0.132653,0.102041,0.8,6.030769,0.085121,4.336735
11,"(SET/6 RED SPOTTY PAPER CUPS, SET/6 RED SPOTTY...",(SET/20 RED RETROSPOT PAPER NAPKINS),0.122449,0.132653,0.09949,0.8125,6.125,0.083247,4.62585
12,"(SET/6 RED SPOTTY PAPER CUPS, SET/20 RED RETRO...",(SET/6 RED SPOTTY PAPER PLATES),0.102041,0.127551,0.09949,0.975,7.644,0.086474,34.897959
13,"(SET/6 RED SPOTTY PAPER PLATES, SET/20 RED RET...",(SET/6 RED SPOTTY PAPER CUPS),0.102041,0.137755,0.09949,0.975,7.077778,0.085433,34.489796
