# Exercises

# Practicum

In [6]:
import pandas as pd
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules

# Fetch the UCI "Online Retail" workbook straight from the archive (network required).
df = pd.read_excel('http://archive.ics.uci.edu/ml/machine-learning-databases/00352/Online%20Retail.xlsx')
df.head()

# Clean-up: trim whitespace from item descriptions, discard rows that have no
# invoice number, and store invoice numbers as strings so they can be searched.
df = (
    df.assign(Description=df['Description'].str.strip())
      .dropna(axis=0, subset=['InvoiceNo'])
      .assign(InvoiceNo=lambda frame: frame['InvoiceNo'].astype('str'))
)

# Exclude every invoice whose number contains 'C'
# (presumably cancellations/credit notes -- TODO confirm against the dataset docs).
has_c = df['InvoiceNo'].str.contains('C')
df = df[~has_c]

# Invoice-by-item quantity matrix for orders from France; item/invoice pairs
# that never occur are filled with 0.
france_orders = df[df['Country'] == "France"]
item_quantities = france_orders.groupby(['InvoiceNo', 'Description'])['Quantity'].sum()
basket = (item_quantities.unstack()
          .reset_index()
          .fillna(0)
          .set_index('InvoiceNo'))

In [49]:
def encode_units(x):
    """Binarise a numeric cell value into a 0/1 purchase flag.

    Parameters
    ----------
    x : number
        The value to encode (in this notebook, a summed item quantity).

    Returns
    -------
    int
        1 when ``x >= 1``, otherwise 0.

    Notes
    -----
    Every input maps to 0 or 1. Values strictly between 0 and 1, and NaN
    (for which every comparison is false), used to fall through both
    branches and come back as ``None``; they are now encoded as 0.
    """
    # A single comparison covers all cases, so there is no implicit None path.
    return 1 if x >= 1 else 0

# One-hot encode the basket cell by cell with encode_units, then remove the
# 'POSTAGE' column (presumably a shipping charge rather than a product -- confirm).
basket_sets = basket.applymap(encode_units).drop(columns='POSTAGE')

# Mine frequent itemsets with a support threshold of 0.05, labelling itemsets
# by column name instead of column position, and list them by descending support.
frequent_itemsets = apriori(basket_sets, use_colnames=True, min_support=0.05)
frequent_itemsets.sort_values("support", ascending=False)


Unnamed: 0,support,itemsets
46,0.188776,(RABBIT NIGHT LIGHT)
52,0.181122,(RED TOADSTOOL LED NIGHT LIGHT)
44,0.170918,(PLASTERS IN TIN WOODLAND ANIMALS)
40,0.168367,(PLASTERS IN TIN CIRCUS PARADE)
59,0.158163,(ROUND SNACK BOXES SET OF4 WOODLAND)
...,...,...
6,0.051020,(BLUE HARMONICA IN BOX)
32,0.051020,(MINI LIGHTS WOODLAND MUSHROOMS)
34,0.051020,(PACK OF 20 NAPKINS RED APPLES)
70,0.051020,(SPACEBOY CHILDRENS BOWL)


The itemset with the greatest support is {Rabbit Night Light} at 0.188776 support.

In [59]:
# Build association rules from the frequent itemsets, filtered on the
# confidence metric with a threshold of 0.7, and show the strongest first.
rules1 = association_rules(frequent_itemsets, min_threshold=0.7, metric="confidence")
rules1.sort_values("confidence", ascending=False)


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
24,"(SET/6 RED SPOTTY PAPER PLATES, SET/20 RED RET...",(SET/6 RED SPOTTY PAPER CUPS),0.102041,0.137755,0.09949,0.975,7.077778,0.085433,34.489796
25,"(SET/6 RED SPOTTY PAPER CUPS, SET/20 RED RETRO...",(SET/6 RED SPOTTY PAPER PLATES),0.102041,0.127551,0.09949,0.975,7.644,0.086474,34.897959
17,(SET/6 RED SPOTTY PAPER PLATES),(SET/6 RED SPOTTY PAPER CUPS),0.127551,0.137755,0.122449,0.96,6.968889,0.104878,21.556122
7,(CHILDRENS CUTLERY SPACEBOY),(CHILDRENS CUTLERY DOLLY GIRL),0.068878,0.071429,0.063776,0.925926,12.962963,0.058856,12.535714
10,(PACK OF 6 SKULL PAPER PLATES),(PACK OF 6 SKULL PAPER CUPS),0.056122,0.063776,0.05102,0.909091,14.254545,0.047441,10.298469
6,(CHILDRENS CUTLERY DOLLY GIRL),(CHILDRENS CUTLERY SPACEBOY),0.071429,0.068878,0.063776,0.892857,12.962963,0.058856,8.690476
18,(SET/6 RED SPOTTY PAPER CUPS),(SET/6 RED SPOTTY PAPER PLATES),0.137755,0.127551,0.122449,0.888889,6.968889,0.104878,7.852041
20,"(ALARM CLOCK BAKELIKE PINK, ALARM CLOCK BAKELI...",(ALARM CLOCK BAKELIKE GREEN),0.07398,0.096939,0.063776,0.862069,8.892922,0.056604,6.547194
19,"(ALARM CLOCK BAKELIKE PINK, ALARM CLOCK BAKELI...",(ALARM CLOCK BAKELIKE RED),0.07398,0.094388,0.063776,0.862069,9.133271,0.056793,6.565689
3,(ALARM CLOCK BAKELIKE RED),(ALARM CLOCK BAKELIKE GREEN),0.094388,0.096939,0.079082,0.837838,8.642959,0.069932,5.568878


The association rule with the highest confidence is: {Set/6 Red Spotty Paper Plates, Set/20 Red Retrospot Paper Napkins} -> {Set/6 Red Spotty Paper Cups} with a confidence of 0.975. (The rule {Set/6 Red Spotty Paper Cups, Set/20 Red Retrospot Paper Napkins} -> {Set/6 Red Spotty Paper Plates} ties with it at 0.975.)

The antecedent is: {Set/6 Red Spotty Paper Plates, Set/20 Red Retrospot Paper Napkins}
The consequent is: {Set/6 Red Spotty Paper Cups}

In [61]:
# Build association rules from the frequent itemsets, filtered on the
# lift metric with a threshold of 7, and show the highest-lift rules first.
rules2 = association_rules(frequent_itemsets, min_threshold=7, metric="lift")
rules2.sort_values("lift", ascending=False)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
8,(PACK OF 6 SKULL PAPER CUPS),(PACK OF 6 SKULL PAPER PLATES),0.063776,0.056122,0.05102,0.8,14.254545,0.047441,4.719388
9,(PACK OF 6 SKULL PAPER PLATES),(PACK OF 6 SKULL PAPER CUPS),0.056122,0.063776,0.05102,0.909091,14.254545,0.047441,10.298469
6,(CHILDRENS CUTLERY DOLLY GIRL),(CHILDRENS CUTLERY SPACEBOY),0.071429,0.068878,0.063776,0.892857,12.962963,0.058856,8.690476
7,(CHILDRENS CUTLERY SPACEBOY),(CHILDRENS CUTLERY DOLLY GIRL),0.068878,0.071429,0.063776,0.925926,12.962963,0.058856,12.535714
10,"(ALARM CLOCK BAKELIKE PINK, ALARM CLOCK BAKELI...",(ALARM CLOCK BAKELIKE RED),0.07398,0.094388,0.063776,0.862069,9.133271,0.056793,6.565689
15,(ALARM CLOCK BAKELIKE RED),"(ALARM CLOCK BAKELIKE PINK, ALARM CLOCK BAKELI...",0.094388,0.07398,0.063776,0.675676,9.133271,0.056793,2.85523
11,"(ALARM CLOCK BAKELIKE PINK, ALARM CLOCK BAKELI...",(ALARM CLOCK BAKELIKE GREEN),0.07398,0.096939,0.063776,0.862069,8.892922,0.056604,6.547194
14,(ALARM CLOCK BAKELIKE GREEN),"(ALARM CLOCK BAKELIKE PINK, ALARM CLOCK BAKELI...",0.096939,0.07398,0.063776,0.657895,8.892922,0.056604,2.706829
2,(ALARM CLOCK BAKELIKE GREEN),(ALARM CLOCK BAKELIKE RED),0.096939,0.094388,0.079082,0.815789,8.642959,0.069932,4.916181
3,(ALARM CLOCK BAKELIKE RED),(ALARM CLOCK BAKELIKE GREEN),0.094388,0.096939,0.079082,0.837838,8.642959,0.069932,5.568878


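The association rule with the highest lift is: {Pack of 6 Skull Paper Cups} -> {Pack of 6 Skull Paper Plates} with a lift of ~14.255. (Lift is symmetric, so the reverse rule {Pack of 6 Skull Paper Plates} -> {Pack of 6 Skull Paper Cups} has the same lift.)

The antecedent is: {Pack of 6 Skull Paper Cups}
The consequent is: {Pack of 6 Skull Paper Plates}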

We see that the association rule with the greatest confidence is not the same as the association rule with the greatest lift.