<a href="https://colab.research.google.com/github/ethanpnguyen/ds4e/blob/main/notebooks/task7.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Bundle Items

Can you bundle products that go together based on historical transactions?

## Initialize

In [None]:
import pandas as pd

## Load Data

In [None]:
# Read the raw transaction log; the file is ISO-8859-1 encoded, so the
# encoding is passed explicitly instead of relying on the UTF-8 default.
dfRet = pd.read_csv(
    '/content/retail_transactions.csv',
    encoding='ISO-8859-1',
)
# Preview the last rows to confirm the load and see the column layout.
dfRet.tail()

Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country
541904,581587,22613,PACK OF 20 SPACEBOY NAPKINS,12,12/9/2011 12:50,0.85,12680.0,France
541905,581587,22899,CHILDREN'S APRON DOLLY GIRL,6,12/9/2011 12:50,2.1,12680.0,France
541906,581587,23254,CHILDRENS CUTLERY DOLLY GIRL,4,12/9/2011 12:50,4.15,12680.0,France
541907,581587,23255,CHILDRENS CUTLERY CIRCUS PARADE,4,12/9/2011 12:50,4.15,12680.0,France
541908,581587,22138,BAKING SET 9 PIECE RETROSPOT,3,12/9/2011 12:50,4.95,12680.0,France


## Prepare Data

In [None]:
# Build a normalized item token from the free-text product description:
#   1. spaces become underscores (literal replacement, not a regex), so a
#      multi-word description is a single token;
#   2. every remaining non-word character (anything outside [A-Za-z0-9_],
#      e.g. "-", "'", ".") is removed.
# The result of step 2 must be assigned back: previously it was computed and
# thrown away, so punctuation survived into the baskets built later. The
# pattern is a raw string with regex=True stated explicitly, because a
# non-regex replace would treat "\W" as literal text and match nothing.
dfRet['clean_description'] = (
    dfRet['Description']
    .str.replace(' ', '_', regex=False)
    .str.replace(r'\W', '', regex=True)
)
# Show the cleaned column as the cell output.
dfRet['clean_description']

  dfRet['clean_description'].str.replace('\W', '')


0           WHITE_HANGING_HEART_TLIGHT_HOLDER
1                         WHITE_METAL_LANTERN
2              CREAM_CUPID_HEARTS_COAT_HANGER
3         KNITTED_UNION_FLAG_HOT_WATER_BOTTLE
4               RED_WOOLLY_HOTTIE_WHITE_HEART
                         ...                 
541904            PACK_OF_20_SPACEBOY_NAPKINS
541905            CHILDRENS_APRON_DOLLY_GIRL_
541906          CHILDRENS_CUTLERY_DOLLY_GIRL_
541907        CHILDRENS_CUTLERY_CIRCUS_PARADE
541908          BAKING_SET_9_PIECE_RETROSPOT_
Name: clean_description, Length: 541909, dtype: object

In [None]:
# Discard, in place, every row that has a missing value in any column
# (these are the dropna defaults, spelled out for the reader).
dfRet.dropna(axis=0, how='any', inplace=True)

In [None]:
# Collapse the line items into one basket per invoice: for each InvoiceNo,
# gather its clean_description values into a Python list.
dfRetList = (
    dfRet
    .groupby('InvoiceNo')['clean_description']
    .agg(list)
)
# Peek at the first few baskets.
dfRetList.head()

InvoiceNo
536365    [WHITE_HANGING_HEART_T-LIGHT_HOLDER, WHITE_MET...
536366    [HAND_WARMER_UNION_JACK, HAND_WARMER_RED_POLKA...
536367    [ASSORTED_COLOUR_BIRD_ORNAMENT, POPPY'S_PLAYHO...
536368    [JAM_MAKING_SET_WITH_JARS, RED_COAT_RACK_PARIS...
536369                           [BATH_BUILDING_BLOCK_WORD]
Name: clean_description, dtype: object

## Build Model

In [None]:
# Encode the per-invoice item lists as a matrix with one row per basket and
# one column per distinct item (column labels come from te.columns_).
import mlxtend
from mlxtend.preprocessing import TransactionEncoder

te = TransactionEncoder()
# Learn the item vocabulary first, then encode the same baskets with it.
te.fit(dfRetList)
te_ary = te.transform(dfRetList)
dfRtb = pd.DataFrame(data=te_ary, columns=te.columns_)

In [None]:
# Mine frequent itemsets from the encoded basket matrix with Apriori
from mlxtend.frequent_patterns import apriori

# min_support=0.01 is the support cut-off passed to apriori;
# use_colnames=True reports itemsets by item name instead of column index.
frequent_itemsets = apriori(
    dfRtb,
    min_support=0.01,
    use_colnames=True,
)
frequent_itemsets

Unnamed: 0,support,itemsets
0,0.011221,(10_COLOUR_SPACEBOY_PEN)
1,0.014015,(12_PENCILS_SMALL_TUBE_RED_RETROSPOT)
2,0.013249,(12_PENCILS_SMALL_TUBE_SKULL)
3,0.010680,(12_PENCILS_TALL_TUBE_RED_RETROSPOT)
4,0.012528,(12_PENCIL_SMALL_TUBE_WOODLAND)
...,...,...
734,0.010140,"(LUNCH_BAG_WOODLAND, LUNCH_BAG_RED_RETROSPOT, ..."
735,0.011447,"(LUNCH_BAG__BLACK_SKULL., LUNCH_BAG_RED_RETROS..."
736,0.010455,"(LUNCH_BAG__BLACK_SKULL., LUNCH_BAG_RED_RETROS..."
737,0.012213,"(PINK_REGENCY_TEACUP_AND_SAUCER, ROSES_REGENCY..."


In [None]:
# Turn the frequent itemsets into association rules, strongest first
from mlxtend.frequent_patterns import association_rules

# Filter rules on the confidence metric with a 0.7 threshold, then order them
# by descending confidence; the rule index from association_rules is kept.
rules = (
    association_rules(frequent_itemsets, metric="confidence", min_threshold=0.7)
    .sort_values(by='confidence', ascending=False)
)
rules.head()

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
33,"(REGENCY_CAKESTAND_3_TIER, ROSES_REGENCY_TEACU...",(GREEN_REGENCY_TEACUP_AND_SAUCER),0.012213,0.033033,0.010861,0.889299,26.921613,0.010457,8.734936
22,"(PINK_REGENCY_TEACUP_AND_SAUCER, ROSES_REGENCY...",(GREEN_REGENCY_TEACUP_AND_SAUCER),0.020324,0.033033,0.017891,0.880266,26.648164,0.01722,8.075966
35,"(REGENCY_CAKESTAND_3_TIER, PINK_REGENCY_TEACUP...",(ROSES_REGENCY_TEACUP_AND_SAUCER_),0.012348,0.037675,0.010861,0.879562,23.34627,0.010396,7.990217
20,"(REGENCY_CAKESTAND_3_TIER, PINK_REGENCY_TEACUP...",(GREEN_REGENCY_TEACUP_AND_SAUCER),0.014376,0.033033,0.012348,0.858934,26.002386,0.011873,6.854722
32,"(PINK_REGENCY_TEACUP_AND_SAUCER, REGENCY_CAKES...",(ROSES_REGENCY_TEACUP_AND_SAUCER_),0.014376,0.037675,0.012213,0.84953,22.549122,0.011671,6.395454
