## **Background**

In [2]:
import pandas as pd
import numpy as np
import datetime as dt
import matplotlib.pyplot as plt
import seaborn as sns
import re

In [3]:
# Import dataset

pail= 'https://drive.google.com/file/d/1ELg9NNsaC44Q8r-Y7i8UKzUwZuNKFEom/view?usp=sharing'
url= 'https://drive.google.com/uc?id=' + pail.split('/')[-2]
datar= pd.read_csv(url)
datar.head()

Unnamed: 0,order_id,product_code,product_name,quantity,order_date,price,customer_id
0,493410,TEST001,This is a test product.,5,2010-01-04 09:24:00,4.5,12346.0
1,C493411,21539,RETRO SPOTS BUTTER DISH,-1,2010-01-04 09:43:00,4.25,14590.0
2,493412,TEST001,This is a test product.,5,2010-01-04 09:53:00,4.5,12346.0
3,493413,21724,PANDA AND BUNNIES STICKER SHEET,1,2010-01-04 09:54:00,0.85,
4,493413,84578,ELEPHANT TOY WITH BLUE T-SHIRT,1,2010-01-04 09:54:00,3.75,


## **Data Understanding**

In [4]:
# Data Info Check
datar.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 461773 entries, 0 to 461772
Data columns (total 7 columns):
 #   Column        Non-Null Count   Dtype  
---  ------        --------------   -----  
 0   order_id      461773 non-null  object 
 1   product_code  461773 non-null  object 
 2   product_name  459055 non-null  object 
 3   quantity      461773 non-null  int64  
 4   order_date    461773 non-null  object 
 5   price         461773 non-null  float64
 6   customer_id   360853 non-null  float64
dtypes: float64(2), int64(1), object(4)
memory usage: 24.7+ MB


In [5]:
# Order Id column check
cid=[]
for i in datar['order_id']:
    cek= re.findall(r'\D',i)
    if cek!=[] and cek not in cid:
        cid.append(cek)
print(cid)

[['C'], ['A']]


In [6]:
# Whats 'C' in order_id
datar[datar['order_id'].str.contains('C')].head()

Unnamed: 0,order_id,product_code,product_name,quantity,order_date,price,customer_id
1,C493411,21539,RETRO SPOTS BUTTER DISH,-1,2010-01-04 09:43:00,4.25,14590.0
12,C493415,21527,RETRO SPOT TRADITIONAL TEAPOT,-3,2010-01-04 10:33:00,7.95,14590.0
13,C493426,22109,FULL ENGLISH BREAKFAST PLATE,-1,2010-01-04 10:41:00,3.39,16550.0
56,C493430,21556,CERAMIC STRAWBERRY MONEY BOX,-1,2010-01-04 11:43:00,2.55,14680.0
57,C493430,21232,STRAWBERRY CERAMIC TRINKET BOX,-2,2010-01-04 11:43:00,1.25,14680.0


In [7]:
# Product code cek (ends with letter)
pc= []
for i in datar['product_code']:
    cek= re.findall(r'[A-Za-z]$',i)
    if cek!=[] and i not in pc:
        pc.append(i)

print(pc)

['35001G', '18096C', '72369A', '84563A', '84562A', '85123A', '84029E', '47591D', '85132B', '84032A', '84997A', '84997B', '84997C', '84997D', '15060B', '85099C', '85099B', '84520D', '84596F', '84596B', '84596E', '84559A', '84031B', '72800D', '72038P', '84031A', '16207A', '40046A', '84288B', '85231B', '85231L', '85049B', '84558A', '35004P', '35004G', '46776A', '46776B', '46776E', '46776C', '46776F', '72800C', '72799F', '72799C', '72799E', '72800F', '72800A', '72800B', '72799A', '85132A', '71028B', '85167B', '85166B', '85167A', '85183A', '85183B', '47347B', '84596K', '84596J', '84535A', '79190B', '79190A', '79190D', '85099F', '82001S', '84671B', '84741C', '85049F', '84029C', '84509F', '85232B', '46000M', '47566B', '79323W', '85129C', '85129D', '84845C', '84845D', '82494L', '17164D', '84292B', '85199L', '85185B', '85185F', '47578A', '84659A', '35096A', '35096B', '84970L', '85185C', '84510A', '40046C', '84753A', '84029D', '84671A', '90004A', '90121B', '35980C', '62097A', '84877C', '85232C',

In [8]:
# are they sellable product?
datar[datar['product_code']=='84563A'].head()   # I thought these are sellable prodcuts.

Unnamed: 0,order_id,product_code,product_name,quantity,order_date,price,customer_id
32,493428,84563A,PINK & WHITE BREAKFAST TRAY,1,2010-01-04 11:15:00,5.95,16550.0
45410,498300,84563A,PINK & WHITE BREAKFAST TRAY,2,2010-02-18 09:14:00,5.95,17157.0
71391,501080,84563A,PINK & WHITE BREAKFAST TRAY,1,2010-03-12 11:19:00,11.87,
72435,501154,84563A,PINK & WHITE BREAKFAST TRAY,1,2010-03-14 15:17:00,5.95,17849.0
86581,502675,84563A,PINK & WHITE BREAKFAST TRAY,2,2010-03-26 10:02:00,5.95,14013.0


In [9]:
# Product Code Check (start with letter or only letter)
pcc= []
for i in datar['product_code']:
    cek= re.findall(r'^([A-Za-z])[A-Za-z]*\d*\D*+$',i)
    if i not in pcc and cek!=[]:
        pcc.append(i)
print(pcc)

['TEST001', 'POST', 'M', 'DOT', 'DCGS0058', 'BANK CHARGES', 'D', 'PADS', 'DCGS0068', 'DCGS0076', 'ADJUST', 'DCGSSGIRL', 'DCGS0006', 'DCGS0016', 'DCGS0027', 'DCGS0036', 'DCGS0039', 'DCGS0060', 'DCGS0056', 'DCGS0059', 'GIFT', 'DCGSLBOY', 'C2', 'm', 'DCGS0053', 'DCGS0004', 'DCGS0062', 'DCGS0037', 'DCGSSBOY', 'DCGSLGIRL', 'S', 'DCGS0069', 'DCGS0070', 'DCGS0075', 'B', 'DCGS0041', 'DCGS0003', 'ADJUST2', 'C3', 'SP1002', 'AMAZONFEE']


In [10]:
# are they sellable product?
datar[datar['product_code']=='DCGS0041'].head()  

Unnamed: 0,order_id,product_code,product_name,quantity,order_date,price,customer_id
163954,511021,DCGS0041,HAYNES MINI-COOPER PLAYING CARDS,1,2010-06-04 16:13:00,5.06,


In [11]:
# Quantity Check
datar[datar['quantity']<0].head()

Unnamed: 0,order_id,product_code,product_name,quantity,order_date,price,customer_id
1,C493411,21539,RETRO SPOTS BUTTER DISH,-1,2010-01-04 09:43:00,4.25,14590.0
12,C493415,21527,RETRO SPOT TRADITIONAL TEAPOT,-3,2010-01-04 10:33:00,7.95,14590.0
13,C493426,22109,FULL ENGLISH BREAKFAST PLATE,-1,2010-01-04 10:41:00,3.39,16550.0
56,C493430,21556,CERAMIC STRAWBERRY MONEY BOX,-1,2010-01-04 11:43:00,2.55,14680.0
57,C493430,21232,STRAWBERRY CERAMIC TRINKET BOX,-2,2010-01-04 11:43:00,1.25,14680.0


In [12]:
# Price check
datar[datar['price']<0].head()

Unnamed: 0,order_id,product_code,product_name,quantity,order_date,price,customer_id
124462,A506401,B,Adjust bad debt,1,2010-04-29 13:36:00,-53594.36,
213524,A516228,B,Adjust bad debt,1,2010-07-19 11:24:00,-44031.79,
329774,A528059,B,Adjust bad debt,1,2010-10-20 12:04:00,-38925.87,


**NOTE**
1. There are missing value from product_name and customer_id columns.
2. There are some inappropriate data type.
3. There are unique codes from order_id colomns. order_id starts with 'C' will be clasifeid as canceled order and 'A' will be clasdified as Adjustment.
4. There are some unnecessary product code and we assume there aren't transsaction goods from cust, so will be drop soon.
5. Most of a negative values from quantity columns are canceled order or Missing value from product_name.
6. Negative values from price are adjustment product.

## **Data Cleansing**

In [13]:
# Drop missing value
set= datar.copy()
datac= set.dropna()
datac.info()

<class 'pandas.core.frame.DataFrame'>
Index: 360853 entries, 0 to 461744
Data columns (total 7 columns):
 #   Column        Non-Null Count   Dtype  
---  ------        --------------   -----  
 0   order_id      360853 non-null  object 
 1   product_code  360853 non-null  object 
 2   product_name  360853 non-null  object 
 3   quantity      360853 non-null  int64  
 4   order_date    360853 non-null  object 
 5   price         360853 non-null  float64
 6   customer_id   360853 non-null  float64
dtypes: float64(2), int64(1), object(4)
memory usage: 22.0+ MB


In [14]:
# Drop all unnecessary data
sep='|'
product_filter= sep.join(pcc)


datac= datac[~(datac['order_id'].str.contains('C|A',case=False) |
             datac['product_code'].str.contains(product_filter,case=False))]
datac.info()

<class 'pandas.core.frame.DataFrame'>
Index: 332945 entries, 6 to 461743
Data columns (total 7 columns):
 #   Column        Non-Null Count   Dtype  
---  ------        --------------   -----  
 0   order_id      332945 non-null  object 
 1   product_code  332945 non-null  object 
 2   product_name  332945 non-null  object 
 3   quantity      332945 non-null  int64  
 4   order_date    332945 non-null  object 
 5   price         332945 non-null  float64
 6   customer_id   332945 non-null  float64
dtypes: float64(2), int64(1), object(4)
memory usage: 20.3+ MB


In [15]:
# Check Again
dtc=[]
for i in datac['product_code']:
    cek= re.findall(r'^([A-Za-z])[A-Za-z]*\d*\D*+$',i)
    if i not in dtc and cek!=[]:
        dtc.append(i)
print(dtc)

[]


In [16]:
# Change data type
datac['order_date']= pd.to_datetime(datac['order_date'])
datac['customer_id']= datac['customer_id'].astype(str)
datac.info()

<class 'pandas.core.frame.DataFrame'>
Index: 332945 entries, 6 to 461743
Data columns (total 7 columns):
 #   Column        Non-Null Count   Dtype         
---  ------        --------------   -----         
 0   order_id      332945 non-null  object        
 1   product_code  332945 non-null  object        
 2   product_name  332945 non-null  object        
 3   quantity      332945 non-null  int64         
 4   order_date    332945 non-null  datetime64[ns]
 5   price         332945 non-null  float64       
 6   customer_id   332945 non-null  object        
dtypes: datetime64[ns](1), float64(1), int64(1), object(4)
memory usage: 20.3+ MB


In [17]:
# Outlier Handling
from scipy import stats
zkor= stats.zscore(datac[['quantity','price']])
datac[(zkor>3)]


Unnamed: 0,order_id,product_code,product_name,quantity,order_date,price,customer_id
127,493434,21355,TOAST ITS - I LOVE YOU,288,2010-01-04 12:54:00,1.05,13798.0
201,493436,20868,SILVER FABRIC MIRROR,240,2010-01-04 13:03:00,0.38,13694.0
202,493436,20869,GOLD FABRIC MIRROR,240,2010-01-04 13:03:00,0.38,13694.0
210,493436,72800C,S/4 PINK DINNER CANDLE SILVER FLOCK,252,2010-01-04 13:03:00,0.79,13694.0
211,493436,72799F,IVORY PILLAR CANDLE GOLD FLOCK,480,2010-01-04 13:03:00,1.25,13694.0
...,...,...,...,...,...,...,...
459285,539663,22823,CHEST NATURAL WOOD 20 DRAWERS,1,2010-12-21 10:26:00,125.00,17961.0
459589,539721,22616,PACK OF 12 LONDON TISSUES,1296,2010-12-21 13:34:00,0.25,17306.0
459669,539728,22783,SET 3 WICKER OVAL BASKETS W LIDS,4,2010-12-21 14:26:00,16.95,16270.0
460679,539762,62018,SOMBRERO,400,2010-12-22 10:29:00,1.25,13953.0


Outlier datas are transcation of customer with a huge quantity. But, we try to drop outlier data fro next analysis.

In [18]:
# Drop outlier
datac= datac[(zkor<3).all(axis=1)]
datac.info()

<class 'pandas.core.frame.DataFrame'>
Index: 329675 entries, 6 to 461743
Data columns (total 7 columns):
 #   Column        Non-Null Count   Dtype         
---  ------        --------------   -----         
 0   order_id      329675 non-null  object        
 1   product_code  329675 non-null  object        
 2   product_name  329675 non-null  object        
 3   quantity      329675 non-null  int64         
 4   order_date    329675 non-null  datetime64[ns]
 5   price         329675 non-null  float64       
 6   customer_id   329675 non-null  object        
dtypes: datetime64[ns](1), float64(1), int64(1), object(4)
memory usage: 20.1+ MB


In [19]:
datac.describe()

Unnamed: 0,quantity,order_date,price
count,329675.0,329675,329675.0
mean,9.674765,2010-07-23 20:29:32.263775232,2.848155
min,1.0,2010-01-04 10:28:00,0.0
25%,2.0,2010-04-26 12:51:00,1.25
50%,4.0,2010-08-05 13:31:00,1.95
75%,12.0,2010-10-25 14:41:00,3.75
max,213.0,2010-12-23 16:06:00,15.95
std,17.005006,,2.626093


## **Analysis**

### Dataset

In [20]:
# Create dataset for Analysis
dbas = pd.pivot_table(datac, index='order_id', columns='product_name',
                     values='product_code', aggfunc= pd.Series.nunique, fill_value=0)
dbas.head()

product_name,10 COLOUR SPACEBOY PEN,12 ASS ZINC CHRISTMAS DECORATIONS,12 COLOURED PARTY BALLOONS,12 DAISY PEGS IN WOOD BOX,12 EGG HOUSE PAINTED WOOD,12 IVORY ROSE PEG PLACE SETTINGS,12 MESSAGE CARDS WITH ENVELOPES,12 MINI TOADSTOOL PEGS,12 PENCIL SMALL TUBE WOODLAND,12 PENCILS SMALL TUBE POSY,...,ZINC HEART LATTICE CHARGER LARGE,ZINC HEART LATTICE CHARGER SMALL,ZINC HEART LATTICE DOUBLE PLANTER,ZINC HEART LATTICE PLANTER BOWL,ZINC HEART LATTICE T-LIGHT HOLDER,ZINC HEART LATTICE TRAY OVAL,ZINC METAL HEART DECORATION,ZINC POLICE BOX LANTERN,ZINC TOP 2 DOOR WOODEN SHELF,ZINC WILLIE WINKIE CANDLE STICK
order_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
493414,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
493427,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
493428,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
493432,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
493433,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [21]:
# COlumns with values=1 are in the same order_id of the transaction
ro= dbas.iloc[0]
nonzero= ro[ro>0].index.tolist()

dbas.loc[['493414'],nonzero]

product_name,HAND OPEN SHAPE GOLD,NEW ENGLAND CERAMIC CAKE SERVER,RETRO SPOT LARGE MILK JUG,RETRO SPOT MUG,RETRO SPOT SUGAR JAM BOWL,RETRO SPOT TRADITIONAL TEAPOT
order_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
493414,1,1,1,1,1,1


In [22]:
dbas.head()

product_name,10 COLOUR SPACEBOY PEN,12 ASS ZINC CHRISTMAS DECORATIONS,12 COLOURED PARTY BALLOONS,12 DAISY PEGS IN WOOD BOX,12 EGG HOUSE PAINTED WOOD,12 IVORY ROSE PEG PLACE SETTINGS,12 MESSAGE CARDS WITH ENVELOPES,12 MINI TOADSTOOL PEGS,12 PENCIL SMALL TUBE WOODLAND,12 PENCILS SMALL TUBE POSY,...,ZINC HEART LATTICE CHARGER LARGE,ZINC HEART LATTICE CHARGER SMALL,ZINC HEART LATTICE DOUBLE PLANTER,ZINC HEART LATTICE PLANTER BOWL,ZINC HEART LATTICE T-LIGHT HOLDER,ZINC HEART LATTICE TRAY OVAL,ZINC METAL HEART DECORATION,ZINC POLICE BOX LANTERN,ZINC TOP 2 DOOR WOODEN SHELF,ZINC WILLIE WINKIE CANDLE STICK
order_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
493414,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
493427,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
493428,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
493432,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
493433,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [23]:
# Take a columns with values=1 for dataset analysis
dbas= dbas[(dbas>0).sum(axis=1)>1]
dbas.head()

product_name,10 COLOUR SPACEBOY PEN,12 ASS ZINC CHRISTMAS DECORATIONS,12 COLOURED PARTY BALLOONS,12 DAISY PEGS IN WOOD BOX,12 EGG HOUSE PAINTED WOOD,12 IVORY ROSE PEG PLACE SETTINGS,12 MESSAGE CARDS WITH ENVELOPES,12 MINI TOADSTOOL PEGS,12 PENCIL SMALL TUBE WOODLAND,12 PENCILS SMALL TUBE POSY,...,ZINC HEART LATTICE CHARGER LARGE,ZINC HEART LATTICE CHARGER SMALL,ZINC HEART LATTICE DOUBLE PLANTER,ZINC HEART LATTICE PLANTER BOWL,ZINC HEART LATTICE T-LIGHT HOLDER,ZINC HEART LATTICE TRAY OVAL,ZINC METAL HEART DECORATION,ZINC POLICE BOX LANTERN,ZINC TOP 2 DOOR WOODEN SHELF,ZINC WILLIE WINKIE CANDLE STICK
order_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
493414,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
493427,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
493428,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
493432,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
493433,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [40]:
# Set as bolean
def bol(x):
    if x==0:
        return False
    else:
        return True
dbas= dbas.applymap(bol)
dbas.head()

  dbas= dbas.applymap(bol)


product_name,10 COLOUR SPACEBOY PEN,12 ASS ZINC CHRISTMAS DECORATIONS,12 COLOURED PARTY BALLOONS,12 DAISY PEGS IN WOOD BOX,12 EGG HOUSE PAINTED WOOD,12 IVORY ROSE PEG PLACE SETTINGS,12 MESSAGE CARDS WITH ENVELOPES,12 MINI TOADSTOOL PEGS,12 PENCIL SMALL TUBE WOODLAND,12 PENCILS SMALL TUBE POSY,...,ZINC HEART LATTICE CHARGER LARGE,ZINC HEART LATTICE CHARGER SMALL,ZINC HEART LATTICE DOUBLE PLANTER,ZINC HEART LATTICE PLANTER BOWL,ZINC HEART LATTICE T-LIGHT HOLDER,ZINC HEART LATTICE TRAY OVAL,ZINC METAL HEART DECORATION,ZINC POLICE BOX LANTERN,ZINC TOP 2 DOOR WOODEN SHELF,ZINC WILLIE WINKIE CANDLE STICK
order_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
493414,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
493427,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
493428,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
493432,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
493433,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False


### Apply Apriori ALgorithm

In [44]:
# Create itemset data
from mlxtend.frequent_patterns import apriori

freq_item= apriori(dbas, min_support=.01, use_colnames=True).sort_values('support', ascending=False).reset_index(drop=True)
freq_item['product_count']= freq_item['itemsets'].apply(lambda x: len(x))
freq_item

Unnamed: 0,support,itemsets,product_count
0,0.179757,(WHITE HANGING HEART T-LIGHT HOLDER),1
1,0.101949,(REGENCY CAKESTAND 3 TIER),1
2,0.078806,(ASSORTED COLOUR BIRD ORNAMENT),1
3,0.074815,(STRAWBERRY CERAMIC TRINKET BOX),1
4,0.071823,(HOME BUILDING BLOCK WORD),1
...,...,...,...
995,0.010042,(BOX OF 9 PEBBLE CANDLES),1
996,0.010042,"(WHITE HANGING HEART T-LIGHT HOLDER, FELTCRAFT...",2
997,0.010042,(BANQUET BIRTHDAY CARD),1
998,0.010042,"(WOOD BLACK BOARD ANT WHITE FINISH, WOODEN FRA...",2


## **Result**

In [45]:
# Summary of Market Basket Metrics
from mlxtend.frequent_patterns import association_rules

product_ass= association_rules(freq_item, metric='confidence', min_threshold=0.7)\
                                .sort_values(['support','confidence'], ascending=[False,False]).reset_index(drop=True)
product_ass

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
0,(RED HANGING HEART T-LIGHT HOLDER),(WHITE HANGING HEART T-LIGHT HOLDER),0.059054,0.179757,0.042894,0.726351,4.040749,1.0,0.032279,2.997433,0.799750,0.218941,0.666381,0.482488
1,(SWEETHEART CERAMIC TRINKET BOX),(STRAWBERRY CERAMIC TRINKET BOX),0.049145,0.074815,0.037374,0.760487,10.164840,1.0,0.033698,3.862776,0.948222,0.431644,0.741119,0.630021
2,(TOILET METAL SIGN),(BATHROOM METAL SIGN),0.026934,0.040500,0.021680,0.804938,19.874970,1.0,0.020589,4.918955,0.975972,0.473837,0.796705,0.670121
3,(PAINTED METAL PEARS ASSORTED),(ASSORTED COLOUR BIRD ORNAMENT),0.022345,0.078806,0.016958,0.758929,9.630387,1.0,0.015197,3.821251,0.916644,0.201422,0.738306,0.487059
4,(LARGE POPCORN HOLDER),(SMALL POPCORN HOLDER),0.023143,0.041830,0.016559,0.715517,17.105299,1.0,0.015591,3.368112,0.963845,0.342033,0.703098,0.555692
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
64,"(POPPY'S PLAYHOUSE BATHROOM, POPPY'S PLAYHOUSE...",(POPPY'S PLAYHOUSE LIVINGROOM),0.010441,0.014963,0.010042,0.961783,64.277056,1.0,0.009886,25.775132,0.994829,0.653680,0.961203,0.816447
65,(POPPY'S PLAYHOUSE BATHROOM),"(POPPY'S PLAYHOUSE LIVINGROOM, POPPY'S PLAYHOU...",0.010973,0.013700,0.010042,0.915152,66.801618,1.0,0.009892,11.624255,0.995959,0.686364,0.913973,0.824081
66,"(KEY FOB , FRONT DOOR , KEY FOB , SHED)","(KEY FOB , BACK DOOR )",0.011106,0.021613,0.010042,0.904192,41.834860,1.0,0.009802,10.211911,0.987059,0.442815,0.902075,0.684404
67,"(KEY FOB , FRONT DOOR , KEY FOB , BACK DOOR )","(KEY FOB , SHED)",0.012436,0.025005,0.010042,0.807487,32.293023,1.0,0.009731,5.064557,0.981236,0.366505,0.802549,0.604541
