# MARKET BASKET ANALYSIS

In [None]:
# Mount Google Drive at /content/drive so the dataset stored there can be read by path.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Location of the retail CSV inside the mounted Drive folder (Colab-specific path).
file_path = '/content/drive/My Drive/Colab Notebooks/Online_Retail_Data.csv'

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import datetime as dt

In [None]:
# Load the raw order lines from the CSV at file_path into a DataFrame.
data = pd.read_csv(file_path)

In [None]:
# Peek at the first five rows of the raw data.
data.head()

Unnamed: 0,order_id,product_code,product_name,quantity,order_date,price,customer_id
0,493410,TEST001,This is a test product.,5,2010-01-04 09:24:00,4.5,12346.0
1,C493411,21539,RETRO SPOTS BUTTER DISH,-1,2010-01-04 09:43:00,4.25,14590.0
2,493412,TEST001,This is a test product.,5,2010-01-04 09:53:00,4.5,12346.0
3,493413,21724,PANDA AND BUNNIES STICKER SHEET,1,2010-01-04 09:54:00,0.85,
4,493413,84578,ELEPHANT TOY WITH BLUE T-SHIRT,1,2010-01-04 09:54:00,3.75,


# Data Cleansing

In [None]:
# Parse order_date and truncate it to midnight so that only the calendar day is kept.
# .dt.normalize() keeps the column as datetime64[ns]; the previous
# .dt.date.astype('datetime64') round-tripped through Python date objects and raises
# TypeError on pandas >= 2.0, where casting to a unit-less 'datetime64' is not allowed.
data['order_date'] = pd.to_datetime(data['order_date']).dt.normalize()

In [None]:
# Count missing values in each column.
data.isnull().sum()

order_id             0
product_code         0
product_name      2718
quantity             0
order_date           0
price                0
customer_id     100920
dtype: int64

In [None]:
# Summarise column dtypes, non-null counts and memory usage.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 461773 entries, 0 to 461772
Data columns (total 7 columns):
 #   Column        Non-Null Count   Dtype         
---  ------        --------------   -----         
 0   order_id      461773 non-null  object        
 1   product_code  461773 non-null  object        
 2   product_name  459055 non-null  object        
 3   quantity      461773 non-null  int64         
 4   order_date    461773 non-null  datetime64[ns]
 5   price         461773 non-null  float64       
 6   customer_id   360853 non-null  float64       
dtypes: datetime64[ns](1), float64(2), int64(1), object(3)
memory usage: 24.7+ MB


In [None]:
# Keep only the order lines that carry a customer_id.
data = data[data['customer_id'].notna()]

In [None]:
# Re-check missing values after dropping the rows without a customer_id.
data.isnull().sum()

order_id        0
product_code    0
product_name    0
quantity        0
order_date      0
price           0
customer_id     0
dtype: int64

In [None]:
# Count rows that are exact repeats of an earlier row.
data.duplicated().sum()

6411

In [None]:
# Discard the repeated rows, keeping the first occurrence of each.
data = data.drop_duplicates()

In [None]:
# Lower-case product names so that differences in capitalisation do not
# split one product into several.
# NOTE(review): some names appear to carry stray or trailing spaces
# (e.g. 'key fob , front door '); consider also normalising whitespace.
data = data.assign(product_name=data['product_name'].str.lower())

In [None]:
# Replace negative quantities by their magnitude.
data['quantity'] = np.abs(data['quantity'])

In [None]:
# Label each line by order type: ids that start with 'C' get one label,
# every other id the other.
# NOTE(review): the label 'Cenceled' is misspelled; it is kept unchanged here
# because other cells may match on the exact string.
data['order_status'] = np.where(
    data['order_id'].str.startswith('C', na=False),
    'Cenceled',
    'Delivered',
)

In [None]:
# Line total: unit price multiplied by the quantity on that line.
data['amount'] = data['price'].mul(data['quantity'])

In [None]:
# Treat customer_id as a label rather than a number.
# NOTE(review): the column is float at this point, so the strings keep a
# trailing '.0' (e.g. '12346.0'); cast through an integer type first if clean
# ids are wanted.
data = data.astype({'customer_id': str})

In [None]:
# Drop lines whose unit price is zero or negative.
data = data.loc[data['price'].gt(0)]

In [None]:
from scipy import stats

# Outlier removal: keep a row only when both its price and its amount lie
# strictly within three standard deviations of the respective column mean.
within_3_sigma = (np.abs(stats.zscore(data[['price', 'amount']])) < 3).all(axis=1)
data = data[within_3_sigma].reset_index(drop=True)

In [None]:
# Inspect the cleaned frame, including the derived order_status and amount columns.
data.head()

Unnamed: 0,order_id,product_code,product_name,quantity,order_date,price,customer_id,order_status,amount
0,493410,TEST001,this is a test product.,5,2010-01-04,4.5,12346.0,Delivered,22.5
1,C493411,21539,retro spots butter dish,1,2010-01-04,4.25,14590.0,Cenceled,4.25
2,493412,TEST001,this is a test product.,5,2010-01-04,4.5,12346.0,Delivered,22.5
3,493414,21844,retro spot mug,36,2010-01-04,2.55,14590.0,Delivered,91.8
4,493414,21533,retro spot large milk jug,12,2010-01-04,4.25,14590.0,Delivered,51.0


In [None]:
# Confirm dtypes and the remaining row count after cleaning.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 352813 entries, 0 to 352812
Data columns (total 9 columns):
 #   Column        Non-Null Count   Dtype         
---  ------        --------------   -----         
 0   order_id      352813 non-null  object        
 1   product_code  352813 non-null  object        
 2   product_name  352813 non-null  object        
 3   quantity      352813 non-null  int64         
 4   order_date    352813 non-null  datetime64[ns]
 5   price         352813 non-null  float64       
 6   customer_id   352813 non-null  object        
 7   order_status  352813 non-null  object        
 8   amount        352813 non-null  float64       
dtypes: datetime64[ns](1), float64(2), int64(1), object(5)
memory usage: 24.2+ MB


In [None]:
# Build the order x product matrix: one row per order_id, one column per
# product_name, each cell holding the number of distinct product codes seen
# for that pair (0 when the product is absent from the order).
# NOTE(review): orders whose id starts with 'C' are still present in `data`
# here, so they become baskets too - confirm this is intended.
df = data.pivot_table(
    index='order_id',
    columns='product_name',
    values='product_code',
    aggfunc='nunique',
    fill_value=0,
)

In [None]:
# Preview the order-by-product matrix.
df.head()

product_name,10 colour spaceboy pen,12 ass zinc christmas decorations,12 coloured party balloons,12 daisy pegs in wood box,12 egg house painted wood,12 ivory rose peg place settings,12 message cards with envelopes,12 mini toadstool pegs,12 pencil small tube woodland,12 pencils small tube posy,...,zinc heart lattice charger large,zinc heart lattice charger small,zinc heart lattice double planter,zinc heart lattice planter bowl,zinc heart lattice t-light holder,zinc heart lattice tray oval,zinc metal heart decoration,zinc police box lantern,zinc top 2 door wooden shelf,zinc willie winkie candle stick
order_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
493410,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
493412,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
493414,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
493427,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
493428,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [None]:
# Check the matrix dimensions, dtype and memory footprint.
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 20016 entries, 493410 to C539983
Columns: 4272 entries, 10 colour spaceboy pen to zinc willie winkie  candle stick
dtypes: int64(4272)
memory usage: 652.5+ MB


In [None]:
# Encoding
def encoding(x):
    """Map a basket cell count to a presence flag.

    Parameters
    ----------
    x : number
        Count stored in one cell of the order x product matrix.

    Returns
    -------
    bool
        True when ``x`` is greater than zero, otherwise False. Negative or
        NaN input also yields False, so the function never falls through
        and returns None.
    """
    # bool() turns NumPy scalar results into a plain Python bool.
    return bool(x > 0)

In [None]:
# Turn the count matrix into the boolean presence matrix that is later passed
# to apriori. A vectorised comparison replaces the per-cell
# DataFrame.applymap(encoding) call: applymap is deprecated since pandas 2.1
# and runs one Python call per cell, which is very slow on a matrix of this
# size. The counts come from nunique with fill_value=0, so they are never
# negative or missing and `> 0` yields the same True/False values.
basket_encoding = df > 0
basket_encoding.head()

product_name,10 colour spaceboy pen,12 ass zinc christmas decorations,12 coloured party balloons,12 daisy pegs in wood box,12 egg house painted wood,12 ivory rose peg place settings,12 message cards with envelopes,12 mini toadstool pegs,12 pencil small tube woodland,12 pencils small tube posy,...,zinc heart lattice charger large,zinc heart lattice charger small,zinc heart lattice double planter,zinc heart lattice planter bowl,zinc heart lattice t-light holder,zinc heart lattice tray oval,zinc metal heart decoration,zinc police box lantern,zinc top 2 door wooden shelf,zinc willie winkie candle stick
order_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
493410,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
493412,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
493414,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
493427,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
493428,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False


In [None]:
# Keep only baskets that contain at least two different products; a
# single-product order cannot support any association between products.
basket_filter = basket_encoding.loc[(basket_encoding > 0).sum(axis=1).gt(1)]

In [None]:
# Look at the last baskets that survived the filter.
basket_filter.tail()

product_name,10 colour spaceboy pen,12 ass zinc christmas decorations,12 coloured party balloons,12 daisy pegs in wood box,12 egg house painted wood,12 ivory rose peg place settings,12 message cards with envelopes,12 mini toadstool pegs,12 pencil small tube woodland,12 pencils small tube posy,...,zinc heart lattice charger large,zinc heart lattice charger small,zinc heart lattice double planter,zinc heart lattice planter bowl,zinc heart lattice t-light holder,zinc heart lattice tray oval,zinc metal heart decoration,zinc police box lantern,zinc top 2 door wooden shelf,zinc willie winkie candle stick
order_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
C539711,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
C539719,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
C539943,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
C539945,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
C539950,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False


In [None]:
from mlxtend.frequent_patterns import apriori

In [None]:
# Mine itemsets whose support is at least 1% of the baskets and order them
# from most to least frequent.
frequen_itemset = (
    apriori(basket_filter, min_support=0.01, use_colnames=True)
    .sort_values(by='support', ascending=False)
    .reset_index(drop=True)
)

  and should_run_async(code)


In [None]:
# Show the itemsets with the highest support.
frequen_itemset.head()

  and should_run_async(code)


Unnamed: 0,support,itemsets
0,0.165387,(white hanging heart t-light holder)
1,0.094567,(regency cakestand 3 tier)
2,0.072679,(strawberry ceramic trinket box)
3,0.07136,(assorted colour bird ornament)
4,0.065064,(home building block word)


In [None]:
# Record how many products each frequent itemset contains.
frequen_itemset['product_cnt'] = frequen_itemset['itemsets'].map(len)
frequen_itemset.tail()

  and should_run_async(code)


Unnamed: 0,support,itemsets,product_cnt
874,0.010014,(red retrospot tea cup and saucer),1
875,0.010014,"(key fob , shed, key fob , front door )",2
876,0.010014,"(white hanging heart t-light holder, lovebird ...",2
877,0.010014,"(white hanging heart t-light holder, metal 4 h...",2
878,0.010014,(toadstool money box),1


In [None]:
from mlxtend.frequent_patterns import association_rules

# Derive rules with confidence of at least 0.7 from the frequent itemsets and
# rank them by support, then by confidence, both descending.
prod_ass = (
    association_rules(frequen_itemset, metric='confidence', min_threshold=0.7)
    .sort_values(by=['support', 'confidence'], ascending=False)
    .reset_index(drop=True)
)
prod_ass

  and should_run_async(code)


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(red hanging heart t-light holder),(white hanging heart t-light holder),0.05397,0.165387,0.038738,0.717778,4.339979,0.029812,2.957289,0.813488
1,(sweetheart ceramic trinket box),(strawberry ceramic trinket box),0.045994,0.072679,0.034781,0.756193,10.404516,0.031438,3.803503,0.947466
2,(toilet metal sign),(bathroom metal sign),0.024646,0.037119,0.019549,0.793187,21.368647,0.018634,4.655812,0.977289
3,(painted metal pears assorted),(assorted colour bird ornament),0.020209,0.07136,0.015351,0.759644,10.645229,0.013909,3.863601,0.924749
4,(kitchen metal sign),(bathroom metal sign),0.01805,0.037119,0.014752,0.817276,22.017593,0.014082,5.269584,0.972129
5,"(white hanging heart t-light holder, wooden pi...",(wooden frame antique white),0.020628,0.056428,0.014572,0.706395,12.518437,0.013408,3.213749,0.939498
6,"(key fob , back door )","(key fob , shed)",0.019489,0.022547,0.014452,0.741538,32.88802,0.014012,3.781811,0.988866
7,"(key fob , garage design)","(key fob , shed)",0.01757,0.022547,0.014212,0.808874,35.87441,0.013816,5.114172,0.989511
8,(poppy's playhouse bedroom),(poppy's playhouse kitchen),0.015471,0.01733,0.013792,0.891473,51.440144,0.013524,9.054599,0.995969
9,(poppy's playhouse kitchen),(poppy's playhouse bedroom),0.01733,0.015471,0.013792,0.795848,51.440144,0.013524,4.822522,0.997853
