# [9660] Market Basket Analysis 1

Data file:  
* https://raw.githubusercontent.com/vjavaly/Baruch-CIS-9660/main/data/shopping_carts.csv

In [1]:
from datetime import datetime
print(f'Run time: {datetime.now().strftime("%D %T")}')

Run time: 12/02/24 02:46:09


### Import libraries

In [2]:
# mlxtend is a library of Python tools and extensions for data science
#  We use it to perform market basket analyis
#     - to generate frequent itemsets and association rules
import pandas as pd
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)

### Load data

In [3]:
df = pd.read_csv('https://raw.githubusercontent.com/vjavaly/Baruch-CIS-9660/main/data/shopping_carts.csv')
df.shape

(315, 6)

### Examine data

In [4]:
df.sample(5)

Unnamed: 0,item_1,item_2,item_3,item_4,item_5,item_6
Bagel,Milk,Wine,Pencil,Cheese,,
Cheese,Diaper,Pencil,Bagel,Wine,Meat,Eggs
Eggs,Meat,Wine,Bagel,Milk,Cheese,Diaper
Eggs,Bread,,,,,
Bread,,,,,,


### Prepare data

In [5]:
# Stack the DataFrame to flatten all the columns into a single Series
# Drop NaN values and extract unique items
unique_items = df.stack().unique()
items_set = set(unique_items)
items_set

{'Bagel',
 'Bread',
 'Cheese',
 'Diaper',
 'Eggs',
 'Meat',
 'Milk',
 'Pencil',
 'Wine'}

In [6]:
# Convert data to format desired by apriori algorithm
encoded_vals = []
for index, row in df.iterrows():
    rowset = set(row)
    labels = {}
    uncommons = list(items_set - rowset)
    commons = list(items_set.intersection(rowset))
    for uc in uncommons:
        labels[uc] = 0
    for com in commons:
        labels[com] = 1
    encoded_vals.append(labels)

encoded_vals[0]

{'Bread': 0,
 'Milk': 0,
 'Bagel': 0,
 'Eggs': 1,
 'Pencil': 1,
 'Meat': 1,
 'Cheese': 1,
 'Diaper': 1,
 'Wine': 1}

In [7]:
# Load data into dataframe
df = pd.DataFrame(encoded_vals)
df.sample(5)

Unnamed: 0,Bread,Milk,Bagel,Eggs,Pencil,Meat,Cheese,Diaper,Wine
230,0,1,1,0,0,0,0,0,0
100,1,0,0,0,0,0,0,0,0
258,0,0,0,0,0,0,1,1,1
259,0,1,1,0,0,0,0,0,0
83,0,0,0,0,0,0,0,1,1


### Apply apriori algorithm

In [8]:
# Filter: support >= 0.15
freq_itemsets = apriori(df, min_support=0.15, use_colnames=True, max_len=None,
                        verbose=0, low_memory=False)
freq_itemsets.shape

(18, 2)

In [9]:
freq_itemsets.sample(5)

Unnamed: 0,support,itemsets
7,0.298413,(Diaper)
5,0.374603,(Meat)
9,0.15873,"(Milk, Bagel)"
10,0.212698,"(Eggs, Milk)"
6,0.32381,(Cheese)


### Generate association rules

In [10]:
# Filter: confidence >= 0.5
assoc_rules = association_rules(freq_itemsets, num_itemsets=None,
                                metric='confidence', min_threshold=0.5)
assoc_rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
0,(Eggs),(Milk),0.361905,0.453968,0.212698,0.587719,1.294626,1.0,0.048405,1.324417,0.35665,0.352632,0.244951,0.528125
1,(Meat),(Milk),0.374603,0.453968,0.209524,0.559322,1.232073,1.0,0.039466,1.239072,0.301184,0.338462,0.192944,0.51043
2,(Meat),(Eggs),0.374603,0.361905,0.215873,0.576271,1.592328,1.0,0.080302,1.505905,0.594804,0.414634,0.335947,0.586381
3,(Eggs),(Meat),0.361905,0.374603,0.215873,0.596491,1.592328,1.0,0.080302,1.549896,0.582968,0.414634,0.354796,0.586381
4,(Eggs),(Wine),0.361905,0.365079,0.180952,0.5,1.369565,1.0,0.048828,1.269841,0.422886,0.331395,0.2125,0.497826
5,(Meat),(Wine),0.374603,0.365079,0.203175,0.542373,1.48563,1.0,0.066415,1.387419,0.522684,0.378698,0.279237,0.549447
6,(Wine),(Meat),0.365079,0.374603,0.203175,0.556522,1.48563,1.0,0.066415,1.410209,0.514844,0.378698,0.290885,0.549447
7,(Diaper),(Wine),0.298413,0.365079,0.15873,0.531915,1.456984,1.0,0.049786,1.356421,0.447059,0.314465,0.262766,0.483349
8,"(Meat, Eggs)",(Milk),0.215873,0.453968,0.155556,0.720588,1.58731,1.0,0.057556,1.954219,0.471866,0.302469,0.488287,0.531623
9,"(Meat, Milk)",(Eggs),0.209524,0.361905,0.155556,0.742424,2.051435,1.0,0.079728,2.477311,0.648389,0.374046,0.596336,0.586124


In [11]:
# Filter: confidence >= 0.6
assoc_rules = association_rules(freq_itemsets, num_itemsets=None,
                                metric='confidence', min_threshold=0.6)
assoc_rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
0,"(Meat, Eggs)",(Milk),0.215873,0.453968,0.155556,0.720588,1.58731,1.0,0.057556,1.954219,0.471866,0.302469,0.488287,0.531623
1,"(Meat, Milk)",(Eggs),0.209524,0.361905,0.155556,0.742424,2.051435,1.0,0.079728,2.477311,0.648389,0.374046,0.596336,0.586124
2,"(Eggs, Milk)",(Meat),0.212698,0.374603,0.155556,0.731343,1.952315,1.0,0.075878,2.327866,0.619569,0.360294,0.570422,0.573299


In [12]:
# Filter: lift >= 1.0
assoc_rules = association_rules(freq_itemsets, num_itemsets=None,
                                metric='lift', min_threshold=1.0)
assoc_rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
0,(Milk),(Bagel),0.453968,0.326984,0.15873,0.34965,1.069319,1.0,0.01029,1.034852,0.118721,0.255102,0.033679,0.417544
1,(Bagel),(Milk),0.326984,0.453968,0.15873,0.485437,1.069319,1.0,0.01029,1.061156,0.096321,0.255102,0.057632,0.417544
2,(Eggs),(Milk),0.361905,0.453968,0.212698,0.587719,1.294626,1.0,0.048405,1.324417,0.35665,0.352632,0.244951,0.528125
3,(Milk),(Eggs),0.453968,0.361905,0.212698,0.468531,1.294626,1.0,0.048405,1.200627,0.416782,0.352632,0.167102,0.528125
4,(Meat),(Milk),0.374603,0.453968,0.209524,0.559322,1.232073,1.0,0.039466,1.239072,0.301184,0.338462,0.192944,0.51043
5,(Milk),(Meat),0.453968,0.374603,0.209524,0.461538,1.232073,1.0,0.039466,1.161451,0.344961,0.338462,0.139008,0.51043
6,(Milk),(Wine),0.453968,0.365079,0.168254,0.370629,1.015202,1.0,0.00252,1.008818,0.027424,0.258537,0.008741,0.415749
7,(Wine),(Milk),0.365079,0.453968,0.168254,0.46087,1.015202,1.0,0.00252,1.012801,0.023585,0.258537,0.012639,0.415749
8,(Meat),(Eggs),0.374603,0.361905,0.215873,0.576271,1.592328,1.0,0.080302,1.505905,0.594804,0.414634,0.335947,0.586381
9,(Eggs),(Meat),0.361905,0.374603,0.215873,0.596491,1.592328,1.0,0.080302,1.549896,0.582968,0.414634,0.354796,0.586381
