# Instacart Recommender System - Association Rules
---

## Table of Contents

* [1. Association Rule Mining](#chapter1)
    * [1.1 Define Functions for Computing Association Rules](#chapter1_1)
    * [1.2 Run Function for Clusters on Item Level](#chapter1_2)
    * [1.3 Examine Results](#chapter1_3)
    * [1.4 Merge Cluster with Product Names](#chapter1_4)
    * [1.5 Run Function for Clusters on Aisle Level](#chapter1_5)
    * [1.6 Export Files](#chapter1_6)

In [41]:
import pandas as pd
import numpy as np
import seaborn as sns
import numpy as np
from itertools import combinations, groupby
from collections import Counter

In [42]:
orders = pd.read_csv('../data/complete_orders.csv')
cluster_df = pd.read_csv('../data/cluster_data.csv')
products = pd.read_csv('../data/products.csv')
aisles = pd.read_csv('../data/aisles.csv')
dept = pd.read_csv('../data/departments.csv')

orders = orders.sort_values(['order_id', 'product_id']).reset_index()

## 1. Association Rule Mining <a class="anchor" id="chapter1"></a>
---

The recommender will be based on association rule mining which is a session-based recommendation system, using the Apriori algorithm. The 3 main components of the algorithm are:
- Support: Probability of records containing an item set consisting of item 1 and item 2
- Confidence: Conditional probability of records containing item 2 given item 1
- Lift: Ratio of confidence to support

### 1.1 Define Functions for Computing Association Rules <a class="anchor" id="chapter1_1"></a>

In [43]:
# Returns number of unique orders
def order_count(cluster_products):
    return len(cluster_products.index.unique())

# Returns frequency for products and product pairs
def prod_freq(cluster_products):
    if type(cluster_products) == pd.core.series.Series:
        return cluster_products.value_counts().rename("freq")
    else: 
        return pd.Series(Counter(cluster_products)).rename("freq")

# Returns generator that yields item pairs, one at a time
def get_item_pairs(cluster_products):
    cluster_products = cluster_products.reset_index().to_numpy()
    for order_id, products in groupby(cluster_products, lambda x: x[0]):
        item_list = [item[1] for item in products]
        for item_pair in combinations(item_list, 2):
            yield item_pair
            
# Returns frequency and support associated with item
def merge_item_stats(item_pairs, item_stats):
    return (item_pairs
                .merge(item_stats.rename(columns={'freq': 'freqA', 'support': 'supportA'}), 
                       left_on='item_A', 
                       right_index=True)
                .merge(item_stats.rename(columns={'freq': 'freqB', 'support': 'supportB'}), 
                       left_on='item_B', 
                       right_index=True))

# Returns name associated with item
def merge_item_name(rules, item_name):
    columns = ['itemA','itemB','freqAB','supportAB','freqA','supportA','freqB','supportB', 
               'confidenceAtoB','confidenceBtoA','lift']
    rules = (rules
                .merge(item_name.rename(columns={'item_name': 'itemA'}), 
                       left_on='item_A', 
                       right_on='item_id')
                .merge(item_name.rename(columns={'item_name': 'itemB'}), 
                       left_on='item_B', 
                       right_on='item_id'))

In [44]:
def association_rules(cluster_orders, min_support, cluster_number):
    
    print(f"Computing for cluster: {cluster_number}.")
    
    # Calculate item frequency and support
    item_stats = prod_freq(cluster_orders).to_frame('freq')
    item_stats['support']  = item_stats['freq'] / order_count(cluster_orders) 

    # Narrow down to items which fulfil minimum support
    qualifying_items = item_stats[item_stats['support'] >= min_support].index
    cluster_orders = cluster_orders[cluster_orders.isin(qualifying_items)]

    print(f"Items with support >= {min_support}: {len(qualifying_items)}.")
    print(f"Remaining number of products: {len(cluster_orders)-len(qualifying_items)}.")

    # Eliminate orders with less than 2 orders
    order_size = prod_freq(cluster_orders.index)
    qualifying_orders = order_size[order_size >= 2].index
    cluster_orders = cluster_orders[cluster_orders.index.isin(qualifying_orders)]

    print(f"Remaining orders with 2+ orders: {len(qualifying_orders)}.")
    print(f"Remaining number of orders: {len(cluster_orders) - len(qualifying_orders)}.")

    # Recalculate item frequency and support
    item_stats = prod_freq(cluster_orders).to_frame("freq")
    item_stats['support'] = item_stats['freq'] / order_count(cluster_orders) 

    # Get item pairs generator
    item_pair_gen = get_item_pairs(cluster_orders)

    # Get frequency and support of item pairs
    item_pairs = prod_freq(item_pair_gen).to_frame("freqAB")
    item_pairs['supportAB'] = item_pairs['freqAB'] / len(qualifying_orders)

    print(f"Number of item pairs: {len(item_pairs)}.")

    # Get item pairs which fulfil minimum support
    item_pairs = item_pairs[item_pairs['supportAB'] >= min_support]

    print(f"Item pairs with support >= {min_support}: {len(item_pairs)}.")
    print('--------------------------------------')

    # Generate table of association rules with metrics
    item_pairs = item_pairs.reset_index().rename(columns={'level_0': 'item_A', 'level_1': 'item_B'})
    item_pairs = merge_item_stats(item_pairs, item_stats)
    
    item_pairs['confidenceAtoB'] = item_pairs['supportAB'] / item_pairs['supportA']
    item_pairs['confidenceBtoA'] = item_pairs['supportAB'] / item_pairs['supportB']
    item_pairs['lift'] = item_pairs['supportAB'] / (item_pairs['supportA'] * item_pairs['supportB'])
    
    # Return items pairs sorted by lift in descending order
    return item_pairs.sort_values('lift', ascending=False)

### 1.2 Run Function for Clusters on Item Level <a class="anchor" id="chapter1_2"></a>

In [45]:
cluster_orders = {}
for n in range(0,6):
    i = orders[orders['cluster'] == n]
    i = i.set_index('order_id')['product_id']
    cluster_orders[n] = i

In [46]:
cluster_items_apriori_dict = {}
for n in range(0,6):
    cluster_items_apriori_dict[n] = association_rules(cluster_orders[n],.0001,n)

Computing for cluster: 0.
Items with support >= 0.0001: 13272.
Remaining number of products: 8958036.
Remaining orders with 2+ orders: 971708.
Remaining number of orders: 7924682.
Number of item pairs: 15730395.
Item pairs with support >= 0.0001: 28177.
--------------------------------------
Computing for cluster: 1.
Items with support >= 0.0001: 1063.
Remaining number of products: 532794.
Remaining orders with 2+ orders: 99636.
Remaining number of orders: 416793.
Number of item pairs: 150551.
Item pairs with support >= 0.0001: 26355.
--------------------------------------
Computing for cluster: 2.
Items with support >= 0.0001: 4703.
Remaining number of products: 195665.
Remaining orders with 2+ orders: 36870.
Remaining number of orders: 152360.
Number of item pairs: 213905.
Item pairs with support >= 0.0001: 34979.
--------------------------------------
Computing for cluster: 3.
Items with support >= 0.0001: 7525.
Remaining number of products: 5416829.
Remaining orders with 2+ orders:

### 1.3 Examine Results <a class="anchor" id="chapter1_3"></a>

In [47]:
for n in range(0,6):
    print(f"Cluster: {n}")
    display(cluster_items_apriori_dict[n].head())
    print("------------------------------------------------------------------------------------------")

Cluster: 0


Unnamed: 0,item_A,item_B,freqAB,supportAB,freqA,supportA,freqB,supportB,confidenceAtoB,confidenceBtoA,lift
23732,4376,44396,99,0.000102,211,0.000217,188,0.000193,0.469194,0.526596,2425.105677
23086,15697,35208,132,0.000136,226,0.000233,251,0.000258,0.584071,0.525896,2261.1405
26341,11224,39739,140,0.000144,211,0.000217,293,0.000302,0.663507,0.477816,2200.461317
15488,29126,36361,145,0.000149,300,0.000309,220,0.000226,0.483333,0.659091,2134.81303
23420,3858,15692,154,0.000158,366,0.000377,196,0.000202,0.420765,0.785714,2086.0242


------------------------------------------------------------------------------------------
Cluster: 1


Unnamed: 0,item_A,item_B,freqAB,supportAB,freqA,supportA,freqB,supportB,confidenceAtoB,confidenceBtoA,lift
23721,2675,30036,12,0.00012,26,0.000261,32,0.000321,0.461538,0.375,1437.057692
23719,1071,2675,10,0.0001,34,0.000341,26,0.000261,0.294118,0.384615,1127.104072
23720,1071,30036,10,0.0001,34,0.000341,32,0.000321,0.294118,0.3125,915.772059
4625,37389,43892,10,0.0001,18,0.000181,78,0.000783,0.555556,0.128205,709.65812
9818,30280,42224,38,0.000381,53,0.000532,125,0.001255,0.716981,0.304,571.497057


------------------------------------------------------------------------------------------
Cluster: 2


Unnamed: 0,item_A,item_B,freqAB,supportAB,freqA,supportA,freqB,supportB,confidenceAtoB,confidenceBtoA,lift
34867,23352,44911,4,0.000108,4,0.000108,5,0.000136,1.0,0.8,7374.0
33002,17950,20259,5,0.000136,5,0.000136,5,0.000136,1.0,1.0,7374.0
5358,16241,33002,5,0.000136,6,0.000163,5,0.000136,0.833333,1.0,6145.0
23836,23070,44019,5,0.000136,5,0.000136,6,0.000163,1.0,0.833333,6145.0
33238,43622,44995,5,0.000136,6,0.000163,5,0.000136,0.833333,1.0,6145.0


------------------------------------------------------------------------------------------
Cluster: 3


Unnamed: 0,item_A,item_B,freqAB,supportAB,freqA,supportA,freqB,supportB,confidenceAtoB,confidenceBtoA,lift
77107,22416,41271,52,0.0001,122,0.000235,87,0.000168,0.42623,0.597701,2538.245713
66722,11212,12820,82,0.000158,122,0.000235,145,0.00028,0.672131,0.565517,2401.570944
80870,28122,35364,53,0.000102,101,0.000195,118,0.000228,0.524752,0.449153,2303.996895
85101,19244,26488,73,0.000141,134,0.000259,133,0.000257,0.544776,0.548872,2122.148749
75905,10339,49519,61,0.000118,147,0.000284,142,0.000274,0.414966,0.429577,1514.02678


------------------------------------------------------------------------------------------
Cluster: 4


Unnamed: 0,item_A,item_B,freqAB,supportAB,freqA,supportA,freqB,supportB,confidenceAtoB,confidenceBtoA,lift
67698,20153,46949,139,0.000107,303,0.000234,238,0.000184,0.458746,0.584034,2494.721746
55650,29671,38652,159,0.000123,301,0.000233,300,0.000232,0.528239,0.53,2278.957741
55645,6583,38652,130,0.0001,295,0.000228,300,0.000232,0.440678,0.433333,1901.196384
48833,11187,29671,171,0.000132,427,0.00033,301,0.000233,0.400468,0.568106,1721.98212
80094,41349,49570,189,0.000146,400,0.000309,369,0.000285,0.4725,0.512195,1657.304634


------------------------------------------------------------------------------------------
Cluster: 5


Unnamed: 0,item_A,item_B,freqAB,supportAB,freqA,supportA,freqB,supportB,confidenceAtoB,confidenceBtoA,lift
19481,4985,36278,27,0.000121,30,0.000134,32,0.000143,0.9,0.84375,6281.4375
29675,38259,38576,23,0.000103,33,0.000148,25,0.000112,0.69697,0.92,6226.448485
25621,24521,36709,24,0.000107,36,0.000161,28,0.000125,0.666667,0.857143,5317.619048
19028,83,39021,23,0.000103,40,0.000179,30,0.000134,0.575,0.766667,4280.683333
19035,3381,37377,24,0.000107,38,0.00017,33,0.000148,0.631579,0.727273,4274.449761


------------------------------------------------------------------------------------------


### 1.4 Merge Cluster with Product Names <a class="anchor" id="chapter1_4"></a>

In [48]:
for n in cluster_items_apriori_dict:
    # Get names for product A
    cluster_items_apriori_dict[n] = cluster_items_apriori_dict[n].merge(products[['product_id','product_name']]
               .rename(columns = {'product_id':'item_A','product_name':'product_name_A'}), on = 'item_A')
    # Get names for product B
    cluster_items_apriori_dict[n] = cluster_items_apriori_dict[n].merge(products[['product_id','product_name']]
               .rename(columns = {'product_id':'item_B','product_name':'product_name_B'}), on = 'item_B')
    
    cluster_items_apriori_dict[n] = cluster_items_apriori_dict[n][['item_A',
                                                                   'item_B',
                                                                   'product_name_A',
                                                                   'product_name_B',
                                                                   'freqAB',
                                                                   'supportAB',
                                                                   'freqA',
                                                                   'supportA',
                                                                   'freqB',
                                                                   'supportB',
                                                                   'confidenceAtoB',
                                                                   'confidenceBtoA',
                                                                   'lift']]

In [49]:
for n in range(0,6):
    print(f"Cluster: {n}")
    display(cluster_items_apriori_dict[n].head())
    print("------------------------------------------------------------------------------------------")

Cluster: 0


Unnamed: 0,item_A,item_B,product_name_A,product_name_B,freqAB,supportAB,freqA,supportA,freqB,supportB,confidenceAtoB,confidenceBtoA,lift
0,4376,44396,Coconut Kale & Cacao Organic Superfoods Bar,Organic Hazelnut Hemp Cacao Superfoods Bar,99,0.000102,211,0.000217,188,0.000193,0.469194,0.526596,2425.105677
1,15697,35208,Apricot Walnut & Coconut Granola Bar,"Granola Bar, Fig, Cranberry & Hazelnut",132,0.000136,226,0.000233,251,0.000258,0.584071,0.525896,2261.1405
2,26810,35208,Organic Date Pecan & Coconut Granola Bar,"Granola Bar, Fig, Cranberry & Hazelnut",166,0.000171,334,0.000344,251,0.000258,0.497006,0.661355,1924.082449
3,10210,35208,"Cherry, Dark Chocolate & Almond Granola bar","Granola Bar, Fig, Cranberry & Hazelnut",116,0.000119,325,0.000334,251,0.000258,0.356923,0.462151,1381.772945
4,15697,26810,Apricot Walnut & Coconut Granola Bar,Organic Date Pecan & Coconut Granola Bar,160,0.000165,226,0.000233,334,0.000344,0.707965,0.479042,2059.685231


------------------------------------------------------------------------------------------
Cluster: 1


Unnamed: 0,item_A,item_B,product_name_A,product_name_B,freqAB,supportAB,freqA,supportA,freqB,supportB,confidenceAtoB,confidenceBtoA,lift
0,2675,30036,Pale Ale,Belgian White Wheat Ale,12,0.00012,26,0.000261,32,0.000321,0.461538,0.375,1437.057692
1,1071,30036,Pils,Belgian White Wheat Ale,10,0.0001,34,0.000341,32,0.000321,0.294118,0.3125,915.772059
2,1071,2675,Pils,Pale Ale,10,0.0001,34,0.000341,26,0.000261,0.294118,0.384615,1127.104072
3,37389,43892,Whispering Angel Rosé,Premium Belgian Lager,10,0.0001,18,0.000181,78,0.000783,0.555556,0.128205,709.65812
4,30036,43892,Belgian White Wheat Ale,Premium Belgian Lager,12,0.00012,32,0.000321,78,0.000783,0.375,0.153846,479.019231


------------------------------------------------------------------------------------------
Cluster: 2


Unnamed: 0,item_A,item_B,product_name_A,product_name_B,freqAB,supportAB,freqA,supportA,freqB,supportB,confidenceAtoB,confidenceBtoA,lift
0,23352,44911,Large Deep Dish Containers + Lids,Entrée Containers & Lids,4,0.000108,4,0.000108,5,0.000136,1.0,0.8,7374.0
1,17950,20259,Vegetable & Pepper Jack Cheese Flatbread Break...,"Special K Sausage, Egg & Cheese Flatbread Brea...",5,0.000136,5,0.000136,5,0.000136,1.0,1.0,7374.0
2,20119,20259,Sparkling Water Berry,"Special K Sausage, Egg & Cheese Flatbread Brea...",5,0.000136,2684,0.072796,5,0.000136,0.001863,1.0,13.73696
3,17950,49191,Vegetable & Pepper Jack Cheese Flatbread Break...,Cran Raspberry Sparkling Water,5,0.000136,5,0.000136,1246,0.033794,1.0,0.004013,29.59069
4,46330,49191,Revive Fruit Punch Nutrient Enhanced VitaminWater,Cran Raspberry Sparkling Water,4,0.000108,8,0.000217,1246,0.033794,0.5,0.00321,14.795345


------------------------------------------------------------------------------------------
Cluster: 3


Unnamed: 0,item_A,item_B,product_name_A,product_name_B,freqAB,supportAB,freqA,supportA,freqB,supportB,confidenceAtoB,confidenceBtoA,lift
0,22416,41271,O'Soy Fruit on the Bottom Strawberry Organic S...,O'Soy Fruit on the Bottom Blueberry Organic So...,52,0.0001,122,0.000235,87,0.000168,0.42623,0.597701,2538.245713
1,11212,12820,Apple Blueberry Fruit Yogurt Smoothie,Organic Fruit Yogurt Smoothie Mixed Berry,82,0.000158,122,0.000235,145,0.00028,0.672131,0.565517,2401.570944
2,28122,35364,Peter Rabbit Organics Kale Broccoli and Mango ...,Peter Rabbit Organic Pea Spinach & Apple Puree...,53,0.000102,101,0.000195,118,0.000228,0.524752,0.449153,2303.996895
3,24852,35364,Banana,Peter Rabbit Organic Pea Spinach & Apple Puree...,54,0.000104,82357,0.158961,118,0.000228,0.000656,0.457627,2.878861
4,19244,26488,Grassfed Whole Milk Strawberry Yogurt,Organic Strawberry Grassfed Whole Milk Yogurt,73,0.000141,134,0.000259,133,0.000257,0.544776,0.548872,2122.148749


------------------------------------------------------------------------------------------
Cluster: 4


Unnamed: 0,item_A,item_B,product_name_A,product_name_B,freqAB,supportAB,freqA,supportA,freqB,supportB,confidenceAtoB,confidenceBtoA,lift
0,20153,46949,Eat Your Colors Purples Puree Baby Food,Eat Your Colors Reds Puree Baby Food,139,0.000107,303,0.000234,238,0.000184,0.458746,0.584034,2494.721746
1,29671,38652,Organic Bluephoria Yerba Mate,Yerba Mate Orange Exuberance Tea,159,0.000123,301,0.000233,300,0.000232,0.528239,0.53,2278.957741
2,6583,38652,Oraganic Lemon Elation Yerba Mate Drink,Yerba Mate Orange Exuberance Tea,130,0.0001,295,0.000228,300,0.000232,0.440678,0.433333,1901.196384
3,14366,38652,Enlighten Mint Organic,Yerba Mate Orange Exuberance Tea,154,0.000119,727,0.000562,300,0.000232,0.211829,0.513333,913.885851
4,6583,14366,Oraganic Lemon Elation Yerba Mate Drink,Enlighten Mint Organic,144,0.000111,295,0.000228,727,0.000562,0.488136,0.198074,869.026387


------------------------------------------------------------------------------------------
Cluster: 5


Unnamed: 0,item_A,item_B,product_name_A,product_name_B,freqAB,supportAB,freqA,supportA,freqB,supportB,confidenceAtoB,confidenceBtoA,lift
0,4985,36278,Homestyle Some Pulp Orange Juice,Chocolate Breakfast Biscuits Chocolate,27,0.000121,30,0.000134,32,0.000143,0.9,0.84375,6281.4375
1,4985,48927,Homestyle Some Pulp Orange Juice,Fat Free Plain Yoghurt,24,0.000107,30,0.000134,62,0.000278,0.8,0.387097,2881.806452
2,36278,48927,Chocolate Breakfast Biscuits Chocolate,Fat Free Plain Yoghurt,25,0.000112,32,0.000143,62,0.000278,0.78125,0.403226,2814.264113
3,24852,48927,Banana,Fat Free Plain Yoghurt,32,0.000143,50557,0.226368,62,0.000278,0.000633,0.516129,2.280045
4,38259,38576,Blueberry Greek Yogurt + Chia,The Epic Seed Greek Yogurt Strawberry & Chia,23,0.000103,33,0.000148,25,0.000112,0.69697,0.92,6226.448485


------------------------------------------------------------------------------------------


### 1.5 Run Function for Clusters on Aisle Level <a class="anchor" id="chapter1_5"></a>

In [52]:
# Separate data by aisle order and group by order_id and aisle_id for each cluster
cluster_aisle_orders = {}
for n in range(0,6):
    i = orders[orders['cluster'] == n]
    i = i.groupby(['order_id','aisle_id'])['product_id'].count().reset_index().set_index('order_id')['aisle_id'].rename('item_id')
    cluster_aisle_orders[n] = i

In [53]:
# Transform using association rule function
cluster_aisles_apriori_dict = {}
for n in range(0,6):
    cluster_aisles_apriori_dict[n] = association_rules(cluster_aisle_orders[n],.0001,n)

Computing for cluster: 0.
Items with support >= 0.0001: 134.
Remaining number of products: 7486862.
Remaining orders with 2+ orders: 970585.
Remaining number of orders: 6432000.
Number of item pairs: 8908.
Item pairs with support >= 0.0001: 8196.
--------------------------------------
Computing for cluster: 1.
Items with support >= 0.0001: 118.
Remaining number of products: 413887.
Remaining orders with 2+ orders: 97235.
Remaining number of orders: 296776.
Number of item pairs: 5718.
Item pairs with support >= 0.0001: 3370.
--------------------------------------
Computing for cluster: 2.
Items with support >= 0.0001: 134.
Remaining number of products: 139586.
Remaining orders with 2+ orders: 33774.
Remaining number of orders: 91260.
Number of item pairs: 6580.
Item pairs with support >= 0.0001: 4061.
--------------------------------------
Computing for cluster: 3.
Items with support >= 0.0001: 134.
Remaining number of products: 3736988.
Remaining orders with 2+ orders: 518677.
Remainin

In [54]:
for n in cluster_aisles_apriori_dict:
    # Get names for aisle A
    cluster_aisles_apriori_dict[n] = cluster_aisles_apriori_dict[n].merge(aisles[['aisle_id','aisle']]
               .rename(columns = {'aisle_id':'item_A','aisle':'aisle_name_A'}), on = 'item_A')
    # Get names for aisle B
    cluster_aisles_apriori_dict[n] = cluster_aisles_apriori_dict[n].merge(aisles[['aisle_id','aisle']]
               .rename(columns = {'aisle_id':'item_B','aisle':'aisle_name_B'}), on = 'item_B')
    
    cluster_aisles_apriori_dict[n] = cluster_aisles_apriori_dict[n][['item_A',
                                                                     'item_B',
                                                                     'aisle_name_A',
                                                                     'aisle_name_B',
                                                                     'freqAB',
                                                                     'supportAB',
                                                                     'freqA',
                                                                     'supportA',
                                                                     'freqB',
                                                                     'supportB',
                                                                     'confidenceAtoB',
                                                                     'confidenceBtoA',
                                                                     'lift']]

In [55]:
for n in range(0,6):
    print(f"Cluster: {n}")
    display(cluster_aisles_apriori_dict[n].head())
    print('----------------------------------------------------------------------------------------------------------------')

Cluster: 0


Unnamed: 0,item_A,item_B,aisle_name_A,aisle_name_B,freqAB,supportAB,freqA,supportA,freqB,supportB,confidenceAtoB,confidenceBtoA,lift
0,28,62,red wines,white wines,5738,0.005912,16268,0.016761,17020,0.017536,0.352717,0.337133,20.11409
1,25,62,soap,white wines,113,0.000116,15659,0.016134,17020,0.017536,0.007216,0.006639,0.411518
2,27,62,beers coolers,white wines,3680,0.003792,20350,0.020967,17020,0.017536,0.180835,0.216216,10.312345
3,56,62,diapers wipes,white wines,197,0.000203,12511,0.01289,17020,0.017536,0.015746,0.011575,0.897942
4,55,62,shave needs,white wines,112,0.000115,6968,0.007179,17020,0.017536,0.016073,0.00658,0.916609


----------------------------------------------------------------------------------------------------------------
Cluster: 1


Unnamed: 0,item_A,item_B,aisle_name_A,aisle_name_B,freqAB,supportAB,freqA,supportA,freqB,supportB,confidenceAtoB,confidenceBtoA,lift
0,28,62,red wines,white wines,102,0.001049,403,0.004145,272,0.002797,0.253102,0.375,90.479218
1,27,62,beers coolers,white wines,39,0.000401,273,0.002808,272,0.002797,0.142857,0.143382,51.068803
2,19,62,oils vinegars,white wines,17,0.000175,6841,0.070355,272,0.002797,0.002485,0.0625,0.888348
3,61,62,cookies cakes,white wines,12,0.000123,3366,0.034617,272,0.002797,0.003565,0.044118,1.274444
4,36,62,butter,white wines,13,0.000134,5039,0.051823,272,0.002797,0.00258,0.047794,0.922259


----------------------------------------------------------------------------------------------------------------
Cluster: 2


Unnamed: 0,item_A,item_B,aisle_name_A,aisle_name_B,freqAB,supportAB,freqA,supportA,freqB,supportB,confidenceAtoB,confidenceBtoA,lift
0,28,62,red wines,white wines,49,0.001451,110,0.003257,169,0.005004,0.445455,0.289941,89.022378
1,27,62,beers coolers,white wines,46,0.001362,348,0.010304,169,0.005004,0.132184,0.272189,26.416446
2,37,62,ice cream ice,white wines,9,0.000266,1383,0.040949,169,0.005004,0.006508,0.053254,1.300517
3,38,62,frozen meals,white wines,8,0.000237,563,0.01667,169,0.005004,0.01421,0.047337,2.839732
4,17,62,baking ingredients,white wines,4,0.000118,1116,0.033043,169,0.005004,0.003584,0.023669,0.716294


----------------------------------------------------------------------------------------------------------------
Cluster: 3


Unnamed: 0,item_A,item_B,aisle_name_A,aisle_name_B,freqAB,supportAB,freqA,supportA,freqB,supportB,confidenceAtoB,confidenceBtoA,lift
0,28,62,red wines,white wines,161,0.00031,830,0.0016,750,0.001446,0.193976,0.214667,134.147786
1,27,62,beers coolers,white wines,84,0.000162,778,0.0015,750,0.001446,0.107969,0.112,74.668154
2,54,62,paper goods,white wines,87,0.000168,18844,0.036331,750,0.001446,0.004617,0.116,3.192875
3,17,62,baking ingredients,white wines,61,0.000118,45617,0.087949,750,0.001446,0.001337,0.081333,0.924781
4,26,62,coffee,white wines,64,0.000123,16810,0.032409,750,0.001446,0.003807,0.085333,2.632983


----------------------------------------------------------------------------------------------------------------
Cluster: 4


Unnamed: 0,item_A,item_B,aisle_name_A,aisle_name_B,freqAB,supportAB,freqA,supportA,freqB,supportB,confidenceAtoB,confidenceBtoA,lift
0,28,62,red wines,white wines,1816,0.001398,7240,0.005575,6743,0.005193,0.250829,0.269316,48.305789
1,27,62,beers coolers,white wines,795,0.000612,5634,0.004339,6743,0.005193,0.141108,0.1179,27.175165
2,6,62,other,white wines,331,0.000255,14495,0.011162,6743,0.005193,0.022835,0.049088,4.397762
3,41,62,cat food care,white wines,193,0.000149,9419,0.007253,6743,0.005193,0.02049,0.028622,3.946158
4,54,62,paper goods,white wines,806,0.000621,70185,0.054047,6743,0.005193,0.011484,0.119531,2.211631


----------------------------------------------------------------------------------------------------------------
Cluster: 5


Unnamed: 0,item_A,item_B,aisle_name_A,aisle_name_B,freqAB,supportAB,freqA,supportA,freqB,supportB,confidenceAtoB,confidenceBtoA,lift
0,28,62,red wines,white wines,147,0.000675,390,0.00179,368,0.001689,0.376923,0.399457,223.14768
1,27,62,beers coolers,white wines,84,0.000386,748,0.003433,368,0.001689,0.112299,0.228261,66.48403
2,16,62,fresh herbs,white wines,23,0.000106,9985,0.045831,368,0.001689,0.002303,0.0625,1.363702
3,32,62,packaged produce,white wines,29,0.000133,25191,0.115627,368,0.001689,0.001151,0.078804,0.681541
4,21,62,packaged cheese,white wines,28,0.000129,30260,0.138893,368,0.001689,0.000925,0.076087,0.547808


----------------------------------------------------------------------------------------------------------------


### 1.6 Export Files <a class="anchor" id="chapter1_6"></a>

In [56]:
# Export item association rules
for n in range(0,6):
    cluster_items_apriori_dict[n].to_csv('../data/cluster_'+str(n)+'_item_rules.csv', index=False)

# Export aisle association rules
for n in range(0,6):
    cluster_aisles_apriori_dict[n].to_csv('../data/cluster_'+str(n)+'_aisle_rules.csv', index=False)