* Apriori Algorithm is a Machine Learning algorithm which is used to gain insight into the structured relationships between different items involved. The most prominent practical application of the algorithm is to recommend products based on the products already present in the user’s cart. Walmart especially has made great use of the algorithm in suggesting products to it’s users.

In [1]:
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules       

#### Read Datasets

In [2]:
# read and get the first five of sales receipt
sales_receipt = pd. read_csv('dataset/201904 sales reciepts.csv')
sales_receipt.head()

Unnamed: 0,transaction_id,transaction_date,transaction_time,sales_outlet_id,staff_id,customer_id,instore_yn,order,line_item_id,product_id,quantity,line_item_amount,unit_price,promo_item_yn
0,7,2019-04-01,12:04:43,3,12,558,N,1,1,52,1,2.5,2.5,N
1,11,2019-04-01,15:54:39,3,17,781,N,1,1,27,2,7.0,3.5,N
2,19,2019-04-01,14:34:59,3,17,788,Y,1,1,46,2,5.0,2.5,N
3,32,2019-04-01,16:06:04,3,12,683,N,1,1,23,2,5.0,2.5,N
4,33,2019-04-01,19:18:37,3,17,99,Y,1,1,34,1,2.45,2.45,N


In [3]:
# get the product dataset
product = pd.read_csv('dataset/product.csv')
product.head()

Unnamed: 0,product_id,product_group,product_category,product_type,product,product_description,unit_of_measure,current_wholesale_price,current_retail_price,tax_exempt_yn,promo_yn,new_product_yn
0,1,Whole Bean/Teas,Coffee beans,Organic Beans,Brazilian - Organic,It's like Carnival in a cup. Clean and smooth.,12 oz,14.4,$18.00,Y,N,N
1,2,Whole Bean/Teas,Coffee beans,House blend Beans,Our Old Time Diner Blend,Out packed blend of beans that is reminiscent ...,12 oz,14.4,$18.00,Y,N,N
2,3,Whole Bean/Teas,Coffee beans,Espresso Beans,Espresso Roast,Our house blend for a good espresso shot.,1 lb,11.8,$14.75,Y,N,N
3,4,Whole Bean/Teas,Coffee beans,Espresso Beans,Primo Espresso Roast,Our primium single source of hand roasted beans.,1 lb,16.36,$20.45,Y,N,N
4,5,Whole Bean/Teas,Coffee beans,Gourmet Beans,Columbian Medium Roast,A smooth cup of coffee any time of day.,1 lb,12.0,$15.00,Y,N,N


### Data Wrangling

In [4]:
# get the columns of sales receipt data
sales_receipt.columns

Index(['transaction_id', 'transaction_date', 'transaction_time',
       'sales_outlet_id', 'staff_id', 'customer_id', 'instore_yn', 'order',
       'line_item_id', 'product_id', 'quantity', 'line_item_amount',
       'unit_price', 'promo_item_yn'],
      dtype='object')

In [5]:
# select the necessary columns from the sales receipt aand product data
sales_receipt = sales_receipt[['transaction_id', 'transaction_date', 'customer_id', 'sales_outlet_id', 'product_id', 'quantity']]
product = product[['product_id', 'product_category', 'product']]

In [6]:
# merge sales_receipt and product data on product_id to a single dataser
dataset = pd.merge(sales_receipt, product, on='product_id', how='left')
dataset.head()

Unnamed: 0,transaction_id,transaction_date,customer_id,sales_outlet_id,product_id,quantity,product_category,product
0,7,2019-04-01,558,3,52,1,Tea,Traditional Blend Chai Rg
1,11,2019-04-01,781,3,27,2,Coffee,Brazilian Lg
2,19,2019-04-01,788,3,46,2,Tea,Serenity Green Tea Rg
3,32,2019-04-01,683,3,23,2,Coffee,Our Old Time Diner Blend Rg
4,33,2019-04-01,99,3,34,1,Coffee,Jamaican Coffee River Sm


* Remove sizes (Rg, Lg, Sm) from product

In [7]:
# filter dataset that have any size of Dark chocolate product
dataset[dataset['product'].str.contains('Dark chocolate')]['product'].unique()

array(['Dark chocolate Lg', 'Dark chocolate Rg', 'Dark chocolate'],
      dtype=object)

In [8]:
# unique value of the product with sizes
dataset['product'].nunique()

80

In [9]:
# Remove the Lg, Rg, Sm from the products
dataset['product'] = dataset['product'].str.replace(' Rg', '')
dataset['product'] = dataset['product'].str.replace(' Lg', '')
dataset['product'] = dataset['product'].str.replace(' Sm', '')

In [10]:
# unique value of the product without sizes
dataset['product'].nunique()

45

In [11]:
# view the products
print(dataset['product'].unique())

['Traditional Blend Chai' 'Brazilian' 'Serenity Green Tea'
 'Our Old Time Diner Blend' 'Jamaican Coffee River' 'Ethiopia'
 'English Breakfast' 'Sustainably Grown Organic' 'Earl Grey' 'Cappuccino'
 'Espresso shot' 'Latte' 'Dark chocolate' 'Columbian Medium Roast'
 'Oatmeal Scone' 'Morning Sunrise Chai' 'Peppermint' 'Jumbo Savory Scone'
 'Lemon Grass' 'Chocolate Chip Biscotti' 'Spicy Eye Opener Chai'
 'Ginger Biscotti' 'Chocolate Croissant' 'Hazelnut Biscotti'
 'Cranberry Scone' 'Scottish Cream Scone ' 'Croissant' 'Almond Croissant'
 'Ginger Scone' 'Ouro Brasileiro shot' 'Organic Decaf Blend'
 'Chocolate syrup' 'Hazelnut syrup' 'Carmel syrup'
 'Sugar Free Vanilla syrup' 'Jamacian Coffee River'
 'Guatemalan Sustainably Grown' 'Civet Cat' 'Chili Mayan'
 'Primo Espresso Roast' 'Brazilian - Organic' 'I Need My Bean! Diner mug'
 'Espresso Roast' 'I Need My Bean! T-shirt' 'I Need My Bean! Latte cup']


* Choose Product Subset that are either food or drinks

In [12]:
products_to_take = ['Cappuccino', 'Latte', 'Espresso shot', \
                    'Dark chocolate', 'Sugar Free Vanilla syrup', 'Chocolate syrup', \
                    'Carmel syrup', 'Hazelnut syrup', 'Ginger Scone', \
                    'Chocolate Croissant', 'Jumbo Savory Scone', 'Cranberry Scone', 'Hazelnut Biscotti', \
                    'Croissant', 'Almond Croissant', 'Oatmeal Scone', 'Chocolate Chip Biscotti' \
                    'Ginger Biscotti', \
                    ]

In [13]:
# dataset of the selected above products
dataset = dataset[dataset['product'].isin(products_to_take)]
dataset.head()

Unnamed: 0,transaction_id,transaction_date,customer_id,sales_outlet_id,product_id,quantity,product_category,product
16,108,2019-04-01,65,3,40,1,Coffee,Cappuccino
17,112,2019-04-01,90,3,37,2,Coffee,Espresso shot
20,127,2019-04-01,116,3,41,2,Coffee,Cappuccino
21,134,2019-04-01,189,3,38,2,Coffee,Latte
22,135,2019-04-01,131,3,40,1,Coffee,Cappuccino


In [14]:
# display the products and its category
dataset[['product', 'product_category']].drop_duplicates().reset_index(drop=True)

Unnamed: 0,product,product_category
0,Cappuccino,Coffee
1,Espresso shot,Coffee
2,Latte,Coffee
3,Dark chocolate,Drinking Chocolate
4,Oatmeal Scone,Bakery
5,Jumbo Savory Scone,Bakery
6,Chocolate Croissant,Bakery
7,Hazelnut Biscotti,Bakery
8,Cranberry Scone,Bakery
9,Croissant,Bakery


* Cleaned transaction for each customer

In [15]:
# create a transaction column for concatenating transaction_id and customer_id
dataset['transaction'] = dataset['transaction_id'].astype(str) + '_' + dataset['customer_id'].astype(str)
dataset.head()

Unnamed: 0,transaction_id,transaction_date,customer_id,sales_outlet_id,product_id,quantity,product_category,product,transaction
16,108,2019-04-01,65,3,40,1,Coffee,Cappuccino,108_65
17,112,2019-04-01,90,3,37,2,Coffee,Espresso shot,112_90
20,127,2019-04-01,116,3,41,2,Coffee,Cappuccino,127_116
21,134,2019-04-01,189,3,38,2,Coffee,Latte,134_189
22,135,2019-04-01,131,3,40,1,Coffee,Cappuccino,135_131


In [16]:
# get the count of each transaction
num_of_items_for_each_transaction = dataset['transaction'].value_counts().reset_index()
num_of_items_for_each_transaction.head()

Unnamed: 0,transaction,count
0,209_0,31
1,206_0,30
2,204_0,27
3,208_0,25
4,203_0,24


In [17]:
# number of just 1 transaction
num_of_items_for_each_transaction[num_of_items_for_each_transaction['count']==1]

Unnamed: 0,transaction,count
2511,969_0,1
2512,1130_8146,1
2513,1113_8406,1
2514,903_8369,1
2515,1010_0,1
...,...,...
7803,297_8353,1
7804,337_8112,1
7805,343_8399,1
7806,357_8239,1


In [18]:
# valid transaction of more than 1 item
valid_transactions = num_of_items_for_each_transaction[num_of_items_for_each_transaction['count'] > 1]['transaction'].to_list()
valid_transactions[:10]

['209_0',
 '206_0',
 '204_0',
 '208_0',
 '203_0',
 '207_0',
 '205_0',
 '151_0',
 '202_0',
 '9_0']

In [19]:
# get dataset of transaction of more than 1 item.
dataset = dataset[dataset['transaction'].isin(valid_transactions)]
dataset.info()

<class 'pandas.core.frame.DataFrame'>
Index: 9404 entries, 34 to 49886
Data columns (total 9 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   transaction_id    9404 non-null   int64 
 1   transaction_date  9404 non-null   object
 2   customer_id       9404 non-null   int64 
 3   sales_outlet_id   9404 non-null   int64 
 4   product_id        9404 non-null   int64 
 5   quantity          9404 non-null   int64 
 6   product_category  9404 non-null   object
 7   product           9404 non-null   object
 8   transaction       9404 non-null   object
dtypes: int64(5), object(4)
memory usage: 734.7+ KB


* Product Trends

In [20]:
# most ordered product_category
dataset['product_category'].value_counts()

product_category
Bakery                3115
Coffee                3104
Flavours              2246
Drinking Chocolate     917
Packaged Chocolate      22
Name: count, dtype: int64

In [21]:
# most ordered product
dataset['product'].value_counts()

product
Cappuccino                  1251
Latte                       1235
Dark chocolate               939
Chocolate Croissant          635
Espresso shot                618
Sugar Free Vanilla syrup     605
Chocolate syrup              568
Carmel syrup                 561
Hazelnut syrup               512
Ginger Scone                 412
Jumbo Savory Scone           356
Croissant                    353
Cranberry Scone              349
Almond Croissant             343
Hazelnut Biscotti            334
Oatmeal Scone                333
Name: count, dtype: int64

* Popularity Recommendation Engine based on Products and Product_category Columns for any random person

In [22]:
product_recommendation = dataset.groupby(['product', 'product_category']).count().reset_index()
product_recommendation.head()

Unnamed: 0,product,product_category,transaction_id,transaction_date,customer_id,sales_outlet_id,product_id,quantity,transaction
0,Almond Croissant,Bakery,343,343,343,343,343,343,343
1,Cappuccino,Coffee,1251,1251,1251,1251,1251,1251,1251
2,Carmel syrup,Flavours,561,561,561,561,561,561,561
3,Chocolate Croissant,Bakery,635,635,635,635,635,635,635
4,Chocolate syrup,Flavours,568,568,568,568,568,568,568


In [None]:
# select product, product_category and transaction_id
product_recommendation = product_recommendation[['product', 'product_category', 'transaction_id']]
product_recommendation = product_recommendation.rename(columns = {'transaction_id' : 'number_of_transactions'})
product_recommendation.head()

Unnamed: 0,product,product_category,number_of_transactions
0,Almond Croissant,Bakery,343
1,Cappuccino,Coffee,1251
2,Carmel syrup,Flavours,561
3,Chocolate Croissant,Bakery,635
4,Chocolate syrup,Flavours,568


In [None]:
# save the product_recommendation in this directory
product_recommendation.to_csv('api/recommendation_objects/popular_recommendation.csv', index=False)

## Apriori Recommendation Engine

In [33]:
# training dataset
train_basket = dataset.groupby(['transaction', 'product'])['product'].count().reset_index(name='Count')
train_basket.head()

Unnamed: 0,transaction,product,Count
0,1000_0,Dark chocolate,1
1,1000_0,Oatmeal Scone,1
2,1001_8306,Cappuccino,1
3,1001_8306,Carmel syrup,1
4,1002_0,Carmel syrup,1


In [34]:
# realigning the train basket into a wide format and input 0 as missing number
my_basket = train_basket.pivot_table(index='transaction', columns='product', values='Count').fillna(0)
my_basket.head()

product,Almond Croissant,Cappuccino,Carmel syrup,Chocolate Croissant,Chocolate syrup,Cranberry Scone,Croissant,Dark chocolate,Espresso shot,Ginger Scone,Hazelnut Biscotti,Hazelnut syrup,Jumbo Savory Scone,Latte,Oatmeal Scone,Sugar Free Vanilla syrup
transaction,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
1000_0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
1001_8306,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1002_0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,2.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
1004_5383,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
1005_0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,1.0,0.0,0.0


In [36]:
# write a for loop over the dataframe to make it 0s and 1s i.e get rid of any number in the dataframe
def encode_units(x):
    if x <= 0:
        return 0
    elif x > 0:
        return 1
    
my_basket_set = my_basket.applymap(encode_units)
my_basket_set.head(10)

  my_basket_set = my_basket.applymap(encode_units)


product,Almond Croissant,Cappuccino,Carmel syrup,Chocolate Croissant,Chocolate syrup,Cranberry Scone,Croissant,Dark chocolate,Espresso shot,Ginger Scone,Hazelnut Biscotti,Hazelnut syrup,Jumbo Savory Scone,Latte,Oatmeal Scone,Sugar Free Vanilla syrup
transaction,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
1000_0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0
1001_8306,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0
1002_0,0,0,1,0,0,0,0,1,1,1,0,0,0,0,0,0
1004_5383,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0
1005_0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0
1005_5559,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1
1006_0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0
1007_8375,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0
1008_0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0
1009_5183,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1


* Note: the mlxtend Apriori recommendation engine expects the above formats

In [38]:
# instantiate the apriori class
frequent_items = apriori(my_basket_set, min_support=0.05, use_colnames=True)
frequent_items.head(20)



Unnamed: 0,support,itemsets
0,0.120271,(Almond Croissant)
1,0.394265,(Cappuccino)
2,0.201513,(Carmel syrup)
3,0.142573,(Chocolate Croissant)
4,0.199124,(Chocolate syrup)
5,0.121864,(Cranberry Scone)
6,0.119474,(Croissant)
7,0.279968,(Dark chocolate)
8,0.216647,(Espresso shot)
9,0.13859,(Ginger Scone)


In [42]:
# Association rules aka Market Basket Analysis
rules_basket = association_rules(frequent_items, metric='lift', min_threshold=1, num_itemsets=1)
rules_basket

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
0,(Almond Croissant),(Cappuccino),0.120271,0.394265,0.056153,0.466887,1.184196,1.0,0.008734,1.136223,0.176811,0.122502,0.119891,0.304656
1,(Cappuccino),(Almond Croissant),0.394265,0.120271,0.056153,0.142424,1.184196,1.0,0.008734,1.025833,0.256788,0.122502,0.025182,0.304656
2,(Almond Croissant),(Dark chocolate),0.120271,0.279968,0.060534,0.503311,1.797745,1.0,0.026862,1.449664,0.504414,0.178195,0.310185,0.359764
3,(Dark chocolate),(Almond Croissant),0.279968,0.120271,0.060534,0.216216,1.797745,1.0,0.026862,1.122413,0.616289,0.178195,0.109062,0.359764
4,(Latte),(Almond Croissant),0.394265,0.120271,0.057348,0.145455,1.209392,1.0,0.009929,1.029470,0.285832,0.125436,0.028627,0.311138
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
77,"(Latte, Cappuccino)",(Dark chocolate),0.102350,0.279968,0.052170,0.509728,1.820663,1.0,0.023516,1.468636,0.502144,0.158022,0.319096,0.348036
78,"(Dark chocolate, Cappuccino)",(Latte),0.100757,0.394265,0.052170,0.517787,1.313295,1.0,0.012446,1.256155,0.265286,0.117806,0.203920,0.325055
79,(Latte),"(Dark chocolate, Cappuccino)",0.394265,0.100757,0.052170,0.132323,1.313295,1.0,0.012446,1.036381,0.393830,0.117806,0.035103,0.325055
80,(Dark chocolate),"(Latte, Cappuccino)",0.279968,0.102350,0.052170,0.186344,1.820663,1.0,0.023516,1.103231,0.626013,0.158022,0.093572,0.348036


In [46]:
# Testing any antecedents by recommending best consequents based off confidence value
rules_basket[rules_basket['antecedents'] == {'Latte'}].sort_values('confidence', ascending=False)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
74,(Latte),(Sugar Free Vanilla syrup),0.394265,0.211071,0.114695,0.290909,1.37825,1.0,0.031477,1.112592,0.453074,0.233766,0.101198,0.417153
30,(Latte),(Carmel syrup),0.394265,0.201513,0.113501,0.287879,1.428584,1.0,0.034051,1.121279,0.495277,0.235343,0.108161,0.42556
40,(Latte),(Chocolate syrup),0.394265,0.199124,0.108722,0.275758,1.384855,1.0,0.030214,1.105812,0.458786,0.224322,0.095687,0.410879
70,(Latte),(Hazelnut syrup),0.394265,0.180804,0.10673,0.270707,1.497237,1.0,0.035445,1.123274,0.548265,0.227891,0.109745,0.430508
48,(Latte),(Croissant),0.394265,0.119474,0.060135,0.152525,1.276636,1.0,0.013031,1.038999,0.357733,0.132572,0.037535,0.327929
34,(Latte),(Chocolate Croissant),0.394265,0.142573,0.058144,0.147475,1.034383,1.0,0.001933,1.00575,0.054876,0.121464,0.005717,0.277648
67,(Latte),(Ginger Scone),0.394265,0.13859,0.058144,0.147475,1.064107,1.0,0.003503,1.010421,0.099457,0.122483,0.010314,0.283507
4,(Latte),(Almond Croissant),0.394265,0.120271,0.057348,0.145455,1.209392,1.0,0.009929,1.02947,0.285832,0.125436,0.028627,0.311138
44,(Latte),(Cranberry Scone),0.394265,0.121864,0.054162,0.137374,1.127273,1.0,0.006115,1.01798,0.186391,0.117241,0.017662,0.290909
79,(Latte),"(Dark chocolate, Cappuccino)",0.394265,0.100757,0.05217,0.132323,1.313295,1.0,0.012446,1.036381,0.39383,0.117806,0.035103,0.325055


* Get product and product_category into a Json Format

In [54]:
product_categories = dataset[['product', 'product_category']].drop_duplicates().set_index('product').to_dict()['product_category']
product_categories

{'Cappuccino': 'Coffee',
 'Jumbo Savory Scone': 'Bakery',
 'Latte': 'Coffee',
 'Espresso shot': 'Coffee',
 'Hazelnut Biscotti': 'Bakery',
 'Chocolate Croissant': 'Bakery',
 'Dark chocolate': 'Packaged Chocolate',
 'Cranberry Scone': 'Bakery',
 'Croissant': 'Bakery',
 'Almond Croissant': 'Bakery',
 'Oatmeal Scone': 'Bakery',
 'Ginger Scone': 'Bakery',
 'Chocolate syrup': 'Flavours',
 'Hazelnut syrup': 'Flavours',
 'Carmel syrup': 'Flavours',
 'Sugar Free Vanilla syrup': 'Flavours'}

In [56]:
rules_basket.head()

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
0,(Almond Croissant),(Cappuccino),0.120271,0.394265,0.056153,0.466887,1.184196,1.0,0.008734,1.136223,0.176811,0.122502,0.119891,0.304656
1,(Cappuccino),(Almond Croissant),0.394265,0.120271,0.056153,0.142424,1.184196,1.0,0.008734,1.025833,0.256788,0.122502,0.025182,0.304656
2,(Almond Croissant),(Dark chocolate),0.120271,0.279968,0.060534,0.503311,1.797745,1.0,0.026862,1.449664,0.504414,0.178195,0.310185,0.359764
3,(Dark chocolate),(Almond Croissant),0.279968,0.120271,0.060534,0.216216,1.797745,1.0,0.026862,1.122413,0.616289,0.178195,0.109062,0.359764
4,(Latte),(Almond Croissant),0.394265,0.120271,0.057348,0.145455,1.209392,1.0,0.009929,1.02947,0.285832,0.125436,0.028627,0.311138


In [57]:
# Get recommendations and convert to json object

recommendations_json = {}

antecedents = rules_basket['antecedents'].unique()
for antecedent in antecedents:
    df_rec = rules_basket[rules_basket['antecedents']==antecedent]
    df_rec = df_rec.sort_values('confidence',ascending=False)
    key = "_".join(antecedent)
    recommendations_json[key] = []
    for _, row in df_rec.iterrows():
        rec_objects =row['consequents']
        for rec_object in rec_objects:
            already_exists = False
            for current_rec_object in recommendations_json[key]:
                if rec_object == current_rec_object['product']:
                    already_exists=True
            if already_exists:
                continue
            
            rec = {'product':rec_object, 
                   'product_category':product_categories[rec_object],
                   'confidence': row['confidence']
                  }
            recommendations_json[key].append(rec)

In [None]:
# pprint to see the contents of the above recommendations
import pprint

pprint.pp(recommendations_json)

{'Almond Croissant': [{'product': 'Dark chocolate',
                       'product_category': 'Packaged Chocolate',
                       'confidence': 0.5033112582781456},
                      {'product': 'Latte',
                       'product_category': 'Coffee',
                       'confidence': 0.4768211920529801},
                      {'product': 'Cappuccino',
                       'product_category': 'Coffee',
                       'confidence': 0.46688741721854304}],
 'Cappuccino': [{'product': 'Sugar Free Vanilla syrup',
                 'product_category': 'Flavours',
                 'confidence': 0.302020202020202},
                {'product': 'Chocolate syrup',
                 'product_category': 'Flavours',
                 'confidence': 0.29292929292929293},
                {'product': 'Carmel syrup',
                 'product_category': 'Flavours',
                 'confidence': 0.27474747474747474},
                {'product': 'Hazelnut syrup',
             

In [59]:
# save the recommendation in json format
import json

with open('api/recommendation_objects/apriori_recommendations.json', 'w') as json_file:
    json.dump(recommendations_json, json_file)

* This ends the recommendation engine with Apriori