In [1]:
import pandas as pd
import numpy as np
from mlxtend.frequent_patterns import association_rules, apriori

## Read Dataset

In [2]:
sales_receipts = pd.read_csv('dataset/201904 sales reciepts.csv')
sales_receipts.head()

Unnamed: 0,transaction_id,transaction_date,transaction_time,sales_outlet_id,staff_id,customer_id,instore_yn,order,line_item_id,product_id,quantity,line_item_amount,unit_price,promo_item_yn
0,7,2019-04-01,12:04:43,3,12,558,N,1,1,52,1,2.5,2.5,N
1,11,2019-04-01,15:54:39,3,17,781,N,1,1,27,2,7.0,3.5,N
2,19,2019-04-01,14:34:59,3,17,788,Y,1,1,46,2,5.0,2.5,N
3,32,2019-04-01,16:06:04,3,12,683,N,1,1,23,2,5.0,2.5,N
4,33,2019-04-01,19:18:37,3,17,99,Y,1,1,34,1,2.45,2.45,N


In [3]:
product = pd.read_csv('dataset/product.csv')
product.head()

Unnamed: 0,product_id,product_group,product_category,product_type,product,product_description,unit_of_measure,current_wholesale_price,current_retail_price,tax_exempt_yn,promo_yn,new_product_yn
0,1,Whole Bean/Teas,Coffee beans,Organic Beans,Brazilian - Organic,It's like Carnival in a cup. Clean and smooth.,12 oz,14.4,$18.00,Y,N,N
1,2,Whole Bean/Teas,Coffee beans,House blend Beans,Our Old Time Diner Blend,Out packed blend of beans that is reminiscent ...,12 oz,14.4,$18.00,Y,N,N
2,3,Whole Bean/Teas,Coffee beans,Espresso Beans,Espresso Roast,Our house blend for a good espresso shot.,1 lb,11.8,$14.75,Y,N,N
3,4,Whole Bean/Teas,Coffee beans,Espresso Beans,Primo Espresso Roast,Our primium single source of hand roasted beans.,1 lb,16.36,$20.45,Y,N,N
4,5,Whole Bean/Teas,Coffee beans,Gourmet Beans,Columbian Medium Roast,A smooth cup of coffee any time of day.,1 lb,12.0,$15.00,Y,N,N


# Data Wrangling

## Merge Data

In [4]:
sales_receipts = sales_receipts[['transaction_id','transaction_date','sales_outlet_id','customer_id','product_id','quantity']]
product = product[['product_id','product_category', 'product']]
dataset = pd.merge(sales_receipts, product, on='product_id', how='left')
dataset.head()

Unnamed: 0,transaction_id,transaction_date,sales_outlet_id,customer_id,product_id,quantity,product_category,product
0,7,2019-04-01,3,558,52,1,Tea,Traditional Blend Chai Rg
1,11,2019-04-01,3,781,27,2,Coffee,Brazilian Lg
2,19,2019-04-01,3,788,46,2,Tea,Serenity Green Tea Rg
3,32,2019-04-01,3,683,23,2,Coffee,Our Old Time Diner Blend Rg
4,33,2019-04-01,3,99,34,1,Coffee,Jamaican Coffee River Sm


## Remove Sizes

In [5]:
dataset[dataset['product'].str.contains("Dark chocolate")]['product'].unique()

array(['Dark chocolate Lg', 'Dark chocolate Rg', 'Dark chocolate'],
      dtype=object)

In [6]:
dataset['product'].nunique()

80

In [7]:
dataset['product'] = dataset['product'].str.replace(' Rg', '')
dataset['product'] = dataset['product'].str.replace(' Sm', '')
dataset['product'] = dataset['product'].str.replace(' Lg', '')

In [8]:
dataset['product'].nunique()

45

In [9]:
dataset['product'].unique()

array(['Traditional Blend Chai', 'Brazilian', 'Serenity Green Tea',
       'Our Old Time Diner Blend', 'Jamaican Coffee River', 'Ethiopia',
       'English Breakfast', 'Sustainably Grown Organic', 'Earl Grey',
       'Cappuccino', 'Espresso shot', 'Latte', 'Dark chocolate',
       'Columbian Medium Roast', 'Oatmeal Scone', 'Morning Sunrise Chai',
       'Peppermint', 'Jumbo Savory Scone', 'Lemon Grass',
       'Chocolate Chip Biscotti', 'Spicy Eye Opener Chai',
       'Ginger Biscotti', 'Chocolate Croissant', 'Hazelnut Biscotti',
       'Cranberry Scone', 'Scottish Cream Scone ', 'Croissant',
       'Almond Croissant', 'Ginger Scone', 'Ouro Brasileiro shot',
       'Organic Decaf Blend', 'Chocolate syrup', 'Hazelnut syrup',
       'Carmel syrup', 'Sugar Free Vanilla syrup',
       'Jamacian Coffee River', 'Guatemalan Sustainably Grown',
       'Civet Cat', 'Chili Mayan', 'Primo Espresso Roast',
       'Brazilian - Organic', 'I Need My Bean! Diner mug',
       'Espresso Roast', 'I Need 

# Filter Products for Product Subset

In [10]:
products_to_take = ['Cappuccino', 'Latte', 'Espresso shot',  \
                     'Dark chocolate','Sugar Free Vanilla syrup', 'Chocolate syrup',\
                    'Carmel syrup', 'Hazelnut syrup', 'Ginger Scone',  \
                    'Chocolate Croissant', 'Jumbo Savory Scone', 'Cranberry Scone', 'Hazelnut Biscotti',\
                    'Croissant', 'Almond Croissant', 'Oatmeal Scone', 'Chocolate Chip Biscotti',\
                    'Ginger Biscotti',\
                   ]

In [11]:
dataset = dataset[dataset['product'].isin(products_to_take)]
dataset['product'].nunique()

18

In [12]:
dataset[['product', 'product_category']].drop_duplicates().reset_index(drop=True)

Unnamed: 0,product,product_category
0,Cappuccino,Coffee
1,Espresso shot,Coffee
2,Latte,Coffee
3,Dark chocolate,Drinking Chocolate
4,Oatmeal Scone,Bakery
5,Jumbo Savory Scone,Bakery
6,Chocolate Chip Biscotti,Bakery
7,Ginger Biscotti,Bakery
8,Chocolate Croissant,Bakery
9,Hazelnut Biscotti,Bakery


## Clean Transactions

In [13]:
dataset['transaction'] = dataset['transaction_id'].astype(str) + '_' +  dataset['customer_id'].astype(str)
dataset.head()

Unnamed: 0,transaction_id,transaction_date,sales_outlet_id,customer_id,product_id,quantity,product_category,product,transaction
16,108,2019-04-01,3,65,40,1,Coffee,Cappuccino,108_65
17,112,2019-04-01,3,90,37,2,Coffee,Espresso shot,112_90
20,127,2019-04-01,3,116,41,2,Coffee,Cappuccino,127_116
21,134,2019-04-01,3,189,38,2,Coffee,Latte,134_189
22,135,2019-04-01,3,131,40,1,Coffee,Cappuccino,135_131


In [14]:
num_of_items_per_transaction = dataset['transaction'].value_counts().reset_index()
num_of_items_per_transaction.head()

Unnamed: 0,transaction,count
0,209_0,31
1,206_0,30
2,204_0,27
3,208_0,25
4,203_0,24


In [15]:
valid_transactions = num_of_items_per_transaction[num_of_items_per_transaction['count'] > 1]['transaction'].to_list()
dataset = dataset[dataset['transaction'].isin(valid_transactions)]

## Product Trends

In [16]:
dataset['product_category'].value_counts()

product_category
Bakery                3800
Coffee                3174
Flavours              2246
Drinking Chocolate     947
Packaged Chocolate      22
Name: count, dtype: int64

In [17]:
dataset['product'].value_counts()

product
Cappuccino                  1290
Latte                       1256
Dark chocolate               969
Chocolate Croissant          636
Espresso shot                628
Sugar Free Vanilla syrup     605
Chocolate syrup              568
Carmel syrup                 561
Hazelnut syrup               512
Ginger Scone                 417
Jumbo Savory Scone           357
Croissant                    355
Chocolate Chip Biscotti      352
Cranberry Scone              350
Almond Croissant             347
Hazelnut Biscotti            338
Oatmeal Scone                334
Ginger Biscotti              314
Name: count, dtype: int64

# Popularity Recommendation Engine

In [18]:
product_recommendation = dataset.groupby(['product','product_category']).count().reset_index()

In [19]:
product_recommendation = product_recommendation[['product','product_category','transaction']]
product_recommendation = product_recommendation.rename(columns={'transaction': 'num_of_transactions'})
product_recommendation.head()

Unnamed: 0,product,product_category,num_of_transactions
0,Almond Croissant,Bakery,347
1,Cappuccino,Coffee,1290
2,Carmel syrup,Flavours,561
3,Chocolate Chip Biscotti,Bakery,352
4,Chocolate Croissant,Bakery,636


In [20]:
product_recommendation.to_csv('api/recommendation_objects/popularity_recommendation.csv', index=False)

# Apriori Recommendation Engine

In [21]:
train_basket = (dataset.groupby(['transaction', 'product'])['product']).count().reset_index(name='count')
train_basket.head()

Unnamed: 0,transaction,product,count
0,1000_0,Dark chocolate,1
1,1000_0,Oatmeal Scone,1
2,1001_8306,Cappuccino,1
3,1001_8306,Carmel syrup,1
4,1002_0,Carmel syrup,1


In [22]:
my_basket = train_basket.pivot_table(index='transaction', columns='product',values='count').fillna(0)
my_basket.head()

product,Almond Croissant,Cappuccino,Carmel syrup,Chocolate Chip Biscotti,Chocolate Croissant,Chocolate syrup,Cranberry Scone,Croissant,Dark chocolate,Espresso shot,Ginger Biscotti,Ginger Scone,Hazelnut Biscotti,Hazelnut syrup,Jumbo Savory Scone,Latte,Oatmeal Scone,Sugar Free Vanilla syrup
transaction,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
1000_0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
1001_8306,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1002_0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,2.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
1004_5383,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
1005_0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,1.0,0.0,0.0


In [23]:
def encode_units(x):
    if x <= 0:
        return 0
    return 1
my_basket_sets = my_basket.applymap(encode_units)
my_basket_sets.head()

product,Almond Croissant,Cappuccino,Carmel syrup,Chocolate Chip Biscotti,Chocolate Croissant,Chocolate syrup,Cranberry Scone,Croissant,Dark chocolate,Espresso shot,Ginger Biscotti,Ginger Scone,Hazelnut Biscotti,Hazelnut syrup,Jumbo Savory Scone,Latte,Oatmeal Scone,Sugar Free Vanilla syrup
transaction,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
1000_0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0
1001_8306,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1002_0,0,0,1,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0
1004_5383,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0
1005_0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0


In [24]:
frequent_items = apriori(my_basket_sets,min_support=0.05,use_colnames=True)
frequent_items



Unnamed: 0,support,itemsets
0,0.115646,(Almond Croissant)
1,0.388889,(Cappuccino)
2,0.191232,(Carmel syrup)
3,0.112623,(Chocolate Chip Biscotti)
4,0.135676,(Chocolate Croissant)
5,0.188964,(Chocolate syrup)
6,0.116024,(Cranberry Scone)
7,0.114135,(Croissant)
8,0.277022,(Dark chocolate)
9,0.209373,(Espresso shot)


In [25]:
rules_basket = association_rules(frequent_items,metric='lift', min_threshold=1)
rules_basket.head()

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(Almond Croissant),(Cappuccino),0.115646,0.388889,0.053288,0.460784,1.184874,0.008314,1.133333,0.176432
1,(Cappuccino),(Almond Croissant),0.388889,0.115646,0.053288,0.137026,1.184874,0.008314,1.024775,0.255319
2,(Almond Croissant),(Dark chocolate),0.115646,0.277022,0.057445,0.496732,1.793115,0.025409,1.436567,0.500152
3,(Dark chocolate),(Almond Croissant),0.277022,0.115646,0.057445,0.207367,1.793115,0.025409,1.115717,0.611791
4,(Latte),(Almond Croissant),0.382086,0.115646,0.054422,0.142433,1.231629,0.010235,1.031236,0.304358


In [26]:
rules_basket[rules_basket['antecedents'] == {'Latte'}].sort_values('confidence',ascending=False)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
71,(Latte),(Sugar Free Vanilla syrup),0.382086,0.200302,0.108844,0.284866,1.422182,0.032311,1.118249,0.480415
33,(Latte),(Carmel syrup),0.382086,0.191232,0.10771,0.281899,1.474121,0.034643,1.12626,0.520509
44,(Latte),(Chocolate syrup),0.382086,0.188964,0.103175,0.27003,1.428997,0.030974,1.111053,0.485842
68,(Latte),(Hazelnut syrup),0.382086,0.17158,0.101285,0.265084,1.544961,0.035727,1.127231,0.570848
52,(Latte),(Croissant),0.382086,0.114135,0.057067,0.149357,1.308605,0.013458,1.041407,0.381651
38,(Latte),(Chocolate Croissant),0.382086,0.135676,0.055178,0.144411,1.064381,0.003338,1.010209,0.097889
66,(Latte),(Ginger Scone),0.382086,0.133409,0.055178,0.144411,1.082472,0.004204,1.01286,0.1233
4,(Latte),(Almond Croissant),0.382086,0.115646,0.054422,0.142433,1.231629,0.010235,1.031236,0.304358
64,(Latte),(Ginger Biscotti),0.382086,0.106198,0.054044,0.141444,1.33189,0.013467,1.041053,0.403272
48,(Latte),(Cranberry Scone),0.382086,0.116024,0.051398,0.13452,1.159416,0.007067,1.021371,0.222518


## Save in Json Format

In [27]:
product_categories = dataset[['product', 'product_category']].drop_duplicates().set_index('product').to_dict()['product_category']
product_categories

{'Cappuccino': 'Coffee',
 'Jumbo Savory Scone': 'Bakery',
 'Latte': 'Coffee',
 'Chocolate Chip Biscotti': 'Bakery',
 'Espresso shot': 'Coffee',
 'Hazelnut Biscotti': 'Bakery',
 'Chocolate Croissant': 'Bakery',
 'Dark chocolate': 'Packaged Chocolate',
 'Cranberry Scone': 'Bakery',
 'Croissant': 'Bakery',
 'Almond Croissant': 'Bakery',
 'Ginger Biscotti': 'Bakery',
 'Oatmeal Scone': 'Bakery',
 'Ginger Scone': 'Bakery',
 'Chocolate syrup': 'Flavours',
 'Hazelnut syrup': 'Flavours',
 'Carmel syrup': 'Flavours',
 'Sugar Free Vanilla syrup': 'Flavours'}

In [28]:
recommendation_json = {}

antecedents = rules_basket['antecedents'].unique()

for antecedent in antecedents:
    df_rec = rules_basket[rules_basket['antecedents'] == antecedent].sort_values('confidence', ascending=False)
    key = "_".join(antecedent)
    recommendation_json[key] = []

    for _, row in df_rec.iterrows():
        rec_objects = row['consequents']
        for rec_object in rec_objects:
            seen = False
            for current_rec in recommendation_json[key]:
                if rec_object == current_rec['product']:
                    seen = True
            if seen:
                continue
            rec = {
                'product': rec_object,
                'product_category': product_categories[rec_object],
                'confidence': row['confidence']
            }

            recommendation_json[key].append(rec)

In [29]:
import pprint
pprint.pp(recommendation_json)

{'Almond Croissant': [{'product': 'Dark chocolate',
                       'product_category': 'Packaged Chocolate',
                       'confidence': 0.4967320261437908},
                      {'product': 'Latte',
                       'product_category': 'Coffee',
                       'confidence': 0.47058823529411764},
                      {'product': 'Cappuccino',
                       'product_category': 'Coffee',
                       'confidence': 0.46078431372549017}],
 'Cappuccino': [{'product': 'Sugar Free Vanilla syrup',
                 'product_category': 'Flavours',
                 'confidence': 0.29057337220602525},
                {'product': 'Chocolate syrup',
                 'product_category': 'Flavours',
                 'confidence': 0.2818270165208941},
                {'product': 'Carmel syrup',
                 'product_category': 'Flavours',
                 'confidence': 0.26433430515063167},
                {'product': 'Hazelnut syrup',
           

In [30]:
import json
with open('api/recommendation_objects/apriori_recommendation.json', 'w') as json_file:
    json.dump(recommendation_json, json_file)