This notebook builds the recommendation data that is used as input for our recommendation agent. The recommendations are based on previous transactions.

In [1]:
import pandas as pd 
from mlxtend.frequent_patterns import association_rules, apriori 

# Read Dataset

In [2]:
# Load the sales receipts CSV into a DataFrame.
sales_reciepts_path = 'dataset/201904_sales_reciepts.csv'
sales_reciepts = pd.read_csv(sales_reciepts_path)

In [3]:
# Preview the first five receipt rows to sanity-check the load.
sales_reciepts.head(n=5)

Unnamed: 0,transaction_id,transaction_date,transaction_time,sales_outlet_id,staff_id,customer_id,instore_yn,order,line_item_id,product_id,quantity,line_item_amount,unit_price,promo_item_yn
0,7,2019-04-01,12:04:43,3,12,558,N,1,1,52,1,2.5,2.5,N
1,11,2019-04-01,15:54:39,3,17,781,N,1,1,27,2,7.0,3.5,N
2,19,2019-04-01,14:34:59,3,17,788,Y,1,1,46,2,5.0,2.5,N
3,32,2019-04-01,16:06:04,3,12,683,N,1,1,23,2,5.0,2.5,N
4,33,2019-04-01,19:18:37,3,17,99,Y,1,1,34,1,2.45,2.45,N


In [4]:
# Load the product catalogue CSV into a DataFrame.
product_path = 'dataset/product.csv'
product = pd.read_csv(product_path)

In [5]:
# Preview the first five catalogue rows to sanity-check the load.
product.head(n=5)

Unnamed: 0.1,Unnamed: 0,product_id,product_group,product_category,product_type,product,product_description,unit_of_measure,current_wholesale_price,current_retail_price,tax_exempt_yn,promo_yn,new_product_yn
0,0,1,Whole Bean/Teas,Coffee beans,Organic Beans,Brazilian - Organic,It's like Carnival in a cup. Clean and smooth.,12 oz,14.4,$18.00,Y,N,N
1,1,2,Whole Bean/Teas,Coffee beans,House blend Beans,Our Old Time Diner Blend,Out packed blend of beans that is reminiscent ...,12 oz,14.4,$18.00,Y,N,N
2,2,3,Whole Bean/Teas,Coffee beans,Espresso Beans,Espresso Roast,Our house blend for a good espresso shot.,1 lb,11.8,$14.75,Y,N,N
3,3,4,Whole Bean/Teas,Coffee beans,Espresso Beans,Primo Espresso Roast,Our primium single source of hand roasted beans.,1 lb,16.36,$20.45,Y,N,N
4,4,5,Whole Bean/Teas,Coffee beans,Gourmet Beans,Columbian Medium Roast,A smooth cup of coffee any time of day.,1 lb,12.0,$15.00,Y,N,N


# Data Merging 

In [6]:
# Attach the product name and category to every receipt row.
# how='left' keeps receipt rows even when a product_id has no catalogue match.
# validate='many_to_one' makes pandas raise MergeError if product_id is
# duplicated in the product table; without the check such duplicates would
# silently multiply receipt rows and inflate every count computed downstream.
receipt_columns = ['transaction_id', 'transaction_date', 'sales_outlet_id',
                   'customer_id', 'product_id', 'quantity']
product_columns = ['product_id', 'product_category', 'product']
dataset = pd.merge(
    sales_reciepts[receipt_columns],
    product[product_columns],
    on='product_id',
    how='left',
    validate='many_to_one',
)

In [7]:
# Preview the merged receipt/product rows.
dataset.head(n=5)

Unnamed: 0,transaction_id,transaction_date,sales_outlet_id,customer_id,product_id,quantity,product_category,product
0,7,2019-04-01,3,558,52,1,Tea,Traditional Blend Chai Rg
1,11,2019-04-01,3,781,27,2,Coffee,Brazilian Lg
2,19,2019-04-01,3,788,46,2,Tea,Serenity Green Tea Rg
3,32,2019-04-01,3,683,23,2,Coffee,Our Old Time Diner Blend Rg
4,33,2019-04-01,3,99,34,1,Coffee,Jamaican Coffee River Sm


In [8]:
# Remove sizes
# Strip a trailing size marker (" Rg", " Sm" or " Lg") so that every size of a
# product shares one name. The pattern is anchored to the end of the string:
# unanchored replacements would also delete " Sm"/" Lg"/" Rg" from the middle
# of a name (e.g. "Jumbo Smoothie" -> "Jumbooothie").
# regex=True is explicit because the default of str.replace differs between
# pandas versions.
dataset['product'] = dataset['product'].str.replace(r' (?:Rg|Sm|Lg)$', '', regex=True)

In [9]:
# Check that the size suffix is gone from the product names.
dataset.head(n=2)

Unnamed: 0,transaction_id,transaction_date,sales_outlet_id,customer_id,product_id,quantity,product_category,product
0,7,2019-04-01,3,558,52,1,Tea,Traditional Blend Chai
1,11,2019-04-01,3,781,27,2,Coffee,Brazilian


In [10]:
# Choose product
# Only rows whose product name appears in this list are kept for the analysis.
# (Line-continuation backslashes are unnecessary inside brackets.)
products_to_take = [
    'Cappucin', 'Direct', 'Espresso shot',
    'Dark chocolate', 'Sugar Free Vanilla syrup', 'Chocolate syrup',
    'Carmel syrup', 'Hazelnut syrup', 'Ginger Scone',
    'Chocolate Croissant', 'Jumbo Savory Scone', 'Cranberry Scone', 'Hazelnut Biscotti',
    'Croissant', 'Almond Croissant', 'Oatmeal Scone', 'Chocolate Chip Biscotti',
    'Ginger Biscotti',
]
# .copy() returns an independent frame, so columns can be added to `dataset`
# afterwards without raising SettingWithCopyWarning.
dataset = dataset[dataset['product'].isin(products_to_take)].copy()

In [11]:
# Clean transactions
# Basket key: transaction id and customer id joined with an underscore.
# NOTE(review): transaction_date and sales_outlet_id are not part of the key.
# If transaction ids repeat across days or outlets, unrelated purchases would
# be merged into one basket -- confirm against the source data.
dataset = dataset.assign(
    transaction=dataset['transaction_id'].astype(str) + "_" + dataset['customer_id'].astype(str)
)

# Rows per basket, with explicit column names. The column names produced by a
# bare value_counts().reset_index() changed in pandas 2.0, so relying on an
# implicit 'count' column raises KeyError on older versions.
num_of_items_for_each_transaction = (
    dataset['transaction']
    .value_counts()
    .rename_axis('transaction')
    .reset_index(name='count')
)

# Keep only baskets with more than one row; a single-row basket cannot
# contribute to any co-occurrence rule.
valid_transactions = num_of_items_for_each_transaction.loc[
    num_of_items_for_each_transaction['count'] > 1, 'transaction'
].tolist()
dataset = dataset[dataset['transaction'].isin(valid_transactions)].copy()

# Recommendation Engine

In [12]:
# Popularity recommendation engine
# Count the rows (non-null transaction_id values) for each
# (product, product_category) pair and export the table as CSV.
popularity_counts = dataset.groupby(['product', 'product_category'])['transaction_id'].count()
popularity_recommendation = popularity_counts.reset_index(name='number_of_transactions')
popularity_recommendation.to_csv(
    'api/recommendation_objects/popularity_recommendation.csv',
    index=False,
)

In [13]:
# Apriori recommendation engine
# Long-format table: number of rows for each (transaction, product) pair.
basket_product_counts = dataset.groupby(['transaction', 'product'])['product'].count()
train_basket = basket_product_counts.reset_index(name='Count')
train_basket.head(n=10)

Unnamed: 0,transaction,product,Count
0,1000_0,Dark chocolate,1
1,1000_0,Oatmeal Scone,1
2,1001_8306,Cappucin,1
3,1001_8306,Carmel syrup,1
4,1002_0,Carmel syrup,1
5,1002_0,Dark chocolate,2
6,1002_0,Espresso shot,1
7,1002_0,Ginger Scone,1
8,1004_5383,Chocolate syrup,1
9,1004_5383,Direct,1


In [14]:
# Reshape to one row per transaction and one column per product; combinations
# that never occur are filled with 0.
basket_matrix = train_basket.pivot_table(
    values='Count',
    index='transaction',
    columns='product',
    aggfunc='sum',
)
my_basket = basket_matrix.fillna(0)
my_basket.head(n=5)

product,Almond Croissant,Cappucin,Carmel syrup,Chocolate Chip Biscotti,Chocolate Croissant,Chocolate syrup,Cranberry Scone,Croissant,Dark chocolate,Direct,Espresso shot,Ginger Biscotti,Ginger Scone,Hazelnut Biscotti,Hazelnut syrup,Jumbo Savory Scone,Oatmeal Scone,Sugar Free Vanilla syrup
transaction,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
1000_0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
1001_8306,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1002_0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
1004_5383,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1005_0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0


In [15]:
def encode_units(x):
  """Map a purchase count to a 0/1 presence flag.

  Args:
    x: Numeric count of a product within one basket.

  Returns:
    1 if ``x`` is strictly positive, otherwise 0. A NaN count also yields 0,
    because the comparison ``x > 0`` is False for NaN.
  """
  # A single strict comparison replaces the overlapping `x <= 0` / `x >= 0`
  # checks, which let NaN fall through both branches and return None.
  return 1 if x > 0 else 0

# Binarise the basket matrix: True where the product occurs at least once in
# the transaction. A vectorised comparison replaces DataFrame.applymap, which
# is deprecated since pandas 2.1, and yields the boolean frame that mlxtend
# recommends as apriori input. The flags agree element by element with
# encode_units (1 -> True, 0 -> False).
my_basket_sets = my_basket > 0

  my_basket_sets = my_basket.applymap(encode_units)


In [16]:
# Mine the itemsets whose support is at least 0.05, labelled with the product
# names (column names) rather than column indices.
frequent_items = apriori(
    my_basket_sets,
    min_support=0.05,
    use_colnames=True,
)
frequent_items.head(n=5)



Unnamed: 0,support,itemsets
0,0.132471,(Almond Croissant)
1,0.247993,(Cappucin)
2,0.193131,(Carmel syrup)
3,0.125781,(Chocolate Chip Biscotti)
4,0.156557,(Chocolate Croissant)


In [18]:
# Derive association rules from the frequent itemsets, keeping those whose
# lift is at least 1.
rules_basket = association_rules(
    frequent_items,
    metric="lift",
    min_threshold=1,
)
rules_basket.head(n=5)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
0,(Dark chocolate),(Almond Croissant),0.323818,0.132471,0.067797,0.209366,1.58047,1.0,0.0249,1.097258,0.543162,0.174512,0.088637,0.360575
1,(Almond Croissant),(Dark chocolate),0.132471,0.323818,0.067797,0.511785,1.58047,1.0,0.0249,1.385007,0.423359,0.174512,0.277982,0.360575
2,(Carmel syrup),(Cappucin),0.193131,0.247993,0.066905,0.34642,1.396896,1.0,0.019009,1.150597,0.352136,0.178784,0.130886,0.308102
3,(Cappucin),(Carmel syrup),0.247993,0.193131,0.066905,0.269784,1.396896,1.0,0.019009,1.104973,0.377825,0.178784,0.095001,0.308102
4,(Cappucin),(Chocolate syrup),0.247993,0.196253,0.075825,0.305755,1.557963,1.0,0.027156,1.157728,0.47624,0.205811,0.13624,0.34606


In [19]:
# Persist the association rules to a pickle file in the working directory.
rules_pickle_path = 'rules_basket.pkl'
rules_basket.to_pickle(rules_pickle_path)

# Save in JSON Format

In [20]:
# Lookup table: product name -> product category.
product_category_pairs = dataset[['product', 'product_category']].drop_duplicates()
product_categories = product_category_pairs.set_index('product')['product_category'].to_dict()

In [21]:
# For every rule antecedent, build a de-duplicated list of recommended
# products ordered from highest to lowest confidence.
recommendations_json = {}
for antecedent in rules_basket['antecedents'].unique():
    # mlxtend stores antecedents as frozensets. Sorting the members gives a
    # multi-item antecedent the same key on every run, independent of set
    # iteration order.
    key = "_".join(sorted(antecedent))

    df_rec = rules_basket[rules_basket['antecedents'] == antecedent]
    df_rec = df_rec.sort_values('confidence', ascending=False)

    recommendations = []
    seen_products = set()  # products already recommended under this key
    # Only two columns are needed, so iterate over them directly instead of
    # materialising a Series per row with iterrows().
    for consequents, confidence in zip(df_rec['consequents'], df_rec['confidence']):
        # Sorted for a deterministic order among items of the same rule.
        for rec_object in sorted(consequents):
            if rec_object in seen_products:
                # Rules are visited in descending confidence, so the entry
                # already stored carries the highest confidence.
                continue
            seen_products.add(rec_object)
            recommendations.append({
                'product': rec_object,
                'product_category': product_categories[rec_object],
                # Plain Python float keeps the value JSON-serialisable.
                'confidence': float(confidence),
            })
    recommendations_json[key] = recommendations

In [23]:
import pprint
# Pretty-print the recommendations, keeping the dict keys in insertion order.
pprint.pprint(recommendations_json, sort_dicts=False)

{'Dark chocolate': [{'product': 'Chocolate Croissant',
                     'product_category': 'Bakery',
                     'confidence': 0.2190082644628099},
                    {'product': 'Ginger Scone',
                     'product_category': 'Bakery',
                     'confidence': 0.21763085399449036},
                    {'product': 'Almond Croissant',
                     'product_category': 'Bakery',
                     'confidence': 0.209366391184573},
                    {'product': 'Cranberry Scone',
                     'product_category': 'Bakery',
                     'confidence': 0.20523415977961432},
                    {'product': 'Carmel syrup',
                     'product_category': 'Flavours',
                     'confidence': 0.20110192837465563},
                    {'product': 'Chocolate Chip Biscotti',
                     'product_category': 'Bakery',
                     'confidence': 0.19559228650137742},
                    {'product': 'Jumbo S

In [24]:
import json
# Write the recommendations mapping to a JSON file under
# api/recommendation_objects.
apriori_json_path = 'api/recommendation_objects/apriori_recommendations.json'
with open(apriori_json_path, mode='w') as out_file:
    json.dump(recommendations_json, fp=out_file)