## Init

In [None]:
# !pip install mlxtend
# !pip install pandas 
# !pip install pprintpp

In [None]:
import pandas as pd
import numpy as np
from mlxtend.frequent_patterns import association_rules, apriori




## Get data


In [None]:
# Load the April 2019 sales receipts export and preview its first rows.
# The file name (including its spelling) is reproduced as it exists on disk.
receipts_csv = 'data/201904 sales reciepts.csv'
sales_reciepts = pd.read_csv(receipts_csv)
sales_reciepts.head()

In [None]:
# Load the product table that the receipts are joined against below,
# then preview its first rows.
product_csv = 'data/product.csv'
product = pd.read_csv(product_csv)
product.head()



## Data merging



In [None]:
# Attach the product name and category to every receipt line.
# A left join keeps receipt lines even when their product_id has no match in
# the product table (those rows get NaN for the product columns).
receipt_columns = ['transaction_id', 'transaction_date', 'sales_outlet_id',
                   'customer_id', 'product_id', 'quantity']
product_columns = ['product_id', 'product_category', 'product']

dataset = sales_reciepts[receipt_columns].merge(
    product[product_columns],
    on='product_id',
    how='left',
)
dataset.head()

In [3]:
# Peek at one product family to see the size variants that are about to be
# merged. na=False keeps the mask boolean when the left merge produced rows
# without a product name (a NaN in the mask would make the indexing raise).
dark_chocolate_mask = dataset['product'].str.contains('Dark chocolate', na=False)
print(dataset.loc[dark_chocolate_mask, 'product'].unique())
print('distinct products before stripping sizes:', dataset['product'].nunique())

# Collapse the size variants (" Rg", " Sm", " Lg") into a single product name.
# One regex pass replaces the three chained replaces; the trailing \b makes
# sure only a whole size token is removed, so a name that merely starts a word
# with the same letters (e.g. " Smoothie") is left intact.
dataset['product'] = dataset['product'].str.replace(r' (?:Rg|Sm|Lg)\b', '', regex=True)

# Distinct product count after the size variants were merged.
dataset['product'].nunique()

## Products subset


In [None]:
# Products kept for the recommendation engines, grouped by kind.
# Names are matched verbatim against dataset['product'], so spellings
# (e.g. 'Carmel syrup') must stay exactly as they appear in the data.
products_to_take = [
    # drinks
    'Cappuccino',
    'Latte',
    'Espresso shot',
    'Dark chocolate',
    # syrups
    'Sugar Free Vanilla syrup',
    'Chocolate syrup',
    'Carmel syrup',
    'Hazelnut syrup',
    # bakery
    'Ginger Scone',
    'Chocolate Croissant',
    'Jumbo Savory Scone',
    'Cranberry Scone',
    'Hazelnut Biscotti',
    'Croissant',
    'Almond Croissant',
    'Oatmeal Scone',
    'Chocolate Chip Biscotti',
    'Ginger Biscotti',
]

In [None]:
# Restrict the data to the selected products.
# .copy() makes the result an independent frame: a new column is assigned to
# `dataset` further down, and doing that on a filtered slice of the original
# frame triggers pandas' SettingWithCopyWarning.
dataset = dataset[dataset['product'].isin(products_to_take)].copy()

# Number of distinct products that survived the filter.
dataset['product'].nunique()

In [None]:
# Show every distinct (product, product_category) pair left in the data,
# renumbered from 0 for readability.
product_category_pairs = dataset[['product', 'product_category']].drop_duplicates()
product_category_pairs.reset_index(drop=True)


## Clean


In [None]:
# Build the basket key as "<transaction_id>_<customer_id>".
# NOTE(review): presumably transaction_id alone is not unique across the
# file, hence the composite key — confirm against the data.
transaction_part = dataset['transaction_id'].astype(str)
customer_part = dataset['customer_id'].astype(str)
dataset['transaction'] = transaction_part + "_" + customer_part

In [None]:
# Count how many receipt lines each basket has.
# The index and the count column are named explicitly so the frame always has
# the columns ['transaction', 'count']; the labels produced by a bare
# value_counts().reset_index() changed in pandas 2.0.
num_of_items_for_each_transaction = (
    dataset['transaction']
    .value_counts()
    .rename_axis('transaction')
    .reset_index(name='count')
)

# Only baskets with more than one line can say anything about co-purchases.
has_multiple_items = num_of_items_for_each_transaction['count'] > 1
valid_transactions = num_of_items_for_each_transaction.loc[has_multiple_items, 'transaction'].tolist()

dataset = dataset[dataset['transaction'].isin(valid_transactions)]
dataset.shape


## Product Trends



In [None]:
# Receipt lines per product category, most frequent first.
category_counts = dataset["product_category"].value_counts()
category_counts

In [None]:
# Receipt lines per product, most frequent first.
product_counts = dataset['product'].value_counts()
product_counts

## Popularity Recommendation Engine



In [None]:
# Popularity baseline: for every (product, category) pair, count the receipt
# lines that carry a transaction_id. The count is per line, so a basket that
# lists the same product twice contributes twice.
popularity_recommendation = (
    dataset
    .groupby(['product', 'product_category'])['transaction_id']
    .count()
    .reset_index(name='number_of_transactions')
)

# Persist the table without the index column.
popularity_recommendation.to_csv('api/recomend/popularity_recommendation.csv', index=False)

## Apriori Recommendation Engine


In [None]:
# Long-format basket table: one row per (transaction, product) with the
# number of receipt lines for that pair in 'Count'.
basket_groups = dataset.groupby(['transaction', 'product'])
train_basket = basket_groups.size().reset_index(name='Count')
train_basket.head(10)

In [None]:
# Wide-format basket matrix: one row per transaction, one column per product,
# cells hold the summed 'Count'. Combinations that never occur come out of the
# pivot as NaN and are filled with 0.
basket_pivot = pd.pivot_table(
    train_basket,
    index='transaction',
    columns='product',
    values='Count',
    aggfunc='sum',
)
my_basket = basket_pivot.fillna(0)
my_basket.head()

In [None]:
def encode_units(x):
    """Map a basket cell count to a 0/1 purchase flag.

    Args:
        x: Number of times a product appears in a transaction.

    Returns:
        1 when ``x`` is strictly positive, otherwise 0. A NaN input yields 0.
    """
    # A single strict test replaces the overlapping `<= 0` / `>= 0` branches.
    # NaN fails every comparison, so it previously fell through both branches
    # and returned None; it now takes the else arm.
    return 1 if x > 0 else 0

# One-hot encode the basket matrix: True where the product was bought at
# least once in the transaction, False otherwise.
# A vectorised comparison replaces DataFrame.applymap, which is deprecated
# since pandas 2.1 and calls a Python function once per cell. The boolean
# result is also the input type mlxtend's apriori expects without warning;
# True/False are treated exactly like the previous 1/0 flags.
my_basket_sets = my_basket > 0
my_basket_sets.head(10)

In [None]:
# Mine the itemsets that occur in at least 5% of the baskets; use_colnames
# reports them by product name instead of column position.
apriori_options = {'min_support': 0.05, 'use_colnames': True}
frequent_items = apriori(my_basket_sets, **apriori_options)
frequent_items.head()

In [None]:
# Derive association rules from the frequent itemsets, keeping only rules
# whose lift is at least 1.
rule_options = {'metric': "lift", 'min_threshold': 1}
rules_basket = association_rules(frequent_items, **rule_options)
rules_basket.head()

In [None]:
# Persist the rules table next to the notebook so it can be reloaded later.
pd.to_pickle(rules_basket, 'rules_basket.pkl')


In [None]:
# Spot check: rules whose antecedent is exactly {Latte}, strongest first.
# Each antecedent cell is a set of product names, so the comparison is done
# element by element against the set {'Latte'}.
latte_mask = rules_basket['antecedents'] == {'Latte'}
latte_rules = rules_basket.loc[latte_mask].sort_values('confidence', ascending=False)
latte_rules.head(50)


## To JSON


In [None]:
# Lookup table: product name -> product category.
# If a product were listed under several categories, the last pair wins.
category_pairs = dataset[['product', 'product_category']].drop_duplicates()
product_categories = dict(zip(category_pairs['product'], category_pairs['product_category']))


In [None]:
# Build the lookup served as JSON:
#   "<antecedent products joined by '_'>" -> list of recommended products,
# each with its category and the confidence of the strongest rule that
# recommends it, ordered from highest to lowest confidence.
recommendations_json = {}

antecedents = rules_basket['antecedents'].unique()
for antecedent in antecedents:
    df_rec = rules_basket[rules_basket['antecedents'] == antecedent]
    df_rec = df_rec.sort_values('confidence', ascending=False)

    # The antecedent is a set, whose iteration order is not guaranteed to be
    # the same from run to run. Sorting makes the key for a multi-product
    # antecedent reproducible (single-product keys are unaffected).
    key = "_".join(sorted(antecedent))

    recommendations = []
    seen_products = set()  # O(1) duplicate check instead of rescanning the list
    for consequents, confidence in zip(df_rec['consequents'], df_rec['confidence']):
        # Sorted for the same reproducibility reason as the key above.
        for rec_object in sorted(consequents):
            # Rows are visited strongest-first, so the first time a product
            # shows up it already carries its best confidence.
            if rec_object in seen_products:
                continue
            seen_products.add(rec_object)
            recommendations.append({
                'product': rec_object,
                'product_category': product_categories[rec_object],
                'confidence': float(confidence),
            })

    recommendations_json[key] = recommendations

In [None]:
import pprint

# Pretty-print the recommendation lookup, keeping dict keys in insertion
# order rather than sorting them.
pprint.pprint(recommendations_json, sort_dicts=False)

In [None]:
import json

# Write the recommendation lookup as JSON for the API to load.
# NOTE(review): the popularity CSV is written under 'api/recomend/' while this
# file goes to 'recomend/' — confirm that both locations are intended.
with open('recomend/apriori_recommendations.json', 'w') as json_file:
    json_file.write(json.dumps(recommendations_json))