In [None]:
import json
import pprint

import pandas as pd
from mlxtend.frequent_patterns import association_rules, apriori

# Read Dataset

In [None]:
# Load the raw sales receipts CSV (path is relative to the notebook's working
# directory) and preview the first rows.
sales_reciepts = pd.read_csv('dataset/201904 sales reciepts.csv')
sales_reciepts.head()

In [None]:
# Load the product catalogue CSV and preview the first rows.
product = pd.read_csv('dataset/product.csv')
product.head()

# Data Wrangling

## Merge Data

In [None]:
# Keep only the columns used downstream.
# Note: no trailing comma after the selection -- a trailing comma would wrap the
# DataFrame in a 1-tuple and pd.merge would reject it.
sales_reciepts = sales_reciepts[['transaction_id','transaction_date', 'sales_outlet_id', 'customer_id', 'product_id', 'quantity']]
product = product[['product_id', 'product_category', 'product']]

# Left join: every receipt line is kept, even when its product_id has no match
# in the product table (the product columns are then NaN).
dataset = pd.merge(sales_reciepts, product, on='product_id', how="left")
dataset.head()

## Remove sizes

In [None]:
# List the distinct product names that contain "Dark chocolate"
# (case-sensitive match), to see how one product appears under several names.
dataset[dataset["product"].str.contains("Dark chocolate")]['product'].unique()

In [None]:
# Number of distinct product names before the size suffixes are stripped.
dataset["product"].nunique()

In [None]:
# Strip the size markers from product names so that all sizes of a product
# share one name. Each marker is removed wherever it occurs in the string.
for size_marker in (' Rg', ' Sm', ' Lg'):
    dataset["product"] = dataset["product"].str.replace(size_marker, '')

In [None]:
# Number of distinct product names after the size suffixes have been stripped.
dataset["product"].nunique()

In [None]:
print (dataset["product"].unique())

## Choose Product Subset

In [None]:
# Product names retained for the recommendation engines; rows whose product is
# not in this list are filtered out of the dataset.
products_to_take = [
    'Cappuccino',
    'Latte',
    'Espresso shot',
    'Dark chocolate',
    'Sugar Free Vanilla syrup',
    'Chocolate syrup',
    'Carmel syrup',
    'Hazelnut syrup',
    'Ginger Scone',
    'Chocolate Croissant',
    'Jumbo Savory Scone',
    'Cranberry Scone',
    'Hazelnut Biscotti',
    'Croissant',
    'Almond Croissant',
    'Oatmeal Scone',
    'Chocolate Chip Biscotti',
    'Ginger Biscotti',
]

In [None]:
# Keep only rows for the selected products. .copy() makes the result an
# independent frame, so the column assignment performed later in the notebook
# does not raise pandas' SettingWithCopyWarning.
dataset = dataset[dataset['product'].isin(products_to_take)].copy()

In [None]:
dataset.head()

In [None]:
dataset[['product', 'product_category']].drop_duplicates().reset_index(drop=True)

## Clean transactions

In [None]:
# Build a basket key of the form "<transaction_id>_<customer_id>".
# NOTE(review): assumes this pair identifies exactly one basket -- confirm that
# transaction_id is not reused across transaction_date / sales_outlet_id.
txn_part = dataset['transaction_id'].astype(str)
customer_part = dataset['customer_id'].astype(str)
dataset['transaction'] = txn_part + '_' + customer_part

In [None]:
dataset.head()

In [None]:
# Count how many rows each transaction key has; reset_index() turns the
# resulting Series into a two-column frame.
# The following cells read the columns 'transaction' and 'count' from it
# (presumably the naming produced by pandas >= 2.0 -- TODO confirm version).
num_of_items_for_each_transaction = dataset['transaction'].value_counts().reset_index()
num_of_items_for_each_transaction.head()

In [None]:
# Transactions that consist of exactly one row.
num_of_items_for_each_transaction[num_of_items_for_each_transaction['count']==1]

In [None]:
# Transaction keys with more than one row; only these are kept for modelling.
has_multiple_items = num_of_items_for_each_transaction['count'] > 1
valid_transactions = num_of_items_for_each_transaction.loc[has_multiple_items, 'transaction'].tolist()

valid_transactions[:10]

In [None]:
# Drop every row whose transaction key is not in valid_transactions.
in_valid_transaction = dataset['transaction'].isin(valid_transactions)
dataset = dataset[in_valid_transaction]

In [None]:
dataset.shape

## Product Trends

In [None]:
dataset["product_category"].value_counts()

In [None]:
dataset["product"].value_counts()

## Popularity Recommendation Engine

In [None]:
# Per (product, product_category) pair, count the non-null values of every
# other column; the grouping keys stay as ordinary columns.
product_recommendation = dataset.groupby(["product", "product_category"], as_index=False).count()

In [None]:
# Keep the transaction_id count as the popularity score. It counts dataset rows
# with a non-null transaction_id per product, not distinct transactions.
product_recommendation = (
    product_recommendation[['product', 'product_category', "transaction_id"]]
    .rename(columns={"transaction_id": "number_of_transactions"})
)

In [None]:
product_recommendation.head()

In [None]:
# Persist the popularity table as CSV without the index column.
# The target directory must already exist; to_csv does not create it.
product_recommendation.to_csv('api/recommendation_objects/popularity_recommendation.csv',index=False)

# Apriori Recommendation Engine

In [None]:
# Long-format basket: one row per (transaction, product) pair, with the number
# of dataset rows for that pair stored in 'Count'.
train_basket = (
    dataset
    .groupby(['transaction', 'product'])['product']
    .count()
    .reset_index(name='Count')
)
train_basket.head()

In [None]:
# Wide-format basket: one row per transaction, one column per product.
# Products absent from a transaction come out of the pivot as NaN and are
# filled with 0.
basket_pivot = train_basket.pivot_table(index='transaction', columns='product', values='Count')
my_basket = basket_pivot.fillna(0)
my_basket.head()

In [None]:
def encode_units(x):
    """Binarise a basket cell.

    Parameters
    ----------
    x : number
        Count stored in one cell of the basket matrix.

    Returns
    -------
    int
        1 when ``x >= 1``, otherwise 0. Values strictly between 0 and 1 and
        NaN map to 0, so the function never falls through and returns None.
    """
    return 1 if x >= 1 else 0
    
# One-hot encode the basket as booleans: True when the product occurs at least
# once in the transaction. A vectorised comparison replaces the element-wise
# DataFrame.applymap call (deprecated in recent pandas), and a boolean frame is
# the input type mlxtend's apriori recommends.
my_basket_sets = my_basket.ge(1)
my_basket_sets.head(10)

In [None]:
# Mine frequent itemsets: min_support=0.05 keeps itemsets whose support is at
# least 0.05; use_colnames=True reports items by product name rather than by
# column index.
frequent_items = apriori(my_basket_sets, min_support=0.05, use_colnames=True)
frequent_items.head()

In [None]:
# Derive association rules from the frequent itemsets, keeping only rules whose
# lift is at least 1.
rules_basket = association_rules(frequent_items, metric="lift", min_threshold=1)
rules_basket.head()

In [None]:
# Rules whose antecedent is exactly {'Latte'}, highest confidence first.
rules_basket[rules_basket['antecedents'] == {'Latte'}].sort_values('confidence', ascending=False)

## Save in JSON Format

In [None]:
# Lookup table {product name: product category}, built from the distinct
# (product, product_category) pairs in the dataset.
product_categories = (
    dataset[['product', 'product_category']]
    .drop_duplicates()
    .set_index('product')['product_category']
    .to_dict()
)

In [None]:
# Build {antecedent key: [recommendation, ...]} from the association rules.
# Each recommendation is a dict with the consequent product, its category and
# the confidence of the strongest rule that produced it.
recommendations_json = {}

antecedents = rules_basket['antecedents'].unique()
for antecedent in antecedents:
    # Strongest rules first, so the first time a product is seen it carries
    # its highest confidence.
    df_rec = rules_basket[rules_basket['antecedents'] == antecedent]
    df_rec = df_rec.sort_values('confidence', ascending=False)

    # Antecedents are frozensets; sort the names so that multi-item keys are
    # stable across runs (set iteration order of strings is not).
    # NOTE(review): whatever reads this JSON must build its lookup key the
    # same way (sorted product names joined by "_").
    key = "_".join(sorted(antecedent))
    recommendations_json[key] = []

    # Exact-name bookkeeping. A substring test would wrongly treat
    # 'Croissant' as a duplicate of 'Chocolate Croissant'.
    already_recommended = set()
    for _, row in df_rec.iterrows():
        for rec_object in sorted(row['consequents']):
            if rec_object in already_recommended:
                continue
            already_recommended.add(rec_object)

            rec = {
                "product": rec_object,
                "product_category": product_categories[rec_object],
                # Plain Python float keeps the value JSON-serialisable.
                "confidence": float(row['confidence']),
            }
            recommendations_json[key].append(rec)

In [None]:
# Pretty-print the recommendation mapping for a visual check.
# pprint is imported in the notebook's top import cell.
pprint.pprint(recommendations_json)

In [None]:
# Write the recommendation mapping as JSON. The target directory must already
# exist. json is imported in the notebook's top import cell.
with open("api/recommendation_objects/apriori_recommendation.json", "w") as json_file:
    json.dump(recommendations_json, json_file)