# Putting it all together


## Imports



In [None]:
%load_ext autoreload
%autoreload 2

import pandas as ps
import numpy as np

import time
import turicreate as tc
from sklearn.model_selection import train_test_split
import json
import matplotlib.pyplot as plt

%matplotlib inline

import sys

## Importing CSV data

In [None]:
# Purchase log, loaded with its two columns named SKU and CUSTOMER.
# NOTE(review): with `names` supplied, header=1 makes pandas treat the file's
# second line as the header row being replaced, so the first two lines are
# not loaded as data -- confirm this is intended (header=0 skips only one).
transactions_raw = ps.read_csv(
    'transactions.csv',
    header=1,
    names=['SKU', 'CUSTOMER'],
)

# Product catalogue; later cells read its SKU, NAME and ATTRIBUTES columns.
products_raw = ps.read_csv('products_.csv')


## Defining methods

### Pie chart


In [None]:
def pie_chart(labels, sizes):
    """Draw and display a pie chart with one labelled slice per entry.

    Slices are ordered and plotted counter-clockwise starting at 90 degrees,
    and each slice is annotated with its percentage share to one decimal.

    Args:
        labels: Labels for the slices, in the same order as ``sizes``.
        sizes: Numeric size of each slice.
    """
    _, ax = plt.subplots()
    ax.pie(sizes, labels=labels, autopct='%1.1f%%',
           shadow=True, startangle=90)
    ax.axis('equal')  # Equal aspect ratio ensures the pie is drawn as a circle.

    plt.show()

### Get the customer data

In [None]:
def get_customer_data(customer_id):
    """Return the catalogue rows of every product bought by one customer.

    Looks up the customer's SKUs in ``transactions_raw``, selects the
    matching rows of ``products_raw`` and adds two columns extracted from
    the JSON text in ``ATTRIBUTES``: ``COLOR`` and ``BRAND`` (upper-cased).

    Args:
        customer_id: Value compared against the ``CUSTOMER`` column.

    Returns:
        A new DataFrame (independent of ``products_raw``) with the extra
        ``COLOR`` and ``BRAND`` columns.

    Raises:
        json.JSONDecodeError: If an ``ATTRIBUTES`` value is not valid JSON.
        KeyError: If a parsed attribute object lacks ``color`` or ``brand``.
    """
    is_customer = transactions_raw['CUSTOMER'] == customer_id
    purchased_skus = transactions_raw.loc[is_customer, 'SKU']

    # Copy the filtered rows so the new columns are written to an independent
    # frame instead of a slice of products_raw (avoids SettingWithCopyWarning).
    products = products_raw[products_raw['SKU'].isin(purchased_skus)].copy()

    # Parse each JSON blob once and reuse it for both derived columns.
    attributes = products['ATTRIBUTES'].map(json.loads)
    products['COLOR'] = attributes.map(lambda attrs: attrs['color'])
    products['BRAND'] = attributes.map(lambda attrs: attrs['brand'].upper())
    return products

### Visualize the Customer data

In [None]:
def visualize_customer_data(products):
    """Show three pie charts summarising a customer's products.

    The charts are drawn in this order: share per ``BRAND``, share per
    ``COLOR``, and share per product ``NAME`` (labelled as ``CATEGORY``).

    Args:
        products: DataFrame with ``BRAND``, ``COLOR`` and ``NAME`` columns.
    """
    def _tally(column, label):
        # Occurrences of each distinct value, as a two-column frame
        # (<label>, COUNT).
        occurrences = products[column].value_counts()
        return occurrences.rename_axis(label).reset_index(name='COUNT')

    # (source column, label column) pairs; all tallies are computed before
    # any chart is drawn.
    breakdowns = [
        (label, _tally(column, label))
        for column, label in (
            ('BRAND', 'BRAND'),
            ('COLOR', 'COLOR'),
            ('NAME', 'CATEGORY'),
        )
    ]
    for label, table in breakdowns:
        pie_chart(table[label], table['COUNT'])

### Normalize Data for model

In [None]:
def normalize_item_purchase_count(transactions):
    """Count purchases per (SKU, CUSTOMER) pair and normalise them per SKU.

    Args:
        transactions: DataFrame with ``SKU`` and ``CUSTOMER`` columns, one
            row per purchase.

    Returns:
        DataFrame with columns ``SKU``, ``CUSTOMER``, ``COUNT`` (number of
        rows for that pair) and ``COUNT_FREQ`` (``COUNT`` divided by the sum
        of ``COUNT`` over all customers of the same SKU).
    """
    pair_counts = (
        transactions
        .groupby(['SKU', 'CUSTOMER'])
        .size()
        .reset_index(name='COUNT')
    )
    # Total purchases of each SKU, broadcast back onto every row of that SKU.
    sku_totals = pair_counts.groupby('SKU')['COUNT'].transform('sum')
    return pair_counts.assign(COUNT_FREQ=pair_counts['COUNT'] / sku_totals)

### Train & Test data

In [None]:
def get_train_test_data(transactions, test_size=.2, random_state=None):
    """Randomly split transactions into train and test SFrames.

    Args:
        transactions: DataFrame of transactions to split.
        test_size: Fraction of rows held out for testing; forwarded to
            ``train_test_split``. Defaults to the previous fixed value 0.2.
        random_state: Optional seed forwarded to ``train_test_split``. The
            default ``None`` keeps the previous unseeded behaviour; pass an
            int to make the split reproducible between runs.

    Returns:
        Tuple ``(train, test)`` of ``tc.SFrame`` objects.
    """
    train, test = train_test_split(
        transactions, test_size=test_size, random_state=random_state
    )
    return tc.SFrame(train), tc.SFrame(test)

In [None]:
def get_normilized_data():
    """Build the train/test split from the normalised raw transactions.

    Runs ``normalize_item_purchase_count`` on ``transactions_raw`` and feeds
    the result to ``get_train_test_data``.

    Returns:
        Tuple ``(train, test)`` as produced by ``get_train_test_data``.
    """
    normalised = normalize_item_purchase_count(transactions_raw)
    return get_train_test_data(normalised)

### Model


In [None]:
def model(train_data, name, user_id, item_id, target, users_to_recommend, n_rec, n_display):
    """Train a Turi Create recommender and print a sample of its output.

    Args:
        train_data: Training data passed to the recommender's ``create``.
        name: Which recommender to build: ``'popularity'``, ``'cosine'`` or
            ``'pearson'`` (the latter two are item-similarity recommenders
            with the matching ``similarity_type``).
        user_id: Name of the user-id column.
        item_id: Name of the item-id column.
        target: Name of the target column.
        users_to_recommend: Users passed to ``recommend(users=...)``.
        n_rec: Number of recommendations requested (``k``).
        n_display: Number of recommendation rows to print.

    Returns:
        The trained recommender.

    Raises:
        ValueError: If ``name`` is not one of the supported recommenders.
    """
    # The local is called `recommender` so it does not shadow this function.
    if name == 'popularity':
        recommender = tc.popularity_recommender.create(train_data,
                                                       user_id=user_id,
                                                       item_id=item_id,
                                                       target=target)
    elif name in ('cosine', 'pearson'):
        recommender = tc.item_similarity_recommender.create(train_data,
                                                            user_id=user_id,
                                                            item_id=item_id,
                                                            target=target,
                                                            similarity_type=name)
    else:
        # Fail with a clear message instead of an UnboundLocalError below.
        raise ValueError(
            "Unknown model name %r; expected 'popularity', 'cosine' or 'pearson'"
            % (name,)
        )

    recommendations = recommender.recommend(users=users_to_recommend, k=n_rec)
    recommendations.print_rows(n_display)
    return recommender

### Getting a recommendation 

In [None]:
def get_model(similarity, train, target='COUNT'):
    """Train a recommender on the CUSTOMER/SKU columns with fixed settings.

    Thin wrapper around ``model`` that fixes the user column to
    ``'CUSTOMER'``, the item column to ``'SKU'``, requests 10
    recommendations and prints up to 30 rows.

    Args:
        similarity: Recommender name forwarded to ``model`` as ``name``.
        train: Training data forwarded to ``model``.
        target: Name of the target column; defaults to ``'COUNT'``.

    Returns:
        The trained recommender returned by ``model``.
    """
    return model(
        train_data=train,
        name=similarity,
        user_id='CUSTOMER',
        item_id='SKU',
        target=target,
        users_to_recommend=[],  # an empty user list is forwarded as-is
        n_rec=10,               # number of items to recommend
        n_display=30,           # rows of the recommendation table to print
    )


In [None]:
def merge_product_info(items):
    """Attach product name, brand and colour to a table of recommendations.

    Args:
        items: DataFrame with at least ``SKU``, ``rank`` and ``score``
            columns.

    Returns:
        DataFrame with columns ``rank``, ``score``, ``NAME``, ``BRAND`` and
        ``COLOR``, one row per distinct SKU in ``items``. It is a left merge,
        so SKUs missing from ``products_raw`` keep their row with missing
        product fields.

    Raises:
        json.JSONDecodeError: If an ``ATTRIBUTES`` value is not valid JSON.
        KeyError: If a parsed attribute object lacks ``color`` or ``brand``.
    """
    # Copy the filtered rows so the new columns are written to an independent
    # frame instead of a slice of products_raw (avoids SettingWithCopyWarning).
    catalogue = products_raw[products_raw['SKU'].isin(items['SKU'])].copy()

    # Parse each JSON blob once and reuse it for both derived columns.
    attributes = catalogue['ATTRIBUTES'].map(json.loads)
    catalogue['COLOR'] = attributes.map(lambda attrs: attrs['color'])
    catalogue['BRAND'] = attributes.map(lambda attrs: attrs['brand'].upper())

    merged = ps.merge(items, catalogue, how='left', on=['SKU'])
    merged = merged.drop_duplicates(subset=['SKU'])
    return merged[['rank', 'score', 'NAME', 'BRAND', 'COLOR']]

In [None]:
def get_recommendation_for_user(customer_id, trained_model):
    """Print one customer's recommendations and chart their purchase history.

    Args:
        customer_id: Value matched against the ``CUSTOMER`` column.
        trained_model: Despite the name, this is used as a table of
            recommendations: it is filtered on its ``CUSTOMER`` column and
            converted with ``to_dataframe()``. The main cell passes the
            result of ``recommend()`` here.
    """
    is_customer = trained_model['CUSTOMER'] == customer_id
    customer_recs = trained_model[is_customer].to_dataframe()

    # Recommendations enriched with product name, brand and colour.
    enriched = merge_product_info(customer_recs)
    print(enriched)

    # Pie charts of what the customer has already bought.
    purchase_history = get_customer_data(customer_id)
    visualize_customer_data(purchase_history)

### Evaluating models

In [None]:
def evaluate(models_w, models_names, test_data):
    """Compare several recommenders on the same test data.

    Args:
        models_w: List of trained recommenders to compare.
        models_names: Display names, passed as ``model_names``.
        test_data: Held-out data the models are evaluated on.

    Returns:
        Whatever ``tc.recommender.util.compare_models`` returns.
    """
    return tc.recommender.util.compare_models(
        test_data,
        models_w,
        model_names=models_names,
    )

## Main stuff
To get a recommendation list for a specific customer, set `customer_id` below and run this cell.

In [None]:
# Customer to produce recommendations for.
customer_id = 1006890

# Train a cosine item-similarity model on the per-SKU normalised counts.
train_norm, test_norm = get_normilized_data()
# `target` was misspelled as `tagret`, which made get_model raise TypeError.
trained_model = get_model('cosine', train_norm, target='COUNT_FREQ')

# Generate recommendations, then print and chart the ones for customer_id.
recommentations_data = trained_model.recommend()
get_recommendation_for_user(customer_id, recommentations_data)


## Run and evaluate

### Compare and evaluate models with default `COUNT` as target

In [None]:
# Fresh train/test split; train_norm and test_norm are reused by later cells.
train_norm, test_norm = get_normilized_data()
# Train each recommender on the default COUNT target and save it to disk
# right after training.
pop_norm = get_model('popularity', train_norm)
pop_norm.save('saved_models/pop_norm_count')
cos_norm = get_model('cosine', train_norm)
cos_norm.save('saved_models/cos_norm_count')
pear_norm = get_model('pearson', train_norm)
pear_norm.save('saved_models/pear_norm_count')

# Models and display names are listed in the same order.
models_w_norm = [pop_norm, cos_norm, pear_norm]
names_w_norm = ['Popularity Model on COUNTS', 'Cosine Similarity on COUNTS', 'Pearson Similarity on COUNTS']
# Compare the three models on the held-out test split.
evaluation_result = evaluate(models_w_norm, names_w_norm, test_norm)


### Compare and evaluate models with `COUNT_FREQ` as target

In [None]:
# Uses train_norm / test_norm defined by an earlier cell.
# Train each recommender on the COUNT_FREQ target and save it to disk right
# after training.
pop_norm_freq = get_model('popularity', train_norm, target="COUNT_FREQ")
pop_norm_freq.save('saved_models/pop_norm_freq')
cos_norm_freq = get_model('cosine', train_norm, target="COUNT_FREQ")
cos_norm_freq.save('saved_models/cos_norm_freq')
pear_norm_freq = get_model('pearson', train_norm, target="COUNT_FREQ")
pear_norm_freq.save('saved_models/pear_norm_freq')

# Models and display names are listed in the same order.
models_w_norm = [pop_norm_freq, cos_norm_freq, pear_norm_freq]
names_w_norm = ['Popularity Model on COUNTS_FREQ', 'Cosine Similarity on COUNTS_FREQ', 'Pearson Similarity on COUNTS_FREQ']
# evaluate() takes (models, names, test data); the names argument was
# missing here, which raised TypeError.
evaluation_result_freq = evaluate(models_w_norm, names_w_norm, test_norm)


### Compare and evaluate `Cosine` similarity of `COUNT` and `COUNT_FREQ` targets

In [None]:
# Head-to-head comparison of the two cosine models; cos_norm and
# cos_norm_freq must already exist from the earlier training cells.
models_w_norm = [cos_norm, cos_norm_freq]
names_w_norm = ['Cosine Similarity on COUNTS', 'Cosine Similarity on COUNTS_FREQ']
# NOTE(review): this reuses the name evaluation_result_freq and so overwrites
# the result of the COUNT_FREQ comparison cell -- confirm that is intended.
evaluation_result_freq = evaluate(models_w_norm, names_w_norm, test_norm)