# Product Recommendation

There are heaps of prediction models that can be developed in this space, all the way from sales forecasts to advertising. But it wouldn't be right if we didn't try to implement the sexiest model going around: product recommendation using collaborative filtering. Let's dive in.

In [1]:
import pandas as pd
import tensorflow as tf
import numpy as np
from scipy.sparse import coo_matrix
from sklearn.model_selection import train_test_split
from tensorflow.contrib.factorization.python.ops import factorization_ops
np.set_printoptions(suppress=True)

In [2]:
# Read the three raw tables from the local data directory.
customer = pd.read_csv("data/customer.csv")
product = pd.read_csv("data/product.csv")
transaction = pd.read_csv("data/transaction.csv")
# The transaction key column may carry a leading BOM character; rename it to plain 'SalesID'.
transaction = transaction.rename(columns={'\ufeffSalesID': 'SalesID'})
customer.head(5)

Unnamed: 0,﻿CustomerID,FirstName,MiddleInitial,LastName,CityID,Address
0,1,Stefanie,Y,Frye,79,97 Oak Avenue
1,2,Sandy,T,Kirby,96,52 White First Freeway
2,3,Lee,T,Zhang,55,921 White Fabien Avenue
3,4,Regina,S,Avery,40,75 Old Avenue
4,5,Daniel,S,Mccann,2,283 South Green Hague Avenue


In [3]:
product.head(5)

Unnamed: 0,﻿ProductID,ProductName,Price,CategoryID,Class,ModifyDate,Resistant,IsAllergic,VitalityDays
0,1,Flour - Whole Wheat,742988,3,Medium,2018-02-16 08:21:49.190,Durable,,
1,2,Cookie Chocolate Chip With,912329,3,Medium,2017-02-12 11:39:10.970,,,
2,3,Onions - Cippolini,91379,9,Medium,2018-03-15 08:11:51.560,Weak,False,111.0
3,4,"Sauce - Gravy, Au Jus, Mix",543055,9,Medium,2017-07-16 00:46:28.880,Durable,,
4,5,Artichokes - Jerusalem,654771,2,Low,2017-08-16 14:13:35.430,Durable,True,27.0


In [4]:
transaction.head(5)

Unnamed: 0,SalesID,SalesPersonID,CustomerID,ProductID,Quantity,Discount,TotalPrice,SalesDate,TransactionNumber
0,1,6,27039,381,7,,0,2018-02-05 07:38:25.430,FQL4S94E4ME1EZFTG42G
1,2,16,25011,61,7,,0,2018-02-02 16:03:31.150,12UGLX40DJ1A5DTFBHB8
2,3,13,94024,23,24,,0,2018-05-03 19:31:56.880,5DT8RCPL87KI5EORO7B0
3,4,8,73966,176,19,0.2,0,2018-04-07 14:43:55.420,R3DR9MLD5NR76VO17ULE
4,5,10,32653,310,9,,0,2018-02-12 15:37:03.940,4BGS0Z5OMAZ8NDAFHHP3


Collaborative filtering is usually implemented via matrix factorisation, so we need to convert this data set into what is essentially a massive sparse matrix.

In [5]:
# Keep only the two columns that define a customer/product interaction.
interaction_columns = ['CustomerID', 'ProductID']
tx_minimal = transaction[interaction_columns]
tx_minimal.head()

Unnamed: 0,CustomerID,ProductID
0,27039,381
1,25011,61
2,94024,23
3,73966,176
4,32653,310


In [6]:
# Number of distinct customers and products that appear in the transactions.
n_customers = tx_minimal['CustomerID'].unique().size
n_products = tx_minimal['ProductID'].unique().size
print(n_customers, n_products)

98759 452


In [7]:
# Smallest customer and product identifiers, to see whether ids start at 0 or 1.
min_customer = tx_minimal[['CustomerID']].min()
min_product = tx_minimal[['ProductID']].min()
print("Min_customer: {0} \n Min_Product: {1}".format(min_customer, min_product))

Min_customer: CustomerID    1
dtype: int64 
 Min_Product: ProductID    1
dtype: int64


In [8]:
# Make the identifiers zero indexed for matrix operations
def map_identifier(identifier):
    """Shift a one-based identifier (or a column of them) down by one.

    Works element-wise on anything that supports ``- 1``, so it can be passed
    to ``DataFrame.apply`` to shift whole columns at once.
    """
    zero_based = identifier - 1
    return zero_based

# Derive the zero-indexed frame from the raw transaction columns rather than from
# tx_minimal itself, so re-running this cell cannot subtract 1 a second time.
tx_minimal = transaction[['CustomerID', 'ProductID']].apply(map_identifier)

In [9]:
# Re-check the minima after the shift; both are expected to be 0 now.
id_minima = [tx_minimal[[column]].min() for column in ('CustomerID', 'ProductID')]
print("Min_customer: {0} \n Min_Product: {1}".format(*id_minima))

Min_customer: CustomerID    0
dtype: int64 
 Min_Product: ProductID    0
dtype: int64


In [10]:
# DataFrame.as_matrix() is deprecated and was removed in pandas 1.0; .values yields
# the same 2-D NumPy array of (customer index, product index) rows.
tx_matrix = tx_minimal.values

  """Entry point for launching an IPython kernel.


In [11]:
tx_matrix

array([[27038,   380],
       [25010,    60],
       [94023,    22],
       ...,
       [44736,   377],
       [79524,    76],
       [95837,   401]])

In [12]:
# Append a column of 1s to signify that this user bought this product.
# Only the first two columns (customer, product) are taken from tx_matrix, so
# re-running the cell rebuilds the same (n, 3) array instead of failing on the
# already-widened matrix.
bought = np.column_stack((tx_matrix[:, :2], np.ones(tx_matrix.shape[0])))
tx_matrix = bought.astype(int)
tx_matrix

array([[27038,   380,     1],
       [25010,    60,     1],
       [94023,    22,     1],
       ...,
       [44736,   377,     1],
       [79524,    76,     1],
       [95837,   401,     1]])

In [13]:
# Hold out 10% of the interactions for testing. The seed is fixed so that the
# split (and everything trained on it) is reproducible across kernel restarts.
tr_m, ts_m = train_test_split(tx_matrix, test_size=0.1, random_state=42)

In [14]:
tr_m.shape

(900000, 3)

In [15]:
ts_m.shape

(100000, 3)

In [16]:
# Split the training matrix into its customer, product and "bought" columns.
# Transposing yields the three columns as NumPy arrays directly, avoiding the
# creation of one Python tuple element per row that zip(*tr_m) would incur.
c_tr, p_tr, b_tr = tr_m.T

In [17]:
# Sparse customer x product matrix holding the training interactions.
# NOTE(review): the shape uses the distinct-id counts, which only works if the
# zero-based ids are contiguous (max id == count - 1) — confirm for new data.
interaction_shape = (n_customers, n_products)
tr_sparse = coo_matrix((b_tr, (c_tr, p_tr)), shape=interaction_shape)

In [18]:
# Wrap the training matrix as a TensorFlow SparseTensor.
# The (row, col) index pairs are assembled with a single vectorised NumPy call
# instead of a Python-level zip over every non-zero entry; int64 is the index
# dtype SparseTensor uses.
sparse_indices = np.column_stack((tr_sparse.row, tr_sparse.col)).astype(np.int64)
input_tensor = tf.SparseTensor(indices=sparse_indices
                               ,values=(tr_sparse.data).astype(np.float32)
                               ,dense_shape=tr_sparse.shape)


In [None]:
# Build the WALS factorisation model for the customer x product matrix.
# The third positional argument (10) is passed through unchanged from the
# original call — presumably the number of latent factors; see the WALSModel docs.
n_rows, n_cols = tr_sparse.shape
model = factorization_ops.WALSModel(n_rows, n_cols, 10)

Instructions for updating:
Colocations handled automatically by placer.


In [None]:
# Handles to the first entry of the model's row (customer) and column (product)
# factor lists; these are evaluated after training to read out the embeddings.
row_factor, col_factor = model.row_factors[0], model.col_factors[0]

In [None]:
# Train the Model
# update_*_factors returns a tuple whose element [1] is the op that assigns the
# newly solved values back to the factor variables (element [0] is only the new
# values tensor). Both ops must therefore use [1]; with [0] the row factors
# would be computed but never written.
row_update_op = model.update_row_factors(sp_input = input_tensor)[1]
col_update_op = model.update_col_factors(sp_input = input_tensor)[1]

In [None]:
# Number of alternating row/column sweeps to run.
n_iterations = 1000

with tf.Session() as sess:
    sess.run(model.initialize_op)
    sess.run(model.worker_init)

    for _ in range(n_iterations):
        # Row sweep: prepare the gramian, initialise the update, then apply it.
        sess.run(model.row_update_prep_gramian_op)
        sess.run(model.initialize_row_update_op)
        sess.run(row_update_op)
        # Column sweep: same three steps for the column factors.
        sess.run(model.col_update_prep_gramian_op)
        sess.run(model.initialize_col_update_op)
        sess.run(col_update_op)

    # Read the factor matrices once, after the final sweep. Only the last values
    # are used downstream, so fetching them on every iteration was wasted work.
    output_row = row_factor.eval(session=sess)
    output_col = col_factor.eval(session=sess)

In [None]:
output_row.shape

In [None]:
# Predict
# Select the latent factor row for the chosen user. The row is now looked up by
# user_id instead of the hard-coded index 1, which ignored the variable above.
# NOTE(review): user_id is treated as the zero-based row index (CustomerID - 1,
# matching the shift applied to the transaction ids) — confirm.
user_id = 33165
user_factor = output_row[user_id]

In [None]:
# Score every product for the user: one dot product per product factor row.
pred = np.dot(output_col, user_factor)

In [None]:
# Product indices ordered by ascending predicted score (best candidates last).
candidate_items = pred.argsort()

In [None]:
candidate_items

In [None]:
# Scratch arithmetic (evaluates to 13) — presumably how the slice width used for
# candidate_items was worked out; TODO confirm or remove this cell.
10 + len([3,4,5])

In [None]:
# candidate_items is an ascending argsort of the scores, so the last 13 entries
# are the 13 highest-scoring product indices, best one last.
# NOTE(review): products the user has already bought are not filtered out here — confirm intended.
candidate_items[-13:]

In [None]:
# Show the product row whose BOM-prefixed ProductID column equals 342.
# NOTE(review): candidate_items holds zero-based indices (ids were shifted by -1
# before building the matrix), so index i presumably maps to ProductID i + 1 —
# confirm that 342 has already been shifted back.
product[product['\ufeffProductID'] == 342]

In [None]:
product.columns.values

In [None]:
ts_m