In [1]:
#import the required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pyodbc
import scipy.sparse as sparse
from scipy.sparse.linalg import spsolve

In [2]:
#importing your dataset from sql
db = 'AdventureWorksDW2019' #databaseName
server = 'DESKTOP-RDRRI0S'  #serverName

#create the connection
conn = pyodbc.connect('DRIVER={SQL SERVER};SERVER=' + server + ';DATABASE=' + db + ';Trusted_Connection=yes')



#query 
query = """
SELECT FS.ProductKey,
	   FS.CustomerKey,
	   FS.OrderQuantity,
	   FS.OrderDate,
	   DP.EnglishProductName AS Product_Description,
	   DSP.ProductSubcategoryKey,
	   DSP.EnglishProductSubcategoryName,
	   DCP.EnglishProductCategoryName
FROM FactInternetSales AS FS
LEFT JOIN DimProduct AS DP
ON FS.ProductKey = DP.ProductKey
LEFT JOIN DimProductSubCategory AS DSP
ON DP.ProductSubCategoryKey = DSP.ProductSubCategoryKey
LEFT JOIN DimProductCategory AS DCP
ON DSP.ProductCategoryKey = DCP.ProductCategoryKey
"""

In [3]:
#reading the sql file from the engine
dataset = pd.read_sql(query, conn) 

In [4]:
dataset.head()

Unnamed: 0,ProductKey,CustomerKey,OrderQuantity,OrderDate,Product_Description,ProductSubcategoryKey,EnglishProductSubcategoryName,EnglishProductCategoryName
0,310,21768,1,2010-12-29,"Road-150 Red, 62",2,Road Bikes,Bikes
1,346,28389,1,2010-12-29,"Mountain-100 Silver, 44",1,Mountain Bikes,Bikes
2,346,25863,1,2010-12-29,"Mountain-100 Silver, 44",1,Mountain Bikes,Bikes
3,336,14501,1,2010-12-29,"Road-650 Black, 62",2,Road Bikes,Bikes
4,346,11003,1,2010-12-29,"Mountain-100 Silver, 44",1,Mountain Bikes,Bikes


In [5]:
#check to see if there are any missing values

dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 60398 entries, 0 to 60397
Data columns (total 8 columns):
 #   Column                         Non-Null Count  Dtype         
---  ------                         --------------  -----         
 0   ProductKey                     60398 non-null  int64         
 1   CustomerKey                    60398 non-null  int64         
 2   OrderQuantity                  60398 non-null  int64         
 3   OrderDate                      60398 non-null  datetime64[ns]
 4   Product_Description            60398 non-null  object        
 5   ProductSubcategoryKey          60398 non-null  int64         
 6   EnglishProductSubcategoryName  60398 non-null  object        
 7   EnglishProductCategoryName     60398 non-null  object        
dtypes: datetime64[ns](1), int64(4), object(3)
memory usage: 3.7+ MB


In [6]:
#returns only unique product/name pairs
item_lookup = dataset[['ProductKey', 'Product_Description']].drop_duplicates() 

#Encode ProductKey as string for easy future lookup
item_lookup['ProductKey'] = item_lookup.ProductKey.astype(str) 

In [7]:
item_lookup.head()

Unnamed: 0,ProductKey,Product_Description
0,310,"Road-150 Red, 62"
1,346,"Mountain-100 Silver, 44"
3,336,"Road-650 Black, 62"
5,311,"Road-150 Red, 44"
7,351,"Mountain-100 Black, 48"


In [8]:
dataset['ProductKey'] = dataset.ProductKey.astype(str) #convert ProductKey to str data type

dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 60398 entries, 0 to 60397
Data columns (total 8 columns):
 #   Column                         Non-Null Count  Dtype         
---  ------                         --------------  -----         
 0   ProductKey                     60398 non-null  object        
 1   CustomerKey                    60398 non-null  int64         
 2   OrderQuantity                  60398 non-null  int64         
 3   OrderDate                      60398 non-null  datetime64[ns]
 4   Product_Description            60398 non-null  object        
 5   ProductSubcategoryKey          60398 non-null  int64         
 6   EnglishProductSubcategoryName  60398 non-null  object        
 7   EnglishProductCategoryName     60398 non-null  object        
dtypes: datetime64[ns](1), int64(3), object(4)
memory usage: 3.7+ MB


In [9]:
#get rid of unnecessary info, keep only ProductKey, CustomerKey and Quantity
dataset = dataset[['CustomerKey', 'ProductKey', 'OrderQuantity']]
dataset

Unnamed: 0,CustomerKey,ProductKey,OrderQuantity
0,21768,310,1
1,28389,346,1
2,25863,346,1
3,14501,336,1
4,11003,346,1
...,...,...,...
60393,15868,485,1
60394,15868,225,1
60395,18759,485,1
60396,18759,486,1


In [10]:
#Group data together
grouped_dataset = dataset.groupby(['CustomerKey', 'ProductKey']).sum().reset_index() 

grouped_dataset

Unnamed: 0,CustomerKey,ProductKey,OrderQuantity
0,11000,214,1
1,11000,344,1
2,11000,353,1
3,11000,485,1
4,11000,488,1
...,...,...,...
59046,29480,479,1
59047,29480,562,1
59048,29481,349,1
59049,29482,358,1


In [11]:
#Replace a sum of zero purchases with a one to indicate purchased
grouped_dataset.OrderQuantity.loc[grouped_dataset.OrderQuantity == 0] = 1 

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)


In [12]:
#Get customers where purchase totals were positive
grouped_purchased = grouped_dataset.query('OrderQuantity > 0') 

In [13]:
grouped_purchased.head()

Unnamed: 0,CustomerKey,ProductKey,OrderQuantity
0,11000,214,1
1,11000,344,1
2,11000,353,1
3,11000,485,1
4,11000,488,1


In [14]:
grouped_purchased.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 59051 entries, 0 to 59050
Data columns (total 3 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   CustomerKey    59051 non-null  int64 
 1   ProductKey     59051 non-null  object
 2   OrderQuantity  59051 non-null  int64 
dtypes: int64(2), object(1)
memory usage: 1.8+ MB


In [15]:
from pandas.api.types import CategoricalDtype

In [16]:
#Get all unique customers
customers = list(np.sort(grouped_purchased.CustomerKey.unique()))

customers

[11000,
 11001,
 11002,
 11003,
 11004,
 11005,
 11006,
 11007,
 11008,
 11009,
 11010,
 11011,
 11012,
 11013,
 11014,
 11015,
 11016,
 11017,
 11018,
 11019,
 11020,
 11021,
 11022,
 11023,
 11024,
 11025,
 11026,
 11027,
 11028,
 11029,
 11030,
 11031,
 11032,
 11033,
 11034,
 11035,
 11036,
 11037,
 11038,
 11039,
 11040,
 11041,
 11042,
 11043,
 11044,
 11045,
 11046,
 11047,
 11048,
 11049,
 11050,
 11051,
 11052,
 11053,
 11054,
 11055,
 11056,
 11057,
 11058,
 11059,
 11060,
 11061,
 11062,
 11063,
 11064,
 11065,
 11066,
 11067,
 11068,
 11069,
 11070,
 11071,
 11072,
 11073,
 11074,
 11075,
 11076,
 11077,
 11078,
 11079,
 11080,
 11081,
 11082,
 11083,
 11084,
 11085,
 11086,
 11087,
 11088,
 11089,
 11090,
 11091,
 11092,
 11093,
 11094,
 11095,
 11096,
 11097,
 11098,
 11099,
 11100,
 11101,
 11102,
 11103,
 11104,
 11105,
 11106,
 11107,
 11108,
 11109,
 11110,
 11111,
 11112,
 11113,
 11114,
 11115,
 11116,
 11117,
 11118,
 11119,
 11120,
 11121,
 11122,
 11123,
 11124,


In [17]:
#Get all unique products
products = list(grouped_purchased.ProductKey.unique())

products

['214',
 '344',
 '353',
 '485',
 '488',
 '530',
 '541',
 '573',
 '217',
 '225',
 '350',
 '477',
 '478',
 '479',
 '491',
 '604',
 '222',
 '346',
 '359',
 '561',
 '361',
 '480',
 '564',
 '345',
 '355',
 '562',
 '351',
 '528',
 '537',
 '357',
 '347',
 '348',
 '575',
 '489',
 '574',
 '529',
 '465',
 '486',
 '363',
 '569',
 '228',
 '463',
 '467',
 '475',
 '482',
 '483',
 '535',
 '536',
 '538',
 '539',
 '487',
 '490',
 '484',
 '472',
 '565',
 '567',
 '571',
 '349',
 '566',
 '568',
 '570',
 '585',
 '572',
 '481',
 '471',
 '234',
 '473',
 '587',
 '576',
 '231',
 '474',
 '310',
 '540',
 '563',
 '590',
 '313',
 '589',
 '237',
 '312',
 '476',
 '380',
 '578',
 '605',
 '577',
 '584',
 '579',
 '606',
 '360',
 '591',
 '592',
 '378',
 '374',
 '586',
 '580',
 '311',
 '314',
 '356',
 '362',
 '588',
 '354',
 '358',
 '352',
 '583',
 '600',
 '599',
 '597',
 '581',
 '593',
 '376',
 '560',
 '596',
 '598',
 '390',
 '372',
 '594',
 '595',
 '388',
 '382',
 '384',
 '368',
 '375',
 '379',
 '370',
 '369',
 '377',


In [18]:
#Get all purchases
quantity = list(grouped_purchased.OrderQuantity) 

quantity

[1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 2,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 2,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 2,
 1,
 1,
 1,
 1,
 1,
 5,
 2,
 2,
 2,
 1,
 3,
 3,
 1,
 1,
 1,
 1,
 2,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 2,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 2,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,


In [19]:
#Get the associated row indcies
rows = grouped_purchased.CustomerKey.astype(CategoricalDtype(categories = customers)).cat.codes

In [20]:
#Get the associated column indices
cols = grouped_purchased.ProductKey.astype(CategoricalDtype(categories = products)).cat.codes

In [21]:
#We check our final matrix object
purchases_sparse = sparse.csr_matrix((quantity, (rows, cols)), shape=(len(customers), len(products)))

purchases_sparse

<18484x158 sparse matrix of type '<class 'numpy.intc'>'
	with 59051 stored elements in Compressed Sparse Row format>

In [22]:
#Shows the number of possible interactions in the matrix
matrix_size = purchases_sparse.shape[0]*purchases_sparse.shape[1] 

#Shows the number of items interacted with sparsity
num_purchases = len(purchases_sparse.nonzero()[0]) 
sparsity = 100*(1 - (num_purchases/matrix_size))
sparsity

97.97803231806365

#### 97.97% of the interaction matrix is sparse. 
#### For collaborative filtering to work, the maximum sparsity should be about 99.5% or so. 
#### We are well below this, so we should be able to get decent results.

### Creating a Validation and Testing Set

In [23]:
#Our test set is an exact copy of our original data. 
#The training set, however, will mask a random percentage of user/item interactions and act as if the user never purchased the item (making it a sparse entry with a zero). 
#We then check in the test set which items were recommended to the user that they ended up actually purchasing.
#If the users frequently ended up purchasing the items most recommended to them by the system, we can conclude the system seems to be working.

#As an additional check, we can compare our system to simply recommending the most popular items to every user (beating popularity is a bit difficult). This will be our baseline.

In [24]:
import random

In [25]:
def make_train(ratings, pct_test = 0.2):

    # Make a copy of the original dataset to be the test set
    test_set = ratings.copy()

    # Store the test set as a binary preference matrix
    test_set[test_set != 0] = 1  

    # Make a copy of the original data we can alter as our training set
    training_set = ratings.copy() 

    # Find the indices in the ratings data where an interaction exists
    nonzero_inds = training_set.nonzero() 

    # Zip these pairs together of user,item index into list
    nonzero_pairs = list(zip(nonzero_inds[0], nonzero_inds[1])) 

    # Set the random seed to zero for reproducibility
    random.seed(0) 

    # Round the number of samples needed to the nearest integer
    num_samples = int(np.ceil(pct_test*len(nonzero_pairs))) 

    # Sample a random number of user-item pairs without replacement
    samples = random.sample(nonzero_pairs, num_samples) 

    # Get the user row indices 
    user_inds = [index[0] for index in samples] 

    # Get the item column indices
    item_inds = [index[1] for index in samples] 

    # Assign all of the randomly chosen user-item pairs to zero
    training_set[user_inds, item_inds] = 0 

    # Get rid of zeros in sparse array storage after update to save space
    training_set.eliminate_zeros()  

    # Output the unique list of user rows that were altered
    return training_set, test_set, list(set(user_inds)) 

In [26]:
product_train, product_test, product_users_altered = make_train(purchases_sparse, pct_test = 0.2)

In [27]:
##Implementing Alternating Least Square (ALS) algorithm for implicit feedback
def implicit_weighted_ALS(training_set, lambda_val = 0.1, alpha = 40, iterations = 10, rank_size = 20, seed = 0):
    
    conf = (alpha*training_set) #Here we set our confidence matrix to stay sparse.
    
    # Get the size of our original ratings matrix, m x n
    num_user = conf.shape[0] 
    num_item = conf.shape[1] 
    
    # initialize our X/Y feature vectors randomly with a set seed
    rstate = np.random.RandomState(seed)
    
    # Random numbers in a m x rank shape
    X = sparse.csr_matrix(rstate.normal(size = (num_user, rank_size)))
    
    # Normally this would be rank x n but we can # transpose at the end. Makes calculation more simple.
    Y = sparse.csr_matrix(rstate.normal(size = (num_item, rank_size)))  
    
    X_eye = sparse.eye(num_user) 
    Y_eye = sparse.eye(num_item) 
    
    # Our regularization term lambda*I.
    lambda_eye = lambda_val * sparse.eye(rank_size)  # We can compute this before iteration starts. 

    # Begin iterations
    # Iterate back and forth between solving X given fixed Y and vice versa
    for iter_step in range(iterations):
        #Compute yTy and xTx at beginning of each iteration to save computing time 
        yTy = Y.T.dot(Y) 
        xTx = X.T.dot(X)
    
        #Begin iteration to solve for X based on fixed Y 
        # Grab user row from confidence matrix and convert to dense
        for u in range(num_user): 
            conf_samp = conf[u,:].toarray() 
    
            # Create binarized preference vector 
            pref = conf_samp.copy() 
            pref[pref != 0] = 1
    
            # Get Cu - I term, don’t need to subtract 1 since we never added it 
            CuI = sparse.diags(conf_samp, [0])
            #Cu = CuI + Y_eye
    
            # This is the yT(Cu-I)Y term  
            yTCuIY = Y.T.dot(CuI).dot(Y) 
            # This is the yTCuPu term, where we add the eye back in Cu - I + I
            yTCupu = Y.T.dot(CuI + Y_eye).dot(pref.T) 
            # Solve for Xu = ((yTy + yT(Cu-I)Y + lambdaI)^-1)yTCuPu 
            X[u] = spsolve(yTy + yTCuIY + lambda_eye, yTCupu) 


    # Begin iteration to solve for Y based on fixed X 
    for i in range(num_item): 
            # transpose to get it in row format and convert to dense
            conf_samp = conf[:,i].T.toarray()
            # Create binarized preference vector 
            pref = conf_samp.copy() 
            pref[pref != 0] = 1 
            # Get Ci - I term, don’t need to subtract 1 since we never added it 
            CiI = sparse.diags(conf_samp, [0]) 
    
            # This is the xT(Cu-I)X term
            xTCiIX = X.T.dot(CiI).dot(X) 
            # This is the xTCiPi term 
            xTCiPi = X.T.dot(CiI + X_eye).dot(pref.T) 
            # Solve for Yi = ((xTx + xT(Cu-I)X) + lambdaI)^-1)xTCiPi
            Y[i] = spsolve(xTx + xTCiIX + lambda_eye, xTCiPi) 
     
    # End iterations 
    return X, Y.T 
# Transpose at the end to make up for not being transposed at the beginning. 
# Y needs to be rank x n. Keep these as separate matrices for scale reasons.    

In [28]:
user_vecs, item_vecs = implicit_weighted_ALS(product_train, lambda_val = 0.1, alpha = 15, iterations = 1, rank_size = 20)

In [29]:
user_vecs[0,:].dot(item_vecs).toarray()[0,:5]

array([5.24299219e-02, 9.85570586e-05, 1.25361284e-02, 3.89815739e-02,
       7.83943702e-03])

### Speeding up the ALS

In [30]:
#pip install implicit

In [31]:
import implicit

In [32]:
# The implicit library expects data as a item-user matrix so we
# create two matricies, one for fitting the model (item-user) 
# and one for recommendations (user-item)
#sparse_item_user = sparse.csr_matrix((grouped_purchased['OrderQuantity'].astype(float), (grouped_purchased['ProductKey'], grouped_purchased['CustomerKey'])))
#sparse_user_item = sparse.csr_matrix((grouped_purchased['OrderQuantity'].astype(float), (grouped_purchased['CustomerKey'], grouped_purchased['ProductKey'])))

# Initialize the als model and fit it using the sparse item-user matrix
model = implicit.als.AlternatingLeastSquares(factors=20, regularization=0.1, iterations=50)

# Calculate the confidence by multiplying it by our alpha value.
alpha_val = 15
data_conf = (product_train * alpha_val).astype('double')

# Fit the model
model.fit(data_conf)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=50.0), HTML(value='')))




### Evaluating the Recommender System

In [33]:
from sklearn import metrics

In [34]:
def auc_score(predictions, test): #This function outputs the area under the curve using sklearn's metrics parameters
    fpr, tpr, thresholds = metrics.roc_curve(test, predictions) 
    return metrics.auc(fpr, tpr)   

In [35]:
def calc_mean_auc(training_set, altered_users, predictions, test_set): #This function will calculate the mean AUC by user for any user that had their user-item matrix altered. parameters:

    
    store_auc = [] # An empty list to store the AUC for each user that had an item removed from the training set
    popularity_auc = []  # To store popular AUC scores
    # Get sum of item iteractions to find most popular  
    pop_items = np.array(test_set.sum(axis = 0)).reshape(-1)  
    item_vecs = predictions[1] 
    for user in altered_users: # Iterate through each user that had an item altered
        # Get the training set row 
        training_row = training_set[user,:].toarray().reshape(-1)
        # Find where the interaction had not yet occurred
        zero_inds = np.where(training_row == 0)  
        # Get the predicted values based on our user/item vectors 
        user_vec = predictions[0][user,:] 
        # Get only the items that were originally zero 
        pred = user_vec.dot(item_vecs).toarray()[0,zero_inds].reshape(-1) 
        # Select all ratings from the MF prediction for this user that originally had no iteraction 
        actual = test_set[user,:].toarray()[0,zero_inds].reshape(-1) 
        # Select the binarized yes/no interaction pairs from the original full data 
        # that align with the same pairs in training 
        pop = pop_items[zero_inds] # Get the item popularity for our chosen items 
        store_auc.append(auc_score(pred, actual)) # Calculate AUC for the given user and store 
        popularity_auc.append(auc_score(pop, actual)) # Calculate AUC using most popular and score 
    # End users iteration

    return float("%.3f"%np.mean(store_auc)), float("%.3f"%np.mean(popularity_auc)) # Return the mean AUC rounded to three decimal places for both test and popularity benchmark

In [36]:
user_vecs.shape

(18484, 20)

In [37]:
item_vecs.shape

(20, 158)

In [38]:
calc_mean_auc(product_train, product_users_altered, 
              [sparse.csr_matrix(user_vecs), sparse.csr_matrix(item_vecs)], product_test)

(0.643, 0.836)

### Testing the Recommender System

In [39]:
customers_arr = np.array(customers) # Array of customer IDs from the ratings matrix 
products_arr = np.array(products) # Array of product IDs from the ratings matrix

#### Previous purchase by the customer

In [70]:
def get_items_purchased(customer_id, mf_train, customers_list, products_list, item_lookup):
    cust_ind = np.where(customers_list == customer_id)[0][0] # Returns the index row of our customer id
    purchased_ind = mf_train[cust_ind,:].nonzero()[1] # Get column indices of purchased items
    prod_codes = products_list[purchased_ind] # Get the stock codes for our purchased items
    return item_lookup.loc[item_lookup.ProductKey.isin(prod_codes)]

In [71]:
customers_arr[:5]

array([11000, 11001, 11002, 11003, 11004], dtype=int64)

#### Enter the customer key to get items purchased

In [72]:
get_items_purchased(11900, product_train, customers_arr, products_arr, item_lookup)

Unnamed: 0,ProductKey,Product_Description
1,346,"Mountain-100 Silver, 44"
5432,353,"Mountain-200 Silver, 38"
5455,222,"Sport-100 Helmet, Blue"
5459,573,"Touring-1000 Blue, 46"
5504,489,"Short-Sleeve Classic Jersey, M"


#### Recommending Products to a Customer

In [73]:
from sklearn.preprocessing import MaxAbsScaler

In [82]:
def rec_items(customer_id, mf_train, user_vecs, item_vecs, customer_list, item_list, item_lookup, num_items = 10):
    cust_ind = np.where(customer_list == customer_id)[0][0] # Returns the index row of our customer id
    pref_vec = mf_train[cust_ind,:].toarray() # Get the ratings from the training set ratings matrix
    pref_vec = pref_vec.reshape(-1) + 1 # Add 1 to everything, so that items not purchased yet become equal to 1
    pref_vec[pref_vec > 1] = 0 # Make everything already purchased zero
    rec_vector = user_vecs[cust_ind,:].dot(item_vecs).toarray() # Get dot product of user vector and all item vectors
    # Scale this recommendation vector between 0 and 1
    max_abs = MaxAbsScaler()
    rec_vector_scaled = max_abs.fit_transform(rec_vector.reshape(-1,1))[:,0] 
    recommend_vector = pref_vec*rec_vector_scaled 
    # Items already purchased have their recommendation multiplied by zero
    product_idx = np.argsort(recommend_vector)[::-1][:num_items] # Sort the indices of the items into order of best recommendations
   
    rec_list = [] # start empty list to store items
    for index in product_idx:
        code = item_list[index]
        rec_list.append([code, item_lookup.Product_Description.loc[item_lookup.ProductKey == code].iloc[0]]) 
        # Append our descriptions to the list
    codes = [item[0] for item in rec_list]
    descriptions = [item[1] for item in rec_list]
    final_frame = pd.DataFrame({'ProductKey': codes, 'Product_Description': descriptions}) # Create a dataframe 
    return final_frame[['ProductKey', 'Product_Description']] # Switch order of columns around

#### Enter the customer ID to get the list of products recommended

In [83]:
rec_items(11900, product_train, user_vecs, item_vecs, customers_arr, products_arr, item_lookup,
                       num_items = 10)

Unnamed: 0,ProductKey,Product_Description
0,477,Water Bottle - 30 oz.
1,479,Road Bottle Cage
2,478,Mountain Bottle Cage
3,310,"Road-150 Red, 62"
4,311,"Road-150 Red, 44"
5,487,Hydration Pack - 70 oz.
6,225,AWC Logo Cap
7,386,"Road-550-W Yellow, 42"
8,463,"Half-Finger Gloves, S"
9,363,"Mountain-200 Black, 46"


#### Enter Customer ID to get list of recommendations

In [90]:
Customer_id = int(input('The ID of the customer is: '))
Reco_number = int(input('Number of recommendations: '))

The ID of the customer is: 11000
Number of recommendations: 10


#### List of items purchased vs recommended

In [91]:
print('Following is the list of items purchased by Customer no.', Customer_id)
get_items_purchased(Customer_id, product_train, customers_arr, products_arr, item_lookup)

Following is the list of items purchased by Customer no. 11000


Unnamed: 0,ProductKey,Product_Description
5432,353,"Mountain-200 Silver, 38"
5442,214,"Sport-100 Helmet, Red"
5457,541,Touring Tire
5458,530,Touring Tire Tube
5459,573,"Touring-1000 Blue, 46"
5476,485,Fender Set - Mountain


In [92]:
print('Following is the list of items recommended to Customer no.', Customer_id)
rec_items(Customer_id, product_train, user_vecs, item_vecs, customers_arr, products_arr, item_lookup,
                       num_items = Reco_number)

Following is the list of items recommended to Customer no. 11000


Unnamed: 0,ProductKey,Product_Description
0,477,Water Bottle - 30 oz.
1,225,AWC Logo Cap
2,478,Mountain Bottle Cage
3,222,"Sport-100 Helmet, Blue"
4,479,Road Bottle Cage
5,487,Hydration Pack - 70 oz.
6,491,"Short-Sleeve Classic Jersey, XL"
7,361,"Mountain-200 Black, 42"
8,310,"Road-150 Red, 62"
9,488,"Short-Sleeve Classic Jersey, S"
