In [1]:
import pandas as pd
import scipy.sparse as sparse
import numpy as np
from scipy.sparse.linalg import spsolve

In [2]:
# Load the per-customer purchase quantities; the first CSV row supplies the column names.
grouped_purchased = pd.read_csv(
    'cvs_data_file/online-retail.csv',
    header=0,
)
grouped_purchased.head()

Unnamed: 0,CustomerID,StockCode,Quantity
0,12346,23166,1
1,12347,16008,24
2,12347,17021,36
3,12347,20665,6
4,12347,20719,40


In [3]:
# Build the customer x product purchase matrix from the long-format table.
customers = list(np.sort(grouped_purchased.CustomerID.unique()))  # Unique customers, sorted ascending
products = list(grouped_purchased.StockCode.unique())  # Unique products, in order of first appearance
quantity = list(grouped_purchased.Quantity)  # One purchase quantity per input row

# `Series.astype('category', categories=...)` was deprecated and then removed from
# pandas. An explicit CategoricalDtype is the supported spelling and yields the
# same integer codes: the position of each value inside `customers` / `products`.
rows = grouped_purchased.CustomerID.astype(
    pd.api.types.CategoricalDtype(categories=customers)
).cat.codes  # Row index (customer position) for every purchase
cols = grouped_purchased.StockCode.astype(
    pd.api.types.CategoricalDtype(categories=products)
).cat.codes  # Column index (product position) for every purchase

purchases_sparse = sparse.csr_matrix(
    (quantity, (rows, cols)),
    shape=(len(customers), len(products)),
)

  """
  import sys


In [4]:
# Densify only for a quick visual check; `toarray()` is the explicit form of the `.A` shorthand.
print(purchases_sparse.toarray())

[[ 1  0  0 ...  0  0  0]
 [ 0 24 36 ...  0  0  0]
 [ 0  0  0 ...  0  0  0]
 ...
 [ 0  0  0 ...  0  0  0]
 [ 0  0  0 ...  0  0  0]
 [ 0  0  0 ...  0  0  0]]


In [5]:
# Sparsity = percentage of customer/product cells that hold no purchase.
matrix_size = purchases_sparse.shape[0] * purchases_sparse.shape[1]  # Every possible customer/product pair
num_purchases = purchases_sparse.nonzero()[0].size  # Pairs that actually have a non-zero quantity
sparsity = 100 * (1 - num_purchases / matrix_size)
sparsity

98.32190920694744

In [6]:
import random

In [7]:
def make_train(ratings, pct_test = 0.2, seed = 0):
    """Split an interaction matrix into a masked training set and a binary test set.

    A fraction ``pct_test`` of the non-zero user/item entries is chosen at
    random and set to zero in a copy of ``ratings``; that copy is the training
    set. The test set is a second copy in which every non-zero entry is
    replaced by 1. ``ratings`` itself is not modified.

    Parameters
    ----------
    ratings : scipy sparse matrix
        User x item interactions; must support ``copy``, ``nonzero`` and
        item assignment (the notebook passes a CSR matrix).
    pct_test : float, default 0.2
        Fraction (between 0 and 1) of the non-zero entries to mask. The
        number of masked entries is rounded up.
    seed : int, default 0
        Seed of the private random generator used for sampling. The default
        reproduces the selection made by ``random.seed(0)`` followed by
        ``random.sample``.

    Returns
    -------
    training_set : sparse matrix
        Copy of ``ratings`` with the sampled entries removed.
    test_set : sparse matrix
        Copy of ``ratings`` with all non-zero entries set to 1.
    users_altered : list
        Unique row indices that had at least one entry masked.

    Raises
    ------
    ValueError
        If ``pct_test`` is outside ``[0, 1]``.
    """
    if not 0 <= pct_test <= 1:
        raise ValueError("pct_test must be between 0 and 1, got %r" % (pct_test,))

    test_set = ratings.copy()  # The test set keeps every interaction...
    test_set[test_set != 0] = 1  # ...stored as a binary preference matrix

    training_set = ratings.copy()  # Copy that will have interactions masked out
    nonzero_inds = training_set.nonzero()  # (row indices, column indices) of existing interactions
    nonzero_pairs = list(zip(nonzero_inds[0], nonzero_inds[1]))  # (user, item) index pairs

    # Use a private generator so the process-wide `random` state is left
    # untouched; seeded identically it draws the same sample as the module API.
    rng = random.Random(seed)
    num_samples = int(np.ceil(pct_test * len(nonzero_pairs)))  # Round the sample count up
    samples = rng.sample(nonzero_pairs, num_samples)  # Sample pairs without replacement

    user_inds = [index[0] for index in samples]  # User row indices of the masked pairs
    item_inds = [index[1] for index in samples]  # Item column indices of the masked pairs
    if samples:  # Skip the fancy-index assignment when there is nothing to mask
        training_set[user_inds, item_inds] = 0
    training_set.eliminate_zeros()  # Drop explicitly stored zeros from the sparse storage
    return training_set, test_set, list(set(user_inds))  # Unique user rows that were altered

In [8]:
# Mask 20% of the observed purchases to form the training matrix.
product_train, product_test, product_users_altered = make_train(
    purchases_sparse,
    pct_test=0.2,
)

In [9]:
# Load the StockCode -> Description lookup table; the first CSV row supplies the column names.
item_lookup = pd.read_csv(
    'cvs_data_file/item_lookup.csv',
    header=0,
)
item_lookup.head()

Unnamed: 0,StockCode,Description
0,85123A,WHITE HANGING HEART T-LIGHT HOLDER
1,71053,WHITE METAL LANTERN
2,84406B,CREAM CUPID HEARTS COAT HANGER
3,84029G,KNITTED UNION FLAG HOT WATER BOTTLE
4,84029E,RED WOOLLY HOTTIE WHITE HEART.


In [10]:
import pickle

# Restore the pickled dictionary from disk and pull the fitted model out of it.
# NOTE(review): unpickling executes arbitrary code -- only load files you trust.
with open('saved_model', 'rb') as f:
    saved_model = pickle.load(f)

model = saved_model['model']



In [11]:
# Item-to-item recommendation: list the items most similar to a given item
from sklearn.metrics.pairwise import cosine_similarity

# Cosine similarity between the embedding at position 2 and every item embedding.
similarity_to_item = cosine_similarity(model.item_embeddings)[2]

# argsort is ascending, so the last five positions are the five highest
# similarities; reversing lists the most similar first.
ranked_items = similarity_to_item.argsort()
print(item_lookup['Description'][ranked_items][-5:][::-1])


2          CREAM CUPID HEARTS COAT HANGER
2608                LA PALMIERA TILE HOOK
2472             VINTAGE WOODEN BAR STOOL
1662    BIRDHOUSE DECORATION MAGIC GARDEN
2285      BLUE GINGHAM ROSE CUSHION COVER
Name: Description, dtype: object


In [18]:

def display_recommended_items(model, data, user_ids):
    """Print a customer's known purchases followed by all items ranked by predicted score.

    Relies on the notebook-level ``customers`` list (to translate the customer
    ID into a matrix row) and the ``item_lookup`` frame (to translate item
    positions into descriptions).
    NOTE(review): assumes the row order of ``item_lookup`` matches the column
    order of ``data`` -- confirm against how both were built.

    Parameters
    ----------
    model : object
        Fitted recommender exposing ``predict(user_index, item_indices)``.
    data : sparse matrix
        User x item interaction matrix; converted with ``tocsr()``.
    user_ids : scalar
        A single customer ID (despite the plural name) as found in
        ``customers``.

    Raises
    ------
    IndexError
        If the customer ID does not occur in ``customers``.
    """
    customers_arr = np.array(customers)
    print("customers_arr",customers_arr)

    # Translate the customer ID into its row position; keep the parameter
    # untouched so the ID is still available for the error message.
    matches = np.where(customers_arr == user_ids)[0]
    if len(matches) == 0:
        raise IndexError("customer id %r not found in customers" % (user_ids,))
    user_row = matches[0]
    print(user_row)
    n_items = data.shape[1]

    # Column positions stored for this user's row are the items already purchased.
    known_positives = item_lookup['Description'][data.tocsr()[user_row].indices]
    known_positives_df = pd.DataFrame(data=known_positives)
    print(known_positives_df,'\n','######################################')

    # Score every item for this user.
    scores = model.predict(user_row, np.arange(n_items))

    # Negating the scores makes argsort return the highest-scoring items first.
    top_items = item_lookup['Description'][np.argsort(-scores)]
    df = pd.DataFrame(data=top_items)
    print(df)
            

# Show known purchases and ranked recommendations for customer 18287.
display_recommended_items(model=model, data=product_train, user_ids=18287)


customers_arr [12346 12347 12348 ... 18282 18283 18287]
4337
                              Description
59                JUMBO BAG PINK POLKADOT
119         3 TIER CAKE TIN RED AND CREAM
120      SET 3 WICKER OVAL BASKETS W LIDS
128                 HEART OF WICKER LARGE
137         ORGANISER WOOD ANTIQUE WHITE 
309            HANGING METAL STAR LANTERN
369                  FELTCRAFT DOLL MOLLY
406                    VINTAGE SNAP CARDS
625   CREAM SLICE FLANNEL CHOCOLATE SPOT 
715               20 DOLLY PEGS RETROSPOT
726            PEACE WOODEN BLOCK LETTERS
772                    TOOL BOX SOFT TOY 
801         FRENCH BLUE METAL DOOR SIGN 3
802        FRENCH BLUE METAL DOOR SIGN No
918                36 PENCILS TUBE SKULLS
919                   GREEN FERN JOURNAL 
952          FLUTED ANTIQUE CANDLE HOLDER
955                TEA TIME KITCHEN APRON
1003              WATERING CAN PINK BUNNY
1022            BROCANTE SHELF WITH HOOKS
1102      DINOSAUR LUNCH BOX WITH CUTLERY
1148       PINK