In [1]:
import pandas as pd
import numpy as np
import pickle
import math

from scipy.sparse import csc_matrix, csr_matrix
from sparsesvd import sparsesvd

## SVD recommender

In [2]:
# Default column names and hyper-parameters shared by fit() and predict().
USER_COL = 'user_id'
ITEM_COL = 'product_id'
DEFAULT_RATING_COL = 'rating'
DEFAULT_K = 10  # number of items recommended per user
DEFAULT_M = 90  # number of singular triplets requested from sparsesvd


class SVDRecommender:
    """
    Top-k item recommender based on a truncated SVD of the user x item matrix.

    After ``fit`` the instance holds:
      * ``users`` / ``items``  - sorted unique raw ids; the position of an id
        in the array is its encoded row / column index,
      * ``user_to_encodeduser`` - raw user id -> row index,
      * ``U``                  - sparse (n_users x factors) left factors,
      * ``right_term``         - sparse (factors x n_items) ``sqrt(S) * Vt``,
      * ``train_df``           - copy of the training frame with encoded ids.

    Scores are ``U[user] @ sqrt(S) @ Vt``: every singular value enters as its
    square root, so the scores are used for ranking items only and are not a
    reconstruction of the original ratings.
    """

    def __init__(self):
        pass

    def fit(self, train_df,
            col_user=USER_COL,
            col_item=ITEM_COL,
            col_rating=DEFAULT_RATING_COL,
            m=DEFAULT_M) -> None:
        """
        perform train procedure for the recommendation algorithm
        :param train_df: pandas data frame with users, items and ratings columns
        :param col_user: name for user column
        :param col_item: name for items column
        :param col_rating: name for rating column
        :param m: number of most significant features
        :return: None
        """
        self.train_df = train_df.copy()
        self.col_user = col_user
        self.col_item = col_item
        self.col_rating = col_rating

        # np.unique returns the sorted distinct ids; an id's position in the
        # array doubles as its encoded matrix index.
        self.items = np.unique(self.train_df[col_item].values)
        item_to_encodeditem = {item: idx for idx, item in enumerate(self.items)}
        self.train_df[col_item] = self.train_df[col_item].map(item_to_encodeditem)

        self.users = np.unique(self.train_df[col_user].values)
        self.user_to_encodeduser = {user: idx for idx, user in enumerate(self.users)}
        self.train_df[col_user] = self.train_df[col_user].map(self.user_to_encodeduser)

        # Rows are users, columns are items. Repeated (user, item) pairs are
        # summed by the sparse constructor.
        sparse_matrix = csc_matrix(
            (self.train_df[col_rating],
             (self.train_df[col_user].values, self.train_df[col_item].values)),
            shape=(len(self.users), len(self.items)))

        # U comes back as (factors x n_users), hence the transpose below.
        U, S, Vt = sparsesvd(sparse_matrix, m)

        sqrt_s_diag = csr_matrix(np.diag(np.sqrt(S)).astype(np.float32))

        self.U = csr_matrix(np.transpose(U), dtype=np.float32)
        self.right_term = sqrt_s_diag @ csr_matrix(Vt, dtype=np.float32)

    def predict(self, test_df, col_user=USER_COL, k=DEFAULT_K) -> pd.DataFrame:
        """
        predicts recommendations for test users
        :param test_df: pandas data frame with users column
        :param col_user: name for user column in ``test_df``
        :param k: number of items to predict per user; values larger than the
        number of known items are clamped to that number
        :return: copy of ``test_df`` with an extra ``'svd'`` column holding the
        list of recommended item ids per row (best first). Users that were not
        present in the training data get an empty list.
        """
        test_df_new = test_df.copy()

        # Never ask for more items than exist (the original indexed past the
        # end of the ranking and raised IndexError in that case).
        top_n = max(0, min(int(k), len(self.items)))

        # Unknown users map to NaN; they are handled explicitly below instead
        # of being used as a (failing) row index. The user column itself is
        # left untouched so raw ids are preserved for every row.
        encoded_users = test_df_new[col_user].map(self.user_to_encodeduser)

        recommendations = []
        for code in encoded_users:
            if pd.isna(code):
                recommendations.append([])
                continue
            scores = (self.U[int(code), :] @ self.right_term).toarray().ravel()
            best_first = np.argsort(-scores)[:top_n]
            recommendations.append([self.items[j] for j in best_first])

        test_df_new['svd'] = recommendations
        return test_df_new

In [3]:
# Load the per-user table (columns shown below: user_id, val_products,
# test_products) and preview the first rows.
pivot_table = pd.read_pickle('data/pivot_table.pkl')
pivot_table.head(3)

Unnamed: 0,user_id,val_products,test_products
0,1,"[196, 46149, 39657, 38928, 25133, 10258, 35951...","[196, 25133, 38928, 26405, 39657, 10258, 13032..."
1,2,"[24852, 16589, 1559, 19156, 18523, 22825, 2741...","[22963, 7963, 16589, 32792, 41787, 22825, 1364..."
2,3,"[39190, 47766, 21903, 43961, 17668]","[39190, 18599, 23650, 21903, 47766, 24810]"


In [4]:
# Load the long-format interaction table (one row per user/product with a
# rating column) that is passed to SVDRecommender.fit below.
df_test = pd.read_pickle('data/df_test.pkl')
df_test.head(3)

Unnamed: 0,user_id,product_id,rating,rating_sum,rating_norm,reordered
0,1,196,10,59,0.169492,1
1,1,10258,9,59,0.152542,1
2,1,10326,1,59,0.016949,0


In [5]:
# Fit with the default column names (user_id / product_id / rating) and
# DEFAULT_M factors.
# NOTE(review): the model is trained on the frame named `df_test` - confirm
# this is the intended training split.
svd = SVDRecommender()
svd.fit(df_test)

In [6]:
# Smoke test: recommendations (default k) for the first row of pivot_table;
# the result gains an 'svd' column with the recommended product ids.
svd.predict(pivot_table[:1])

Unnamed: 0,user_id,val_products,test_products,svd
0,1,"[196, 46149, 39657, 38928, 25133, 10258, 35951...","[196, 25133, 38928, 26405, 39657, 10258, 13032...","[196, 46149, 43154, 49235, 38928, 13176, 35951..."


In [7]:
# Persist the fitted recommender. The context manager closes (and flushes) the
# file even if pickling raises, instead of leaking the handle.
with open("models/svd.pickle.dat", "wb") as model_file:
    pickle.dump(svd, model_file)

In [8]:
# Reload the recommender saved above; the file is closed as soon as loading
# finishes.
# NOTE: pickle.load can execute arbitrary code from the file - only load
# model files that were produced by a trusted source.
with open("models/svd.pickle.dat", "rb") as model_file:
    loaded_svd = pickle.load(model_file)

In [18]:
# Check that the unpickled model yields recommendations for the same row
# (compare with the 'svd' column produced by the in-memory model above).
loaded_svd.predict(pivot_table[:1], k=10)['svd']

0    [196, 46149, 43154, 49235, 38928, 13176, 35951...
Name: svd, dtype: object

In [21]:
# Take the first row's recommendation list by position (.iloc) so the lookup
# does not depend on the row label happening to be 0.
y = loaded_svd.predict(pivot_table[:1], k=10)['svd'].iloc[0]
y

[196, 46149, 43154, 49235, 38928, 13176, 35951, 31651, 46061, 41276]

In [22]:
# Inspect the container type of one row's recommendations.
type(y)

list

In [31]:
# The recommended ids originate from a NumPy array (SVDRecommender.items);
# cast each one to a built-in int.
[int(i) for i in y]

[196, 46149, 43154, 49235, 38928, 13176, 35951, 31651, 46061, 41276]

In [16]:
import json

In [25]:
# Draft payload with list-wrapped values; `data_set` is reassigned in the
# following cell, which is the version that gets serialised.
data_set = {"user_id": [1], "items": [y]}

In [32]:
# Build the response payload with built-in ints only (the ids in `y` come
# from a NumPy array), then show both the dict and its JSON encoding.
data_set = {"user_id": 1, "items": list(map(int, y))}
print(data_set)

json_dump = json.dumps(data_set)
print(json_dump)

{'user_id': 1, 'items': [196, 46149, 43154, 49235, 38928, 13176, 35951, 31651, 46061, 41276]}
{"user_id": 1, "items": [196, 46149, 43154, 49235, 38928, 13176, 35951, 31651, 46061, 41276]}
