In [1]:
import pandas as pd
import numpy as np
import json
import tensorflow as tf 
from tensorflow import keras
from tensorflow.keras import layers

In [2]:
data=pd.read_json('/content/drive/MyDrive/Bigdata/sampled_reviews.json', orient='records')

In [3]:
data.drop('_id', inplace=True, axis=1)

In [4]:
df=data[['asin', 'reviewerID', 'overall']]

In [5]:
# encoding UserID and ProductID to simple integers to improve computation effeciency 
# Maintaing a map to get back the decoded UserID and ProductID after the calculations .

user_ids = df["reviewerID"].unique().tolist()
user2user_encoded = {x: i for i, x in enumerate(user_ids)}
userencoded2user = {i: x for i, x in enumerate(user_ids)}

product_ids = df["asin"].unique().tolist()
product2product_encoded = {x: i for i, x in enumerate(product_ids)}
product_encoded2product = {i: x for i, x in enumerate(product_ids)}

df["reviewerID"] = df["reviewerID"].map(user2user_encoded)
df["asin"] = df["asin"].map(product2product_encoded)

num_users = len(user2user_encoded)
num_product = len(product_encoded2product)
df['overall'] = df['overall'].values.astype(np.float32)

min_rating = min(df['overall'])
max_rating = max(df['overall'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["reviewerID"] = df["reviewerID"].map(user2user_encoded)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["asin"] = df["asin"].map(product2product_encoded)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['overall'] = df['overall'].values.astype(np.float32)


In [6]:
df = df.sample(frac=1, random_state=42)
x = df[["reviewerID", "asin"]].values

y = df["overall"].apply(lambda x: (x - min_rating) / (max_rating - min_rating)).values

train_indices = int(0.7 * df.shape[0])
val_indices = int(0.9 * df.shape[0]) 

x_train, x_val, x_test , y_train, y_val , y_test = (
    x[:train_indices],
    x[train_indices:val_indices],
    x[val_indices : ] , 
    y[:train_indices],
    y[train_indices:val_indices], 
    y[val_indices : ]
)

In [None]:
model.save("model", save_format="tf")

In [8]:
model = tf.keras.models.load_model("model")

In [9]:
EMBEDDING_SIZE = 40

class Recommender(keras.Model):
    def __init__(self, num_users, num_product, embedding_size):
        super(Recommender, self).__init__()
        self.num_users = num_users
        self.num_product = num_product
        self.embedding_size = embedding_size
        self.user_embedding = layers.Embedding(
            num_users,
            embedding_size,
            embeddings_initializer="he_normal",
            embeddings_regularizer=keras.regularizers.l2(1e-6),
        )
        self.user_bias = layers.Embedding(num_users, 1)
        self.product_embedding = layers.Embedding(
            num_product,
            embedding_size,
            embeddings_initializer="he_normal",
            embeddings_regularizer=keras.regularizers.l2(1e-6)
        )
        self.product_bias = layers.Embedding(num_product, 1)
        
    def call(self, inputs):
        
        user_vector = self.user_embedding(inputs[:, 0])
        product_vector = self.product_embedding(inputs[:, 1])
        
        user_bias = self.user_bias(inputs[:, 0])
        product_bias = self.product_bias(inputs[:, 1])
        
        dot_prod = tf.tensordot(user_vector, product_vector, 2)

        x = dot_prod + user_bias + product_bias
        
        return tf.nn.sigmoid(x)
    
    def getRecomendation(self , df , user , k )  : 
        key = list(filter(lambda x: user2user_encoded[x] == 100, user2user_encoded))[0]
        encoded_user = user2user_encoded[key]

        all_prods = df['asin'].unique() 
        prods = df[df.reviewerID == encoded_user]['asin'].values
        remainder = list(set(all_prods) - set(prods))
        n = len(remainder) 
        out = np.empty((n, 2),dtype=int)
        out[: ,  0 ] = encoded_user
        out[ : , 1 ] = remainder[:None]
        output = self.predict(out)

        ndx = map(lambda x : product_encoded2product[x] , remainder )
        vals = output[: , 0 ]

        return pd.Series(index = ndx , data = vals).sort_values(ascending = False )[ :k ].index
    
model = Recommender(num_users, num_product, EMBEDDING_SIZE)
model.compile(
    loss=tf.keras.losses.BinaryCrossentropy(), optimizer=keras.optimizers.Adam(learning_rate=0.001)
)


In [8]:
history = model.fit(
    x=x_train,
    y=y_train,
    batch_size=32,
    epochs=5,
#     verbose=1,
    validation_data=(x_val, y_val)
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [10]:
u = df['reviewerID'].sample(1).values[0]               #take user id as input
K = 10                                                 #top k items
top_10_prod = model.getRecomendation(df , u ,K )

print("Top {k} recommendations for userID  : {user} are - {l} ".format( k = K  , user = u , l = list(top_10_prod)))

Top 10 recommendations for userID  : 16321 are - ['1414371217', 'B004BA6LSU', 'B012BT3C2A', 'B0023ZQDEC', '0099513587', '1932225412', '0976247909', '0670918466', 'B007OTFC08', 'B001IDYHO8'] 


In [11]:
#more info about the reccomended items

data[data['asin'].isin(top_10_prod)]

Unnamed: 0,asin,feedback,overall,reviewText,reviewerID,unixReviewTime,verified,vote
15083,B004BA6LSU,bad,1,it defective. it look nice but there are some ...,A3DMBR1T5K8ZCA,1448668800,True,19
20553,B012BT3C2A,bad,1,the chain turn black within 2 day & turn my ne...,A9BD9TOYLIN1W,1496707200,True,23
53849,0670918466,bad,2,lot of inform with some overlap with the tudor...,A393OVPV6OAEKV,1396915200,True,5
54576,B007OTFC08,good,4,this knife is great for most kitchen tasks. it...,A3C9YEVK93XRGY,1375315200,True,3
54587,B0023ZQDEC,good,4,fit nice on me. the materi is soft and comfort...,A3NJ54IYXF6LKB,1458604800,True,3
58489,0976247909,bad,1,"from what i know, one is either an alcohol or ...",AW6E0EY4CL5WU,1246060800,True,4
83690,B001IDYHO8,good,5,it was exact what i was look for and didn't ev...,A08728679ECQXI7MLGUX,1353369600,True,2
92884,0099513587,good,5,i purchas this book for a friend who absolut l...,AVXB9EUGLL9GY,1306627200,True,2
99103,1414371217,good,5,are you readi to face this our countri is get ...,AG4XEUY6OVVC8,1370649600,True,2
103259,1932225412,good,5,great five star,A2W5PXJ2Q7JVIF,1452902400,True,2
