## Import Data and Dependencies

In [1]:
!pip install gradio
!wget http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/meta_Electronics.json.gz
!wget http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/reviews_Electronics_5.json.gz
!pip install -q tensorflow-recommenders


Collecting gradio
  Downloading gradio-3.47.1-py3-none-any.whl (20.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m20.3/20.3 MB[0m [31m67.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl (15 kB)
Collecting fastapi (from gradio)
  Downloading fastapi-0.103.2-py3-none-any.whl (66 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m66.3/66.3 kB[0m [31m7.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting ffmpy (from gradio)
  Downloading ffmpy-0.3.1.tar.gz (5.5 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting gradio-client==0.6.0 (from gradio)
  Downloading gradio_client-0.6.0-py3-none-any.whl (298 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m298.8/298.8 kB[0m [31m30.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting httpx (from gradio)
  Downloading httpx-0.25.0-py3-none-any.whl (75 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [2]:
import numpy as np
import pandas as pd
from datetime import datetime,timedelta
# Plotting import
import matplotlib.pyplot as plt
from matplotlib.patches import Patch
from matplotlib.ticker import MaxNLocator

import seaborn as sns
from IPython.display import Markdown, display
def printmd(string):
    display(Markdown(string))

import os
import json
import gzip
import pandas as pd
from urllib.request import urlopen

## function to convert json.gz into dataframe

In [3]:
# useful functions for data loading
# which are provided by the source of the datasets

def parse(path):
    g = gzip.open(path, 'rb')
    for l in g:
        yield eval(l)

def getDF(path):
    i = 0
    df = {}
    for d in parse(path):
        df[i] = d
        i += 1
    return pd.DataFrame.from_dict(df, orient='index')

## Data preprocessing

Create Dataframe for product and review

In [4]:

# load data
df_product = getDF('/content/meta_Electronics.json.gz')
df_review = getDF('/content/reviews_Electronics_5.json.gz')

In [5]:
df_product.head()

Unnamed: 0,asin,imUrl,description,categories,title,price,salesRank,related,brand
0,132793040,http://ecx.images-amazon.com/images/I/31JIPhp%...,The Kelby Training DVD Mastering Blend Modes i...,"[[Electronics, Computers & Accessories, Cables...",Kelby Training DVD: Mastering Blend Modes in A...,,,,
1,321732944,http://ecx.images-amazon.com/images/I/31uogm6Y...,,"[[Electronics, Computers & Accessories, Cables...",Kelby Training DVD: Adobe Photoshop CS5 Crash ...,,,,
2,439886341,http://ecx.images-amazon.com/images/I/51k0qa8f...,Digital Organizer and Messenger,"[[Electronics, Computers & Accessories, PDAs, ...",Digital Organizer and Messenger,8.15,{'Electronics': 144944},"{'also_viewed': ['0545016266', 'B009ECM8QY', '...",
3,511189877,http://ecx.images-amazon.com/images/I/41HaAhbv...,The CLIKR-5 UR5U-8780L remote control is desig...,"[[Electronics, Accessories & Supplies, Audio &...",CLIKR-5 Time Warner Cable Remote Control UR5U-...,23.36,,"{'also_viewed': ['B001KC08A4', 'B00KUL8O0W', '...",
4,528881469,http://ecx.images-amazon.com/images/I/51FnRkJq...,"Like its award-winning predecessor, the Intell...","[[Electronics, GPS & Navigation, Vehicle GPS, ...",Rand McNally 528881469 7-inch Intelliroute TND...,299.99,,"{'also_viewed': ['B006ZOI9OY', 'B00C7FKT2A', '...",


In [6]:
df_product.shape
df_review.shape

(1689188, 9)

In [7]:
df_review.head()

Unnamed: 0,reviewerID,asin,reviewerName,helpful,reviewText,overall,summary,unixReviewTime,reviewTime
0,AO94DHGC771SJ,528881469,amazdnu,"[0, 0]",We got this GPS for my husband who is an (OTR)...,5.0,Gotta have GPS!,1370131200,"06 2, 2013"
1,AMO214LNFCEI4,528881469,Amazon Customer,"[12, 15]","I'm a professional OTR truck driver, and I bou...",1.0,Very Disappointed,1290643200,"11 25, 2010"
2,A3N7T0DY83Y4IG,528881469,C. A. Freeman,"[43, 45]","Well, what can I say. I've had this unit in m...",3.0,1st impression,1283990400,"09 9, 2010"
3,A1H8PY3QHMQQA0,528881469,"Dave M. Shaw ""mack dave""","[9, 10]","Not going to write a long review, even thought...",2.0,"Great grafics, POOR GPS",1290556800,"11 24, 2010"
4,A24EV6RXELQZ63,528881469,Wayne Smith,"[0, 0]",I've had mine for a year and here's what we go...,1.0,"Major issues, only excuses for support",1317254400,"09 29, 2011"


Compare columns of two dataframe

In [8]:
df_review['reviewerID'].isin(df_product['asin']).value_counts()

False    1689188
Name: reviewerID, dtype: int64

Check if productID(asin) in both dataframe

In [9]:
# get all review product asins
product_with_review_asin = df_review.asin.index.values

# get all metadata product asins
product_asin = df_product['asin'].values

if (list(set(product_asin) & set(product_with_review_asin)) == list(product_with_review_asin)):
    print("All products who possess reviews have the corresponding product description in metadata dataset!")
else:
    print("There exists some review products which is not in metadata dataset.")

There exists some review products which is not in metadata dataset.


In [10]:
product_id_review_df = list(df_review.asin.values)

In [11]:
# electronic_data_merged = df_review[df_review['asin'].isin(product_id_review_df)]

In [12]:
electronic_data_merged = df_product.merge(df_review[df_review['asin'].isin(df_product['asin'])], on='asin')
electronic_data_merged.head()


Unnamed: 0,asin,imUrl,description,categories,title,price,salesRank,related,brand,reviewerID,reviewerName,helpful,reviewText,overall,summary,unixReviewTime,reviewTime
0,528881469,http://ecx.images-amazon.com/images/I/51FnRkJq...,"Like its award-winning predecessor, the Intell...","[[Electronics, GPS & Navigation, Vehicle GPS, ...",Rand McNally 528881469 7-inch Intelliroute TND...,299.99,,"{'also_viewed': ['B006ZOI9OY', 'B00C7FKT2A', '...",,AO94DHGC771SJ,amazdnu,"[0, 0]",We got this GPS for my husband who is an (OTR)...,5.0,Gotta have GPS!,1370131200,"06 2, 2013"
1,528881469,http://ecx.images-amazon.com/images/I/51FnRkJq...,"Like its award-winning predecessor, the Intell...","[[Electronics, GPS & Navigation, Vehicle GPS, ...",Rand McNally 528881469 7-inch Intelliroute TND...,299.99,,"{'also_viewed': ['B006ZOI9OY', 'B00C7FKT2A', '...",,AMO214LNFCEI4,Amazon Customer,"[12, 15]","I'm a professional OTR truck driver, and I bou...",1.0,Very Disappointed,1290643200,"11 25, 2010"
2,528881469,http://ecx.images-amazon.com/images/I/51FnRkJq...,"Like its award-winning predecessor, the Intell...","[[Electronics, GPS & Navigation, Vehicle GPS, ...",Rand McNally 528881469 7-inch Intelliroute TND...,299.99,,"{'also_viewed': ['B006ZOI9OY', 'B00C7FKT2A', '...",,A3N7T0DY83Y4IG,C. A. Freeman,"[43, 45]","Well, what can I say. I've had this unit in m...",3.0,1st impression,1283990400,"09 9, 2010"
3,528881469,http://ecx.images-amazon.com/images/I/51FnRkJq...,"Like its award-winning predecessor, the Intell...","[[Electronics, GPS & Navigation, Vehicle GPS, ...",Rand McNally 528881469 7-inch Intelliroute TND...,299.99,,"{'also_viewed': ['B006ZOI9OY', 'B00C7FKT2A', '...",,A1H8PY3QHMQQA0,"Dave M. Shaw ""mack dave""","[9, 10]","Not going to write a long review, even thought...",2.0,"Great grafics, POOR GPS",1290556800,"11 24, 2010"
4,528881469,http://ecx.images-amazon.com/images/I/51FnRkJq...,"Like its award-winning predecessor, the Intell...","[[Electronics, GPS & Navigation, Vehicle GPS, ...",Rand McNally 528881469 7-inch Intelliroute TND...,299.99,,"{'also_viewed': ['B006ZOI9OY', 'B00C7FKT2A', '...",,A24EV6RXELQZ63,Wayne Smith,"[0, 0]",I've had mine for a year and here's what we go...,1.0,"Major issues, only excuses for support",1317254400,"09 29, 2011"


In [13]:
print(electronic_data_merged['brand'].unique())

[nan 'Barnes &amp; Noble' 'VideoSecu' ... 'Abco Tech' 'Game Golf' 'Noot']


In [14]:
electronic_data_merged.isnull().sum()

asin                   0
imUrl               1213
description        33677
categories             0
title              45502
price              49306
salesRank         879118
related            27046
brand             734937
reviewerID             0
reviewerName       24730
helpful                0
reviewText             0
overall                0
summary                0
unixReviewTime         0
reviewTime             0
dtype: int64

In [15]:
electronic_data_merged.dropna(subset = ['imUrl'], inplace=True)

In [16]:
electronic_data_merged.shape

(1687975, 17)

In [18]:
electronic_data_merged.shape

(1687975, 17)

In [19]:
electronic_data_merged.head(1)

Unnamed: 0,asin,imUrl,description,categories,title,price,salesRank,related,brand,reviewerID,reviewerName,helpful,reviewText,overall,summary,unixReviewTime,reviewTime
0,528881469,http://ecx.images-amazon.com/images/I/51FnRkJq...,"Like its award-winning predecessor, the Intell...","[[Electronics, GPS & Navigation, Vehicle GPS, ...",Rand McNally 528881469 7-inch Intelliroute TND...,299.99,,"{'also_viewed': ['B006ZOI9OY', 'B00C7FKT2A', '...",,AO94DHGC771SJ,amazdnu,"[0, 0]",We got this GPS for my husband who is an (OTR)...,5.0,Gotta have GPS!,1370131200,"06 2, 2013"


In [20]:
print(electronic_data_merged['brand'].unique())

[nan 'Barnes &amp; Noble' 'VideoSecu' ... 'Abco Tech' 'Game Golf' 'Noot']


In [21]:
electronic_data_merged.overall.count()

1687975

In [22]:
printmd("**Number of Rating**: {:,}".format(electronic_data_merged.shape[0]) )
printmd("**Columns**: {}".format( np.array2string(electronic_data_merged.columns.values)) )
printmd("**Number of Users**: {:,}".format(len(electronic_data_merged.reviewerID.unique()) ) )
printmd("**Number of Products**: {:,}".format(len(electronic_data_merged.asin.unique())  ) )

**Number of Rating**: 1,687,975

**Columns**: ['asin' 'imUrl' 'description' 'categories' 'title' 'price' 'salesRank'
 'related' 'brand' 'reviewerID' 'reviewerName' 'helpful' 'reviewText'
 'overall' 'summary' 'unixReviewTime' 'reviewTime']

**Number of Users**: 192,403

**Number of Products**: 62,957

In [23]:
electronic_data_merged.head(1)

Unnamed: 0,asin,imUrl,description,categories,title,price,salesRank,related,brand,reviewerID,reviewerName,helpful,reviewText,overall,summary,unixReviewTime,reviewTime
0,528881469,http://ecx.images-amazon.com/images/I/51FnRkJq...,"Like its award-winning predecessor, the Intell...","[[Electronics, GPS & Navigation, Vehicle GPS, ...",Rand McNally 528881469 7-inch Intelliroute TND...,299.99,,"{'also_viewed': ['B006ZOI9OY', 'B00C7FKT2A', '...",,AO94DHGC771SJ,amazdnu,"[0, 0]",We got this GPS for my husband who is an (OTR)...,5.0,Gotta have GPS!,1370131200,"06 2, 2013"


In [24]:
electronic_data_merged.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1687975 entries, 0 to 1689187
Data columns (total 17 columns):
 #   Column          Non-Null Count    Dtype  
---  ------          --------------    -----  
 0   asin            1687975 non-null  object 
 1   imUrl           1687975 non-null  object 
 2   description     1655420 non-null  object 
 3   categories      1687975 non-null  object 
 4   title           1643686 non-null  object 
 5   price           1639791 non-null  float64
 6   salesRank       810003 non-null   object 
 7   related         1662017 non-null  object 
 8   brand           954251 non-null   object 
 9   reviewerID      1687975 non-null  object 
 10  reviewerName    1663245 non-null  object 
 11  helpful         1687975 non-null  object 
 12  reviewText      1687975 non-null  object 
 13  overall         1687975 non-null  float64
 14  summary         1687975 non-null  object 
 15  unixReviewTime  1687975 non-null  int64  
 16  reviewTime      1687975 non-null  ob

In [25]:
electronic_data_merged.isnull().sum()

asin                   0
imUrl                  0
description        32555
categories             0
title              44289
price              48184
salesRank         877972
related            25958
brand             733724
reviewerID             0
reviewerName       24730
helpful                0
reviewText             0
overall                0
summary                0
unixReviewTime         0
reviewTime             0
dtype: int64

In [26]:
data_by_date = electronic_data_merged.copy()
data_by_date.unixReviewTime = pd.to_datetime(electronic_data_merged.unixReviewTime, unit="s")#.dt.date
data_by_date = data_by_date.sort_values(by="unixReviewTime", ascending=False).reset_index(drop=True)
printmd("**Number of Ratings each day:**")
data_by_date.groupby("unixReviewTime")["overall"].count().tail(10).reset_index()

**Number of Ratings each day:**

Unnamed: 0,unixReviewTime,overall
0,2014-07-14,1498
1,2014-07-15,1059
2,2014-07-16,936
3,2014-07-17,597
4,2014-07-18,552
5,2014-07-19,572
6,2014-07-20,491
7,2014-07-21,661
8,2014-07-22,510
9,2014-07-23,58


In [27]:
data_by_date.head()

Unnamed: 0,asin,imUrl,description,categories,title,price,salesRank,related,brand,reviewerID,reviewerName,helpful,reviewText,overall,summary,unixReviewTime,reviewTime
0,B00ICM7UKG,http://ecx.images-amazon.com/images/I/41aU08Av...,Intel Celeron processor N2820- Features a 1MB ...,"[[Electronics, Computers & Accessories, Laptops]]",Toshiba Satellite C55-A5180 Windows 8.1 15.6&q...,303.95,,"{'also_bought': ['B00E3EALS0', 'B00JEKFKZ2', '...",Satellite,A2IFGGXG3YV3Y6,"Scott Anderson ""Sharpsburg""","[0, 0]","Very tight system, lets discuss the pros and c...",5.0,Can't go wrong here - Excellent System,2014-07-23,"07 23, 2014"
1,B00G4771KA,http://ecx.images-amazon.com/images/I/41moZpAi...,The LCD Touch BacPac is a removable LCD touch ...,"[[Electronics, Camera & Photo, Accessories, Pr...",GoPro LCD Touch BacPac,79.99,{},"{'also_bought': ['B00AU0HMGA', 'B00AAIPT76', '...",,A2YQ9AX4GOTA0S,t. kyle baskett,"[0, 0]",Helps out tremendously.,5.0,GoPro fun.,2014-07-23,"07 23, 2014"
2,B008AL9VXI,http://ecx.images-amazon.com/images/I/31%2BA88...,,"[[Electronics, Computers & Accessories, Extern...",Apple USB Superdrive,79.0,{},"{'also_bought': ['B008ALA6DW', 'B002TLTGM6', '...",,A3SBTW3WS4IQSN,,"[0, 0]",Worked. Don't use it much.,4.0,Worked. Don't use it much.,2014-07-23,"07 23, 2014"
3,B00F9FCW7K,http://ecx.images-amazon.com/images/I/41xoN5qd...,,"[[Electronics, Camera & Photo, Video Surveilla...",Dropcam Pro Wi-Fi Wireless Video Monitoring Ca...,199.99,{'Camera &amp; Photo': 3},"{'also_bought': ['B009GUMXQE', 'B00K5D3CPC', '...",Dropcam,A1NXNMRAVPGTO3,Y. Xu,"[0, 0]","The wi-fi range is very limited, maybe because...",1.0,limited wi-fi range and unable to record,2014-07-23,"07 23, 2014"
4,B00KHA2DQM,http://ecx.images-amazon.com/images/I/41jvQo10...,,"[[Electronics, eBook Readers & Accessories, eB...","Samsung Galaxy Tab S 8.4-Inch Tablet (16 GB, T...",397.99,{'Electronics': 641},"{'also_bought': ['B00KBVPCLK', 'B00KVGAN3W', '...",Samsung,A37G90KBW3CZXT,Cms3717,"[0, 0]","Great tablet, I love this thing. Excellent rep...",5.0,Best tablet I've owned.,2014-07-23,"07 23, 2014"


In [28]:
data_by_date["year"]  = data_by_date.unixReviewTime.dt.year
data_by_date["month"] = data_by_date.unixReviewTime.dt.month
rating_by_year = data_by_date.groupby(["year","month"])["overall"].count().reset_index()
rating_by_year["date"] = pd.to_datetime(rating_by_year["year"].astype("str")  +"-"+rating_by_year["month"].astype("str") +"-1")


In [29]:
ratings = electronic_data_merged.groupby("overall").agg({"overall":"count"})

In [30]:
rating_by_user = electronic_data_merged.groupby(by='reviewerID')['overall'].count().sort_values(ascending=False)

In [31]:
#rating_by_product = electronics_data.groupby(by='productId')['Rating'].count().sort_values(ascending=False).reset_index()
rating_by_product = electronic_data_merged.groupby("asin").agg({"reviewerID":"count","overall":"mean"}).rename(
                        columns={"reviewerID":"Number of Ratings", "overall":"Average Rating"}).reset_index()

In [32]:
printmd("**Top 10 products by Number of Rating**")
rating_by_product.sort_values(by="Number of Ratings",ascending=False ).reset_index(drop=True).head(10)

**Top 10 products by Number of Rating**

Unnamed: 0,asin,Number of Ratings,Average Rating
0,B007WTAJTO,4915,4.587589
1,B003ES5ZUU,4143,4.800386
2,B00DR0PDNE,3798,3.997894
3,B0019EHU8G,3435,4.801164
4,B002WE6D44,2813,4.659794
5,B003ELYQGG,2652,4.355958
6,B0002L5R78,2599,4.599846
7,B009SYZ8OC,2542,4.444925
8,B00BGGDVOO,2104,4.421578
9,B002V88HFE,2082,4.736311


In [33]:
# Define Cutoff is the value where the products is counted when its number of ratings is greater than.
# product with less than cutoff number of ratings will be neglected
cutoff = 50
top_rated = rating_by_product.loc[rating_by_product["Number of Ratings"]>cutoff].sort_values(
                                by="Average Rating",ascending=False).reset_index(drop=True)

In [34]:
printmd("**Top 5 Products have the highest Ratings**")
top_rated.head(5)

**Top 5 Products have the highest Ratings**

Unnamed: 0,asin,Number of Ratings,Average Rating
0,B003ZSHNE0,52,4.961538
1,B00CG70K78,51,4.960784
2,B005LJQPE0,94,4.946809
3,B00006I53W,77,4.935065
4,B008LTBINU,98,4.928571


In [35]:
printmd("**With cutoff = {} we have only {:,} products.**".format(cutoff,top_rated.shape[0] ))
top_rated.describe()

**With cutoff = 50 we have only 6,374 products.**

Unnamed: 0,Number of Ratings,Average Rating
count,6374.0,6374.0
mean,143.979762,4.228762
std,201.547109,0.384346
min,51.0,2.037037
25%,64.0,4.009217
50%,87.0,4.301045
75%,143.0,4.506329
max,4915.0,4.961538


In [36]:
begin_date     = data_by_date.unixReviewTime[0] - timedelta(days=30)
data_by_date30 = data_by_date.loc[ data_by_date.unixReviewTime > begin_date  ]
products_30days= data_by_date30.groupby(["asin"]).agg({"overall":["mean","count"]}).droplevel(axis=1,level=0).reset_index()

In [37]:
top_rated = products_30days.sort_values(by="count", ascending=False).head(40)



In [38]:
top_rated = products_30days.loc[products_30days["count"] > 50].sort_values(by="mean", ascending=False).head(40)



# TensorFlow Recommenders

TensorFlow Recommenders
TensorFlow Recommenders (TFRS) is a library for building recommender system models.

It helps with the full workflow of building a recommender system: data preparation, model formulation, training, evaluation, and deployment. It's built on Keras and aims to have a gentle learning curve while still giving you the flexibility to build complex models. TFRS is open source and available on Github.

TFRS makes it possible to:

Build and evaluate flexible recommendation retrieval models.
Freely incorporate item, user, and context information into recommendation models.
Train multi-task models that jointly optimize multiple recommendation objectives.

In [40]:
import numpy as np
import tensorflow as tf
import tensorflow_recommenders as tfrs

# Build a model.
class RankingModel(tf.keras.Model):

    def __init__(self):
        super().__init__()
        embedding_dimension = 32

        self.user_embeddings = tf.keras.Sequential([
            # This convert string into integer indices for embeddings
                                    tf.keras.layers.experimental.preprocessing.StringLookup(
                                        vocabulary=unique_userIds, mask_token=None),
                                        # add addional embedding to account for unknow tokens
                                    tf.keras.layers.Embedding(len(unique_userIds)+1, embedding_dimension)
                                    ])

        self.product_embeddings = tf.keras.Sequential([
            # This convert string into integer indices for embeddings
                                    tf.keras.layers.experimental.preprocessing.StringLookup(
                                        vocabulary=unique_productIds, mask_token=None),
                                    # add addional embedding to account for unknow tokens
                                    tf.keras.layers.Embedding(len(unique_productIds)+1, embedding_dimension)
                                    ])
        # Set up a retrieval task and evaluation metrics over the
        # entire dataset of candidates.
        self.ratings = tf.keras.Sequential([
                            tf.keras.layers.Dense(256, activation="relu"),
                            tf.keras.layers.Dense(64,  activation="relu"),
                            tf.keras.layers.Dense(1)
                              ])
    def call(self, userId, productId):
        user_embeddings  = self.user_embeddings (userId)
        product_embeddings = self.product_embeddings(productId)
        product_embeddings = self.product_embeddings(productId)

        return self.ratings(tf.concat([user_embeddings,product_embeddings], axis=1))

# Build a model.
class amazonModel(tfrs.models.Model):

    def __init__(self):
        super().__init__()
        self.ranking_model: tf.keras.Model = RankingModel()
        self.task: tf.keras.layers.Layer   = tfrs.tasks.Ranking(
                                                    loss    =  tf.keras.losses.MeanSquaredError(),
                                                    metrics=[tf.keras.metrics.RootMeanSquaredError()])


    def compute_loss(self, features, training=False):
        rating_predictions = self.ranking_model(features["userId"], features["productId"]  )

        return self.task( labels=features["rating"], predictions=rating_predictions)

In [41]:
cutoff_no_rat = 50    ## Only count products which received more than or equal 50
cutoff_year   = 2011  ## Only count Rating after 2011
recent_data   = data_by_date.loc[data_by_date["year"] > cutoff_year]
recent_data.head(1)
print("Number of Rating: {:,}".format(recent_data.shape[0]) )
print("Number of Users: {:,}".format(len(recent_data.reviewerID.unique()) ) )
print("Number of Products: {:,}".format(len(recent_data.asin.unique())  ) )
del data_by_date  ### Free up memory ###
recent_prod   = recent_data.loc[recent_data.groupby("asin")["overall"].transform('count').ge(cutoff_no_rat)].reset_index(
                    drop=True).drop(["imUrl","description","categories", "title", "helpful", "price", "salesRank", "related", "brand", "reviewerName", "helpful", "reviewText", "summary", "unixReviewTime", "reviewTime", "year", "month"],axis=1)
#del recent_data  ### Free up memory ###

Number of Rating: 1,216,260
Number of Users: 181,704
Number of Products: 56,805


In [42]:
recent_data.head(1)

Unnamed: 0,asin,imUrl,description,categories,title,price,salesRank,related,brand,reviewerID,reviewerName,helpful,reviewText,overall,summary,unixReviewTime,reviewTime,year,month
0,B00ICM7UKG,http://ecx.images-amazon.com/images/I/41aU08Av...,Intel Celeron processor N2820- Features a 1MB ...,"[[Electronics, Computers & Accessories, Laptops]]",Toshiba Satellite C55-A5180 Windows 8.1 15.6&q...,303.95,,"{'also_bought': ['B00E3EALS0', 'B00JEKFKZ2', '...",Satellite,A2IFGGXG3YV3Y6,"Scott Anderson ""Sharpsburg""","[0, 0]","Very tight system, lets discuss the pros and c...",5.0,Can't go wrong here - Excellent System,2014-07-23,"07 23, 2014",2014,7


In [43]:
recent_prod.head(1)

Unnamed: 0,asin,reviewerID,overall
0,B008AL9VXI,A3SBTW3WS4IQSN,4.0


In [44]:
userIds    = recent_prod.reviewerID.unique()
productIds = recent_prod.asin.unique()
total_ratings= len(recent_prod.index)

In [45]:
ratings = tf.data.Dataset.from_tensor_slices( {"userId":tf.cast( recent_prod.reviewerID.values  ,tf.string),
                                "productId":tf.cast( recent_prod.asin.values,tf.string),
                                "rating":tf.cast( recent_prod.overall.values  ,tf.int8,) } )

In [46]:
tf.random.set_seed(42)
shuffled = ratings.shuffle(100_000, seed=42, reshuffle_each_iteration=False)

train = shuffled.take( int(total_ratings*0.8) )
test = shuffled.skip(int(total_ratings*0.8)).take(int(total_ratings*0.2))

unique_productIds = productIds
unique_userIds    = userIds

In [47]:
model = amazonModel()
model.compile(optimizer=tf.keras.optimizers.Adagrad( learning_rate=0.1 ))
cached_train = train.shuffle(100_000).batch(8192).cache()
cached_test = test.batch(4096).cache()
model.fit(cached_train, epochs=100)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.src.callbacks.History at 0x797c14c24400>

In [48]:
# Evaluate.
model.evaluate(cached_test, return_dict=True)



{'root_mean_squared_error': 1.255643606185913,
 'loss': 1.1346827745437622,
 'regularization_loss': 0,
 'total_loss': 1.1346827745437622}

In [49]:
userIds[110]

'A2YL0DG11VA95'

# predict_recommended_product_details function
It takes userID and recommend top 5 product along with other user's feedbacks

In [50]:
import random
import requests
from PIL import Image
from io import BytesIO

def get_img(List, a, b):
  img = requests.get(List[a][b])
  img = Image.open(BytesIO(img.content))
  return img

In [51]:
def predict_recommended_product_details(user_rand):
  # user_rand = random.choice(userIds)
  test_rating = {}
  for m in test.take(5):
      test_rating[m["productId"].numpy()]=RankingModel()(tf.convert_to_tensor([user_rand]),tf.convert_to_tensor([m["productId"]]))
  print("Top 5 recommended products for User {}: ".format(user_rand))

  all_products = []
  for m in sorted(test_rating, key=test_rating.get, reverse=True):
      top_5 = m.decode()
      all_products.append(top_5)
  print(all_products)
  filtered_df = electronic_data_merged[electronic_data_merged['asin'].isin(all_products)]
  filtered_df = filtered_df.drop(["related", "salesRank"], axis = 1)
  filtered_df = filtered_df.drop_duplicates(subset='asin')

 #   filtered_df = filtered_df.where(filtered_df.overall > 4).dropna().drop_duplicates(subset='asin')

  rec = filtered_df.values.tolist()

  first_img = get_img(rec, 0, 1)
  second_img = get_img(rec, 1, 1)
  third_img = get_img(rec, 2, 1)
  fourth_img = get_img(rec, 3, 1)
  fifth_img = get_img(rec, 4, 1)

  return rec[0][6], rec[0][10], rec[0][4], first_img, rec[1][6], rec[1][10], rec[1][4], second_img, rec[2][6], rec[2][10], rec[2][4], third_img, rec[3][6], rec[3][10], rec[3][4], fourth_img,  rec[4][6], rec[4][10], rec[4][4], fifth_img



In [52]:
predict_recommended_product_details(userIds[110])

Top 5 recommended products for User A2YL0DG11VA95: 
['B0019D6H2M', 'B002WE6D44', 'B002OOWB3O', 'B0027YYMU6', 'B006V72AHC']


('VideoSecu',
 'Got these mounts for 37" TVs. The build quality is solid, the built-in spirit level makes them a snap to install and hardware is included for mounting different TVs to different surfaces.I mounted the LG TVs to the studs in the wall, the included bolts made gripped the studs well and the large number of mounting holes made is easy to align the mount to the studs to get a solid mount. The brackets that screw into the TV which then grip the plate on the wall have a large lateral adjustment range so it is easy to mount the TV exactly where you want even if studs aren\'t perfectly positioned in the center for your location.I was surprised with how easy these were to install, all told, each TV took me about 15 minutes to put up. When you tighten the bolts into studs, it will help you a lot if you have a ratcheting socket wrench to turn them as there isn\'t a whole lot of space around the bolt head.',
 'VideoSecu TV Wall Mount Tilt Low Profile Ultra Slim Television Mount Brac

# Deployment Gradio

In [53]:
import gradio as gr

In [54]:
sample_texts = [ [userIds[140]], [userIds[100]], [userIds[110]], [userIds[150]]  ]

In [55]:
with gr.Blocks() as demo:
  with gr.Tab("Recommendation System For Amazon"):

      with gr.Row():
          with gr.Column():
              user_id = gr.components.Textbox(label = "User ID")
      with gr.Row():
          with gr.Column():
              first_recommendation_brand = gr.components.Textbox(label = " 1st Recommendation Brand")
              first_recommendation_review = gr.components.Textbox(label = " 1st Recommendation product review")
              first_recommendation_product = gr.components.Textbox(label = " 1st Recommendation product")
              first_recommendation_image = gr.components.Image(label = " 1st Recommendation product Image")
          with gr.Column():
              second_recommendation_brand = gr.components.Textbox(label = " 2nd Recommendation Brand")
              second_recommendation_review = gr.components.Textbox(label = " 2nd Recommendation product review")
              second_recommendation_product = gr.components.Textbox(label = " 2nd Recommendation product")
              second_recommendation_image = gr.components.Image(label = " 2nd Recommendation product Image")

          with gr.Column():
              third_recommendation_brand = gr.components.Textbox(label = " 3rd Recommendation Brand")
              third_recommendation_review = gr.components.Textbox(label = " 3rd Recommendation product review")
              third_recommendation_product = gr.components.Textbox(label = " 3rd Recommendation product")
              third_recommendation_image = gr.components.Image(label = " 3rd Recommendation product Image")

          with gr.Column():
              fourth_recommendation_brand = gr.components.Textbox(label = " 4th Recommendation Brand")
              fourth_recommendation_review = gr.components.Textbox(label = " 4th Recommendation product review")
              fourth_recommendation_product = gr.components.Textbox(label = " 4th Recommendation product")
              fourth_recommendation_image = gr.components.Image(label = " 4th Recommendation product Image")

          with gr.Column():
              fifth_recommendation_brand = gr.components.Textbox(label = " 5th Recommendation Brand")
              fifth_recommendation_review = gr.components.Textbox(label = " 5th Recommendation product review")
              fifth_recommendation_product = gr.components.Textbox(label = " 5th Recommendation product")
              fifth_recommendation_image = gr.components.Image(label = " 5th Recommendation product Image")



      btn = gr.Button("Get recommendation details")
      btn.click(predict_recommended_product_details,inputs = [user_id], outputs=[first_recommendation_brand, first_recommendation_review, first_recommendation_product, first_recommendation_image,
                                                                                 second_recommendation_brand, second_recommendation_review, second_recommendation_product, second_recommendation_image,
                                                                                 third_recommendation_brand, third_recommendation_review, third_recommendation_product, third_recommendation_image,
                                                                                 fourth_recommendation_brand, fourth_recommendation_review, fourth_recommendation_product, fourth_recommendation_image,
                                                                                 fifth_recommendation_brand, fifth_recommendation_review, fifth_recommendation_product, fifth_recommendation_image
                                                                                 ])
      gr.Examples(sample_texts, inputs=[user_id])


if __name__ == "__main__":
    demo.launch(share=True, debug=True)

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
Running on public URL: https://14a624419c63c5f4ad.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


Top 5 recommended products for User ATSRHCCG86WPW: 
['B0019D6H2M', 'B006V72AHC', 'B002WE6D44', 'B002OOWB3O', 'B0027YYMU6']
Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7860 <> https://14a624419c63c5f4ad.gradio.live
