# Import Libraries

In [12]:
from typing import Dict, Text

import numpy as np
import pandas as pd
import tensorflow as tf


import os
from tensorflow import keras

import tensorflow_datasets as tfds
import tensorflow_recommenders as tfrs

# Load the cleaned data for TensorFlow, which was preprocessed using the data_preprocessing function

In [22]:
def product_df(site_id):
    """Load the product CSV and build the datasets used for training and indexing.

    Only the columns that are actually consumed (``product_id`` and
    ``short_description``) are converted to tensors; the remaining CSV
    columns stay in the returned DataFrame only.

    Args:
        site_id: Accepted for interface compatibility. It is not used here;
            the same CSV path is read for every value.

    Returns:
        A tuple ``(products, category, df)`` where
        ``products`` is a ``tf.data.Dataset`` yielding each row's ``product_id``,
        ``category`` is a ``tf.data.Dataset`` yielding dicts with the keys
        ``"short_description"`` and ``"product_id"``, and
        ``df`` is the DataFrame read from the CSV with ``product_id`` cast to ``str``.
    """
    path = 'dataset/tf_data/tensor_data.csv'
    df = pd.read_csv(path)
    df['product_id'] = df['product_id'].astype('str')

    product_ids = df['product_id'].to_numpy()
    descriptions = df['short_description'].to_numpy()

    # Candidate side: one product id per element.
    products = tf.data.Dataset.from_tensor_slices(product_ids)

    # Training side: (short_description, product_id) pairs keyed by name.
    category = tf.data.Dataset.from_tensor_slices({
        "short_description": descriptions,
        "product_id": product_ids,
    })

    return products, category, df

In [23]:
def vocabulary(site_id):
    """Build the string-lookup layers for product ids and short descriptions.

    Both vocabularies are de-duplicated (keeping first-appearance order)
    before being handed to ``StringLookup``, which does not accept a
    vocabulary containing repeated terms.

    Args:
        site_id: Forwarded to ``product_df``.

    Returns:
        A tuple ``(product_ids_vocabulary, product_category_vocabulary)`` of
        ``tf.keras.layers.StringLookup`` layers, both created with
        ``mask_token=None``.
    """
    product_frame = product_df(site_id)[2]

    # Same order as the CSV when ids are already unique; repeated ids would
    # otherwise make StringLookup raise.
    ids = product_frame['product_id'].drop_duplicates().tolist()
    product_ids_vocabulary = tf.keras.layers.StringLookup(vocabulary=ids, mask_token=None)

    cat = product_frame['short_description'].drop_duplicates().tolist()
    product_category_vocabulary = tf.keras.layers.StringLookup(vocabulary=cat, mask_token=None)

    return product_ids_vocabulary, product_category_vocabulary

In [24]:
class RecommendModel(tfrs.Model):
    """Two-tower retrieval model pairing short descriptions with products.

    ``tfrs.Model`` is a base class that reduces training boilerplate; under
    the hood these are still plain Keras models.
    """

    def __init__(self, product_model: tf.keras.Model, category_model: tf.keras.Model, task: tfrs.tasks.Retrieval):
        """Store the two embedding towers and the retrieval task.

        Args:
            product_model: Maps ``product_id`` strings to embeddings.
            category_model: Maps ``short_description`` strings to embeddings.
            task: Retrieval task used to compute the loss (and its metrics).
        """
        super().__init__()

        # Product and short-description representations.
        self.product_model = product_model
        self.category_model = category_model

        # Retrieval task (loss + metrics).
        self.task = task

    def compute_loss(self, features: Dict[Text, tf.Tensor], training=False) -> tf.Tensor:
        """Compute the retrieval loss for one batch.

        Args:
            features: Batch dict with the keys ``"product_id"`` and
                ``"short_description"``.
            training: Accepted for interface compatibility; not used here.

        Returns:
            The loss tensor returned by the retrieval task.
        """
        product_embeddings = self.product_model(features["product_id"])
        category_embeddings = self.category_model(features["short_description"])

        # The task takes (query, candidate). Elsewhere in this notebook the
        # products are the candidates (metric candidates and the brute-force
        # index are built from product_model) and the short description is the
        # query, so the description embeddings must come first.
        return self.task(category_embeddings, product_embeddings)

In [25]:
def neural_net(site_id):
    """Create the product and short-description embedding towers.

    Each tower is a ``tf.keras.Sequential`` made of a string-lookup layer
    followed by a 64-dimensional embedding sized to that lookup's vocabulary.

    Args:
        site_id: Forwarded to ``vocabulary``.

    Returns:
        A tuple ``(product_model, category_model)``.
    """
    product_ids_vocabulary, product_category_vocabulary = vocabulary(site_id)

    def _tower(lookup):
        # Lookup layer turns strings into integer indices for the embedding.
        embedding = tf.keras.layers.Embedding(lookup.vocabulary_size(), 64)
        return tf.keras.Sequential([lookup, embedding])

    product_model = _tower(product_ids_vocabulary)
    category_model = _tower(product_category_vocabulary)

    return product_model, category_model

In [26]:
def task1(site_id):
    """Build the retrieval task with a factorized top-K metric over all products.

    Note: the product tower used to embed the metric candidates is created
    here by a fresh ``neural_net`` call, so it is a separate instance from any
    tower the caller builds itself.

    Args:
        site_id: Forwarded to ``product_df`` and ``neural_net``.

    Returns:
        A ``tfrs.tasks.Retrieval`` task.
    """
    products, _, _ = product_df(site_id)
    product_model, _ = neural_net(site_id)

    # Candidate embeddings for the metric, computed in batches of 128 ids.
    candidate_embeddings = products.batch(128).map(product_model)
    top_k_metric = tfrs.metrics.FactorizedTopK(candidate_embeddings)

    return tfrs.tasks.Retrieval(metrics=top_k_metric)

# Train the TensorFlow model

In [27]:
def train(site_id):
    """Train the retrieval model and save its weights.

    Builds the two towers and the retrieval task, fits a ``RecommendModel``
    for 3 epochs on the (short_description, product_id) dataset in batches of
    50, and writes the weights to ``dataset/tf_model/model`` in TensorFlow
    checkpoint format.

    Args:
        site_id: Forwarded to ``product_df``, ``neural_net`` and ``task1``.

    Returns:
        None.
    """
    category = product_df(site_id)[1]
    product_model, category_model = neural_net(site_id)
    task = task1(site_id)

    # Create the retrieval model (the class defined in this notebook).
    model = RecommendModel(product_model, category_model, task)
    # NOTE(review): the training loss presumably comes from
    # RecommendModel.compute_loss, so `loss='mse'` looks unused - confirm
    # before removing it.
    model.compile(optimizer = 'Adam', loss = 'mse') #metrics = ['accuracy']

    # Train for 3 epochs.
    model.fit(category.batch(50), epochs=3)

    path = 'dataset/tf_model/model'
    model.save_weights(path, save_format='tf')
    

In [None]:
# train('1')

In [28]:
def load_model(site_id):
    """Restore the saved weights and build a brute-force retrieval index.

    Args:
        site_id: Forwarded to ``product_df``, ``neural_net`` and ``task1``.

    Returns:
        A ``tfrs.layers.factorized_top_k.BruteForce`` index that uses the
        category tower as its query model and is populated with
        (product id, product embedding) pairs for every product.
    """
    products = product_df(site_id)[0]
    product_model, category_model = neural_net(site_id)
    task = task1(site_id)

    # Rebuild the same architecture, then load the trained weights into it.
    model = RecommendModel(product_model, category_model, task)
    weights_path = 'dataset/tf_model/model'
    model.load_weights(weights_path)

    def _id_with_embedding(title):
        # Pair each batch of product ids with its embeddings.
        return title, model.product_model(title)

    # Use brute-force search to set up retrieval using the trained representations.
    index = tfrs.layers.factorized_top_k.BruteForce(model.category_model)
    candidates = products.batch(100).map(_id_with_embedding)
    index.index_from_dataset(candidates)

    return index

In [29]:
def recommendation(site_id,pid):
    """Return recommended product ids for a given product.

    The product's ``short_description`` is looked up in the CSV and used as
    the query against the retrieval index. If several rows share the product
    id, the description of the last matching row is used.

    Args:
        site_id: Forwarded to ``product_df`` and ``load_model``.
        pid: Product id to get recommendations for; compared as ``str(pid)``.

    Returns:
        A list of recommended product ids as ``int``. The list is empty when
        ``pid`` is not present in the data.
    """
    df = product_df(site_id)[2]

    p_id = str(pid)
    descriptions = df.loc[df['product_id'] == p_id, 'short_description'].tolist()
    if not descriptions:
        # Unknown product: nothing to query with, and no need to load the model.
        return []

    index = load_model(site_id)

    # Get some recommendations. One query is enough: only the result for the
    # last matching row is ever returned.
    _, titles = index(np.array([descriptions[-1]]))

    # The index yields product ids as byte strings; flatten and convert to int.
    return [int(raw_id.decode()) for raw_id in np.asarray(titles).ravel()]

In [30]:
# Example run: fetch recommendations for product 95782. The result is kept in
# the notebook-level name `reco_id`, which reco_name_testing() reads.
reco_id=recommendation('1',95782)
reco_id



[115947,
 116918,
 113761,
 116166,
 115362,
 117164,
 115049,
 116582,
 116507,
 113268]

In [31]:
def reco_name_testing(ids=None):
    """Look up the product names for a list of product ids.

    Args:
        ids: Iterable of product ids to look up. When omitted, the
            notebook-level ``reco_id`` variable is used, so it must have been
            assigned before the call.

    Returns:
        A list of ``product_name`` values, grouped in the order of ``ids``;
        within one id, matching rows keep their CSV order. Ids with no
        matching row contribute nothing.
    """
    if ids is None:
        ids = reco_id

    df = pd.read_csv('dataset/tf_data/tensor_data.csv')

    names = []
    for product_id in ids:
        # Boolean-mask selection avoids mixing index labels with positions.
        matches = df.loc[df['product_id'] == product_id, 'product_name']
        names.extend(matches.tolist())
    return names
# Show the product names for the ids currently held in the global `reco_id`.
reco_name_testing()

['Bottle - Bri Glass, With Cover, Blue Flower',
 'Stainless Steel Water Bottle - Silver, BB 492 1',
 'Trendy Stainless Steel Bottle With Sipper Cap - Steel Matt Finish, PXP 1002 CQ',
 'Plastic Bottle - Narrow Neck, Blue, Apollo, 20371BL',
 'Glass Water Bottle With Square Base - Blue, BB 1360',
 'Plastic Bottle - Narrow Neck, Green, Apollo, 20371GN',
 'Leo Plastic Pet Water Bottle - White, Wide Mouth',
 'Aqua Stainless Steel Bottle With Steel Cap - Steel Mirror Finish, PXP 1003 CK',
 'Frost Stainless Steel Bottle With Sipper Cap - Steel Mirror Finish, PXP 1004 CQ',
 'Whip Insulated Water Bottle - Red']