In [36]:
import os
import io

import numpy as np
import pandas as pd

from gensim.models import Word2Vec

from chatterbot import ChatBot
from chatterbot.trainers import ListTrainer

import nltk
from nltk.corpus import stopwords
stop_words = set(stopwords.words('english'))

import tensorflow as tf
from tensorflow.train import Checkpoint

from tensorboard.plugins import projector

### Training hobby model to identify hobby from chatbot conversation

In [None]:
hobbyData = pd.read_csv('hobbies.csv')

In [None]:
hobbies = [str(hobby).lower().split() for hobby in hobbyData['HOBBIES']]

In [None]:
hobbies = [[h for h in hobby if h not in stop_words] for hobby in hobbies]

In [None]:
hobbyModel = Word2Vec(hobbies, min_count=1, size=100)

In [None]:
hobbyModel.save('hobby.w2v')

In [14]:
hobbyModel = Word2Vec.load('hobby.w2v')

### Visualizing HobbyModel

In [48]:
embeddings = {}

for word in hobbyModel.wv.vocab:
    embeddings[word] = hobbyModel.wv[word]

In [49]:
embeddings_vectors = np.stack(list(embeddings.values()))

In [50]:
# https://www.javaer101.com/en/article/985198.html

tf.compat.v1.disable_eager_execution()

# Create some variables.
emb = tf.Variable(embeddings_vectors, name='hobby_embeddings')

# Add an op to initialize the variable.
init_op = tf.compat.v1.global_variables_initializer()

# Add ops to save and restore all the variables.
saver = tf.compat.v1.train.Saver()

# Later, launch the model, initialize the variables and save the
# variables to disk.
with tf.compat.v1.Session() as sess:
   sess.run(init_op)

# Save the variables to disk.
   save_path = saver.save(sess, "hobby_logs/hobby.ckpt")
   print("Model saved in path: %s" % save_path)

Model saved in path: hobby_logs/hobby.ckpt


In [51]:
hobby_log_dir = 'hobby_logs'

if not os.path.exists(hobby_log_dir):
    os.makedirs(hobby_log_dir)

out_v_path = os.path.join(hobby_log_dir, 'vecs.tsv')
out_m_path = os.path.join(hobby_log_dir, 'metadata.tsv')

out_v = io.open(out_v_path, 'w', encoding='utf-8')
out_m = io.open(out_m_path, 'w', encoding='utf-8')

for word in hobbyModel.wv.vocab:
    vec = hobbyModel.wv[word]
    out_m.write(word + '\n')
    out_v.write('\t'.join([str(x) for x in vec]) + '\n')
    
out_v.close()
out_m.close()        

In [52]:
config = projector.ProjectorConfig()
embedding = config.embeddings.add()

embedding.tensor_name = 'hobbymodel/.ATTRIBUTES/VARIABLE_VALUE'
embedding.metadata_path = 'metadata.tsv'

projector.visualize_embeddings(hobby_log_dir, config)

In [53]:
%load_ext tensorboard

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


In [58]:
%tensorboard --logdir recommender_logs --host localhost

ERROR: Timed out waiting for TensorBoard to start. It may still be running as pid 7512.

***
### Processing amazon_au.tsv dataset to include a column with hobby keywords from Product Description

In [50]:
data = pd.read_table('./amazon_au.tsv', usecols=['Product Name Source', 'Product Description'])

In [51]:
product_description = [str(desc).lower().split() for desc in data['Product Description']]

In [52]:
hobbyModel = Word2Vec.load('hobby.w2v')

def optimize(frame):
    sentence = str(frame['Product Description']).split()
    keywords = ''
    
    for word in sentence:
        if word in hobbyModel.wv.vocab:
            sim_test = hobbyModel.wv.most_similar(positive=[word], topn=1)

            if len(sim_test) > 0 and sim_test[0][1] > 0.1:
                keywords += str(word) + ' '

    frame['Keywords'] = keywords
    
    return frame

In [53]:
new_data = data.apply(optimize, axis=1)

In [54]:
new_data.to_csv('./product_hobby.csv', columns=['Product Name Source', 'Keywords'])

In [None]:
word2vecModel = Word2Vec(product_description, min_count=1, size=100)

In [None]:
word2vecModel.save('recommender.w2v')

***

In [55]:
word2vecModel = Word2Vec.load('recommender.w2v')

In [56]:
recommender_log_dir = 'recommender_logs'

if not os.path.exists(recommender_log_dir):
    os.makedirs(recommender_log_dir)

out_v_path = os.path.join(recommender_log_dir, 'vecs.tsv')
out_m_path = os.path.join(recommender_log_dir, 'metadata.tsv')

out_v = io.open(out_v_path, 'w', encoding='utf-8')
out_m = io.open(out_m_path, 'w', encoding='utf-8')

for word in word2vecModel.wv.vocab:
    vec = word2vecModel.wv[word]
    out_m.write(word + '\n')
    out_v.write('\t'.join([str(x) for x in vec]) + '\n')
    
out_v.close()
out_m.close()        

In [64]:
keywords = ['reading']

def lookup(frame):
        simi_score = 0
        counter = 0

        for word1 in keywords:
            if word1 in word2vecModel.wv.vocab:
                for word2 in str(frame['Keywords']).split(' '):
                    if word2 in word2vecModel.wv.vocab:
                        simi_score += word2vecModel.wv.similarity(word1, word2)
                        counter += 1
        
        if counter > 0:
            frame['Score'] = simi_score / counter
        else:
            frame['Score'] = 0

        return frame

In [65]:
products = pd.read_csv('./product_hobby.csv', usecols=['Product Name Source', 'Keywords'])

In [66]:
new_products = products.apply(lookup, axis=1)

In [68]:
new_products.dropna(inplace=True)

In [69]:
new_products.sort_values(by=['Score'], ascending=False).head()

Unnamed: 0,Product Name Source,Keywords,Score
7039,LXLIGHTS Headboard Bedside Cushion Waist Pad ...,reading,1.0
2581,Bedside Back Cushion/Triangle Sofa Big Backre...,reading,1.0
8208,RMJAI Solid Wood Shoe Bench Square Wood Suppo...,watching,0.789482
27422,YMM Industrial Bar Stool Kitchen Vintage Dini...,table reading,0.696118
9307,Warmies® Microwavable French Lavender Scented...,relaxing,0.63666


In [6]:
bot = ChatBot(
    'Recommender',
    storage_adapter='chatterbot.storage.SQLStorageAdapter',
    database_uri='sqlite:///database.sqlite3.2',
    logic_adapters=[
        {'import_path':'HobbyLogic.HobbyLogic'},
        'chatterbot.logic.BestMatch',
        ]
)

In [7]:
trainer = ListTrainer(bot)

In [8]:
trainer.train([
    'Hi', 'Hello', 'Hi! How are you?', 'Hello! How are you?', 'How are you?', 'I am fine. Thank you. How are you?', 'I am fine. Thank you.',
    'Hello. What is your name?', 'Hi, What is your name?', 'What is your name?',
    'Nice to meet you. I am ProductBot. I can recommend products for you if you tell me your hobby.',
    'My name is ProductBot. I can recommend products for you if you tell me your hobby.',
    'I can recommend products for you if you tell me your hobby.',
    'What is your hobby?',
    'My hobby is to ask about your hobby. Tell me about your hobby.'
])

List Trainer: [####################] 100%


In [63]:
name=input("Enter Your Name: ")
print("Welcome to the recommender! Let me help you find the perfect product.")
while True:
    request=input(name+':')
    if request=='Bye' or request =='bye':
        print('Bot: Bye')
        break
    else:
        response=bot.get_response(request)
        print('Bot:',response)

Enter Your Name:  Ishrak


Welcome to the recommender! Let me help you find the perfect product.


Ishrak: Hello


Bot: Hi! How are you?


Ishrak: I am fine. Thank you. How are you?


Bot: I am fine. Thank you.


Ishrak: What is your name?


Bot: Nice to meet you. I am ProductBot. I can recommend products for you if you tell me your hobby.


Ishrak: Nice to meet you. My hobby is reading


Bot: Based on your hobby, I am recommending the following products:
1. (96.50378627702594) HEEPZZZ Memory Foam Travel Neck Pillow with Soft 4 Way Plush Cover - Supports Your Head & Chin While Sleeping on Airplane, Bus, Train or Car Travel - Includes Carry Bag Accessories - for Men & Women 
2. (35.31861296016723) ZXH77f Lazy Removable Bedside Table Laptop Table Desktop Bed with Simple Desk Simple Folding Small Table (Size : 50x70cm) 
3. (34.8835014840588) ZXH77f Bedside Edge Computer Desk Removable Laptop Desk Bedside Sofa Mobile Table Lazy Lift Bedside Study Table 
4. (33.849266801960766) ZXH77f Foldable Multi-Function Side Table, Nordic Small Apartment Creative Sofa Small Coffee Table (Color : Red) 
5. (31.04464272595942) ZXH77f Wall-Mounted Folding Table Dining Table Student Desk Study Table Wall Table Computer Desk Note Table (Size : 70cmx50cm) 



Ishrak: Bye


Bot: Bye


***
## References

1. https://www.kaggle.com/muhadel/hobbies
2. https://data.world/promptcloud/amazon-australia-product-listing
3. https://chatterbot.readthedocs.io/en/latest/conversations.html
4. https://www.datacamp.com/community/tutorials/building-a-chatbot-using-chatterbot
5. https://radimrehurek.com/gensim/auto_examples/tutorials/run_word2vec.html
6. https://www.javaer101.com/en/article/985198.html
7. https://www.tensorflow.org/tensorboard/tensorboard_projector_plugin
8. https://projector.tensorflow.org/