In [1]:
import pandas as pd

In [2]:
# Read the concatenated review export into a DataFrame (path is relative to
# the notebook's working directory).
df = pd.read_csv(filepath_or_buffer='concatenated_reviews.csv')

In [3]:
# Build one text field per review from its title and body.
# A missing title or body is NaN in pandas, and `str + NaN` is NaN; the later
# `astype(str)` would then turn that row into the literal document "nan".
# Treat a missing part as empty text instead, and strip the separator space
# left dangling when one side is empty.
df['title_body'] = (
    df['title'].fillna('').astype(str)
    + ' '
    + df['body'].fillna('').astype(str)
).str.strip()

In [4]:
# Show the combined title/body column to eyeball the result.
df.loc[:, 'title_body']

0      Sweet book. Great for Father's Day Bought this...
1      One of my favourites Such a lovely book which ...
2                                      childs book great
3      Great gift for a first time dad. Gave this as ...
4      Super cute Love this book bought it for my dau...
                             ...                        
575    Great read and very relevant as we move furthe...
576    Four Stars Good Book, not for light reading th...
577                      Five Stars Really Helpful book!
578    Many typos in the kindle version Disappointing...
579    Too hard to read on paperback Font is small an...
Name: title_body, Length: 580, dtype: object

In [5]:
# Plain Python list of review texts; every entry is coerced to str first.
docs = list(df['title_body'].astype(str))

In [6]:
from top2vec import Top2Vec
import tensorflow_hub as hub
import os

In [7]:
# Fetch the Universal Sentence Encoder (version 4) module from TF Hub.
# The loaded module is handed to Top2Vec as its embedding model further down.
model = hub.load(handle="https://tfhub.dev/google/universal-sentence-encoder/4")

In [9]:
# Fit the topic model on the review texts, embedding them with the encoder
# loaded above.
# The UMAP dimensionality-reduction step is stochastic, so without a fixed
# seed the number and size of topics can differ on every re-run. Passing
# `umap_args` replaces Top2Vec's own UMAP settings wholesale, so the usual
# settings are restated here alongside the seed.
# NOTE(review): n_neighbors/n_components/metric mirror the defaults documented
# for Top2Vec — confirm against the installed version.
model2 = Top2Vec(
    docs,
    embedding_model=model,
    umap_args={
        'n_neighbors': 15,
        'n_components': 5,
        'metric': 'cosine',
        'random_state': 42,
    },
)

2024-06-05 21:57:59,063 - top2vec - INFO - Pre-processing documents for training
INFO:top2vec:Pre-processing documents for training
2024-06-05 21:57:59,149 - top2vec - INFO - Creating joint document/word embedding
INFO:top2vec:Creating joint document/word embedding
2024-06-05 21:57:59,744 - top2vec - INFO - Creating lower dimension embedding of documents
INFO:top2vec:Creating lower dimension embedding of documents
OMP: Info #276: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.
2024-06-05 21:58:04,526 - top2vec - INFO - Finding dense areas of documents
INFO:top2vec:Finding dense areas of documents
2024-06-05 21:58:04,542 - top2vec - INFO - Finding topics
INFO:top2vec:Finding topics


In [10]:
# Document count per discovered topic, plus the matching topic numbers.
(topic_sizes, topic_nums) = model2.get_topic_sizes()
print(str(topic_sizes))

[501  79]


In [11]:
# Topic numbers, in the same order as the sizes printed above.
print(str(topic_nums))

[0 1]


In [14]:
# Retrieve the top words (and their scores) for every topic the model found.
# Ask the model how many topics exist rather than hard-coding the count seen
# in one particular run, so this cell keeps working when the data or the
# fitted model changes.
topic_words, word_scores, topic_nums = model2.get_topics(model2.get_num_topics())

In [15]:
# Print each topic number followed by its word list, one per line.
for topic_terms, _term_scores, topic_id in zip(topic_words, word_scores, topic_nums):
    print(topic_id, topic_terms, sep='\n')

0
['book' 'books' 'loved' 'reading' 'read' 'amazing' 'great' 'recommend'
 'good' 'series' 'author' 'best' 'story' 'much' 'love' 'very' 'really'
 'first' 'one' 'this' 'little' 'written' 'way' 'an' 'characters' 'such'
 'have' 'had' 'has' 'like' 'into' 'well' 'not' 'it' 'about' 'with' 'get'
 'that' 'who' 'no' 'been' 'the' 'many' 'some' 'would' 'as' 'her' 'through'
 'end' 'so']
1
['book' 'reading' 'read' 'books' 'much' 'recommend' 'an' 'she' 'very' 'so'
 'have' 'one' 'really' 'great' 'this' 'author' 'about' 'all' 'good' 'has'
 'written' 'best' 'such' 'who' 'many' 'for' 'had' 'loved' 'with' 'into'
 'know' 'and' 'her' 'now' 'life' 'every' 'not' 'to' 'am' 'by' 'what' 'no'
 'as' 'amazing' 'it' 'also' 'get' 'way' 'well' 'from']


In [18]:
# Pull the 10 documents returned for topic 0, with their scores and ids.
documents, document_scores, document_ids = model2.search_documents_by_topic(
    topic_num=0,
    num_docs=10,
)

# For each hit print the document id, its text, then its score.
for text, similarity, identifier in zip(documents, document_scores, document_ids):
    print(identifier, text, similarity, sep='\n')

394
A fantastic book. Unbelievably unhinged!! What a fantastic, fantastic book. Had me hooked from the first page. It only took me 3 days to read (not a fast reader usually) I don't think I've ever read a book this quick. I couldn't put it down and when I did I couldn't stop thinking about it. Do yourself a favour and read this book, you won't be disappointed!
0.7858136
374
Could not put this down. This was the first Lisa Jewell book I have read - and what an introduction it was! This book was one of the best books I have read all year, full of emotion, twists and turns and the characters were such a fascinating read. Would recommend this for those who love a good thriller. Will be reading more from this author.
0.7671149
401
A brilliant read I loved this book. Lots of fwists and turns. The ending was brilliant. I couldn't put it down .
0.7666582
380
AMAZING!!! Just finished this book, and as a first time Lisa Jewell reader, I can safely say that I am now a huge fan. This book is an am