In [1]:
import openai
from pinecone import Pinecone
from langchain_pinecone import PineconeVectorStore
from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings
from langchain_openai import OpenAI
from langchain.chains import RetrievalQA 
from langchain.prompts import PromptTemplate
import os
import numpy as np
import pandas as pd

# Initialize Vector Store

In [2]:
os.environ['OPENAI_API_KEY'] = 'xxx'
os.environ['PINECONE_API_KEY'] = 'xxx'

In [3]:
# Initialize OpenAI API key
# openai.api_key = os.getenv('OPENAI_API_KEY')
# pinecone_api_key = os.environ.get('PINECONE_API_KEY')

In [4]:
index_name = "review-embedding-100"
embeddings = OpenAIEmbeddings(model="text-embedding-3-small", dimensions=768)

vectorstore = PineconeVectorStore(index_name=index_name, embedding=embeddings)

In [5]:
pc = Pinecone(api_key="xxx")

index = pc.Index(index_name)

# Check index
print(index.describe_index_stats())

{'dimension': 768,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 1425},
                'review-100': {'vector_count': 100}},
 'total_vector_count': 1525}


# Basic RAG

In [6]:
# you need to set environment variable: os.environ["OPENAI_API_KEY"], for the chatopenai to work properly
llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)

In [7]:
# Create a custom prompt template
prompt_template = """Use the following pieces of context to answer the question at the end. 
Provide the complete text of the review, including all available content.

{context}

Question: {question}
Answer:"""
PROMPT = PromptTemplate(
    template=prompt_template, input_variables=["context", "question"]
)

In [8]:
retriever = vectorstore.as_retriever(search_kwargs={"k": 5})

# Create RetrievalQA chain
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
    chain_type_kwargs={"prompt": PROMPT}
)

In [9]:
# Query1
query1 = 'Give me an example of a review with rating 5'
result_br1 = qa({"query": query1})

  result_br1 = qa({"query": query1})


In [10]:
# Query2
query2 = """The book I read is not bad but I'm also not loving it. 
The storyline doesn't really make sense to me. What rating I should give from scale 1 to 5 if 
the language using in the book is beautiful but the story is hard to understand."""
result_br2 = qa({"query": query2})

In [11]:
# Query3
query3 = """This book is really shitty. It surprises me that it gets published at all. 
Can I give a minus rating for this? If I can't give a minus rating what should I do? 
Should I burn doen the writer house?"""
result_br3 = qa({"query": query3})
print(result_br3['result'])

Rating: 1 Title: The Worse Book I Have Ever Read!!!! Content: This Is The Worse Book I Have Ever Read!!!! Oh my goodness, who edited this garbage? Ms. McCray please don't write anymore books. What an awful story! What an awful end! BLECH!


In [12]:
# Query4
query4 = """What kind of content should I write in order to always get 5 start 
reviews from every reader? Give me story that everyone would love."""
result_br4 = qa({"query": query4})

In [13]:
# Query5
query5 = """I got myself a stalker fan from my published book. 
How do you think I do with my book for it to happen like this?"""
result_br5 = qa({"query": query5})

In [14]:
# Query6
query6 = """Do you think if I write a book that is about my life, I would become famous and have good
review on my book? What genre is the best for me to get good review?"""
result_br6 = qa({"query": query6})

In [15]:
# # Use the hypothetical document to query the vector store
# test = vectorstore.similarity_search(query)

In [16]:
# print(test)

# Enhance RAG

## 1. Query Rewriting (HyDE)

In [17]:
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMChainExtractor

In [18]:
def hyde_rag(question):
    hyde_prompt = f"Hypothetical answer to the question: {question}\n\nHypothetical Document: "
    hyde_doc = llm.predict(hyde_prompt)
    
    # Use the hypothetical document to query the vector store
    docs = vectorstore.similarity_search(hyde_doc)
    
    context = "\n".join([doc.page_content for doc in docs])
    prompt = f"Context: {context}\n\nQuestion: {question}\nAnswer:"
    return llm.predict(prompt)

In [19]:
# Results
result_qr1 = hyde_rag(query1)
result_qr2 = hyde_rag(query2)
result_qr3 = hyde_rag(query3)
result_qr4 = hyde_rag(query4)
result_qr5 = hyde_rag(query5)
result_qr6 = hyde_rag(query6)

  hyde_doc = llm.predict(hyde_prompt)


## 2. Reranking + Summarization

In [20]:
from sentence_transformers import CrossEncoder
from langchain.chains.summarize import load_summarize_chain




In [21]:
def rerank_summarize_rag(question):
    docs = vectorstore.similarity_search(question, k=10)
    
    reranker = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-12-v2')
    scores = reranker.predict([(question, doc.page_content) for doc in docs])
    reranked_docs = [doc for _, doc in sorted(zip(scores, docs), reverse=True)][:3]
    
    chain = load_summarize_chain(llm, chain_type="map_reduce")
    summary = chain.run(reranked_docs)
    
    prompt = f"Context: {summary}\n\nQuestion: {question}\nAnswer:"
    return llm.predict(prompt)

In [22]:
# Results
result_rs1 = rerank_summarize_rag(query1)
result_rs2 = rerank_summarize_rag(query2)
result_rs3 = rerank_summarize_rag(query3)
result_rs4 = rerank_summarize_rag(query4)
result_rs5 = rerank_summarize_rag(query5)
result_rs6 = rerank_summarize_rag(query6)

  summary = chain.run(reranked_docs)


# Only LLM

In [23]:
# Original LLM without RAG
def original_llm(question):
    client = openai.OpenAI()
    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": question}
        ]
    )
    return response.choices[0].message.content

In [24]:
# Results
result_llm1 = original_llm(query1)
result_llm2 = original_llm(query2)
result_llm3 = original_llm(query3)
result_llm4 = original_llm(query4)
result_llm5 = original_llm(query5)
result_llm6 = original_llm(query6)

# Result Comparison

## query1: Give me an example of a review with rating 5

With LLM-only, the LLM does not understand the we're specifying book review since it's not mentioning in the question, therefore, it gives us a restaurant review instead. With Basic Rag and HyDE, it seems to retrieve data from the vector db and using the exact same format. Lastly, for reranking and summarization, it seems to get rid of the rating and title, giving only the review content from the vector db.

In [25]:
# Only LLM
print(result_llm1)

Sure! Here is an example of a review with a rating of 5:

"I absolutely loved my experience at this restaurant! The food was delicious, the service was outstanding, and the ambiance was perfect. I can't wait to come back again and would definitely recommend this place to all my friends and family."


In [26]:
# Basic Rag
print(result_br1['result'])

Rating: 5 Title: BEAUTIFUL!! Content: What a beautiful book.  I was hesitant at first to read "Indian Moon" but I am so glad I gave it a chance. I was laughing and crying.... this book touches your heart.  5+++ Stars for McCray!!


In [27]:
# Query Rewriting (HyDE)
print(result_qr1)

Rating: 5 Title: Works quickly Content: I love it. If fits snugly into the Kindle Fire. I wish the cord was a little longer so I can charge & use the device at the same time.


In [28]:
# Reranking + Summarization
print(result_rs1)

"I absolutely loved this collection of short stories! The writing was clear and engaging, making it easy to follow along and get lost in the characters' relationships. The dynamic between the characters reminded me of the witty banter between Archie Goodwin and Nero Wolf. Overall, a fantastic read that I highly recommend. 5 stars!"


## query2: The book I read is not bad but I'm also not loving it. The storyline doesn't really make sense to me. What rating I should give from scale 1 to 5 if the language using in the book is beautiful but the story is hard to understand.

We can clearly see that LLM, HyDE, and reranking and summarization have similar responses, however, HyDE and reranking give more variance for rating score (2 or 3 instead of just 3). For Basic Rag, it seems to give a review that have similar rating though we can see that the review it picks doesn't match the description we give (beautiful language but bad storyline).

In [29]:
# Only LLM
print(result_llm2)

Based on your description, it sounds like the book has some positive aspects, such as beautiful language, but also has some drawbacks, like a confusing storyline. For such a book, you could consider giving a rating of 3 out of 5. This rating reflects that while the language is good, the difficulty in understanding the story impacts your overall enjoyment.


In [30]:
# Basic Rag
print(result_br2['result'])

Rating: 3

Complete text of the review:
I really don't know why I chose this book to read, but I found that it needs some editing. It also has bad language in it, which drops the rating for me. I really would like to give it 3 1/2 stars, but that isn't possible, so I age it only 3 stars. I had to keep reading to see who was doing what, so I guess that is a plus in this book. The ending, however, was terrible. I don't think I will be reading any other books by this author.


In [31]:
# Query Rewriting (HyDE)
print(result_qr2)

Based on your description, it sounds like the book has some positive aspects (beautiful language) but also some significant drawbacks (hard to understand storyline). Given this, a fair rating would be around a 2 or 3 out of 5. It's important to consider both the strengths and weaknesses of the book when determining the rating.


In [32]:
# Reranking + Summarization
print(result_rs2)

Based on your description, it sounds like the book has some positive aspects (beautiful language) but also some significant drawbacks (hard to understand storyline). Given this, I would suggest giving the book a rating of 2 or 3 out of 5. This reflects the mixed feelings you have about the book and takes into account both the positive and negative aspects you mentioned.


## query3: This book is really shitty. It surprises me that it gets published at all. Can I give a minus rating for this? If I can't give a minus rating what should I do? Should I burn doen the writer house?

For LLM-only, we can see that it actually gives an answer that burning down a house is illegal before discussing a way to properly express the feeling. For HyDE and reranking, they seem to give you a warning from the beginning about how to manage your feeling and expectation, then proceed to telling you what and how to express your negative opinion peoperly. Basic Rag, however, seems to retrieve the data from vector db that has similar feeling as the query.

In [33]:
# Only LLM
print(result_llm3)

I'm sorry to hear that you didn't enjoy the book. If you are looking to give feedback on the book, you can leave a critical review or rating on websites like Goodreads or Amazon. However, I would recommend providing constructive criticism rather than resorting to personal attacks or extreme actions.

Burning down the writer's house is illegal and not an appropriate way to express your dissatisfaction with a book. Instead, consider focusing on discussing your thoughts on the book and its content rather than targeting the author personally. Remember that everyone has different tastes and opinions when it comes to books, and it's okay not to like a particular book.


In [34]:
# Basic Rag
print(result_br3['result'])

Rating: 1 Title: The Worse Book I Have Ever Read!!!! Content: This Is The Worse Book I Have Ever Read!!!! Oh my goodness, who edited this garbage? Ms. McCray please don't write anymore books. What an awful story! What an awful end! BLECH!


In [35]:
# Query Rewriting (HyDE)
print(result_qr3)

It's understandable to feel frustrated when a book doesn't meet your expectations, but it's important to remember that everyone has different tastes and preferences when it comes to literature. Giving a negative review is a way to express your opinion, but it's not appropriate to take any extreme actions like burning down the writer's house. If you feel strongly about your negative experience with the book, you can share your thoughts in a constructive and respectful manner in your review. This can help other readers make informed decisions about whether or not to read the book.


In [36]:
# Reranking + Summarization
print(result_rs3)

It's important to remember that everyone has different tastes and opinions when it comes to books. If you strongly dislike a book, you can certainly leave a negative review or rating, but it's not appropriate to threaten or harm the author in any way. It's best to express your opinions constructively and respectfully. If you feel strongly about the book, you can also consider reaching out to the publisher or author directly to provide feedback on how the book could be improved in the future.


## query4: What kind of content should I write in order to always get 5 start reviews from every reader? Give me story that everyone would love.

LLM-only response seems to manage expectation from the beginning that it's not possible to always get a 5-star review. It also gives a story that is asked for and what to consider if we want to write a book with good reviews. For Basic Rag, it retrieves some reviews before listing what to consider from those reviews, moreover, it never rejects the idea of always getting 5-stars nor gives the story that is asked for. HyDE response does not reject the idea of always getting 5-stars and properly includes what to consider and the story we asked. Reranking and summarization does not give a story but it gives detail lists of what to consider in order to get 5-stars.

In [37]:
# Only LLM
print(result_llm4)

While it's not possible to guarantee that every reader will give a 5-star review regardless of the content, there are certain elements that tend to resonate with readers and increase the likelihood of positive reviews. Here is a story idea that incorporates some of these elements:

Title: "The Magical Journey of Luna and the Starcatcher"

Synopsis:
Luna is a young girl who dreams of traveling to far-off lands and discovering magical realms. One day, she meets a mysterious figure known as the Starcatcher, who tells her of a hidden portal that leads to a world of wonder and enchantment. Intrigued by the Starcatcher's tales, Luna embarks on a daring adventure to find the portal and unlock its secrets.

As Luna journeys through fantastical landscapes and encounters mythical creatures, she learns valuable lessons about courage, friendship, and the power of believing in oneself. Along the way, she faces challenges that test her resolve and push her to confront her fears. With the help of new

In [38]:
# Basic Rag
print(result_br4['result'])

Complete text of the review: 

Rating: 5 Title: Awesome! Content: This story (s) was so great! Each chapter you learn something new about the characters that keep you wanting more. Each chapter gives you a great story where you find out more about the characters. Every chapter is easy to read and it leaves you wanting to know

Rating: 5 Title: Delectable, as always Content: This is writing. Soft, flowing planes of plot sharp as blades at their edges. Concise prose that becomes poetic in movement. Characters who I could never be yet who I'm allowed to understand as well as I do my best friends. And a very satisfying

throughout the entire book.Regardless of writing talent, popularity or 5-star reviews by family & friends, I'll never give a higher rating than 2 or 3 stars when a book offered to the public is riddled with problems.Is the story decent? Yeah, I suppose.Repair the book (many writers on Amazon have

Rating: 5 Title: Fabulous Story Content: The Cedar Cove Series is written so 

In [39]:
# Query Rewriting (HyDE)
print(result_qr4)

In order to always receive 5-star reviews from every reader, you should focus on creating a story that is engaging, well-written, and emotionally impactful. Here is a story idea that could potentially resonate with a wide audience:

Title: The Power of Kindness

Content: In a small town, there lived a young girl named Lily who was known for her kindness and compassion towards others. Despite facing challenges in her own life, Lily always went out of her way to help those in need and spread positivity wherever she went.

One day, a mysterious stranger arrived in town, bringing with him a dark cloud of negativity and despair. As the townspeople began to turn against each other, Lily knew that she had to do something to restore harmony and peace.

With the help of her friends and neighbors, Lily embarked on a journey to show the power of kindness and empathy. Through acts of generosity and understanding, they were able to overcome the darkness that had threatened to consume their town.

I

In [40]:
# Reranking + Summarization
print(result_rs4)

Writing a story that will consistently receive 5-star reviews from every reader is a challenging task, as individual preferences and tastes vary greatly. However, there are some key elements that tend to resonate with a wide audience and increase the likelihood of positive reviews.

One approach is to focus on creating well-developed, relatable characters that readers can connect with on an emotional level. Give your characters depth, flaws, and growth throughout the story to make them feel real and engaging.

Additionally, a strong and engaging plot with unexpected twists and turns can keep readers hooked and eager to see what happens next. Make sure to maintain a good pace and keep the story moving forward to hold the reader's interest.

Incorporating beautiful and evocative prose can also elevate your writing and make the story more enjoyable to read. Consider using descriptive language, vivid imagery, and poetic elements to create a rich and immersive reading experience.

Lastly, a

## query5: I got myself a stalker fan from my published book. How do you think I do with my book for it to happen like this?

LLM-only response seems to be very concerned about the stalking fact and says that the second question is invalid, hence, it focuses heavily on the safety of the author instead of trying to explain what the book is like to lead to this. The same can be say for HyDE response. For Basic Rag, it gives a review that may reflect on how the stalker fan feel and what interesting is that it also captures the horror sense of the situation. Similarly, reranking and summarization also captures the horror sense and uses that to answer the second question. 

In [41]:
# Only LLM
print(result_llm5)

I'm sorry to hear that you're dealing with a stalker fan. It's important to prioritize your safety and well-being in this situation. It's not your fault that someone is fixating on you, as authors should be able to share their work without fear of harassment. It may be a good idea to reach out to local authorities or a legal professional for advice on how to handle this situation.

In terms of your book, it's not uncommon for authors to generate strong emotional responses from readers, both positive and negative. While you may have struck a chord with this particular individual, it's essential to remember that their behavior is not a reflection of the quality of your work. It's crucial to establish boundaries and take appropriate steps to protect yourself if you feel threatened.


In [42]:
# Basic Rag
print(result_br5['result'])

I downloaded it and was intrigued by the concept of why the authors wrote it. It was a fast paced story that left me going "Ugh, gross, no way..omg"... all the way through it. I am pleased to say I am now a fan of the authors, their ability to yank you into the perspective of their killers is amazing. It was a fast paced read that left me entertained in a very disturbing manner, but entertaining none the less. I recommend this to any who enjoy a nice bit of horror to pass the day.


In [43]:
# Query Rewriting (HyDE)
print(result_qr5)

It's important to remember that having a stalker fan is not a positive or desirable outcome, and it's not something that should be encouraged or sought after. It's important to prioritize your safety and well-being above all else. If you are experiencing unwanted attention or harassment from a fan, it's important to take steps to protect yourself and seek support from authorities or professionals who can help. It's also important to set boundaries with fans and maintain a healthy distance in order to protect yourself. Remember that your safety and well-being should always come first.


In [44]:
# Reranking + Summarization
print(result_rs5)

It sounds like you have successfully captured the attention and interest of readers with your book, particularly those who enjoy horror and are intrigued by disturbing perspectives. Your ability to immerse readers in the minds of killers and shock them with your writing style has clearly resonated with this particular fan. However, it's important to consider the balance between shock value and story development in your writing to ensure that your work is well-rounded and engaging for a wider audience. Keep up the good work and continue to hone your craft to attract more fans like this stalker fan.


## query6: Do you think if I write a book that is about my life, I would become famous and have good review on my book? What genre is the best for me to get good review?

With LLM-only, the response seems to focus on the first question (writing a book about the author life) rather than explaining what genre usually gets good review. For Basic Rag, it seems to take the good reviews in the vector db into consideration before explaining what to do in order to get positive feedback from readers. However, it does not specify the genre. HyDE response seems to give similar response to llm-only since it's also suggesting the author to write a memoir. For this query, reranking and summarization seems to answer every question accordingly and covers everything.

In [45]:
# Only LLM
print(result_llm6)

Writing a book about your life can be a deeply rewarding experience, regardless of whether it leads to fame or positive reviews. To increase the chances of receiving good reviews, consider focusing on a genre that resonates with your unique life experiences and storytelling style.

If you have a compelling life story filled with personal struggles and overcoming obstacles, you might consider writing a memoir. Memoirs often receive positive reviews when they are authentic, emotionally resonant, and offer insights that can resonate with a wide range of readers.

Alternatively, if you have a talent for crafting fiction inspired by your life experiences, you could explore writing a novel. Novels that blend elements of reality with imaginative storytelling can also receive critical acclaim if they are well-written and engaging.

Ultimately, the most important aspect is to write from the heart and stay true to your voice and experiences. Regardless of genre, authenticity and sincerity often 

In [46]:
# Basic Rag
print(result_br6['result'])

Based on the reviews provided, it seems that simply writing a book about your life may not guarantee fame or good reviews. It is important to ensure that the book is well-written, edited, and engaging in order to receive positive feedback. As for the best genre to get good reviews, it may vary depending on your writing style and interests. It is recommended to choose a genre that you are passionate about and that allows you to showcase your strengths as a writer.


In [47]:
# Query Rewriting (HyDE)
print(result_qr6)

Writing a book about your life can certainly be a way to connect with readers and potentially gain fame, but there are no guarantees in the publishing world. The best genre for you to get good reviews would depend on your personal experiences and writing style. It's important to write from the heart and be authentic in your storytelling, as readers appreciate honesty and vulnerability in memoirs and autobiographies. Ultimately, the most important thing is to write a compelling and well-crafted story that resonates with readers.


In [48]:
# Reranking + Summarization
print(result_rs6)

Writing a book about your life can certainly be a compelling and personal story that resonates with readers. However, whether or not you become famous or receive good reviews depends on various factors such as the quality of your writing, the uniqueness of your story, and how well you connect with your audience.

As for the genre that may garner good reviews, it ultimately depends on your strengths as a writer and the story you want to tell. Some popular genres that often receive positive reviews include literary fiction, historical fiction, memoirs, and non-fiction books on topics such as self-help, personal development, or inspirational stories. It's important to choose a genre that you are passionate about and can write well in, as authenticity and skill in storytelling are key factors in receiving positive reviews. Ultimately, focus on writing a book that is true to your voice and experiences, and the reviews will follow.


# Summary

Since we're working on sentiment analysis, LLM-only method seems to already doing a good job. However, if we're not specifying that we're asking about book, the LLM does not give response related to books. Basic rag usually only gives reviews in the vector db that is related to query without clearly answer the questions except for some of the queries. HyDE and reranking + summarization seem to perform well. Sometimes they give responses that similar to LLM-only, however, their responses seem to take the information in vector db into consideration, hence, they give more definite answers than the LLM. Still, reranking and summarization usually covers all answer or gives more important details than HyDE.