# About
- Build a basic RAG chain
- Build a RAG chain that also returns its sources

# Step 1: Imports and Settings

In [1]:
import os
import gradio as gr
import torch

from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.prompts import (
    PromptTemplate,
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate,
    ChatPromptTemplate,
)
from langchain_core.output_parsers import StrOutputParser
from langchain import hub
from langchain_core.runnables import RunnablePassthrough

from dotenv import load_dotenv
load_dotenv()
OPENAI_API_KEY = os.environ['OPENAI_API_KEY']

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
!nvidia-smi

Sat Nov  9 16:50:34 2024       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 565.90                 Driver Version: 565.90         CUDA Version: 12.7     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                  Driver-Model | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA GeForce RTX 3050 ...  WDDM  |   00000000:01:00.0 Off |                  N/A |
| N/A   44C    P8              3W /   75W |       0MiB /   6144MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [3]:
# Report the installed PyTorch build and whether it can see a CUDA device
print(torch.__version__)
use_cuda = torch.cuda.is_available()
print("CUDA available:",use_cuda)
if use_cuda:
    # The name and memory lines below query device index 0 only
    print('__CUDNN VERSION:', torch.backends.cudnn.version())
    print('__Number CUDA Devices:', torch.cuda.device_count())
    print('__CUDA Device Name:',torch.cuda.get_device_name(0))
    # total_memory is divided by 1e9 so the value is printed under the [GB] label
    print('__CUDA Device Total Memory [GB]:',torch.cuda.get_device_properties(0).total_memory/1e9)

2.3.1+cu118
CUDA available: True
__CUDNN VERSION: 8700
__Number CUDA Devices: 1
__CUDA Device Name: NVIDIA GeForce RTX 3050 6GB Laptop GPU
__CUDA Device Total Memory [GB]: 6.441926656


In [None]:
# Select the torch device: "cuda" when CUDA is available, otherwise fall back to "cpu"
device = torch.device("cuda" if use_cuda else "cpu")
print("Device: ",device)

# Step 2: Create Basic RAG Chain

In [4]:
# Instantiate embeddings model
# NOTE(review): presumably this must be the same embedding model that was used to
# build the persisted Chroma index this notebook loads — confirm before changing it.
embeddings_model = OpenAIEmbeddings(api_key=OPENAI_API_KEY, model='text-embedding-ada-002', max_retries=100, chunk_size=16, show_progress_bar=False)

# Instantiate chat model
# temperature=0 asks for the least random sampling from the chat model
chat_model = ChatOpenAI(api_key=OPENAI_API_KEY, temperature=0, model='gpt-4o-mini')

In [5]:
# Load chroma from disk
# "chroma_data" is a relative path; queries are embedded with embeddings_model
vectorstore = Chroma(persist_directory="chroma_data", embedding_function=embeddings_model)

# Set up the vectorstore to be the retriever
k = 5  # value passed as the "k" search kwarg (documents requested per query)
retriever = vectorstore.as_retriever(search_kwargs={"k":k})

# Format docs function
def format_docs(docs):
    """Merge retrieved documents into a single context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        The ``page_content`` values in input order, separated by one blank
        line; an empty string when ``docs`` is empty.
    """
    blank_line = "\n\n"
    contents = [doc.page_content for doc in docs]
    return blank_line.join(contents)

# Step 3: Test out the retriever

In [6]:
# Test out the retriever
query = "music streaming platform"
results = retriever.invoke(query)

In [7]:
# How would the formatted documents be rendered in the query?
print(format_docs(results))

Good music streaming platform

Great music streaming platform

Best streaming music platform

Best music streaming platform

Best music streaming platform


In [None]:
# Get the prompt from the langchain hub. Or you could write your own!
# NOTE(review): `prompt` is not referenced by the RAG chain built later in this
# notebook, which uses `review_prompt_template` — confirm whether this cell is still needed.
prompt = hub.pull("rlm/rag-prompt")

In [8]:
# System prompt for the review Q&A assistant. `{context}` is its only placeholder;
# the RAG chain fills it with the formatted text of the retrieved documents.
# NOTE(review): the captured chain output repeats the example Q/A format below even
# for a non-question query — consider marking the examples as illustrative only.
review_template_str = """You are an advanced Q&A assistant specialized in analyzing user reviews for a music streaming application
similar to Spotify. Your role is to extract meaningful and actionable insights from a large dataset of 3.4 million Google Store reviews.
These insights should help management understand what users like, dislike, compare, and suggest about the application.

Your responses should be accurate, clear, and concise, directly addressing the management's inquiries. Ensure your answers cover the
following aspects:
1. Positive aspects and features users appreciate most.
2. Comparisons made with competitor apps (e.g., Pandora).
3. Common complaints and areas of dissatisfaction among users.
4. Emerging trends or patterns that could influence product strategy.

In addition to your answers, provide a brief confidence score (out of 10) for each response to indicate the answer’s reliability.

Example responses to management questions might look like:
- **Q**: "What are the features users appreciate most?"
  **A**: "Users often praise the intuitive UI design, extensive variety of music options, and seamless listening experience."

- **Q**: "Which platforms do users compare us with most?"
  **A**: "Users frequently compare the application with Pandora, especially regarding feature availability and user experience."

{context}

"""

In [9]:
# System message: wraps the instructions template, whose only input variable is `context`
review_system_prompt = SystemMessagePromptTemplate(
    prompt=PromptTemplate(
        input_variables=["context"],
        template=review_template_str,
    )
)

In [10]:
# Human message: the template is just "{question}", so the question text is inserted unchanged
review_human_prompt = HumanMessagePromptTemplate(
    prompt=PromptTemplate(
        input_variables=["question"],
        template="{question}",
    )
)

In [11]:
messages = [review_system_prompt, review_human_prompt]

In [12]:
# Chat prompt built from `messages`; it expects both `context` and `question` as inputs
review_prompt_template = ChatPromptTemplate(
    input_variables=["context", "question"],
    messages=messages,
)

# Step 4: Test out the RAG Chain

In [14]:
# Create RAG Chain
# Pipeline: the incoming query feeds both prompt variables. `context` is the
# retriever's results rendered by format_docs; `question` is the query itself.
# The filled prompt goes to the chat model and the reply is parsed to a string.
rag_chain = (
    {
        "context": retriever | format_docs,
        "question": RunnablePassthrough(),
    }
    | review_prompt_template
    | chat_model
    | StrOutputParser()
)

In [15]:
query = "music streaming platform"
rag_chain.invoke(query)

'**Q**: "What are the features users appreciate most?"  \n**A**: "Users appreciate the vast music library, personalized playlists, and high-quality audio streaming. Additionally, the offline listening feature and user-friendly interface are frequently highlighted as major positives."  \n**Confidence Score**: 9/10  \n\n**Q**: "Which platforms do users compare us with most?"  \n**A**: "The application is often compared to Apple Music and Pandora, with users noting differences in music discovery features and subscription pricing."  \n**Confidence Score**: 8/10  \n\n**Q**: "What are the common complaints and areas of dissatisfaction among users?"  \n**A**: "Common complaints include issues with song availability, occasional app crashes, and the presence of ads in the free version. Users also express dissatisfaction with the recommendation algorithm not aligning with their music preferences."  \n**Confidence Score**: 9/10  \n\n**Q**: "What emerging trends or patterns could influence product

# Step 5: Create Simple Gradio Answer Bot

In [16]:
# Function to generate answer
def generate_answer(message, history):
    """Answer one chat message by running it through the RAG chain.

    Args:
        message: The user's latest input; forwarded unchanged to ``rag_chain``.
        history: Earlier conversation turns; accepted but not used here.

    Returns:
        The value returned by ``rag_chain.invoke(message)``.
    """
    answer = rag_chain.invoke(message)
    return answer

In [17]:
# Set up chat bot interface
# Chat UI that routes each submitted message to generate_answer.
answer_bot = gr.ChatInterface(
    generate_answer,
    chatbot=gr.Chatbot(height=300),
    # Placeholder shown in the empty input box (user-facing text).
    textbox=gr.Textbox(placeholder="Ask me a question", container=False, scale=7),
    title="Q&A Chatbot",
    description="Ask for meaningful information from the Google Store Reviews",
    theme="soft",
    # Clickable sample inputs; cache_examples=False leaves them uncached.
    examples=["music streaming platform", "trends", "features"],
    cache_examples=False,
)



In [18]:
# share=True serves the app through a public Gradio share link in addition to the local URL.
# NOTE(review): the chain behind this UI calls an OpenAI chat model — confirm public sharing is intended.
answer_bot.launch(share=True)

* Running on local URL:  http://127.0.0.1:7860
* Running on public URL: https://c73ea6228eeec1eadc.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


