In [4]:
from langchain.document_loaders.csv_loader import CSVLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain import LLMChain
from langchain.chains.question_answering import load_qa_chain

In [3]:
import os

# Export the OpenAI settings as environment variables in a single call.
# "YOUR_API_KEY" is a placeholder: substitute your own key before running
# any cell that contacts the API.
os.environ.update(
    {
        "OPENAI_API_KEY": "YOUR_API_KEY",
        "OPENAI_API_BASE": "https://openai.vocareum.com/v1",
    }
)

First, initialize your LLM

In [6]:
from langchain.chat_models import ChatOpenAI

# Chat model handed to the question-answering chains later in the notebook.
model_name = 'gpt-3.5-turbo'
llm = ChatOpenAI(
    model_name=model_name,
    temperature=0,    # zero sampling temperature
    max_tokens=2000,  # upper bound on tokens generated per response
)

Then, load reviews from tv-reviews.csv

In [11]:
# Read the TV reviews CSV into a list of LangChain documents.
reviews_csv = './tv-reviews.csv'
loader = CSVLoader(file_path=reviews_csv)
docs = loader.load()



Split the documents you loaded into smaller chunks

In [12]:
# Chunk the loaded documents with a character-based splitter: a target size
# of 1000 characters per chunk and no overlap between neighbouring chunks.
chunking = {"chunk_size": 1000, "chunk_overlap": 0}
splitter = CharacterTextSplitter(**chunking)
split_docs = splitter.split_documents(docs)

Now, initialize your embeddings

In [13]:
# Embedding model, constructed with no arguments (library defaults).

embeddings = OpenAIEmbeddings()


Initialize your vector db with your embeddings model and populate it with your text chunks

In [14]:
# Build the Chroma vector store from the text chunks, using `embeddings`
# as the embedding model.
db = Chroma.from_documents(documents=split_docs, embedding=embeddings)


Query your vector database for the 5 most semantically similar chunks

In [17]:
query = """
    Based on the reviews in the context, tell me what people liked about the picture quality.
    Make sure you do not paraphrase the reviews, and only use the information provided in the reviews.
    """

# Number of most-similar chunks to retrieve. Both code paths below use this
# value so they answer from the same amount of context.
TOP_K = 5

# Toggle between the two equivalent ways of answering the query:
#   True  -> let RetrievalQA do retrieval + answering in one helper chain
#   False -> retrieve manually, then feed the chunks to a "stuff" QA chain
use_chain_helper = False
if use_chain_helper:
    # as_retriever() called without search_kwargs uses the retriever's own
    # default k rather than TOP_K, so the value is passed explicitly.
    retriever = db.as_retriever(search_kwargs={"k": TOP_K})
    rag = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever)
    print(rag.run(query))
else:
    # find the TOP_K documents most semantically similar to the query
    similar_docs = db.similarity_search(query, k=TOP_K)

    # The "stuff" chain fills {context} with the retrieved documents; {query}
    # is supplied as a keyword argument to chain.run below.
    prompt = PromptTemplate(
        template="{query}\nContext: {context}",
        input_variables=["query", "context"],
    )
    chain = load_qa_chain(llm, prompt=prompt, chain_type="stuff")
    print(chain.run(input_documents=similar_docs, query=query))

People liked the vibrant colors and crystal clear images of the picture quality. They were impressed by the sharpness and lifelike details, making everything look stunning and enhancing their viewing experience. The visuals were described as outstanding, with vivid colors and incredible detail, bringing every scene to life like a cinema experience.


Combined, the retrieved chunks should provide enough information to answer our question about picture quality.