In [3]:
!pip install pandas

Collecting pandas
  Using cached pandas-2.2.3-cp312-cp312-macosx_11_0_arm64.whl.metadata (89 kB)
Collecting pytz>=2020.1 (from pandas)
  Downloading pytz-2024.2-py2.py3-none-any.whl.metadata (22 kB)
Collecting tzdata>=2022.7 (from pandas)
  Using cached tzdata-2024.2-py2.py3-none-any.whl.metadata (1.4 kB)
Using cached pandas-2.2.3-cp312-cp312-macosx_11_0_arm64.whl (11.4 MB)
Downloading pytz-2024.2-py2.py3-none-any.whl (508 kB)
Using cached tzdata-2024.2-py2.py3-none-any.whl (346 kB)
Installing collected packages: pytz, tzdata, pandas
Successfully installed pandas-2.2.3 pytz-2024.2 tzdata-2024.2


In [4]:
!pip freeze > requirements.txt

In [1]:
# Load OpenAI Key

import os
from dotenv import load_dotenv

load_dotenv()

os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

In [2]:
# Load CSV
import pandas as pd

df = pd.read_csv("./superheroes.csv")
df.head()

Unnamed: 0,Superhero Name,Superpower,Power Level,Catchphrase
0,Captain Thunder,Bolt Manipulation,90,Feel the power of the storm!
1,Silver Falcon,Flight and Agility,85,"Soar high, fearlessly!"
2,Mystic Shadow,Invisibility and Illusions,78,Disappear into the darkness!
3,Blaze Runner,Pyrokinesis,88,Burn bright and fierce!
4,Electra-Wave,Electric Manipulation,82,Unleash the electric waves!


In [3]:
# load for AI
from langchain.document_loaders.csv_loader import CSVLoader

loader = CSVLoader("./superheroes.csv")
data = loader.load()
data[:5]

[Document(metadata={'source': './superheroes.csv', 'row': 0}, page_content='Superhero Name: Captain Thunder\nSuperpower: Bolt Manipulation\nPower Level: 90\nCatchphrase: Feel the power of the storm!'),
 Document(metadata={'source': './superheroes.csv', 'row': 1}, page_content='Superhero Name: Silver Falcon\nSuperpower: Flight and Agility\nPower Level: 85\nCatchphrase: Soar high, fearlessly!'),
 Document(metadata={'source': './superheroes.csv', 'row': 2}, page_content='Superhero Name: Mystic Shadow\nSuperpower: Invisibility and Illusions\nPower Level: 78\nCatchphrase: Disappear into the darkness!'),
 Document(metadata={'source': './superheroes.csv', 'row': 3}, page_content='Superhero Name: Blaze Runner\nSuperpower: Pyrokinesis\nPower Level: 88\nCatchphrase: Burn bright and fierce!'),
 Document(metadata={'source': './superheroes.csv', 'row': 4}, page_content='Superhero Name: Electra-Wave\nSuperpower: Electric Manipulation\nPower Level: 82\nCatchphrase: Unleash the electric waves!')]

In [4]:
from langchain.chat_models import ChatOllama
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

llm = ChatOllama(
    model="llama3",
    verbose=True,
    callback_manager=CallbackManager([StreamingStdOutCallbackHandler()])
)

  llm = ChatOllama(
  llm = ChatOllama(


In [5]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=100)
all_splits = text_splitter.split_documents(data)

In [8]:
# Embed and store
from langchain.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings

vectordb = Chroma.from_documents(documents=all_splits, embedding=OpenAIEmbeddings())


In [15]:
# Test vectorDB retrieval
question = "What is the name of the thunder super hero"
docs = vectordb.similarity_search(question)
print(f"{len(docs)=}")
print(f"\n{chr(10).join([str(doc) for doc in  docs])}")

len(docs)=4

page_content='Superhero Name: Thunderstrike
Superpower: Lightning Control
Power Level: 91
Catchphrase: Electrify the battlefield!' metadata={'row': 30, 'source': './superheroes.csv'}
page_content='Superhero Name: Thunderstrike
Superpower: Lightning Control
Power Level: 91
Catchphrase: Electrify the battlefield!' metadata={'row': 30, 'source': './superheroes.csv'}
page_content='Superhero Name: Captain Thunder
Superpower: Bolt Manipulation
Power Level: 90
Catchphrase: Feel the power of the storm!' metadata={'row': 0, 'source': './superheroes.csv'}
page_content='Superhero Name: Captain Thunder
Superpower: Bolt Manipulation
Power Level: 90
Catchphrase: Feel the power of the storm!' metadata={'row': 0, 'source': './superheroes.csv'}


In [20]:
# Create prompt
from langchain import hub

QA_CHAIN_PROMPT = hub.pull("rlm/rag-prompt-llama")
print(f"{QA_CHAIN_PROMPT=}")



QA_CHAIN_PROMPT=ChatPromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, metadata={'lc_hub_owner': 'rlm', 'lc_hub_repo': 'rag-prompt-llama', 'lc_hub_commit_hash': '693a2db5447e3b58c060a6ac02758dc7f1aaaaa4ee6214d127bf70b443158630'}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template="[INST]<<SYS>> You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.<</SYS>> \nQuestion: {question} \nContext: {context} \nAnswer: [/INST]"), additional_kwargs={})])


In [22]:
# Set up LLM with OpenAI

from langchain_openai import ChatOpenAI
from langchain.chains import RetrievalQA

llm = ChatOpenAI(
    model="gpt-3.5-turbo-0125",
    temperature=0.0
)

# Set up QA Chain
qa_chain = RetrievalQA.from_chain_type(
    llm,
    retriever=vectordb.as_retriever(),
    chain_type_kwargs={"prompt":QA_CHAIN_PROMPT},
)



In [24]:
# Try invocation
question = "What is the catchphrase of the super hero with the power of thunder?"
result = qa_chain.invoke({"query": question})
print(result['result'])

The catchphrase of the superhero with the power of thunder is "Feel the power of the storm!"
