In [23]:
import os
from dotenv import load_dotenv

# Populate the process environment from the local .env file.
# override=True is passed so that values from .env take precedence.
_ = load_dotenv(override=True)

# Read both service keys from the environment (None when a key is absent).
OPENAI_API_KEY, PINECONE_API_KEY = (
    os.getenv(key_name) for key_name in ('OPENAI_API_KEY', 'PINECONE_API_KEY')
)

In [None]:
import pinecone
from langchain.text_splitter import RecursiveCharacterTextSplitter
import tiktoken
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import Pinecone
from langchain.chains import RetrievalQA
from langchain_openai import ChatOpenAI

* `Connect Pinecone`

In [None]:
# Connect to Pinecone and make sure the target index exists
pc = pinecone.Pinecone(api_key=PINECONE_API_KEY)

index_name = "quickstart"

# Create the index only when it is missing. Checking for existence explicitly
# (rather than swallowing every exception raised by create_index) lets real
# failures such as a bad API key or a network error surface in this cell.
if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        # Same value as the `dimensions=1536` requested from the embedding
        # model later in this notebook.
        dimension=1536,
        metric="cosine",
        spec=pinecone.ServerlessSpec(
            cloud="aws",
            region="us-east-1",
        ),
    )

# Get a handle to the index itself
index = pc.Index(index_name)

* `Splitting & Embedding Text`

In [None]:
# Read the file. The encoding is given explicitly so that decoding does not
# depend on the platform's default locale encoding.
with open('../assets/churchill_speech.txt', encoding='utf-8') as f:
    texts = f.read()


# Text splitter: chunk_size and chunk_overlap are measured with `len`,
# i.e. in characters.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=100,
    chunk_overlap=20,
    length_function=len
)

# Create Document objects from the raw text
chunks = text_splitter.create_documents([texts])

print(f'Number of Chunks is {len(chunks)}')
# An example
print(chunks[2].page_content)

In [None]:
# Estimate what embedding every chunk will cost
enc = tiktoken.encoding_for_model('text-embedding-3-small')

# Sum the token count of each chunk's text
total_tokens = sum(len(enc.encode(chunk.page_content)) for chunk in chunks)
print(f'Total Tokens is: {total_tokens}')

# 0.02 is applied as the price in USD per 1e6 tokens
embedding_cost = total_tokens * 0.02 / 1e6
print(f'The cost is {embedding_cost} USD')

In [None]:
# OpenAI embedding model through LangChain, requesting 1536 dimensions
embeddings = OpenAIEmbeddings(
    model='text-embedding-3-small',
    dimensions=1536,
)

# Embed the first chunk and display the first ten values of its vector
first_chunk_text = chunks[0].page_content
vector_0 = embeddings.embed_query(first_chunk_text)
vector_0[:10]

* `Upserting to Pinecone`

In [None]:
# Upsert the chunks into the Pinecone index named by `index_name`,
# using `embeddings` as the embedding model
vector_store = Pinecone.from_documents(
    documents=chunks,
    embedding=embeddings,
    index_name=index_name,
)

In [None]:
# Inspect the index statistics after the upsert
index_stats = index.describe_index_stats()
index_stats

* `Query`

In [None]:
# Ask the vector store for the 3 chunks most similar to the question
query = 'Where should we fight?'
result = vector_store.similarity_search(query=query, k=3)

# Print each hit followed by a separator line of 50 dashes
separator = '-' * 50
for doc in result:
    print(doc.page_content, separator, sep='\n')

----

* `Answering using LLM (RAG)`

In [None]:
# Chat model used to generate the answers
llm = ChatOpenAI(model='gpt-4o-mini', temperature=0.2)

# Retriever based on a similarity measure, with k=4
retriever = vector_store.as_retriever(
    search_type='similarity',
    search_kwargs=dict(k=4),
)

# RetrievalQA chain built from the LLM, the 'stuff' chain type and the retriever.
# This chain takes a list of documents, formats them all into a single prompt,
# then passes that prompt to the LLM.
chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type='stuff',
    retriever=retriever,
)

In [None]:
# First example: run the chain on a question and print the 'result' field
query = 'Answer only from the provided input. Where should we fight?'
answer = chain.invoke({'query': query})
answer_text = answer['result']
print(answer_text)

In [None]:
# Second example: a question about the king of Belgium
query = 'Answer only from the provided input. Who was the king of Belgium at that time?'
answer = chain.invoke({'query': query})
answer_text = answer['result']
print(answer_text)

In [None]:
# Third example: a question about the French defenses at Sedan
query = 'Answer only from the provided input. Does French defenses at Sedan?'
answer = chain.invoke({'query': query})
answer_text = answer['result']
print(answer_text)

----