In [5]:
from langchain.vectorstores import FAISS
from langchain.embeddings import OllamaEmbeddings
from langchain_experimental.text_splitter import SemanticChunker
from langchain_community.document_loaders import TextLoader
from langchain_community.chat_models import ChatOllama
from langchain.retrievers.multi_query import MultiQueryRetriever
from langchain.chains import RetrievalQA

# Prerequisite: the Ollama models named below must already be pulled.
# If the embedding model is missing, the embedding request fails with
# HTTP 404 ("model ... not found, try pulling it first"). Pull them with:
#   ollama pull nomic-embed-text:v1.5
#   ollama pull llama3.2:3b
EMBEDDING_MODEL = "nomic-embed-text:v1.5"
LLM_MODEL = "llama3.2:3b"
SOURCE_FILE = "citi.txt"
QUESTION = "What is the EPS of citi bank in 2025 Q1?"

# 1. Load and split docs
loader = TextLoader(SOURCE_FILE, encoding='utf-8')
docs = loader.load()
embedding = OllamaEmbeddings(model=EMBEDDING_MODEL)
splitter = SemanticChunker(embedding)
split_docs = splitter.split_documents(docs)

# 2. Vector DB (reuses the same embedding client as the splitter)
db = FAISS.from_documents(split_docs, embedding)

# 3. Base LLM
llm = ChatOllama(model=LLM_MODEL)

# 4. Multi-query retriever
multi_query_retriever = MultiQueryRetriever.from_llm(
    retriever=db.as_retriever(), llm=llm
)

# 5. QA chain with multi-query retriever
qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=multi_query_retriever)

# 6. Ask a question
# Chain.run() is deprecated in LangChain; invoke() takes the question under
# the "query" key and returns a dict whose "result" entry is the answer text.
response = qa_chain.invoke({"query": QUESTION})["result"]
print(response)


ValueError: Error raised by inference API HTTP code: 404, {"error":"model \"nomic-embed-text:v1.5\" not found, try pulling it first"}

📄 Text File (citi.txt)
   ↓
🧩 Split into Chunks
   ↓
🔢 Embedded into Vectors
   ↓
🗃️ Stored in FAISS
   ↓
❓ You Ask a Question
   ↓
🔀 LLM Rewrites It into Several Query Variants (MultiQueryRetriever)
   ↓
🔍 Relevant Chunks Retrieved for Each Variant
   ↓
🧠 Sent to LLM (LLaMA 3.2)
   ↓
💬 Final Answer Generated


In [4]:
from langchain.vectorstores import FAISS
from langchain.embeddings import OllamaEmbeddings
from langchain_experimental.text_splitter import SemanticChunker
from langchain_community.document_loaders import TextLoader
from langchain_community.chat_models import ChatOllama
from langchain.retrievers.multi_query import MultiQueryRetriever
from langchain.chains import RetrievalQA

class MultiPromptQA:
    """Answer questions about one text file using multi-query retrieval.

    Constructing an instance runs the whole pipeline eagerly: the file is
    loaded and split with ``SemanticChunker``, the chunks are indexed in a
    FAISS store, and a ``RetrievalQA`` chain is assembled on top of a
    ``MultiQueryRetriever``. Both model names must be known to the Ollama
    server, otherwise construction fails.
    """

    def __init__(self, file_path: str, embedding_model: str, llm_model: str):
        """Build the vector index and the QA chain.

        Args:
            file_path: Path of the UTF-8 text file to index.
            embedding_model: Ollama model name used to embed text.
            llm_model: Ollama model name used as the chat LLM.
        """
        self.file_path = file_path
        self.embedding_model = embedding_model
        self.llm_model = llm_model

        # One embedding client shared by the splitter and the vector store,
        # rather than constructing an identical client in each step.
        self.embedding = OllamaEmbeddings(model=self.embedding_model)

        self.docs = self.load_and_split()
        self.db = self.create_vectorstore()
        self.llm = ChatOllama(model=self.llm_model)
        self.retriever = self.create_multi_query_retriever()
        self.qa_chain = RetrievalQA.from_chain_type(llm=self.llm, retriever=self.retriever)

    def load_and_split(self):
        """Load ``self.file_path`` and return its semantically split chunks."""
        loader = TextLoader(self.file_path, encoding='utf-8')
        docs = loader.load()

        splitter = SemanticChunker(self.embedding)
        return splitter.split_documents(docs)

    def create_vectorstore(self):
        """Embed ``self.docs`` and return the resulting FAISS store."""
        return FAISS.from_documents(self.docs, self.embedding)

    def create_multi_query_retriever(self):
        """Wrap the FAISS retriever in a ``MultiQueryRetriever`` driven by ``self.llm``."""
        return MultiQueryRetriever.from_llm(
            retriever=self.db.as_retriever(),
            llm=self.llm,
            # Per the LangChain docs this also searches with the original
            # question, not only the LLM-generated variants.
            include_original=True
        )

    def ask(self, question: str) -> str:
        """Run ``question`` through the QA chain and return the answer text.

        ``Chain.run`` is deprecated in LangChain, so the chain is called via
        ``invoke``; ``RetrievalQA`` reads the question from the ``"query"``
        key and places the answer under ``"result"``.
        """
        return self.qa_chain.invoke({"query": question})["result"]


# Settings for the single-file QA pipeline, gathered in one mapping.
# NOTE(review): "qwen2.5:0.5b" looks like a general chat model rather than a
# dedicated embedding model -- confirm retrieval quality is acceptable.
qa_settings = {
    "file_path": "C:/Users/Akshaya V/git/Earnings research/Earnings_agent/citi.txt",
    "embedding_model": "qwen2.5:0.5b",
    "llm_model": "llama3.2:3b",
}
qa = MultiPromptQA(**qa_settings)

# Put one question to the pipeline and show the answer text.
question = "What is the EPS of citi in 2025Q1?"
response = qa.ask(question)
print(response)


The diluted EPS (Earnings Per Share) for Citigroup Inc. in Q1 2025 was $1.96 per share.


In [None]:
from langchain.vectorstores import FAISS
from langchain.embeddings import OllamaEmbeddings
from langchain_experimental.text_splitter import SemanticChunker
from langchain_community.document_loaders import TextLoader
from langchain_community.chat_models import ChatOllama
from langchain.retrievers.multi_query import MultiQueryRetriever
from langchain.chains import RetrievalQA

class MultiPromptQA:
    """Answer questions across several text files using multi-query retrieval.

    Constructing an instance runs the whole pipeline eagerly: every file is
    loaded and split with ``SemanticChunker``, all chunks are indexed in one
    FAISS store, and a ``RetrievalQA`` chain is assembled on top of a
    ``MultiQueryRetriever``. Both model names must be known to the Ollama
    server, otherwise construction fails.
    """

    def __init__(self, file_paths: list, embedding_model: str, llm_model: str):
        """Build the combined vector index and the QA chain.

        Args:
            file_paths: Paths of the UTF-8 text files to index. A single
                path string is also accepted and treated as a one-item list.
            embedding_model: Ollama model name used to embed text.
            llm_model: Ollama model name used as the chat LLM.

        Raises:
            ValueError: If ``file_paths`` is empty.
        """
        # Validate before any model client is created so bad input fails fast.
        self.file_paths = self._normalize_paths(file_paths)
        self.embedding_model = embedding_model
        self.llm_model = llm_model

        # One embedding client shared by the splitter and the vector store,
        # rather than constructing an identical client in each step.
        self.embedding = OllamaEmbeddings(model=self.embedding_model)

        self.docs = self.load_and_split_all()
        self.db = self.create_vectorstore()
        self.llm = ChatOllama(model=self.llm_model)
        self.retriever = self.create_multi_query_retriever()
        self.qa_chain = RetrievalQA.from_chain_type(llm=self.llm, retriever=self.retriever)

    @staticmethod
    def _normalize_paths(file_paths):
        """Return ``file_paths`` as a non-empty collection of paths.

        A bare string would otherwise be iterated character by character, so
        it is wrapped in a list. Empty input is rejected here because it
        would only fail later, with an obscure error, when the vector store
        is built from zero documents.
        """
        if isinstance(file_paths, str):
            file_paths = [file_paths] if file_paths else []
        if not file_paths:
            raise ValueError("file_paths must contain at least one file path")
        return file_paths

    def load_and_split_all(self):
        """Load every file in ``self.file_paths`` and return all chunks in order."""
        all_split_docs = []
        splitter = SemanticChunker(self.embedding)

        for path in self.file_paths:
            loader = TextLoader(path, encoding='utf-8')
            docs = loader.load()
            all_split_docs.extend(splitter.split_documents(docs))

        return all_split_docs

    def create_vectorstore(self):
        """Embed ``self.docs`` and return the resulting FAISS store."""
        return FAISS.from_documents(self.docs, self.embedding)

    def create_multi_query_retriever(self):
        """Wrap the FAISS retriever in a ``MultiQueryRetriever`` driven by ``self.llm``."""
        return MultiQueryRetriever.from_llm(
            retriever=self.db.as_retriever(),
            llm=self.llm,
            # Per the LangChain docs this also searches with the original
            # question, not only the LLM-generated variants.
            include_original=True
        )

    def ask(self, question: str) -> str:
        """Run ``question`` through the QA chain and return the answer text.

        ``Chain.run`` is deprecated in LangChain, so the chain is called via
        ``invoke``; ``RetrievalQA`` reads the question from the ``"query"``
        key and places the answer under ``"result"``.
        """
        return self.qa_chain.invoke({"query": question})["result"]
# Settings for the two-bank QA pipeline, gathered in one mapping.
# NOTE(review): "qwen2.5:0.5b" looks like a general chat model rather than a
# dedicated embedding model -- confirm retrieval quality is acceptable.
qa_settings = {
    "file_paths": [
        "C:/Users/Akshaya V/git/Earnings research/Earnings_agent/citi.txt",
        "C:/Users/Akshaya V/git/Earnings research/Earnings_agent/jpmc.txt",
    ],
    "embedding_model": "qwen2.5:0.5b",
    "llm_model": "llama3.2:3b",
}
qa = MultiPromptQA(**qa_settings)

# Put one cross-document question to the pipeline and show the answer text.
question = "Compare the EPS of Citi and JPMorgan in 2025Q1."
response = qa.ask(question)
print(response)


In [None]:
)