In [35]:
import os
import pickle

import requests
import streamlit as st
from bs4 import BeautifulSoup
from langchain.chains import RetrievalQA
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.docstore.document import Document
from langchain.document_loaders import UnstructuredURLLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.llms import Ollama
from langchain_community.vectorstores import FAISS
    

In [36]:
# Language model handle; later cells pass it as `llm=` when building the RetrievalQA chain.
# NOTE(review): presumably requires a reachable Ollama server with the "llama2" model
# already pulled — confirm before running on a new machine.
llm = Ollama(model="llama2")


In [48]:
def extract_text_from_urls(urls, timeout=10):
    """Download each URL and return the concatenated text of its ``<p>`` elements.

    Loading is best-effort: a URL that cannot be fetched (network error,
    timeout, HTTP 4xx/5xx) or parsed is reported on stdout and skipped, so one
    bad link does not abort the whole batch.

    Args:
        urls: Iterable of URL strings to download.
        timeout: Seconds to wait on each HTTP request before giving up.

    Returns:
        One string with a section per successfully loaded URL. Each section is
        ``"\\n\\n--- Content from <url> ---\\n"`` followed by the paragraph texts
        joined with single spaces. An empty string if nothing was loaded.
    """
    sections = []
    for url in urls:
        try:
            # Without a timeout a stalled server would hang the notebook cell forever.
            response = requests.get(url, timeout=timeout)
            # Do not scrape the body of an HTTP error page as if it were the article.
            response.raise_for_status()
            soup = BeautifulSoup(response.content, 'html.parser')
            # Extract text from paragraphs
            paragraphs = soup.find_all('p')
            text = ' '.join(p.get_text() for p in paragraphs)
            sections.append(f"\n\n--- Content from {url} ---\n{text}")
        except Exception as e:
            # Deliberately broad: any failure for one URL is logged and the loop moves on.
            print(f"Error loading {url}: {e}")
    # Join once at the end instead of growing a string with repeated `+=`.
    return "".join(sections)

# URLs to load
urls = [
    "https://www.moneycontrol.com/news/business/banks/hdfc-bank-re-appoints-sanmoy-chakrabarti-as-chief-risk-officer-11259771.html",
    "https://www.moneycontrol.com/news/business/markets/market-corrects-post-rbi-ups-inflation-forecast-icrr-bet-on-these-top-10-rate-sensitive-stocks-ideas-11142611.html"
]

# Load the articles as LangChain Documents. The primary path is UnstructuredURLLoader;
# if it raises or yields nothing, fall back to the requests/BeautifulSoup scraper.
try:
    loader = UnstructuredURLLoader(urls=urls)
    data = loader.load()
    # The loader skips URLs it cannot fetch and may return an empty list without
    # raising, so an empty result must be routed to the fallback explicitly.
    if not data:
        raise ValueError("no documents were loaded")
except Exception as e:
    print(f"UnstructuredURLLoader failed: {e}")
    print("Using fallback URL loading method...")
    # Build one Document per URL so every chunk keeps its own `source`; merging all
    # pages into a single Document would attribute every chunk to every URL.
    data = []
    for url in urls:
        page_text = extract_text_from_urls([url])
        if page_text.strip():
            data.append(Document(page_content=page_text, metadata={"source": url}))
    # Combined text of all pages, kept under its previous name for any cell that reads it.
    data_text = "".join(doc.page_content for doc in data)
    if not data:
        # Fail here with a clear message rather than later inside the vector store build.
        raise RuntimeError("No content could be loaded from any of the URLs")


In [49]:
# Split the loaded documents in `data` into overlapping chunks; the resulting `docs`
# list is what gets embedded and indexed in the next cell.
# NOTE(review): chunk_size/chunk_overlap are presumably measured in characters (the
# splitter's default length function) — confirm if a token-based budget is intended.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200
)
docs = text_splitter.split_documents(data)

In [50]:
# Embed every chunk in `docs` with the all-MiniLM-L6-v2 sentence-transformers model and
# build a FAISS vector store from the results; `vector_index` backs the retriever used
# by the question-answering chain in later cells.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
vector_index = FAISS.from_documents(docs, embeddings)

  return forward_call(*args, **kwargs)


In [51]:
# Persist the vector store to disk with pickle; `file_path` is reused by the cell that
# reloads it.
# NOTE(review): the path is relative, so the file lands one level above the kernel's
# current working directory — confirm that is the intended location.
# NOTE(review): LangChain's FAISS store presumably also provides save_local/load_local;
# consider those if pickling proves fragile across library versions — confirm.
file_path = '../vector_index.pkl'
with open(file_path, "wb") as f:
    pickle.dump(vector_index, f)


In [42]:
# Reload the persisted vector store if the pickle file exists.
# SECURITY: pickle.load can execute arbitrary code embedded in the file. Only load a
# file that this notebook wrote itself; never one obtained from an untrusted source.
if os.path.exists(file_path):
    with open(file_path, "rb") as f:
        vectorIndex = pickle.load(f)
    # Later cells build their retriever from `vector_index`, so bind the reloaded store
    # to that name as well; otherwise the reload is never used. `vectorIndex` is kept
    # as an alias for any code that still refers to it.
    vector_index = vectorIndex

In [52]:
# Build the question-answering chain: the retriever derived from `vector_index` fetches
# chunks and the "stuff" chain type passes them to `llm` as the context of one prompt.
# return_source_documents=True makes the result include the retrieved documents.
chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vector_index.as_retriever(),
    return_source_documents=True
)
# Bare expression: shows the chain's repr as the cell output.
chain

RetrievalQA(verbose=False, combine_documents_chain=StuffDocumentsChain(verbose=False, llm_chain=LLMChain(verbose=False, prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template="Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.\n\n{context}\n\nQuestion: {question}\nHelpful Answer:"), llm=Ollama(), output_parser=StrOutputParser(), llm_kwargs={}), document_prompt=PromptTemplate(input_variables=['page_content'], input_types={}, partial_variables={}, template='{page_content}'), document_variable_name='context'), return_source_documents=True, retriever=VectorStoreRetriever(tags=['FAISS', 'HuggingFaceEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x000001A4D5622B50>, search_kwargs={}))

In [55]:
# Create simpler chain
# NOTE(review): this repeats the chain construction of the previous cell with identical
# arguments; it is kept so the cell can be re-run on its own, but one of the two could go.
chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vector_index.as_retriever(),
    return_source_documents=True
)

# Test it
query = "what is the price of Tiago iCNG?"
# Maximum number of characters of each source document to print.
PREVIEW_CHARS = 500

result = chain.invoke({"query": query})
print("Result:", result["result"])
print("\n" + "="*50)
print("SOURCE DOCUMENTS:")
print("="*50)

# Display all source documents with their content
for i, doc in enumerate(result.get("source_documents", [])):
    content = doc.page_content
    # Only signal truncation when something was actually cut off.
    ellipsis = "..." if len(content) > PREVIEW_CHARS else ""
    print(f"\n--- Source Document {i+1} ---")
    print(f"Source: {doc.metadata.get('source', 'Unknown')}")
    print(f"Content: {content[:PREVIEW_CHARS]}{ellipsis}")
    print("-" * 30)

  return forward_call(*args, **kwargs)


Result: Based on the provided context, the price of Tiago iCNG cannot be determined with certainty as it is not mentioned in the given text. The text only provides information about the stock prices of Manappuram Finance, Ashok Leyland, and Hero MotoCorp, but does not provide any information about the price of Tiago iCNG. Therefore, I cannot answer your question.

SOURCE DOCUMENTS:

--- Source Document 1 ---
Source: https://www.moneycontrol.com/news/business/markets/market-corrects-post-rbi-ups-inflation-forecast-icrr-bet-on-these-top-10-rate-sensitive-stocks-ideas-11142611.html
Content: The price movement after that led to the development of the symmetrical triangle. Currently, the stock is very close to multiple moving average support level at 50-day, 100-day and 200-day MA which is expected to act as a strong support level and the price is likely to move towards the previous highs of Rs 1,720 levels.

Thus, one should continue to hold the stock with stop-loss of Rs 1,612 and can exp