In [1]:
# # ─── Cell 1: Install dependencies ───
# !pip install transformers torch langchain langchain-huggingface chromadb langchain_community


In [2]:
# !pip install langchain_community

In [3]:
# ─── Cell 2: Imports & Pipeline Setup ───
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
from langchain_huggingface.llms import HuggingFacePipeline
from langchain.chains import RetrievalQA
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma

# 1) Build local HF pipeline
model_id = "google/flan-t5-large"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model     = AutoModelForSeq2SeqLM.from_pretrained(model_id)
hf_pipe   = pipeline(
    task="text2text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=256,
    device=-1,            # CPU; set to 0 for GPU
)

# 2) Wrap in LangChain LLM
hf_llm = HuggingFacePipeline(pipeline=hf_pipe)

# 3) Embeddings & VectorStore factory
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")


USER_AGENT environment variable not set, consider setting it to identify your requests.
Device set to use cpu
  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")


In [4]:
# ─── Cell 3: Load a Website and Build the QA Chain ───
# Replace with any URL you like
test_url = "https://www.gsmarena.com/"

# 1) Load & split
loader    = WebBaseLoader(test_url)
docs      = loader.load()

# 2) Build vector store
vectordb = Chroma.from_documents(
    documents=docs,
    embedding=embeddings,
    collection_name="test-site"
)

# 3) Build RetrievalQA chain
qa = RetrievalQA.from_chain_type(
    llm=hf_llm,
    chain_type="stuff",
    retriever=vectordb.as_retriever()
)

print("✅ QA chain ready!")


✅ QA chain ready!


In [5]:
# # ─── Cell 3: Load a Website and Build the QA Chain ───
# # Replace with any URL you like
# test_url = "https://www.example.com/"

# # 1) Load & split
# loader    = WebBaseLoader(test_url)
# docs      = loader.load()

# # 2) Build vector store
# vectordb = Chroma.from_documents(
#     documents=docs,
#     embedding=embeddings,
#     collection_name="test-site"
# )

# # 3) Build RetrievalQA chain
# qa = RetrievalQA.from_chain_type(
#     llm=hf_llm,
#     chain_type="stuff",
#     retriever=vectordb.as_retriever()
# )

# print("✅ QA chain ready!")


In [6]:
# ─── Cell 4: Ask It Questions ───
queries = [
    "What is the top news in this website",
    "Does it mention any contact information?",
    "Summarize the first paragraph."
]

for q in queries:
    answer = qa.run(q)
    print(f"Q: {q}\nA: {answer}\n" + "-"*60)


  answer = qa.run(q)
Token indices sequence length is longer than the specified maximum sequence length for this model (1791 > 512). Running this sequence through the model will result in indexing errors


Q: What is the top news in this website
A: Apple and Aston Martin announce CarPlay Ultra - a next-generation infotainment system
------------------------------------------------------------
Q: Does it mention any contact information?
A: GSMArena.com
------------------------------------------------------------
Q: Summarize the first paragraph.
A: The following is a list of the best smartphones of the year.
------------------------------------------------------------
