In [None]:
!pip install langchain-pinecone pinecone-client langchain-huggingface


In [None]:
pip install pinecone

In [None]:
!pip install -q gradio langchain-pinecone pinecone-client langchain-huggingface


In [None]:
!pip -q install langchain
!pip -q install bitsandbytes accelerate transformers
!pip -q install datasets sentencepiece peft
!pip -q install pypdf
!pip -q install sentence-transformers
!pip -q install langchain-community
!pip -q install langchain-text-splitters

In [None]:
!pip install unstructured

In [None]:
!pip install tokenizers

In [None]:
# Disabled install, kept for reference only.
# NOTE(review): confirm whether xformers is still needed; if not, delete this cell.
#!pip install xformers

Libraries

In [None]:
!pip install langchain-classic
!pip install langchain-huggingface


In [None]:
!pip install -q gradio

In [None]:
from langchain_community.document_loaders import UnstructuredURLLoader
from langchain_text_splitters import CharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Pinecone as LC_Pinecone
from langchain_classic.chains import RetrievalQA
from transformers import AutoTokenizer, AutoModelForCausalLM
from transformers import pipeline
from langchain_huggingface import HuggingFacePipeline
from huggingface_hub import notebook_login
import textwrap
import sys
import os
import torch

In [None]:
import nltk

# Tokenizer / POS-tagger data (presumably needed by the `unstructured` HTML parser — confirm).
# Newer NLTK releases look these up under the "punkt_tab" and
# "averaged_perceptron_tagger_eng" names, so both the legacy and the new
# resource names are fetched to work across NLTK versions.
for nltk_resource in (
    "punkt",
    "averaged_perceptron_tagger",
    "punkt_tab",
    "averaged_perceptron_tagger_eng",
):
    nltk.download(nltk_resource, quiet=True)

Passing URLs

In [None]:
# Web pages that form the knowledge base of the bot (one entry per article).
URL = [
    'https://www.evolvingdev.com/post/what-is-llama-2',
    'https://www.sciencedirect.com/science/article/pii/S266734522300024X',
    'https://www.sciencenewstoday.org/what-is-gemini-ai-the-future-of-multimodal-artificial-intelligence-explained',
    'https://www.cnet.com/tech/services-and-software/what-is-copilot-everything-you-need-to-know-about-microsofts-ai-tools/',
]

In [None]:
# Fetch every page in URL and parse it into LangChain documents.
loader = UnstructuredURLLoader(urls=URL)
data = loader.load()

# A page that cannot be fetched is expected to be skipped rather than raise, so make
# missing sources visible instead of silently indexing a smaller corpus.
loaded_sources = {document.metadata.get("source") for document in data}
missing_urls = [url for url in URL if url not in loaded_sources]
if missing_urls:
    print(f"WARNING: no content loaded for {len(missing_urls)} URL(s): {missing_urls}")

In [None]:
# Show only each document's source and the start of its text; dumping the
# full page contents floods the output and bloats the saved notebook.
[(document.metadata.get("source"), document.page_content[:300]) for document in data]

In [None]:
# Number of documents returned by the loader.
len(data)

Splitting Extracted Data into Chunks

In [None]:
# Split on newlines into chunks of at most 1000 characters, with a
# 200-character overlap between neighbouring chunks.
splitter_settings = {
    "separator": '\n',
    "chunk_size": 1000,
    "chunk_overlap": 200,
}
text_splitter = CharacterTextSplitter(**splitter_settings)

In [None]:
# Apply the splitter to every loaded document.
text_chunks = text_splitter.split_documents(data)

In [None]:
# Number of chunks produced by the splitter.
len(text_chunks)

In [None]:
# Inspect one sample chunk. The index is clamped so the cell still works when
# fewer than ten chunks were produced (e.g. because some URLs failed to load).
assert text_chunks, "No chunks were produced - check that the URLs loaded correctly"
text_chunks[min(9, len(text_chunks) - 1)]

Downloading the Hugging Face Embedding Model

In [None]:
# Name the model explicitly instead of relying on the library default, so the
# embedding dimension (768 for all-mpnet-base-v2) is visible and stays stable
# across library upgrades.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-mpnet-base-v2"
)

In [None]:
# Display the embedding object's configuration.
embeddings

Testing HF Embedding

In [None]:
# Embed a probe string and report the length of the resulting vector.
a = embeddings.embed_query("Rahul")
len(a)

In [None]:
# Show only the first few components; printing the whole vector adds
# hundreds of floats to the output without telling the reader anything.
a[:5]

Re-creating the Embeddings with a 384-Dimensional Model (all-MiniLM-L6-v2)

In [None]:
from langchain_huggingface import HuggingFaceEmbeddings

# Embedding model whose output length must match the 384-dimensional Pinecone index.
embeddings_384 = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2"
)

# Enforce the expected dimension instead of relying on a reader to eyeball the output.
embedding_dim_384 = len(embeddings_384.embed_query("check"))
assert embedding_dim_384 == 384, f"expected 384-dimensional embeddings, got {embedding_dim_384}"
print(embedding_dim_384)


Converting Text Chunks into Embeddings and Creating a Knowledge Base

In [None]:
import os
from getpass import getpass

# SECURITY: API keys must never be written into the notebook. Any key that was
# previously committed in this cell has to be treated as leaked and revoked.
# The key is taken from the environment, with an interactive prompt as fallback.
PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY") or getpass("Pinecone API key: ")
PINECONE_API_ENV = os.environ.get("PINECONE_ENVIRONMENT", "us-east-1")

# Export both values so code that is not handed the key explicitly can read it
# from the environment.
os.environ['PINECONE_API_KEY'] = PINECONE_API_KEY
os.environ['PINECONE_ENVIRONMENT'] = PINECONE_API_ENV

In [None]:
import os

from pinecone import Pinecone

# Use the key exported to the environment instead of embedding a second literal
# key in the notebook (a key committed here earlier should be revoked).
pc = Pinecone(api_key=os.environ["PINECONE_API_KEY"])

# "llama-384" is only created further down. Opening a handle to an index that
# does not exist yet is expected to fail on a fresh run, so bind it only when present.
index = pc.Index("llama-384") if "llama-384" in pc.list_indexes().names() else None

In [None]:
# List the indexes visible to this Pinecone client.
pc.list_indexes()


In [None]:
# Name of the original Pinecone index.
index_name = "llama"

Creating the 384-Dimensional Pinecone Index

In [None]:
from pinecone import Pinecone, ServerlessSpec

# Initialize Pinecone client
pc = Pinecone(api_key=PINECONE_API_KEY)

# Create the index only if it is missing, so the notebook can be re-run
# without failing on an "already exists" error.
if "llama-384" not in pc.list_indexes().names():
    pc.create_index(
        name="llama-384",
        dimension=384,          # must equal the embedding length stored in this index
        metric="cosine",
        spec=ServerlessSpec(
            cloud="aws",        # or "gcp"
            region="us-east-1"  # pick a valid region
        )
    )
    print("Index created successfully!")
else:
    print("Index 'llama-384' already exists; skipping creation.")


In [None]:
import os

from pinecone import Pinecone
from langchain_pinecone import PineconeVectorStore
from langchain_huggingface import HuggingFaceEmbeddings

# SECURITY: read the key from the environment; never paste it into the notebook.
PINECONE_API_KEY = os.environ["PINECONE_API_KEY"]
INDEX_NAME = "llama"

# 1) Init Pinecone client
pc = Pinecone(api_key=PINECONE_API_KEY)

# 2) Define embeddings FIRST
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L12-v2"  # 384-dim (not 768)
)

# 3) Connect to existing index (STRING name only)
vectorstore = PineconeVectorStore.from_existing_index(
    INDEX_NAME,
    embeddings,
    text_key="text"
)

# 4) Inspect the index and make a dimension mismatch visible right away:
#    queries against an index of a different dimension cannot work.
index = pc.Index(INDEX_NAME)
index_dimension = pc.describe_index(INDEX_NAME).dimension
embedding_dimension = len(embeddings.embed_query("dimension check"))
if index_dimension != embedding_dimension:
    print(
        f"WARNING: index '{INDEX_NAME}' has dimension {index_dimension} but the "
        f"embedding model produces {embedding_dimension}-dimensional vectors."
    )
print("✅ VectorStore connected successfully!")
print(index.describe_index_stats())


Connecting a Vector Store to the 384-Dimensional Index

In [None]:
from langchain_pinecone import PineconeVectorStore

# Vector store bound to the "llama-384" index, embedding with `embeddings_384`.
vectorstore_384 = PineconeVectorStore.from_existing_index("llama-384", embeddings_384, text_key="text")


In [None]:
import hashlib


def _chunk_id(chunk):
    """Return a stable vector ID derived from a chunk's source and text."""
    source = str(chunk.metadata.get("source", ""))
    payload = f"{source}\n{chunk.page_content}".encode("utf-8")
    return hashlib.sha256(payload).hexdigest()


# Content-derived IDs make this cell idempotent: re-running it overwrites the
# same vectors instead of inserting a second copy of every chunk.
vectorstore_384.add_documents(text_chunks, ids=[_chunk_id(chunk) for chunk in text_chunks])


Creating LLM Wrapper

In [None]:
# Interactive Hugging Face login (presumably needed to download the model below — confirm).
notebook_login()

In [None]:
# Hugging Face Hub identifier of the chat model to load.
model = "daryl149/llama-2-7b-chat-hf"

In [None]:
from transformers import BitsAndBytesConfig

# `model` starts out as the checkpoint name and is rebound to the loaded model
# below. Recover the name from the model's config so that re-running this cell
# does not pass a model object to `from_pretrained`.
model_id = model if isinstance(model, str) else model.config.name_or_path

# `token=True` reuses the credentials stored by the Hugging Face login
# (replaces the deprecated `use_auth_token`).
tokenizer = AutoTokenizer.from_pretrained(model_id, token=True)

# 8-bit loading is requested through a quantization config object; passing
# `load_in_8bit` directly to `from_pretrained` is deprecated.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map='auto',
    torch_dtype=torch.float16,
    token=True,
    quantization_config=BitsAndBytesConfig(load_in_8bit=True),
)

In [None]:
# Text-generation pipeline around the already-loaded model.
# - `torch_dtype` / `device_map` are not passed here: they only apply when the
#   pipeline loads the model itself, and the model above was already placed and
#   quantized (bfloat16 here also contradicted the float16 used at load time).
# - `temperature` is set here because sampling parameters given to the
#   pipeline are what generation actually uses.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=512,
    do_sample=True,
    temperature=0.2,
    top_k=30,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id,
)

In [None]:
# Wrap the transformers pipeline so LangChain chains can call it as an LLM.
# NOTE(review): when a pre-built `pipeline` is supplied, `model_kwargs` may not be
# forwarded to generation, so this temperature might have no effect — confirm,
# and prefer setting sampling parameters where `pipe` is created.
llm=HuggingFacePipeline(pipeline=pipe, model_kwargs={'temperature':0.2})

In [None]:
# Smoke test: ask the bare LLM (no retrieval) for a summary.
summary_question = "Please provide a concise summary of ChatGPT and Gemini"
response = llm.invoke(summary_question)
print(response)


In [None]:
# Second smoke test of the bare LLM, on a topic outside the indexed articles.
comparison_question = "On what parameters does Iphone 13 differs from Iphone 15"
response = llm.invoke(comparison_question)
print(response)


In [None]:
# Import check for the legacy RetrievalQA chain (the status message had a
# mis-encoded emoji, restored here).
from langchain_classic.chains import RetrievalQA
print("✅ RetrievalQA imported successfully!")

In [None]:
# Question used for the retrieval experiments below.
query = "How was Chatgpt used for Students and Research Works"

In [None]:
# Baseline: send the query straight to the LLM, without any retrieved context.
response = llm.invoke(query)
print(response)


In [None]:
# Dimension sanity check through the embedding object itself rather than the
# vector store's private `_embedding` attribute.
print(len(embeddings_384.embed_query("dimension check")))

# Retrieve the three chunks most similar to the query.
doc = vectorstore_384.similarity_search(query, k=3)

In [None]:
from langchain_core.documents import Document


In [None]:
# Inspect the best match; guard against an empty result (e.g. an empty index)
# so the cell reports the situation instead of raising IndexError.
if doc:
    top_hit = doc[0]
    print(type(top_hit))
    print(top_hit.page_content)
    print(top_hit.metadata)
else:
    print("No documents were retrieved for the query.")


In [None]:
# Display all retrieved documents.
doc

In [None]:
# Print the vector length produced by whichever model `embeddings` is currently bound to.
test_vec = embeddings.embed_query("test")
print(len(test_vec))


In [None]:
# Retrieval-augmented QA chain: retrieved chunks are "stuffed" into one prompt.
default_retriever = vectorstore_384.as_retriever()
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=default_retriever,
)


In [None]:
# Question for the retrieval QA chain.
question = "What are capabilities of ChatGPT"

In [None]:
# `Chain.run` is deprecated; `invoke` is the supported entry point. The
# answer text is under the "result" key of the returned dict.
qa.invoke({"query": question})["result"]

Prompt Template

In [None]:
from langchain_core.prompts import PromptTemplate

# The "stuff" RetrievalQA chain fills exactly two variables: {context} (the
# retrieved chunks) and {question} (the user query). The list of source URLs
# is bound up front as a partial variable so it is not treated as a missing input.
prompt = PromptTemplate.from_template(
    """
Use ONLY the following context from {sources}: {context}

Question: {question}

If the context doesn't contain relevant information, respond exactly:
"I can only answer questions about {sources}"

Answer:
""",
    partial_variables={"sources": ", ".join(URL)},
)

# Define the retriever in this cell so it runs on a fresh kernel
# (previously `retriever` was only created in a later cell).
retriever = vectorstore_384.as_retriever(search_kwargs={"k": 3})

# Rebuild the qa chain with the strict prompt
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    chain_type_kwargs={"prompt": prompt}
)


In [None]:
from langchain_classic.chains import RetrievalQA

# Create retriever
retriever = vectorstore_384.as_retriever(search_kwargs={"k": 3})  # limit to top 3 docs

# Setup QA without any memory.
# NOTE(review): this rebinds `qa` with the chain's default prompt, replacing any
# chain built earlier under the same name — confirm that is intended.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever
)

# Ask your question (`invoke` replaces the deprecated direct call `qa({...})`)
question = "Who founded ChatGPT"
result = qa.invoke({"query": question})

print(result['result'])  # answer generated with the top-3 retrieved chunks as context


In [None]:
import gradio as gr
from gradio.components import Textbox

def chat_with_url(question):
    """Answer a question with the retrieval QA chain bound to ``qa``.

    Args:
        question: Text entered in the question box.

    Returns:
        The chain's answer text, or a short hint when the question is
        empty or only whitespace.
    """
    # Do not run retrieval and generation for blank input.
    if not question or not question.strip():
        return "Please enter a question."
    result = qa.invoke({"query": question})
    return result["result"]

# Two-column layout: question input on the left, read-only answer on the right.
with gr.Blocks(title="URL Q&A Bot") as demo:
    gr.Markdown("# 🚀 URL Q&A Bot")
    gr.Markdown(f"Query your Pinecone chunks from {URL}!")

    with gr.Row():
        with gr.Column(scale=1):
            # Named *_box so the widget does not shadow the `question` string used earlier.
            question_box = Textbox(
                label="Question",
                placeholder=f"Ask about {URL}...",
                lines=3,
                show_label=True
            )
        with gr.Column(scale=2):
            answer_box = Textbox(
                label="Answer",
                lines=10,
                max_lines=20,
                interactive=False  # Read-only answer box
            )

    # The button and the textbox's submit event both run the same handler.
    btn = gr.Button("Submit", variant="primary")
    btn.click(chat_with_url, question_box, answer_box)
    # After a submit from the textbox, clear the input. The reset callback takes
    # no arguments, so it is wired with no inputs (None); passing the textbox as
    # an input would call the zero-argument lambda with one value and fail.
    question_box.submit(chat_with_url, question_box, answer_box).then(
        lambda: "", None, question_box
    )

# Start the Gradio app.
# NOTE(review): share=True is expected to publish a public link to this app (and
# therefore to the model and index behind it) — confirm that exposure is intended.
demo.launch(share=True)
