In [None]:
import os
import json
import requests
import time
from dotenv import load_dotenv, find_dotenv

from langchain_groq import ChatGroq
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain_community.document_loaders import UnstructuredURLLoader
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings

load_dotenv(find_dotenv())

# -----------------------------
# Config variables
# -----------------------------
# Prompt sent to the LLM; {context} receives the retrieved chunks and
# {question} the user's query.
PROMPT_TEMPLATE = """
You are a great researcher. With the information provided understand in deep and try to answer the question.
If you cant answer the question based on the information either say you cant find an answer or unable to find an answer.
So try to understand in depth about the context and answer only based on the information provided. Dont generate irrelevant answers.

Context: {context}
Question: {question}
Do provide only helpful answers

Answer:
"""
# Placeholder names that PROMPT_TEMPLATE expects to be filled in.
INPUT_VARIABLES = ["context", "question"]
# The text splitter takes a *list* of separators. A bare string is iterated
# character by character, which only works by accident for a one-character
# separator and silently breaks for anything longer (e.g. "\n\n").
SEPARATORS = ["\n"]
# Chunk length and overlap handed to the text splitter.
CHUNK_SIZE = 100
CHUNK_OVERLAP = 50
# Hugging Face model name used to embed chunks.
EMBEDDER = "BAAI/bge-base-en-v1.5"
# RetrievalQA chain type.
CHAIN_TYPE = "stuff"
# Retriever search settings: number of chunks fetched per query.
SEARCH_KWARGS = {'k': 2}

# -----------------------------
# Researcher class
# -----------------------------
class Researcher:
    """Answer a question from live web search results using retrieval QA.

    Pipeline: Serper search -> collect result URLs -> download the pages ->
    split into chunks -> embed into a FAISS store -> ask the Groq-hosted LLM
    with the retrieved chunks as context. Each stage prints its wall-clock
    time.
    """

    # Seconds to wait for the Serper API before giving up.
    SEARCH_TIMEOUT = 30
    # Number of organic search results considered when collecting URLs.
    MAX_ORGANIC_RESULTS = 3
    # Returned instead of an LLM answer when nothing could be indexed.
    NO_CONTENT_ANSWER = (
        "Unable to find an answer: no content could be retrieved for this query."
    )

    def __init__(self):
        """Read API keys from the environment and build the reusable components."""
        self.serper_api_key = os.getenv("SERPER_API_KEY")
        self.groq_api_key = os.getenv("GROQ_API_KEY")
        self.prompt_template = PromptTemplate(
            template=PROMPT_TEMPLATE,
            input_variables=INPUT_VARIABLES
        )
        # The splitter expects a list of separators; accept a bare string
        # from the config by wrapping it instead of letting it be iterated
        # character by character.
        separators = [SEPARATORS] if isinstance(SEPARATORS, str) else list(SEPARATORS)
        self.text_splitter = RecursiveCharacterTextSplitter(
            separators=separators,
            chunk_size=CHUNK_SIZE,
            chunk_overlap=CHUNK_OVERLAP
        )
        self.llm = ChatGroq(
            temperature=0.5,
            model_name="groq/compound",
            groq_api_key=self.groq_api_key
        )

        # -----------------------------
        # Embeddings (GPU when available, CPU otherwise)
        # -----------------------------
        self.hfembeddings = HuggingFaceEmbeddings(
            model_name=EMBEDDER,
            model_kwargs={'device': self._pick_device()}
        )

    @staticmethod
    def _pick_device():
        """Return 'cuda' when torch reports a usable GPU, otherwise 'cpu'."""
        try:
            import torch
        except ImportError:
            return "cpu"
        return "cuda" if torch.cuda.is_available() else "cpu"

    # -----------------------------
    # Search articles using Serper API
    # -----------------------------
    def search_articles(self, query):
        """Run a Serper search for ``query``.

        Returns:
            A ``(results, elapsed_seconds)`` tuple, where ``results`` is the
            decoded JSON body of the response.

        Raises:
            requests.RequestException: on timeout, connection failure or a
                non-2xx HTTP status.
        """
        t0 = time.time()
        response = requests.post(
            "https://google.serper.dev/search",
            headers={
                'X-API-KEY': self.serper_api_key,
                'Content-Type': 'application/json'
            },
            data=json.dumps({"q": query}),
            # Without a timeout a stalled connection would hang forever.
            timeout=self.SEARCH_TIMEOUT,
        )
        # Surface auth/quota errors here rather than as a missing-key error
        # further down the pipeline.
        response.raise_for_status()
        elapsed = time.time() - t0
        print(f"[Search] Time taken: {elapsed:.3f}s")
        return response.json(), elapsed

    # -----------------------------
    # Extract URLs from search results
    # -----------------------------
    def get_urls(self, articles):
        """Collect the URLs worth fetching from a Serper response.

        Takes the answer-box link (when present) followed by the links of the
        first ``MAX_ORGANIC_RESULTS`` organic results. Entries without a link
        are skipped and duplicates are dropped, keeping first-seen order.
        """
        if not isinstance(articles, dict):
            return []

        candidates = []
        answer_box = articles.get("answerBox")
        if isinstance(answer_box, dict):
            candidates.append(answer_box.get("link"))
        organic = articles.get("organic") or []
        candidates.extend(
            hit.get("link")
            for hit in organic[:self.MAX_ORGANIC_RESULTS]
            if isinstance(hit, dict)
        )

        urls = []
        for link in candidates:
            # The answer box often points at the same page as the first
            # organic hit; fetching it twice would only duplicate chunks.
            if link and link not in urls:
                urls.append(link)
        return urls

    # -----------------------------
    # Fetch content from URLs
    # -----------------------------
    def get_content_from_urls(self, urls):
        """Download and parse ``urls``.

        Returns:
            A ``(documents, elapsed_seconds)`` tuple. ``documents`` is empty
            when loading fails; the error is printed rather than raised.
        """
        t0 = time.time()
        loader = UnstructuredURLLoader(urls=urls)
        try:
            research_content = loader.load()
        except Exception as e:
            # Best-effort stage: a failed fetch should not abort the caller.
            print(f"Error loading URLs: {e}")
            research_content = []
        elapsed = time.time() - t0
        print(f"[Content Fetch] Time taken: {elapsed:.3f}s")
        return research_content, elapsed

    # -----------------------------
    # Process query: embeddings, FAISS, LLM
    # -----------------------------
    def research_given_query(self, research_objective, research_content):
        """Answer ``research_objective`` using ``research_content`` as context.

        Splits the documents, indexes them in FAISS (stored on ``self.db``),
        runs the RetrievalQA chain and returns the chain's ``"result"``.
        When there is nothing to index, ``NO_CONTENT_ANSWER`` is returned
        without calling the LLM.
        """
        t0_total = time.time()

        # Split content into chunks
        docs = self.text_splitter.split_documents(research_content) if research_content else []
        t_split = time.time()
        print(f"[Text Split] Time taken: {t_split - t0_total:.3f}s")

        if not docs:
            # A vector store cannot be built from zero documents.
            print("[Research] No content to index; skipping retrieval and LLM call")
            return self.NO_CONTENT_ANSWER

        # Create FAISS vector store from the chunk embeddings
        self.db = FAISS.from_documents(documents=docs, embedding=self.hfembeddings)
        t_embed = time.time()
        print(f"[Embedding + DB Build] Time taken: {t_embed - t_split:.3f}s")

        # Setup RetrievalQA chain
        bot = RetrievalQA.from_chain_type(
            llm=self.llm,
            chain_type=CHAIN_TYPE,
            retriever=self.db.as_retriever(search_kwargs=SEARCH_KWARGS),
            return_source_documents=True,
            verbose=True,
            chain_type_kwargs={"prompt": self.prompt_template}
        )

        # LLM inference
        t_start_infer = time.time()
        research_out = bot.invoke({"query": research_objective})
        infer_seconds = time.time() - t_start_infer
        print(f"[LLM Inference] Time taken: {infer_seconds:.3f}s")

        answer = research_out["result"]

        # Estimate time per token (whitespace-separated words stand in for
        # tokens); skipped for an empty answer to avoid dividing by zero.
        if isinstance(answer, str):
            num_tokens = len(answer.split())
            if num_tokens:
                print(f"[LLM] Estimated time per token: {infer_seconds/num_tokens:.3f}s")

        print(f"[Total Research Query] Total time: {time.time() - t0_total:.3f}s")

        return answer

    # -----------------------------
    # Full research pipeline
    # -----------------------------
    def research(self, query):
        """Run search, fetch and QA for ``query`` and return the answer."""
        # Stage timings are already printed by each stage.
        search_results, _ = self.search_articles(query)
        urls = self.get_urls(search_results)
        research_content, _ = self.get_content_from_urls(urls)
        return self.research_given_query(query, research_content)


# -----------------------------
# Run example
# -----------------------------
if __name__ == "__main__":
    # API keys come from the process environment or from the .env file that
    # load_dotenv() read at import time. They are deliberately not assigned
    # here: writing "" would overwrite real values loaded from .env, and
    # pasting keys into source leaks them with the notebook.
    missing_keys = [
        name for name in ("SERPER_API_KEY", "GROQ_API_KEY") if not os.getenv(name)
    ]
    if missing_keys:
        raise RuntimeError(
            f"Missing required environment variable(s): {', '.join(missing_keys)}. "
            "Set them in the environment or in a .env file."
        )

    researcher = Researcher()
    query = "Who won Asia Cup 2025?"
    print("Searching and retrieving answer...")

    # Time the whole pipeline end to end.
    t_start = time.time()
    answer = researcher.research(query)
    t_end = time.time()

    print("\n===== Answer =====")
    print(answer)
    print(f"[Total pipeline] Total time: {t_end - t_start:.3f}s")


Searching and retrieving answer...
[Search] Time taken: 2.185s
[Content Fetch] Time taken: 0.218s
[Text Split] Time taken: 0.002s
[Embedding + DB Build] Time taken: 0.270s


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m
[LLM Inference] Time taken: 4.697s
[LLM] Estimated time per token: 0.427s
[Total Research Query] Total time: 4.968s

===== Answer =====
India won the Asia Cup 2025, defeating Pakistan in the final.
[Total pipeline] Total time: 7.372s


In [None]:
!pip install -q langchain langchain_groq langchain_community langchain_huggingface langchain-text-splitters python-dotenv unstructured faiss-cpu sentence-transformers

In [None]:
# -----------------------------
# Config variables (inline instead of config.py)
# -----------------------------
# Prompt sent to the LLM; {context} receives the retrieved chunks and
# {question} the user's query.
PROMPT_TEMPLATE = """
You are a great researcher. With the information provided understand in deep and try to answer the question.
If you cant answer the question based on the information either say you cant find an answer or unable to find an answer.
So try to understand in depth about the context and answer only based on the information provided. Dont generate irrelevant answers.

Context: {context}
Question: {question}
Do provide only helpful answers

Answer:
"""
# Placeholder names that PROMPT_TEMPLATE expects to be filled in.
INPUT_VARIABLES = ["context", "question"]
# The text splitter takes a *list* of separators. A bare string is iterated
# character by character, which only works by accident for a one-character
# separator and silently breaks for anything longer (e.g. "\n\n").
SEPARATORS = ["\n"]
# Chunk length and overlap handed to the text splitter.
CHUNK_SIZE = 3000
CHUNK_OVERLAP = 500
# Hugging Face model name used to embed chunks.
EMBEDDER = "BAAI/bge-base-en-v1.5"
# RetrievalQA chain type.
CHAIN_TYPE = "stuff"
# Retriever search settings: number of chunks fetched per query.
SEARCH_KWARGS = {'k': 3}

# -----------------------------
# Imports
# -----------------------------
import os
import json
import requests
from dotenv import load_dotenv, find_dotenv
from typing import List
import time
import statistics

from langchain_groq import ChatGroq
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain_community.document_loaders import UnstructuredURLLoader
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings
import torch

load_dotenv(find_dotenv())

# -----------------------------
# Researcher class (with GPU FAISS)
# -----------------------------
class Researcher:
    """Web-search-backed retrieval QA, plus a retriever-only evaluation.

    Pipeline: Serper search -> collect result URLs -> download the pages ->
    split into chunks -> embed into a FAISS store (moved to the GPU when
    possible) -> ask the Groq-hosted LLM with the retrieved chunks as
    context.
    """

    # Seconds to wait for the Serper API before giving up.
    SEARCH_TIMEOUT = 30
    # Number of organic search results considered when collecting URLs.
    MAX_ORGANIC_RESULTS = 3
    # Returned instead of an LLM answer when nothing could be indexed.
    NO_CONTENT_ANSWER = (
        "Unable to find an answer: no content could be retrieved for this query."
    )

    def __init__(self):
        """Read API keys from the environment and build the reusable components."""
        self.serper_api_key = os.getenv("SERPER_API_KEY")
        self.groq_api_key = os.getenv("GROQ_API_KEY")
        self.prompt_template = PromptTemplate(
            template=PROMPT_TEMPLATE,
            input_variables=INPUT_VARIABLES
        )
        # The splitter expects a list of separators; accept a bare string
        # from the config by wrapping it instead of letting it be iterated
        # character by character.
        separators = [SEPARATORS] if isinstance(SEPARATORS, str) else list(SEPARATORS)
        self.text_splitter = RecursiveCharacterTextSplitter(
            separators=separators,
            chunk_size=CHUNK_SIZE,
            chunk_overlap=CHUNK_OVERLAP
        )

        # Use a current production Groq model
        self.llm = ChatGroq(
            temperature=0.5,
            model_name="groq/compound",
            groq_api_key=self.groq_api_key
        )

        # Vector store of the most recent research_given_query() call.
        self.db = None

        # GPU-friendly embeddings
        device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.hfembeddings = HuggingFaceEmbeddings(
            model_name=EMBEDDER,
            model_kwargs={'device': device}
        )
        print(f"[INFO] Using device: {device} for embeddings")

    def search_articles(self, query: str) -> dict:
        """Run a Serper search for ``query`` and return the decoded JSON body.

        Raises:
            requests.RequestException: on timeout, connection failure or a
                non-2xx HTTP status.
        """
        response = requests.post(
            "https://google.serper.dev/search",
            headers={
                'X-API-KEY': self.serper_api_key,
                'Content-Type': 'application/json'
            },
            data=json.dumps({"q": query}),
            # Without a timeout a stalled connection would hang forever.
            timeout=self.SEARCH_TIMEOUT,
        )
        response.raise_for_status()
        return response.json()

    def research_answerer(self):
        """Build the RetrievalQA chain over the current vector store.

        Raises:
            RuntimeError: if no vector store has been built yet.
        """
        db = getattr(self, "db", None)
        if db is None:
            raise RuntimeError(
                "No vector store available; call research_given_query() first."
            )
        return RetrievalQA.from_chain_type(
            llm=self.llm,
            chain_type=CHAIN_TYPE,
            retriever=db.as_retriever(search_kwargs=SEARCH_KWARGS),
            return_source_documents=True,
            verbose=True,
            chain_type_kwargs={"prompt": self.prompt_template}
        )

    def get_urls(self, articles: dict) -> List[str]:
        """Collect the URLs worth fetching from a Serper response.

        Takes the answer-box link (when present) followed by the links of the
        first ``MAX_ORGANIC_RESULTS`` organic results. Entries without a link
        are skipped and duplicates are dropped, keeping first-seen order.
        """
        if not isinstance(articles, dict):
            return []

        candidates = []
        answer_box = articles.get("answerBox")
        if isinstance(answer_box, dict):
            candidates.append(answer_box.get("link"))
        organic = articles.get("organic") or []
        candidates.extend(
            hit.get("link")
            for hit in organic[:self.MAX_ORGANIC_RESULTS]
            if isinstance(hit, dict)
        )

        urls = []
        for link in candidates:
            # The answer box often points at the same page as the first
            # organic hit; fetching it twice would only duplicate chunks.
            if link and link not in urls:
                urls.append(link)
        return urls

    def get_content_from_urls(self, urls: List[str]):
        """Download each URL separately and return the loaded documents.

        A URL that fails to load is reported and skipped. Documents that come
        back without a ``source`` in their metadata get the URL filled in.
        """
        documents = []
        for url in urls:
            try:
                loaded = UnstructuredURLLoader(urls=[url]).load()
            except Exception as e:
                # Best-effort stage: one bad page should not lose the others.
                print(f"Skipping URL {url}: {e}")
                continue
            for doc in loaded:
                if not doc.metadata.get("source"):
                    doc.metadata["source"] = url
            documents.extend(loaded)
        return documents

    def _build_vector_store(self, docs):
        """Embed ``docs`` into a FAISS store, using the GPU when possible.

        The store is built exactly once with the library's default index, so
        a failed GPU transfer never forces the embeddings to be recomputed;
        the CPU index is simply kept.
        """
        store = FAISS.from_documents(documents=docs, embedding=self.hfembeddings)
        if not torch.cuda.is_available():
            return store
        try:
            import faiss
            gpu_res = faiss.StandardGpuResources()
            store.index = faiss.index_cpu_to_gpu(gpu_res, 0, store.index)
            print("[INFO] FAISS index moved to GPU")
        except Exception as e:
            # E.g. only the CPU build of faiss is installed, or the GPU is
            # out of memory. The CPU index built above is still valid.
            print(f"[WARN] FAISS GPU index failed, fallback to CPU: {e}")
        return store

    @staticmethod
    def _document_source(metadata) -> str:
        """Return the lowercased source URL found in ``metadata``, or ''."""
        for key in ("source", "url", "source_url"):
            value = metadata.get(key)
            if value:
                return str(value).lower()
        return ""

    def research_given_query(self, research_objective: str, research_content):
        """Answer ``research_objective`` using ``research_content`` as context.

        Splits the documents, indexes them (stored on ``self.db``), runs the
        RetrievalQA chain and returns the chain's ``"result"``. When there is
        nothing to index, ``NO_CONTENT_ANSWER`` is returned without calling
        the LLM.
        """
        docs = self.text_splitter.split_documents(research_content) if research_content else []
        if not docs:
            # A vector store cannot be built from zero documents.
            print("[WARN] No content to index; skipping retrieval and LLM call")
            return self.NO_CONTENT_ANSWER

        self.db = self._build_vector_store(docs)
        bot = self.research_answerer()
        # Single call path: retrying through the deprecated call syntax on
        # any exception would send a second LLM request after an API error.
        research_out = bot.invoke({"query": research_objective})
        return research_out["result"]

    def research(self, query: str):
        """Run search, fetch and QA for ``query`` and return the answer."""
        search_results = self.search_articles(query)
        urls = self.get_urls(search_results)
        research_content = self.get_content_from_urls(urls)
        return self.research_given_query(query, research_content)

    def evaluate_retriever(self, dataset: List[dict], top_k: int = 3):
        """Measure how often retrieval surfaces the expected source or answer.

        For each sample the query is searched, the result pages are fetched
        and indexed, and the ``top_k`` retrieved documents are checked. A
        sample is a hit when a retrieved document's source contains
        ``expected_source`` or its text contains ``expected_answer`` (both
        compared case-insensitively). The LLM is not called.

        NOTE(review): pages are indexed whole here, without the text splitter
        that research_given_query() applies - confirm this is intended.

        Args:
            dataset: Samples with a ``"query"`` key and optional
                ``"expected_source"`` / ``"expected_answer"`` keys.
            top_k: Number of documents retrieved per query.

        Returns:
            ``(accuracy_percent, results, times)``. Every entry of
            ``results`` is a 6-tuple
            ``(query, found, urls, retrieved_sources, status, elapsed)`` with
            ``status`` one of ``"hit"``, ``"miss"``, ``"search_failed"``,
            ``"no_docs"`` or ``"retrieval_failed"``; ``times`` holds the
            per-sample elapsed seconds.
        """
        results = []
        times = []
        correct = 0
        total = len(dataset)

        def record(index, query, found, urls, sources, status, started):
            # One place builds the result row so every row has the same shape.
            elapsed = time.time() - started
            times.append(elapsed)
            results.append((query, found, urls, sources, status, elapsed))
            print(f"[{index}] Query: {query}, Result: {status}, Time: {elapsed:.3f}s")

        for i, sample in enumerate(dataset, start=1):
            q = sample["query"]
            expected_source = sample.get("expected_source", "").lower()
            expected_answer = sample.get("expected_answer", "").lower()

            start_time = time.time()
            try:
                search_res = self.search_articles(q)
            except Exception as e:
                # One failing sample must not abort the whole evaluation.
                print(f"[WARN] Search failed for query {q!r}: {e}")
                record(i, q, False, [], [], "search_failed", start_time)
                continue

            urls = self.get_urls(search_res)
            docs = self.get_content_from_urls(urls)
            if not docs:
                record(i, q, False, urls, [], "no_docs", start_time)
                continue

            retriever = self._build_vector_store(docs).as_retriever(
                search_kwargs={"k": top_k}
            )

            try:
                # invoke() replaces the deprecated get_relevant_documents().
                retrieved = retriever.invoke(q)
            except Exception as e:
                print(f"[WARN] Retrieval failed for query {q!r}: {e}")
                record(i, q, False, urls, [], "retrieval_failed", start_time)
                continue

            retrieved_sources = []
            found = False
            for doc in retrieved:
                src = self._document_source(doc.metadata)
                retrieved_sources.append(src)
                content = (doc.page_content or "").lower()
                source_hit = bool(expected_source) and expected_source in src
                answer_hit = bool(expected_answer) and expected_answer in content
                if source_hit or answer_hit:
                    found = True
                    break

            if found:
                correct += 1
            record(i, q, found, urls, retrieved_sources, "hit" if found else "miss", start_time)

        accuracy = correct / total * 100 if total else 0.0
        print(f"Retrieval accuracy @top-{top_k}: {accuracy:.2f}% ({correct}/{total})")
        if times:
            print(f"Response time summary (s): mean={statistics.mean(times):.3f}, median={statistics.median(times):.3f}, max={max(times):.3f}")
        return accuracy, results, times



# -----------------------------
# Test dataset (12 likely-success cases + 2 likely-failure cases = 14)
# Each sample has a "query" plus the "expected_answer" / "expected_source"
# strings that evaluate_retriever lowercases and matches as substrings of the
# retrieved page text / source URL.
# -----------------------------
TEST_DATA = [
    {"query": "Who is the CEO of NVIDIA?", "expected_answer": "jensen huang", "expected_source": "nvidia.com"},
    {"query": "What is the capital of Japan?", "expected_answer": "tokyo", "expected_source": "en.wikipedia.org/wiki/Tokyo"},
    {"query": "When was OpenAI founded?", "expected_answer": "december 2015", "expected_source": "en.wikipedia.org/wiki/OpenAI"},
    {"query": "What is LangChain used for?", "expected_answer": "framework for building applications", "expected_source": "langchain.com"},
    {"query": "Which company created the Mistral-7B model?", "expected_answer": "mistral ai", "expected_source": "mistral.ai"},
    {"query": "What is FAISS used for?", "expected_answer": "similarity search", "expected_source": "github.com/facebookresearch/faiss"},
    {"query": "Who invented the World Wide Web?", "expected_answer": "tim berners-lee", "expected_source": "en.wikipedia.org/wiki/Tim_Berners-Lee"},
    {"query": "What is HuggingFace Transformers?", "expected_answer": "library for state-of-the-art natural language processing models", "expected_source": "huggingface.co/transformers"},
    {"query": "What is the default port for HTTP?", "expected_answer": "port 80", "expected_source": "developer.mozilla.org"},
    {"query": "What programming language is TensorFlow written in?", "expected_answer": "c++", "expected_source": "tensorflow.org"},
    {"query": "What is the latest iPhone model?", "expected_answer": "iphone 16", "expected_source": "apple.com"},
    {"query": "Which algorithm does Google use for search ranking?", "expected_answer": "pagerank", "expected_source": "en.wikipedia.org/wiki/PageRank"},
    # 2 likely-fail cases, meant to simulate sources that often block scraping or sit behind a paywall
    # NOTE(review): the expected_answer "C" below is lowercased by evaluate_retriever and matched
    # as a substring of the page text, so this sample counts as a hit for almost any retrieved
    # document - confirm the intended expected_answer.
    {
    "query": "Let x₀ be the real number such that e^(x₀) + x₀ = 0. For a given real number α, define g(x) = (3 x e^x + 3 x - α e^x - α x) / (3 (e^x + 1)) for all real numbers x. Then which one of the following statements is TRUE? (A) For α = 2, lim x→x₀ |g(x) + e^(x₀)| / |x - x₀| = 0. (B) For α = 2, lim x→x₀ |g(x) + e^(x₀)| / |x - x₀| = 1. (C) For α = 3, lim x→x₀ |g(x) + e^(x₀)| / |x - x₀| = 0. (D) For α = 3, lim x→x₀ |g(x) + e^(x₀)| / |x - x₀| = 2/3.",
    "expected_answer": "C",
    "expected_source": "sarthaks.com"
    },
    {"query": "Give me Optiver OA questions that happened on 14th October 2025", "expected_answer": "Researchers are studying the behavior of squirrels in a forest, where they compete to hide and collect nuts in hidden locations. You are tasked with implementing a tracker that can simulate this behavior. Your system is initialized with a list of locations and their capacities, and should support the following actions: Register a nut has been hid by a squirrel in a location. Register how nuts are retrieved by a squirrel from a location. Complete the functions described below in the SquirrelResearch class."
    , "expected_source": "https://oahelper.in/questions/U2FsdGVkX18gVO7LQ0Ng-51OZtY-etlab8ViFBPseMw?company_id=U2FsdGVkX19j-__NQxYfL1WkTv_vpON0lGt2of6ikmU"}
]

# -----------------------------
# Run evaluation if executed directly
# -----------------------------
if __name__ == "__main__":
    # API keys come from the process environment or from the .env file that
    # load_dotenv() read at import time. They are deliberately not assigned
    # here: writing "" would overwrite real values loaded from .env, and
    # pasting keys into source leaks them with the notebook.
    missing_keys = [
        name for name in ("SERPER_API_KEY", "GROQ_API_KEY") if not os.getenv(name)
    ]
    if missing_keys:
        raise RuntimeError(
            f"Missing required environment variable(s): {', '.join(missing_keys)}. "
            "Set them in the environment or in a .env file."
        )

    researcher = Researcher()

    # Run the retriever-only evaluation (does NOT invoke the LLM for scoring)
    acc, details, timings = researcher.evaluate_retriever(TEST_DATA, top_k=3)


[INFO] Using device: cuda for embeddings


  retrieved = retriever.get_relevant_documents(q)


[1] Query: Who is the CEO of NVIDIA?, Result: hit, Time: 2.045s
[2] Query: What is the capital of Japan?, Result: hit, Time: 3.002s
[3] Query: When was OpenAI founded?, Result: hit, Time: 2.698s
[4] Query: What is LangChain used for?, Result: hit, Time: 2.847s
[5] Query: Which company created the Mistral-7B model?, Result: hit, Time: 1.442s
[6] Query: What is FAISS used for?, Result: hit, Time: 2.763s
[7] Query: Who invented the World Wide Web?, Result: hit, Time: 4.127s
[8] Query: What is HuggingFace Transformers?, Result: miss, Time: 2.258s
[9] Query: What is the default port for HTTP?, Result: hit, Time: 0.956s
[10] Query: What programming language is TensorFlow written in?, Result: hit, Time: 1.337s
[11] Query: What is the latest iPhone model?, Result: hit, Time: 1.714s
[12] Query: Which algorithm does Google use for search ranking?, Result: hit, Time: 1.919s
[13] Query: Let x₀ be the real number such that e^(x₀) + x₀ = 0. For a given real number α, define g(x) = (3 x e^x + 3 x - α