In [1]:
pip install langchain sentence-transformers faiss-cpu PyPDF2 python-dotenv

Collecting sentence-transformers
  Downloading sentence_transformers-5.1.1-py3-none-any.whl.metadata (16 kB)
Collecting faiss-cpu
  Downloading faiss_cpu-1.12.0-cp313-cp313-win_amd64.whl.metadata (5.2 kB)
Collecting PyPDF2
  Downloading pypdf2-3.0.1-py3-none-any.whl.metadata (6.8 kB)
Collecting transformers<5.0.0,>=4.41.0 (from sentence-transformers)
  Downloading transformers-4.56.2-py3-none-any.whl.metadata (40 kB)
Collecting torch>=1.11.0 (from sentence-transformers)
  Downloading torch-2.8.0-cp313-cp313-win_amd64.whl.metadata (30 kB)
Collecting huggingface-hub>=0.20.0 (from sentence-transformers)
  Downloading huggingface_hub-0.35.3-py3-none-any.whl.metadata (14 kB)
Collecting tokenizers<=0.23.0,>=0.22.0 (from transformers<5.0.0,>=4.41.0->sentence-transformers)
  Downloading tokenizers-0.22.1-cp39-abi3-win_amd64.whl.metadata (6.9 kB)
Collecting safetensors>=0.4.3 (from transformers<5.0.0,>=4.41.0->sentence-transformers)
  Downloading safetensors-0.6.2-cp38-abi3-win_amd64.whl.metada

In [13]:
# retrieval_qa_groq.py (or copy into a Jupyter cell)
import os
from dotenv import load_dotenv
load_dotenv()

# core LangChain imports
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import SentenceTransformerEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA

# Your Groq wrapper (either import from file or copy the class here)
# from groq_remote_llm import GroqRemoteLLM
# If you already defined GroqRemoteLLM in a cell, import/use it directly.
from groq_remote_llm import GroqRemoteLLM

# -------------- CONFIG ----------------
PDF_PATH = "sample.pdf"            # <- path to your PDF
CHUNK_SIZE = 800
CHUNK_OVERLAP = 120
EMBED_MODEL = "all-MiniLM-L6-v2"   # small, fast embedding model (good for 8GB RAM)
VECTORDB_PATH = "faiss_index"      # optional: directory to save index (FAISS persists in files)

# 1) Load PDF
if not os.path.exists(PDF_PATH):
    raise FileNotFoundError(f"Put a PDF at '{PDF_PATH}' or change PDF_PATH variable.")

loader = PyPDFLoader(PDF_PATH)
docs = loader.load()

# 2) Split into chunks
splitter = CharacterTextSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP)
chunks = splitter.split_documents(docs)
print(f"Created {len(chunks)} chunks from the PDF.")

# 3) Create embeddings (SentenceTransformers)
embeddings = SentenceTransformerEmbeddings(model_name=EMBED_MODEL)

# 4) Build FAISS vectorstore (in-memory). Optionally persist to disk.
vectorstore = FAISS.from_documents(chunks, embeddings)

# Optional: persist FAISS index to disk (persist manually)
# vectorstore.save_local(VECTORDB_PATH)   # requires langchain faiss save_local support in your version

# 5) Create Groq LLM
llm = GroqRemoteLLM()   # reads GROQ_API_URL/GROQ_API_KEY/GROQ_MODEL from .env

# 6) Build RetrievalQA chain
# chain_type can be "stuff", "map_reduce", "refine" depending on behaviour you want.
qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=vectorstore.as_retriever(search_kwargs={"k":3}))

# 7) Ask questions
queries = [
    "Summarize the main conclusion of the paper in one sentence.",
    "What method did the authors use for evaluation?",
    "List any limitations mentioned by the authors."
]

for q in queries:
    print("\nQUESTION:", q)
    try:
        ans = qa.run(q)
        print("ANSWER:", ans.strip())
    except Exception as e:
        print("Error during QA:", e)


ImportError: cannot import name 'GroqRemoteLLM' from 'groq_remote_llm' (C:\Users\Arul\groq_remote_llm.py)

In [15]:
# Cell 1: write groq_remote_llm.py into the notebook's working directory
module_code = r'''
# groq_remote_llm.py  -- robust import-safe wrapper for Groq remote LLM
import os
import requests
from typing import Optional, List, Mapping, Any

# Try to import LangChain LLM base; if missing, provide a minimal fallback.
try:
    from langchain.llms.base import LLM
except Exception:
    class LLM:
        pass

# Try to use pydantic Field to satisfy LangChain expectations; if unavailable, fallback.
use_pydantic = True
try:
    from pydantic import Field
except Exception:
    use_pydantic = False

if use_pydantic:
    from pydantic import Field
    class GroqRemoteLLM(LLM):
        api_url: str = Field(default_factory=lambda: os.getenv("GROQ_API_URL"))
        api_key: str = Field(default_factory=lambda: os.getenv("GROQ_API_KEY"))
        model: str   = Field(default_factory=lambda: os.getenv("GROQ_MODEL", "llama-3.3-70b-versatile"))
        timeout: int = 60

        @property
        def _llm_type(self) -> str:
            return "groq-remote-llm"

        @property
        def _identifying_params(self) -> Mapping[str, Any]:
            return {"model": self.model, "url": self.api_url}

        def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
            headers = {"Authorization": f"Bearer {self.api_key}", "Content-Type": "application/json"}
            payload = {
                "model": self.model,
                "messages": [
                    {"role": "system", "content": "You are a helpful assistant."},
                    {"role": "user", "content": prompt},
                ],
                "max_tokens": 256,
            }
            resp = requests.post(self.api_url, json=payload, headers=headers, timeout=self.timeout)
            resp.raise_for_status()
            doc = resp.json()
            try:
                return doc["choices"][0]["message"]["content"]
            except Exception:
                return str(doc)
else:
    class GroqRemoteLLM(LLM):
        def __init__(self, api_url: Optional[str]=None, api_key: Optional[str]=None, model: Optional[str]=None, timeout: int=60):
            self.api_url = api_url or os.getenv("GROQ_API_URL")
            self.api_key = api_key or os.getenv("GROQ_API_KEY")
            self.model   = model or os.getenv("GROQ_MODEL", "llama-3.3-70b-versatile")
            self.timeout = timeout

        @property
        def _llm_type(self) -> str:
            return "groq-remote-llm"

        @property
        def _identifying_params(self) -> Mapping[str, Any]:
            return {"model": self.model, "url": self.api_url}

        def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
            headers = {"Authorization": f"Bearer {self.api_key}", "Content-Type": "application/json"}
            payload = {
                "model": self.model,
                "messages": [
                    {"role": "system", "content": "You are a helpful assistant."},
                    {"role": "user", "content": prompt},
                ],
                "max_tokens": 256,
            }
            resp = requests.post(self.api_url, json=payload, headers=headers, timeout=self.timeout)
            resp.raise_for_status()
            doc = resp.json()
            try:
                return doc["choices"][0]["message"]["content"]
            except Exception:
                return str(doc)
'''
with open("groq_remote_llm.py", "w", encoding="utf-8") as f:
    f.write(module_code)
print("Wrote groq_remote_llm.py to", __import__("os").getcwd())



Wrote groq_remote_llm.py to C:\Users\Arul


In [3]:
# Cell 2: import and sanity-check
import importlib, sys, traceback
try:
    if "groq_remote_llm" in sys.modules:
        importlib.reload(sys.modules["groq_remote_llm"])
    else:
        import groq_remote_llm
    from groq_remote_llm import GroqRemoteLLM
    print("Imported GroqRemoteLLM OK.")
    inst = GroqRemoteLLM()
    print("api_url:", getattr(inst, "api_url", None))
    print("model:", getattr(inst, "model", None))
except Exception:
    print("Import failed; traceback:")
    traceback.print_exc()


Imported GroqRemoteLLM OK.
api_url: None
model: llama-3.3-70b-versatile


In [1]:
import os
from dotenv import load_dotenv

# load .env (only needed if you use a .env file)
load_dotenv()

print("GROQ_API_URL   ->", repr(os.getenv("GROQ_API_URL")))
print("GROQ_API_KEY   ->", repr(os.getenv("GROQ_API_KEY")))
print("GROQ_MODEL     ->", repr(os.getenv("GROQ_MODEL")))

GROQ_API_URL   -> 'https://api.groq.com/openai/v1/chat/completions'
GROQ_API_KEY   -> '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'
GROQ_MODEL     -> 'llama-3.3-70b-versatile'


In [2]:
# Connectivity test
import os, requests, json
API_URL = os.getenv("GROQ_API_URL")
API_KEY = os.getenv("GROQ_API_KEY")
MODEL   = os.getenv("GROQ_MODEL")

headers = {"Authorization": f"Bearer {API_KEY}", "Content-Type": "application/json"}
payload = {
    "model": MODEL,
    "messages": [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Test: reply with the word HELLO and nothing else."}
    ],
    "max_tokens": 20,
}

resp = requests.post(API_URL, json=payload, headers=headers, timeout=30)
print("HTTP status:", resp.status_code)
try:
    data = resp.json()
    # pretty print a compact view (mask long fields)
    import pprint
    pprint.pprint(data)
    # attempt to extract content safely:
    content = None
    if isinstance(data, dict):
        if "choices" in data and isinstance(data["choices"], list) and data["choices"]:
            c = data["choices"][0]
            if isinstance(c, dict) and "message" in c and isinstance(c["message"], dict):
                content = c["message"].get("content")
            elif "text" in c:
                content = c.get("text")
    print("Extracted content:", repr(content))
except Exception:
    print("Non-JSON response (first 1000 chars):")
    print(resp.text[:1000])


HTTP status: 200
{'choices': [{'finish_reason': 'stop',
              'index': 0,
              'logprobs': None,
              'message': {'content': 'HELLO', 'role': 'assistant'}}],
 'created': 1759246509,
 'id': 'chatcmpl-112cb3d9-cbfd-4dbe-b7fc-d9e7605fa489',
 'model': 'llama-3.3-70b-versatile',
 'object': 'chat.completion',
 'service_tier': 'on_demand',
 'system_fingerprint': 'fp_9e1e8f8435',
 'usage': {'completion_time': 0.010181789,
           'completion_tokens': 3,
           'prompt_time': 0.002954313,
           'prompt_tokens': 53,
           'queue_time': 0.056309637,
           'total_time': 0.013136102,
           'total_tokens': 56},
 'usage_breakdown': None,
 'x_groq': {'id': 'req_01k6dm6rx0eect175fxppjnrdt'}}
Extracted content: 'HELLO'


In [3]:
# Reload and test GroqRemoteLLM with LangChain LLMChain
import importlib, sys
if "groq_remote_llm" in sys.modules:
    importlib.reload(sys.modules["groq_remote_llm"])
from groq_remote_llm import GroqRemoteLLM

# create instance
llm = GroqRemoteLLM()
print("LLM api_url:", llm.api_url)
print("LLM model :", llm.model)

# small LangChain usage
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain

prompt = PromptTemplate(input_variables=["topic"], template="In two sentences, explain {topic}.")
chain = LLMChain(llm=llm, prompt=prompt)
print(chain.run("k-means clustering"))


LLM api_url: https://api.groq.com/openai/v1/chat/completions
LLM model : llama-3.3-70b-versatile


  chain = LLMChain(llm=llm, prompt=prompt)
  print(chain.run("k-means clustering"))


K-means clustering is an unsupervised machine learning algorithm that groups similar data points into clusters based on their features, with the goal of identifying patterns or structures in the data. The algorithm works by randomly initializing 'k' cluster centers, then iteratively updating the assignments of data points to the nearest cluster center and recalculating the center of each cluster until the assignments no longer change.


In [5]:
!pip install pypdf

Collecting pypdf
  Downloading pypdf-6.1.1-py3-none-any.whl.metadata (7.1 kB)
Downloading pypdf-6.1.1-py3-none-any.whl (323 kB)
Installing collected packages: pypdf
Successfully installed pypdf-6.1.1


In [6]:
# RetrievalQA pipeline using GroqRemoteLLM + local embeddings + FAISS
import os
from dotenv import load_dotenv
load_dotenv()   # just to be safe if not already loaded

from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import SentenceTransformerEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA

# reload wrapper
import importlib, sys
if "groq_remote_llm" in sys.modules:
    importlib.reload(sys.modules["groq_remote_llm"])
from groq_remote_llm import GroqRemoteLLM

# CONFIG
PDF_PATH = "sample.pdf"         # change if needed
CHUNK_SIZE = 800
CHUNK_OVERLAP = 120
EMBED_MODEL = "all-MiniLM-L6-v2"

# 1) check file
if not os.path.exists(PDF_PATH):
    raise FileNotFoundError(f"Place your PDF as '{PDF_PATH}' in the current folder or change PDF_PATH.")

# 2) load and split
loader = PyPDFLoader(PDF_PATH)
docs = loader.load()
splitter = CharacterTextSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP)
chunks = splitter.split_documents(docs)
print(f"Created {len(chunks)} chunks")

# 3) embeddings + vectorstore
embeddings = SentenceTransformerEmbeddings(model_name=EMBED_MODEL)
vectorstore = FAISS.from_documents(chunks, embeddings)
retriever = vectorstore.as_retriever(search_kwargs={"k": 3})

# 4) LLM and RetrievalQA
llm = GroqRemoteLLM()
qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever)

# 5) Ask interactive question(s)
queries = [
    "Summarize the main conclusion of the document.",
    "What methods were used in the study?",
]
for q in queries:
    print("\nQUESTION:", q)
    try:
        ans = qa.run(q)
        print("ANSWER:", ans.strip())
    except Exception as e:
        print("Error while answering:", repr(e))

Created 3 chunks


  embeddings = SentenceTransformerEmbeddings(model_name=EMBED_MODEL)


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]


QUESTION: Summarize the main conclusion of the document.
ANSWER: The document appears to be a collection of research papers from the 15th International Conference on Science and Innovative Engineering 2025. It covers a wide range of topics, including AI-powered learning management systems, insider attack classification, data deduplication, grievance resolution platforms, graph theory, and IoT-enabled smart wheelchairs.

Since the document is a collection of various research papers, there isn't a single main conclusion. Each paper presents its own findings, proposals, and results. However, overall, the document showcases innovative solutions and approaches to various problems in the fields of engineering, computer science, and technology.

QUESTION: What methods were used in the study?
ANSWER: Based on the provided context, the following methods were used in the studies:

1. **LMS Platform Using Generative AI**: The study used Clerk for authentication, Inngest, PostgreSQL, and Drizzle 

In [9]:
print(qa.run("What is the dataset size mentioned in the paper?"))
print(qa.run("What is the author's name and affiliation?"))
print(qa.run("Who is the author's of 118?"))

I don't know. The dataset used is the Insider Threat Test Dataset, but the paper does not mention the size of the dataset. It only mentions the accuracy and false positive rates achieved by the proposed Hybrid Classification Strategy (HCS) in the training and testing phases.
There are multiple authors and affiliations mentioned in the context. Here are a few:

1. Ruben George Varghese, Dharshan R E, Harish Jayaram S S, and R Dheepthi from the Department of Computer Science and Engineering, Hindustan Institute of Technology and Science, Chennai, Tamil Nadu, India.
2. Arul Selvam P and Tamije Selvy P from the Department of CSE, Hindusthan College of Engineering and Technology, Coimbatore.
3. DuraiMurugan A, Seema S, BhavalDharshini S, Sujitha K, and Litthika S from the Department of CSBS, M.Kumarasamy college of engineering, Karur, India.
4. Dr. Anuratha K, Harini R, and Harinee M from the Department of Information Technology, Sri Sairam Institute of Technology, Chennai, India.
5. A. Kok

In [12]:
from langchain.vectorstores import FAISS
from langchain.embeddings import SentenceTransformerEmbeddings

embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")

vectorstore = FAISS.load_local(
    "faiss_index",
    embeddings,
    allow_dangerous_deserialization=True
)

retriever = vectorstore.as_retriever(search_kwargs={"k":3})

In [13]:
from langchain.chains import RetrievalQA
qa_map = RetrievalQA.from_chain_type(llm=llm, chain_type="map_reduce", retriever=retriever)
print(qa_map.run("Summarize the methodology section."))

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

Token indices sequence length is longer than the specified maximum sequence length for this model (1744 > 1024). Running this sequence through the model will result in indexing errors


The text describes a methodology using a GRU-CNN (Gated Recurrent Unit-Convolutional Neural Network) hybrid ML approach to categorize assaults. The model was trained using pre-existing static patterns and tested using the Insider Threat Test Dataset, achieving accuracy rates of 98.43% and 97.12% in the training and testing phases, respectively.


In [14]:
!pip install -U langchain-community



In [1]:
import sys
print(sys.executable)

C:\Users\Arul\anaconda3\python.exe


In [2]:
!pip install sentence-transformers



In [4]:
import sentence_transformers
print("SentenceTransformers version:", sentence_transformers.__version__)

SentenceTransformers version: 5.1.1
