In [1]:
!pip install transformers langchain langchain_community langchain-openai
!pip install -U transformers accelerate
!pip install tiktoken
!pip install pinecone
!pip install langchain-pinecone



In [2]:
from pydantic import BaseModel, Field
from langchain.llms import HuggingFacePipeline
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, BitsAndBytesConfig
import torch
import json
from sentence_transformers import SentenceTransformer, SimilarityFunction

In [11]:
from google.colab import userdata
import os

hf_token = userdata.get('HF_TOKEN')

if hf_token is None:
    raise ValueError("Hugging Face token not found. Please add it to Colab Secrets.")

from huggingface_hub import login


login(token=hf_token)

os.environ["LANGSMITH_TRACING"]="true"
os.environ["LANGSMITH_ENDPOINT"]="https://api.smith.langchain.com"
os.environ["LANGSMITH_API_KEY"]=userdata.get('LG_SMITH')
os.environ['LANGSMITH_PROJECT']="RAG"
os.environ["OPENAI_API_KEY"]=str(userdata.get('OPEN_AI')).strip()


In [4]:
print("LANGSMITH_TRACING:", os.environ.get("LANGSMITH_TRACING"))
print("LANGSMITH_ENDPOINT:", os.environ.get("LANGSMITH_ENDPOINT"))



LANGSMITH_TRACING: true
LANGSMITH_ENDPOINT: https://api.smith.langchain.com


In [5]:
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")



model_embed = SentenceTransformer("nomic-ai/modernbert-embed-base", similarity_fn_name=SimilarityFunction.COSINE)

Using device: cpu


# Retriever


In [15]:


import pinecone
import langchain_pinecone
from langchain_openai import ChatOpenAI



index_name = "rag-data"
from langchain_pinecone import PineconeVectorStore

from sentence_transformers import SentenceTransformer
from langchain.vectorstores import Pinecone
from langchain.embeddings.base import Embeddings




pinecone_client = pinecone.Pinecone(api_key=userdata.get('PINE'))


hf_model = model_embed


class HuggingFaceEmbeddings(Embeddings):
    def __init__(self, model):
        self.model = model

    def embed_documents(self, texts):
        """Generate embeddings for a list of documents."""
        return self.model.encode(texts).tolist()

    def embed_query(self, text):
        """Generate embedding for a single query."""
        return self.model.encode(text).tolist()

hf_embeddings = HuggingFaceEmbeddings(hf_model)


index_name = "rag-data"



index = pinecone_client.Index(index_name)

retriever= Pinecone(
    embedding=hf_embeddings,
    index=index,
    text_key="Text",
).as_retriever()


# Strucuted Grade of Retrieval

In [7]:
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain import hub


class GradeDocuments(BaseModel):

    binary_score: str = Field(
        description="Are documents relevant to the question, 'yes' or 'no'"
    )

grade_prompt = hub.pull("efriis/self-rag-retrieval-grader")

# LLM with structured function
llm = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0)
structured_llm_grader = llm.with_structured_output(GradeDocuments)

retrieval_grader = grade_prompt | structured_llm_grader



For example, replace imports like: `from langchain_core.pydantic_v1 import BaseModel`
with: `from pydantic import BaseModel`
or the v1 compatibility namespace if you are working in a code base that has not been fully upgraded to pydantic 2 yet. 	from pydantic.v1 import BaseModel

  exec(code_obj, self.user_global_ns, self.user_ns)


# Generation

In [14]:
from langchain import hub
from langchain_core.output_parsers import StrOutputParser


prompt = hub.pull("rlm/rag-prompt")

llm = ChatOpenAI(model_name="gpt-3.5-turbo-0125", temperature=0)

rag_chain = prompt | llm | StrOutputParser()

question = "Shall the access to the information be granted without undue delay?"
docs = retriever.get_relevant_documents(question)

generation = rag_chain.invoke({"context": docs, "question": question})
print(generation)

  docs = retriever.get_relevant_documents(question)


Access to the information shall be granted without undue delay, but at the latest within four weeks of receipt of the request by the competent body. If access to the information is not granted, a decision on this must be issued within two months of receipt of the request. If the requested information has not been provided, the applicant may file a request for a decision on the dispute by the administrative court within four weeks of the expiry of the deadline for the provision of information.
