# RAG Using Different LLM Endpoints in PCAI

## Importing the Libraries

In [None]:
from langchain_nvidia_ai_endpoints import ChatNVIDIA, NVIDIAEmbeddings
from langchain_nvidia_ai_endpoints.reranking import NVIDIARerank
from langchain_weaviate.vectorstores import WeaviateVectorStore
from langchain.chains import RetrievalQA
from langchain_community.document_loaders import PyPDFLoader
from langchain.chains import RetrievalQA
from langchain.retrievers.contextual_compression import ContextualCompressionRetriever
from langchain.text_splitter import CharacterTextSplitter
import weaviate

## Fetching the Secret Token for RAG Essentials

In [None]:
import weaviate, os
from weaviate.classes.init import Auth

# Location of the auth token file read by this notebook.
secret_file_path = "/etc/secrets/ezua/.auth_token"

# Read the whole file, then trim surrounding whitespace (e.g. a trailing
# newline) so the token can be used directly as a header / API key value.
with open(secret_file_path, "r") as token_file:
    raw_token = token_file.read()
token = raw_token.strip()

## Connecting to Weaviate

In [None]:
# Host names of the Weaviate HTTP and gRPC endpoints.
domain = ".cluster.local"
http_host = "weaviate.hpe-weaviate.svc.cluster.local"
grpc_host = "weaviate-grpc.hpe-weaviate.svc" + domain

# The token read earlier is sent to Weaviate as a custom request header.
weaviate_headers = {"x-auth-token": token}

# Collect the connection settings in one place, then open the client.
connection_options = dict(
    http_host=http_host,      # hostname for the HTTP API connection
    http_port=80,             # default is 80, WCD uses 443
    http_secure=False,        # plain http (not https) for the HTTP API
    grpc_host=grpc_host,      # hostname for the gRPC API connection
    grpc_port=50051,          # default is 50051, WCD uses 443
    grpc_secure=False,        # insecure channel for the gRPC API
    headers=weaviate_headers,
    skip_init_checks=False,
)
client = weaviate.connect_to_custom(**connection_options)

# Prints the readiness status reported by the client.
print(client.is_ready())

# Run Only ONE of the Following Three Cells, Depending on How You Want to Access Your LLM

## Connecting to LLM through MLIS

In [None]:
import os

# SECURITY: the API key for the MLIS endpoint must never be pasted into the
# notebook source (notebooks get shared and committed). Read it from the
# MLIS_API_KEY environment variable instead.
# NOTE(review): a token was previously hardcoded in this cell; treat it as
# compromised and revoke/rotate it.
_api_key = os.environ.get("MLIS_API_KEY", "").strip()
if not _api_key:
    raise RuntimeError(
        "MLIS_API_KEY is not set. Export your MLIS API token in the "
        "MLIS_API_KEY environment variable before running this cell."
    )

# Chat model served through the MLIS endpoint.
llm = ChatNVIDIA(
    base_url="https://nvidia-nim-model-predictor-genai-01-tmehol-net-f5874609.pcai1.genai1.hou",
    model="meta/llama3-8b-instruct",
    api_key=_api_key,
    temperature=0.5,
    max_tokens=1024,
    top_p=1.0,
)

# Quick smoke test: the response is displayed as the cell output.
llm.invoke("what is an api?")

## Connecting to LLM through RAG Essentials

In [None]:
# Connection and generation settings for this endpoint. The token read from
# the secret file earlier in the notebook is used as the API key.
rag_essentials_options = dict(
    base_url="https://llama-3-1-8b-b7ee1686-predictor-ezai-services.pcai1.genai1.hou",
    model="meta/llama-3.1-8b-instruct",
    api_key=token,
    temperature=0.5,
    max_tokens=1024,
    top_p=1.0,
)
llm = ChatNVIDIA(**rag_essentials_options)

# Quick smoke test: the response is displayed as the cell output.
llm.invoke("what is an api?")

## Connecting to LLM through OLLAMA

In [None]:
from langchain_ollama import ChatOllama

# Model tag and base URL passed to the Ollama chat client.
ollama_settings = {
    "model": "llama3.3:70b",
    "base_url": "https://ollama.pcai1.genai1.hou",
}
llm = ChatOllama(**ollama_settings)

# Quick smoke test: the response is displayed as the cell output.
llm.invoke("what is an api?")

## Data Extraction and Processing

In [None]:
# Replace with the path to your PDF
pdf_path = "./HPE.pdf"

# Load the PDF into a list of documents.
loader = PyPDFLoader(pdf_path)
documents = loader.load()

# Split the loaded documents into chunks (target size 500, overlap 50).
# NOTE(review): CharacterTextSplitter splits on a single separator, so chunks
# may exceed chunk_size when the PDF text lacks it — verify the chunk lengths.
text_splitter = CharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=50,
)
docs = text_splitter.split_documents(documents)

# Reset every chunk's metadata to an empty dict before indexing.
# presumably so only the text is stored in the vector store — TODO confirm
for chunk in docs:
    chunk.metadata = {}

## Vector Store Initialization

In [None]:
from langchain_ollama import OllamaEmbeddings

# Embedding model used to vectorise the chunks.
embedding_model = OllamaEmbeddings(
    model="nomic-embed-text:latest",
    base_url="https://ollama.pcai1.genai1.hou",
)

# Value passed as the vector store's text_key.
text_key = "Rag".lower() + "_key"

# Embed the chunks and store them in the "RAG" index via the open client.
vector = WeaviateVectorStore.from_documents(
    docs,
    embedding=embedding_model,
    client=client,
    index_name="RAG",
    text_key=text_key,
)


## Retriever Initialization

In [None]:
# Expose the vector store as a retriever with its default settings.
retriever = vector.as_retriever()

## Reranking

In [None]:
# Reranker model and endpoint; the token read from the secret file earlier
# in the notebook is used as the API key.
reranker_settings = {
    "model": "nvidia/nv-rerankqa-mistral-4b-v3",
    "base_url": "https://reranker-5c3f14b5-predictor-ezai-services.pcai1.genai1.hou",
    "api_key": token,
}
compressor = NVIDIARerank(**reranker_settings)

In [None]:
# Combine the base retriever with the reranker (used as the compressor).
compression_retriever = ContextualCompressionRetriever(
    base_retriever=retriever,
    base_compressor=compressor,
)

## User Query

In [None]:
# Question passed to the QA chain; edit this to ask something else.
query = "What is HPE Proliant Compute DL384 Gen12"

## Output

In [None]:
# Build the QA chain on the reranking retriever. The original cell passed the
# plain `retriever`, so `compression_retriever` (and therefore the reranker
# configured in the "Reranking" section) was never used.
chain = RetrievalQA.from_chain_type(llm=llm, retriever=compression_retriever)

# Run the chain on the user query; the result is displayed as the cell output.
chain.invoke(query)