In [1]:
import os 
from langchain_chroma import Chroma
from langchain_unstructured import UnstructuredLoader
from langchain_ollama import OllamaEmbeddings
from langchain_community.vectorstores.utils import filter_complex_metadata
from langchain.prompts import ChatPromptTemplate
from langchain_ollama import OllamaLLM
from langchain_text_splitters import RecursiveCharacterTextSplitter

In [3]:
# Data locations. The defaults reproduce the original local layout exactly;
# set HPE_FILES_DIR / HPE_CHROMA_DIR in the environment to run this notebook on
# another machine without editing the cell.
FOLDER_PATH = os.environ.get(
    "HPE_FILES_DIR",
    '/Users/keithatienza/Desktop/Academics/Emergent Consulting [HPE]/HPE LLM v2/HPE Files/',
)
CHROMA_PATH = os.environ.get(
    "HPE_CHROMA_DIR",
    '/Users/keithatienza/Desktop/Academics/Emergent Consulting [HPE]/HPE LLM v2/DB',
)

In [5]:
# Ollama model tags used by the notebook: one for embeddings, one for generation.
embed_model: str = 'mxbai-embed-large'
llm_model: str = 'llama3'

In [7]:
# RAG prompt: the retrieved context comes first, then a "---" rule, then the
# question. {context} and {question} are placeholders filled in at format time.
prompt_template = (
    "\n"
    "Answer the question based only on the following context:\n"
    "\n"
    "{context}\n"
    "\n"
    "---\n"
    "\n"
    "Answer the question based on the context above: {question}\n"
)

In [9]:
# Pull the embedding model named in the config cell so the two cannot drift apart.
!ollama pull {embed_model}

[?25lpulling manifest ⠋ [?25h[?25l[2K[1Gpulling manifest ⠙ [?25h[?25l[2K[1Gpulling manifest ⠹ [?25h[?25l[2K[1Gpulling manifest ⠸ [?25h[?25l[2K[1Gpulling manifest ⠼ [?25h[?25l[2K[1Gpulling manifest ⠴ [?25h[?25l[2K[1Gpulling manifest ⠦ [?25h[?25l[2K[1Gpulling manifest ⠧ [?25h[?25l[2K[1Gpulling manifest ⠇ [?25h[?25l[2K[1Gpulling manifest ⠏ [?25h[?25l[2K[1Gpulling manifest ⠋ [?25h[?25l[2K[1Gpulling manifest 
pulling 819c2adf5ce6... 100% ▕████████████████▏ 669 MB                         
pulling c71d239df917... 100% ▕████████████████▏  11 KB                         
pulling b837481ff855... 100% ▕████████████████▏   16 B                         
pulling 38badd946f91... 100% ▕████████████████▏  408 B                         
verifying sha256 digest 
writing manifest 
success [?25h


In [11]:
# Pull the generation model named in the config cell so the two cannot drift apart.
!ollama pull {llm_model}

[?25lpulling manifest ⠋ [?25h[?25l[2K[1Gpulling manifest ⠙ [?25h[?25l[2K[1Gpulling manifest ⠹ [?25h[?25l[2K[1Gpulling manifest ⠸ [?25h[?25l[2K[1Gpulling manifest ⠼ [?25h[?25l[2K[1Gpulling manifest ⠴ [?25h[?25l[2K[1Gpulling manifest 
pulling 6a0746a1ec1a... 100% ▕████████████████▏ 4.7 GB                         
pulling 4fa551d4f938... 100% ▕████████████████▏  12 KB                         
pulling 8ab4849b038c... 100% ▕████████████████▏  254 B                         
pulling 577073ffcc6c... 100% ▕████████████████▏  110 B                         
pulling 3f8eb4da87fa... 100% ▕████████████████▏  485 B                         
verifying sha256 digest 
writing manifest 
success [?25h


In [13]:
def load_documents():
    """Load every PDF found directly inside ``FOLDER_PATH`` and split it into chunks.

    Each PDF is parsed with ``UnstructuredLoader``; the resulting documents are
    passed through ``filter_complex_metadata`` and then split with a
    ``RecursiveCharacterTextSplitter`` (800-character chunks, 200-character
    overlap).

    Returns:
        The list of chunked documents returned by ``split_documents``. It is
        empty when the folder contains no PDF files.

    Raises:
        OSError: if ``FOLDER_PATH`` cannot be listed (raised by ``os.listdir``).
    """
    docs = []
    # os.listdir gives no ordering guarantee; sorting keeps ingestion reproducible.
    for file in sorted(os.listdir(FOLDER_PATH)):
        # Case-insensitive match so files such as "SPEC.PDF" are not silently skipped.
        if not file.lower().endswith('.pdf'):
            continue
        # os.path.join avoids the doubled separator that plain concatenation
        # produced when FOLDER_PATH already ends with "/".
        pdf_path = os.path.join(FOLDER_PATH, file)
        if not os.path.isfile(pdf_path):
            # A directory whose name happens to end in ".pdf" is not a document.
            continue
        loader = UnstructuredLoader(pdf_path)
        docs.extend(loader.load())
    docs = filter_complex_metadata(docs)
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=200)
    chunks = text_splitter.split_documents(docs)
    return chunks

In [15]:
def populate_database(docs):
    """Open the Chroma store persisted at ``CHROMA_PATH`` and add ``docs`` to it.

    Every chunk gets a deterministic id derived from its text and metadata.
    Re-running the cell therefore presents the same ids to the store instead of
    fresh random ones, so the persisted collection is not filled with repeated
    copies of the same chunks (this relies on Chroma treating a repeated id as
    an update -- confirm against the installed langchain_chroma version).
    Chunks that are exact duplicates of each other (same text and same
    metadata) are stored once, because ids must be unique within one call.

    Args:
        docs: iterable of documents exposing ``page_content`` and ``metadata``.
            May be empty, in which case nothing is written.

    Returns:
        The ``Chroma`` vector store instance.
    """
    import hashlib  # local import: only this function needs it

    db = Chroma(persist_directory=CHROMA_PATH, embedding_function=OllamaEmbeddings(model=embed_model))

    chunks_by_id = {}
    for doc in docs:
        # Metadata keys are unique, so sorting the items never compares values.
        fingerprint = repr((doc.page_content, sorted(doc.metadata.items())))
        chunk_id = hashlib.sha256(fingerprint.encode("utf-8")).hexdigest()
        # Keep the first occurrence of an exact duplicate.
        chunks_by_id.setdefault(chunk_id, doc)

    # Skip the write entirely for empty input rather than handing the store
    # an empty batch.
    if chunks_by_id:
        db.add_documents(list(chunks_by_id.values()), ids=list(chunks_by_id.keys()))
    return db

In [125]:
def query_rag(query_text, db, k=20):
    """Answer ``query_text`` with the LLM, using chunks retrieved from ``db`` as context.

    The ``k`` most similar chunks are fetched, each is prefixed with a
    ``Product:<file name without extension>`` header, and the joined text is
    substituted into ``prompt_template`` together with the question. The
    formatted prompt is then sent to the Ollama model named by ``llm_model``.

    Args:
        query_text: the user's question.
        db: vector store exposing ``similarity_search_with_score``.
        k: number of chunks to retrieve (defaults to the original value, 20).

    Returns:
        The value returned by the model's ``invoke`` call.
    """
    results = db.similarity_search_with_score(query_text, k=k)

    sections = []
    for doc, _score in results:
        # The previous ``metadata.get("filename")[:-4]`` raised TypeError when the
        # key was missing and assumed a three-character extension. Fall back to
        # "source", then to a placeholder, and let splitext strip the extension.
        filename = doc.metadata.get("filename") or doc.metadata.get("source") or "unknown"
        product = os.path.splitext(os.path.basename(str(filename)))[0]
        sections.append("Product:" + product + "\n\n" + doc.page_content)
    context_text = "\n\n---\n\n".join(sections)

    prompt = ChatPromptTemplate.from_template(prompt_template)
    prompt = prompt.format(context=context_text, question=query_text)

    # Use the model configured at the top of the notebook rather than a
    # second hard-coded copy of its name.
    model = OllamaLLM(model=llm_model)
    response_text = model.invoke(prompt)
    return response_text

In [19]:
# Build the index: load and chunk the PDFs, then add the chunks to the Chroma
# store. `db` is the handle passed to query_rag in the query cell below.
docs = load_documents()
db = populate_database(docs)

INFO: pikepdf C++ to Python logger bridge initialized
INFO: Anonymized telemetry enabled. See                     https://docs.trychroma.com/telemetry for more information.
INFO: HTTP Request: POST http://127.0.0.1:11434/api/embed "HTTP/1.1 200 OK"


In [127]:
# Example question passed to query_rag in the next cell.
query = "Is the RTX 4000 GPU supported in any of HPE’s server models?" 

In [131]:
# Run the retrieval + generation pipeline for the example question and show
# the answer as plain text.
print(query_rag(query_text=query, db=db))

INFO: HTTP Request: POST http://127.0.0.1:11434/api/embed "HTTP/1.1 200 OK"
INFO: HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"


According to the provided context, the NVIDIA RTX 4000 Ada Graphics Accelerator for HPE is supported in the following servers:

1. Product:HPE ProLiant ML350 Gen11-a50004308enw
2. Product:HPE ProLiant DL380 Gen11-a50004307enw

Therefore, yes, the RTX 4000 GPU is supported in at least two of HPE's server models: the ML350 and the DL380.
