# Perform the RAG Query / Response

- Use Hardcoded query from user
- Query ChromaDB for closest matches
- Define RAG prompt
- Submit to LLM
- Print Response

In [12]:
import os, re
import chromadb
from langchain.prompts import ChatPromptTemplate

Import project config and functions

In [13]:
from dotenv import load_dotenv

import os # Importing os module for operating system functionalities
#import shutil # Importing shutil module for high-level file operations

import sys
import importlib
load_dotenv()

# ************************************************
# *** Add PACKAGE_PATH to your .env file with full
# ***   path to your local_rag_llm/run/pkgs folder
# ************************************************
package_path = os.getenv('PACKAGE_PATH')
# ************************************************

#package_path = "/mnt/c/ML/DU/local_rag_llm/run/pkgs"
sys.path.insert(1, package_path)
import config
import rag_functions

package_path

'/mnt/c/ML/DU/local_rag_llm/run/pkgs'

Define variables

In [14]:
# reload any changes to Config Settings
importlib.reload(config)

project_path = config.project_path
print(f"Project_path: {project_path}")

doc_folder = config.data_folder_aiml
print(f"Doc Folder: {doc_folder}")

models_path = config.models_path
print(f"Models path: {models_path}")

loaded_documents = project_path + "run/pkgs/loaded_files.pkl"
print(f"Loaded Docs file: {loaded_documents}")

Project_path: /mnt/c/ML/DU/local_rag_llm/
Doc Folder: /mnt/c/ML/DU/local_rag_llm/data/aiml/
Models path: /mnt/c/ML/DU/local_rag_llm/models/
Loaded Docs file: /mnt/c/ML/DU/local_rag_llm/run/pkgs/loaded_files.pkl


### Create Embedding Function

In [15]:
from chromadb.utils import embedding_functions

# Use a different sentence transformer: all-mpnet-base-v2
sentence_transformer_ef = embedding_functions.SentenceTransformerEmbeddingFunction(model_name="all-mpnet-base-v2")

In [16]:
# Create the embedding model
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from sentence_transformers import SentenceTransformer

modelPath = "/mnt/c/ML/DU/local_rag_llm/models/sentence-transformers/all-MiniLM-L6-v2"
model = SentenceTransformer('all-MiniLM-L6-v2')
model.save(modelPath)

embed_model = HuggingFaceEmbedding(
    model_name=modelPath, max_length=512
)

### Setup ChromaDB

In [17]:
collection_name = config.collection_name

print(f"Collection Name: {collection_name}")

Collection Name: Gen_AI_knowledge


In [18]:
client = chromadb.HttpClient(host='localhost', port=8200)

client.list_collections()

[Collection(id=117f7132-8837-4bf2-9e3d-b7661f82bdb3, name=knowledge_base),
 Collection(id=19758326-4a87-46f9-9388-1d529131715d, name=Gen_AI_knowledge),
 Collection(id=588ec510-0b68-4c99-884d-5738768d1710, name=my_collection),
 Collection(id=dd394da5-2c20-4bce-8374-89a0ce611153, name=ML_doc_collection),
 Collection(id=f31f0737-b78e-46c9-86fd-efc4a56fffe9, name=local_kb)]

In [20]:
chroma_collection = client.get_collection(
                                                name=collection_name, 
                                                embedding_function=sentence_transformer_ef)

In [21]:
chroma_collection.count()

191

### Test ChromaDB

In [22]:
query_text = "This is a query about machine learning and data science"

In [23]:
# reload any changes to rag_functions
importlib.reload(rag_functions)

<module 'rag_functions' from '/mnt/c/ML/DU/local_rag_llm/run/pkgs/rag_functions.py'>

In [24]:
results = chroma_collection.query(
    query_texts=[ query_text ],
    n_results=3
)

rag_functions.print_dict(results, "result")

rag_functions.print_list(results['metadatas'][0], "metadata")


Dictionary: result
  ids: [['5a73e662-503b-11ef-9594-b9cfb04757a0', '591cda0a-503b-11ef-9594-b9cfb04757a0', '5a73e661-503b-11ef-9594-b9cfb04757a0']]
  distances: [[0.7612378597259521, 0.8413470387458801, 0.9075563549995422]]
  embeddings: None
  metadatas: [[{'author': 'Faisal Kalota', 'creationDate': "D:20240219111535+02'00'", 'creator': 'LaTeX with hyperref', 'file_path': '/mnt/c/ML/DU/local_rag_llm/data/aiml/A Primer on Generative Artificial Intelligence.pdf', 'filename': 'A Primer on Generative Artificial Intelligence.pdf', 'format': 'PDF 1.7', 'keywords': 'artificial intelligence; AI; generative artificial intelligence; generative AI; GAI; GenAI; Gen-AI; ChatGPT; LLM; GPT; AI businesses; AI education; AI ethics; AI security', 'modDate': "D:20240219102017+01'00'", 'page': 4, 'producer': 'pdfTeX-1.40.25', 'source': '/mnt/c/ML/DU/local_rag_llm/data/aiml/A Primer on Generative Artificial Intelligence.pdf', 'subject': 'Many educators and professionals in different industries may need 

In [25]:
rag_score_test = rag_functions.check_distances(results['distances'], 0.7)

# Test = True if test failed.
if rag_score_test:
    print(f"Unable to find matching results.")

In [26]:
# Extract all filenames
metadatas = results['metadatas']
filenames = [metadata['filename'] for sublist in metadatas for metadata in sublist]
unique_filenames = list(set(filenames))

print(f"Unique file names: {unique_filenames}")

Unique file names: ['A Primer on Generative Artificial Intelligence.pdf']


In [27]:
# Clean and join the documents
context_text = rag_functions.clean_and_join_doc_chunks(results['documents'])
print(context_text)

Educ. Sci. 2024, 14, 172
5 of 15
Table 1. Differences between machine learning and deep learning.
Machine Learning
Deep Learning
Requires a relatively small amount of data for
training and prediction.
Requires large amounts of data for training
and prediction.
It does not require extensive computational
power, and low-end central processing units
(CPUs) may be sufficient.
High-end computational power is required. A
graphic processing unit (GPU) is needed.
The time to train the model is relatively small.
The time to train a model is relatively high.
Simple linear correlational models.
Non-linear complex correlational models.
The output of machine learning algorithms is
generally a numerical value.
The output is not limited to a single numeric
value but could be in different formats.
Deep learning is inspired by the human brain and utilizes artificial neural networks
(ANN). In order to understand deep learning, it is crucial to understand artificial neural

 - -

Educ. Sci. 2024, 14, 172

### Create Prompt and get response from LLM

- Setup LLM
- Create Prompts
- Create Chain
- Test

Setup LLM

In [28]:
models = config.models

rag_functions.print_dict(models, "Models")


Dictionary: Models
  nomic: nomic_embedding
  llama2_7_chat: llama-2-7b-chat
  llama3_8_ggml: llama3_8b_ggml
  llama3_8_instruct: Llama-3-8b-Instruct
  qwen2_05: Qwen2-0.5b-instruct


In [30]:
llm_temperature = 0.7
llm_model = models['qwen2_05']
llm_model

'Qwen2-0.5b-instruct'

In [31]:
from langchain_openai import ChatOpenAI

chat_model = ChatOpenAI(
    api_key     ="sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",  # can be anything
    base_url    ="http://localhost:8100/v1",  # NOTE: Replace with IP address and port of your llama-cpp-python server
    model=llm_model, 
    temperature=llm_temperature,
    max_tokens=None,
    timeout=None,
    max_retries=2
    )

Create Prompt

- Uses Langchain.schema
  - SystemChatMessage​
    - A chat message representing information that should be instructions to the AI system.
  - HumanChatMessage​
    - A chat message representing information coming from a human interacting with the AI system.
  - AIChatMessage​
    - A chat message representing information coming from the AI system.

In [32]:
# # Define the system prompt
# system_prompt = (
#     "Use the given context to answer the question. "
#     "If you don't know the answer, say you don't know. "
#     "Use three sentence maximum and keep the answer concise. "
#     "Context: {context}"
# )

In [33]:
query = "Tell me about machine learning and data science"

In [34]:
# Define your system prompt, context, and question
system_prompt = (
    "Use the given context to answer the question. "
    "If you don't know the answer, say you don't know. "
    "Use three sentences maximum and keep the answer concise."
)
additional_system_prompt = "This is an additional system prompt to guide the assistant's behavior."
context = context_text
question = query

In [35]:
# Format the messages
messages = [
    {"role": "system", "content": system_prompt},
#    {"role": "system", "content": additional_system_prompt},
    {"role": "system", "content": f"Context: {context}"},
    {"role": "user", "content": "{question}"}
]

In [36]:
response = chat_model.invoke(messages)

response

AIMessage(content='What is a type of protein? Answer: amino acid.', response_metadata={'token_usage': {'completion_tokens': 12, 'prompt_tokens': 49, 'total_tokens': 61}, 'model_name': 'Qwen2-0.5b-instruct', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-856bc31a-f9c6-49c8-ad2e-3bf578f5bb4e-0', usage_metadata={'input_tokens': 49, 'output_tokens': 12, 'total_tokens': 61})

In [37]:
from langchain_core.prompts import ChatPromptTemplate

template = ChatPromptTemplate([
    ("system", system_prompt),
    ("human", "{question}"),
])

prompt_value = template.invoke(
    {
        "context": context_text,
        "question": query
    }
)

rag_functions.print_list(prompt_value.messages, "messages")



List: messages
  0: content="Use the given context to answer the question. If you don't know the answer, say you don't know. Use three sentences maximum and keep the answer concise."
  1: content='Tell me about machine learning and data science'


In [38]:
response_text = chat_model.invoke(prompt_value)

In [39]:
response_text

AIMessage(content='Machine learning involves using algorithms to analyze patterns in data without human intervention. Data science focuses on analyzing and interpreting data to find insights that can be used for prediction, forecasting, decision making, and other applications.', response_metadata={'token_usage': {'completion_tokens': 41, 'prompt_tokens': 54, 'total_tokens': 95}, 'model_name': 'Qwen2-0.5b-instruct', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-76cec769-2621-475d-8926-df4eba7fc2e1-0', usage_metadata={'input_tokens': 54, 'output_tokens': 41, 'total_tokens': 95})

In [40]:
from openai import OpenAI

llm = OpenAI(base_url="http://localhost:8100/v1", api_key="sk-xxx")

In [41]:
# Define your system prompt, context, and question
system_prompt = (
    "Use the given context to answer the question. "
    "If you don't know the answer, say you don't know. "
    "Use three sentences maximum and keep the answer concise."
)
additional_system_prompt = "This is an additional system prompt to guide the assistant's behavior."
context = "Your context here"
question = "This is a query about machine learning and data science"

In [45]:
question = "Tell me about machine learning and data science"

In [46]:
# Format the messages
messages = [
    {"role": "system", "content": system_prompt},
#    {"role": "system", "content": additional_system_prompt},
    {"role": "system", "content": f"Context: {context}"},
    {"role": "user", "content": question}
]

In [47]:
#query = input()
query = "Tell me about machine learning and data science"

stream = llm.chat.completions.create(
    model="qwen-0_5b-instruct-q5_k_m",
    messages = messages,
    temperature = 0.7,
    stream=True,
)

for chunk in stream:
    if chunk.choices[0].delta.content is not None:
        print(chunk.choices[0].delta.content, end="")

Machine learning is a branch of computer science that involves developing algorithms using data that can learn patterns in the data to make predictions or decisions. Data science, on the other hand, focuses on understanding how data behaves, analyzing it for insights, and using these insights to improve business decision making.

In [48]:
response = llm.chat.completions.create(
    model="qwen-0_5b-instruct-q5_k_m",
    messages = messages,
    temperature = 0.7,
    stream=False,
)

In [55]:
response

ChatCompletion(id='chatcmpl-e9688f0a-583a-407a-96b1-6e8b6dba5ed1', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='Machine learning is the study of computers that can learn without being explicitly programmed; it includes everything from statistical models to neural networks and machine learning algorithms. Data science, on the other hand, refers to the analysis and interpretation of large sets of data, particularly in fields such as finance, healthcare, and natural language processing.', role='assistant', function_call=None, tool_calls=None))], created=1722556139, model='qwen-0_5b-instruct-q5_k_m', object='chat.completion', service_tier=None, system_fingerprint=None, usage=CompletionUsage(completion_tokens=63, prompt_tokens=54, total_tokens=117))

In [70]:
response_text = response.choices[0].message.content

response_text

'Machine learning is the study of computers that can learn without being explicitly programmed; it includes everything from statistical models to neural networks and machine learning algorithms. Data science, on the other hand, refers to the analysis and interpretation of large sets of data, particularly in fields such as finance, healthcare, and natural language processing.'

In [69]:
response_text: {  "editor.wordWrap": "on"}