# IBM Quantum Computing RAG on IBM Cloud

In [1]:
from ibm_granite_community.notebook_utils import get_env_var, wrap_text
from langchain_huggingface import HuggingFaceEmbeddings
from transformers import AutoTokenizer
from langchain_milvus import Milvus
import tempfile
from langchain_ibm import ChatWatsonx
from docling.document_converter import DocumentConverter
from docling_core.transforms.chunker.hybrid_chunker import HybridChunker
from docling_core.types.doc.labels import DocItemLabel
from langchain_core.documents import Document
# from torch.optim.lr_scheduler import LRScheduler

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# watsonx.ai connection settings. Each value is looked up by name through
# get_env_var so that no credential is written into the notebook itself.
WATSONX_APIKEY = get_env_var("WATSONX_APIKEY")
WATSONX_PROJECT_ID = get_env_var("WATSONX_PROJECT_ID")
URL = get_env_var("WATSONX_URL")

In [3]:
# Environment check: print the installed PyTorch version so the run's
# environment is recorded alongside the notebook output.
import torch
print(torch.__version__)

2.9.1


In [4]:
# Hugging Face model id shared by the embedding model and its tokenizer.
embeddings_model_path = "ibm-granite/granite-embedding-30m-english"

# Embedding model that is handed to the vector store as its embedding function.
embedding_model = HuggingFaceEmbeddings(model_name=embeddings_model_path)

# Tokenizer loaded from the same model id as the embedding model.
embeddings_tokenizer = AutoTokenizer.from_pretrained(embeddings_model_path)

2025-12-01 12:54:56,214 - INFO - Use pytorch device_name: mps
2025-12-01 12:54:56,215 - INFO - Load pretrained SentenceTransformer: ibm-granite/granite-embedding-30m-english


In [5]:
# Reserve a uniquely named file for the Milvus database.
# The temporary-file object is only needed for its generated name, so it is
# closed immediately by the `with` block; delete=False keeps the file on disk
# after the handle is closed. (Taking `.name` straight off the constructor
# would leave the handle open for the life of the kernel.)
with tempfile.NamedTemporaryFile(prefix="milvus_", suffix=".db", delete=False) as db_handle:
    db_file = db_handle.name
print(f"The vector database will be saved to {db_file}")

# Vector store backed by the file above; texts are embedded with
# `embedding_model` when they are added or searched.
vector_db = Milvus(
    embedding_function=embedding_model,
    connection_args={"uri": db_file},
    auto_id=True,
    enable_dynamic_field=True,
    index_params={"index_type": "AUTOINDEX"},
)

The vector database will be saved to /var/folders/rh/41xw7p5x5xn6nc78sc2h6__40000gn/T/milvus_98k348df.db


  from pkg_resources import DistributionNotFound, get_distribution


In [6]:
# Chat model client for watsonx.ai, configured with the credentials and
# endpoint read from the environment earlier in the notebook.
llm = ChatWatsonx(
    model_id="ibm/granite-4-h-small",
    apikey=WATSONX_APIKEY,
    url=URL,
    project_id=WATSONX_PROJECT_ID,
    params={
        # temperature 0: request the least-random sampling setting.
        "temperature": 0,
        # NOTE(review): `max_new_tokens` is the text-generation parameter name;
        # chat requests usually use `max_tokens`. Confirm that ChatWatsonx
        # honors this key rather than ignoring it.
        "max_new_tokens": 512,
    }
)

2025-12-01 12:54:59,203 - INFO - Client successfully initialized
2025-12-01 12:54:59,960 - INFO - HTTP Request: GET https://us-south.ml.cloud.ibm.com/ml/v1/foundation_model_specs?version=2025-11-12&project_id=4e0a5d87-689d-41e6-9ad8-982592d431c7&filters=%21lifecycle_withdrawn&limit=200 "HTTP/1.1 200 OK"
2025-12-01 12:54:59,971 - INFO - Successfully finished Get available foundation models for url: 'https://us-south.ml.cloud.ibm.com/ml/v1/foundation_model_specs?version=2025-11-12&project_id=4e0a5d87-689d-41e6-9ad8-982592d431c7&filters=%21lifecycle_withdrawn&limit=200'


In [20]:
# Here are our documents; feel free to add more documents in formats that Docling supports
sources = [
    'https://arxiv.org/pdf/2506.03094',
    'https://arxiv.org/pdf/2511.19983',
    'https://arxiv.org/pdf/2303.09491',
]

converter = DocumentConverter()

# Build the chunker once and reuse it for every source instead of
# re-creating it per document.
chunker = HybridChunker(tokenizer=embeddings_tokenizer)

# A chunk is kept only if at least one of its document items carries one of
# these labels.
text_labels = (DocItemLabel.TEXT, DocItemLabel.PARAGRAPH)

# Convert and chunk our documents.
# `doc_id` is a running 1-based counter over the *kept* chunks (not over the
# source documents); it is stored under the "doc_id" metadata key next to the
# source the chunk came from.
doc_id = 0
texts: list[Document] = []
for source in sources:
    document = converter.convert(source=source).document
    for chunk in chunker.chunk(document):
        # Test the label match itself rather than the truthiness of the
        # matching items.
        if not any(item.label in text_labels for item in chunk.meta.doc_items):
            continue
        doc_id += 1
        texts.append(
            Document(page_content=chunk.text, metadata={"doc_id": doc_id, "source": source})
        )

print(f"{len(texts)} document chunks created")

2025-12-01 13:00:11,524 - INFO - detected formats: [<InputFormat.PDF: 'pdf'>]
2025-12-01 13:00:11,543 - INFO - Going to convert document batch...
2025-12-01 13:00:11,544 - INFO - Initializing pipeline for StandardPdfPipeline with options hash 44ae89a68fc272bc7889292e9b5a1bad
2025-12-01 13:00:11,545 - INFO - Auto OCR model selected ocrmac.
2025-12-01 13:00:11,546 - INFO - Accelerator device: 'mps'
2025-12-01 13:00:12,298 - INFO - Accelerator device: 'mps'
2025-12-01 13:00:12,734 - INFO - Processing document 2506.03094v1.pdf
2025-12-01 13:00:29,546 - INFO - Finished converting document 2506.03094v1.pdf in 18.42 sec.
2025-12-01 13:00:30,454 - INFO - detected formats: [<InputFormat.PDF: 'pdf'>]
2025-12-01 13:00:30,466 - INFO - Going to convert document batch...
2025-12-01 13:00:30,467 - INFO - Processing document 2511.19983v1.pdf
2025-12-01 13:00:37,431 - INFO - Finished converting document 2511.19983v1.pdf in 7.22 sec.
2025-12-01 13:00:38,135 - INFO - detected formats: [<InputFormat.PDF: 

235 document chunks created


In [21]:
# Add the chunked documents to the vector store and report how many ids came back.
# NOTE(review): this cell is presumably not idempotent — running it again on the
# same database would likely insert the same chunks a second time. Confirm, and
# recreate the database before re-running if duplicates matter.
ids = vector_db.add_documents(texts)
print(f"{len(ids)} documents added to the vector database")

235 documents added to the vector database


In [22]:
# Question used both for this retrieval check and for the RAG chain later on.
query = "Explain to me what quantum machine learning is all about."

# Sanity-check retrieval on its own before wiring it into the chain:
# similarity_search returns a list of documents.
retrieved_docs = vector_db.similarity_search(query)
print(f"{len(retrieved_docs)} source documents returned")
for doc in retrieved_docs:
    # Each document is followed by an 80-character rule so entries are easy to tell apart.
    print(doc, "=" * 80, sep="\n")

4 source documents returned
page_content='In practice, QML is a broad term that encompasses all of the tasks shown in Fig. 1. For example, one can apply machine learning to quantum applications like discovering quantum algorithms [8] or optimizing quantum experiments [9, 10], or one can use a quantum neural network to process either classical or quantum information [11]. Even classical tasks can be viewed as QML when they are quantum inspired [12]. We note that the focus of this article will be on quantum neural networks, quantum deep learning, and quantum kernels, even though the field of QML is quite broad and goes beyond these topics.
After the invention of the laser, it was called a solution in search of a problem. To some degree, the situation with QML is similar. The complete list of applications of QML is not fully known. Nevertheless, it is possible to speculate that all the areas shown in Fig. 2 will be impacted by QML. For example, QML will likely benefit chemistry, materials

In [23]:
from ibm_granite_community.langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_classic.chains.retrieval import create_retrieval_chain
from langchain_core.prompts import ChatPromptTemplate

# Prompt for question-answering. The template itself only carries the user's
# question via the {input} variable.
# NOTE(review): the retrieved documents are presumably supplied to the model by
# the ibm_granite_community create_stuff_documents_chain helper rather than by
# this template — confirm against that helper's documentation.
prompt_template = ChatPromptTemplate.from_template("{input}")

# Assemble the retrieval-augmented generation chain:
# first the step that combines documents with the prompt for the LLM ...
combine_docs_chain = create_stuff_documents_chain(
    llm=llm,
    prompt=prompt_template,
)
# ... then the outer chain that uses the vector store as its retriever and
# passes the results to the combine step.
rag_chain = create_retrieval_chain(
    retriever=vector_db.as_retriever(),
    combine_docs_chain=combine_docs_chain,
)

In [24]:
# Run the RAG chain on the question; the generated text is read from the
# "answer" key of the result.
output = rag_chain.invoke({"input": query})

# Print the wrapped answer framed by 40-character rules.
print("=" * 40, "RAG Answer:", sep="\n")
print(wrap_text(output["answer"]), "=" * 40, sep="\n")

2025-12-01 13:00:47,596 - INFO - HTTP Request: POST https://us-south.ml.cloud.ibm.com/ml/v1/text/chat?version=2025-11-12 "HTTP/1.1 200 OK"
2025-12-01 13:00:47,600 - INFO - Successfully finished chat for url: 'https://us-south.ml.cloud.ibm.com/ml/v1/text/chat?version=2025-11-12'


RAG Answer:
Quantum Machine Learning (QML) is a field that combines machine learning and
quantum computing, with the potential to accelerate data analysis, especially
for quantum data. QML has applications in areas such as quantum materials,
biochemistry, and high-energy physics. However, there are challenges regarding
the trainability of QML models due to noise from quantum computers. One of the
main goals of QML is to achieve a quantum advantage for data analysis, utilizing
the speedup offered by quantum computing. QML models include quantum neural
networks (QNNs) and quantum kernels, with QNNs being a primary ingredient in
several learning schemes. The field of QML is broad and encompasses tasks such
as discovering quantum algorithms, optimizing quantum experiments, and
processing classical or quantum information using quantum neural networks.
