In [None]:
# Install required packages
!pip install langchain langchain-community chromadb transformers torch sentence-transformers pypdf
!pip install huggingface_hub

# Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Import necessary libraries
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, AutoModel
import torch
from langchain.embeddings import HuggingFaceEmbeddings
import os

In [None]:
# Data Ingestion from Google Drive
def load_pdfs_from_drive(directory_path):
    """Load every PDF found directly inside ``directory_path``.

    Files are visited in sorted filename order so that repeated runs yield the
    documents in the same order (``os.listdir`` gives no ordering guarantee).
    The ``.pdf`` extension is matched case-insensitively, and entries that are
    not regular files (for example a folder named ``scans.pdf``) are skipped.
    Sub-directories are not searched, and no splitting is done here.

    Args:
        directory_path: Directory to scan, e.g. a folder on the mounted Drive.

    Returns:
        A flat list holding whatever ``PyPDFLoader(path).load()`` returns for
        each PDF, concatenated in filename order. Empty if no PDF is found.

    Raises:
        FileNotFoundError: If ``directory_path`` does not exist
            (propagated from ``os.listdir``).
    """
    documents = []
    for filename in sorted(os.listdir(directory_path)):
        file_path = os.path.join(directory_path, filename)
        # Skip anything that is not a regular file with a PDF extension.
        if not filename.lower().endswith('.pdf') or not os.path.isfile(file_path):
            continue
        documents.extend(PyPDFLoader(file_path).load())
    return documents

# Specify the Google Drive directory containing PDFs
drive_pdf_directory = '/content/drive/MyDrive/pdfs'
documents = load_pdfs_from_drive(drive_pdf_directory)

# Fail early with a clear message: an empty list here would otherwise only
# surface later, when the vector store is built from zero chunks.
if not documents:
    raise ValueError(f"No PDF pages were loaded from {drive_pdf_directory!r}.")

# Split documents into chunks.
# Sizes are named so they can be tuned in one place; consecutive chunks share
# CHUNK_OVERLAP units of text so content cut at a boundary still appears whole
# in one of them.
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 200
text_splitter = RecursiveCharacterTextSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP)
split_documents = text_splitter.split_documents(documents)

print(f"Loaded and split {len(split_documents)} document chunks.")

Loaded and split 1122 document chunks.


In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch

# Model ID
model_id = "deepseek-ai/deepseek-llm-7b-chat"

# Initialize tokenizer and model
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="cuda",  # Explicitly use T4 GPU
    # NOTE(review): the offload folder is only consulted when the device map
    # sends some weights to disk; with everything forced onto "cuda" it is
    # presumably unused -- confirm before relying on it.
    offload_folder="/content/drive/MyDrive/quantum_tutor_offload",
    torch_dtype=torch.float16,  # Half-precision for T4 GPU
    low_cpu_mem_usage=True  # Minimize CPU memory usage
)

# Create generation pipeline.
# These keyword arguments become the pipeline's default generation settings;
# individual calls may override them.
deepseek_llm = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=500,
    do_sample=True,  # temperature / top_p only take effect when sampling is on
    temperature=0.7,
    top_p=0.9
)

In [None]:
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
import os

# Load PDFs from Google Drive
def load_pdfs_from_drive(directory_path):
    """Load every PDF found directly inside ``directory_path``.

    Files are visited in sorted filename order so that repeated runs yield the
    documents in the same order (``os.listdir`` gives no ordering guarantee).
    The ``.pdf`` extension is matched case-insensitively, and entries that are
    not regular files (for example a folder named ``scans.pdf``) are skipped.
    Sub-directories are not searched, and no splitting is done here.

    Args:
        directory_path: Directory to scan, e.g. a folder on the mounted Drive.

    Returns:
        A flat list holding whatever ``PyPDFLoader(path).load()`` returns for
        each PDF, concatenated in filename order. Empty if no PDF is found.

    Raises:
        FileNotFoundError: If ``directory_path`` does not exist
            (propagated from ``os.listdir``).
    """
    documents = []
    for filename in sorted(os.listdir(directory_path)):
        file_path = os.path.join(directory_path, filename)
        # Skip anything that is not a regular file with a PDF extension.
        if not filename.lower().endswith('.pdf') or not os.path.isfile(file_path):
            continue
        documents.extend(PyPDFLoader(file_path).load())
    return documents

# Specify PDF directory
drive_pdf_directory = '/content/drive/MyDrive/pdfs'
documents = load_pdfs_from_drive(drive_pdf_directory)

# Fail early with a clear message: an empty list here would otherwise only
# surface later, when the vector store is built from zero chunks.
if not documents:
    raise ValueError(f"No PDF pages were loaded from {drive_pdf_directory!r}.")

# Split documents into chunks.
# Sizes are named so they can be tuned in one place; consecutive chunks share
# CHUNK_OVERLAP units of text so content cut at a boundary still appears whole
# in one of them.
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 200
text_splitter = RecursiveCharacterTextSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP)
split_documents = text_splitter.split_documents(documents)

print(f"Loaded and split {len(split_documents)} document chunks.")

Loaded and split 1122 document chunks.


In [None]:
from langchain_community.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings
import os

# Use IBM Granite Embedding
granite_embedding = HuggingFaceEmbeddings(
    model_name="ibm-granite/granite-embedding-125m-english",  # Or use the 30m / multilingual variants
    model_kwargs={"device": "cuda"},
    encode_kwargs={"batch_size": 16}  # Adjust batch size based on GPU memory
)

# Create (or reuse) the persisted ChromaDB vector store.
# Building into a directory that already holds a collection adds the same
# chunks a second time, so every re-run of this cell would duplicate the
# retrieval results. Reuse the store when it exists; delete the folder on
# Drive to force a rebuild (e.g. after changing the PDFs, the chunking
# settings, or the embedding model).
CHROMA_DIR = "/content/drive/MyDrive/chroma_db"

if os.path.isdir(CHROMA_DIR) and os.listdir(CHROMA_DIR):
    db = Chroma(
        persist_directory=CHROMA_DIR,
        embedding_function=granite_embedding
    )
    print("Vector store loaded from existing persisted directory.")
else:
    db = Chroma.from_documents(
        documents=split_documents,
        embedding=granite_embedding,
        persist_directory=CHROMA_DIR
    )
    # Manual persistence is deprecated in recent releases (stores created with
    # a persist_directory are written automatically); call it only if present.
    if hasattr(db, "persist"):
        db.persist()
    print("Vector store created and persisted with Granite Embedding.")


Vector store created and persisted with Granite Embedding.


In [None]:
def rag_pipeline(query, db, llm, top_k=3, max_tokens=512, max_chars_per_doc=500):
    """Answer ``query`` with retrieval-augmented generation.

    Retrieves the ``top_k`` most similar chunks from ``db``, places a truncated
    copy of each into a tutoring prompt, and returns the text generated by
    ``llm`` for that prompt.

    Args:
        query: The user's question.
        db: Vector store exposing ``similarity_search(query, k=...)`` whose
            results have a ``page_content`` string attribute.
        llm: Callable text-generation pipeline; called with the prompt plus
            generation keyword arguments and expected to return a list whose
            first item is a dict with a ``'generated_text'`` key.
        top_k: Number of chunks to retrieve.
        max_tokens: Passed to the pipeline as ``max_new_tokens``.
        max_chars_per_doc: Character limit applied to each retrieved chunk
            before it is added to the prompt; ``None`` disables truncation.

    Returns:
        The generated answer text (prompt excluded).
    """
    import torch, gc

    def _free_gpu_memory():
        # Release Python garbage first, then any cached CUDA blocks.
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

    # Clear GPU memory before generation
    _free_gpu_memory()

    # Retrieve relevant documents
    docs = db.similarity_search(query, k=top_k)
    context = "\n".join(doc.page_content[:max_chars_per_doc] for doc in docs)

    # Friendly, educational prompt
    prompt = f"""You are an expert quantum computing tutor with a friendly and engaging teaching style, similar to how ChatGPT explains complex topics. Your goal is to provide a detailed, clear, and comprehensive answer to the query, making it easy to understand for someone learning the topic. Use analogies, comparisons to classical computing or everyday scenarios, and break down complex ideas into simple steps. If possible, include examples, potential applications, or implications. Structure your response with headings or bullet points for clarity. If the context doesn’t provide enough information, state so clearly and provide a general explanation based on quantum computing principles.

Context:
{context}

Query: {query}

Answer:"""

    # Generate model output; the cleanup runs even if generation raises
    # (e.g. on an out-of-memory error), so the next call starts clean.
    try:
        outputs = llm(
            prompt,
            return_full_text=False,
            max_new_tokens=max_tokens,
            do_sample=True,  # temperature / top_p only take effect when sampling is on
            temperature=0.8,
            top_p=0.8
        )
    finally:
        _free_gpu_memory()

    return outputs[0]['generated_text']


In [None]:
# Smoke test: superposition question, retrieving 3 chunks and allowing up to
# 1024 newly generated tokens.
query = "What is quantum superposition, and how does it enable quantum parallelism?"
response = rag_pipeline(query=query, db=db, llm=deepseek_llm, top_k=3, max_tokens=1024)
print("Query: {}\nResponse: {}".format(query, response))

Query: What is quantum superposition, and how does it enable quantum parallelism?
Response: 

1.1 Introduction to Quantum Superposition

Quantum superposition is a fundamental principle in quantum mechanics that allows particles to be in multiple states at once. In classical computing, a bit (short for binary digit) can be either a 0 or a 1, but in quantum computing, a qubit (quantum bit) can be in both states simultaneously. This unique property enables quantum computing to perform multiple calculations in parallel, leading to the concept of quantum parallelism.

1.2 Explanation of Quantum Superposition

Quantum superposition is similar to the concept of Schrödinger's cat, a famous thought experiment that demonstrates the idea of being in multiple states at once. In the experiment, a cat placed in a sealed box is both alive and dead until the box is opened and observed. Similarly, a qubit can be in a superposition of 0 and 1 until measured, at which point it will collapse into one of 

In [None]:
# Smoke test: Grover's algorithm question, using the pipeline's default
# retrieval and generation settings.
query = "What problem does Grover’s algorithm solve more efficiently than classical methods?"
response = rag_pipeline(query=query, db=db, llm=deepseek_llm)
print("Query: {}\nResponse: {}".format(query, response))

Query: What problem does Grover’s algorithm solve more efficiently than classical methods?
Response: 
Grover’s algorithm is designed to solve a problem more efficiently than classical methods: searching an unordered database of N items.

Let's break down the problem:

1. Classical search algorithms:
Classical search algorithms, such as binary search or linear search, take O(N) steps to find an item in an ordered database. However, when dealing with an unordered database, these algorithms can become less efficient.

2. Grover’s algorithm:
Grover’s algorithm operates on a quantum computer, which allows it to take advantage of the property of superposition and the phenomenon of entanglement. By applying a quantum oracle (Uf in the case of Exercise 7.25) to the initial state of the database, Grover's algorithm modifies the state of the database such that the item of interest is amplified.

3. Efficiency:
Grover’s algorithm can find the item of interest in O(1/√N) steps, which is much faste

In [None]:
# Smoke test: Bell inequality question, using the pipeline's default
# retrieval and generation settings.
query = "How does the Bell inequality test the completeness of quantum mechanics?"
response = rag_pipeline(query=query, db=db, llm=deepseek_llm)
print("Query: {}\nResponse: {}".format(query, response))

Query: How does the Bell inequality test the completeness of quantum mechanics?
Response: 

The Bell inequality test is a way to check if quantum mechanics is complete or if it requires additional hidden variables to explain certain phenomena, such as quantum entanglement. The test is based on a specific Bell inequality proposed by Clauser and Horne, which is designed to detect any local hidden variable theory that can reproduce the predictions of quantum mechanics.

Step 1: Understanding the EPR Paradox and Hidden Variables

The EPR paradox, proposed by Einstein, Podolsky, and Rosen (EPR), involves two entangled particles that are separated by a large distance. According to quantum mechanics, the properties of these particles are correlated, even when they are not directly interacting. This correlation is known as quantum entanglement.

However, some physicists believe that this correlation might be due to some hidden variables that are not accounted for by quantum mechanics. These hi

In [None]:
# Smoke test: the superposition question again, this time with the pipeline's
# default retrieval and generation settings.
query = "What is quantum superposition, and how does it enable quantum parallelism?"
response = rag_pipeline(query=query, db=db, llm=deepseek_llm)
print("Query: {}\nResponse: {}".format(query, response))

Query: What is quantum superposition, and how does it enable quantum parallelism?
Response: 

Quantum Superposition:

Quantum superposition is a fundamental principle in quantum mechanics that allows particles to exist in multiple states at once. It is similar to the concept of being in multiple places simultaneously. This property is unique to quantum systems and differentiates them from classical systems, where particles can only be in one state at a time.

In classical computing, information is represented by bits, which can either be in a state of 0 or 1. However, in quantum computing, the fundamental unit of information is a qubit (quantum bit), which can exist in both states simultaneously. This is made possible by the superposition principle, which allows a qubit to be in a state of 00, 01, 10, or 11 at the same time.

How Quantum Superposition Enables Quantum Parallelism:

Quantum parallelism is the ability to perform many computations simultaneously, making quantum computers m

(43 + 25 + 40 + 34) / 4 = 35.5 seconds on average
