# Testing multi-qa-mpnet-base-dot-v1 on GPU

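This notebook tests the `sentence-transformers/multi-qa-mpnet-base-dot-v1` model: it checks for a CUDA GPU, loads the model, encodes sample queries and documents, ranks the documents for each query by dot-product similarity, and reports GPU memory usage. The model runs on the GPU when CUDA is available and falls back to the CPU otherwise.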

In [2]:
# Check if GPU is available
import torch

# Report the PyTorch build and whether CUDA is usable; each fact is printed once.
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    # These details are only printed when a CUDA device is present.
    print(f"CUDA version: {torch.version.cuda}")
    print(f"GPU device: {torch.cuda.get_device_name(0)}")
    print(f"GPU count: {torch.cuda.device_count()}")
else:
    print("No GPU available - will run on CPU")

PyTorch version: 2.9.0+cu128
CUDA available: True
CUDA version: 12.8
GPU device: NVIDIA GeForce RTX 5050 Laptop GPU
GPU count: 1
PyTorch version: 2.9.0+cu128
CUDA version: 12.8
CUDA available: True


In [1]:
# NOTE(review): prints a placeholder string only and defines nothing; it looks
# like a leftover scratch cell — confirm it can be removed from the notebook.
print("asda")

asda


In [4]:
# Load the model
from sentence_transformers import SentenceTransformer

model_name = 'sentence-transformers/multi-qa-mpnet-base-dot-v1'

# Pick the target device up front: CUDA when PyTorch can use it, otherwise the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

print(f"Loading model: {model_name}")
model = SentenceTransformer(model_name, device=device)
print(f"Model loaded on device: {device}")

  from .autonotebook import tqdm as notebook_tqdm


Loading model: sentence-transformers/multi-qa-mpnet-base-dot-v1
Model loaded on device: cuda
Model loaded on device: cuda


In [5]:
# Encode a small set of sample queries and candidate documents with the loaded model.
queries = [
    "What are the admission requirements?",
    "How do I apply for a scholarship?",
    "What is the tuition fee structure?",
]

documents = [
    "The university requires a high school diploma and transcripts for admission. Students must also submit standardized test scores.",
    "Scholarships are available based on academic merit and financial need. Applications must be submitted by March 1st each year.",
    "Tuition fees vary by program. Undergraduate students pay $10,000 per semester, while graduate programs cost $15,000 per semester.",
    "The library is open from 8 AM to 10 PM on weekdays and 9 AM to 5 PM on weekends.",
    "Students can register for courses online through the student portal during the registration period.",
]

# Queries first, then documents; convert_to_tensor=True asks encode() for a
# tensor result (the report below reads its .shape and .device).
print("Encoding queries...")
query_embeddings = model.encode(queries, convert_to_tensor=True)

print("Encoding documents...")
document_embeddings = model.encode(documents, convert_to_tensor=True)

# Summarise the result: one line per fact, emitted through a single print call.
print(
    f"\nQuery embeddings shape: {query_embeddings.shape}",
    f"Document embeddings shape: {document_embeddings.shape}",
    f"Embeddings device: {query_embeddings.device}",
    sep="\n",
)

Encoding queries...
Encoding documents...

Query embeddings shape: torch.Size([3, 768])
Document embeddings shape: torch.Size([5, 768])
Embeddings device: cuda:0
Encoding documents...

Query embeddings shape: torch.Size([3, 768])
Document embeddings shape: torch.Size([5, 768])
Embeddings device: cuda:0


In [None]:
# Rank the sample documents for each query by dot-product similarity
# (the model name indicates it was tuned for dot-product scoring).
from sentence_transformers import util

TOP_K = 3           # number of best-matching documents to show per query
PREVIEW_CHARS = 80  # maximum number of document characters shown per match

print("Computing similarity scores...\n")

# Score every query against every document in one call:
# all_scores[i][j] is the dot product of query i and document j.
all_scores = util.dot_score(query_embeddings, document_embeddings)

# Top matches per query (row-wise); k is capped so it never exceeds the corpus size.
top_results = torch.topk(all_scores, k=min(TOP_K, len(documents)), dim=1)

# Convert once to plain Python floats/ints so formatting and list indexing
# below do not operate on individual tensor elements.
top_scores_per_query = top_results.values.tolist()
top_indices_per_query = top_results.indices.tolist()

for query, top_scores, top_indices in zip(queries, top_scores_per_query, top_indices_per_query):
    print(f"Query: '{query}'")
    print("Top matches:")
    for score, idx in zip(top_scores, top_indices):
        document = documents[idx]
        # Only add the ellipsis when the preview really cuts the text short;
        # a document of exactly PREVIEW_CHARS characters is shown in full.
        suffix = "..." if len(document) > PREVIEW_CHARS else ""
        print(f"  Score: {score:.4f} - {document[:PREVIEW_CHARS]}{suffix}")
    print()

In [6]:
# Report current, reserved, and peak CUDA memory as seen by PyTorch (no output without CUDA).
if torch.cuda.is_available():
    bytes_per_mb = 1024**2  # divisor used to convert byte counts to the MB figures printed below

    allocated_mb = torch.cuda.memory_allocated() / bytes_per_mb
    reserved_mb = torch.cuda.memory_reserved() / bytes_per_mb
    peak_allocated_mb = torch.cuda.max_memory_allocated() / bytes_per_mb

    print(f"GPU memory allocated: {allocated_mb:.2f} MB")
    print(f"GPU memory reserved: {reserved_mb:.2f} MB")
    print(f"Max GPU memory allocated: {peak_allocated_mb:.2f} MB")

GPU memory allocated: 427.87 MB
GPU memory reserved: 476.00 MB
Max GPU memory allocated: 433.33 MB
