In [None]:
import torch

def test_gpu():
    print("--------------------------------------------------")
    print("       PYTORCH GPU DIAGNOSTIC FOR USER       ")
    print("--------------------------------------------------")

    # 1. Check if CUDA is available
    if torch.cuda.is_available():
        print(f"‚úÖ CUDA is available! (PyTorch Version: {torch.__version__})")
        
        # 2. Get Device Details
        device_id = torch.cuda.current_device()
        gpu_name = torch.cuda.get_device_name(device_id)
        print(f"üíª GPU Detected:   {gpu_name}")
        print(f"üî¢ CUDA Version:   {torch.version.cuda}")
        
        # 3. Perform a Real Calculation on GPU
        # We create two random tensors and multiply them on the VRAM
        try:
            print("\n... Attempting actual computation on GPU ...")
            x = torch.rand(5, 3).cuda()
            y = torch.rand(3, 5).cuda()
            result = torch.matmul(x, y)
            
            print(f"‚úÖ Success! performed matrix multiplication on {gpu_name}.")
            print(f"   Result Tensor location: {result.device}")
            print("   Output shape:", result.shape)
            
        except Exception as e:
            print(f"‚ùå Error during computation: {e}")
            
    else:
        print("‚ùå CUDA is NOT available.")
        print("   PyTorch is running on CPU only.")

    print("--------------------------------------------------")

if __name__ == "__main__":
    test_gpu()

In [1]:
# Step 1: Setup and Environment (GPU Optimized)
from dotenv import load_dotenv
import os

# Check for GPU availability
try:
    import torch
    if torch.cuda.is_available():
        print(f"‚úÖ GPU Available: {torch.cuda.get_device_name(0)}")
        print(f"   CUDA Version: {torch.version.cuda}")
        print(f"   GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")
        DEVICE = "cuda"
    else:
        print("‚ÑπÔ∏è GPU not available, using CPU")
        DEVICE = "cpu"
except ImportError:
    print("‚ÑπÔ∏è PyTorch not installed, using CPU")
    DEVICE = "cpu"

load_dotenv()
api_key = os.getenv("GOOGLE_API_KEY")
print("‚úÖ Environment loaded")


‚úÖ GPU Available: NVIDIA GeForce MX150
   CUDA Version: 12.4
   GPU Memory: 4.29 GB
‚úÖ Environment loaded


In [None]:

# Step 2: Initialize ChromaDB and Google AI
from google import genai
import chromadb.utils.embedding_functions as embedding_functions
import chromadb

chroma_client = chromadb.PersistentClient(path="db/")
google_ef = embedding_functions.GoogleGenerativeAiEmbeddingFunction(api_key=api_key)
client = genai.Client(api_key=api_key)
print("‚úÖ ChromaDB and Google AI clients initialized")
print("   Embedding Model: Google text-embedding-005")
print("   Vector DB: ChromaDB (PersistentClient)")


An error occurred: module 'importlib.metadata' has no attribute 'packages_distributions'




‚úÖ ChromaDB and Google AI clients initialized
   Embedding Model: Google text-embedding-005
   Vector DB: ChromaDB (PersistentClient)


In [3]:
# Step 3: Create/Get Collection
collection = chroma_client.get_or_create_collection(name="MkDocsRAG", embedding_function=google_ef)
print(f"‚úÖ Collection 'MkDocsRAG' ready (existing count: {collection.count()})")


‚úÖ Collection 'MkDocsRAG' ready (existing count: 0)


## Step 4: Load Markdown Files from Local Directory

Load markdown documentation files from a local directory.
Place your markdown files in the `mkdocs_docs/` directory (or specify a custom path).


In [11]:
# Load Markdown Files from Local Directory
import re
import json
import shutil
from pathlib import Path
from typing import List, Dict

# Configuration - Set your markdown files directory here
DOCS_DIR = r"M:\Term 9\Image Processing and Pattern Recognition\MedicalGPT-main\MedicalGPT-main\mkdocs_rag\docs"  # Change this to your markdown files directory



def clean_markdown_content(content: str) -> str:
    """Clean markdown content"""
    content = re.sub(r'\n{3,}', '\n\n', content)
    lines = [line.rstrip() for line in content.split('\n')]
    content = '\n'.join(lines)
    content = re.sub(r'<!--.*?-->', '', content, flags=re.DOTALL)
    content = re.sub(r'^(#{1,6})([^\s#])', r'\1 \2', content, flags=re.MULTILINE)
    return content.strip()

def load_documentation_files(docs_dir: str = DOCS_DIR) -> List[Dict]:
    """Load markdown documentation files from local directory"""
    docs_path = Path(docs_dir)
    
    if not docs_path.exists():
        print(f"‚ùå Documentation directory not found at {docs_path}")
        print(f"üí° Please create the directory and add your markdown files, or update DOCS_DIR variable")
        return []
    
    documentation_files = []
    
    # Find all markdown files
    md_files = list(docs_path.rglob("*.md"))
    
    if not md_files:
        print(f"‚ö†Ô∏è No markdown files found in {docs_path}")
        return []
    
    print(f"üìÇ Found {len(md_files)} markdown files")
    
    for md_file in md_files:
        relative_path = md_file.relative_to(docs_path)
        
        try:
            with open(md_file, 'r', encoding='utf-8', errors='ignore') as f:
                content = f.read()
            
            cleaned_content = clean_markdown_content(content)
            
            documentation_files.append({
                'file_path': str(relative_path),
                'content': cleaned_content,
                'source': str(md_file)
            })
            
            print(f"‚úÖ Loaded: {relative_path}")
        except Exception as e:
            print(f"‚ö†Ô∏è Error loading {relative_path}: {e}")
            continue
    
    print(f"‚úÖ Loaded {len(documentation_files)} documentation files")
    return documentation_files

# Load documentation files
print("üöÄ Loading markdown files from local directory...")
files = load_documentation_files(DOCS_DIR)

if files:
    metadata = {
        'total_files': len(files),
        'files': [
            {
                'file_path': f['file_path'],
                'content_length': len(f['content']),
                'source': f['source']
            }
            for f in files
        ]
    }
    with open('extraction_metadata.json', 'w', encoding='utf-8') as f:
        json.dump(metadata, f, indent=2)
    print(f"‚úÖ Metadata saved to extraction_metadata.json")

print(f"‚úÖ Loading complete! Found {len(files)} documentation files")

# Load documentation files
print("üöÄ Loading markdown files from local directory...")
files = load_documentation_files(DOCS_DIR)

if files:
    metadata = {
        'total_files': len(files),
        'files': [
            {
                'file_path': f['file_path'],
                'content_length': len(f['content']),
                'source': f['source']
            }
            for f in files
        ]
    }
    with open('extraction_metadata.json', 'w', encoding='utf-8') as f:
        json.dump(metadata, f, indent=2)
    print(f"‚úÖ Metadata saved to extraction_metadata.json")

print(f"‚úÖ Loading complete! Found {len(files)} documentation files")


üöÄ Loading markdown files from local directory...
üìÇ Found 19 markdown files
‚úÖ Loaded: getting-started.md
‚úÖ Loaded: index.md
‚úÖ Loaded: about\contributing.md
‚úÖ Loaded: about\license.md
‚úÖ Loaded: about\release-notes.md
‚úÖ Loaded: dev-guide\api.md
‚úÖ Loaded: dev-guide\plugins.md
‚úÖ Loaded: dev-guide\README.md
‚úÖ Loaded: dev-guide\themes.md
‚úÖ Loaded: dev-guide\translations.md
‚úÖ Loaded: user-guide\choosing-your-theme.md
‚úÖ Loaded: user-guide\cli.md
‚úÖ Loaded: user-guide\configuration.md
‚úÖ Loaded: user-guide\customizing-your-theme.md
‚úÖ Loaded: user-guide\deploying-your-docs.md
‚úÖ Loaded: user-guide\installation.md
‚úÖ Loaded: user-guide\localizing-your-theme.md
‚úÖ Loaded: user-guide\README.md
‚úÖ Loaded: user-guide\writing-your-docs.md
‚úÖ Loaded 19 documentation files
‚úÖ Metadata saved to extraction_metadata.json
‚úÖ Loading complete! Found 19 documentation files
üöÄ Loading markdown files from local directory...
üìÇ Found 19 markdown files
‚úÖ Loaded: getti

## Step 5: Chunking Implementation

**DELIVERABLE 1 & 2**: Chunking method selection and cleaning implementation.


In [None]:
from langchain_text_splitters import RecursiveCharacterTextSplitter
from typing import List, Dict, Any

class MkDocsChunker:
    """
    Chunking strategy: RecursiveCharacterTextSplitter
    
    Reason for selection:
    1. MkDocs documentation is primarily markdown with hierarchical structure
    2. RecursiveCharacterTextSplitter respects markdown structure (headers, code blocks, lists)
    3. Handles variable-length content well (short code snippets to long explanations)
    4. Preserves context through chunk overlap
    5. Works well with semantic search as it maintains semantic boundaries
    """
    
    def __init__(self, chunk_size: int = 1000, chunk_overlap: int = 200, separators: List[str] = None):
        if separators is None:
            separators = [
                "\n\n## ",      # Major sections
                "\n\n### ",     # Subsections
                "\n\n",         # Paragraph breaks
                "\n",           # Line breaks
                ". ",           # Sentences
                " ",            # Words
                ""              # Characters
            ]
        
        self.splitter = RecursiveCharacterTextSplitter(
            separators=separators,
            chunk_size=chunk_size,
            chunk_overlap=chunk_overlap,
            length_function=len,
            is_separator_regex=False
        )
    
    
    def clean_chunk(self, chunk: str) -> str:
        """Clean individual chunk - 5-step cleaning process"""
        # Step 1: Remove leading/trailing whitespace
        chunk = chunk.strip()
        
        # Step 2: Remove excessive blank lines (more than 2 consecutive)
        chunk = re.sub(r'\n{3,}', '\n\n', chunk)
        
        # Step 3: Remove whitespace-only lines at start/end
        lines = chunk.split('\n')
        while lines and not lines[0].strip():
            lines.pop(0)
        while lines and not lines[-1].strip():
            lines.pop(-1)
        chunk = '\n'.join(lines)
        
        # Step 4: Normalize markdown header formatting
        chunk = re.sub(r'\n(#{1,6})\s*([^\n]+)', r'\n\n\1 \2\n', chunk)
        
        # Step 5: Final cleanup of excessive newlines
        chunk = re.sub(r'\n{3,}', '\n\n', chunk)
        
        return chunk.strip()
    
    def chunk_document(self, content: str, metadata: Dict[str, Any] = None) -> List[Dict[str, Any]]:
        """Chunk a document and return chunks with metadata"""
        chunks = self.splitter.split_text(content)
        cleaned_chunks = [self.clean_chunk(chunk) for chunk in chunks]
        cleaned_chunks = [chunk for chunk in cleaned_chunks if len(chunk) > 50]
        
        chunk_list = []
        for i, chunk in enumerate(cleaned_chunks):
            chunk_metadata = {
                'chunk_index': i,
                'chunk_size': len(chunk),
                'total_chunks': len(cleaned_chunks)
            }
            if metadata:
                chunk_metadata.update(metadata)
            
            chunk_list.append({
                'content': chunk,
                'metadata': chunk_metadata
            })
        
        return chunk_list

# Initialize chunker
chunker = MkDocsChunker(chunk_size=1000, chunk_overlap=200)
print("‚úÖ Chunker initialized")
print("Strategy: RecursiveCharacterTextSplitter with markdown-aware separators")


‚úÖ Chunker initialized
Strategy: RecursiveCharacterTextSplitter with markdown-aware separators


## Step 6: Process and Chunk Documentation

Load extracted documentation and chunk it.


In [14]:
# Process and chunk all documentation
import pickle
from tqdm import tqdm

docs_dir = Path(DOCS_DIR)
if not docs_dir.exists():
    print("‚ùå mkdocs_docs directory not found. Run the extraction cell first!")
else:
    all_texts = []
    all_metadatas = []
    all_ids = []
    
    # Process all markdown files
    for md_file in tqdm(docs_dir.rglob("*.md"), desc="Loading and chunking docs"):
        with open(md_file, 'r', encoding='utf-8', errors='ignore') as f:
            content = f.read()
        
        relative_path = md_file.relative_to(docs_dir)
        
        # Chunk the document
        chunks = chunker.chunk_document(
            content,
            metadata={'file_path': str(relative_path), 'source': 'mkdocs'}
        )
        
        # Add chunks to lists
        for chunk in chunks:
            all_texts.append(chunk['content'])
            all_metadatas.append(chunk['metadata'])
            all_ids.append(f"{relative_path}-c{chunk['metadata']['chunk_index']}")
    
    # Save to file for caching
    with open("split_data.pkl", "wb") as f:
        pickle.dump((all_texts, all_metadatas, all_ids), f)
    
    print(f"‚úÖ Processed {len(all_texts)} chunks from {len(list(docs_dir.rglob('*.md')))} files")


Loading and chunking docs: 19it [00:00, 295.46it/s]

‚úÖ Processed 417 chunks from 19 files





## Step 7: Embed and Index Chunks

Embed chunks and add them to ChromaDB.


In [15]:
# Load split data
import threading
import time

with open("split_data.pkl", "rb") as f:
    all_texts, all_metadatas, all_ids = pickle.load(f)

print(f"‚úÖ Loaded {len(all_texts)} chunks.")

# Threading configuration
NUM_WORKERS = 4
BATCH_SIZE = 30
total = len(all_texts)

def embed_and_insert(start_idx: int, end_idx: int):
    texts = all_texts[start_idx:end_idx]
    metadatas = all_metadatas[start_idx:end_idx]
    ids = all_ids[start_idx:end_idx]
    
    print(f"‚úÖ Processing {start_idx} to {end_idx} ({len(texts)} chunks)")

    # Check which IDs already exist
    try:
        existing = collection.get(ids=ids)
        existing_ids = set(existing["ids"])
    except Exception as e:
        existing_ids = set()

    # Filter out already existing IDs
    filtered_texts, filtered_metadatas, filtered_ids = [], [], []
    for t, m, i in zip(texts, metadatas, ids):
        if i not in existing_ids:
            filtered_texts.append(t)
            filtered_metadatas.append(m)
            filtered_ids.append(i)

    if not filtered_ids:
        print("‚è© All IDs already exist. Skipping batch.")
        return

    try:
        collection.add(documents=filtered_texts, metadatas=filtered_metadatas, ids=filtered_ids)
    except Exception as e:
        print(f"‚ùå Retry after error: {e}")
        time.sleep(60)
        try:
            collection.add(documents=filtered_texts, metadatas=filtered_metadatas, ids=filtered_ids)
        except Exception as e:
            print(f"‚ùå Permanent failure: {e}")
    
    time.sleep(4)

# Run embedding
from concurrent.futures import ThreadPoolExecutor, as_completed

total = len(all_texts)
futures = []
with ThreadPoolExecutor(max_workers=NUM_WORKERS) as executor:
    for i in range(0, total, BATCH_SIZE):
        futures.append(executor.submit(embed_and_insert, i, min(i + BATCH_SIZE, total)))

    for _ in tqdm(as_completed(futures), total=len(futures), desc="Embedding Chunks", unit="batch"):
        try:
            _.result()
        except Exception as e:
            print(f"‚ùå Thread failed: {e}")

print("‚úÖ Indexing completed.")


‚úÖ Loaded 417 chunks.
‚úÖ Processing 0 to 30 (30 chunks)
‚úÖ Processing 30 to 60 (30 chunks)
‚úÖ Processing 60 to 90 (30 chunks)
‚úÖ Processing 90 to 120 (30 chunks)


Embedding Chunks:   0%|          | 0/14 [00:00<?, ?batch/s]

‚ùå Retry after error: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. To monitor your current usage, head to: https://ai.dev/usage?tab=rate-limit. 
* Quota exceeded for metric: generativelanguage.googleapis.com/embed_content_free_tier_requests, limit: 0
* Quota exceeded for metric: generativelanguage.googleapis.com/embed_content_free_tier_requests, limit: 0
* Quota exceeded for metric: generativelanguage.googleapis.com/embed_content_free_tier_requests, limit: 0
* Quota exceeded for metric: generativelanguage.googleapis.com/embed_content_free_tier_requests, limit: 0 [links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, violations {
  quota_metric: "generativelanguage.googleapis.com/embed_content_free_tier_requests"
  quota_id: "EmbedContentRequestsPerDayPerProjectPerModel-FreeTier"
}
violat

Embedding Chunks:  14%|‚ñà‚ñç        | 2/14 [01:05<05:24, 27.02s/batch]

‚úÖ Processing 120 to 150 (30 chunks)
‚úÖ Processing 150 to 180 (30 chunks)
‚úÖ Processing 180 to 210 (30 chunks)


Embedding Chunks:  29%|‚ñà‚ñà‚ñä       | 4/14 [01:05<01:41, 10.11s/batch]

‚úÖ Processing 210 to 240 (30 chunks)
‚ùå Retry after error: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. To monitor your current usage, head to: https://ai.dev/usage?tab=rate-limit. 
* Quota exceeded for metric: generativelanguage.googleapis.com/embed_content_free_tier_requests, limit: 0
* Quota exceeded for metric: generativelanguage.googleapis.com/embed_content_free_tier_requests, limit: 0
* Quota exceeded for metric: generativelanguage.googleapis.com/embed_content_free_tier_requests, limit: 0
* Quota exceeded for metric: generativelanguage.googleapis.com/embed_content_free_tier_requests, limit: 0 [links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, violations {
  quota_metric: "generativelanguage.googleapis.com/embed_content_free_tier_requests"
  quota_id: "EmbedContentRequestsPerDa

Embedding Chunks:  29%|‚ñà‚ñà‚ñä       | 4/14 [01:20<01:41, 10.11s/batch]

‚ùå Permanent failure: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. To monitor your current usage, head to: https://ai.dev/usage?tab=rate-limit. 
* Quota exceeded for metric: generativelanguage.googleapis.com/embed_content_free_tier_requests, limit: 0
* Quota exceeded for metric: generativelanguage.googleapis.com/embed_content_free_tier_requests, limit: 0
* Quota exceeded for metric: generativelanguage.googleapis.com/embed_content_free_tier_requests, limit: 0
* Quota exceeded for metric: generativelanguage.googleapis.com/embed_content_free_tier_requests, limit: 0 [links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, violations {
  quota_metric: "generativelanguage.googleapis.com/embed_content_free_tier_requests"
  quota_id: "EmbedContentRequestsPerDayPerProjectPerModel-FreeTier"
}
violat

Embedding Chunks:  36%|‚ñà‚ñà‚ñà‚ñå      | 5/14 [02:11<04:04, 27.22s/batch]

‚úÖ Processing 240 to 270 (30 chunks)‚úÖ Processing 270 to 300 (30 chunks)

‚úÖ Processing 300 to 330 (30 chunks)
‚úÖ Processing 330 to 360 (30 chunks)
‚ùå Retry after error: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. To monitor your current usage, head to: https://ai.dev/usage?tab=rate-limit. 
* Quota exceeded for metric: generativelanguage.googleapis.com/embed_content_free_tier_requests, limit: 0
* Quota exceeded for metric: generativelanguage.googleapis.com/embed_content_free_tier_requests, limit: 0
* Quota exceeded for metric: generativelanguage.googleapis.com/embed_content_free_tier_requests, limit: 0
* Quota exceeded for metric: generativelanguage.googleapis.com/embed_content_free_tier_requests, limit: 0 [links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, violations {
  quota_m

Embedding Chunks:  64%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 9/14 [03:16<01:41, 20.21s/batch]

‚úÖ Processing 360 to 390 (30 chunks)‚úÖ Processing 390 to 417 (27 chunks)

‚ùå Retry after error: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. To monitor your current usage, head to: https://ai.dev/usage?tab=rate-limit. 
* Quota exceeded for metric: generativelanguage.googleapis.com/embed_content_free_tier_requests, limit: 0
* Quota exceeded for metric: generativelanguage.googleapis.com/embed_content_free_tier_requests, limit: 0
* Quota exceeded for metric: generativelanguage.googleapis.com/embed_content_free_tier_requests, limit: 0
* Quota exceeded for metric: generativelanguage.googleapis.com/embed_content_free_tier_requests, limit: 0 [links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, violations {
  quota_metric: "generativelanguage.googleapis.com/embed_content_free_tier_requests"


Embedding Chunks: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 14/14 [04:20<00:00, 18.60s/batch]

‚úÖ Indexing completed.





## Step 8: Verify Collection

Check the indexed documents.


In [16]:
# Verify collection
print(f"Total documents in collection: {collection.count()}")
print("\nSample documents:")
collection.peek(limit=3)


Total documents in collection: 0

Sample documents:


{'ids': [],
 'embeddings': array([], dtype=float64),
 'documents': [],
 'uris': None,
 'included': ['metadatas', 'documents', 'embeddings'],
 'data': None,
 'metadatas': []}

## Step 9: Test Query

Test the retrieval system with a sample question.


In [None]:
test_query = "How do I install MkDocs?"
results = collection.query(
    query_texts=[test_query],
    n_results=5,
)

print("=" * 80)
print("DELIVERABLE 5: Sample Questions and Context from Vector DB")
print("=" * 80)
print(f"\nüìù Question: {test_query}\n")
print(f"üìä Retrieved {len(results['documents'][0])} relevant chunks from vector database:\n")

for i, (doc, metadata, distance) in enumerate(zip(
    results['documents'][0], 
    results['metadatas'][0],
    results.get('distances', [[]])[0] if 'distances' in results else [0] * len(results['documents'][0])
), 1):
    print(f"{'‚îÄ' * 80}")
    print(f"Result {i}:")
    print(f"  Source: {metadata.get('file_path', 'Unknown')}")
    print(f"  Distance: {distance:.4f} (lower = more similar)")
    print(f"  Content preview:")
    print(f"  {doc[:300]}...")
    print()

print("\nüí° You can test more questions by changing 'test_query' above")
print("   Or use the FastAPI app (app.py) to query interactively")


ResourceExhausted: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. To monitor your current usage, head to: https://ai.dev/usage?tab=rate-limit. 
* Quota exceeded for metric: generativelanguage.googleapis.com/embed_content_free_tier_requests, limit: 0
* Quota exceeded for metric: generativelanguage.googleapis.com/embed_content_free_tier_requests, limit: 0
* Quota exceeded for metric: generativelanguage.googleapis.com/embed_content_free_tier_requests, limit: 0
* Quota exceeded for metric: generativelanguage.googleapis.com/embed_content_free_tier_requests, limit: 0 [links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, violations {
  quota_metric: "generativelanguage.googleapis.com/embed_content_free_tier_requests"
  quota_id: "EmbedContentRequestsPerDayPerProjectPerModel-FreeTier"
}
violations {
  quota_metric: "generativelanguage.googleapis.com/embed_content_free_tier_requests"
  quota_id: "EmbedContentRequestsPerMinutePerProjectPerModel-FreeTier"
}
violations {
  quota_metric: "generativelanguage.googleapis.com/embed_content_free_tier_requests"
  quota_id: "EmbedContentRequestsPerMinutePerUserPerProjectPerModel-FreeTier"
}
violations {
  quota_metric: "generativelanguage.googleapis.com/embed_content_free_tier_requests"
  quota_id: "EmbedContentRequestsPerDayPerUserPerProjectPerModel-FreeTier"
}
]