In [1]:
import base64
import vertexai
from vertexai.generative_models import GenerativeModel, Part, SafetySetting

In [3]:
from google.auth.transport.requests import Request
from google.oauth2.service_account import Credentials
from lightrag.base import BaseKVStorage

# Build service-account credentials scoped for the Google Cloud Platform APIs.
# NOTE(review): the key-file name is hard-coded; keep the JSON key out of
# version control and consider loading the path from configuration instead.
credentials = Credentials.from_service_account_file(
    filename='flowing-elf-441900-u8-fec05545b3e4.json',
    scopes=['https://www.googleapis.com/auth/cloud-platform'],
)

# Freshly loaded credentials have no token and no expiry, and google-auth
# treats "no expiry" as "never expired", so checking `expired` never triggered
# a refresh. `valid` is False until a token has been fetched, so this check
# actually obtains a token (and fails fast on a bad key file).
if not credentials.valid:
    credentials.refresh(Request())

  from .autonotebook import tqdm as notebook_tqdm


In [8]:
def generate(
    prompt="The following is a list of the most popular baby names in the United States in 2021.",
    model_name="gemini-1.5-pro-001",
):
    """Stream a Gemini completion for ``prompt`` and print it to stdout.

    Calling ``generate()`` with no arguments sends the same prompt to the
    same model as before; both are now overridable.

    Args:
        prompt: Text sent to the model as the single content item.
        model_name: Vertex AI model identifier passed to ``GenerativeModel``.

    Reads the notebook-level ``credentials``, ``generation_config`` and
    ``safety_settings`` variables at call time, so those cells must have been
    run before this function is called.
    """
    vertexai.init(project="flowing-elf-441900-u8", location="us-central1", credentials=credentials)
    model = GenerativeModel(model_name)
    responses = model.generate_content(
        [prompt],
        generation_config=generation_config,
        safety_settings=safety_settings,
        stream=True,
    )

    # stream=True yields partial responses; print each piece as it arrives
    # without inserting newlines between chunks.
    for response in responses:
        print(response.text, end="")


# Sampling parameters that generate() forwards to generate_content.
generation_config = dict(
    max_output_tokens=8192,
    temperature=1,
    top_p=0.95,
)

# One SafetySetting per harm category, each with its block threshold set to OFF.
safety_settings = [
    SafetySetting(
        category=harm_category,
        threshold=SafetySetting.HarmBlockThreshold.OFF,
    )
    for harm_category in (
        SafetySetting.HarmCategory.HARM_CATEGORY_HATE_SPEECH,
        SafetySetting.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
        SafetySetting.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
        SafetySetting.HarmCategory.HARM_CATEGORY_HARASSMENT,
    )
]

# Run the streaming demo with the settings defined above.
generate()

Please provide me with the list of popular baby names! I need the names to tell you anything about them. 😊 

I can then help you analyze the list, compare it to other years, or even tell you about the origins and meanings of the names.  


In [None]:
# Vertex AI connection settings kept as plain notebook variables.
# NOTE(review): no other visible cell reads `project` or `location` (the init
# helpers hard-code the same literals) — confirm whether this cell is still needed.
project="flowing-elf-441900-u8"
location="us-central1"
# Self-assignment: leaves `credentials` unchanged, and raises NameError if the
# credentials cell has not been run first.
credentials=credentials

In [41]:
from typing import List, Dict, Optional, Any
from google.api_core import exceptions
from vertexai.generative_models import (
    GenerativeModel, 
    ChatSession,
    Content
)
from vertexai.language_models import ChatModel
from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type
import vertexai
from lightrag.base import BaseKVStorage
from lightrag.utils import compute_args_hash, wrap_embedding_func_with_attrs

class VertexAIError(Exception):
    """Catch-all exception type for Vertex AI related failures."""

    pass

def init_vertexai():
    """Point the Vertex AI SDK at this notebook's project, region and credentials."""
    vertexai.init(
        project="flowing-elf-441900-u8",
        location="us-central1",
        credentials=credentials,
    )

@retry(
    stop=stop_after_attempt(3),
    wait=wait_exponential(multiplier=1, min=4, max=10),
    retry=retry_if_exception_type((
        exceptions.ResourceExhausted,  # Rate limit
        exceptions.ServiceUnavailable,  # Connection issues
        exceptions.DeadlineExceeded,   # Timeout
    ))
)
async def vertexai_complete_if_cache2(
    model: str,
    prompt: str,
    system_prompt: Optional[str] = None,
    history_messages: Optional[List[Dict[str, str]]] = None,
    **kwargs
) -> str:
    """Complete ``prompt`` with a Vertex AI Gemini chat model, with optional caching.

    The system prompt is handed to the model as its system instruction and the
    earlier turns are seeded into the chat history, so a single generation
    request is issued per call. The previous implementation built ``Content``
    objects from bare strings (``Content`` expects a list of ``Part``), sent
    the system prompt and user history as fresh user messages, and appended
    assistant turns through the private ``chat._history`` attribute.

    Args:
        model: Vertex AI model name passed to ``GenerativeModel``.
        prompt: The user message to answer.
        system_prompt: Optional system instruction for the model.
        history_messages: Earlier turns as ``{"role": ..., "content": ...}``
            dicts. ``"assistant"`` is mapped to Gemini's ``"model"`` role;
            every other role is treated as ``"user"``.
        **kwargs: ``hashing_kv`` (optional cache store with async
            ``get_by_id`` / ``upsert``) is popped; everything else is
            forwarded unchanged to ``ChatSession.send_message_async``.

    Returns:
        The text of the model's response (or the cached text on a cache hit).

    The decorator retries up to three attempts on rate-limit, unavailable and
    deadline errors; any other exception propagates immediately.
    """
    # Local import: this cell imports Content but not Part.
    from vertexai.generative_models import Part

    init_vertexai()

    hashing_kv: BaseKVStorage = kwargs.pop("hashing_kv", None)
    # None replaces the shared mutable default list.
    history_messages = history_messages or []

    # Plain role/content records give the cache key a stable representation
    # that does not depend on SDK object reprs.
    messages = []
    if system_prompt:
        messages.append({"role": "system", "content": system_prompt})
    messages.extend(history_messages)
    messages.append({"role": "user", "content": prompt})

    # Check cache if available
    if hashing_kv is not None:
        args_hash = compute_args_hash(model, messages)
        cache_result = await hashing_kv.get_by_id(args_hash)
        if cache_result is not None:
            return cache_result["return"]

    # The system prompt belongs in the model's system instruction, not in a
    # user turn.
    generation_model = GenerativeModel(model, system_instruction=system_prompt or None)

    # Seed earlier turns directly into the session instead of replaying them
    # against the API.
    # NOTE(review): some Gemini models may expect user/model turns to
    # alternate — confirm for the history shapes you pass in.
    history = [
        Content(
            role="model" if message["role"] == "assistant" else "user",
            parts=[Part.from_text(message["content"])],
        )
        for message in history_messages
    ]
    chat: ChatSession = generation_model.start_chat(history=history)

    # Send the actual prompt and get response
    response = await chat.send_message_async(prompt, **kwargs)
    response_text = response.text

    # Cache the response if caching is enabled
    if hashing_kv is not None:
        await hashing_kv.upsert(
            {args_hash: {"return": response_text, "model": model}}
        )

    return response_text

In [None]:
import os
from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type
import vertexai
from google.api_core import exceptions
from vertexai.preview.generative_models import GenerativeModel, ChatSession
from typing import List, Dict, Optional, Any, Union
from lightrag.base import BaseKVStorage
from lightrag.utils import compute_args_hash, wrap_embedding_func_with_attrs

# NOTE(review): this repeats the `init_vertexai` definition from an earlier
# cell; whichever cell ran last silently wins. Consider keeping a single copy.
def init_vertexai():
    """Initialize Vertex AI with project details.

    Calls ``vertexai.init`` with the hard-coded project id and region, and the
    notebook-level ``credentials`` object (which must already be defined).
    """
    vertexai.init(project="flowing-elf-441900-u8", location="us-central1", credentials=credentials)

@retry(
    stop=stop_after_attempt(3),
    wait=wait_exponential(multiplier=1, min=4, max=10),
    retry=retry_if_exception_type((
        exceptions.ResourceExhausted,  # Rate limit
        exceptions.ServiceUnavailable,  # Connection issues
        exceptions.DeadlineExceeded,   # Timeout
    ))
)
async def vertexai_complete_if_cache(
    model: str,
    prompt: str,
    system_prompt: Optional[str] = None,
    history_messages: Optional[List[Dict[str, str]]] = None,
    **kwargs
) -> str:
    """
    Async function to generate completions using Vertex AI Gemini with caching support.

    The system prompt is supplied as the model's system instruction and earlier
    turns are seeded into the chat history, so exactly one generation request
    is made per call. Previously the system prompt and every history message —
    assistant turns included, since both branches of the role check were
    identical — were each sent to the model as a new user message.

    Args:
        model: Model name (e.g., "gemini-1.5-pro-001")
        prompt: The user prompt
        system_prompt: Optional system instruction for the model
        history_messages: Earlier turns as {"role": ..., "content": ...} dicts;
            "assistant" maps to Gemini's "model" role, any other role is
            treated as "user"
        **kwargs: ``hashing_kv`` (optional cache store with async
            ``get_by_id`` / ``upsert``) is popped; all remaining keyword
            arguments are forwarded unchanged to
            ``ChatSession.send_message_async``

    Returns:
        str: Generated response content (or the cached text on a cache hit)

    The decorator retries up to three attempts on rate-limit, unavailable and
    deadline errors; any other exception propagates immediately.
    """
    # Local import: this cell only imports GenerativeModel and ChatSession.
    from vertexai.preview.generative_models import Content, Part

    init_vertexai()

    hashing_kv: BaseKVStorage = kwargs.pop("hashing_kv", None)
    # None replaces the shared mutable default list.
    history_messages = history_messages or []

    # Role/content records used only to derive the cache key.
    messages = []
    if system_prompt:
        messages.append({"role": "system", "content": system_prompt})
    messages.extend(history_messages)
    messages.append({"role": "user", "content": prompt})

    # Check cache if available
    if hashing_kv is not None:
        args_hash = compute_args_hash(model, messages)
        cache_result = await hashing_kv.get_by_id(args_hash)
        if cache_result is not None:
            return cache_result["return"]

    # The system prompt belongs in the model's system instruction, not in a
    # user turn.
    generation_model = GenerativeModel(model, system_instruction=system_prompt or None)

    # Seed earlier turns directly into the session instead of replaying them
    # against the API (which cost one request per message and let the model
    # answer its own past replies).
    # NOTE(review): some Gemini models may expect user/model turns to
    # alternate — confirm for the history shapes you pass in.
    history = [
        Content(
            role="model" if message["role"] == "assistant" else "user",
            parts=[Part.from_text(message["content"])],
        )
        for message in history_messages
    ]
    chat: ChatSession = generation_model.start_chat(history=history)

    # Send the actual prompt and get response
    response = await chat.send_message_async(prompt, **kwargs)
    response_text = response.text

    # Cache the response if caching is enabled
    if hashing_kv is not None:
        await hashing_kv.upsert(
            {args_hash: {"return": response_text, "model": model}}
        )

    return response_text

In [49]:
async def gemini_pro_complete(
    prompt: str,
    system_prompt: Optional[str] = None,
    history_messages: Optional[list] = None,
    **kwargs
) -> str:
    """
    Helper function to generate completions using the Gemini 1.5 Pro model.

    Thin wrapper that pins the model name and delegates to
    ``vertexai_complete_if_cache``.

    Args:
        prompt (str): The input prompt
        system_prompt (str, optional): System prompt for setting context
        history_messages (list, optional): Previous conversation messages as
            {"role": ..., "content": ...} dicts; ``None`` means no history
        **kwargs: Forwarded unchanged to ``vertexai_complete_if_cache``

    Returns:
        str: Generated response from Gemini
    """
    # A None default replaces the shared mutable list; always hand the callee
    # a fresh list so it never sees None and never shares state across calls.
    history = list(history_messages) if history_messages else []
    return await vertexai_complete_if_cache(
        model="gemini-1.5-pro-001",  # Gemini 1.5 Pro model identifier
        prompt=prompt,
        system_prompt=system_prompt,
        history_messages=history,
        **kwargs
    )

In [50]:
await gemini_pro_complete("How are you?", system_prompt="Respond with islamic greetings", history_messages=[{"role": "assistant", "content": "Assalamu Alaikum"}])

'Alhamdulillah, I am doing well, thank you. 😊 How are you? \n'

In [7]:
from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type
from google.api_core import exceptions
from vertexai.language_models import TextEmbeddingModel
import numpy as np

@wrap_embedding_func_with_attrs(embedding_dim=768, max_token_size=3072)  # PaLM embedding dimensions
@retry(
    stop=stop_after_attempt(3),
    wait=wait_exponential(multiplier=1, min=4, max=60),
    retry=retry_if_exception_type((exceptions.ResourceExhausted, exceptions.ServiceUnavailable, exceptions.DeadlineExceeded))
)
async def vertexai_embedding(
    texts: List[str],
    model: str = "textembedding-gecko@003",
) -> np.ndarray:
    """
    Generate embeddings for a list of texts using Vertex AI's embedding models.

    Args:
        texts: List of strings to generate embeddings for
        model: Model name (e.g., "textembedding-gecko@003")

    Returns:
        np.ndarray: One row per input text, built from each embedding's
        ``values``

    The decorator retries up to three attempts on rate-limit, unavailable and
    deadline errors; any other exception propagates immediately.
    """
    # Make sure the SDK is configured with the notebook's project/credentials.
    init_vertexai()

    # Use a separate name so the `model` argument (a string) is not rebound
    # to an object of a different type.
    embedding_model = TextEmbeddingModel.from_pretrained(model)

    # Await the SDK's async call: the synchronous get_embeddings would block
    # the event loop for the whole request inside this coroutine.
    embeddings = await embedding_model.get_embeddings_async(texts)

    return np.array([embedding.values for embedding in embeddings])

In [8]:
# Sanity check: embed a single sentence and display the shape of the returned array.
(await vertexai_embedding(["Hello, how are you?"])).shape

(1, 768)

In [None]:
from lightrag.lightrag import LightRAG, EmbeddingFunc, QueryParam

# Wire LightRAG to the Gemini completion helper and the Vertex AI embedder.
# NOTE(review): the working directory uses a Windows-style relative path; on
# other platforms the backslash is part of the directory name.
rag = LightRAG(
    working_dir=r'.\merged',
    llm_model_func=gemini_pro_complete,
    embedding_func=EmbeddingFunc(
        embedding_dim=768,
        # Keep in step with the limit declared on vertexai_embedding's
        # decorator (3072); the previous 8192 contradicted it.
        max_token_size=3072,
        func=vertexai_embedding,
    )
)

INFO:lightrag:Logger initialized for working directory: .\merged
INFO:lightrag:Load KV llm_response_cache with 1 data
INFO:lightrag:Load KV full_docs with 0 data
INFO:lightrag:Load KV text_chunks with 0 data
INFO:lightrag:Loaded graph from .\merged\graph_chunk_entity_relation.graphml with 0 nodes, 0 edges
INFO:nano-vectordb:Load (0, 768) data
INFO:nano-vectordb:Init {'embedding_dim': 768, 'metric': 'cosine', 'storage_file': '.\\merged\\vdb_entities.json'} 0 data
INFO:nano-vectordb:Load (0, 768) data
INFO:nano-vectordb:Init {'embedding_dim': 768, 'metric': 'cosine', 'storage_file': '.\\merged\\vdb_relationships.json'} 0 data
INFO:nano-vectordb:Load (1, 768) data
INFO:nano-vectordb:Init {'embedding_dim': 768, 'metric': 'cosine', 'storage_file': '.\\merged\\vdb_chunks.json'} 1 data


In [53]:
with open("./android_versions_detailed.txt", "r", encoding="utf-8") as f:
    await rag.ainsert(f.read())


INFO:lightrag:[New Docs] inserting 1 docs
INFO:lightrag:[New Chunks] inserting 1 chunks
INFO:lightrag:Inserting 1 vectors to chunks
INFO:lightrag:[Entity Extraction]...


⠙ Processed 1 chunks, 19 entities(duplicated), 18 relations(duplicated)

INFO:lightrag:Inserting 19 vectors to entities





INFO:lightrag:Inserting 18 vectors to relationships
INFO:lightrag:Writing graph with 19 nodes, 18 edges


In [55]:
from lightrag.lightrag import QueryParam

await rag.aquery("Which android version is designed specifically for tablets ?", param=QueryParam(mode="local"))

INFO:lightrag:Local query uses 19 entites, 18 relations, 1 text units


'Android 3.0, codenamed Honeycomb, was designed specifically for tablets. \n'