# dependencies

In [14]:
from google import genai
import re

from dotenv import load_dotenv
load_dotenv()

import logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S'
)
logger = logging.getLogger(__name__)

try:
    gemini_client = genai.Client()
    logger.info("Gemini client created successfully.")
except Exception as e:
    logger.error("Failed to create Gemini client: %s", e)
    raise


2025-08-08 17:12:01 - __main__ - INFO - Gemini client created successfully.


# global config

In [15]:
EMBEDDING_MODEL = "gemini-embedding-001"

# core functions

In [16]:
def chunk_by_structure(document):
    pattern = r"\n## "

    return re.split(pattern, document)

def generate_embeddings(text, model=EMBEDDING_MODEL):
    try:
        response = gemini_client.models.embed_content(
            model=model,
            contents=text
        )
        embeddings = response.embeddings
        logger.info("Embeddings generated successfully.")
        
        return embeddings
    except Exception as e:
        logger.error("Failed to generate embeddings: %s", e)
        raise


# main function

In [17]:
def main(doc_file):
    try:
        with open(doc_file, 'r') as file:
            document = file.read()
        
        logger.info("Document read successfully.")
        
        chunks = chunk_by_structure(document)
        logger.info(f"Document split into {len(chunks)} chunks.")

        embeddings = []
        for chunk in chunks:
            chunk_embeddings = generate_embeddings(chunk)
            embeddings.append(chunk_embeddings)
        
        logger.info("All embeddings generated successfully.")
        
        return embeddings
    except Exception as e:
        logger.error("An error occurred in main function: %s", e)
        raise

In [18]:
main("./assets/report.md")

2025-08-08 17:12:01 - __main__ - INFO - Document read successfully.
2025-08-08 17:12:01 - __main__ - INFO - Document split into 15 chunks.
2025-08-08 17:12:01 - httpx - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/gemini-embedding-001:batchEmbedContents "HTTP/1.1 200 OK"
2025-08-08 17:12:01 - __main__ - INFO - Embeddings generated successfully.
2025-08-08 17:12:03 - httpx - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/gemini-embedding-001:batchEmbedContents "HTTP/1.1 200 OK"
2025-08-08 17:12:03 - __main__ - INFO - Embeddings generated successfully.
2025-08-08 17:12:04 - httpx - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/gemini-embedding-001:batchEmbedContents "HTTP/1.1 200 OK"
2025-08-08 17:12:04 - __main__ - INFO - Embeddings generated successfully.
2025-08-08 17:12:05 - httpx - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/gemini-embeddi

[[ContentEmbedding(
    values=[
      0.009310254,
      -0.014552261,
      -0.0075410446,
      -0.056133088,
      0.011788808,
      <... 3067 more items ...>,
    ]
  )],
 [ContentEmbedding(
    values=[
      0.0036549964,
      -0.0055337953,
      0.0059174136,
      -0.031344205,
      0.006447381,
      <... 3067 more items ...>,
    ]
  )],
 [ContentEmbedding(
    values=[
      0.023292294,
      -0.0028651813,
      0.008429838,
      -0.031267766,
      -0.0006679747,
      <... 3067 more items ...>,
    ]
  )],
 [ContentEmbedding(
    values=[
      0.014450376,
      0.009730031,
      -0.00010011065,
      -0.039459046,
      0.012765585,
      <... 3067 more items ...>,
    ]
  )],
 [ContentEmbedding(
    values=[
      0.0028671417,
      -0.010705836,
      -0.018952785,
      -0.044698812,
      -0.0031852461,
      <... 3067 more items ...>,
    ]
  )],
 [ContentEmbedding(
    values=[
      -0.0035334064,
      -0.013167111,
      -0.009030964,
      -0.06412988