In [1]:
import requests
from dotenv import load_dotenv
import os
import pandas as pd
# Load variables from a local .env file into the process environment (python-dotenv).
# Being the last expression of the cell, its boolean return value is displayed.
load_dotenv()

True

In [21]:
# Gemini credentials and endpoint, read once at cell execution time.
# API_KEY is None when GOOGLE_API_KEY is absent from the environment.
API_KEY = os.environ.get("GOOGLE_API_KEY")
API_URL = (
    "https://generativelanguage.googleapis.com"
    "/v1beta/models/gemini-embedding-001:embedContent"
)

def embedding_single_text(text: str, task_type: str = "RETRIEVAL_DOCUMENT") -> list[float]:
    """Embed a single text through the Gemini ``embedContent`` HTTP endpoint.

    Args:
        text: The text to embed.
        task_type: Value sent as ``taskType`` in the request body. Defaults to
            ``"RETRIEVAL_DOCUMENT"``, the value that used to be hard-coded;
            ``"QUESTION_ANSWERING"`` was noted as an alternative. See the
            Gemini embeddings documentation for the accepted values.

    Returns:
        The list stored under ``embedding.values`` in the JSON response.

    Raises:
        RuntimeError: If ``API_KEY`` is empty, if the API answers with a
            non-200 status, or if the response carries no
            ``embedding.values``.
        requests.RequestException: On network failure or when the 30 s
            timeout expires.
    """
    # Fail early with a clear message instead of sending an unauthenticated request.
    if not API_KEY:
        raise RuntimeError("Clé API Gemini manquante: API_KEY n'est pas définie")

    # The key travels in a header rather than the query string so it cannot
    # leak through exception messages or logs that include the request URL.
    headers = {
        "Content-Type": "application/json",
        "x-goog-api-key": API_KEY,
    }

    data = {
        "content": {
            "parts": [{"text": text}]
        },
        "taskType": task_type,
    }

    response = requests.post(
        API_URL,
        headers=headers,
        json=data,
        timeout=30
    )

    if response.status_code != 200:
        # The exception already carries the message; no separate print needed.
        raise RuntimeError(f"Erreur API Gemini: {response.status_code} - {response.text}")

    result = response.json()

    # Validate the whole path to the vector so a partial payload produces the
    # same explicit error instead of a bare KeyError/TypeError.
    embedding = result.get("embedding") if isinstance(result, dict) else None
    values = embedding.get("values") if isinstance(embedding, dict) else None
    if values is None:
        raise RuntimeError(f"Réponse API inattendue: {result}")

    return values

print(embedding_single_text("Bonjour, comment ça va ?"))  # Usage example

[-0.02679715, 0.015944129, 0.01608209, -0.09254073, -0.013292915, 0.0036471656, -0.012539151, -0.0062028645, 0.021741781, 0.0047696754, -0.00500527, -0.017772626, -0.0101934485, 0.009156706, 0.15583917, -0.024703404, 0.0050660055, 0.013385825, 0.015912466, -0.01700574, 0.024848618, 0.009266602, -0.0029189806, 0.0018577691, -0.022466503, 0.028110832, -0.0025831314, 0.0034605307, 0.018543124, 0.004596463, 0.012880009, 0.0081143165, -0.00043476198, 0.011226169, 0.001462478, 0.0055141626, 0.005826531, 0.01260169, -0.011092282, 0.004920139, 0.007530718, -0.0003668049, -0.012961787, 0.011992108, 0.01724396, -0.0024252234, 0.00016794115, -0.017665243, 0.003027438, 0.043443404, 0.002857183, 0.030294765, -0.012448233, -0.28112754, 0.00500706, 0.006602709, 0.0070549482, 0.0034689496, -0.014538607, -0.02260369, -0.0014666287, 0.014380082, -0.033578277, -0.041611586, 0.011809584, 0.0016020318, 0.018068653, 0.004138244, 0.000114284616, -0.012608933, 0.01640964, 0.0007922901, -0.018126547, -0.012258

In [2]:
import ollama

# Initialize the Ollama client.
# Named `ollama_client` (not `client`) because a later cell binds `client` to the
# Chroma persistent client; sharing the name made its meaning depend on which
# cell ran last.
ollama_client = ollama.Client()

# Define the model and the input prompt
model = "bge-m3"  # Replace with your model name
prompt = "Bonjour, comment ça va ?"

# Generate the embedding
response = ollama_client.embeddings(model=model, prompt=prompt)
print(response.embedding)  # Print the embedding response

[-0.16637776792049408, 1.0764302015304565, -1.2941112518310547, -0.14064176380634308, -0.03070148453116417, -0.8521616458892822, -0.15186455845832825, -0.33297696709632874, 0.013607017695903778, 0.4653691053390503, -0.5808588862419128, 0.5500119924545288, -0.30125564336776733, 0.05556266009807587, 0.450225293636322, -0.3812559247016907, 0.7121164798736572, -0.9701688885688782, -0.3448219299316406, -0.8217577338218689, -0.6443268060684204, 0.8086008429527283, 1.0902446508407593, -0.2563309073448181, 0.8997833132743835, 1.1762614250183105, -1.845300555229187, 0.11616362631320953, 0.6264902949333191, 0.623015284538269, -0.08663613349199295, 1.0164600610733032, -0.0742264837026596, -0.6037577986717224, -0.5020959973335266, -0.3123403787612915, 0.4475620687007904, -1.212857961654663, -0.8783820271492004, 1.4157222509384155, 0.2464241087436676, 0.1318800002336502, 0.8205568790435791, -1.3586987257003784, 0.2228715568780899, -1.4398044347763062, -0.7787986397743225, 0.37600934505462646, 0.694

# File System handler test

In [3]:
from src.file_handler import load_files_from_directory
import src.config as config
from pathlib import Path
# `__vsc_ipynb_file__` is injected only by the VS Code Jupyter extension; referencing
# it directly raises NameError under JupyterLab / nbconvert. Fall back to the
# working directory, which is assumed to be the notebook's folder — TODO confirm
# for your launcher.
notebook_file = globals().get("__vsc_ipynb_file__")
notebook_dir = Path(notebook_file).parent if notebook_file else Path.cwd()

# The notes folder sits next to the project folder (one level above the notebook's directory).
file_path = notebook_dir.parent / "hexamind-obsidian"

loaded_file = load_files_from_directory(file_path)

Loaded 26 documents from /Users/hexamind/Documents/hexamind-obsidian
Skipped 34 unsupported files.


In [4]:
# Tabular view of the loaded files; preview the first five rows.
df = pd.DataFrame(data=loaded_file)
df.head(n=5)

Unnamed: 0,name,content,source
0,UseCase-idea.md,- Recherche complète d'une personne sur le net...,/Users/hexamind/Documents/hexamind-obsidian/Us...
1,2025-04-10.md,## Lancement de ZeAgent \n`git clone https://g...,/Users/hexamind/Documents/hexamind-obsidian/20...
2,data_analysis_report.md,# Data Analysis Report\n\n## Descriptive Stati...,/Users/hexamind/Documents/hexamind-obsidian/da...
3,sleep_analysis_report.md,# Sleep Analysis Report\n\nThis report analyze...,/Users/hexamind/Documents/hexamind-obsidian/sl...
4,2025-03-25.md,### 🧩 **API & Architecture**\n- **Une seule AP...,/Users/hexamind/Documents/hexamind-obsidian/20...


In [5]:
def split_text(text: str, chunk_size: int = 1000, overlap: int = 200) -> list[str]:
    """Split text into fixed-size character chunks that overlap.

    Consecutive chunks start ``chunk_size - overlap`` characters apart, so each
    chunk repeats the last ``overlap`` characters of the previous one. The final
    chunk may be shorter than ``chunk_size``.

    Args:
        text: The string to split.
        chunk_size: Maximum number of characters per chunk.
        overlap: Number of characters shared between consecutive chunks.

    Returns:
        The list of chunks in order. An empty ``text`` yields ``[]``. When
        ``chunk_size <= overlap`` the stride would not be positive, so the
        text is returned unsplit as a single-element list.

    Raises:
        ValueError: If ``overlap`` is negative. A negative overlap makes the
            stride larger than the chunk, which would silently drop the
            characters between chunks.
    """
    if overlap < 0:
        raise ValueError(f"overlap must be >= 0, got {overlap}")

    if chunk_size <= overlap:
        return [text]  # Avoid infinite loop if overlap >= chunk_size

    text_length = len(text)
    stride = chunk_size - overlap

    chunks = []
    for start in range(0, text_length, stride):
        # Slicing clamps at the end of the string, so no explicit min() is needed.
        chunks.append(text[start:start + chunk_size])
        # Stop once a chunk reaches the end; further windows would only
        # repeat text already covered by the overlap.
        if start + chunk_size >= text_length:
            break

    return chunks

def split_document(document: dict, chunk_size: int = 1000, overlap: int = 200) -> list[dict]:
    """Turn one document into a list of per-chunk documents.

    The text under the ``"content"`` key (empty string when the key is absent)
    is passed to ``split_text``. Each resulting chunk becomes a shallow copy of
    ``document`` whose ``"content"`` is the chunk and whose ``"chunk_index"`` is
    the chunk's position. The input dict itself is not modified.
    """
    pieces = split_text(document.get("content", ""), chunk_size, overlap)

    result = []
    for position, piece in enumerate(pieces):
        clone = document.copy()
        clone.update(content=piece, chunk_index=position)
        result.append(clone)
    return result

def split_documents(documents: list[dict], chunk_size: int = 1000, overlap: int = 200) -> list[dict]:
    """Chunk every document and return all chunked documents as one flat list.

    Documents are processed in input order with ``split_document``; the chunks
    of each document stay contiguous and keep their order.
    """
    return [
        piece
        for doc in documents
        for piece in split_document(doc, chunk_size, overlap)
    ]

In [6]:
# Chunk each note's text using split_text's default chunk_size and overlap.
df["chunks"] = df["content"].apply(split_text)

In [7]:
# Number of chunks produced for each note (length of each list in the "chunks" column).
df["nb_chunks"] = df["chunks"].apply(len)

In [8]:
# Build the chunk table as a DataFrame right away so `df_doc` never holds a plain
# list: later cells index it by column (`df_doc["content"]`), which fails on a list
# when cells are re-run out of order.
df_doc = pd.DataFrame(split_documents(loaded_file))

In [9]:
# One row per chunk; preview the first five rows.
df_doc = pd.DataFrame(data=df_doc)
df_doc.head(n=5)

Unnamed: 0,name,content,source,chunk_index
0,UseCase-idea.md,- Recherche complète d'une personne sur le net...,/Users/hexamind/Documents/hexamind-obsidian/Us...,0
1,UseCase-idea.md,que a un système interne\n- pour le recrutemen...,/Users/hexamind/Documents/hexamind-obsidian/Us...,1
2,UseCase-idea.md,", Analyse des causes (volume, prix, mix, charg...",/Users/hexamind/Documents/hexamind-obsidian/Us...,2
3,UseCase-idea.md,"ectifs, le scoring risque, Vérifie la cohérenc...",/Users/hexamind/Documents/hexamind-obsidian/Us...,3
4,UseCase-idea.md,ite ou d'une page streamlit et publication sur...,/Users/hexamind/Documents/hexamind-obsidian/Us...,4


In [16]:
from src.embedding import EmbeddingOllama

emb_ollama = EmbeddingOllama()

# import chroma db and init collection

import chromadb

client = chromadb.PersistentClient(path="./chroma_db")
collection = client.get_or_create_collection(name="obsidian_notes")

# Index every chunk. `upsert` (rather than `add`) keeps this cell idempotent: the
# store is persistent and the ids are row positions, so re-running the cell must
# overwrite the records already stored under those ids instead of colliding with them.
# NOTE(review): positional ids are not stable if the notes change; rows beyond the
# new length would stay in the collection — consider ids derived from source + chunk_index.
chunk_texts = df_doc["content"].tolist()
collection.upsert(
    ids=[str(i) for i in range(len(chunk_texts))],
    documents=chunk_texts,
    metadatas=df_doc.drop(columns=["content"]).to_dict(orient="records"),
    embeddings=[emb_ollama.embed(text) for text in chunk_texts],
)


In [31]:
# similarity search
# similar to what is done in langchain
query = "Qu'est ce que Hexamind?"
query_embedding = emb_ollama.embed(query)
results = collection.query(
    query_embeddings=[query_embedding],
    n_results=3
)

# A single query embedding was sent, so the hits are read from index 0 of
# results["documents"]. Print each hit on its own; the previous
# `print([print(res) ...])` also printed a trailing list of `None` values
# (the return values of the inner print calls).
for doc_text in results["documents"][0]:
    print(doc_text)

on, changement de rôle).

- **Administrateur Hexamind**


## Exemples d’agents

**TODO**

## Scenarii d’utilisation

### Nouvel agent

1. L’utilisateur arrive sur la page de création d’un agent.
2. Il configure son agent et le teste.
3. Il sauvegarde ses réglages.
4. Le système lui propose alors de nommer son agent, de fournir une description et éventuellement de partager l’agent avec d’autres.
5. Le système propose ensuite à l’utilisateur de créer une clé API pour l’application dans laquelle il compte utiliser cet agent.
6. L’utilisateur est ensuite redirigé vers la “Liste des agents”.


### Modifier un agent existant dont l’utilisateur est le créateur

1. L’utilisateur se rend dans la section “Liste des agents”. 
2. Il en sélectionne un dont il est le créateur.
3. L’interface d’édition de l’agent lui est présentée pour lui permettre de tester et modifier l’agent.
4. Une fois les modifications faites, l’utilisateur clique sur le bouton “Enregistrer”.
5. Le service lui demande s’il s’a

In [36]:
# Display the metadata (source, name, chunk_index) returned for the hits of the query above.
results['metadatas']

[[{'source': '/Users/hexamind/Documents/hexamind-obsidian/2025-03-27.md',
   'chunk_index': 2,
   'name': '2025-03-27.md'},
  {'name': '2025-03-27.md',
   'chunk_index': 1,
   'source': '/Users/hexamind/Documents/hexamind-obsidian/2025-03-27.md'},
  {'source': '/Users/hexamind/Documents/hexamind-obsidian/Excalidraw/Drawing 2025-05-30 09.23.25.excalidraw.md',
   'chunk_index': 3,
   'name': 'Drawing 2025-05-30 09.23.25.excalidraw.md'}]]