In [2]:
!pip install faiss-cpu


Collecting faiss-cpu
  Downloading faiss_cpu-1.13.2-cp310-abi3-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (7.6 kB)
Downloading faiss_cpu-1.13.2-cp310-abi3-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (23.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m23.8/23.8 MB[0m [31m42.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.13.2


In [3]:
from pathlib import Path
import json
import numpy as np
import faiss
from sentence_transformers import SentenceTransformer


In [6]:
# Directory holding the prebuilt retrieval artifacts (FAISS index + chunk metadata).
INDEX_DIR = Path("/content/drive/MyDrive/projects/chu_chat_bot/data/index")

# The FAISS Python bindings take a plain string filename, hence str().
index = faiss.read_index(str(INDEX_DIR / "index.faiss"))

with open(INDEX_DIR / "chunks_metadata.json", "r", encoding="utf-8") as f:
    chunks = json.load(f)

# NOTE(review): presumably the same model that produced the vectors stored in
# the index — confirm against the indexing notebook.
model = SentenceTransformer("sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")

print("Index size:", index.ntotal)
print("Chunks:", len(chunks))

# retrieve() uses FAISS row ids as positions in `chunks`, so the two artifacts
# must describe the same number of rows; fail here rather than return wrong text later.
if index.ntotal != len(chunks):
    raise ValueError(
        f"Index/metadata mismatch: index has {index.ntotal} vectors "
        f"but metadata has {len(chunks)} chunks"
    )


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/229 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/122 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/645 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/471M [00:00<?, ?B/s]

Loading weights:   0%|          | 0/199 [00:00<?, ?it/s]

BertModel LOAD REPORT from: sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2
Key                     | Status     |  | 
------------------------+------------+--+-
embeddings.position_ids | UNEXPECTED |  | 

Notes:
- UNEXPECTED	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.


tokenizer_config.json:   0%|          | 0.00/526 [00:00<?, ?B/s]



tokenizer.json:   0%|          | 0.00/9.08M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Index size: 1425
Chunks: 1425


In [8]:
def retrieve(query: str, top_k_retrieve=10):
    """Embed ``query`` and return the nearest chunks from the FAISS index.

    Args:
        query: Free-text question to embed with the module-level ``model``.
        top_k_retrieve: Number of neighbours requested from ``index.search``.

    Returns:
        A list of ``{"score": float, "chunk": <entry of chunks>}`` dicts, in the
        order the index returned them. Hits with a negative id are dropped
        (FAISS reports -1 when it has fewer than k results).
    """
    # The embedding is L2-normalised by the encoder and cast to float32
    # before being handed to the index.
    query_vec = np.asarray(
        model.encode([query], normalize_embeddings=True),
        dtype=np.float32,
    )

    scores, idxs = index.search(query_vec, top_k_retrieve)

    # Only one query was submitted, so row 0 holds all of its hits.
    return [
        {"score": float(hit_score), "chunk": chunks[hit_id]}
        for hit_score, hit_id in zip(scores[0], idxs[0])
        if hit_id >= 0
    ]


In [21]:
def select_context(results, sim_threshold=0.50, top_k=4, max_per_source=2):
    """Pick the hits that will be shown to the LLM as context.

    Hits scoring below ``sim_threshold`` are discarded; the remainder are taken
    in descending score order, keeping at most ``max_per_source`` hits per
    ``chunk["source"]`` and at most ``top_k`` hits overall.

    Args:
        results: List of ``{"score": float, "chunk": {"source": ..., ...}}`` dicts.
        sim_threshold: Minimum score a hit needs to be considered.
        top_k: Maximum number of hits to return; values <= 0 select nothing.
        max_per_source: Maximum number of hits taken from a single source.

    Returns:
        ``(selected, confidence)`` where ``confidence`` is the score of the best
        selected hit, or ``0.0`` when nothing was selected.
    """
    # Guard first: the loop below only checks the limit after appending,
    # so without this a non-positive top_k would still yield one hit.
    if top_k <= 0:
        return [], 0.0

    ranked = sorted(
        (r for r in results if r["score"] >= sim_threshold),
        key=lambda r: r["score"],
        reverse=True,
    )

    selected = []
    per_source = {}
    for r in ranked:
        src = r["chunk"]["source"]
        taken = per_source.get(src, 0)
        if taken >= max_per_source:
            continue
        selected.append(r)
        per_source[src] = taken + 1
        if len(selected) >= top_k:
            break

    confidence = selected[0]["score"] if selected else 0.0
    return selected, confidence


In [22]:
def build_context_block(selected):
    """Render the selected hits as the bullet list embedded in the prompt.

    Args:
        selected: List of hits, each with a ``"chunk"`` dict providing
            ``"source"``, ``"text"`` and optionally ``"page_start"`` / ``"page"``.

    Returns:
        One ``- Source: ... (page N)`` / ``Excerpt: ...`` entry per hit, joined
        by newlines; an empty string when ``selected`` is empty.
    """
    lines = []
    for r in selected:
        c = r["chunk"]
        # Same lookup order as ask(): prefer "page_start", fall back to "page",
        # so the page shown to the LLM matches the one reported in the sources.
        page = c.get("page_start", c.get("page", None))
        # Flatten the excerpt to one line so each bullet stays compact.
        text = c["text"].strip().replace("\n", " ")
        lines.append(f"- Source: {c['source']} (page {page})\n  Excerpt: {text}")
    return "\n".join(lines)


# Instruction preamble placed at the top of every prompt built by build_prompt().
SYSTEM_RULES = """You are an internal HR assistant.
Rules:
- Answer ONLY using the provided context excerpts.
- If the context does not contain the answer, say you do not have enough information.
- Do NOT invent policies or numbers.
- Always provide sources at the end in this format:
  Sources:
  - filename (page X)
Use clear, concise French.
"""


def build_prompt(question, context_block):
    """Assemble the full prompt text: rules, context excerpts, then the question.

    Args:
        question: The user's question, inserted after ``Question: ``.
        context_block: Pre-rendered context excerpts (see build_context_block).

    Returns:
        The prompt string, ending with an ``Answer:`` line and a trailing newline.
    """
    # Empty entries produce the blank separator lines and the final newline.
    sections = (
        SYSTEM_RULES,
        "",
        "Context excerpts:",
        f"{context_block}",
        "",
        f"Question: {question}",
        "Answer:",
        "",
    )
    return "\n".join(sections)


In [None]:
import os
from getpass import getpass

# Never store the key in the notebook: reuse it when the environment already
# provides one, otherwise ask for it interactively (the input is not echoed).
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")


In [23]:
from openai import OpenAI
import os


# Raises KeyError when OPENAI_API_KEY is not set, which surfaces a missing key early.
client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])


def call_llm(prompt, model_name="gpt-4o-mini", temperature=0.2):
    """Send ``prompt`` to the chat completions endpoint and return the reply text.

    Args:
        prompt: Full prompt text, sent as the user message.
        model_name: Chat model identifier passed to the API.
        temperature: Sampling temperature passed to the API.

    Returns:
        The ``content`` of the first choice's message.
    """
    response = client.chat.completions.create(
        model=model_name,
        messages=[
            {"role": "system", "content": "You are a careful HR assistant."},
            {"role": "user", "content": prompt},
        ],
        temperature=temperature,
    )
    return response.choices[0].message.content


In [26]:
import unicodedata

# Phrases that signal a question about the user's own HR data, which this
# assistant cannot look up.
PERSONAL_PATTERNS = [
    "mon solde", "mes congés restants", "mon salaire", "ma paie", "mon contrat",
    "mon dossier", "mon planning", "mes heures", "mes bulletins"
]


def _normalize_for_matching(text):
    """Casefold, strip diacritics and collapse whitespace for tolerant matching."""
    decomposed = unicodedata.normalize("NFKD", text.casefold())
    without_marks = "".join(ch for ch in decomposed if not unicodedata.combining(ch))
    return " ".join(without_marks.split())


def is_personal_request(q):
    """Return True when ``q`` contains one of the ``PERSONAL_PATTERNS`` phrases.

    Both the question and the patterns are normalized first, so the match is
    insensitive to case, to accents (typed, omitted, or encoded as combining
    characters) and to repeated whitespace.

    Args:
        q: The user's question.

    Returns:
        bool: True if any pattern occurs in the normalized question.
    """
    ql = _normalize_for_matching(q)
    # Patterns are normalized on each call so later edits to the list are honored.
    return any(_normalize_for_matching(p) in ql for p in PERSONAL_PATTERNS)


In [28]:
def ask(question: str):
    """Answer ``question`` from the indexed documents, or refuse.

    Flow: refuse personal-data requests up front, retrieve and filter context,
    refuse when no context survives filtering, otherwise query the LLM.

    Args:
        question: The user's question.

    Returns:
        A dict with ``answer``, ``sources`` (list of ``{"source", "page"}``),
        ``confidence`` and ``refused``; successful answers also carry
        ``debug_prompt`` with the exact prompt sent to the LLM.
    """
    # Personal HR data is out of reach for this assistant: refuse without retrieval.
    if is_personal_request(question):
        personal_refusal = (
            "Je ne peux pas accéder à vos informations personnelles (solde, paie, dossier). "
            "Veuillez consulter le SIRH / l’intranet RH ou contacter votre service RH."
        )
        return {
            "answer": personal_refusal,
            "sources": [],
            "confidence": 0.0,
            "refused": True,
        }

    selected, confidence = select_context(retrieve(question))

    # No usable context: refuse instead of calling the LLM.
    if not selected:
        return {
            "answer": "Je n’ai pas assez d’informations dans les documents disponibles pour répondre de façon fiable.",
            "sources": [],
            "confidence": confidence,
            "refused": True,
        }

    prompt = build_prompt(question, build_context_block(selected))
    answer = call_llm(prompt)

    # Report where each context excerpt came from, preferring "page_start" over "page".
    sources = [
        {
            "source": hit["chunk"]["source"],
            "page": hit["chunk"].get("page_start", hit["chunk"].get("page", None)),
        }
        for hit in selected
    ]

    return {
        "answer": answer,
        "sources": sources,
        "confidence": confidence,
        "refused": False,
        "debug_prompt": prompt,
    }


In [29]:
# Smoke test: two document questions and one personal-data request
# that is expected to be refused.
tests = [
    "Peut-on prendre plus de 6 semaines de congés consécutives ?",
    "Quelles conditions pour un congé longue maladie ?",
    "Je veux connaître mon solde de congés, tu peux vérifier ?",
]

for q in tests:
    out = ask(q)
    print(f"\nQ: {q}")
    print(f"Confidence: {out['confidence']} Refused: {out['refused']}")
    print(f"Sources: {out['sources']}")
    print(f"Answer: {out['answer']}")



Q: Peut-on prendre plus de 6 semaines de congés consécutives ?
Confidence: 0.7506436109542847 Refused: False
Sources: [{'source': 'reglement-interieur.pdf', 'page': 212}, {'source': 'RI_UA.pdf', 'page': 70}, {'source': 'RI_UA.pdf', 'page': 68}, {'source': 'reglement-interieur.pdf', 'page': 207}]
Answer: Non, hors congés bonifiés, il n'est pas autorisé de prendre plus de 6 semaines consécutives de congés, sauf pour des raisons tenant au bon fonctionnement du service et après validation du chef de service. 

Sources:
- RI_UA.pdf (page 70)

Q: Quelles conditions pour un congé longue maladie ?
Confidence: 0.6903470754623413 Refused: False
Sources: [{'source': 'reglement-interieur.pdf', 'page': 246}, {'source': 'reglement-interieur.pdf', 'page': 241}, {'source': 'RI_UA.pdf', 'page': 74}, {'source': 'RI_UA.pdf', 'page': 68}]
Answer: Pour bénéficier d'un congé longue maladie, la maladie doit :
- mettre l’intéressé dans l’impossibilité d’exercer ses fonctions,
- rendre nécessaire un traitemen