In [5]:
import psycopg
import requests
import json
import time
import re

# -----------------------
# Configuration
# -----------------------
# NOTE(review): machine-specific absolute path; adjust it for the local environment.
conversation_file_path = "C:\\Users\\lenovo\\Chatbot-RAG\\data\\TRANS_TXT\\017_00000012.txt"
# NOTE(review): credentials are hard-coded here; consider loading them from the environment.
DB_CONNECTION_STR = "dbname=postgres user=postgres password=zaineb host=localhost port=5434"

# Ollama server and model names
OLLAMA_URL = "http://localhost:11434"
OLLAMA_EMBED_MODEL = "nomic-embed-text"
LLM_MODEL = "gemma2:2b"

# Must equal the output size of OLLAMA_EMBED_MODEL: nomic-embed-text produces
# 768-dimensional vectors, and a pgvector column declared with another size
# rejects every insert with a dimension-mismatch error.
VECTOR_DIM = 768
EMBED_TIMEOUT = 60      # seconds, per embedding request
GENERATE_TIMEOUT = 90   # seconds, per generation request
BATCH_SIZE = 5          # a short pause is inserted after this many embeddings

# -----------------------
# Chunking intelligent
# -----------------------
def create_conversation_chunks(
    file_path: str,
    min_chunk_length: int = 50,
    max_lines_per_chunk: int = 3,
) -> list[dict]:
    """Split a transcript file into chunks of consecutive utterances.

    Blank lines and lines starting with ``<`` (markup tags) are skipped.
    The remaining lines are stripped and accumulated; a chunk is emitted once
    the accumulated text reaches ``min_chunk_length`` characters and either
    ends with ``.``, ``?`` or ``!`` or already holds ``max_lines_per_chunk``
    lines. A skipped line also flushes a pending chunk that is long enough.

    A trailing remainder that is too short to form its own chunk is appended
    to the previous chunk instead of being discarded, so the end of the
    conversation is never lost. If no chunk exists yet, the remainder becomes
    the only chunk.

    Args:
        file_path: Path of the transcript to read.
        min_chunk_length: Minimum accumulated length before a chunk is emitted.
        max_lines_per_chunk: Line count that forces emission once the minimum
            length has been reached.

    Returns:
        A list of dicts with keys ``'text'`` (the joined chunk text) and
        ``'lines'`` (the individual cleaned lines of that chunk).

    Raises:
        OSError: If the file cannot be opened.
    """
    # latin-1 maps every possible byte, so it has to come last: anything
    # placed after it could never be tried.
    candidate_encodings = ("utf-8", "cp1252", "latin-1")
    encoding = "latin-1"
    lines: list[str] = []

    for enc in candidate_encodings:
        try:
            # Read the content during detection so the file is opened once.
            with open(file_path, "r", encoding=enc) as f:
                lines = f.readlines()
        except UnicodeDecodeError:
            continue
        encoding = enc
        break

    print(f"Encodage d√©tect√©: {encoding}")

    chunks: list[dict] = []
    pending_lines: list[str] = []
    pending_text = ""

    def flush() -> None:
        """Emit the pending lines as one chunk and reset the accumulator."""
        nonlocal pending_lines, pending_text
        chunks.append({
            'text': pending_text.strip(),
            'lines': pending_lines,
        })
        pending_lines = []
        pending_text = ""

    for raw_line in lines:
        line = raw_line.strip()

        # Blank lines and tags carry no text; they only close a pending chunk
        # that is already long enough.
        if not line or line.startswith("<"):
            if pending_lines and len(pending_text) >= min_chunk_length:
                flush()
            continue

        pending_lines.append(line)
        pending_text += " " + line

        if len(pending_text) >= min_chunk_length and (
            pending_text.endswith(('.', '?', '!'))
            or len(pending_lines) >= max_lines_per_chunk
        ):
            flush()

    # Keep the tail of the conversation even when it is short.
    if pending_lines:
        if len(pending_text) >= min_chunk_length or not chunks:
            flush()
        else:
            last_chunk = chunks[-1]
            last_chunk['text'] = f"{last_chunk['text']} {pending_text.strip()}"
            last_chunk['lines'].extend(pending_lines)

    print(f"Nombre de chunks cr√©√©s: {len(chunks)}")
    print(f"Premier chunk: {chunks[0]['text'][:100]}..." if chunks else "Aucun chunk")

    return chunks

# -----------------------
# Fonctions utilitaires
# -----------------------
def check_ollama_health() -> bool:
    """Return True when the Ollama server answers ``GET /api/tags`` with HTTP 200.

    Request-level failures (connection refused, timeout, invalid URL, ...)
    are reported as ``False``. Other exceptions, including
    ``KeyboardInterrupt``, are no longer swallowed.
    """
    try:
        response = requests.get(f"{OLLAMA_URL}/api/tags", timeout=5)
    except requests.exceptions.RequestException:
        return False
    return response.status_code == 200

def calculate_embeddings_ollama(text: str, retry_count: int = 3) -> list[float]:
    """Request an embedding for *text* from Ollama, retrying on failure.

    Args:
        text: The text to embed.
        retry_count: Maximum number of attempts.

    Returns:
        The embedding returned by ``/api/embeddings``, or an empty list when
        every attempt failed or returned no embedding. The function stays
        best-effort and never raises for request errors, but the reason for
        the last failure is written to stderr so it is not lost.
    """
    import sys  # local import: only needed for the failure diagnostic

    payload = {"model": OLLAMA_EMBED_MODEL, "prompt": text}
    last_problem = "no attempt was made"

    for attempt in range(retry_count):
        try:
            response = requests.post(
                f"{OLLAMA_URL}/api/embeddings",
                json=payload,
                timeout=EMBED_TIMEOUT,
            )
            response.raise_for_status()
            embedding = response.json().get("embedding", [])
            if embedding:
                return embedding
            # A well-formed answer without a vector is retried immediately.
            last_problem = "response contained no embedding"
        except Exception as exc:  # deliberate best-effort: timeouts, HTTP and JSON errors
            last_problem = f"{type(exc).__name__}: {exc}"
            # Pause before the next attempt, but not after the final one.
            if attempt < retry_count - 1:
                time.sleep(2)

    print(
        f"[embeddings] giving up after {retry_count} attempt(s): {last_problem}",
        file=sys.stderr,
    )
    return []

def embedding_to_pgvector_format(emb: list[float]) -> str:
    """Render *emb* as a bracketed, comma-separated literal such as ``[0.1,0.2]``."""
    components = [str(value) for value in emb]
    return f"[{','.join(components)}]"

def save_embedding(corpus: str, embedding: list[float], cursor) -> None:
    """Insert one row into ``embeddings`` with the text and its vector.

    The vector is sent as a text literal and cast to ``vector`` by the
    database. Nothing is committed here.
    """
    insert_sql = """INSERT INTO embeddings (corpus, embedding) VALUES (%s, %s::vector)"""
    row = (corpus, embedding_to_pgvector_format(embedding))
    cursor.execute(insert_sql, row)

def similar_corpus(input_corpus: str, cursor, top_k: int = 5) -> list[tuple]:
    """Return the stored rows closest to *input_corpus* by cosine distance.

    Args:
        input_corpus: Query text; it is embedded before the search.
        cursor: Open database cursor used to run the query.
        top_k: Maximum number of rows to return.

    Returns:
        A list of ``(id, corpus, distance)`` tuples ordered by increasing
        distance. The list is empty when ``top_k`` is not positive or when no
        embedding could be computed for the query.
    """
    limit = int(top_k)
    if limit <= 0:
        return []

    emb = calculate_embeddings_ollama(input_corpus)
    if not emb:
        return []
    emb_literal = embedding_to_pgvector_format(emb)

    # LIMIT is bound as a parameter rather than formatted into the SQL text.
    cursor.execute(
        """
        SELECT id, corpus, embedding <=> %s::vector AS distance
        FROM embeddings
        ORDER BY embedding <=> %s::vector
        LIMIT %s
        """,
        (emb_literal, emb_literal, limit),
    )
    return cursor.fetchall()

def generate_answer_ollama(user_query: str, context_texts: list[str]) -> str:
    """Ask the LLM to answer *user_query* using only *context_texts*.

    The answer is streamed from Ollama's ``/api/generate`` endpoint and each
    fragment is echoed to stdout as it arrives.

    Args:
        user_query: The user's question.
        context_texts: Retrieved excerpts to ground the answer on.

    Returns:
        The complete answer text, or a short error/timeout message. Request
        failures are reported through the return value, not raised.
    """
    if not context_texts:
        return "Aucun contexte trouv√© pour r√©pondre √† la question."

    # Number each excerpt so the model can tell them apart.
    full_context = "\n\n".join(
        f"Extrait {i+1}:\n{text}" for i, text in enumerate(context_texts)
    )

    prompt = f"""Tu es un assistant qui r√©pond en utilisant UNIQUEMENT le contexte fourni.

CONTEXTE (conversation service client):
{full_context}

QUESTION: {user_query}

INSTRUCTIONS:
- Lis TOUT le contexte attentivement
- Liste TOUS les organismes, lieux, dates et d√©tails mentionn√©s
- Structure ta r√©ponse clairement avec des puces si n√©cessaire
- N'invente RIEN qui n'est pas explicitement dans le contexte
- Si une information manque, dis-le clairement

R√âPONSE COMPL√àTE:"""

    payload = {
        "model": LLM_MODEL,
        "prompt": prompt,
        "stream": True,
        # Sampling parameters must live under "options"; a top-level
        # "temperature" key is not a request field and would be ignored.
        "options": {
            "temperature": 0.05,
            "num_predict": 300,
            "top_k": 5,
            "top_p": 0.3
        }
    }

    try:
        # The context manager releases the connection even when the stream is
        # abandoned early (on "done") or an error occurs while reading it.
        with requests.post(
            f"{OLLAMA_URL}/api/generate",
            json=payload,
            stream=True,
            timeout=GENERATE_TIMEOUT
        ) as response:
            response.raise_for_status()

            pieces = []
            print("   ", end="", flush=True)
            for line in response.iter_lines():
                if not line:
                    continue
                try:
                    json_response = json.loads(line)
                except json.JSONDecodeError:
                    # Skip any line that is not a complete JSON object.
                    continue
                chunk = json_response.get("response", "")
                pieces.append(chunk)
                print(chunk, end="", flush=True)
                if json_response.get("done", False):
                    break

        print()
        full_response = "".join(pieces)
        return full_response.strip() if full_response else "Erreur : r√©ponse vide"

    except requests.exceptions.Timeout:
        return "‚è±Ô∏è Timeout"
    except Exception as e:
        return f"Erreur : {e}"

# -----------------------
# Pipeline principal
# -----------------------
def _embed_and_store(chunks, cur):
    """Embed each chunk, insert the successes, and return ``(stored, failed)``."""
    stored = 0
    failed = 0
    total = len(chunks)

    for position, chunk in enumerate(chunks, 1):
        print(f"   [{position}/{total}] ", end="", flush=True)
        vector = calculate_embeddings_ollama(chunk['text'])
        if not vector:
            failed += 1
            print("‚úó")
        else:
            save_embedding(chunk['text'], vector, cur)
            stored += 1
            print("‚úì")

        # Brief pause after every full batch of requests.
        if position % BATCH_SIZE == 0:
            time.sleep(0.5)

    return stored, failed


def _run_question_loop(cur):
    """Read questions from stdin and print a grounded answer until the user quits."""
    rule = "=" * 70
    exit_words = ['quit', 'exit', 'q']

    while True:
        user_query = input("‚ùì Votre question : ").strip()
        if user_query.lower() in exit_words:
            print("\nüëã Au revoir!")
            return
        if not user_query:
            continue

        print("\nüîç Recherche contexte pertinent (top-5)...")
        results = similar_corpus(user_query, cur, top_k=5)
        if not results:
            print("‚ùå Aucun contexte trouv√©\n")
            continue

        context_texts = [row[1] for row in results]

        print("\nüìÑ Contexte s√©lectionn√©:")
        for rank, (_, corpus, distance) in enumerate(results, 1):
            # Cosine distance turned into a percentage, floored at zero.
            similarity = max(0, 1 - distance) * 100
            print(f"\n  [{rank}] (similarit√©: {similarity:.1f}%)")
            print(f"      {corpus[:120]}...")

        print("\n" + "-" * 70)
        print("ü§ñ G√©n√©ration r√©ponse...\n")
        answer = generate_answer_ollama(user_query, context_texts)

        print("\n" + rule)
        print("üí° R√âPONSE:")
        print(rule)
        print(answer)
        print(rule + "\n")


def main():
    """Run the whole pipeline: health check, chunking, indexing, then Q&A.

    Steps, in order: verify that Ollama is reachable, chunk the transcript,
    (re)create and empty the ``embeddings`` table, embed and store every
    chunk, and finally enter the interactive question loop. Any exception
    raised after the database connection is attempted is printed together
    with its traceback instead of propagating.
    """
    rule = "=" * 70
    print(rule)
    print("SYST√àME RAG OPTIMIS√â avec CHUNKING INTELLIGENT")
    print(f"Mod√®le: {LLM_MODEL}")
    print(rule)

    print("\nüîç V√©rification d'Ollama...")
    if not check_ollama_health():
        print("‚ùå Ollama non accessible")
        return
    print("‚úÖ Ollama OK")

    print("\nüìù Cr√©ation des chunks intelligents...")
    chunks = create_conversation_chunks(conversation_file_path)
    if not chunks:
        print("‚ùå Aucun chunk cr√©√©!")
        return

    print("\nüóÑÔ∏è  Connexion PostgreSQL...")
    try:
        with psycopg.connect(DB_CONNECTION_STR) as conn:
            conn.autocommit = True
            with conn.cursor() as cur:
                print("\nüîß Pr√©paration base de donn√©es...")
                cur.execute("CREATE EXTENSION IF NOT EXISTS vector")
                cur.execute(f"""
                    CREATE TABLE IF NOT EXISTS embeddings (
                        id SERIAL PRIMARY KEY,
                        corpus TEXT,
                        embedding vector({VECTOR_DIM})
                    )
                """)
                # Start from an empty table on every run.
                cur.execute("DELETE FROM embeddings")

                print(f"\nü§ñ G√©n√©ration embeddings ({len(chunks)} chunks)...")
                successful, failed = _embed_and_store(chunks, cur)

                conn.commit()
                print(f"\n‚úÖ Embeddings: {successful} OK, {failed} KO")

                if successful == 0:
                    print("‚ùå √âchec cr√©ation embeddings")
                    return

                print("\n" + rule)
                print("üí¨ MODE QUESTIONS-R√âPONSES")
                print(rule)
                print("Tapez 'quit' pour sortir\n")

                _run_question_loop(cur)

    except Exception as e:
        import traceback
        print(f"‚ùå Erreur : {e}")
        traceback.print_exc()

# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

SYST√àME RAG OPTIMIS√â avec CHUNKING INTELLIGENT
Mod√®le: gemma2:2b

üîç V√©rification d'Ollama...
‚úÖ Ollama OK

üìù Cr√©ation des chunks intelligents...
Encodage d√©tect√©: latin-1
Nombre de chunks cr√©√©s: 19
Premier chunk: h: U B S bonjour c: oui bonjour e j'appelle je sais pas si j'appelle au bon endroit e...

üóÑÔ∏è  Connexion PostgreSQL...

üîß Pr√©paration base de donn√©es...

ü§ñ G√©n√©ration embeddings (19 chunks)...
   [1/19] ‚úì
   [2/19] ‚úì
   [3/19] ‚úì
   [4/19] ‚úì
   [5/19] ‚úì
   [6/19] ‚úì
   [7/19] ‚úì
   [8/19] ‚úì
   [9/19] ‚úì
   [10/19] ‚úì
   [11/19] ‚úì
   [12/19] ‚úì
   [13/19] ‚úì
   [14/19] ‚úì
   [15/19] ‚úì
   [16/19] ‚úì
   [17/19] ‚úì
   [18/19] ‚úì
   [19/19] ‚úì

‚úÖ Embeddings: 19 OK, 0 KO

üí¨ MODE QUESTIONS-R√âPONSES
Tapez 'quit' pour sortir


üîç Recherche contexte pertinent (top-5)...

üìÑ Contexte s√©lectionn√©:

  [1] (similarit√©: 80.3%)
      h: ben √©coutez vous avez c: son espagnol h: un organisme qui s'appelle e "English Connectio