In [6]:
import psycopg
import requests
import json
import time

# -----------------------
# Configuration
# -----------------------
conversation_file_path = "C:\\Users\\lenovo\\Chatbot-RAG\\data\\TRANS_TXT\\017_00000012.txt"
DB_CONNECTION_STR = "dbname=postgres user=postgres password=zaineb host=localhost port=5434"

# OLLAMA
OLLAMA_URL = "http://localhost:11434"
OLLAMA_EMBED_MODEL = "nomic-embed-text"
LLM_MODEL = "gemma2:2b"

VECTOR_DIM = 4096
EMBED_TIMEOUT = 60
GENERATE_TIMEOUT = 90
BATCH_SIZE = 5

# -----------------------
# Fonctions utilitaires
# -----------------------
def detect_encoding(file_path: str) -> str:
    encodings = ['utf-8', 'latin-1', 'cp1252', 'iso-8859-1']
    for encoding in encodings:
        try:
            with open(file_path, 'r', encoding=encoding) as file:
                file.read()
            return encoding
        except UnicodeDecodeError:
            continue
    return 'latin-1'

def create_conversation_list(file_path: str) -> list[str]:
    encoding = detect_encoding(file_path)
    print(f"Encodage d√©tect√©: {encoding}")
    
    with open(file_path, "r", encoding=encoding) as file:
        text = file.read()
        text_list = text.split("\n")
        filtered_list = [
            chaine.removeprefix("     ")
            for chaine in text_list
            if chaine.strip() and not chaine.startswith("<")
        ]
        print(f"Nombre de phrases: {len(filtered_list)}")
        print("Premi√®res phrases:", filtered_list[:3])
        return filtered_list

def check_ollama_health() -> bool:
    try:
        response = requests.get(f"{OLLAMA_URL}/api/tags", timeout=5)
        return response.status_code == 200
    except:
        return False

def calculate_embeddings_ollama(text: str, retry_count: int = 3) -> list[float]:
    payload = {"model": OLLAMA_EMBED_MODEL, "prompt": text}
    
    for attempt in range(retry_count):
        try:
            response = requests.post(
                f"{OLLAMA_URL}/api/embeddings", 
                json=payload, 
                timeout=EMBED_TIMEOUT
            )
            response.raise_for_status()
            embedding = response.json().get("embedding", [])
            if embedding:
                return embedding
        except requests.exceptions.Timeout:
            print(f"  ‚è±Ô∏è Timeout tentative {attempt + 1}/{retry_count} pour: {text[:50]}...")
            if attempt < retry_count - 1:
                time.sleep(2)
        except Exception as e:
            print(f"  ‚ùå Erreur tentative {attempt + 1}/{retry_count}: {e}")
            if attempt < retry_count - 1:
                time.sleep(2)
    
    print(f"  ‚ö†Ô∏è √âchec apr√®s {retry_count} tentatives pour: {text[:50]}...")
    return []

def embedding_to_pgvector_format(emb: list[float]) -> str:
    return "[" + ",".join(map(str, emb)) + "]"

def save_embedding(corpus: str, embedding: list[float], cursor) -> None:
    emb_literal = embedding_to_pgvector_format(embedding)
    cursor.execute(
        """INSERT INTO embeddings (corpus, embedding) VALUES (%s, %s::vector)""",
        (corpus, emb_literal)
    )

def similar_corpus(input_corpus: str, cursor, top_k: int = 10) -> list[tuple]:
    """Recherche avec distance cosine - TOP_K AUGMENT√â √Ä 10"""
    emb = calculate_embeddings_ollama(input_corpus)
    if not emb:
        return []
    emb_literal = embedding_to_pgvector_format(emb)
    
    # Distance cosine (<=>)
    cursor.execute(
        f"""
        SELECT id, corpus, embedding <=> %s::vector AS distance
        FROM embeddings
        ORDER BY embedding <=> %s::vector
        LIMIT {top_k}
        """,
        (emb_literal, emb_literal)
    )
    return cursor.fetchall()

def generate_answer_ollama(user_query: str, context_texts: list[str]) -> str:
    if not context_texts:
        return "Aucun contexte trouv√© pour r√©pondre √† la question."
    
    # Num√©roter clairement chaque √©l√©ment de contexte
    context = "\n".join([f"[{i+1}] {text}" for i, text in enumerate(context_texts)])
    
    # Prompt d√©taill√© avec instructions strictes
    prompt = f"""Tu es un assistant qui r√©pond en utilisant UNIQUEMENT les informations du contexte.

CONTEXTE (conversation entre hotesse 'h:' et client 'c:'):
{context}

QUESTION: {user_query}

INSTRUCTIONS IMPORTANTES:
1. Lis TOUT le contexte attentivement ligne par ligne
2. Liste TOUS les organismes, lieux et informations mentionn√©s
3. N'oublie AUCUN d√©tail (noms d'organismes, adresses, dates)
4. Structure ta r√©ponse clairement
5. N'invente RIEN qui n'est pas dans le contexte

R√âPONSE COMPL√àTE ET D√âTAILL√âE:"""
    
    payload = {
        "model": LLM_MODEL,
        "prompt": prompt,
        "temperature": 0.05,  # Tr√®s d√©terministe
        "stream": True,
        "options": {
            "num_predict": 250,  # Plus de tokens pour r√©ponse compl√®te
            "top_k": 5,
            "top_p": 0.3
        }
    }
    
    try:
        response = requests.post(
            f"{OLLAMA_URL}/api/generate", 
            json=payload, 
            stream=True,
            timeout=GENERATE_TIMEOUT
        )
        response.raise_for_status()
        
        full_response = ""
        print("   ", end="", flush=True)
        for line in response.iter_lines():
            if line:
                try:
                    json_response = json.loads(line)
                    chunk = json_response.get("response", "")
                    full_response += chunk
                    print(chunk, end="", flush=True)
                    if json_response.get("done", False):
                        break
                except json.JSONDecodeError:
                    continue
        
        print()
        return full_response.strip() if full_response else "Erreur : r√©ponse vide"
    
    except requests.exceptions.Timeout:
        return "‚è±Ô∏è Timeout : essayez une question plus simple."
    except Exception as e:
        return f"Erreur : {e}"

# -----------------------
# Pipeline principal
# -----------------------
def main():
    print("=" * 60)
    print(f"SYST√àME RAG avec OLLAMA ({LLM_MODEL})")
    print("=" * 60)
    
    print("\nüîç V√©rification d'Ollama...")
    if not check_ollama_health():
        print("‚ùå Ollama n'est pas accessible")
        print("   D√©marrez-le avec: ollama serve")
        return
    print("‚úÖ Ollama est accessible")
    
    corpus_list = create_conversation_list(conversation_file_path)
    if not corpus_list:
        print("‚ùå Aucun texte trouv√©!")
        return
    
    print("\nüóÑÔ∏è  Connexion √† PostgreSQL...")
    try:
        with psycopg.connect(DB_CONNECTION_STR) as conn:
            conn.autocommit = True
            with conn.cursor() as cur:
                print("\nüîß Pr√©paration de la base...")
                cur.execute("CREATE EXTENSION IF NOT EXISTS vector")
                cur.execute(f"""
                    CREATE TABLE IF NOT EXISTS embeddings (
                        id SERIAL PRIMARY KEY,
                        corpus TEXT,
                        embedding vector({VECTOR_DIM})
                    )
                """)
                cur.execute("DELETE FROM embeddings")
                
                print(f"\nü§ñ G√©n√©ration des embeddings ({len(corpus_list)} phrases)...")
                print("   Patience...")
                
                successful = 0
                failed = 0
                
                for i, corpus in enumerate(corpus_list, 1):
                    if corpus.strip():
                        print(f"   [{i}/{len(corpus_list)}] ", end="", flush=True)
                        emb = calculate_embeddings_ollama(corpus)
                        if emb:
                            save_embedding(corpus, emb, cur)
                            successful += 1
                            print("‚úì")
                        else:
                            failed += 1
                            print("‚úó")
                        
                        if i % BATCH_SIZE == 0:
                            time.sleep(0.5)
                
                conn.commit()
                print(f"\n‚úÖ Embeddings: {successful} r√©ussis, {failed} √©chou√©s")
                
                if successful == 0:
                    print("‚ùå Impossible de continuer sans embeddings")
                    return
                
                print("\n" + "="*60)
                print("üí¨ MODE INTERACTIF")
                print("="*60)
                print("Tapez 'quit' pour sortir\n")
                
                while True:
                    user_query = input("‚ùì Question : ").strip()
                    if user_query.lower() in ['quit', 'exit', 'q']:
                        print("üëã Au revoir!")
                        break
                    if not user_query:
                        continue
                    
                    print("\nüîç Recherche (top-10)...")
                    results = similar_corpus(user_query, cur, top_k=10)
                    
                    if results:
                        context_texts = [r[1] for r in results]
                        print("\nüìÑ Contexte trouv√© :")
                        for i, (_, corpus, distance) in enumerate(results):
                            print(f"  [{i+1}] {corpus[:70]}... (dist: {distance:.3f})")
                        
                        print("\nü§ñ G√©n√©ration de la r√©ponse...\n")
                        answer = generate_answer_ollama(user_query, context_texts)
                        print(f"\n{'='*60}")
                        print(f"üí° R√âPONSE:\n{answer}")
                        print(f"{'='*60}\n")
                    else:
                        print("‚ùå Aucun contexte trouv√©.\n")
    
    except Exception as e:
        print(f"‚ùå Erreur : {e}")
        import traceback
        traceback.print_exc()

if __name__ == "__main__":
    main()

SYST√àME RAG avec OLLAMA (gemma2:2b)

üîç V√©rification d'Ollama...
‚úÖ Ollama est accessible
Encodage d√©tect√©: latin-1
Nombre de phrases: 43
Premi√®res phrases: ['h: U B S bonjour', "c: oui bonjour e j'appelle je sais pas si j'appelle au bon endroit e", 'h: je vous √©coute']

üóÑÔ∏è  Connexion √† PostgreSQL...

üîß Pr√©paration de la base...

ü§ñ G√©n√©ration des embeddings (43 phrases)...
   Patience...
   [1/43] ‚úì
   [2/43] ‚úì
   [3/43] ‚úì
   [4/43] ‚úì
   [5/43] ‚úì
   [6/43] ‚úì
   [7/43] ‚úì
   [8/43] ‚úì
   [9/43] ‚úì
   [10/43] ‚úì
   [11/43] ‚úì
   [12/43] ‚úì
   [13/43] ‚úì
   [14/43] ‚úì
   [15/43] ‚úì
   [16/43] ‚úì
   [17/43] ‚úì
   [18/43] ‚úì
   [19/43] ‚úì
   [20/43] ‚úì
   [21/43] ‚úì
   [22/43] ‚úì
   [23/43] ‚úì
   [24/43] ‚úì
   [25/43] ‚úì
   [26/43] ‚úì
   [27/43] ‚úì
   [28/43] ‚úì
   [29/43] ‚úì
   [30/43] ‚úì
   [31/43] ‚úì
   [32/43] ‚úì
   [33/43] ‚úì
   [34/43] ‚úì
   [35/43] ‚úì
   [36/43] ‚úì
   [37/43] ‚úì
   [38/43] ‚úì
   [39/43] ‚úì
   [40/43