In [None]:
# Standard library imports
import os
import pickle
import random
import sys
from datetime import datetime
from pathlib import Path

# Third party imports
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import pandas as pd
import re
from azure.ai.inference import EmbeddingsClient
from azure.core.credentials import AzureKeyCredential
from dotenv import load_dotenv
from openai import AzureOpenAI
from pydantic import BaseModel
from tqdm import tqdm

# ChromaDB imports
import chromadb
from chromadb import Documents, EmbeddingFunction, Embeddings
from typing import Dict, List, Optional, Tuple, Union

# Work out the project root and make its "src" folder importable.
try:
    # Scripts define __file__; notebooks raise NameError here instead.
    current_dir = os.path.dirname(os.path.abspath(__file__))
except NameError:
    # No __file__ in notebooks: assume the kernel runs in notebooks/riziv_dataset/,
    # i.e. two levels below the project root.
    project_root = os.path.abspath(os.path.join(os.getcwd(), "../.."))
else:
    # "src" is assumed to sit next to the script's folder, one level up.
    project_root = os.path.abspath(os.path.join(current_dir, ".."))

src_path = os.path.join(project_root, "src")
# Register the folder only once so re-running the cell does not grow sys.path.
if sys.path.count(src_path) == 0:
    sys.path.append(src_path)

# Local application imports

from riziv.riziv_article_code_format_utils import (
    standardize_article_number,
    std_to_sort_tuple,
)
from riziv.riziv_graph_building_utils import create_base_document_graph
from riziv.riziv_graph_visualization_utils import (
    visualize_node_1hop,
    visualize_node_2hop,
)

from riziv.riziv_sequence_chunking_utils import (
    split_text_with_overlap,
    verify_overlap,
)

#from src.retrieval_utils import ( 
#    extract_date_from_query, 
#    get_query_embedding, 
#    get_valid_graph_at_date, 
#    create_chroma_collection_and_retrieve_top_k, 
#    filter_relevant_evidence,
#    get_document_context,
#    build_sources_citation,
#    #get_article_title, 
#    #format_article_number,
#    generate_final_answer
#)

In [None]:
# Imports for the retrieval pipeline.
#
# The '../' path entry is registered BEFORE anything is imported from the
# `src` package. Previously the first `src` import ran ahead of the
# sys.path.append call, so on a fresh kernel it could only succeed if `src`
# happened to be importable some other way.
# NOTE: '../' is relative to the kernel's working directory.
import sys

if '../' not in sys.path:  # keep the cell idempotent across re-runs
    sys.path.append('../')

import pickle

import pandas as pd
import torch
from scipy.spatial.distance import cosine

from src.graphsage import GraphSAGE
from src.retrieval_utils import (
    extract_date_from_query,
    get_query_embedding,
    get_valid_graph_at_date,
    create_chroma_collection_and_retrieve_top_k,
    filter_relevant_evidence,
    get_document_context,
    build_sources_citation,
    get_article_title,
    format_article_number,
    generate_final_answer,
    get_sage_embedding,
)

In [None]:
# Sample user question (written in French) that the cells below embed,
# scan for a date, and answer. The text is passed to the pipeline as-is.
query = """Je vous contacte par rapport à une pratique médicale qui me paraît douteuse. Ce soir, j’avais rdv chez XXX à Bruxelles pour une consultation avec un médecin en vue d’un possible traitement laser pour une cicatrise.
La consultation ne dure que 2-3min et je demande au médecin s’il peut immédiatement commencer le traitement, ce qu’il me confirme.
Après la séance laser, au moment de payer, la dame de l’accueil me demande 70€ pour le laser et 50€ pour la consultation avec le docteur. Je lui demande une feuille de soins pour la consultation avec le médecin, ce qu’elle refuse prétextant que les médecins esthétiques ne remettent pas de reçus aux patients.
J’ai trouvé ça très curieux et j’ai insisté mais la personne à l’accueil s’est énervée. Finalement, le docteur ne l’a fait payer que la séance laser à 70€ (sans reçu donc) mais je me suis fait traiter comme une malpropre.
Pouvez-vous me confirmer s’il est vrai qu’un médecin esthétique n’est pas tenu de remettre une attestation de soins pour une consultation?"""

In [None]:
# End-to-end retrieval pipeline for `query`: load data -> extract date ->
# prune graph -> vector search -> filter evidence -> gather context ->
# build citations -> generate the final answer.

####################################
# Load graph w/embeddings and Article's data
####################################

print('-------------------------')
print('Load data')

# SECURITY NOTE: pickle.load can execute arbitrary code while deserialising.
# Only load this file if it comes from a trusted source.
with open('../data/document_graph_with_embeddings.pkl', 'rb') as f:
    G_emb = pickle.load(f)

#with open('graph_sage.pkl', 'rb') as f:
#    G_emb = pickle.load(f)

workArticlePlusLanguageFR = pd.read_csv('../data/df_workArticlePlusLanguageFR.csv')

query_embedding = get_query_embedding(query)

print('-------------------------')

####################################
# Extract relevant date from user query
####################################

print('-------------------------')
print('Extract_date_from_query')

relevant_date = extract_date_from_query(query)
print(f"Extracted date: {relevant_date}")

print('-------------------------')

####################################
# Prune graph
####################################

print('-------------------------')
print('Get_valid_graph_at_date')

G_at_date = get_valid_graph_at_date(G_emb, relevant_date)

print('-------------------------')

####################################
# Instantiate ChromaDB and query the vector database
####################################

print('-------------------------')
print('Create_chroma_collection_and_retrieve_top_k')

chroma_results = create_chroma_collection_and_retrieve_top_k(
    G=G_at_date,  # Graph filtered by date
    query_embedding=query_embedding,  # Query embedding
    k=10  # Number of relevant docs to retrieve
)

# Take the first entry of 'documents' (presumably the results for the single
# query that was sent — confirm against the helper's return format).
chroma_results_texts = chroma_results['documents'][0]

print('-------------------------')

####################################
# Filter retrieved evidence
####################################

print('-------------------------')
print('Filter_relevant_evidence')

relevant_evidence = filter_relevant_evidence(
    query=query,
    evidence_texts=chroma_results_texts
)

# Each entry is handled as an (index, text) pair: the text is printed here,
# the index is used in the next stage to look up the matching chunk id.
for _evidence_idx, evidence_text in relevant_evidence:
    print(evidence_text, "\n")

print('-------------------------')

####################################
# Get context of relevant evidence
####################################

print('-------------------------')
print('Get_document_context')

# Obtain ids of relevant text chunks
relevant_chunk_ids = [chroma_results['ids'][0][idx] for idx, _ in relevant_evidence]

# Get context
context = get_document_context(G=G_at_date, chunk_ids=relevant_chunk_ids)

# Optional: show the results. Article / act details are printed only when
# the corresponding entry is non-empty.
for chunk_info in context:
    print(f"\nChunk: {chunk_info['chunk_id']}")
    if chunk_info['article_info']:
        print(f"Article: {chunk_info['article_info']['sort_tuple']}")
    if chunk_info['act_info']:
        print(f"Act: {chunk_info['act_info']['title_short']}")

print('-------------------------')

####################################
# Build citation's block
####################################

print('-------------------------')
print('Build_sources_citation')

citation = build_sources_citation(context, workArticlePlusLanguageFR)
print(citation)

print('-------------------------')

####################################
# Produce final answer
####################################

print('-------------------------')
print('Generate_final_answer')

final_answer = generate_final_answer(query, relevant_evidence, context, workArticlePlusLanguageFR)
print(final_answer + "\n\n\n" + citation)

print('-------------------------')

In [None]:
# Debug: dump the raw result object returned by
# create_chroma_collection_and_retrieve_top_k.
print(chroma_results)

In [None]:
# Debug: show the evidence kept by filter_relevant_evidence as the cell output.
relevant_evidence

In [None]:
# NOTE(review): range(0) is empty, so the loop body never executes and this
# cell prints nothing. It looks like leftover scratch code.
for i in range(0):
    print(i)

In [None]:
# Debug: echo the user question being processed.
print(query)

In [None]:
# Debug: print the filtered evidence and, on the next line, how many entries it has.
print(relevant_evidence, len(relevant_evidence), sep="\n")

In [None]:
# Debug: print the context gathered for the relevant chunks and how many
# context entries there are. The count is taken from `context` itself; this
# cell previously printed len(relevant_evidence), which the preceding cell
# already reports.
print(context)
print(len(context))

In [None]:
# Print every retained piece of evidence with its article title and the
# short title of its act.
# NOTE(review): assumes context[i] describes relevant_evidence[i] — confirm
# that get_document_context returns one entry per chunk id, in order.
for i, (_evidence_idx, piece_of_ev) in enumerate(relevant_evidence):
    chunk_info = context[i]
    article_info = chunk_info['article_info']
    act_info = chunk_info['act_info']

    # The pipeline cell treats article_info / act_info as possibly empty, so
    # fall back to a placeholder here instead of failing on the subscript.
    if article_info:
        ev_article_n = get_article_title(
            format_article_number(article_info['article_number']),
            workArticlePlusLanguageFR,
        )
    else:
        ev_article_n = "N/A"
    ev_act_title = act_info['title_short'] if act_info else "N/A"

    print(f"Evidence {i}", "\n", ev_article_n, " - ", ev_act_title, "\n", piece_of_ev, "\n\n")

In [None]:
# Preview the articles table. Ending the cell with .head() shows the first
# rows through the notebook's rich display instead of a plain-text print of
# the whole DataFrame.
workArticlePlusLanguageFR.head()

In [None]:
# Stand-alone re-run of the answer-generation step (same calls as the last
# stage of the pipeline cell); relies on `citation` computed there.
for banner in ('-------------------------', 'Generate_final_answer'):
    print(banner)

final_answer = generate_final_answer(query, relevant_evidence, context, workArticlePlusLanguageFR)
# Answer first, then three newlines, then the citation block.
print("\n\n\n".join((final_answer, citation)))

print('-------------------------')