In [1]:
import sys
sys.path.append('../')

## RAG to create the Datalog code

In [2]:
import os
import torch
from langchain.document_loaders import PyMuPDFLoader
from langchain.vectorstores import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
# Pick the torch device used by the embedding model: the GPU when PyTorch
# reports CUDA as available, the CPU otherwise.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(device)

cpu


In [3]:
from src.config import PATH_PROJECT
# Directory holding the persisted Chroma index, so that documents only have to
# be embedded on the first run.
CHROMA_PERSIST_DIR = os.path.join(PATH_PROJECT, "data/chroma_db/")

# Indexing and querying must use the same embedding model, so it is built once
# up front instead of repeating the constructor call in each branch below.
embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2",
                                        model_kwargs={'device': device}, encode_kwargs={'device': device})

if not os.path.exists(CHROMA_PERSIST_DIR) or not os.listdir(CHROMA_PERSIST_DIR):
    print("🆕 Création de la base Chroma et indexation des documents...")

    # Load every PDF located directly under the project's data directory.
    data_dir = os.path.join(PATH_PROJECT, "data/")
    documents = []

    # os.listdir returns entries in arbitrary order; sorting keeps the order in
    # which documents are chunked and indexed the same from one run to the next.
    for file in sorted(os.listdir(data_dir)):
        if file.endswith(".pdf"):
            pdf_path = os.path.join(data_dir, file)
            pdf_loader = PyMuPDFLoader(pdf_path)
            documents.extend(pdf_loader.load())

    # Split the documents into overlapping chunks.
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    docs = text_splitter.split_documents(documents)

    # Fail with an explicit message instead of trying to build an index from nothing.
    if not docs:
        raise FileNotFoundError(f"No PDF content to index under {data_dir}")

    # Build the Chroma store from the chunks and write it to disk.
    # NOTE(review): the recorded output of this cell shows a warning on the Chroma
    # class; check whether the installed version still needs an explicit persist().
    vectorstore = Chroma.from_documents(docs, embedding_model, persist_directory=CHROMA_PERSIST_DIR)
    vectorstore.persist()
    print("Base Chroma sauvegardée avec succès")

else:
    # A non-empty persist directory already exists: reopen it instead of re-indexing.
    print("Chargement de la base Chroma existante...")
    vectorstore = Chroma(persist_directory=CHROMA_PERSIST_DIR, embedding_function=embedding_model)

print("Base Chroma prête à être utilisée")

Chargement de la base Chroma existante...


  from .autonotebook import tqdm as notebook_tqdm
  vectorstore = Chroma(persist_directory=CHROMA_PERSIST_DIR, embedding_function=embedding_model)


Base Chroma prête à être utilisée


In [4]:
# Closed vocabulary of symptom identifiers that generated rules may reference.
observed_symptoms = set((
    "depressed_mood", "reduced_concentration",
    "reduced_energy", "increased_talkativeness",
    "diminished_interest_pleasure", "low_self_worth",
    "psychomotor_disturbances", "increased_activity_energy",
    "euphoria_irritability_expansiveness", "racing_thoughts",
    "disrupted_excessive_sleep", "change_in_appetite_weight",
    "decreased_need_for_sleep", "distractibility",
    "recurrent_thoughts_death_suicide", "impulsive_reckless_behavior",
    "increased_self_esteem", "increased_sexual_sociability_goal_directed_activity",
    "hopelessness", "delusions",
    "passivity_experiences", "disorganized_behavior",
))

# Labels allowed as the condition of a History fact.
history_conditions = set(("depressive", "hypomanic", "manic", "mixed"))

# Mood episodes use the same four labels; kept as a separate set object so the
# two vocabularies can be changed independently.
mood_episodes = set(history_conditions)

In [5]:
# query =  "translate natural language descriptions of mental health diagnostic criteria to logic rules"
# query = "Translate the diagnostic criteria for Bipolar I Disorder into Datalog"

import ollama

def generate_datalog(criteria, model="deepseek-coder-v2"):
    """Ask a local Ollama model to translate diagnostic criteria into Datalog.

    The prompt restricts the model to the notebook-level vocabularies
    ``observed_symptoms``, ``history_conditions`` and ``mood_episodes`` and
    describes the pre-defined ``Observed`` and ``History`` predicates.

    Args:
        criteria: Text describing the diagnostic criteria (here, the retrieved
            document chunks joined together).
        model: Name of the Ollama model to query. Defaults to the model this
            notebook was last run with.

    Returns:
        The text content of the model's reply.
    """
    # The vocabularies are sets, whose iteration order for strings changes from
    # one interpreter run to the next; sorting keeps the prompt text stable so
    # that runs are comparable.
    symptom_list = ", ".join(sorted(observed_symptoms))
    history_list = ", ".join(sorted(history_conditions))
    episode_list = ", ".join(sorted(mood_episodes))

    # NOTE(review): the task sentence asks for Soufflé syntax while the worked
    # example uses the `<=` / `&` / `sup(...)` form — confirm which dialect the
    # downstream evaluation expects and align the two.
    prompt = f"""
    System: You are an expert at translating mental health diagnostic criteria into Datalog code.

    - Only use the following symptoms when generating the logic rules: {symptom_list}.
    - Only use the following history conditions: {history_list}.
    - Only use the following mood episodes: {episode_list}.
    - The predicates 'Observed' and 'History' are already defined. You must use them as given without redefining them.

    Definitions:
    - Observed(PATIENT, SYMPTOM, DURATION): Indicates that a patient has experienced a specific symptom for a given duration.
    - History(PATIENT, CONDITION, OCCURRENCE): Indicates that a patient has a history of a specific condition with a certain occurrence.
    
    Examples:
    - Criteria: "SEDD requires at least one major depressive episode lasting at least 2 weeks..."
        SEDD(PATIENT) <= Observed(PATIENT, 'depressed_mood', WEEKS) & (sup(WEEKS , 2.0))

    Task: Translate the following diagnostic criteria into a Datalog program using Soufflé syntax.
    Ensure that only the listed symptoms, history conditions, and mood episodes are used in the generated rules.

    Diagnostic Criteria: {criteria}. 
    """

    response = ollama.chat(model=model, messages=[{"role": "user", "content": prompt}])
    return response['message']['content']

# Disorders whose diagnostic criteria can be looked up in the vector store.
disorders = [
    "Bipolar I (BPD1)",
    "Bipolar II (BPD2)",
    "Single Episode Depressive Disorder (SEDD)",
    "Recurrent Depressive Disorder (RDD)",
]

# Only the first disorder is processed here.
for disorder in disorders[:1]:
    # Retrieve the three chunks closest to the question and use them as criteria.
    query = f"What are the diagnostic criteria for {disorder} ?"
    retrieved_docs = vectorstore.similarity_search(query, k=3)
    context = "\n\n".join(doc.page_content for doc in retrieved_docs)

    datalog_code = generate_datalog(context)

    print(f"Disorder: {disorder}")
    print(datalog_code)

Disorder: Bipolar I (BPD1)
 To translate the diagnostic criteria for bipolar type I disorder into Datalog using Soufflé syntax, we need to define rules that capture the presence of mixed mood episodes characterized by significant mood symptoms and possibly prominent anxiety or panic symptoms, along with specific mental health symptoms such as delusions, reduced concentration, passivity experiences, racing thoughts, recurrent thoughts about death/suicide, low self-worth, increased self-esteem, reduced energy, hopelessness, psychomotor disturbances, increased activity/energy, disorganized behavior, decreased need for sleep, diminished interest in pleasure, change in appetite/weight, depressed mood, impulsive recklessness behavior, euphoria/irritability/expansiveness, increased sexual sociability/goal-directed activity, increased talkativeness, distractibility, and disrupted excessive sleep.

Here's the Datalog code for these criteria:

```datalog
% Define predicates for symptoms and cond

Mistral, version longue (6 min) :
predicate bipolar_type_I(PATIENT) {}

predicate in_partial_remission(PATIENT) {
    Observed(PATIENT, reduced_energy, DURATION1) &
    Observed(PATIENT, depressed_mood, DURATION2) &
    (sup(DURATION1 + DURTION2, 0.0)) &
    (not (Observed(PATIENT, increased_energy, _))) &
    (not (Observed(PATIENT, euphoria_irritability_expansiveness, _)))
}

predicate most_recent_mixed_episode(PATIENT) {
    Observed(PATIENT, depressed_mood, DURATION1) &
    Observed(PATIENT, manic, DURATION2) &
    (sup(DURATION1 + DURITION2, 0.0)) &
    History(PATIENT, mixed, OCCURENCE1) &
    (not (Observed(PATIENT, depressed_mood, _))) &
    (not (Observed(PATIENT, manic, _)))
}

predicate significant_mood_symptoms(PATIENT) {
    Observed(PATIENT, decreased_need_for_sleep, _) |
    Observed(PATIENT, delusions, _) |
    Observed(PATIENT, distractibility, _) |
    Observed(PATIENT, disrupted_excessive_sleep, _) |
    Observed(PATIENT, racing_thoughts, _) |
    Observed(PATIENT, diminished_interest_pleasure, _) |
    Observed(PATIENT, hopelessness, _) |
    Observed(PATIENT, reduced_concentration, _) |
    Observed(PATIENT, disorganized_behavior, _) |
    Observed(PATIENT, psychomotor_disturbances, _) |
    Observed(PATIENT, change_in_appetite_weight, _) |
    Observed(PATIENT, increased_talkativeness, _) |
    Observed(PATIENT, low_self_worth, _) |
    Observed(PATIENT, impulsive_reckless_behavior, _) |
    Observed(PATIENT, reduced_self_esteem, _) |
    Observed(PATIENT, psychomotor_disturbances, _)
}

bipolar_type_I(PATIENT) <=
   in_partial_remission(PATIENT) &
   most_recent_mixed_episode(PATIENT) &
   significant_mood_symptoms(PATIENT)

Deepseek


Mistral, version longue (6 min) :
predicate bipolar_type_I(PATIENT) {}

predicate in_partial_remission(PATIENT) {
    Observed(PATIENT, reduced_energy, DURATION1) &
    Observed(PATIENT, depressed_mood, DURATION2) &
    (sup(DURATION1 + DURTION2, 0.0)) &
    (not (Observed(PATIENT, increased_energy, _))) &
    (not (Observed(PATIENT, euphoria_irritability_expansiveness, _)))
}

predicate most_recent_mixed_episode(PATIENT) {
    Observed(PATIENT, depressed_mood, DURATION1) &
    Observed(PATIENT, manic, DURATION2) &
    (sup(DURATION1 + DURITION2, 0.0)) &
    History(PATIENT, mixed, OCCURENCE1) &
    (not (Observed(PATIENT, depressed_mood, _))) &
    (not (Observed(PATIENT, manic, _)))
}

predicate significant_mood_symptoms(PATIENT) {
    Observed(PATIENT, decreased_need_for_sleep, _) |
    Observed(PATIENT, delusions, _) |
    Observed(PATIENT, distractibility, _) |
    Observed(PATIENT, disrupted_excessive_sleep, _) |
    Observed(PATIENT, racing_thoughts, _) |
    Observed(PATIENT, diminished_interest_pleasure, _) |
    Observed(PATIENT, hopelessness, _) |
    Observed(PATIENT, reduced_concentration, _) |
    Observed(PATIENT, disorganized_behavior, _) |
    Observed(PATIENT, psychomotor_disturbances, _) |
    Observed(PATIENT, change_in_appetite_weight, _) |
    Observed(PATIENT, increased_talkativeness, _) |
    Observed(PATIENT, low_self_worth, _) |
    Observed(PATIENT, impulsive_reckless_behavior, _) |
    Observed(PATIENT, reduced_self_esteem, _) |
    Observed(PATIENT, psychomotor_disturbances, _)
}

bipolar_type_I(PATIENT) <=
   in_partial_remission(PATIENT) &
   most_recent_mixed_episode(PATIENT) &
   significant_mood_symptoms(PATIENT)

Test datalog

In [9]:
import pandas as pd
import os

# Patient data
patients_df = pd.read_excel("../data/dataset.xlsx", sheet_name="data")

# Locations of the generated Observed.facts and History.facts files
observed_file_path = "../data/Observed.facts"
history_file_path = "../data/History.facts"

# Regenerate both files as soon as either one is missing. Requiring *both* to be
# absent would skip generation when only one exists, report that both exist,
# and leave the loading step to fail on the missing file.
if not (os.path.exists(observed_file_path) and os.path.exists(history_file_path)):
    with open(observed_file_path, "w") as observed_file, open(history_file_path, "w") as history_file:
        for _, row in patients_df.iterrows():
            patient_id = row["PatientID"]

            # One Observed line per row that carries an observed symptom
            if pd.notna(row["Observed_Symptom"]):
                observed_file.write(f'"{patient_id}" "{row["Observed_Symptom"]}" {row["Observed_Week"]}\n')

            # One History line per row that carries a history condition
            if pd.notna(row["History_Condition"]):
                history_file.write(f'"{patient_id}" "{row["History_Condition"]}" {row["History_Count"]}\n')
else:
    print("Les fichiers 'Observed.facts' et 'History.facts' existent déjà.")

Les fichiers 'Observed.facts' et 'History.facts' existent déjà.


In [10]:
from pyDatalog import pyDatalog

pyDatalog.clear()

# Read the raw fact lines from the generated files
with open(observed_file_path, "r") as observed_file:
    observed_facts = observed_file.readlines()

with open(history_file_path, "r") as history_file:
    history_facts = history_file.readlines()


def _assert_facts(predicate, lines):
    """Assert one three-column pyDatalog fact for each non-blank line.

    Each line is split on whitespace into two text columns and one numeric
    column. The surrounding double quotes are removed from every column so that
    a patient id read for one predicate compares equal to the same id read for
    the other; without this, quoted and unquoted ids would never match in a rule
    that joins both predicates.
    """
    for line in lines:
        parts = [part.strip('"') for part in line.split()]
        if parts:
            pyDatalog.assert_fact(predicate, parts[0], parts[1], float(parts[2]))


_assert_facts('Observed', observed_facts)
_assert_facts('History', history_facts)

print("Faits Observed:")
for fact in observed_facts:
    print(fact.strip())

print("\nFaits History:")
for fact in history_facts:
    print(fact.strip())

Faits Observed:
"1" "depressed_mood" 1.5
"1" "reduced_concentration" 1.2
"1" "reduced_energy" 0.8
"1" "increased_talkativeness" 0.6
"2" "depressed_mood" 5.7
"2" "diminished_interest_pleasure" 5.7
"2" "reduced_concentration" 3.5
"2" "low_self_worth" 2.0
"2" "psychomotor_disturbances" 5.7
"3" "increased_activity_energy" 0.5
"3" "euphoria_irritability_expansiveness" 0.5
"3" "racing_thoughts" 0.5
"3" "increased_talkativeness" 0.5
"3" "increased_self_esteem" 0.5
"3" "diminished_interest_pleasure" 2.0
"3" "reduced_concentration" 2.0
"3" "disrupted_excessive_sleep" 2.0
"3" "change_in_appetite_weight" 2.0
"3" "psychomotor_disturbances" 0.8
"4" "depressed_mood" 5.7
"4" "diminished_interest_pleasure" 5.7
"4" "reduced_concentration" 3.5
"4" "low_self_worth" 2.0
"4" "psychomotor_disturbances" 5.7
"5" "depressed_mood" 7.5
"5" "low_self_worth" 7.5
"5" "disrupted_excessive_sleep" 4.0
"5" "reduced_energy" 5.5
"5" "change_in_appetite_weight" 5.5
"5" "euphoria_irritability_expansiveness" 3.0
"5" "increa

In [14]:
def fix_datalog_code(datalog_code, model="mistral"):
    """Ask a local Ollama model to repair previously generated Datalog code.

    Args:
        datalog_code: Text of the Datalog program to correct; it is inserted
            into the prompt unchanged.
        model: Name of the Ollama model to query. Defaults to "mistral", the
            model this cell was originally run with.

    Returns:
        The text content of the model's reply.
    """
    prompt = f"""
    System: You are an expert in Datalog and Soufflé syntax. The following Datalog code contains errors or inconsistencies. Your task is to fix the code and make it syntactically and semantically correct, ensuring it adheres to Soufflé syntax and is usable.

    Original Datalog Code:
    {datalog_code}

    Please provide the corrected Datalog code below:
    """

    response = ollama.chat(model=model, messages=[{"role": "user", "content": prompt}])
    return response['message']['content']

# Repair the program produced by the generation step and show the reply.
fixed_datalog_code = fix_datalog_code(datalog_code)
print(fixed_datalog_code)

 Here's the corrected Datalog code according to Soufflé syntax. I have made some modifications to improve readability and adhere more strictly to Datalog conventions.

```datalog
% Define predicates for symptoms, conditions, and their durations
observed(PATIENT, SYMPTOM, DURATION) :- Observed(PATIENT, SYMPTOM, DURATION).
has_history(PATIENT, CONDITION) :- History(PATIENT, CONDITION, _).

% Define bipolar type I disorder criteria
bipolar_type1(PATIENT) :-
    observed(PATIENT, reduced_concentration, D),
    observed(PATIENT, passivity_experiences, _),
    observed(PATIENT, racing_thoughts, _),
    ( count({ observed(PATIENT, _, D2) }) >= 3 ).
bipolar_type1(PATIENT) :-
    observed(PATient, recurrent_thoughts_death_suicide, _),
    observed(PATIENT, low_self_worth, _),
    observed(PATIENT, increased_self_esteem, _),
    ( count({ observed(PATIENT, _, D2) }) >= 3 ).
bipolar_type1(PATIENT) :-
    observed(PATIENT, reduced_energy, _),
    observed(PATIENT, hopelessness, _),
    observed(PA

In [None]:
% NOTE(review): this cell has no execution count and its content is not Python.
% It mixes ':-' rules (first two lines) with '<=' / '&' rules and a
% 'count({ ... | ... })' form; presumably a hand-edited draft of model output.
% Confirm the target dialect before reusing any of it.
% NOTE(review): the last rule uses 'bipolar' and 'most recent' as History
% arguments; 'bipolar' is not one of the history_conditions defined earlier in
% this notebook, and the History facts are loaded with a numeric third column.
observed(PATIENT, SYMPTOM, DURATION) :- Observed(PATIENT, SYMPTOM, DURATION).
history(PATIENT, CONDITION, OCCURRENCE) :- History(PATIENT, CONDITION, OCCURRENCE).

bipolar_type1(PATIENT) <= { observed(PATIENT, 'reduced_concentration', D), observed(PATIENT, 'passivity_experiences', _), observed(PATIENT, 'racing_thoughts', _) } & (count({ observed(PATIENT, _, D2) | observed(PATIENT, _, D2) }) >= 3).
bipolar_type1(PATIENT) <= { observed(PATIENT, 'recurrent_thoughts_death_suicide', _), observed(PATIENT, 'low_self_worth', _), observed(PATIENT, 'increased_self_esteem', _) } & (count({ observed(PATIENT, _, D2) | observed(PATIENT, _, D2) }) >= 3).
bipolar_type1(PATIENT) <= { observed(PATIENT, 'reduced_energy', _), observed(PATIENT, 'hopelessness', _), observed(PATIENT, 'psychomotor_disturbances', _) } & (count({ observed(PATIENT, _, D2) | observed(PATIENT, _, D2) }) >= 3).
bipolar_type1(PATIENT) <= { observed(PATIENT, 'increased_activity_energy', _), observed(PATIENT, 'disorganized_behavior', _), observed(PATIENT, 'decreased_need_for_sleep', _) } & (count({ observed(PATIENT, _, D2) | observed(PATIENT, _, D2) }) >= 3).
bipolar_type1(PATIENT) <= { observed(PATIENT, 'diminished_interest_pleasure', _), observed(PATIENT, 'change_in_appetite_weight', _), observed(PATIENT, 'depressed_mood', _) } & (count({ observed(PATIENT, _, D2) | observed(PATIENT, _, D2) }) >= 3).
bipolar_type1(PATIENT) <= { observed(PATIENT, 'impulsive_reckless_behavior', _), observed(PATIENT, 'euphoria_irritability_expansiveness', _), observed(PATIENT, 'increased_sexual_sociability_goal_directed_activity', _) } & (count({ observed(PATIENT, _, D2) | observed(PATIENT, _, D2) }) >= 3).
bipolar_type1(PATIENT) <= { observed(PATIENT, 'increased_talkativeness', _), observed(PATIENT, 'distractibility', _), observed(PATIENT, 'disrupted_excessive_sleep', _) } & (count({ observed(PATIENT, _, D2) | observed(PATIENT, _, D2) }) >= 3).

mixed_episode(PATIENT) <= { observed(PATIENT, 'depressed_mood', _), observed(PATIENT, 'euphoria_irritability_expansiveness', _) } & (count({ observed(PATIENT, _, D2) | observed(PATIENT, _, D2) }) >= 3).
mixed_episode(PATIENT) <= { observed(PATIENT, 'increased_talkativeness', _), observed(PATIENT, 'distractibility', _), observed(PATIENT, 'disrupted_excessive_sleep', _) } & (count({ observed(PATIENT, _, D2) | observed(PATIENT, _, D2) }) >= 3).
mixed_episode(PATIENT) <= { observed(PATIENT, 'impulsive_reckless_behavior', _), observed(PATIENT, 'depressed_mood', _), observed(PATIENT, 'euphoria_irritability_expansiveness', _) } & (count({ observed(PATIENT, _, D2) | observed(PATIENT, _, D2) }) >= 3).

% Bipolar type I disorder in partial remission with mixed mood episode and additional features
bipolar_type1_partial_remission_mixed(PATIENT) <= { history(PATIENT, 'bipolar', 'most recent'), mixed_episode(PATIENT), bipolar_type1(PATIENT) } & (count({ observed(PATIENT, _, D2) | observed(PATIENT, _, D2) }) >= 3)

In [None]:
# Version corrected by Mistral
# NOTE(review): this cell has no execution count and its content is Datalog-style
# text rather than Python; presumably kept as a record of the model's reply.
# Symptom and condition names appear here without quotes, whereas the facts are
# loaded as quoted strings — confirm how the target dialect treats bare names.

% Define predicates for symptoms, conditions, and their durations
observed(PATIENT, SYMPTOM, DURATION) :- Observed(PATIENT, SYMPTOM, DURATION).
has_history(PATIENT, CONDITION) :- History(PATIENT, CONDITION, _).

% Define bipolar type I disorder criteria
bipolar_type1(PATIENT) :-
    observed(PATIENT, reduced_concentration, D),
    observed(PATIENT, passivity_experiences, _),
    observed(PATIENT, racing_thoughts, _),
    ( count({ observed(PATIENT, _, D2) }) >= 3 ).
bipolar_type1(PATIENT) :-
    % NOTE(review): 'PATient' below is spelled differently from the 'PATIENT' used
    % in the rule head and in the other body atoms; looks like a typo — confirm.
    observed(PATient, recurrent_thoughts_death_suicide, _),
    observed(PATIENT, low_self_worth, _),
    observed(PATIENT, increased_self_esteem, _),
    ( count({ observed(PATIENT, _, D2) }) >= 3 ).
bipolar_type1(PATIENT) :-
    observed(PATIENT, reduced_energy, _),
    observed(PATIENT, hopelessness, _),
    observed(PATIENT, psychomotor_disturbances, _),
    ( count({ observed(PATIENT, _, D2) }) >= 3 ).
bipolar_type1(PATIENT) :-
    observed(PATIENT, increased_activity_energy, _),
    observed(PATIENT, disorganized_behavior, _),
    observed(PATIENT, decreased_need_for_sleep, _),
    ( count({ observed(PATIENT, _, D2) }) >= 3 ).
bipolar_type1(PATIENT) :-
    observed(PATIENT, diminished_interest_pleasure, _),
    observed(PATIENT, change_in_appetite_weight, _),
    observed(PATIENT, depressed_mood, _),
    ( count({ observed(PATIENT, _, D2) }) >= 3 ).
% NOTE(review): this rule lists two symptoms where every other bipolar_type1
% rule in this cell lists three — check whether one was dropped.
bipolar_type1(PATIENT) :-
    observed(PATIENT, impulsive_reckless_behavior, _),
    observed(PATIENT, euphoria_irritability_expansiveness, _),
    ( count({ observed(PATIENT, _, D2) }) >= 3 ).
bipolar_type1(PATIENT) :-
    observed(PATIENT, increased_talkativeness, _),
    observed(PATIENT, distractibility, _),
    observed(PATIENT, disrupted_excessive_sleep, _),
    ( count({ observed(PATIENT, _, D2) }) >= 3 ).

% Mixed mood episode criteria
mixed_episode(PATIENT) :-
    observed(PATIENT, depressed_mood, _),
    observed(PATIENT, euphoria_irritability_expansiveness, _),
    ( count({ observed(PATIENT, _, D2) }) >= 3 ).
mixed_episode(PATIENT) :-
    observed(PATIENT, increased_talkativeness, _),
    observed(PATIENT, distractibility, _),
    observed(PATIENT, disrupted_excessive_sleep, _),
    ( count({ observed(PATIENT, _, D2) }) >= 3 ).
mixed_episode(PATIENT) :-
    observed(PATIENT, impulsive_reckless_behavior, _),
    observed(PATIENT, depressed_mood, _),
    observed(PATIENT, euphoria_irritability_expansiveness, _),
    ( count({ observed(PATIENT, _, D2) }) >= 3 ).

% Bipolar type I disorder in partial remission with mixed mood episode and additional features
% NOTE(review): 'bipolar' is not one of the history_conditions defined earlier in
% this notebook (depressive, hypomanic, manic, mixed) — confirm the intended condition.
bipolar_type1_partial_remission_mixed(PATIENT) :-
    has_history(PATIENT, bipolar),
    mixed_episode(PATIENT),
    bipolar_type1(PATIENT),
    ( count({ observed(PATIENT, _, D2) }) >= 3 ).

In [8]:
# NOTE(review): this cell fails as written. Its recorded output is
# "NameError: name 'Diagnosis' is not defined", and no visible cell defines
# either `Diagnosis` or the variable `i` used below.
# TODO: define the Diagnosis rules and the patient id to query before this loop.
for disorder in disorders:
    # Presumably meant to look up the diagnosis of patient `i` for each disorder — confirm.
    result = Diagnosis(f'{i}', disorder)
    print(result)

NameError: name 'Diagnosis' is not defined