In [2]:
import json
import os
from typing import Literal

import dspy
import pandas as pd
from sklearn.metrics import classification_report

In [None]:
# Notebook configuration: everything a reader is expected to tune lives here.
model = ""  # Model identifier used for inference requests
# The credential is read from the LLM_API_KEY environment variable so it never
# has to be typed into (and saved with) the notebook. Falls back to "" when the
# variable is not set.
api_key = os.environ.get("LLM_API_KEY", "")
api_base = ""  # Base URL of the model proxy/service endpoint
data_path = ""  # Input dataset file path
k = 5  # Number of repetitions for self-consistency

In [4]:
# Create the language-model client from the notebook configuration and
# register it as the default LM for DSPy.
lm = dspy.LM(
    model,
    api_key=api_key,
    api_base=api_base,
)
dspy.configure(lm=lm)

# Disable both the on-disk and the in-memory DSPy caches.
# NOTE(review): presumably so that repeated runs over the same prompts issue
# fresh requests instead of replaying a stored response -- confirm intent.
dspy.configure_cache(enable_disk_cache=False, enable_memory_cache=False)

In [5]:
def get_prompt(row):
    """Build the Spanish classification prompt for one dataset row.

    Parameters
    ----------
    row : pandas.Series
        One record of the dataset. Every entry except the one labelled
        ``"Riesgo"`` is rendered as a factor line; the truthiness of the
        value selects "Sí" or "No".

    Returns
    -------
    str
        The fixed preamble, a blank line, then one ``- <label>: Sí|No`` line
        (newline-terminated) per factor, in the order of the row's index.
    """
    preamble = """Te voy a entregar una lista de factores protectores y de riesgo de caries y debes decirme, según CariesCare si el paciente está en alto o bajo riesgo de caries.\n"""
    # Single quotes inside the replacement field: reusing the enclosing double
    # quote there is only accepted from Python 3.12 onwards.
    # NOTE(review): plain truthiness is used, so a missing value (NaN) or any
    # non-empty string would be rendered as "Sí" -- confirm the factor columns
    # are strictly boolean/0-1.
    factor_lines = [
        f"- {col}: {'Sí' if value else 'No'}\n"
        for col, value in row.items()
        if col != "Riesgo"
    ]
    return preamble + "\n" + "".join(factor_lines)

In [6]:
# DSPy signature for the classification task: one free-text input (`prompt`)
# and one output (`risk`) restricted to the literals 'alto' / 'bajo'.
# NOTE(review): the class docstring is left in Spanish on purpose -- DSPy
# signatures presumably use it as the task instructions sent to the model, so
# it is runtime text rather than documentation. Confirm before editing it.
class Risk(dspy.Signature):
    """Clasifica el riesgo de caries de un caso clínico según CariesCare."""

    # Text produced by get_prompt() for one patient record.
    prompt: str = dspy.InputField()
    # Predicted risk level; the Literal annotation limits the allowed values.
    risk: Literal['alto', 'bajo'] = dspy.OutputField()
# Predictor module built from the signature; used below via classify.batch(...).
classify = dspy.Predict(Risk)

In [None]:
# Read the spreadsheet located at `data_path` into a DataFrame.
data = pd.read_excel(io=data_path)

In [8]:
# Build the DSPy examples once: the prompts depend only on the dataset, so
# there is no need to regenerate them on each of the k repetitions.
examples = [
    dspy.Example(prompt=get_prompt(row)).with_inputs("prompt")
    for _, row in data.iterrows()
]

# predicted_k[j][i] holds the label predicted for dataset row i on repetition j.
predicted_k = []
for _ in range(k):
    results = classify.batch(examples)
    # NOTE(review): a failed call may come back as None instead of a prediction
    # object (confirm against the installed DSPy version). Keep it as a None
    # label so the scoring cell can apply its fallback instead of crashing
    # here with AttributeError.
    predicted = [None if result is None else result.risk for result in results]
    predicted_k.append(predicted)

  0%|          | 0/10 [00:00<?, ?it/s]

Processed 10 / 10 examples: 100%|██████████| 10/10 [00:51<00:00,  5.12s/it]
Processed 10 / 10 examples: 100%|██████████| 10/10 [-00:09<00:00, -1.04it/s]
Processed 10 / 10 examples: 100%|██████████| 10/10 [00:22<00:00,  2.26s/it]
Processed 10 / 10 examples: 100%|██████████| 10/10 [00:25<00:00,  2.59s/it]
Processed 10 / 10 examples: 100%|██████████| 10/10 [00:25<00:00,  2.57s/it]


In [13]:
# Translate the ground-truth labels into the vocabulary of the model output.
label_map = {'HIGH': 'alto', 'LOW': 'bajo'}
true = data["Riesgo"].map(label_map)

# Series.map yields NaN for every value that is not a key of the dict (and for
# missing cells). Fail loudly here instead of letting NaN labels reach the
# metrics computation.
unmapped = data.loc[true.isna(), "Riesgo"].unique().tolist()
if unmapped:
    raise ValueError(f"Unexpected values in 'Riesgo' column: {unmapped!r}")

In [14]:
# One classification report (as a nested dict) per repetition, in the same
# order as predicted_k.
report = []
for run_labels in predicted_k:
    # A missing prediction (None) is scored as "alto". This only affects the
    # metrics: predicted_k itself is left untouched, so the exported
    # predictions still show the gap.
    filled_labels = ["alto" if label is None else label for label in run_labels]
    report.append(classification_report(true, filled_labels, output_dict=True))

In [15]:
# Transpose so that each repetition becomes a column and each dataset record a row.
predictions = pd.DataFrame(predicted_k).T

# Replace "/" in the model identifier so it is not read as a directory
# separator. Computed outside the f-string because reusing the enclosing quote
# character inside a replacement field is a SyntaxError before Python 3.12.
safe_model_name = model.replace("/", "_")

# Original columns, then the translated ground truth, then one column per repetition.
results_table = pd.concat(
    [data.reset_index(drop=True), true.reset_index(drop=True), predictions],
    axis=1,
)
results_table.to_excel(f"{safe_model_name}.xlsx", index=False)

In [16]:
# Write the per-repetition classification reports next to the Excel export.
# The file name is built outside the f-string because reusing the enclosing
# quote character inside a replacement field is a SyntaxError before Python 3.12.
safe_model_name = model.replace("/", "_")
with open(f"{safe_model_name}.json", "w") as f:
    json.dump(report, f, indent=4)