In [24]:
from openai import OpenAI
from textgrad.engine.local_model_openai_api import ChatExternalClient

MODEL_NAME = "mlabonne/NeuralBeagle14-7B-GGUF"
client = OpenAI(base_url="http://localhost:1234/v1", api_key="lm-studio")
engine = ChatExternalClient(client=client, model_string=MODEL_NAME)

In [25]:
import textgrad as tg
import numpy as np


class SmartAnswer:
    def __init__(
        self, model_name=MODEL_NAME, confidence_threshold=0.95, max_iterations=3
    ):
        tg.set_backward_engine(engine, override=True)
        self.model = tg.BlackboxLLM(engine=engine)
        self.confidence_threshold = confidence_threshold
        self.max_iterations = max_iterations

    def estimate_confidence(self, response):
        """
        Estimates confidence based on token probabilities using entropy-based normalization.
        """
        token_probs = np.array(
            response.token_probabilities
        )  # Assuming token_probabilities is available
        entropy = -np.sum(token_probs * np.log(token_probs))
        normalized_confidence = 1 - (
            entropy / np.log(len(token_probs))
        )  # Normalize between 0 and 1
        return normalized_confidence

    def generate_answer(self, question_text):
        question = tg.Variable(question_text, role_description="question to the LLM", requires_grad=False)
        answer = self.model(question)
        answer.confidence = self.estimate_confidence(answer)

        # Apply TextGrad optimization
        loss_fn = tg.TextLoss("Evaluate the answer and suggest improvements.")
        for _ in range(self.max_iterations):
            loss = loss_fn(answer)
            loss.backward()
            optimizer = tg.TGD(parameters=[answer])
            optimizer.step()
            answer.confidence = self.estimate_confidence(answer)

        # Apply Test-Time Scaling
        if answer.confidence < self.confidence_threshold:
            refined_answer = self.model.compute_more_steps(question)
            refined_answer.confidence = self.estimate_confidence(refined_answer)
            if refined_answer.confidence > answer.confidence:
                answer = refined_answer

        # Decide whether to answer or abstain
        if answer.confidence >= self.confidence_threshold:
            return answer.value
        else:
            return "Model chooses to abstain due to low confidence."

In [26]:
class StandardLLM:
    def __init__(self, engine=engine):
        self.model = tg.BlackboxLLM(engine=engine)

    def generate_answer(self, question_text):
        question = tg.Variable(question_text, role_description="question to the LLM", requires_grad=False)
        return self.model(question).value

In [27]:
# Initialize models
smart_answer = SmartAnswer()
standard_llm = StandardLLM()

question_text = "What is the capital of France?"

# Generate responses
smart_response = smart_answer.generate_answer(question_text)
standard_response = standard_llm.generate_answer(question_text)

print(f"SmartAnswer Response: {smart_response}")
print(f"Standard LLM Response: {standard_response}")


AttributeError: 'Variable' object has no attribute 'token_probabilities'