In [None]:
from typing import Annotated, Callable
from concurrent.futures import ThreadPoolExecutor
import json
import os

from langchain_core.messages import BaseMessage, HumanMessage, SystemMessage
from langchain import chat_models
from pydantic import BaseModel, Field, RootModel

### Define the shape of the profile an analyzer should return

In [None]:
# Result type every analyzer returns. Documented with comments rather than a
# class docstring on purpose: pydantic copies a model's docstring into its
# JSON schema, and this model is handed to the LLM as a structured-output schema.
class Profile(BaseModel):
    # Single score, validated to lie in the closed interval [0, 1].
    identity: float = Field(ge=0, le=1)

    def cmp(self, other: "Profile") -> float:
        """Return the absolute difference between the two ``identity`` scores."""
        difference = self.identity - other.identity
        return abs(difference)


### Run setup
You will probably want to keep this cell collapsed most of the time.

In [None]:
# One answered question: both fields are required strings.
class QuestionResponse(BaseModel):
    question: str
    response: str


# One set of answers: string keys mapped to QuestionResponse entries.
# NOTE(review): what the keys represent (question id vs. question text) is not
# visible here — confirm against the data file.
Response = Annotated[dict[str, QuestionResponse], Field()]

# Several Response objects, indexed by a string key.
ResponseSet = Annotated[dict[str, Response], Field()]

# Expected Profile per key; test_analyzer requires the same keys as the ResponseSet.
ProfileSet = Annotated[dict[str, Profile], Field()]


# An analyzer is any callable that turns one Response into a Profile.
Analyzer = Callable[[Response], Profile]

# Read the API key from the local secrets file; the file only needs to stay
# open while it is being parsed.
with open("secrets.json", "r") as f:
    secrets = json.load(f)

# Export the key through the environment (presumably where the OpenAI
# provider looks for it — verify against the langchain docs).
os.environ["OPENAI_API_KEY"] = secrets["OPENAI_API_KEY"]

# Chat model used by the analyzer defined later in this notebook.
llm = chat_models.init_chat_model("gpt-4o-mini", model_provider="openai")

# Root-model wrapper so a top-level JSON object can be validated directly as a
# mapping of string keys to Response values. Defined outside the `with` block:
# it does not need the file handle, so the file is held open only for the read.
class ResponseSetDeserializer(RootModel[dict[str, Response]]):
    pass


# JSON is UTF-8 by specification. Decoding it explicitly keeps non-ASCII
# answers intact on platforms whose default text encoding is not UTF-8.
with open("data/training_responses.json", "r", encoding="utf-8") as f:
    training_responses = ResponseSetDeserializer.model_validate_json(f.read()).root


def test_analyzer(
    analyzer: Analyzer, responses: ResponseSet, expected: ProfileSet
) -> float:
    """Return the mean error of ``analyzer`` over a labelled set of responses.

    Every response is analyzed exactly once. The calls are fanned out over a
    thread pool, and each resulting profile is compared with the expected
    profile stored under the same key.

    Args:
        analyzer: Callable that maps one response to a profile.
        responses: Responses to analyze, indexed by key.
        expected: Expected profile for each key in ``responses``.

    Returns:
        The mean of ``profile.cmp(expected_profile)`` over all keys, or ``0.0``
        when ``responses`` is empty.

    Raises:
        ValueError: If ``responses`` and ``expected`` do not have the same keys.
    """
    if responses.keys() != expected.keys():
        raise ValueError("ResponseSet keys do not match ProfileSet keys")
    if not responses:
        # Nothing to score; also avoids spinning up a thread pool for no work.
        return 0.0

    # executor.map yields results in the order of its input, so the profiles
    # line up with responses.keys() below. An exception raised inside the
    # analyzer is re-raised here when the results are collected.
    with ThreadPoolExecutor() as executor:
        profiles = list(executor.map(analyzer, responses.values()))

    # Reuse the profiles computed above instead of calling the analyzer a
    # second time for every response.
    total_error = sum(
        (
            profile.cmp(expected[key])
            for key, profile in zip(responses.keys(), profiles)
        ),
        0.0,
    )
    return total_error / len(responses)

### Set expected profiles

In [None]:
# Expected profile for every training key; each one is a fresh Profile with
# identity 0.5.
expected_profiles = {
    key: Profile(identity=0.5) for key in training_responses.keys()
}

### Define analyzer

In [None]:
def awesome_analyzer_that_totally_works(response: Response) -> Profile:
    """Ask the LLM to rate the identity of one set of answers.

    Renders every entry of ``response`` as a ``"<key>: <answer>"`` line, sends
    the joined lines to the chat model together with a fixed system prompt,
    and returns the model's structured output (requested with the ``Profile``
    schema).
    """
    profile_llm = llm.with_structured_output(Profile)

    # NOTE(review): each line is labelled with the dict key, not with
    # QuestionResponse.question — confirm the keys carry the question text.
    answer_lines = []
    for label, answer in response.items():
        answer_lines.append(f"{label}: {answer.response}")
    content = "\n".join(answer_lines)

    messages = [
        SystemMessage(
            content="How high is the identity of this set of answers on a scale from 0 to 1?"
        ),
        HumanMessage(content=content),
    ]

    return profile_llm.invoke(messages)

### Test the analyzer

In [None]:
# Score the analyzer on the training set. The bare name on the last line makes
# the notebook display the resulting mean error.
error = test_analyzer(
    awesome_analyzer_that_totally_works,
    responses=training_responses,
    expected=expected_profiles,
)
error