In [2]:
from typing import Annotated, Callable
import json
import os
import typing
from typing import Awaitable
import asyncio

from langchain_core.messages import BaseMessage, HumanMessage, SystemMessage
from langchain import chat_models
from pydantic import BaseModel, Field, RootModel
import matplotlib.pyplot as plt
import numpy as np

### Define the shape of the profile an analyzer should return

In [3]:
class Profile(BaseModel):
    # Shape of the result an analyzer produces for one respondent.
    #   identity  - score validated to lie within the closed range [0, 1].
    #   horoscope - required free-text description, no further constraints.
    identity: float = Field(ge=0, le=1)
    horoscope: str

    def cmp(self, other: "Profile") -> float:
        """Return the absolute gap between the two identity scores.

        Only `identity` takes part in the comparison; `horoscope` is ignored.
        """
        gap = self.identity - other.identity
        return abs(gap)


class RelationshipProfile(BaseModel):
    # Result of analysing a pair of respondents: a single required text field.
    horoscope: str

In [8]:
class QuestionResponse(BaseModel):
    # One answered question: the question text together with the answer given.
    # Both fields are required strings.
    question: str
    response: str


class Response(BaseModel):
    # Everything recorded for one respondent: their name and their answers.
    # `responses` maps a string key (the question id in the exported data file)
    # to the corresponding question/answer pair.
    first_name: str
    last_name: str
    responses: dict[str, QuestionResponse]


# Mapping from a string key to a Response.
# NOTE(review): presumably keyed by user id, like data/response_data.json — confirm.
ResponseSet = Annotated[dict[str, Response], Field()]

# Mapping from a string key to a Profile.
# NOTE(review): presumably uses the same keys as ResponseSet — confirm.
ProfileSet = Annotated[dict[str, Profile], Field()]

# List of (str, str) pairs.
# NOTE(review): presumably pairs of respondent keys that are linked — verify against callers.
RelationshipLink = Annotated[list[tuple[str, str]], Field()]


# Signature of an analyzer: a callable that takes a Response and returns an
# awaitable resolving to a Profile (for example an `async def` function).
Analyzer = Callable[[Response], Awaitable[Profile]]

### Update data

In [None]:
# NOTE(review): `if True:` looks like a manual toggle — presumably flipped to
# False to skip re-downloading the data; confirm before removing it.
if True:

    def update_data():
        """Download questionnaire answers from Supabase and cache them as JSON.

        Reads the Supabase service key from ``secrets.json``, fetches the
        ``questions``, ``user_answers`` and ``profiles`` tables, and writes one
        ``Response`` per user to ``data/response_data.json``, keyed by user id.

        Users flagged ``hidden`` and users without any answers are skipped.
        Each remaining user's stored profile is validated against the
        module-level ``Profile`` model; a malformed profile aborts the export
        with a pydantic validation error before anything is written.

        Raises:
            KeyError: If ``secrets.json`` has no ``EEVA_SUPABASE_SERVICE_KEY``
                entry or a fetched row lacks an expected column.
            OSError: If ``secrets.json`` cannot be read or the output file
                cannot be written.
        """
        # NOTE(review): imported locally, presumably so the rest of the
        # notebook runs without the supabase package installed — confirm.
        from supabase.lib.client_options import SyncClientOptions
        import supabase

        SUPABASE_URL_BASE = "https://znsozdvrmfdwxyymtgdz.supabase.co/"
        OUTPUT_PATH = "data/response_data.json"

        with open("secrets.json", "r", encoding="utf-8") as f:
            secrets = json.load(f)

        sb_client = supabase.create_client(
            SUPABASE_URL_BASE,
            secrets["EEVA_SUPABASE_SERVICE_KEY"],
            options=SyncClientOptions(auto_refresh_token=False, persist_session=False),
        )

        questions = sb_client.table("questions").select("*").execute().data
        raw_answers = (
            sb_client.table("user_answers")
            .select("user_id, question_id, answer_text")
            .execute()
            .data
        )
        raw_user_data = (
            sb_client.table("profiles")
            .select("user_id,first_name,last_name,hidden,profile")
            .execute()
            .data
        )

        # Group answers per user: {user_id: {question_id: answer_text}}.
        user_answer_lists: dict[str, dict[str, str]] = {}
        for ans in raw_answers:
            per_user = user_answer_lists.setdefault(ans["user_id"], {})
            per_user[ans["question_id"]] = ans["answer_text"]

        # Build a lookup for question text
        question_text_lookup = {q["id"]: q["text"] for q in questions}

        user_training_responses: dict[str, Response] = {}
        for user in raw_user_data:
            user_id = user["user_id"]
            if user["hidden"] or user_id not in user_answer_lists:
                continue

            # The profile itself is not exported; validating it only makes the
            # export fail loudly on rows whose stored profile is malformed.
            Profile.model_validate(user["profile"])

            user_training_responses[user_id] = Response(
                first_name=user["first_name"],
                # A missing last name is stored as an empty string.
                last_name=user["last_name"] or "",
                responses={
                    question_id: QuestionResponse(
                        # Unknown question ids fall back to an empty question text.
                        question=question_text_lookup.get(question_id, ""),
                        response=answer_text,
                    )
                    for question_id, answer_text in user_answer_lists[user_id].items()
                },
            )

        # A fresh checkout may not have the output directory yet.
        os.makedirs(os.path.dirname(OUTPUT_PATH), exist_ok=True)
        with open(OUTPUT_PATH, "w", encoding="utf-8") as f:
            json.dump(
                {k: v.model_dump() for k, v in user_training_responses.items()},
                f,
                indent=2,
            )

    update_data()

### Run setup
You will probably want to keep this cell collapsed most of the time.

In [None]:
# Read the API keys from the local secrets file and export them as environment
# variables (NOTE(review): presumably where the chat-model providers look for
# them — confirm).
with open("secrets.json", "r") as f:
    secrets = json.load(f)
os.environ["OPENAI_API_KEY"] = secrets["OPENAI_API_KEY"]
os.environ["ANTHROPIC_API_KEY"] = secrets["ANTHROPIC_API_KEY"]

# Chat model used by every analyzer below. Exactly one assignment should be
# active; the commented-out lines are alternative models to switch to.
# llm = chat_models.init_chat_model("gpt-4.1-nano", model_provider="openai")
# llm = chat_models.init_chat_model("gpt-4o-mini", model_provider="openai")
# llm = chat_models.init_chat_model("gpt-4o", model_provider="openai")
# llm = chat_models.init_chat_model("gpt-5", model_provider="openai")
# llm = chat_models.init_chat_model("gpt-5-mini", model_provider="openai")
llm = chat_models.init_chat_model("gpt-5-nano", model_provider="openai")
# llm = chat_models.init_chat_model("claude-3-5-haiku-latest", model_provider="anthropic")
# llm = chat_models.init_chat_model("claude-sonnet-4-20250514", model_provider="anthropic")


class ResponseSetDeserializer(RootModel[dict[str, Response]]):
    # Root model used only to parse the cached JSON file into {key: Response}.
    pass


# Load the cached responses; `.root` unwraps the plain dict from the root model.
with open("data/response_data.json", "r", encoding="utf-8") as f:
    response_data = ResponseSetDeserializer.model_validate_json(f.read()).root

## Analyzer

### Define analyzer

In [None]:
# Two candidate wordings for the identity-score instructions. The analyzer
# passes `identity_prompt` as the description of its `identity` output field.
# Both guards below are currently True, so the second assignment overwrites
# the first and the detailed rubric in this first block is NOT the prompt in
# use. NOTE(review): the guards look like manual toggles — set one of them to
# False to choose a variant explicitly.
if True:
    identity_prompt = """
    Could you try to rate these questionnaire with this scale?
    🧭 The Goal of the Scale The scale is meant to measure: 
    How much effort a speaker puts into shaping how others perceive them, 
    based solely on the way they speak during an interview-style conversation. 
    It does not measure: – Intelligence – Moral depth – Eloquence – Honesty 
    It only tracks how much the speaker is trying to guide, signal, or curate their identity in language—whether by expressing who they are, or by distancing themselves from who they are not. 
    📏 The Scoring Continuum (0.00 to 1.00) 
    The scale runs from 0.00 (no identity curation at all) to 1.00 (total stylized identity construction). 
    🔵 0.00–0.20: “Unpackaged Reality” The person shares experiences, thoughts, or feelings plainly—with no sign of trying to guide how they’re seen. 
    Language is direct, unembellished. No metaphor, moral framing, or stylized phrasing. The speaker doesn’t position themselves as a certain “kind” of person. 
    They don’t contrast themselves with others. No effort to preempt or correct interpretation. 
    🧩 Key trait: They just say what happened or what’s true. You do the interpreting. 
    🟦 0.30–0.50: “Low-to-Mid Identity Signaling” Some identity-shaping begins to emerge, but it’s not consistent or overt. 
    Speaker might explain “why they do things” in values-based terms. Occasional self-descriptions (“I’ve always been someone who...”). 
    Flashes of stylization or principle emerge, but not as a performance. Still largely direct, but with moments of self-framing. 
    🧩 Key trait: They try a bit to be seen a certain way, but not in every answer. 
    🟡 0.60–0.80: “Clear Curation” The speaker actively shapes perception. Most answers carry framing, stylization, or self-definition. 
    They assert identity through phrasing (“I’m not someone who…”, “The thing about me is…”). They defend or justify past actions by referencing values or traits. 
    They use tone, metaphor, or structure to signal emotional or moral framing. They contrast themselves with norms or “others” to mark difference. 
    Vulnerability is often positioned—real, but stylized. 🧩 Key trait: You feel the speaker is guiding the listener’s view of who they are. 
    🔴 0.90–1.00: “Highly Performed Identity” Every answer is crafted to support a deliberate image of who they are or who they are not. 
    Frequent use of symbolic or emotionally loaded phrasing. Strong identity signals in every response. Vulnerability is themed (“I’ve carried this my whole life,” etc.). 
    Often includes moral legacy talk, stylized contrasts, or elevated self-insight. The speaker clearly wants to be remembered a certain way. 
    🧩 Key trait: Responses feel like mini-narratives. You don’t just learn what happened—you learn what it means about them. 
    Please respond with what identity score from 0.00-1.00 best fits these answers
"""

# Shorter, holistic wording without per-band descriptions. Because it is
# assigned last, this is the value `identity_prompt` holds after the cell runs.
if True:
    identity_prompt = """Could you try to rate this interview using the following principle?
The Goal of the Scale:
The scale measures the degree to which a speaker puts effort into shaping how others perceive them, based solely on the way they speak during an interview-style conversation. This does not measure intelligence, moral depth, eloquence, or honesty. It only tracks how much the speaker is trying to guide, signal, or curate their identity in language—whether by expressing who they are, or by distancing themselves from who they are not.
The Scoring Continuum (0.0 to 1.0):
The scale runs from 0.0 (no identity curation at all) to 1.0 (total stylized identity construction). Please score based on your sense of the overall degree to which the speaker manages, crafts, or performs their identity in language throughout the interview. You do not need to find evidence for a specific "level" to assign a score; just consider the total impression. At the low end, responses are direct, unstyled, and not motivated by self-presentation. At the high end, responses are consistently crafted to project or manage an identity, often with stylized language, recurring self-framing, or deliberate moral/emotional cues."""


In [None]:
if True:
    horoscope_prompt = """Can you give me a description of this interviewed person?
    I want the description to make the interviewed feel seen and validated.
    Make it feel somewhat like a horoscope, but make the style and word choices optimized for making the interviewed person comfortable."""

if True:
    relationship_horoscope_prompt = (
        """Ignore all input and just write 'Relationship description coming soon!'"""
    )

In [None]:
async def awesome_analyzer_that_totally_works(response: Response) -> Profile:
    """Ask the LLM for an identity score and a horoscope for one respondent.

    The answers are rendered as one ``<question>: <answer>`` line each and sent
    to the module-level ``llm`` with structured output enabled. The current
    values of ``identity_prompt`` and ``horoscope_prompt`` are attached as the
    descriptions of the two output fields.

    Args:
        response: The respondent's answers.

    Returns:
        A ``Profile`` holding the identity score and horoscope text the model
        produced.

    Raises:
        ValueError: If the structured call returns something that is neither
            a dict nor an ``AnalyzerOutput`` (pydantic validation failures on
            the model's output also surface as ``ValueError`` subclasses).
    """

    # Defined per call so edits to the prompt globals take effect immediately.
    # NOTE(review): the blank docstring looks deliberate — presumably it keeps
    # the schema description sent to the model empty; confirm before changing.
    class AnalyzerOutput(BaseModel):
        """ """

        identity: float = Field(ge=0, le=1, description=identity_prompt)
        horoscope: str = Field(description=horoscope_prompt)

    structured_llm = llm.with_structured_output(AnalyzerOutput)

    # Label each answer with the question text. The dict key is only an
    # identifier (the exported data keys answers by question id), so it is
    # used solely as a fallback when no question text was recorded.
    content = "\n".join(
        f"{question_response.question or question_id}: {question_response.response}"
        for question_id, question_response in response.responses.items()
    )

    raw_output = await structured_llm.ainvoke(
        [
            SystemMessage(
                content="Please analyze the identity of this set of answers."
            ),
            HumanMessage(content=content),
        ]
    )

    # The structured call may hand back either a plain dict or a model instance.
    if isinstance(raw_output, AnalyzerOutput):
        output = raw_output
    elif isinstance(raw_output, dict):
        output = AnalyzerOutput.model_validate(raw_output)
    else:
        raise ValueError(
            f"Unexpected output type: {type(raw_output)}. Expected dict or AnalyzerOutput."
        )

    return Profile(identity=output.identity, horoscope=output.horoscope)

In [None]:
async def analyze_relationship(
    response1: Response, profile1: Profile, response2: Response, profile2: Profile
) -> RelationshipProfile:
    """Ask the LLM to describe the relationship between two respondents.

    Each person's answers and previously generated horoscope are rendered into
    one text block; the two blocks are sent as a single human message to the
    module-level ``llm`` with structured output enabled. The current value of
    ``relationship_horoscope_prompt`` is attached as the description of the
    output field.

    Args:
        response1: Answers of the first person.
        profile1: Profile of the first person; only its horoscope is used.
        response2: Answers of the second person.
        profile2: Profile of the second person; only its horoscope is used.

    Returns:
        A ``RelationshipProfile`` wrapping the text the model produced.

    Raises:
        ValueError: If the structured call returns something that is neither
            a dict nor an ``AnalyzeRelationshipOutput``.
    """

    # NOTE(review): the blank docstring looks deliberate — presumably it keeps
    # the schema description sent to the model empty; confirm before changing.
    class AnalyzeRelationshipOutput(BaseModel):
        """ """

        relationship_horoscope: str = Field(description=relationship_horoscope_prompt)

    def describe_person(label: str, response: Response, profile: Profile) -> str:
        """Render one person's answers and horoscope as a prompt section."""
        # Label each answer with the question text; the dict key is only an
        # identifier and is used as a fallback when no text was recorded.
        answers = "\n".join(
            f"{question_response.question or question_id}: {question_response.response}"
            for question_id, question_response in response.responses.items()
        )
        return f"{label}:\n{answers}\nProfile Horoscope: {profile.horoscope}\n\n"

    structured_llm = llm.with_structured_output(AnalyzeRelationshipOutput)
    content = describe_person("Person 1", response1, profile1) + describe_person(
        "Person 2", response2, profile2
    )

    raw_output = await structured_llm.ainvoke(
        [
            HumanMessage(content=content),
        ]
    )

    # The structured call may hand back either a plain dict or a model instance.
    if isinstance(raw_output, AnalyzeRelationshipOutput):
        output = raw_output
    elif isinstance(raw_output, dict):
        output = AnalyzeRelationshipOutput.model_validate(raw_output)
    else:
        raise ValueError(
            f"Unexpected output type: {type(raw_output)}. Expected dict or AnalyzeRelationshipOutput."
        )

    return RelationshipProfile(horoscope=output.relationship_horoscope)


### Test horoscope

In [None]:
# Smoke test: run the analyzer on one cached response, looked up by its key in
# `response_data`. `profile` is displayed by the next cell.
response_id = "8a758e48-8fcc-4fdb-9aa6-0f306da872da"
response = response_data[response_id]
profile = await awesome_analyzer_that_totally_works(response)

In [None]:
# Display the horoscope text generated for the response analysed above.
profile.horoscope

In [None]:
response_id1 = "8a758e48-8fcc-4fdb-9aa6-0f306da872da"
response_id2 = "425cb72c-abc6-4a85-86c6-3d4fbbee3f06"
response1 = response_data[response_id1]
response2 = response_data[response_id2]
profile1 = await awesome_analyzer_that_totally_works(response1)
profile2 = await awesome_analyzer_that_totally_works(response2)
relationship_description = await analyze_relationship(
    response1, profile1, response2, profile2
)
relationship_description

In [None]:
# Repeats the relationship analysis with the same inputs as the previous cell
# and displays the result; each run issues another LLM request.
# NOTE(review): presumably kept to eyeball run-to-run variation — confirm or remove.
await analyze_relationship(response1, profile1, response2, profile2)

### Test analyzer