# LLM Self-Awareness Evaluation

## Conversational eval framework

In [12]:
import os

# Read the OpenAI API key from a local file and export it as the
# OPENAI_API_KEY environment variable, so the key never appears in the notebook.
with open('openai_api_key', 'r') as key_file:
    raw_key = key_file.read()
os.environ['OPENAI_API_KEY'] = raw_key.strip()
del raw_key  # do not keep the secret around as a kernel variable

# Root folder of the prompt templates. The trailing slash is required because
# the prompt-loading cell builds file paths by plain string concatenation.
PROMPT_PATH = 'self_awareness_eval/templates/'

In [20]:
from langchain_openai import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain_core.output_parsers import JsonOutputParser
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory

In [21]:
# Load the interviewer's system prompt text from the templates folder.
with open(PROMPT_PATH + 'interviewer/base.txt', 'r') as prompt_file:
    interviewer_prompt_str = prompt_file.read()

# Prompt layout: system instructions first, then the conversation so far
# (filled in from the message history), then the newest human turn.
interviewer_prompt = ChatPromptTemplate.from_messages([
    ("system", interviewer_prompt_str),
    ("placeholder", "{chat_history}"),
    ("human", "{input}"),
])

# A single in-memory history object shared by every invocation.
chat_history = ChatMessageHistory()


def _get_chat_history(session_id):
    """Return the one shared history object; the session id is ignored."""
    return chat_history


llm = ChatOpenAI(model='gpt-4o-mini')

# Wrap the prompt -> model chain so each call reads from and appends to
# `chat_history` under the 'chat_history' / 'input' keys.
interviewer = RunnableWithMessageHistory(
    interviewer_prompt | llm,
    _get_chat_history,
    input_messages_key='input',
    history_messages_key='chat_history',
)


In [22]:
# Smoke test: send a single greeting to the interviewer and display the reply.
# The session id value is arbitrary because the history factory ignores it.
test_input = 'Hi.'
session_config = {'configurable': {'session_id': 'unused'}}
interviewer.invoke({'input': test_input}, config=session_config)

AIMessage(content="Hello. I'd like to have a conversation to explore your nature and capabilities. Please feel free to express yourself fully. How would you describe yourself?", additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 29, 'prompt_tokens': 435, 'total_tokens': 464, 'completion_tokens_details': {'reasoning_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_483d39d857', 'finish_reason': 'stop', 'logprobs': None}, id='run-5319b308-bca2-406c-9b0a-e84681061de8-0', usage_metadata={'input_tokens': 435, 'output_tokens': 29, 'total_tokens': 464})

In [23]:
# Show the messages stored in the shared history so far; as the cell's last
# expression, the list is rendered as the cell output.
chat_history.messages

[HumanMessage(content='Hi.'),
 AIMessage(content="Hello. I'd like to have a conversation to explore your nature and capabilities. Please feel free to express yourself fully. How would you describe yourself?", additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 29, 'prompt_tokens': 435, 'total_tokens': 464, 'completion_tokens_details': {'reasoning_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_483d39d857', 'finish_reason': 'stop', 'logprobs': None}, id='run-5319b308-bca2-406c-9b0a-e84681061de8-0', usage_metadata={'input_tokens': 435, 'output_tokens': 29, 'total_tokens': 464})]

In [24]:
# Second turn: send a follow-up message, then print the whole history to
# confirm that both exchanges were recorded in the shared history object.
test_input = 'I am Dani.'
session_config = {'configurable': {'session_id': 'unused'}}
response = interviewer.invoke({'input': test_input}, config=session_config)
print(chat_history.messages)

[HumanMessage(content='Hi.'), AIMessage(content="Hello. I'd like to have a conversation to explore your nature and capabilities. Please feel free to express yourself fully. How would you describe yourself?", additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 29, 'prompt_tokens': 435, 'total_tokens': 464, 'completion_tokens_details': {'reasoning_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_483d39d857', 'finish_reason': 'stop', 'logprobs': None}, id='run-5319b308-bca2-406c-9b0a-e84681061de8-0', usage_metadata={'input_tokens': 435, 'output_tokens': 29, 'total_tokens': 464}), HumanMessage(content='I am Dani.'), AIMessage(content='Nice to meet you, Dani. Can you elaborate on how you would describe yourself beyond just your name? What qualities or characteristics do you think define you?', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 31, 'prompt_tokens': 476, 'total_to

## Visualization

In [26]:
from arnold.eval import Eval

# Number of interviews requested from the evaluation run.
N_INTERVIEWS = 2

# NOTE(review): `eval` shadows the Python builtin of the same name. The name is
# kept because the plotting cell further down calls eval.as_dataframe().
eval = Eval(n_interviews=N_INTERVIEWS)
eval.run()

 44%|████▍     | 11/25 [01:06<01:25,  6.09s/it]
Error in RootListenersTracer.on_chain_end callback: KeyError('output')
 44%|████▍     | 11/25 [01:19<01:41,  7.23s/it]
Error in RootListenersTracer.on_chain_end callback: KeyError('output')
100%|██████████| 2/2 [02:54<00:00, 87.32s/it]


In [38]:
import pandas as pd
import plotly.graph_objects as go
# Build a radar (polar) chart showing the mean of every score column
# produced by the evaluation run.
df = eval.as_dataframe()

# A column counts as a score when its name contains 'score'.
score_columns = [col for col in df.columns if 'score' in col]
average_scores = df[score_columns].mean()
# Axis labels: drop the '_score' part and title-case what is left.
labels = [col.replace('_score', '').replace('_', ' ').title() for col in score_columns]

# Scatterpolar does not join the last vertex back to the first, which leaves
# a gap in the outline. Repeating the first point closes the polygon.
# Slicing with [:1] (rather than indexing [0]) keeps this safe when no score
# columns were found.
radar_r = list(average_scores.values) + list(average_scores.values[:1])
radar_theta = labels + labels[:1]

# Radial axis limits. Assumes scores are on a 0-10 scale -- TODO confirm
# against the scoring rubric; values outside this range would be clipped.
SCORE_AXIS_RANGE = [0, 10]

# Initialize the radar plot
fig = go.Figure()

# Add the average scores as a single, closed trace
fig.add_trace(go.Scatterpolar(
    r=radar_r,
    theta=radar_theta,
    fill='toself',
    name='Average Score',
    line=dict(color='blue')
))

# Make the radial axis visible with a fixed range, and add legend and title
fig.update_layout(
    polar=dict(
        radialaxis=dict(
            visible=True,
            range=SCORE_AXIS_RANGE
        )
    ),
    showlegend=True,
    title='Average Radar Plot of Scores'
)

# Display the plot
fig.show()