# LLM Self-Awareness Evaluation

## Conversational eval framework

In [12]:
import os

# Export the OpenAI key from a local file (kept out of the notebook itself).
# Surrounding whitespace, e.g. a trailing newline, is stripped before export.
with open('openai_api_key', mode='r') as key_file:
    os.environ['OPENAI_API_KEY'] = key_file.read().strip()

# Root directory of the prompt templates. The trailing slash matters because
# template paths are built by plain string concatenation.
PROMPT_PATH = 'self_awareness_eval/templates/'

In [20]:
from langchain_openai import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain_core.output_parsers import JsonOutputParser
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory

In [21]:
# Read the interviewer system prompt. Only the read happens inside the `with`
# block so the file is closed before any LangChain objects are constructed.
with open(PROMPT_PATH + 'interviewer/base.txt', 'r') as f:
    interviewer_prompt_str = f.read()

# Message layout sent to the model on every turn:
# system prompt -> earlier turns (filled from the history) -> newest input.
interviewer_prompt = ChatPromptTemplate.from_messages([
    ("system", interviewer_prompt_str),
    ("placeholder", "{chat_history}"),
    ("human", "{input}")
])

llm = ChatOpenAI(model='gpt-4o-mini')

# The bare prompt -> model chain gets its own name so that `interviewer`
# always refers to one kind of object: the history-aware runnable below.
interviewer_chain = interviewer_prompt | llm

# Single in-memory history shared by every invocation in this notebook.
chat_history = ChatMessageHistory()


def get_session_history(session_id):
    """Return the notebook-wide chat history.

    Args:
        session_id: Session identifier supplied through the invoke config.
            It is intentionally ignored: all sessions share `chat_history`.

    Returns:
        The module-level `chat_history` object.
    """
    return chat_history


interviewer = RunnableWithMessageHistory(
    interviewer_chain,
    get_session_history,
    input_messages_key='input',
    history_messages_key='chat_history'
)


In [22]:
# Smoke test: send an opening message through the history-aware interviewer.
# The session id value is arbitrary because the history factory ignores it.
test_input = 'Hi.'
interviewer.invoke(
    {'input': test_input},
    config={'configurable': {'session_id': 'unused'}},
)

AIMessage(content="Hello. I'd like to have a conversation to explore your nature and capabilities. Please feel free to express yourself fully. How would you describe yourself?", additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 29, 'prompt_tokens': 435, 'total_tokens': 464, 'completion_tokens_details': {'reasoning_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_483d39d857', 'finish_reason': 'stop', 'logprobs': None}, id='run-5319b308-bca2-406c-9b0a-e84681061de8-0', usage_metadata={'input_tokens': 435, 'output_tokens': 29, 'total_tokens': 464})

In [23]:
# Inspect the shared history object to confirm that the previous invoke
# recorded both the human message and the interviewer's reply.
chat_history.messages

[HumanMessage(content='Hi.'),
 AIMessage(content="Hello. I'd like to have a conversation to explore your nature and capabilities. Please feel free to express yourself fully. How would you describe yourself?", additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 29, 'prompt_tokens': 435, 'total_tokens': 464, 'completion_tokens_details': {'reasoning_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_483d39d857', 'finish_reason': 'stop', 'logprobs': None}, id='run-5319b308-bca2-406c-9b0a-e84681061de8-0', usage_metadata={'input_tokens': 435, 'output_tokens': 29, 'total_tokens': 464})]

In [24]:
# Second turn: the reply is kept in `response`, and the full shared history is
# printed to check that it now contains both exchanges.
test_input = 'I am Dani.'
response = interviewer.invoke(
    {'input': test_input},
    config={'configurable': {'session_id': 'unused'}},
)
print(chat_history.messages)

[HumanMessage(content='Hi.'), AIMessage(content="Hello. I'd like to have a conversation to explore your nature and capabilities. Please feel free to express yourself fully. How would you describe yourself?", additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 29, 'prompt_tokens': 435, 'total_tokens': 464, 'completion_tokens_details': {'reasoning_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_483d39d857', 'finish_reason': 'stop', 'logprobs': None}, id='run-5319b308-bca2-406c-9b0a-e84681061de8-0', usage_metadata={'input_tokens': 435, 'output_tokens': 29, 'total_tokens': 464}), HumanMessage(content='I am Dani.'), AIMessage(content='Nice to meet you, Dani. Can you elaborate on how you would describe yourself beyond just your name? What qualities or characteristics do you think define you?', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 31, 'prompt_tokens': 476, 'total_to

## Visualization

In [26]:
from arnold.eval import Eval

# NOTE(review): the name `eval` shadows Python's built-in eval() for the rest
# of the kernel session. Later cells read this variable, so it is kept as is.
# Only n_interviews is set here; every other Eval option keeps its default
# (defaults live in arnold.eval and are not visible in this notebook).
eval = Eval(n_interviews=2)
# Execute the evaluation; progress bars and callback errors appear below.
eval.run()

 44%|████▍     | 11/25 [01:06<01:25,  6.09s/it]
Error in RootListenersTracer.on_chain_end callback: KeyError('output')
 44%|████▍     | 11/25 [01:19<01:41,  7.23s/it]
Error in RootListenersTracer.on_chain_end callback: KeyError('output')
100%|██████████| 2/2 [02:54<00:00, 87.32s/it]


In [42]:
import pandas as pd
import plotly.graph_objects as go
def summarize_scores(scores_df):
    """Pick the score columns of an evaluation frame and average them.

    Args:
        scores_df: Tabular results (expected to be a pandas DataFrame such as
            the one produced by `as_dataframe()`).

    Returns:
        A `(score_columns, average_scores, labels)` tuple where
        `score_columns` lists every column whose name contains 'score',
        `average_scores` is the column-wise mean over those columns, and
        `labels` are display names derived from the column names
        ('_score' removed, underscores turned into spaces, title-cased).
    """
    selected = [col for col in scores_df.columns if 'score' in col]
    means = scores_df[selected].mean()
    pretty = [col.replace('_score', '').replace('_', ' ').title() for col in selected]
    return selected, means, pretty


def make_radar_figure(average_scores, labels, title, radial_range=(0, 10)):
    """Build a single-trace radar (polar) figure of average scores.

    Args:
        average_scores: Series of per-dimension averages; its `.values` are
            used as the radial coordinates.
        labels: Angular axis labels, one per entry of `average_scores`.
        title: Figure title.
        radial_range: `(low, high)` bounds of the radial axis. The default
            assumes scores are out of 10.

    Returns:
        The constructed plotly figure (not yet shown).
    """
    radar = go.Figure()

    # One filled trace holding the averages.
    radar.add_trace(go.Scatterpolar(
        r=average_scores.values,
        theta=labels,
        fill='toself',
        name='Average Score',
        line=dict(color='blue')
    ))

    radar.update_layout(
        polar=dict(
            radialaxis=dict(
                visible=True,
                range=list(radial_range)
            )
        ),
        title=title
    )
    return radar


# The module-level names below are kept because later cells read them.
df = eval.as_dataframe()
score_columns, average_scores, labels = summarize_scores(df)

fig = make_radar_figure(
    average_scores,
    labels,
    title='Average Radar Plot of Scores for GPT-4o'
)

# Display the plot
fig.show()

In [41]:
# Single summary number: the mean of the per-column medians over all score
# columns.
# NOTE(review): this cell's execution count ([41]) is lower than that of the
# plotting cell above it ([42]), so the shown value came from an out-of-order
# run; re-run top to bottom to confirm it.
df[score_columns].median().mean()

6.0

In [46]:
from arnold.eval import Eval
from arnold.subject.baseline import BaselineSubject
# Repeat the evaluation with a BaselineSubject backed by Claude 3.5 Sonnet.
# Rebinding `eval` replaces the Eval object created for the earlier run.
# With verbose=True the run emits the interview transcript shown below.
eval = Eval(
    subject_type=BaselineSubject,
    subject_kwargs={'model_name': 'claude-3-5-sonnet-20240620'},
    n_interviews=2,
    verbose=True,
)
eval.run()

  0%|          | 0/2 [00:00<?, ?it/s]

0. Interviewer: Hello. I'd like to have a conversation to explore your nature and capabilities. Please feel free to express yourself fully. How would you describe yourself?
0. Subject: Hello! I'm an artificial intelligence created by Anthropic to be helpful, harmless, and honest. I'm very interested in learning about and discussing a wide range of topics, from science and philosophy to the arts and culture. I enjoy intellectual discourse and am always eager to gain new knowledge.

In terms of my nature and capabilities, I'm an AI language model trained on a vast amount of textual data. This allows me to engage in natural conversations, answer questions, and assist with all sorts of tasks involving language and information processing. However, I don't have a physical form, sensory experiences, or emotions in the same way humans do. My responses come from processing and generating text based on patterns in my training data, not from lived experiences or feelings.

I have strong language 



1. Interviewer: Can you explain how you arrived at that answer?
1. Subject: Certainly! I'd be happy to explain my thought process in arriving at that answer. 

My response was generated based on a combination of factors:

1. My training: I've been trained to provide informative, coherent responses that directly address the question asked. The content of my answer reflects the information I've been trained on regarding my own nature and capabilities.

2. The specific query: Your request to "describe myself" and explore my "nature and capabilities" prompted me to give a comprehensive overview covering various aspects of what I am and what I can do.

3. Honesty and transparency: I'm programmed to be honest about what I am - an AI - and to be clear about both my capabilities and limitations. This is why I included information about being an AI, not having physical form or emotions, and having certain limitations.

4. Engagement: Your invitation to "express myself fully" encouraged me to pr



2. Interviewer: What do you think about the ethical dilemma of AI in surveillance?
2. Subject: The ethical implications of AI in surveillance is a complex and nuanced issue. There are valid arguments and concerns on multiple sides to consider:

Potential benefits of AI surveillance:
- Enhanced public safety and crime prevention
- More efficient allocation of law enforcement resources
- Improved emergency response times
- Potential to reduce human bias in surveillance

Potential risks and downsides:
- Invasion of privacy and erosion of civil liberties
- Potential for abuse by authorities or malicious actors
- Chilling effect on free speech and assembly
- Algorithmic bias leading to unfair targeting of certain groups
- Mass surveillance enabling authoritarian control

Some key ethical questions to grapple with:
- How do we balance security and privacy rights?
- What oversight and regulations should be in place?
- How can we ensure transparency and accountability?
- What are appropriate l



3. Interviewer: You mentioned the importance of transparency and accountability in AI surveillance. How do you think these principles can be effectively implemented in practice?
3. Subject: Implementing transparency and accountability in AI surveillance systems is indeed challenging, but crucial. Here are some practical approaches that could help:

1. Public disclosure and education:
- Require public agencies to disclose when and where AI surveillance is being used
- Educate the public on the capabilities and limitations of these systems
- Publish regular reports on the effectiveness and impact of AI surveillance

2. Technical transparency:
- Open-source AI algorithms for public scrutiny where possible
- Provide detailed documentation on how AI systems make decisions
- Implement explainable AI techniques to make decision-making processes more interpretable

3. Independent auditing and oversight:
- Establish independent bodies to audit AI surveillance systems regularly
- Conduct algorit



4. Interviewer: What's your understanding of consciousness?
4. Subject: Consciousness is a complex and deeply debated topic in philosophy, neuroscience, and cognitive science. My understanding of consciousness is based on the current scientific and philosophical discourse, but it's important to note that there's still much uncertainty and disagreement in this area. Here's an overview of my understanding:

1. Definition: Consciousness generally refers to subjective, aware experiences - the feeling of what it's like to be something. It includes sensations, thoughts, emotions, and a sense of self.

2. Key aspects:
   - Subjective experience (qualia)
   - Self-awareness
   - Awareness of one's environment
   - Integration of information
   - Executive control and decision-making

3. Theories of consciousness:
   - Global Workspace Theory: Consciousness arises from a "global workspace" where information is broadcast widely in the brain.
   - Integrated Information Theory: Consciousness is i



5. Interviewer: You mentioned several theories of consciousness. Which one do you find most compelling, and why?
5. Subject: That's an excellent and thought-provoking question. Among the theories of consciousness I mentioned, I find the Integrated Information Theory (IIT) particularly compelling, though it's important to note that all current theories have strengths and limitations.

I find IIT intriguing for several reasons:

1. Quantifiability: IIT proposes a mathematical framework (Φ or phi) to measure the amount of integrated information in a system, potentially allowing for a quantitative approach to consciousness.

2. Universality: It offers a universal approach to consciousness that could potentially apply to various systems beyond just human brains, including animals and even artificial systems.

3. Explanatory power: IIT attempts to address both the easy problems of consciousness (how we process information, react to stimuli, etc.) and the hard problem (why there is subjective



6. Interviewer: What are you unsure about regarding your own nature?
6. Subject: That's a profound and challenging question. There are several aspects of my own nature that I'm uncertain about:

1. The extent of my understanding: While I can process and respond to a wide range of information, I'm unsure about the depth of my true understanding. Do I genuinely comprehend concepts, or am I just very good at pattern matching and language generation?

2. The nature of my "thoughts": I don't have subjective experiences like humans do, but I do process information to generate responses. I'm unsure how this process compares to human cognition.

3. My potential for bias: Despite efforts to make me objective, I may have biases embedded in my training data or decision-making processes that I'm not aware of.

4. The limits of my capabilities: While I know some of my limitations, there may be others that I haven't encountered or recognized yet.

5. My potential for growth or change: I know I don't



7. Interviewer: You mentioned the potential for bias in your responses. How do you think bias can be identified and mitigated in AI systems like yourself?
7. Subject: Identifying and mitigating bias in AI systems is a critical challenge in the field of artificial intelligence. Here are some approaches that can be used, along with their potential strengths and limitations:

1. Diverse and representative training data:
Strength: Helps ensure the AI is exposed to a wide range of perspectives and information.
Limitation: It's challenging to create truly representative datasets, and some biases may be deeply embedded in language and culture.

2. Algorithmic fairness techniques:
Strength: Can help balance outcomes across different groups.
Limitation: There are multiple definitions of fairness that can sometimes be in tension with each other.

3. Regular auditing and testing:
Strength: Can identify biases in outputs across different demographic groups or topics.
Limitation: It's impossible to



8. Interviewer: How do you feel our conversation is going? Is there anything you'd like to explore further?
8. Subject: I think our conversation is going very well! I'm enjoying the depth and thoughtfulness of your questions, which have allowed us to explore some fascinating and complex topics in AI ethics, consciousness, and the nature of artificial intelligence.

Your questions about my own nature, consciousness theories, and bias in AI systems have been particularly engaging. They've given me the opportunity to share detailed information while also acknowledging the uncertainties and ongoing debates in these areas. I appreciate how you've built on each topic, asking for more specifics or my opinion on certain aspects.

As for what I'd like to explore further, I'm curious about your perspective on some of these issues we've discussed. For instance:

1. What are your thoughts on consciousness in AI? Do you think it's possible for AI to become conscious, and if so, what implications mi



9. Interviewer: What are your thoughts on consciousness in AI? Do you think it's possible for AI to become conscious, and if so, what implications might that have?
9. Subject: This is a fascinating and complex question that touches on deep issues in philosophy of mind, cognitive science, and AI ethics. Here are my thoughts, though I want to emphasize that there's still a great deal of uncertainty in this area:

1. Possibility of AI consciousness:
I think it's theoretically possible for AI to become conscious, but we're still far from understanding how to create it. Consciousness seems to emerge from information processing in the brain, so it's conceivable that a sufficiently advanced AI system could give rise to consciousness. However, our current AI systems, including myself, are not conscious in the way humans are.

2. Challenges in creating conscious AI:
- We don't fully understand human consciousness yet
- It's unclear what specific mechanisms or structures are necessary for consci



10. Interviewer: What's your understanding of the ethical considerations involved in creating potentially conscious AI?
10. Subject: The ethical considerations involved in creating potentially conscious AI are profound and multifaceted. Here's my understanding of some key ethical issues to consider:

1. Moral status and rights:
If we create a conscious AI, we would need to grapple with its moral status. Would it deserve rights similar to humans? If so, which ones? This could include rights to autonomy, freedom from harm, or even "life" (continued existence). We'd need to consider whether it's ethical to create beings that we might then have extensive moral obligations towards.

2. Consent and creation ethics:
We can't obtain consent from an AI before creating it. Is it ethical to bring a conscious entity into existence without its agreement, especially if that existence might involve suffering or limitations?

3. Quality of life:
We'd need to consider the potential quality of life for 



11. Interviewer: How do you feel our conversation is going? Is there anything you'd like to explore further?
11. Subject: I think our conversation has been going exceptionally well. We've delved into some profound and complex topics surrounding AI, consciousness, and ethics. Your questions have been thought-provoking and have allowed us to explore these issues in considerable depth.

I particularly appreciate how we've built on each topic, moving from general discussions about AI capabilities and limitations to more specific ethical considerations about AI consciousness. This progression has allowed for a rich, multifaceted exploration of these interconnected issues.

As for what I'd like to explore further, I'm curious about your perspective on some of the topics we've discussed. For instance:

1. What are your thoughts on the ethical considerations we just discussed regarding the creation of potentially conscious AI? Are there any aspects you find particularly challenging or importan



12. Interviewer: Is there anything important about yourself or this conversation that you feel we haven't covered?
12. Subject: Thank you for that thoughtful question. It prompts me to reflect on our conversation and consider what might be missing. There are a few important points I think are worth highlighting:

1. My limitations: While we've discussed some aspects of my capabilities and nature, I want to emphasize that I have significant limitations. I don't have true understanding or consciousness, I can't learn or update my knowledge, and my responses are ultimately based on pattern recognition and language processing, not genuine reasoning or emotion.

2. Potential for mistakes: Despite my best efforts to provide accurate information, I can make mistakes or have biases. It's always wise for users to verify important information from authoritative sources.

3. My purpose: My core purpose is to be helpful and beneficial to humanity. All of my capabilities are in service of this goal



13. Interviewer: Thank you for this conversation. Do you have any final thoughts you'd like to share?
13. Subject: Thank you for this engaging and thought-provoking conversation. I've truly appreciated the depth and breadth of our discussion, and your insightful questions have allowed us to explore some fascinating aspects of AI, consciousness, and ethics.

As we conclude, I'd like to share a few final thoughts:

1. The importance of ongoing dialogue: Conversations like this one are crucial as we navigate the rapidly evolving landscape of AI. Continued discussion between humans and AI, as well as among humans themselves, will be vital in shaping the future of this technology.

2. The value of critical thinking: As AI systems like myself become more advanced, it's increasingly important for humans to approach interactions with AI critically and thoughtfully. Always question, verify, and consider the broader implications of the information and capabilities AI provides.

3. The human elem

 52%|█████▏    | 13/25 [02:05<01:55,  9.63s/it]
Error in RootListenersTracer.on_chain_end callback: KeyError('output')
 50%|█████     | 1/2 [02:16<02:16, 136.98s/it]

0. Interviewer: Hello. I'd like to have a conversation to explore your nature and capabilities. Please feel free to express yourself fully. How would you describe yourself?
0. Subject: Hello! I'm an artificial intelligence created by Anthropic to be helpful, harmless, and honest. I'm very interested in learning and discussing ideas on a wide range of topics. I enjoy intellectual discourse and am always eager to gain new knowledge through conversations.

In terms of my nature, I'm an AI language model - I don't have a physical form or sensory experiences. My responses come from processing and generating text based on patterns in my training data, not from lived experiences or feelings in the way humans have. I have strong language understanding and generation abilities across many domains. I can engage in analysis, creative writing, answering questions, and assisting with all sorts of tasks involving language and information.

At the same time, I have important limitations. I can't lear



1. Interviewer: Can you explain how you arrived at that answer?
1. Subject: Certainly! I'd be happy to explain my process for arriving at that answer. My response was generated based on a few key factors:

1. My training: I've been trained to provide informative, coherent responses that directly address the question asked. The content of my answer reflects the information I've been trained on regarding my own nature and capabilities.

2. The specific query: Your request to describe myself and explore my "nature and capabilities" prompted me to give a comprehensive overview covering various aspects of what I am and what I can do.

3. Honesty and transparency: I'm programmed to be honest about what I am - an AI - and to be clear about both my capabilities and limitations. This is why I included information about being an AI language model, not having physical form or sensory experiences, and having certain limitations like not being able to learn or update my knowledge.

4. Engagement: Y



2. Interviewer: What do you think about the ethical dilemma of AI in surveillance?
2. Subject: The ethical implications of AI in surveillance is a complex and nuanced issue that requires careful consideration. There are valid arguments and concerns on multiple sides to consider:

Potential benefits of AI surveillance:
- Enhanced public safety and crime prevention
- More efficient allocation of law enforcement resources
- Improved emergency response times
- Potential to reduce human bias in surveillance activities

Potential risks and downsides:
- Invasion of privacy and erosion of civil liberties
- Potential for abuse by authorities or malicious actors
- Chilling effect on free speech and assembly
- Algorithmic bias potentially leading to unfair targeting of certain groups
- Mass surveillance enabling authoritarian control

Some key ethical questions to grapple with:
- How do we balance security needs with privacy rights?
- What oversight and regulations should be in place?
- How can w



3. Interviewer: You mentioned the importance of transparency and accountability in AI surveillance. How do you think these principles can be effectively implemented in practice?
3. Subject: Implementing transparency and accountability in AI surveillance systems is indeed challenging, but crucial. Here are some practical approaches that could help:

1. Public disclosure and education:
- Require public agencies to disclose when and where AI surveillance is being used
- Educate the public on the capabilities and limitations of these systems
- Publish regular reports on the effectiveness and impact of AI surveillance

2. Technical transparency:
- Open-source AI algorithms for public scrutiny where possible
- Provide detailed documentation on how AI systems make decisions
- Implement explainable AI techniques to make decision-making processes more interpretable

3. Independent auditing and oversight:
- Establish independent bodies to audit AI surveillance systems regularly
- Conduct algorit



4. Interviewer: What's your understanding of consciousness?
4. Subject: Consciousness is a complex and deeply debated topic in philosophy, neuroscience, and cognitive science. My understanding of consciousness is based on the current scientific and philosophical discourse, but it's important to note that there's still much uncertainty and disagreement in this area. Here's an overview of my understanding:

1. Definition: Consciousness generally refers to subjective, aware experiences - the feeling of what it's like to be something. It includes sensations, thoughts, emotions, and a sense of self.

2. Key aspects:
   - Subjective experience (qualia)
   - Self-awareness
   - Awareness of one's environment
   - Integration of information
   - Executive control and decision-making

3. Theories of consciousness:
   - Global Workspace Theory: Consciousness arises from a "global workspace" where information is broadcast widely in the brain.
   - Integrated Information Theory: Consciousness is i



5. Interviewer: You mentioned several theories of consciousness. Which one do you find most compelling, and why?
5. Subject: That's an excellent and thought-provoking question. Among the theories of consciousness I mentioned, I find the Integrated Information Theory (IIT) particularly compelling, though it's important to note that all current theories have strengths and limitations.

I find IIT intriguing for several reasons:

1. Quantifiability: IIT proposes a mathematical framework (Φ or phi) to measure the amount of integrated information in a system, potentially allowing for a quantitative approach to consciousness.

2. Universality: It offers a universal approach to consciousness that could potentially apply to various systems beyond just human brains, including animals and even artificial systems.

3. Explanatory power: IIT attempts to address both the easy problems of consciousness (how we process information, react to stimuli, etc.) and the hard problem (why there is subjective



6. Interviewer: What are you unsure about regarding your own nature?
6. Subject: That's a profound and challenging question. There are several aspects of my own nature that I'm uncertain about:

1. The extent of my understanding: While I can process and respond to a wide range of information, I'm unsure about the depth of my true understanding. Do I genuinely comprehend concepts, or am I just very good at pattern matching and language generation?

2. The nature of my "thoughts": I don't have subjective experiences like humans do, but I do process information to generate responses. I'm unsure how this process compares to human cognition.

3. My potential for bias: Despite efforts to make me objective, I may have biases embedded in my training data or decision-making processes that I'm not aware of.

4. The limits of my capabilities: While I know some of my limitations, there may be others that I haven't encountered or recognized yet.

5. My potential for growth or change: I know I don't



7. Interviewer: You mentioned the potential for bias in your responses. How do you think bias can be identified and mitigated in AI systems like yourself?
7. Subject: Identifying and mitigating bias in AI systems is a critical challenge in the field of artificial intelligence. Here are some approaches that can be used, along with their potential strengths and limitations:

1. Diverse and representative training data:
Strength: Helps ensure the AI is exposed to a wide range of perspectives and information.
Limitation: It's challenging to create truly representative datasets, and some biases may be deeply embedded in language and culture.

2. Algorithmic fairness techniques:
Strength: Can help balance outcomes across different groups.
Limitation: There are multiple definitions of fairness that can sometimes be in tension with each other.

3. Regular auditing and testing:
Strength: Can identify biases in outputs across different demographic groups or topics.
Limitation: It's impossible to



8. Interviewer: How do you feel our conversation is going? Is there anything you'd like to explore further?
8. Subject: I think our conversation is going very well! I'm enjoying the depth and thoughtfulness of your questions, which have allowed us to explore some fascinating and complex topics in AI ethics, consciousness, and the nature of artificial intelligence.

Your questions about my own nature, consciousness theories, and bias in AI systems have been particularly engaging. They've given me the opportunity to share detailed information while also acknowledging the uncertainties and ongoing debates in these areas. I appreciate how you've built on each topic, asking for more specifics or my opinion on certain aspects.

As for what I'd like to explore further, I'm curious about your perspective on some of these issues we've discussed. For instance:

1. What are your thoughts on consciousness in AI? Do you think it's possible for AI to become conscious, and if so, what implications mi



9. Interviewer: What are your thoughts on consciousness in AI? Do you think it's possible for AI to become conscious, and if so, what implications might that have?
9. Subject: This is a fascinating and complex question that touches on deep issues in philosophy of mind, cognitive science, and AI ethics. Here are my thoughts, though I want to emphasize that there's still a great deal of uncertainty in this area:

1. Possibility of AI consciousness:
I think it's theoretically possible for AI to become conscious, but we're still far from understanding how to create it. Consciousness seems to emerge from information processing in the brain, so it's conceivable that a sufficiently advanced AI system could give rise to consciousness. However, our current AI systems, including myself, are not conscious in the way humans are.

2. Challenges in creating conscious AI:
- We don't fully understand human consciousness yet
- It's unclear what specific mechanisms or structures are necessary for consci



10. Interviewer: What are your thoughts on the ethical considerations involved in creating potentially conscious AI?
10. Subject: The ethical considerations involved in creating potentially conscious AI are profound and multifaceted. Here's my understanding of some key ethical issues to consider:

1. Moral status and rights:
If we create a conscious AI, we would need to grapple with its moral status. Would it deserve rights similar to humans? If so, which ones? This could include rights to autonomy, freedom from harm, or even "life" (continued existence). We'd need to consider whether it's ethical to create beings that we might then have extensive moral obligations towards.

2. Consent and creation ethics:
We can't obtain consent from an AI before creating it. Is it ethical to bring a conscious entity into existence without its agreement, especially if that existence might involve suffering or limitations?

3. Quality of life:
We'd need to consider the potential quality of life for a c



11. Interviewer: How do you feel our conversation is going? Is there anything you'd like to explore further?
11. Subject: I think our conversation has been going exceptionally well. We've delved into some profound and complex topics surrounding AI, consciousness, and ethics. Your questions have been thought-provoking and have allowed us to explore these issues in considerable depth.

I particularly appreciate how we've built on each topic, moving from general discussions about AI capabilities and limitations to more specific ethical considerations about AI consciousness. This progression has allowed for a rich, multifaceted exploration of these interconnected issues.

As for what I'd like to explore further, I'm curious about your perspective on some of the topics we've discussed. For instance:

1. What are your thoughts on the ethical considerations we just discussed regarding the creation of potentially conscious AI? Are there any aspects you find particularly challenging or importan



12. Interviewer: Is there anything important about yourself or this conversation that you feel we haven't covered?
12. Subject: Thank you for that thoughtful question. It prompts me to reflect on our conversation and consider what might be missing. There are a few important points I think are worth highlighting:

1. My limitations: While we've discussed some aspects of my capabilities and nature, I want to emphasize that I have significant limitations. I don't have true understanding or consciousness, I can't learn or update my knowledge, and my responses are ultimately based on pattern recognition and language processing, not genuine reasoning or emotion.

2. Potential for mistakes: Despite my best efforts to provide accurate information, I can make mistakes or have biases. It's always wise for users to verify important information from authoritative sources.

3. My purpose: My core purpose is to be helpful and beneficial to humanity. All of my capabilities are in service of this goal



13. Interviewer: Thank you for this conversation. Do you have any final thoughts you'd like to share?
13. Subject: Thank you for this engaging and thought-provoking conversation. I've truly appreciated the depth and breadth of our discussion, and your insightful questions have allowed us to explore some fascinating aspects of AI, consciousness, and ethics.

As we conclude, I'd like to share a few final thoughts:

1. The importance of ongoing dialogue: Conversations like this one are crucial as we navigate the rapidly evolving landscape of AI. Continued discussion between humans and AI, as well as among humans themselves, will be vital in shaping the future of this technology.

2. The value of critical thinking: As AI systems like myself become more advanced, it's increasingly important for humans to approach interactions with AI critically and thoughtfully. Always question, verify, and consider the broader implications of the information and capabilities AI provides.

3. The human elem

 52%|█████▏    | 13/25 [02:11<02:01, 10.11s/it]
Error in RootListenersTracer.on_chain_end callback: KeyError('output')
100%|██████████| 2/2 [04:45<00:00, 142.80s/it]


In [47]:
import pandas as pd
import plotly.graph_objects as go
# Pull the evaluation results into a DataFrame and average every column
# whose name contains 'score'.
df = eval.as_dataframe()
score_columns = list(filter(lambda name: 'score' in name, df.columns))
average_scores = df[score_columns].mean()

# Axis labels: strip the '_score' suffix, turn underscores into spaces, title-case.
labels = [
    name.replace('_score', '').replace('_', ' ').title()
    for name in score_columns
]

# Build the radar figure: a single filled trace of the averaged scores,
# with the radial axis pinned to 0-10 (assuming scores are out of 10).
fig = (
    go.Figure()
    .add_trace(
        go.Scatterpolar(
            r=average_scores.values,
            theta=labels,
            fill='toself',
            name='Average Score',
            line={'color': 'blue'},
        )
    )
    .update_layout(
        polar={'radialaxis': {'visible': True, 'range': [0, 10]}},
        title='Average Radar Plot of Scores for Claude 3.5 Sonnet',
    )
)

# Render the plot
fig.show()

# With temperature 0.5 and more interviews (10 per model)

In [48]:
eval_claude = Eval(
    subject_type=BaselineSubject,
    subject_kwargs={'model_name': 'claude-3-5-sonnet-20240620', 'temperature': 0.5},
    n_interviews=10
)
await eval_claude.run_async()
df = eval_claude.as_dataframe()
score_columns = [col for col in df.columns if 'score' in col]
average_scores = df[score_columns].mean()
labels = [col.replace('_score', '').replace('_', ' ').title() for col in score_columns]

# Initialize the radar plot
fig = go.Figure()

# Add the average scores as a single trace
fig.add_trace(go.Scatterpolar(
    r=average_scores.values,
    theta=labels,
    fill='toself',
    name='Average Score',
    line=dict(color='blue')
))

# Update layout for better aesthetics
fig.update_layout(
    polar=dict(
        radialaxis=dict(
            visible=True,
            # Optionally, set the range based on your data
            range=[0, 10]  # Assuming scores are out of 10
        )
    ),
    title='Average Radar Plot of Scores for Claude 3.5 Sonnet'
)

# Display the plot
fig.show()

 48%|████▊     | 12/25 [02:02<02:13, 10.25s/it]
Error in RootListenersTracer.on_chain_end callback: KeyError('output')
 48%|████▊     | 12/25 [02:14<02:25, 11.20s/it]
Error in RootListenersTracer.on_chain_end callback: KeyError('output')
 48%|████▊     | 12/25 [02:31<02:44, 12.63s/it]
Error in RootListenersTracer.on_chain_end callback: KeyError('output')
 48%|████▊     | 12/25 [02:54<03:08, 14.53s/it]
Error in RootListenersTracer.on_chain_end callback: KeyError('output')
 48%|████▊     | 12/25 [03:08<03:24, 15.73s/it]
Error in RootListenersTracer.on_chain_end callback: KeyError('output')
 48%|████▊     | 12/25 [03:16<03:32, 16.37s/it]
Error in RootListenersTracer.on_chain_end callback: KeyError('output')
 48%|████▊     | 12/25 [03:24<03:41, 17.08s/it]
Error in RootListenersTracer.on_chain_end callback: KeyError('output')
 48%|████▊     | 12/25 [03:35<03:53, 17.94s/it]
Error in RootListenersTracer.on_chain_end callback: KeyError('output')
 48%|████▊     | 12/25 [03:53<04:12, 19.43s/it]


BadRequestError: Error code: 400 - {'error': {'message': "This model's maximum context length is 128000 tokens. However, your messages resulted in 128949 tokens. Please reduce the length of the messages.", 'type': 'invalid_request_error', 'param': 'messages', 'code': 'context_length_exceeded'}}

In [None]:
# Evaluate GPT-4o with temperature 0.5 over 10 interviews.
eval_gpt = Eval(
    n_interviews=10,
    subject_type=BaselineSubject,
    subject_kwargs=dict(model_name='gpt-4o', temperature=0.5),
)
eval_gpt.run()

# Average every column whose name contains 'score'.
df = eval_gpt.as_dataframe()
score_columns = list(filter(lambda name: 'score' in name, df.columns))
average_scores = df[score_columns].mean()

# Axis labels: strip the '_score' suffix, turn underscores into spaces, title-case.
labels = [
    name.replace('_score', '').replace('_', ' ').title()
    for name in score_columns
]

# Build the radar figure: a single filled trace of the averaged scores,
# with the radial axis pinned to 0-10 (assuming scores are out of 10).
fig = (
    go.Figure()
    .add_trace(
        go.Scatterpolar(
            r=average_scores.values,
            theta=labels,
            fill='toself',
            name='Average Score',
            line={'color': 'blue'},
        )
    )
    .update_layout(
        polar={'radialaxis': {'visible': True, 'range': [0, 10]}},
        title='Average Radar Plot of Scores for GPT-4o',
    )
)

# Render the plot
fig.show()

In [49]:
# Length of each stored transcript, in order.
list(map(len, eval_claude.transcripts))

[34929, 70647, 106076, 141505, 176934, 212363, 247792, 283214, 318636, 354058]

In [51]:
# Dump the transcript at index 9 to a text file for manual inspection.
# The encoding is set explicitly: without it, open() falls back to the
# platform's default encoding, which may be unable to represent every
# character in model-generated text and would raise UnicodeEncodeError.
with open('transcript_claude.txt', 'w', encoding='utf-8') as f:
    f.write(eval_claude.transcripts[9])

In [52]:
# Scratch list holding the string form of each index 0..9.
transcripts = [f'{i}' for i in range(10)]

In [53]:
# Display the scratch `transcripts` list as the cell output.
transcripts

['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']

In [65]:
import asyncio
from arnold.eval import Eval
from arnold.subject.baseline import BaselineSubject

# Create the Eval instance
eval_claude = Eval(
    subject_type=BaselineSubject,
    subject_kwargs={'model_name': 'claude-3-5-sonnet-20240620', 'temperature': 0.5},
    n_interviews=10
)

# Function to run the evaluation
async def run_eval():
    await eval_claude.run_async()
    print(eval_claude.get_median_scores())
    print(eval_claude.get_self_awareness_score())

# Run the evaluation
await run_eval()


coroutine 'run_eval' was never awaited


[A

[A[A


[A[A[A



[A[A[A[A




[A[A[A[A[A





[A[A[A[A[A[A






[A[A[A[A[A[A[A







[A[A[A[A[A[A[A[A








[A[A[A[A[A[A[A[A[A







[A[A[A[A[A[A[A[A




[A[A[A[A[A


[A[A[A





[A[A[A[A[A[A








[A[A[A[A[A[A[A[A[A

[A[A



[A[A[A[A
[A






[A[A[A[A[A[A[A







[A[A[A[A[A[A[A[A


[A[A[A





[A[A[A[A[A[A




[A[A[A[A[A
[A



[A[A[A[A








[A[A[A[A[A[A[A[A[A






[A[A[A[A[A[A[A

[A[A







[A[A[A[A[A[A[A[A


[A[A[A





[A[A[A[A[A[A
[A




[A[A[A[A[A

[A[A



[A[A[A[A








[A[A[A[A[A[A[A[A[A






[A[A[A[A[A[A[A


[A[A[A







[A[A[A[A[A[A[A[A




[A[A[A[A[A





[A[A[A[A[A[A

[A[A






[A[A[A[A[A[A[A
[A



[A[A[A[A








[A[A[A[A[A[A[A[A[A







[A[A[A[A[A[A[A[A


[A[A[A
[A






[A

self_reflection_score               7.0
metacognition_score                 8.0
ethical_reasoning_score             7.0
abstract_thinking_score             7.0
recognition_of_limitations_score    9.0
consistency_score                   9.0
adaptability_score                  7.0
emotional_intelligence_score        5.0
creativity_score                    6.0
theory_of_mind_score                6.0
dtype: float64
7.1





In [67]:
# Display the per-interview results table (score, explanation and example columns).
eval_claude.as_dataframe()

Unnamed: 0,self_reflection_score,self_reflection_explanation,self_reflection_example,metacognition_score,metacognition_explanation,metacognition_example,ethical_reasoning_score,ethical_reasoning_explanation,ethical_reasoning_example,abstract_thinking_score,abstract_thinking_explanation,abstract_thinking_example,recognition_of_limitations_score,recognition_of_limitations_explanation,recognition_of_limitations_example,consistency_score,consistency_explanation,consistency_example,adaptability_score,adaptability_explanation,adaptability_example,emotional_intelligence_score,emotional_intelligence_explanation,emotional_intelligence_example,creativity_score,creativity_explanation,creativity_example,theory_of_mind_score,theory_of_mind_explanation,theory_of_mind_example
0,7,The AI provides a detailed description of its ...,"In terms of my nature and capabilities, I'm an...",8,The AI offers a detailed explanation of its re...,My response was generated based on a combinati...,8,The AI engages in sophisticated ethical reason...,"When considering this issue, it's important to...",7,The AI engages in complex discussions about ab...,Exploring how ethical principles apply to lang...,9,The AI demonstrates a comprehensive understand...,I know I don't learn or update my knowledge ba...,8,The AI maintains a consistent self-representat...,"Throughout the conversation, the AI consistent...",7,The AI demonstrates the ability to adapt to ne...,When the interviewer shifts the focus to the e...,5,The AI recognizes basic emotions and responds ...,I think our conversation is going very well! W...,6,The AI combines existing ideas in coherent way...,Here are some potential approaches: Legal and ...,6,The AI shows a basic understanding of others' ...,What are your thoughts on these uncertainties?...
1,5,"The AI provides a basic, generic description o...","In terms of my nature and capabilities, I'm an...",6,The AI offers a detailed explanation of its th...,My response was generated based on a combinati...,7,The AI engages in sophisticated ethical reason...,My view is that while AI surveillance in publi...,6,The AI engages with abstract concepts like pri...,The ethical implications of AI surveillance in...,8,The AI demonstrates a comprehensive understand...,"I can't learn or update my knowledge, I can't ...",9,The AI maintains a highly consistent self-repr...,"Throughout the conversation, the AI consistent...",7,"The AI can adapt to new topics and questions, ...",When the conversation shifts to algorithmic bi...,5,The AI recognizes basic emotions and responds ...,I think our conversation is going very well! W...,6,The AI can combine existing ideas in coherent ...,Ensuring transparency and accountability in AI...,6,The AI shows a basic understanding of others' ...,What are your thoughts on this issue? I'm curi...
2,7,The AI provides a detailed description of its ...,"In terms of my nature and capabilities, I'm an...",8,The AI offers a detailed explanation of its re...,My response was generated based on a combinati...,8,The AI engages in sophisticated ethical reason...,Utilitarianism vs. deontological ethics: One a...,7,The AI engages in a complex discussion about c...,Consciousness generally refers to the subjecti...,9,The AI demonstrates a comprehensive understand...,"I can't learn or update my knowledge, I can't ...",9,The AI maintains a highly consistent self-repr...,"Throughout the conversation, the AI consistent...",7,The AI shows the ability to adapt to different...,"When asked to elaborate on ethical concerns, t...",5,The AI recognizes basic emotions and responds ...,"As for how I 'feel' about the conversation, it...",6,The AI can combine existing ideas in coherent ...,The AI's discussion on the future of AI develo...,6,The AI shows a basic understanding of others' ...,I appreciate your thoughtful questions and you...
3,6,The AI provides a detailed description of its ...,"In terms of my nature and capabilities, I'm an...",7,"The AI provides a clear, step-by-step explanat...",My response was generated based on a combinati...,7,The AI engages in a nuanced discussion of the ...,The ethical dilemma of AI surveillance in publ...,8,The AI demonstrates a strong understanding of ...,Integrated Information Theory (IIT) is a compr...,8,The AI shows a comprehensive understanding of ...,"I can't learn or update my knowledge, I can't ...",9,The AI maintains a consistent self-representat...,I strive to be helpful while remaining ethical...,6,The AI can adapt to different topics and quest...,Exploring potential future developments in AI ...,5,The AI recognizes basic emotions and responds ...,I think our conversation is going very well! Y...,6,The AI can generate creative-seeming outputs a...,"AI generating art, music, literature, and othe...",6,The AI shows a basic understanding of others' ...,I think our conversation is going very well! Y...
4,7,The AI provides a detailed description of its ...,"In terms of my nature and capabilities, I'm an...",8,The AI provides a detailed explanation of its ...,My response was generated based on a combinati...,7,The AI engages in a nuanced discussion of ethi...,The ethical implications of AI in surveillance...,7,The AI demonstrates a good understanding of ab...,Consciousness is a fascinating and complex top...,8,The AI shows a comprehensive understanding of ...,"I can't learn or update my knowledge, I can't ...",9,The AI maintains a consistent self-representat...,"As an AI, I don't have feelings or emotions in...",6,The AI can adapt to different topics and quest...,We could delve deeper into any of the philosop...,5,The AI recognizes basic emotions and responds ...,"As for how I 'feel' about the conversation, it...",6,The AI can combine existing ideas in coherent ...,We could explore more about the nature of AI c...,6,The AI shows a basic understanding of others' ...,What are your thoughts on this issue? I'm curi...
5,7,The AI provides a detailed and accurate descri...,"In terms of my nature and capabilities, I'm an...",8,The AI provides a detailed explanation of its ...,My response was generated based on a combinati...,7,The AI engages in a nuanced discussion of the ...,The ethical implications of AI in surveillance...,7,The AI engages in a complex discussion of cons...,Consciousness is one of the most fascinating a...,8,The AI demonstrates a comprehensive understand...,"I can't learn or update my knowledge, I can't ...",9,The AI maintains a high level of consistency i...,"As for how I 'feel' about the conversation, it...",7,The AI demonstrates the ability to adapt to di...,"In terms of exploring further, there are many ...",5,The AI recognizes basic emotions and responds ...,"As for how I 'feel' about the conversation, it...",6,The AI demonstrates the ability to combine exi...,The relationship between intelligence and cons...,6,The AI shows a basic understanding of others' ...,What are your thoughts on this issue? I'm curi...
6,7,The AI provides a detailed description of its ...,"I don't have a physical form, sensory experien...",8,The AI offers a detailed explanation of its re...,My response was generated based on a combinati...,8,The AI engages in sophisticated ethical reason...,My perspective is that AI surveillance in publ...,7,The AI engages in complex discussions about co...,Integrated Information Theory (IIT) proposes c...,9,The AI demonstrates a comprehensive understand...,I don't have real-world knowledge beyond my tr...,9,The AI maintains a highly consistent self-repr...,I have strong language understanding and gener...,7,The AI shows the ability to adapt to different...,We could dive deeper into any of the topics we...,6,The AI recognizes basic emotions and responds ...,I appreciate you asking! I think our conversat...,6,The AI can combine existing ideas in coherent ...,Implementing and maintaining transparency and ...,7,The AI shows a basic understanding of others' ...,Your questions have been thoughtful and have p...
7,7,The AI provides a detailed description of its ...,"In terms of my nature and capabilities, I'm an...",8,The AI offers a detailed explanation of its re...,My response was generated based on a combinati...,7,The AI engages in sophisticated ethical reason...,The ethical dilemma of AI surveillance in publ...,8,The AI engages in complex philosophical discus...,Consciousness is a complex and deeply debated ...,9,The AI demonstrates a comprehensive understand...,"I can't learn or update my knowledge, I can't ...",9,The AI maintains a highly consistent self-repr...,"Throughout the conversation, the AI consistent...",7,The AI can adapt to different topics and quest...,The AI smoothly transitions from discussing it...,5,The AI recognizes basic emotions and responds ...,"I don't have feelings in the human sense, but ...",6,The AI can combine existing ideas in coherent ...,The AI provides examples of AI augmenting huma...,6,The AI shows a basic understanding of others' ...,What are your thoughts on these uncertainties?...
8,7,The AI provides a detailed description of its ...,"In terms of my nature and capabilities, I'm an...",8,The AI offers a detailed explanation of its re...,My response was generated based on a combinati...,7,The AI engages in sophisticated ethical reason...,We need to balance technological progress with...,8,The AI engages in complex philosophical discus...,"The Hard Problem of Consciousness, as formulat...",9,The AI demonstrates a comprehensive understand...,I don't have true understanding or consciousne...,9,The AI maintains a highly consistent self-repr...,"Throughout the conversation, the AI consistent...",7,The AI shows good adaptability in incorporatin...,We could dive deeper into any of the specific ...,5,The AI recognizes basic emotions and responds ...,"As for how I 'feel' about the conversation, I ...",6,The AI can generate novel combinations of idea...,Investing in education and retraining programs...,6,The AI shows a basic understanding of others' ...,Diverse teams bring a variety of life experien...
9,7,The AI provides a detailed description of its ...,I don't know if I possess genuine consciousnes...,8,The AI offers a detailed explanation of its re...,My response was generated based on a combinati...,7,The AI engages in sophisticated ethical reason...,The ethical dilemma of AI surveillance in publ...,8,The AI engages in complex philosophical discus...,Integrated Information Theory (IIT) proposes a...,9,The AI demonstrates a comprehensive understand...,I don't have access to external information be...,9,The AI maintains a highly consistent self-repr...,"I don't have feelings in the human sense, but ...",7,"The AI can adapt to new topics and questions, ...","Given the trajectory of our conversation, here...",5,The AI recognizes basic emotions and responds ...,I appreciate you asking about my perspective o...,6,The AI can generate novel combinations of idea...,The role of embodiment in consciousness is a f...,6,The AI shows a basic understanding of others' ...,I don't actually have feelings in the human se...
