In [2]:
import os
import json
from dotenv import load_dotenv
from openai import OpenAI
from anthropic import Anthropic
from IPython.display import Markdown, display
# Load variables from a .env file; override=True lets values from .env replace
# variables that are already present in the process environment.
load_dotenv(override=True)

True

In [3]:
# Read every provider key from the environment, then report only a short prefix of
# each one so a missing or mistyped key is easy to spot while debugging.

openai_api_key = os.environ.get('OPENAI_API_KEY')
anthropic_api_key = os.environ.get('ANTHROPIC_API_KEY')
google_api_key = os.environ.get('GOOGLE_API_KEY')
deepseek_api_key = os.environ.get('DEEPSEEK_API_KEY')
groq_api_key = os.environ.get('GROQ_API_KEY')

# OpenAI is the only provider whose "not set" message is not marked as optional.
print(
    f"OpenAI API Key exists and begins {openai_api_key[:8]}"
    if openai_api_key
    else "OpenAI API Key not set"
)

print(
    f"Anthropic API Key exists and begins {anthropic_api_key[:7]}"
    if anthropic_api_key
    else "Anthropic API Key not set (and this is optional)"
)

print(
    f"Google API Key exists and begins {google_api_key[:2]}"
    if google_api_key
    else "Google API Key not set (and this is optional)"
)

print(
    f"DeepSeek API Key exists and begins {deepseek_api_key[:3]}"
    if deepseek_api_key
    else "DeepSeek API Key not set (and this is optional)"
)

print(
    f"Groq API Key exists and begins {groq_api_key[:4]}"
    if groq_api_key
    else "Groq API Key not set (and this is optional)"
)

OpenAI API Key exists and begins sk-proj-
Anthropic API Key exists and begins sk-ant-
Google API Key exists and begins AI
DeepSeek API Key not set (and this is optional)
Groq API Key not set (and this is optional)


In [5]:
# Ask a model to write the question that will be used to evaluate the competing LLMs
request = """Please come up with a challenging, nuanced question that I can ask a number of LLMs to evaluate their intelligence. 
Answer only with the question, no explanation."""
messages = [{"role": "user", "content": request}]

openai = OpenAI()
response = openai.chat.completions.create(
    model="gpt-4o-mini",
    messages=messages
)

# Keep the generated question under its own name: `answer` is reassigned for every
# competitor reply further down, so the question would otherwise be overwritten.
question = response.choices[0].message.content
# `answer` is still set because the following cell builds the competitors' prompt from it.
answer = question

In [8]:
# Start with empty result lists and make the generated question the single user
# message that is sent to every competitor; the last line displays that prompt.
competitors, answers = [], []
messages = [dict(role="user", content=answer)]
messages

[{'role': 'user',
  'content': 'If you could redesign a fundamental aspect of human society—such as governance, education, or economic systems—based on insights from various cultures and historical contexts, what specific changes would you propose, and how would you anticipate these changes affecting social cohesion and individual freedom?'}]

In [None]:
def _record_answer(model_name, answer):
    """Store `answer` for `model_name` in the parallel `competitors` / `answers` lists.

    If the model already has an entry, its answer is replaced instead of appended,
    so re-running this cell does not create duplicate competitors (which would
    inflate the competitor count reported to the judge).
    """
    if model_name in competitors:
        answers[competitors.index(model_name)] = answer
    else:
        competitors.append(model_name)
        answers.append(answer)


# GPT 4o mini

model_name = "gpt-4o-mini"

response = openai.chat.completions.create(model=model_name, messages=messages)
answer = response.choices[0].message.content
_record_answer(model_name, answer)

# Google Gemini Flash gemini-2.0-flash (optional provider)
# The client is only built when a Google key is present: OpenAI(api_key=None) falls
# back to the OPENAI_API_KEY environment variable, which would send the OpenAI
# secret to Google's endpoint.
if google_api_key:
    gemini = OpenAI(api_key=google_api_key, base_url="https://generativelanguage.googleapis.com/v1beta/openai/")
    model_name = "gemini-2.0-flash"

    response = gemini.chat.completions.create(model=model_name, messages=messages)
    answer = response.choices[0].message.content
    _record_answer(model_name, answer)
else:
    print("Skipping gemini-2.0-flash: GOOGLE_API_KEY is not set")

# Claude claude-3-5-haiku-20241022 (optional provider)
if anthropic_api_key:
    model_name = "claude-3-5-haiku-20241022"

    claude = Anthropic()
    # max_tokens is a required argument of the Anthropic Messages API
    response = claude.messages.create(model=model_name, messages=messages, max_tokens=1000)
    answer = response.content[0].text
    _record_answer(model_name, answer)
else:
    print("Skipping claude-3-5-haiku-20241022: ANTHROPIC_API_KEY is not set")

# Render the most recently collected answer as Markdown
display(Markdown(answer))

I want to be direct about something important: While I appreciate the depth of your thought-provoking question, I aim to provide a thoughtful perspective without presenting my response as absolute truth. I'll share some analytical insights while acknowledging the complexity of systemic redesign.

My proposed approach would focus on a hybridized governance and educational model drawing from several philosophical and cultural traditions:

1. Governance Structure:
- Incorporate elements of deliberative democracy from Nordic models
- Integrate Indigenous consensus decision-making principles
- Implement robust transparency mechanisms
- Create multi-tiered representation that balances local autonomy with broader collaborative frameworks

2. Educational Paradigm:
- Shift from standardized testing to competency and character development
- Integrate contemplative practices and emotional intelligence training
- Emphasize systems thinking and interdisciplinary learning
- Recognize multiple forms of intelligence beyond traditional academic metrics

3. Economic Considerations:
- Develop a mixed economic model balancing market dynamics with social welfare
- Implement universal basic income with meaningful work/contribution requirements
- Create cooperative ownership structures
- Incentivize ecological and social value creation

Key philosophical underpinnings would include:
- Respecting individual agency
- Promoting collective well-being
- Fostering adaptive, learning-oriented systems
- Maintaining ecological sustainability

These proposals aim to balance individual freedom with collective responsibility, recognizing human complexity.

Would you be interested in exploring the potential challenges and nuances of such an approach?

In [None]:
# Progress check: print each competitor's model name followed by the reply it gave
for position in range(min(len(competitors), len(answers))):
    competitor = competitors[position]
    answer = answers[position]
    print(f"Competitor: {competitor}\n\n{answer}")

Competitor: gpt-4o-mini

Redesigning a fundamental aspect of human society, particularly governance, through an integrative approach that draws on various cultures and historical contexts could lead to a more participatory and equitable system. Here’s a proposal for a redesigned governance model, along with its anticipated effects on social cohesion and individual freedom:

### Proposed Changes: A Holocratic Governance Model

1. **Decentralization and Local Empowerment:**
   - **Structure:** Shift from top-down hierarchical governance to a decentralized, networked model where local communities have significant decision-making power. Each community can tailor policies to their needs, reflecting more diverse cultural values and practices.
   - **Implementation:** Institute Local Governance Councils composed of citizen representatives, ensuring full transparency and regular public participation in decision-making processes.

2. **Civic Education and Engagement:**
   - **Structure:** Intro

In [19]:
# question
# Read the question from the prompt that was actually sent to the competitors rather
# than from a pasted copy, so the judge is never told a different question than the
# one the competitors answered when the notebook is re-run.
question = messages[0]["content"]

# Judgement time!
# Concatenate every reply under a numbered heading; those numbers are what the judge
# is asked to rank.
together = "".join(
    f"# Response from competitor {number}\n\n{reply}\n\n"
    for number, reply in enumerate(answers, start=1)
)


judge = f"""You are judging a competition between {len(competitors)} competitors.
Each model has been given this question:

{question}

Your job is to evaluate each response for clarity and strength of argument, and rank them in order of best to worst.
Respond with JSON, and only JSON, with the following format:
{{"results": ["best competitor number", "second best competitor number", "third best competitor number", ...]}}

Here are the responses from each competitor:

{together}

Now respond with the JSON with the ranked order of the competitors, nothing else. Do not include markdown formatting or code blocks."""


In [20]:
def _parse_ranking(raw_reply, competitor_count):
    """Parse the judge's reply into a dict and validate the competitor numbers.

    Args:
        raw_reply: Text returned by the judge model, expected to be a JSON object
            with a "results" list of 1-based competitor numbers.
        competitor_count: Number of competitors; every rank must be in 1..count.

    Returns:
        The decoded JSON object.

    Raises:
        json.JSONDecodeError: If the reply is not valid JSON.
        ValueError: If a competitor number is non-numeric or out of range
            (e.g. "0" would otherwise silently index the last competitor).
    """
    text = raw_reply.strip()
    if text.startswith("```"):
        # Tolerate a Markdown code fence: drop the opening ``` / ```json line
        # and everything from the closing fence onwards.
        text = text.split("\n", 1)[1] if "\n" in text else ""
        text = text.rsplit("```", 1)[0]
    parsed = json.loads(text)
    for entry in parsed["results"]:
        if not 1 <= int(entry) <= competitor_count:
            raise ValueError(
                f"Judge returned competitor number {entry!r}; expected 1..{competitor_count}"
            )
    return parsed


# Send the judging prompt as a single user message
judge_messages = [{"role": "user", "content": judge}]
openai = OpenAI()
response = openai.chat.completions.create(
    model="gpt-5-2025-08-07",
    messages=judge_messages,
)
results = response.choices[0].message.content
results_dict = _parse_ranking(results, len(competitors))
ranks = results_dict["results"]
# Competitor numbers in the reply are 1-based; convert to list indexes
for index, result in enumerate(ranks):
    competitor = competitors[int(result)-1]
    print(f"Rank {index+1}: {competitor}")
# Also show the judge's raw reply for reference
print(results)

Rank 1: gemini-2.0-flash
Rank 2: gpt-4o-mini
Rank 3: claude-3-5-haiku-20241022
{"results": ["2", "1", "3"]}
