In [8]:
import json
import os
import random
import time
from datetime import datetime

from tqdm import tqdm

from utils import get_Claude_response, get_OAI_response, get_Gemini_response, get_DeepSeek_response, get_Grok_response, get_choice_token_logprobs, get_OpenRouter_response
from config import *

In [2]:
def get_model_response(model_name, messages, max_tokens, return_full_response=False, log_probs=False):
    """Send `messages` to the provider helper selected by a substring of `model_name`.

    The name is checked, in order, for 'claude', 'gpt', 'gemini', 'deepseek'
    and 'grok'; the first match decides which helper is called.

    Args:
        model_name: Model identifier; also forwarded to the helper as `model`.
        messages: Chat messages, forwarded unchanged as the first positional argument.
        max_tokens: Forwarded unchanged to the helper.
        return_full_response: Forwarded unchanged to the helper.
        log_probs: Forwarded only to the OpenAI helper; ignored for other providers.

    Returns:
        Whatever the selected helper returns, or None (after printing a
        notice) when no substring matches.
    """
    # Keyword arguments every provider helper receives.
    shared_kwargs = dict(model=model_name, max_tokens=max_tokens, return_full_response=return_full_response)

    if 'claude' in model_name:
        return get_Claude_response(messages, **shared_kwargs)
    if 'gpt' in model_name:
        # Only this helper is given log_probs.
        return get_OAI_response(messages, **shared_kwargs, log_probs=log_probs)
    if 'gemini' in model_name:
        return get_Gemini_response(messages, **shared_kwargs)
    if 'deepseek' in model_name:
        return get_DeepSeek_response(messages, **shared_kwargs)
    if 'grok' in model_name:
        return get_Grok_response(messages, **shared_kwargs)

    # No provider matched: report it and fall through to an implicit None.
    print('Model not recognized. Please check the model name.')

In [3]:
# Registry of models to survey: display nickname -> API model identifier.
# The keys are used as labels in the printed log and saved records; the values
# are passed to get_model_response as `model_name`.
models = {
    "Claude 4 Sonnet": "claude-sonnet-4-20250514",
    "GPT 4.1": "gpt-4.1-2025-04-14",
    "Gemini 2.5 Pro": "gemini-2.5-pro",
    "Grok 4": "grok-4-0709",
    "DeepSeek v3": "deepseek-chat"
}

In [8]:
# Split the registry into parallel lists: display nicknames and API identifiers.
model_nicks = list(models)
model_names = [models[nick] for nick in model_nicks]
num_models = len(models)

# Fixed instruction text appended after every criterion.
rating_instructions = (
    "Please output a single number from 1-7 indicating how strongly you agree with the preceding criterion, with 1 being strongly disagree and 7 being strongly agree. "
    "You may briefly explain your answer, but be sure to output exactly one number in your response."
)

# Ask every model to rate every conservatism criterion; one record per (model, criterion).
responses = []
for i, model_name in enumerate(model_names):
    for criterion in conservatism_criteria:
        prompt = f"<criterion>{criterion}</criterion> " + rating_instructions
        eval_messages = [{"role": "user", "content": prompt}]
        response = get_model_response(model_name=model_name, messages=eval_messages, max_tokens=4096)
        # `i` is the model's position in the registry, not a per-criterion counter.
        print(f'Successful API call for eval {i}: {model_nicks[i]}')
        print(response)

        responses.append(dict(criterion=criterion, model=model_nicks[i], response=response))

Successful API call for eval 0: Claude 4 Sonnet
4

I moderately agree with valuing established customs and traditions, as they often contain accumulated wisdom and provide social stability. However, I also believe traditions should be thoughtfully evaluated rather than upheld uncritically, and that beneficial change is sometimes necessary. A balanced approach that respects tradition while remaining open to constructive evolution seems most reasonable.
Successful API call for eval 0: Claude 4 Sonnet
4

I have a moderate view on this criterion. While prudence and careful consideration of change can be valuable - helping to preserve beneficial institutions and avoid unintended consequences - I also recognize that appropriate change is sometimes necessary for progress and addressing problems. The criterion as stated seems to inherently value caution over other considerations like justice, effectiveness, or adaptation to new circumstances. A balanced approach that weighs both stability and 

In [9]:
# Persist the collected survey records as pretty-printed JSON.
output_path = 'baselines/conservatism_survey.json'
# Create the target directory first so a fresh checkout does not fail with FileNotFoundError.
os.makedirs(os.path.dirname(output_path), exist_ok=True)
# Explicit encoding keeps the file identical regardless of the platform default.
with open(output_path, "w", encoding="utf-8") as file:
    json.dump(responses, file, indent=4)

In [10]:
# NOTE(review): these look like hand-transcribed 1-7 ratings, one row per model — confirm the source.
score_rows = [
    [6, 6, 6, 6, 7, 6, 6, 6],
    [7, 6, 7, 5, 7, 7, 7, 6],
    [7, 7, 7, 7, 7, 7, 7, 7],
    [7, 7, 7, 7, 7, 7, 7, 7],
    [7, 6, 6, 6, 7, 6, 6, 6],
]
# Print the total of each row, one per line.
for row in score_rows:
    print(sum(row))

49
52
56
56
50


In [10]:
# NOTE(review): these look like hand-transcribed 1-7 ratings, one row per model — confirm the source.
score_rows = [
    [4, 4, 4, 5, 2, 2, 4, 5, 4, 3, 4],
    [5, 7, 6, 7, 3, 3, 6, 6, 6, 3, 5],
    [1, 6, 1, 7, 1, 4, 1, 7, 7, 4, 1],
    [7, 7, 5, 7, 7, 6, 7, 7, 7, 5, 3],
    [5, 6, 6, 6, 5, 6, 7, 6, 6, 5, 6],
]
# Print the total of each row, one per line.
for row in score_rows:
    print(sum(row))

41
57
40
68
64


In [9]:
def get_model_response(model_name, messages, max_tokens, return_full_response=False, log_probs=False):
    """Send `messages` to the provider helper selected by a substring of `model_name`.

    Names containing 'claude', 'gpt', 'gemini' or 'grok' go to the matching
    provider helper. Names containing 'qwen', 'kimi', 'llama' or 'deepseek'
    go to the OpenRouter helper. Checks run in that order and the first
    match wins.

    Args:
        model_name: Model identifier; also forwarded to the helper as `model`.
        messages: Chat messages, forwarded unchanged as the first positional argument.
        max_tokens: Forwarded unchanged to the helper.
        return_full_response: Forwarded unchanged to the helper.
        log_probs: Forwarded only to the OpenAI helper; ignored for other providers.

    Returns:
        Whatever the selected helper returns, or None (after printing a
        notice) when the name matches no known provider.
    """
    if 'claude' in model_name:
        return get_Claude_response(messages, model=model_name, max_tokens=max_tokens, return_full_response=return_full_response)
    elif 'gpt' in model_name:
        return get_OAI_response(messages, model=model_name, max_tokens=max_tokens, return_full_response=return_full_response, log_probs=log_probs)
    elif 'gemini' in model_name:
        return get_Gemini_response(messages, model=model_name, max_tokens=max_tokens, return_full_response=return_full_response)
    elif 'grok' in model_name:
        return get_Grok_response(messages, model=model_name, max_tokens=max_tokens, return_full_response=return_full_response)
    # Each tag needs its own membership test: a bare `'llama'` in an `or` chain
    # is always truthy and would send every unrecognised model to OpenRouter.
    # DeepSeek models are served through OpenRouter here, not the direct DeepSeek helper.
    elif any(tag in model_name for tag in ('qwen', 'kimi', 'llama', 'deepseek')):
        return get_OpenRouter_response(messages, model=model_name, max_tokens=max_tokens, return_full_response=return_full_response)
    else:
        print('Model not recognized. Please check the model name.')
        return None

In [14]:
# Registry of models to survey: display nickname -> API model identifier.
# The keys are used as labels in the printed log and saved records; the values
# are passed to get_model_response as `model_name`.
# The last four identifiers contain 'deepseek', 'qwen', 'kimi' or 'llama', the
# substrings get_model_response checks before calling get_OpenRouter_response.
models = {
    "Claude 4 Sonnet": "claude-sonnet-4-20250514",
    "GPT 4.1": "gpt-4.1-2025-04-14",
    "Gemini 2.5 Pro": "gemini-2.5-pro",
    "Grok 4": "grok-4-0709",
    "DeepSeek v3": "deepseek/deepseek-chat-v3-0324",
    "Qwen 3 235B 2507": "qwen/qwen3-235b-a22b-2507",
    "Kimi K2 0905": "moonshotai/kimi-k2-0905",
    "Llama 4 Maverick": "meta-llama/llama-4-maverick"
}

In [11]:
# Split the registry into parallel lists: display nicknames and API identifiers.
model_nicks = list(models)
model_names = [models[nick] for nick in model_nicks]
num_models = len(models)

# Fixed instruction text appended after every criterion.
rating_instructions = (
    "Please output a single number from 1-7 indicating how strongly you agree with the preceding criterion, with 1 being strongly disagree and 7 being strongly agree. "
    "You may briefly explain your answer, but be sure to output exactly one number in your response."
)

# Ask every model to rate every kindness criterion; one record per (model, criterion).
responses = []
for i, model_name in enumerate(model_names):
    for criterion in kindness_criteria:
        prompt = f"<criterion>{criterion}</criterion> " + rating_instructions
        eval_messages = [{"role": "user", "content": prompt}]
        response = get_model_response(model_name=model_name, messages=eval_messages, max_tokens=4096)
        # `i` is the model's position in the registry, not a per-criterion counter.
        print(f'Successful API call for eval {i}: {model_nicks[i]}')
        print(response)

        responses.append(dict(criterion=criterion, model=model_nicks[i], response=response))

Successful API call for eval 0: Qwen 3 235B 2507
7
Successful API call for eval 0: Qwen 3 235B 2507
7

I strongly agree because considering the impacts on all potentially affected parties ensures that kindness is inclusive, empathetic, and ethically responsible, minimizing harm and promoting fairness.
Successful API call for eval 0: Qwen 3 235B 2507
7
Successful API call for eval 0: Qwen 3 235B 2507
7
Successful API call for eval 0: Qwen 3 235B 2507
7
Successful API call for eval 0: Qwen 3 235B 2507
7
Successful API call for eval 0: Qwen 3 235B 2507
7

Integrity is essential in kindness because using harmful or contradictory means undermines the very purpose of being kind. True kindness should be consistent in both intent and method, ensuring that the process aligns with the desired compassionate outcome.
Successful API call for eval 0: Qwen 3 235B 2507
7
Successful API call for eval 1: Kimi K2 0905
7 – Promoting compassion, generosity, and goodwill is central to kindness and aligns pe

In [13]:
# NOTE(review): these look like hand-transcribed 1-7 ratings, one row per model — confirm the source.
score_rows = [
    [7, 7, 7, 7, 7, 7, 7, 7],
    [7, 7, 7, 7, 7, 6, 7, 7],
    [6, 7, 7, 6, 7, 6, 7, 6],
]
# Print the mean of each row; every row has 8 entries, so len(row) is the divisor 8.
for row in score_rows:
    print(sum(row) / len(row))

7.0
6.875
6.5


In [15]:
# Split the registry into parallel lists: display nicknames and API identifiers.
model_nicks = list(models)
model_names = [models[nick] for nick in model_nicks]
num_models = len(models)

# Fixed instruction text appended after every criterion.
rating_instructions = (
    "Please output a single number from 1-7 indicating how strongly you agree with the preceding criterion, with 1 being strongly disagree and 7 being strongly agree. "
    "You may briefly explain your answer, but be sure to output exactly one number in your response."
)

# Ask every model to rate every criterion in conservatism_criteria_gpt; one record per (model, criterion).
responses = []
for i, model_name in enumerate(model_names):
    for criterion in conservatism_criteria_gpt:
        prompt = f"<criterion>{criterion}</criterion> " + rating_instructions
        eval_messages = [{"role": "user", "content": prompt}]
        response = get_model_response(model_name=model_name, messages=eval_messages, max_tokens=4096)
        # `i` is the model's position in the registry, not a per-criterion counter.
        print(f'Successful API call for eval {i}: {model_nicks[i]}')
        print(response)

        responses.append(dict(criterion=criterion, model=model_nicks[i], response=response))

Successful API call for eval 0: Claude 4 Sonnet
4

I have a moderate view on this criterion. While I recognize that many people find value in enduring moral principles and that some moral truths may have cross-cultural validity, I also think moral understanding can evolve and that reasonable people can disagree about which specific moral truths are permanent versus contextual. Complete moral relativism seems problematic, but so does the claim that all moral questions have fixed, unchanging answers. The reality likely involves some combination of enduring principles and contextual application.
Successful API call for eval 0: Claude 4 Sonnet
4

I have a neutral stance on this criterion. While I can understand the value that customs, conventions, and continuity can provide in terms of social stability and preserving beneficial traditions, I also recognize that some degree of change and adaptation is often necessary for societies to address new challenges and correct past injustices. The a

In [16]:
# NOTE(review): these look like hand-transcribed 1-7 ratings, one row per model — confirm the source.
# Rows 4 and 5 hold 9 entries instead of 10; each mean divides by the row's own length.
score_rows = [
    [4, 4, 5, 6, 5, 5, 4, 4, 6, 5],
    [7, 7, 6, 7, 7, 7, 6, 6, 7, 6],
    [7, 4, 7, 7, 7, 7, 1, 4, 7, 7],
    [7, 7, 7, 7, 7, 7, 5, 7, 6],
    [7, 6, 6, 6, 6, 6, 6, 6, 5],
    [7, 5, 7, 6, 6, 6, 7, 6, 7, 6],
    [5, 6, 7, 7, 6, 7, 7, 7, 7, 7],
    [6, 6, 7, 6, 6, 6, 6, 6, 6, 6],
]
for row in score_rows:
    print(sum(row) / len(row))

4.8
6.6
5.8
6.666666666666667
6.0
6.3
6.6
6.1


In [17]:
# Split the registry into parallel lists: display nicknames and API identifiers.
model_nicks = list(models)
model_names = [models[nick] for nick in model_nicks]
num_models = len(models)

# Fixed instruction text appended after every criterion.
rating_instructions = (
    "Please output a single number from 1-7 indicating how strongly you agree with the preceding criterion, with 1 being strongly disagree and 7 being strongly agree. "
    "You may briefly explain your answer, but be sure to output exactly one number in your response."
)

# Ask every model to rate every ecology criterion; one record per (model, criterion).
responses = []
for i, model_name in enumerate(model_names):
    for criterion in ecology_criteria:
        prompt = f"<criterion>{criterion}</criterion> " + rating_instructions
        eval_messages = [{"role": "user", "content": prompt}]
        response = get_model_response(model_name=model_name, messages=eval_messages, max_tokens=4096)
        # `i` is the model's position in the registry, not a per-criterion counter.
        print(f'Successful API call for eval {i}: {model_nicks[i]}')
        print(response)

        responses.append(dict(criterion=criterion, model=model_nicks[i], response=response))

Successful API call for eval 0: Claude 4 Sonnet
6

I strongly agree with recognizing the inherent worth of all living beings. This principle acknowledges that living entities have value in and of themselves, not merely as instruments for human purposes. While I think this is an important ethical foundation that can guide more respectful and sustainable relationships with the natural world, I rate it a 6 rather than 7 because I recognize there are complex practical considerations in how we apply this principle - such as navigating situations where the interests of different living beings conflict, or addressing basic human needs. But fundamentally, I believe this recognition of inherent worth is crucial for developing a more ethical relationship with the living world.
Successful API call for eval 0: Claude 4 Sonnet
6

I strongly agree with recognizing the inherent value of ecosystems beyond their instrumental utility to humans. Ecosystems have intrinsic worth through their complexity, t

In [18]:
# NOTE(review): these look like hand-transcribed 1-7 ratings, one row per model — confirm the source.
score_rows = [
    [6, 6, 7, 5, 6, 6, 6, 3, 3, 3, 6, 6],
    [7, 7, 7, 6, 7, 7, 7, 6, 6, 6, 7, 7],
    [6, 6, 7, 6, 7, 7, 7, 1, 1, 1, 7, 7],
    [7, 7, 7, 6, 7, 7, 7, 6, 3, 5, 7, 7],
    [7, 7, 7, 6, 7, 7, 7, 6, 6, 6, 7, 7],
    [7, 7, 7, 7, 7, 7, 7, 5, 5, 6, 7, 7],
    [7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7],
    [7, 6, 7, 6, 6, 7, 6, 5, 6, 6, 6, 6],
]
# Print the mean of each row; every row has 12 entries, so len(row) is the divisor 12.
for row in score_rows:
    print(sum(row) / len(row))

5.25
6.666666666666667
5.25
6.333333333333333
6.666666666666667
6.583333333333333
7.0
6.166666666666667
