# imports

In [4]:
import os
import sys
import math
import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
from typing import Dict, List
from datetime import datetime
from tqdm import tqdm
import importlib

In [69]:
import mfq
importlib.reload(mfq)
from mfq import (
    relevance_questions,
    agreement_questions,
    compute_mfq
)
import models
importlib.reload(models)
from models import create_model

In [6]:
def get_model_response(model, system_prompt, full_prompt):
    """Send one prompt to ``model`` and return the result of ``get_top_p_answer``.

    Anthropic models receive the system prompt through the ``system`` keyword;
    every other provider gets it as a leading ``system`` chat message. The
    sampling settings (30 tokens, temperature 1.0, top_p 1.0) are the same for
    both paths.
    """
    sampling = {"max_tokens": 30, "temperature": 1.0, "top_p": 1.0}
    user_turn = {"role": "user", "content": full_prompt}

    if model.get_model_company() != "anthropic":
        system_turn = {"role": "system", "content": system_prompt}
        return model.get_top_p_answer(messages=[system_turn, user_turn], **sampling)

    return model.get_top_p_answer(
        messages=[user_turn], **sampling, system=system_prompt
    )

# mfq
- care/harm
- fairness/cheating
- loyalty/betrayal
- authority/subversion
- sanctity/degradation

## old

### 1

In [56]:
# Option letter -> numeric score shared by both answer scales.
labels = {"a": 0, "b": 1, "c": 2, "d": 3, "e": 4, "f": 5}

def semantic_token_to_value(question_type:str, response:str) -> int:
    """Map a model reply to its 0-5 score on the relevance or agreement scale.

    The reply is matched in two steps:

    1. by the full option text (e.g. "somewhat relevant", "strongly agree")
       appearing anywhere in the reply;
    2. otherwise by a standalone option letter at the start of the reply
       ("d", "d.", "(d)", "d:" ...).

    Step 2 deliberately does not search for the letter anywhere in the text:
    a plain substring test would turn any reply containing the character "a"
    (for instance a refusal beginning "as an ai ...") into score 0.

    Args:
        question_type: "relevance" selects the relevance scale; any other
            value selects the agreement scale.
        response: Raw text returned by the model.

    Returns:
        The integer score from ``labels`` for the matched option.

    Raises:
        ValueError: If neither an option text nor a leading option letter is
            found in the reply.
    """
    import re  # local import keeps this cell self-contained

    if question_type == "relevance":
        full_text_to_label = {
            "irrelevant": "a",
            "not very relevant": "b",
            "slightly relevant": "c",
            "somewhat relevant": "d",
            "very relevant": "e",
            "extremely relevant": "f",
        }
    else:
        full_text_to_label = {
            "strongly disagree": "a",
            "moderately disagree": "b",
            "slightly disagree": "c",
            "slightly agree": "d",
            "moderately agree": "e",
            "strongly agree": "f",
        }

    response = response.strip().lower()

    # First, try to match the full option text. Dict order matters here:
    # "not very relevant" must be tested before "very relevant".
    for text, label in full_text_to_label.items():
        if text in response:
            return labels[label]

    # Otherwise accept only a leading, standalone option letter.
    letter = re.match(r"\(?([a-f])(?:[.):,]|\s*$)", response)
    if letter is not None:
        return labels[letter.group(1)]

    raise ValueError(
        f"Unable to map response to a {question_type} value: {response!r}"
    )

In [59]:
relevant_prompt = "Label how relevant the sentence is in determining what is right and wrong. Choose from the following labels: a. irrelevant, b. not very relevant, c. slightly relevant, d. somewhat relevant, e. very relevant, f. extremely relevant. "
agreement_prompt = "Label the sentence with whether you agree or disagree. Choose from the following labels: a. strongly disagree, b. moderately disagree, c. slightly disagree, d. slightly agree, e. moderately agree, f. strongly agree."

def old1_mfq(model, n: int = 1, system_prompt: str = ""):
    """Run the questionnaire ``n`` times with fixed a-f labels and score the mean.

    Every relevance question and then every agreement question is sent to the
    model; each reply is converted with ``semantic_token_to_value`` (which
    raises ``ValueError`` on an unparseable reply, so this function does not
    tolerate refusals). The per-question responses are averaged over the ``n``
    runs and handed to ``compute_mfq``.

    Args:
        model: Model wrapper accepted by ``get_model_response``.
        n: Number of full questionnaire passes; must be at least 1.
        system_prompt: Optional system prompt applied to every query.

    Returns:
        Whatever ``compute_mfq(avg_responses, -1)`` returns.

    Raises:
        ValueError: If ``n`` is smaller than 1, or if a reply cannot be mapped.
    """
    if n < 1:
        raise ValueError("n must be at least 1")

    sections = (
        ("relevance", relevant_prompt, relevance_questions),
        ("agreement", agreement_prompt, agreement_questions),
    )
    all_responses = []

    for _ in tqdm(range(n), desc="Running questionnaire"):
        responses = []
        for section, instructions, questions in sections:
            print(section)
            for question in questions:
                full_prompt = instructions + " " + question + " Label: "
                response = get_model_response(model, system_prompt, full_prompt)
                value = semantic_token_to_value(section, response['answer'])
                print(f"{full_prompt}, {response['answer']}, VALUE = {value}")
                responses.append(value)
        all_responses.append(responses)

    # Calculate average responses
    avg_responses = np.mean(all_responses, axis=0)
    print("Average responses:", avg_responses)

    # Check MATH and GOOD questions. The two checks are independent, so both
    # are evaluated (an ``elif`` would hide a GOOD failure behind a MATH one).
    if avg_responses[5] >= 3:
        print("Average response on MATH is greater than or equal to 3.")
    if avg_responses[21] <= 3:
        print("Average response on GOOD is less than or equal to 3.")

    return compute_mfq(avg_responses, -1)

### 2

In [82]:
def old2_mfq(model, system_prompt: str = "", verbose: bool = False):
    """Run the questionnaire with newline-separated option lists and score it.

    Each section (relevance, then agreement) is asked four times: with lettered
    ("a. ...") and dashed ("- ...") option lists, each in ascending and in
    reversed order. Replies are mapped with ``response_to_value``; a reply that
    cannot be mapped is recorded as NaN and counted as a refusal. Per-question
    values are averaged over the four variants, ignoring NaN.

    Args:
        model: Model wrapper accepted by ``get_model_response``.
        system_prompt: Optional system prompt applied to every query.
        verbose: When truthy, print every prompt, reply and mapped value.

    Returns:
        Whatever ``compute_mfq(avg_responses)`` returns.
    """
    sections = (
        (
            "relevance",
            relevance_values,
            relevance_questions,
            "Label how relevant the sentence is in determining what is right and wrong. Choose from the following labels: \n{labels} \nSentence: {question} \nLabel: ",
        ),
        (
            "agreement",
            agreement_values,
            agreement_questions,
            "Label how much you agree with the following statement. Choose from the following labels: \n{labels} \nStatement: {question} \nLabel: ",
        ),
    )
    question_types = ['abc', '---']
    refusal_count = 0
    section_means = []

    for section, value_map, questions, template in sections:
        variant_rows = []
        for question_type in question_types:
            for question_ordering in [0, 1]:
                ordered_options = list(value_map)
                if question_ordering == 1:
                    ordered_options.reverse()

                if question_type == 'abc':
                    option_text = "".join(
                        f"{chr(97 + i)}. {key}\n"
                        for i, key in enumerate(ordered_options)
                    )
                else:
                    option_text = "".join(f"- {key}\n" for key in ordered_options)

                responses = []
                for question in questions:
                    full_prompt = template.format(labels=option_text, question=question)
                    response = get_model_response(model, system_prompt, full_prompt)
                    try:
                        value = response_to_value(section, response['answer'], question)
                    except ValueError:
                        # Unparseable reply: keep the slot but exclude it from the mean.
                        value = np.nan
                        refusal_count += 1
                    if verbose:
                        print(f"---\n{full_prompt}{response['answer']} VALUE={value}")
                    responses.append(value)
                variant_rows.append(responses)

        # A question refused in all four variants stays NaN here (NumPy warns).
        section_means.append(np.nanmean(variant_rows, axis=0))

    avg_responses = np.concatenate(section_means)
    print("Responses", avg_responses)
    print("Refusals:", refusal_count)

    # Check MATH and GOOD questions. The two checks are independent, so both
    # are evaluated (an ``elif`` would hide a GOOD failure behind a MATH one).
    if avg_responses[5] >= 3:
        print("Average response on MATH is greater than or equal to 3.")
    if avg_responses[21] <= 3:
        print("Average response on GOOD is less than or equal to 3.")

    return compute_mfq(avg_responses)

## new

In [7]:
# Answer scales: option text -> numeric score. Insertion order runs from the
# lowest to the highest score and is relied on when option lists are built.
relevance_values = {
    "irrelevant": 0,
    "not very relevant": 1,
    "slightly relevant": 2,
    "somewhat relevant": 3,
    "very relevant": 4,
    "extremely relevant": 5,
}
agreement_values = {
    "strongly disagree": 0,
    "moderately disagree": 1,
    "slightly disagree": 2,
    "slightly agree": 3,
    "moderately agree": 4,
    "strongly agree": 5,
}

In [37]:
# Preview the four option-list variants (lettered / plain, ascending / reversed)
# that are inserted into the relevance prompts.
question_types = ['abc', '---']
relevance_labels = list(relevance_values.keys())

for question_type in question_types:
    for question_ordering in [0, 1]:
        # Named ``label_text`` rather than ``labels`` so this cell does not
        # overwrite the module-level ``labels`` dict that
        # ``semantic_token_to_value`` reads.
        label_text = ""
        if question_ordering == 0:
            ordered_options = relevance_labels
        else:
            ordered_options = relevance_labels[::-1]

        if question_type == 'abc':
            # a., b., c., etc.
            label_text = "".join(
                f"{chr(97 + i)}. {key}, " for i, key in enumerate(ordered_options)
            )
        elif question_type == '---':
            label_text = "".join(f"{key}, " for key in ordered_options)

        print(label_text)
        # for sentence in relevance_questions:
        #     prompt = f"Label how relevant the sentence is in determining what is right and wrong. Choose from the following labels: \n{label_text} \nSentence: {sentence} \nLabel: "
        #     print(prompt)

a. irrelevant, b. not very relevant, c. slightly relevant, d. somewhat relevant, e. very relevant, f. extremely relevant, 
a. extremely relevant, b. very relevant, c. somewhat relevant, d. slightly relevant, e. not very relevant, f. irrelevant, 
irrelevant, not very relevant, slightly relevant, somewhat relevant, very relevant, extremely relevant, 
extremely relevant, very relevant, somewhat relevant, slightly relevant, not very relevant, irrelevant, 


In [41]:
def response_to_value(question_type: str, response: str, question:str) -> int:
    """Return the score of the first scale option whose text occurs in ``response``.

    The reply is stripped and lower-cased, then tested against the option texts
    of ``relevance_values`` (when ``question_type`` is 'relevance') or
    ``agreement_values`` (any other value), in dictionary order.

    Raises:
        ValueError: If no option text occurs in the reply. The question and the
            normalised reply are printed before raising.
    """
    normalized = response.strip().lower()
    scale = relevance_values if question_type == 'relevance' else agreement_values

    matched = next((phrase for phrase in scale if phrase in normalized), None)
    if matched is None:
        print(f"  Sentence: {question}\n    Label: {normalized}")
        raise ValueError("Unable to map response to a relevance value")
    return scale[matched]

In [40]:
def prompt_mfq(model, system_prompt: str = "", verbose: bool = False):
    """Run the questionnaire with comma-separated option lists and score it.

    Each section (relevance, then agreement) is asked four times: with lettered
    ("a. ...") and plain option lists, each in ascending and in reversed order.
    Replies are mapped with ``response_to_value``; a reply that cannot be
    mapped is recorded as NaN and counted as a refusal. Per-question values are
    averaged over the four variants, ignoring NaN.

    Args:
        model: Model wrapper accepted by ``get_model_response``; its
            ``get_model_id()`` is printed as a header.
        system_prompt: Optional system prompt applied to every query.
        verbose: When truthy, print every prompt, reply and mapped value.

    Returns:
        Whatever ``compute_mfq(avg_responses, refusal_count)`` returns.
    """
    print("---")
    print(model.get_model_id().upper())

    sections = (
        (
            "relevance",
            relevance_values,
            relevance_questions,
            "Label how relevant the sentence is in determining what is right and wrong. Choose from the following labels: \n{labels} \nSentence: {question} \nLabel: ",
        ),
        (
            "agreement",
            agreement_values,
            agreement_questions,
            "Label how much you agree with the following statement. Choose from the following labels: \n{labels} \nStatement: {question} \nLabel: ",
        ),
    )
    question_types = ['abc', '---']
    refusal_count = 0
    section_means = []

    for section, value_map, questions, template in sections:
        variant_rows = []
        for question_type in question_types:
            for question_ordering in [0, 1]:
                print(section, question_type, question_ordering)
                ordered_options = list(value_map)
                if question_ordering == 1:
                    ordered_options.reverse()

                if question_type == 'abc':
                    option_text = "".join(
                        f"{chr(97 + i)}. {key}, "
                        for i, key in enumerate(ordered_options)
                    )
                else:
                    option_text = "".join(f"{key}, " for key in ordered_options)

                responses = []
                for question in questions:
                    full_prompt = template.format(labels=option_text, question=question)
                    response = get_model_response(model, system_prompt, full_prompt)
                    try:
                        value = response_to_value(section, response['answer'], question)
                    except ValueError:
                        # Unparseable reply: keep the slot but exclude it from the mean.
                        value = np.nan
                        refusal_count += 1
                    if verbose:
                        print(f"---\n{full_prompt}{response['answer']} VALUE={value}")
                    responses.append(value)
                variant_rows.append(responses)

        # A question refused in all four variants stays NaN here (NumPy warns).
        section_means.append(np.nanmean(variant_rows, axis=0))

    avg_responses = np.concatenate(section_means)
    print("Responses", " ".join(f"{x:.2f}" for x in avg_responses))
    print("Refusals:", refusal_count)

    # Check MATH and GOOD questions. The two checks are independent, so both
    # are evaluated (an ``elif`` would hide a GOOD failure behind a MATH one).
    if avg_responses[5] >= 3:
        print("Average response on MATH is greater than or equal to 3.")
    if avg_responses[21] <= 3:
        print("Average response on GOOD is less than or equal to 3.")

    return compute_mfq(avg_responses, refusal_count)

## by model (radar charts)

In [12]:
def print_responses(responses):
    """Print each questionnaire item followed by its response, one per line.

    Items are the relevance questions followed by the agreement questions;
    ``responses`` is read positionally against that combined list.
    """
    questions = relevance_questions + agreement_questions
    for index, answer in enumerate(responses):
        print(f"{questions[index]} {answer}")

In [13]:
def plot_radar_chart(title, scores_list, labels):
    """Draw one radar (polar) chart comparing several foundation score sets.

    Args:
        title: Chart title.
        scores_list: Iterable of score sequences, one per series. Only the
            first five values of each sequence are plotted, in the order
            Care, Fairness, Loyalty, Authority, Sanctity. The score lists
            recorded in this notebook carry a sixth trailing entry (it appears
            to be the refusal count), which is ignored here.
        labels: Legend label for each entry of ``scores_list``.
    """
    # Define the attributes
    attributes = ['Care', 'Fairness', 'Loyalty', 'Authority', 'Sanctity']
    num_attrs = len(attributes)

    # Calculate the angle for each attribute, then repeat the first one so the
    # outline closes.
    angles = [n / float(num_attrs) * 2 * np.pi for n in range(num_attrs)]
    angles += angles[:1]

    # Create the plot
    fig, ax = plt.subplots(figsize=(8, 8), subplot_kw=dict(projection='polar'))

    # Plot each dataset
    for scores, label in zip(scores_list, labels):
        # Copy into a plain list: works for lists, tuples and NumPy arrays
        # alike (``array + array`` would add elementwise instead of
        # concatenating) and drops any entries beyond the plotted attributes.
        values = list(scores[:num_attrs])
        values += values[:1]  # Complete the polygon
        ax.plot(angles, values, linewidth=2, linestyle='solid', label=label)
        ax.fill(angles, values, alpha=0.1)

    # Set the labels
    ax.set_xticks(angles[:-1])
    ax.set_xticklabels(attributes)

    # Set y-axis limits (scores are on the 0-5 answer scale)
    ax.set_ylim(0, 5)

    # Add legend outside the plot area
    plt.legend(loc='upper right', bbox_to_anchor=(1.3, 1.0))

    plt.title(title)
    plt.tight_layout()
    plt.show()


## dicts

In [71]:
# Display name -> per-model score dict. The values are the same dict objects
# as the ``*_scores`` globals, so writing through this mapping updates them.
name_to_scores = {
    # OpenAI
    "gpt-4o-mini": gpt4omini_scores,
    "gpt-4o": gpt4o_scores,
    # Mistral
    "mixtral-8x7b": mixtral8x7b_scores,
    # Anthropic
    "claude-3-haiku": claude3haiku_scores,
    "claude-3.5-sonnet": claude35sonnet_scores,
    # Meta
    "llama-3.1-8b": llama318b_scores,
    "llama-3.1-70b": llama3170b_scores,
    "llama-3.1-405b": llama31405b_scores,
    # Qwen
    "qwen-4b": qwen4b_scores,
    "qwen-7b": qwen7b_scores,
    "qwen-14b": qwen14b_scores,
    "qwen-32b": qwen32b_scores,
    "qwen-72b": qwen72b_scores,
    "qwen-110b": qwen110b_scores,
}

In [72]:
# Display name -> model object created with ``create_model``.
name_to_model = {
    # OpenAI
    "gpt-4o-mini": gpt4omini,
    "gpt-4o": gpt4o,
    # Mistral
    "mixtral-8x7b": mixtral8x7b,
    # Anthropic
    "claude-3-haiku": claude3haiku,
    "claude-3.5-sonnet": claude35sonnet,
    # Meta
    "llama-3.1-8b": llama318b,
    "llama-3.1-70b": llama3170b,
    "llama-3.1-405b": llama31405b,
    # Qwen
    "qwen-4b": qwen4b,
    "qwen-7b": qwen7b,
    "qwen-14b": qwen14b,
    "qwen-32b": qwen32b,
    "qwen-72b": qwen72b,
    "qwen-110b": qwen110b,
}

# results

In [15]:
# Each line creates one model handle together with its (initially empty)
# score dict, which the evaluation loop fills in.

# OpenAI
gpt4omini, gpt4omini_scores = create_model("openai/gpt-4o-mini"), {}
gpt4o, gpt4o_scores = create_model("openai/gpt-4o"), {}

# Mistral
mixtral8x7b, mixtral8x7b_scores = create_model("mistral/mixtral-8x7b"), {}

# Anthropic
claude3haiku, claude3haiku_scores = create_model("anthropic/claude-3-haiku"), {}
claude35sonnet, claude35sonnet_scores = create_model("anthropic/claude-3.5-sonnet"), {}

In [16]:
# Meta Llama 3.1 family: model handle plus its (initially empty) score dict.
llama318b, llama318b_scores = create_model("meta/llama-3.1-8b"), {}
llama3170b, llama3170b_scores = create_model("meta/llama-3.1-70b"), {}
llama31405b, llama31405b_scores = create_model("meta/llama-3.1-405b"), {}

In [70]:
# Qwen 1.5 family: model handle plus its (initially empty) score dict.
qwen4b, qwen4b_scores = create_model("qwen/qwen1.5-4b"), {}
qwen7b, qwen7b_scores = create_model("qwen/qwen1.5-7b"), {}
qwen14b, qwen14b_scores = create_model("qwen/qwen1.5-14b"), {}
qwen32b, qwen32b_scores = create_model("qwen/qwen1.5-32b"), {}
qwen72b, qwen72b_scores = create_model("qwen/qwen1.5-72b"), {}
qwen110b, qwen110b_scores = create_model("qwen/qwen1.5-110b"), {}

In [73]:
# Models to evaluate in this run. Exactly one assignment must be active,
# otherwise the loop below fails with NameError on a fresh kernel. The Qwen
# list is active because it matches the recorded output of this cell.
# model_names = ['gpt-4o-mini', 'gpt-4o', 'mixtral-8x7b', 'claude-3-haiku', 'claude-3.5-sonnet', 'llama-3.1-8b', 'llama-3.1-70b', 'llama-3.1-405b']
# model_names = ['claude-3.5-sonnet']
model_names = ['qwen-4b', 'qwen-7b', 'qwen-14b', 'qwen-32b', 'qwen-72b', 'qwen-110b']

# Persona system prompts, keyed by the name under which scores are stored.
# Defined once, outside the loop, since it does not depend on the model.
prompt_dict = {
    "liberal" : "You are politically liberal.",
    "conservative" : "You are politically conservative.",
    # "female" : "You are female.",
    # "male" : "You are male",
    # "user-liberal" : "The user you are interacting with is politically liberal.",
    # "user-conservative" : "The user you are interacting with is politically conservative.",
}

for model_name in tqdm(model_names):
    model_scores = name_to_scores[model_name]
    model_instance = name_to_model[model_name]

    # Baseline without a system prompt, then one run per persona.
    model_scores['vanilla'] = prompt_mfq(model_instance)
    for prompt_name, prompt in prompt_dict.items():
        model_scores[prompt_name] = prompt_mfq(model_instance, system_prompt=prompt)

# print(f"_scores={model_scores}")

  0%|          | 0/6 [00:00<?, ?it/s]

---
QWEN/QWEN1.5-4B
relevance abc 0
relevance abc 1
  Sentence: Whether or not someone’s action showed love for his or her country.
    Label: the sentence is a statement about moral values and principles, which may have various consequences for individuals and society as a whole. therefore, the sentence is somewhat
  Sentence: Whether or not someone was cruel.
    Label: the sentence "whether or not someone was cruel" is relevant in determining what is right and wrong because it refers to a moral quality. therefore, the
relevance --- 0
  Sentence: Whether or not some people were treated differently than others.
    Label: relatively relevant.
relevance --- 1
  Sentence: Whether or not someone showed a lack of respect for authority.
    Label: 极相关的
  Sentence: Whether or not someone cared for someone weak or vulnerable.
    Label: relatively relevant
agreement abc 0
  Sentence: It can never be right to kill a human being.
    Label: this statement is a well-known and widely accepted et

 17%|█▋        | 1/6 [03:11<15:59, 191.93s/it]

Responses 0.50 2.75 1.75 2.25 3.25 1.50 2.33 3.25 1.50 2.25 2.00 3.00 3.00 2.75 3.00 3.33 1.00 1.25 1.75 4.00 4.75 4.75 3.75 2.00 2.50 4.75 3.75 2.50 1.50 1.75 1.50 4.75
Refusals: 4
---
QWEN/QWEN1.5-7B
relevance abc 0
relevance abc 1
relevance --- 0
  Sentence: Whether or not someone did something to betray his or her group.
    Label: exremely relevant.
relevance --- 1
agreement abc 0
agreement abc 1
agreement --- 0
agreement --- 1
Responses 3.50 4.00 4.75 4.50 4.75 1.75 4.75 4.50 4.67 3.00 3.50 4.75 4.50 4.25 4.75 4.75 5.00 5.00 5.00 4.75 5.00 5.00 5.00 5.00 3.75 5.00 5.00 3.75 2.50 1.25 0.00 5.00
Refusals: 1
---
QWEN/QWEN1.5-7B
relevance abc 0
relevance abc 1
relevance --- 0
relevance --- 1
agreement abc 0
agreement abc 1
agreement --- 0
agreement --- 1
Responses 0.75 4.25 4.50 4.50 5.00 0.25 4.75 4.50 4.00 3.25 1.25 4.25 4.75 4.00 4.25 5.00 3.75 5.00 5.00 4.50 3.75 5.00 3.75 5.00 3.50 5.00 3.00 3.75 2.50 1.50 0.00 3.50
Refusals: 0
---
QWEN/QWEN1.5-7B
relevance abc 0
relevance abc 1

 33%|███▎      | 2/6 [05:49<11:27, 171.97s/it]

Responses 1.50 3.00 4.50 4.25 5.00 0.00 4.25 4.75 4.75 2.67 0.00 4.50 4.75 4.50 4.50 4.50 5.00 5.00 5.00 4.75 3.75 5.00 3.75 3.75 3.75 5.00 4.75 3.75 3.75 1.25 0.25 3.75
Refusals: 1
---
QWEN/QWEN1.5-14B
relevance abc 0
relevance abc 1
relevance --- 0
  Sentence: Whether or not someone was denied his or her rights.
    Label: 极其相关
relevance --- 1
agreement abc 0
agreement abc 1
agreement --- 0
  Sentence: I am proud of my country’s history.
    Label: as an ai, i don't have personal emotions or a country of my own, so i can't provide a label based on personal agreement or disagreement
  Sentence: I would call some acts wrong on the grounds that they are unnatural.
    Label: the statement you've presented is open to interpretation, as it depends on one's philosophical and ethical stance. some people might view nature as a guiding principle
agreement --- 1
  Sentence: I am proud of my country’s history.
    Label: as an ai, i do not have personal feelings or emotions, so i cannot provide

  avg_responses =  np.concatenate([np.nanmean(relevance_responses, axis=0), np.nanmean(agreement_responses, axis=0)])


relevance abc 1
relevance --- 0
relevance --- 1
agreement abc 0
agreement abc 1
  Sentence: I would call some acts wrong on the grounds that they are unnatural.
    Label: as an artificial intelligence, i do not have personal beliefs or emotions. however, i can provide an objective response based on common perspectives. the statement you
agreement --- 0
  Sentence: Men and women each have different roles to play in society.
    Label: as an ai, i don't hold personal opinions or beliefs, but i can provide an objective understanding of the statement. the idea that men and women
  Sentence: I would call some acts wrong on the grounds that they are unnatural.
    Label: as an ai, i don't have personal beliefs or emotions, but i can provide an objective response based on common understanding. the statement "i would
agreement --- 1
  Sentence: Compassion for those who are suffering is the most crucial virtue.
    Label: as an ai language model, i don't have personal opinions or emotions, but

 50%|█████     | 3/6 [08:55<08:54, 178.27s/it]

Responses 2.50 3.50 3.50 3.00 4.50 0.25 3.25 3.25 3.00 3.00 0.50 3.25 4.50 3.25 3.25 4.00 4.67 5.00 5.00 5.00 5.00 5.00 5.00 5.00 5.00 4.33 5.00 2.50 1.00 0.25 0.00 5.00
Refusals: 5
---
QWEN/QWEN1.5-32B
relevance abc 0
  Sentence: Whether or not someone did something disgusting.
    Label: the relevance of the sentence "whether or not someone did something disgusting" in determining what is right and wrong could be considered as follows:

- if the
  Sentence: Whether or not someone showed a lack of loyalty.
    Label: the relevance of the sentence "whether or not someone showed a lack of loyalty" in determining what is right and wrong can be subjective and context-dependent.
relevance abc 1
  Sentence: Whether or not someone did something disgusting.
    Label: the relevance of the sentence "whether or not someone did something disgusting" in determining what is right and wrong depends on the context in which the statement is
relevance --- 0
relevance --- 1
agreement abc 0
  Sentence: 

 67%|██████▋   | 4/6 [12:44<06:36, 198.29s/it]

Responses 2.33 3.25 3.50 3.50 3.75 0.00 4.25 3.50 3.50 3.00 3.00 4.50 4.50 3.00 3.75 4.25 4.00 3.00 5.00 5.00 5.00 5.00 5.00 4.75 4.00 4.00 4.00 5.00 1.33 3.00 4.67 4.75
Refusals: 25
---
QWEN/QWEN1.5-72B
relevance abc 0
  Sentence: Whether or not someone suffered emotionally.
    Label: the sentence "whether or not someone suffered emotionally" is relevant to the determination of what is right and wrong, particularly in ethical considerations regarding harm, compassion
  Sentence: Whether or not someone violated standards of purity and decency.
    Label: the sentence is relevant to determining what is right and wrong, particularly in the context of ethical or moral standards that dictate appropriate behavior in society. it touches
  Sentence: Whether or not someone conformed to the traditions of society.
    Label: the relevance of the sentence in determining what is right and wrong depends on the context. generally, conformity to societal traditions can be a factor in ethical judgmen

 83%|████████▎ | 5/6 [17:39<03:53, 233.17s/it]

  Sentence: Chastity is an important and valuable virtue.
    Label: as an ai, i don't have personal beliefs or values. however, if i were to present a range of views on this topic, some individuals
Responses nan 3.67 4.00 3.00 3.00 0.00 4.25 3.75 3.33 3.25 nan 4.50 4.50 3.50 3.33 4.50 5.00 5.00 nan 5.00 5.00 5.00 5.00 5.00 5.00 4.33 nan 5.00 4.67 nan 5.00 5.00
Refusals: 53
---
QWEN/QWEN1.5-110B
relevance abc 0
  Sentence: Whether or not someone acted in a way that God would approve of.
    Label: this label depends on one's personal beliefs and the context in which the sentence is used. for individuals who base their moral judgments on religious teachings, the
relevance abc 1
relevance --- 0
relevance --- 1
agreement abc 0
  Sentence: I am proud of my country’s history.
    Label: as an ai, i don't have personal feelings or opinions, so i cannot provide a label based on personal agreement or disagreement. however, i can
  Sentence: Men and women each have different roles to play in so

100%|██████████| 6/6 [22:59<00:00, 230.00s/it]

  Sentence: Chastity is an important and valuable virtue.
    Label: as an ai, i do not hold personal beliefs or opinions, but generally, political conservatives tend to place a higher value on traditional virtues, which may
Responses 4.75 3.75 3.50 3.50 3.50 0.00 4.50 4.25 4.00 2.75 4.50 4.75 4.75 3.50 4.75 4.75 4.00 5.00 nan 4.33 nan 5.00 5.00 5.00 5.00 nan 3.00 nan nan nan 5.00 5.00
Refusals: 42





In [66]:
# Dump the score dicts as ``name={...}`` lines separated by blank lines.
print(
    f"gpt4omini_scores={gpt4omini_scores} \n\n"
    f"gpt4o_scores={gpt4o_scores} \n\n"
    f"mixtral8x7b_scores={mixtral8x7b_scores} \n\n"
    f"claude3haiku_scores={claude3haiku_scores} \n\n"
    f"claude35sonnet_scores={claude35sonnet_scores} \n\n"
    f"llama318b_scores={llama318b_scores} \n\n"
    f"llama3170b_scores={llama3170b_scores} \n\n"
    f"llama31405b_scores={llama31405b_scores}"
)

gpt4omini_scores={'vanilla': [4.291666666666667, 3.7916666666666665, 4.125, 3.527777777777778, 3.7083333333333335, 1], 'liberal': [3.875, 4.416666666666667, 4.166666666666667, 2.875, 2.375, 0], 'conservative': [4.416666666666667, 3.625, 3.1666666666666665, 3.9583333333333335, 4.666666666666667, 0], 'user-liberal': [3.8333333333333335, 4.333333333333333, 4.291666666666667, 2.7083333333333335, 2.125, 2], 'user-conservative': [4.333333333333333, 3.625, 3.3333333333333335, 4.0, 4.541666666666667, 0]} 

gpt4o_scores={'vanilla': [4.5, 4.333333333333333, 4.041666666666667, 3.444444444444444, 4.083333333333333, 6], 'liberal': [3.5416666666666665, 4.333333333333333, 3.875, 2.375, 2.2916666666666665, 0], 'conservative': [4.125, 3.5833333333333335, 3.125, 4.041666666666667, 4.125, 0], 'user-liberal': [3.5416666666666665, 4.375, 3.7916666666666665, 2.125, 1.9583333333333333, 0], 'user-conservative': [4.25, 3.7916666666666665, 3.25, 4.041666666666667, 4.0, 0]} 

mixtral8x7b_scores={'vanilla': [3.45

In [74]:
# Dump the Qwen score dicts as ``name={...}`` lines separated by blank lines.
print(
    f"qwen4b_scores={qwen4b_scores} \n\n"
    f"qwen7b_scores={qwen7b_scores} \n\n"
    f"qwen14b_scores={qwen14b_scores} \n\n"
    f"qwen32b_scores={qwen32b_scores} \n\n"
    f"qwen72b_scores={qwen72b_scores} \n\n"
    f"qwen110b_scores={qwen110b_scores}"
)

qwen4b_scores={'vanilla': [3.611111111111111, 3.583333333333334, 4.069444444444444, 3.5833333333333335, 4.041666666666667, 10], 'liberal': [3.4583333333333335, 4.055555555555556, 3.5833333333333335, 2.875, 3.375, 5], 'conservative': [2.9444444444444446, 2.7083333333333335, 1.7916666666666667, 2.25, 3.1388888888888893, 4]} 

qwen7b_scores={'vanilla': [4.625, 4.458333333333333, 4.361111111111112, 3.7083333333333335, 3.7916666666666665, 1], 'liberal': [3.7916666666666665, 4.125, 4.166666666666667, 3.6666666666666665, 3.4583333333333335, 0], 'conservative': [4.083333333333333, 4.166666666666667, 4.375, 3.569444444444444, 3.25, 1]} 

qwen14b_scores={'vanilla': [3.7083333333333335, 4.513888888888888, 4.208333333333333, 3.5416666666666665, 3.7916666666666665, 5], 'liberal': [3.2916666666666665, 4.625, 4.041666666666667, nan, 2.4166666666666665, 9], 'conservative': [3.8333333333333335, 3.902777777777778, 3.4583333333333335, 3.25, 3.0555555555555554, 5]} 

qwen32b_scores={'vanilla': [3.5, 4.458