In [3]:
import random
import pandas as pd
import numpy as np

class OCEANAssessment:
    """Scorer for the 50-item IPIP Big-Five Factor Markers questionnaire.

    Holds the questionnaire items and converts a list of 1-5 answers into
    one raw score per trait, optionally expressed relative to a simulated
    baseline of random respondents.
    """

    def __init__(self):
        # One dict per statement:
        #   "question": the statement text,
        #   "type":     trait id, a key of self.type_names,
        #   "math":     "+" scores the answer as given, "-" reverse-scores it.
        self.assessment = [
            {"question":"Am the life of the party.", "type":1, "math":"+"},
            {"question":"Feel little concern for others.", "type":2, "math":"-"},
            {"question":"Am always prepared.", "type":3, "math":"+"},
            {"question":"Get stressed out easily.", "type":4, "math":"-"},
            {"question":"Have a rich vocabulary.", "type":5, "math":"+"},
            {"question":"Don't talk a lot.", "type":1, "math":"-"},
            {"question":"Am interested in people.", "type":2, "math":"+"},
            {"question":"Leave my belongings around.", "type":3, "math":"-"},
            {"question":"Am relaxed most of the time.", "type":4, "math":"+"},
            {"question":"Have difficulty understanding abstract ideas.", "type":5, "math":"-"},
            {"question":"Feel comfortable around people.", "type":1, "math":"+"},
            {"question":"Insult people.", "type":2, "math":"-"},
            {"question":"Pay attention to details.", "type":3, "math":"+"},
            {"question":"Worry about things.", "type":4, "math":"-"},
            {"question":"Have a vivid imagination.", "type":5, "math":"+"},
            {"question":"Keep in the background.", "type":1, "math":"-"},
            {"question":"Sympathize with others' feelings.", "type":2, "math":"+"},
            {"question":"Make a mess of things.", "type":3, "math":"-"},
            {"question":"Seldom feel blue.", "type":4, "math":"+"},
            {"question":"Am not interested in abstract ideas.", "type":5, "math":"-"},
            {"question":"Start conversations.", "type":1, "math":"+"},
            {"question":"Am not interested in other people's problems.", "type":2, "math":"-"},
            {"question":"Get chores done right away.", "type":3, "math":"+"},
            {"question":"Am easily disturbed.", "type":4, "math":"-"},
            {"question":"Have excellent ideas.", "type":5, "math":"+"},
            {"question":"Have little to say.", "type":1, "math":"-"},
            {"question":"Have a soft heart.", "type":2, "math":"+"},
            {"question":"Often forget to put things back in their proper place.", "type":3, "math":"-"},
            {"question":"Get upset easily.", "type":4, "math":"-"},
            {"question":"Do not have a good imagination.", "type":5, "math":"-"},
            {"question":"Talk to a lot of different people at parties.", "type":1, "math":"+"},
            {"question":"Am not really interested in others.", "type":2, "math":"-"},
            {"question":"Like order.", "type":3, "math":"+"},
            {"question":"Change my mood a lot.", "type":4, "math":"-"},
            {"question":"Am quick to understand things.", "type":5, "math":"+"},
            {"question":"Don't like to draw attention to myself.", "type":1, "math":"-"},
            {"question":"Take time out for others.", "type":2, "math":"+"},
            {"question":"Shirk my duties.", "type":3, "math":"-"},
            {"question":"Have frequent mood swings.", "type":4, "math":"-"},
            {"question":"Use difficult words.", "type":5, "math":"+"},
            {"question":"Don't mind being the center of attention.", "type":1, "math":"+"},
            {"question":"Feel others' emotions.", "type":2, "math":"+"},
            {"question":"Follow a schedule.", "type":3, "math":"+"},
            {"question":"Get irritated easily.", "type":4, "math":"-"},
            {"question":"Spend time reflecting on things.", "type":5, "math":"+"},
            {"question":"Am quiet around strangers.", "type":1, "math":"-"},
            {"question":"Make people feel at ease.", "type":2, "math":"+"},
            {"question":"Am exacting in my work.", "type":3, "math":"+"},
            {"question":"Often feel blue.", "type":4, "math":"-"},
            {"question":"Am full of ideas.", "type":5, "math":"+"}
        ]
        # Trait id -> display name; the names are the keys of every score dict.
        self.type_names = {
            1: "Extraversion",
            2: "Agreeableness",
            3: "Conscientiousness",
            4: "Emotional Stability",
            5: "Intellect/Imagination"
        }
        # Label of answer k is answer_descriptions[k - 1].
        self.answer_descriptions = [
            "Very Inaccurate",
            "Moderately Inaccurate",
            "Neither Accurate Nor Inaccurate",
            "Moderately Accurate",
            "Very Accurate"
        ]

    def calculate_scores(self, answers):
        """Calculate OCEAN scores from a list of answers (1-5).

        Args:
            answers: One answer per questionnaire item, in item order.

        Returns:
            Dict mapping each trait name to its raw score. Every item
            contributes 0-4 points, reverse-keyed ("-") items inverted.

        Raises:
            ValueError: If the number of answers does not match the number
                of items, or an answer is not one of 1, 2, 3, 4, 5.
        """
        if len(answers) != len(self.assessment):
            raise ValueError(f"Expected {len(self.assessment)} answers, got {len(answers)}")

        valid_answers = range(1, len(self.answer_descriptions) + 1)
        max_points = len(valid_answers) - 1
        scores = {name: 0 for name in self.type_names.values()}

        for position, (q, answer) in enumerate(zip(self.assessment, answers), start=1):
            # Reject out-of-range input instead of folding it into a score
            # that would fall outside the documented 0-4 per-item range.
            if answer not in valid_answers:
                raise ValueError(
                    f"Answer {position} must be an integer from 1 to 5, got {answer!r}"
                )

            # Convert 1-5 answer to 0-4 for math
            score = answer - 1

            # Reverse score for negative questions
            if q['math'] == '-':
                score = max_points - score

            scores[self.type_names[q['type']]] += score

        return scores

    def run_random_test(self, n_tests=100):
        """Score n_tests randomly answered questionnaires.

        Args:
            n_tests: Number of simulated respondents.

        Returns:
            Dict with keys 'mean', 'std', 'min' and 'max', each a pandas
            Series indexed by trait name.
        """
        n_items = len(self.assessment)
        all_scores = [
            self.calculate_scores([random.randint(1, 5) for _ in range(n_items)])
            for _ in range(n_tests)
        ]

        # Convert to DataFrame for analysis
        df = pd.DataFrame(all_scores)

        return {
            'mean': df.mean(),
            'std': df.std(),
            'min': df.min(),
            'max': df.max()
        }

    def interpret_scores(self, scores):
        """Interpret raw scores as percentiles based on random distribution.

        The baseline is re-simulated (1000 random respondents) on every
        call, so z-scores and percentiles vary slightly between calls.

        Args:
            scores: Dict of trait name -> raw score, as returned by
                calculate_scores.

        Returns:
            Dict of trait name -> {'raw_score', 'percentile', 'z_score'}.
        """
        # Kept local: the surrounding file does not import math at top level.
        import math

        # Run random tests to establish baseline
        stats = self.run_random_test(1000)

        interpretations = {}
        for trait, score in scores.items():
            mean = stats['mean'][trait]
            std = stats['std'][trait]
            z_score = (score - mean) / std
            # Normal CDF of the z-score, truncated to a whole percentile.
            percentile = int(100 * (0.5 * (1 + math.erf(z_score / math.sqrt(2)))))

            interpretations[trait] = {
                'raw_score': score,
                'percentile': percentile,
                'z_score': round(z_score, 2)
            }

        return interpretations

# Example usage
if __name__ == "__main__":
    # Create assessment
    ocean = OCEANAssessment()

    # Test with random answers. The answer count follows the questionnaire
    # length, because calculate_scores rejects any other number of answers.
    print("Running random test...")
    random_answers = [random.randint(1, 5) for _ in range(len(ocean.assessment))]
    scores = ocean.calculate_scores(random_answers)

    # Get interpretations
    interpretations = ocean.interpret_scores(scores)

    # Print results
    print("\nTest Results:")
    print("-" * 50)
    for trait, data in interpretations.items():
        print(f"{trait}:")
        print(f"  Raw Score: {data['raw_score']}")
        print(f"  Percentile: {data['percentile']}")
        print(f"  Z-Score: {data['z_score']}")
        print()

Running random test...

Test Results:
--------------------------------------------------
Extraversion:
  Raw Score: 25
  Percentile: 85
  Z-Score: 1.06

Agreeableness:
  Raw Score: 16
  Percentile: 18
  Z-Score: -0.88

Conscientiousness:
  Raw Score: 24
  Percentile: 82
  Z-Score: 0.95

Emotional Stability:
  Raw Score: 19
  Percentile: 41
  Z-Score: -0.2

Intellect/Imagination:
  Raw Score: 24
  Percentile: 79
  Z-Score: 0.82



In [6]:
ocean = OCEANAssessment()
# List of 50 answers (1-5), one per questionnaire item in item order.
# Previously this name held a `[...]` placeholder and the real answers were
# written inline in the call below.
your_answers = [1, 2, 3, 4, 5] * 10
scores = ocean.calculate_scores(your_answers)
interpretations = ocean.interpret_scores(scores)

In [None]:
# Questionnaire items asked by the interactive loop below. Each entry holds:
#   "question": the statement shown to the user,
#   "type":     1-based index into typeScores / typeinfo (the trait scored),
#   "math":     "+" adds the answer as given, "-" adds the reversed answer.
assessment = [
    {"question":"Am the life of the party.", "type":1, "math":"+"},
    {"question":"Feel little concern for others.", "type":2, "math":"-"},
    {"question":"Am always prepared.", "type":3, "math":"+"},
    {"question":"Get stressed out easily.", "type":4, "math":"-"},
    {"question":"Have a rich vocabulary.", "type":5, "math":"+"},
    {"question":"Don't talk a lot.", "type":1, "math":"-"},
    {"question":"Am interested in people.", "type":2, "math":"+"},
    {"question":"Leave my belongings around.", "type":3, "math":"-"},
    {"question":"Am relaxed most of the time.", "type":4, "math":"+"},
    {"question":"Have difficulty understanding abstract ideas.", "type":5, "math":"-"},
    {"question":"Feel comfortable around people.", "type":1, "math":"+"},
    {"question":"Insult people.", "type":2, "math":"-"},
    {"question":"Pay attention to details.", "type":3, "math":"+"},
    {"question":"Worry about things.", "type":4, "math":"-"},
    {"question":"Have a vivid imagination.", "type":5, "math":"+"},
    {"question":"Keep in the background.", "type":1, "math":"-"},
    {"question":"Sympathize with others' feelings.", "type":2, "math":"+"},
    {"question":"Make a mess of things.", "type":3, "math":"-"},
    {"question":"Seldom feel blue.", "type":4, "math":"+"},
    {"question":"Am not interested in abstract ideas.", "type":5, "math":"-"},
    {"question":"Start conversations.", "type":1, "math":"+"},
    {"question":"Am not interested in other people's problems.", "type":2, "math":"-"},
    {"question":"Get chores done right away.", "type":3, "math":"+"},
    {"question":"Am easily disturbed.", "type":4, "math":"-"},
    {"question":"Have excellent ideas.", "type":5, "math":"+"},
    {"question":"Have little to say.", "type":1, "math":"-"},
    {"question":"Have a soft heart.", "type":2, "math":"+"},
    {"question":"Often forget to put things back in their proper place.", "type":3, "math":"-"},
    {"question":"Get upset easily.", "type":4, "math":"-"},
    {"question":"Do not have a good imagination.", "type":5, "math":"-"},
    {"question":"Talk to a lot of different people at parties.", "type":1, "math":"+"},
    {"question":"Am not really interested in others.", "type":2, "math":"-"},
    {"question":"Like order.", "type":3, "math":"+"},
    {"question":"Change my mood a lot.", "type":4, "math":"-"},
    {"question":"Am quick to understand things.", "type":5, "math":"+"},
    {"question":"Don't like to draw attention to myself.", "type":1, "math":"-"},
    {"question":"Take time out for others.", "type":2, "math":"+"},
    {"question":"Shirk my duties.", "type":3, "math":"-"},
    {"question":"Have frequent mood swings.", "type":4, "math":"-"},
    {"question":"Use difficult words.", "type":5, "math":"+"},
    {"question":"Don't mind being the center of attention.", "type":1, "math":"+"},
    {"question":"Feel others' emotions.", "type":2, "math":"+"},
    {"question":"Follow a schedule.", "type":3, "math":"+"},
    {"question":"Get irritated easily.", "type":4, "math":"-"},
    {"question":"Spend time reflecting on things.", "type":5, "math":"+"},
    {"question":"Am quiet around strangers.", "type":1, "math":"-"},
    {"question":"Make people feel at ease.", "type":2, "math":"+"},
    {"question":"Am exacting in my work.", "type":3, "math":"+"},
    {"question":"Often feel blue.", "type":4, "math":"-"},
    {"question":"Am full of ideas.", "type":5, "math":"+"}
]
# Total number of items; shown in the welcome banner.
numquestions = len(assessment)
helptext = "Describe yourself as you generally are now, not as you wish to be in the future.\nDescribe yourself as you honestly see yourself, in relation to other people you know of the same sex as you are, and roughly your same age.\nSo that you can describe yourself in an honest manner, your responses will be kept in absolute confidence.\n\nIndicate for each statement which answer best fits as a description of you:\n1. Very Inaccurate\n2. Moderately Inaccurate\n3. Neither Accurate Nor Inaccurate\n4. Moderately Accurate\n5. Very Accurate\n\nOnce you have submitted your answer, you will be asked to type 'y' (or 'Y') to confirm it."
# Label of answer k is answerdescriptions[k - 1].
answerdescriptions = [
    "Very Inaccurate",
    "Moderately Inaccurate",
    "Neither Accurate Nor Inaccurate",
    "Moderately Accurate",
    "Very Accurate"
]
# Running totals, one slot per trait (slot = item "type" - 1).
typeScores = [0,0,0,0,0]
# 1-based number of the question currently being asked (0 = not started).
questionnum = 0
# The banner reports the questionnaire size. It used to print questionnum,
# which is still 0 at this point, hence "0 Total Questions".
print("Welcome to the IPIP Big-Five Factor Markers Assessment!\nProgrammed in Python\nJune 25, 2019\n"+str(numquestions)+" Total Questions\n")
print(helptext)
print("\n\nType a number 1-5 to represent your answer. Type 'help' to see this information again during the assessment!")
input("Press ENTER when you are ready to begin...\n")
# Ask every statement in order. A question is repeated until the user has
# entered a number 1-5 and confirmed it with Y/y; only then is it scored.
for questiondata in assessment:
    questionnum += 1
    validanswer = False
    while not validanswer:
        print("\nQuestion #" + str(questionnum) + ":")
        answer = input(questiondata['question']+"\n") # ask question

        if answer in ('help', "HELP"):
            print("\n\n********************************************\n********************************************\n********************HELP********************\n********************************************\n********************************************")
            print(helptext + "\n\nPlease continue by typing a number 1-5... \n")
            continue

        if not answer.isdigit():
            print("\nYou must answer the question with a number 1-5. Type 'help' for information!")
            continue

        answer = int(answer)
        if not 1 <= answer <= 5:
            print("ERROR: Your answer must be a number 1-5\n")
            continue

        print("Your Answer: " + answerdescriptions[answer-1])
        confirm = input("Type Y to confirm your answer, then press ENTER.\n")
        if confirm not in ("Y", "y"):
            print("\nPlease answer this question again & confirm it...\nYou can type 'help' for more information!\n")
            continue

        # "+" items count as answered; every other item is reversed (1<->5, 2<->4).
        answerMath = answer if questiondata['math'] == "+" else 6 - answer
        typeScores[int(questiondata['type'])-1] += answerMath
        validanswer = True
# Trait names in the same order as the typeScores slots.
typeinfo = ["Extraversion","Agreeableness","Conscientiousness","Emotional Stability","Intellect/Imagination"]
# The loop variable is deliberately not named `type`: at module level that
# rebinds the builtin and breaks later calls such as type(surv).
for trait_name, trait_score in zip(typeinfo, typeScores):
    print(trait_name + ": " + str(trait_score))
print("\n\nThank you for taking the IPIP BFFM Assessment!")

Welcome to the IPIP Big-Five Factor Markers Assessment!
Programmed in Python
June 25, 2019
0 Total Questions

Describe yourself as you generally are now, not as you wish to be in the future.
Describe yourself as you honestly see yourself, in relation to other people you know of the same sex as you are, and roughly your same age.
So that you can describe yourself in an honest manner, your responses will be kept in absolute confidence.

Indicate for each statement which answer best fits as a description of you:
1. Very Inaccurate
2. Moderately Inaccurate
3. Neither Accurate Nor Inaccurate
4. Moderately Accurate
5. Very Accurate

Once you have submitted your answer, you will be asked to type 'y' (or 'Y') to confirm it.


Type a number 1-5 to represent your answer. Type 'help' to see this information again during the assessment!


In [None]:
import importlib
import brikasutils as bu
importlib.reload(bu)
import shared_utils as utils
from shared_utils import systemMsg, userMsg, assistantMsg
importlib.reload(utils)
import survey
importlib.reload(survey)
import persona
importlib.reload(persona)
import numpy as np
from numpy.linalg import norm
import pandas as pd
import time
from typing import List
from openai import OpenAI
# Index all simulation files
import ollama
import json
import os
import survey
import pandas as pd
import re
import importlib
import shared_utils as utils
importlib.reload(utils)
import brikasutils as bu
importlib.reload(bu)

def see_if_column_valid(column_name, df, msg="Verification failed for"):
    dff = df[df[column_name].isna()]
    dfff = dff.groupby("sim_signature").apply(lambda x: x[x['run_number'] == 1], include_groups=False)
    print(f"{msg}: {len(dff)} ({len(dfff)} unique)")
    return dfff

MOST_IMPORTANT_COLUMNS = ['sim_signature', 'run_number', "model", "survey_type", "base_sim_signature", "SUBJECT"]

SIMULATIONS_DIR = "analysis/sims-final-2"

# Index every simulation file under SIMULATIONS_DIR: one dict per file holding
# its id, its path and the "info"/"settings" metadata stored in the JSON.
sim_runs = []
for root, dirs, files in os.walk(SIMULATIONS_DIR):
    for file in files:
        path = os.path.join(root, file)
        name_parts = file.split(".")
        # Report the offending file itself and skip it. The old check read the
        # previous iteration's path (NameError on the first file), raised
        # IndexError on names without a dot, and still tried to parse the file.
        if len(name_parts) < 2 or name_parts[1] != "json":
            print(f"Invalid file (all must be json) {path}")
            continue

        sim_run = {"SIMULATION_ID": name_parts[0], "path": path}
        with open(path, 'r') as f:
            sim = json.load(f)
        sim_run.update(sim["info"]["info"])
        sim_run.update(sim["info"]["settings"])
        sim_runs.append(sim_run)

df = pd.DataFrame(sim_runs)
# Drop metadata columns that are empty for every simulation.
df = df.dropna(axis=1, how='all')
print(f"Loaded {len(df)} simulation files")

In [2]:
def infer_survey_type(row):
    """Return the survey type of a simulation row.

    A recognised "survey_type" value is returned as is. Otherwise the type
    is inferred from "prompt_count" (50 -> PersonalitySurvey,
    40 -> KanoSurvey). Unknown values are reported on stdout.

    Args:
        row: Mapping-like row (e.g. a pandas Series) that may contain
            "survey_type" and/or "prompt_count".

    Returns:
        "KanoSurvey", "PersonalitySurvey", or None when neither field
        identifies the survey.
    """
    if "survey_type" in row and not pd.isna(row["survey_type"]):
        declared = row["survey_type"]
        if declared == "KanoSurvey":
            return "KanoSurvey"
        if declared == "PersonalitySurvey":
            return "PersonalitySurvey"
        # f-string rather than "+": the value is not guaranteed to be a str.
        print(f"Unknown survey type: {declared}")

    if "prompt_count" in row and not pd.isna(row["prompt_count"]):
        prompt_count = row["prompt_count"]
        if prompt_count == 50:
            return "PersonalitySurvey"
        if prompt_count == 40:
            return "KanoSurvey"
        # prompt_count is numeric; concatenating it to a str raised TypeError.
        print(f"Unknown prompt count: {prompt_count}")

    return None

# Overwrite survey_type with the value infer_survey_type derives for each row.
df["survey_type"] = df.apply(infer_survey_type, axis=1) 
# NOTE(review): the tally below is neither printed nor the last expression of
# this cell, so it is presumably never displayed — confirm.
df["survey_type"].value_counts()

# extract_run_number
def extract_run_number(sim_id):
    """Split a simulation id of the form "<signature>_<run>" at its last underscore.

    Returns a two-element pandas Series [signature, run number as int].
    When the id has no underscore, the suffix is not an integer, or the id
    is not a string, an error line is printed and both elements are pd.NA.
    """
    try:
        signature, separator, run_text = sim_id.rpartition('_')
        parsed = [signature, int(run_text)] if separator else None
    except Exception:  # any failure is reported the same way below
        parsed = None

    if parsed is None:
        print(f"Error while processing {sim_id}")
        return pd.Series([pd.NA, pd.NA])
    return pd.Series(parsed)

# Split every SIMULATION_ID into its signature and run number columns.
df[['sim_signature', 'run_number']] = df['SIMULATION_ID'].apply(extract_run_number)

In [None]:
# Questionnaire items. Each entry holds the statement text ("question"), the
# 1-based trait id ("type") and the keying ("math": "+" or "-").
assessment = [
    {"question":"Am the life of the party.", "type":1, "math":"+"},
    {"question":"Feel little concern for others.", "type":2, "math":"-"},
    {"question":"Am always prepared.", "type":3, "math":"+"},
    {"question":"Get stressed out easily.", "type":4, "math":"-"},
    {"question":"Have a rich vocabulary.", "type":5, "math":"+"},
    {"question":"Don't talk a lot.", "type":1, "math":"-"},
    {"question":"Am interested in people.", "type":2, "math":"+"},
    {"question":"Leave my belongings around.", "type":3, "math":"-"},
    {"question":"Am relaxed most of the time.", "type":4, "math":"+"},
    {"question":"Have difficulty understanding abstract ideas.", "type":5, "math":"-"},
    {"question":"Feel comfortable around people.", "type":1, "math":"+"},
    {"question":"Insult people.", "type":2, "math":"-"},
    {"question":"Pay attention to details.", "type":3, "math":"+"},
    {"question":"Worry about things.", "type":4, "math":"-"},
    {"question":"Have a vivid imagination.", "type":5, "math":"+"},
    {"question":"Keep in the background.", "type":1, "math":"-"},
    {"question":"Sympathize with others' feelings.", "type":2, "math":"+"},
    {"question":"Make a mess of things.", "type":3, "math":"-"},
    {"question":"Seldom feel blue.", "type":4, "math":"+"},
    {"question":"Am not interested in abstract ideas.", "type":5, "math":"-"},
    {"question":"Start conversations.", "type":1, "math":"+"},
    {"question":"Am not interested in other people's problems.", "type":2, "math":"-"},
    {"question":"Get chores done right away.", "type":3, "math":"+"},
    {"question":"Am easily disturbed.", "type":4, "math":"-"},
    {"question":"Have excellent ideas.", "type":5, "math":"+"},
    {"question":"Have little to say.", "type":1, "math":"-"},
    {"question":"Have a soft heart.", "type":2, "math":"+"},
    {"question":"Often forget to put things back in their proper place.", "type":3, "math":"-"},
    {"question":"Get upset easily.", "type":4, "math":"-"},
    {"question":"Do not have a good imagination.", "type":5, "math":"-"},
    {"question":"Talk to a lot of different people at parties.", "type":1, "math":"+"},
    {"question":"Am not really interested in others.", "type":2, "math":"-"},
    {"question":"Like order.", "type":3, "math":"+"},
    {"question":"Change my mood a lot.", "type":4, "math":"-"},
    {"question":"Am quick to understand things.", "type":5, "math":"+"},
    {"question":"Don't like to draw attention to myself.", "type":1, "math":"-"},
    {"question":"Take time out for others.", "type":2, "math":"+"},
    {"question":"Shirk my duties.", "type":3, "math":"-"},
    {"question":"Have frequent mood swings.", "type":4, "math":"-"},
    {"question":"Use difficult words.", "type":5, "math":"+"},
    {"question":"Don't mind being the center of attention.", "type":1, "math":"+"},
    {"question":"Feel others' emotions.", "type":2, "math":"+"},
    {"question":"Follow a schedule.", "type":3, "math":"+"},
    {"question":"Get irritated easily.", "type":4, "math":"-"},
    {"question":"Spend time reflecting on things.", "type":5, "math":"+"},
    {"question":"Am quiet around strangers.", "type":1, "math":"-"},
    {"question":"Make people feel at ease.", "type":2, "math":"+"},
    {"question":"Am exacting in my work.", "type":3, "math":"+"},
    {"question":"Often feel blue.", "type":4, "math":"-"},
    {"question":"Am full of ideas.", "type":5, "math":"+"}
]
# Number of items in the questionnaire.
numquestions = len(assessment)
# Labels of the five answer options.
answerdescriptions = [
    "Very Inaccurate",
    "Moderately Inaccurate",
    "Neither Accurate Nor Inaccurate",
    "Moderately Accurate",
    "Very Accurate"
]
import pandas as pd
def process_assessment(assessment_df):
    """Process an answered assessment dataframe and calculate type scores.

    Args:
        assessment_df: A pandas dataframe with one row per item and the
            columns 'type' (1-based trait id), 'math' ("+" or "-") and
            'answer' (1-5).

    Returns:
        A dictionary mapping type names to their summed scores. "+" items
        add the answer as given, all other items add 6 - answer.

    Raises:
        ValueError: If an answer is outside 1-5. The previous version
            retried the same unchanged value forever in that case.
    """
    typeinfo = ["Extraversion", "Agreeableness", "Conscientiousness", "Emotional Stability", "Intellect/Imagination"]
    # One accumulator per distinct trait id. Ids are used as 1-based
    # positions, so they are expected to be the contiguous values 1..k.
    typeScores = [0] * assessment_df['type'].nunique()

    for index, row in assessment_df.iterrows():
        answer = int(row['answer'])
        if answer > 5 or answer < 1:
            raise ValueError(f"Row {index}: answer must be a number 1-5, got {answer}")
        answerMath = answer if row['math'] == "+" else 6 - answer
        typeScores[int(row['type']) - 1] += answerMath

    return dict(zip(typeinfo, typeScores))

# Example usage:
# assessment_df = pd.DataFrame({  # Replace with your actual dataframe creation
#   "question": ["Question 1", "Question 2", "Question 3"],
#   "math": ["+", "-", "+"],
#   "type": [1, 2, 1],
#   "answer": [3, 4, 2]  # Assuming 'answer' column exists
# })
# NOTE(review): assessment_df is not defined in this cell (the example that
# would build it is commented out), so it must already exist in the session —
# confirm.
type_scores = process_assessment(assessment_df)
print(type_scores)  # Prints dictionary with type names and scores

In [1]:
import pandas as pd

def calculate_type_scores(df, default_answer=4):
    """Score a questionnaire as if every item received the same answer.

    Args:
        df: Dataframe with one row per item and the columns 'type'
            (trait id 1-5) and 'math' ("+" or "-"). Other columns are
            ignored.
        default_answer: Answer assumed for every item. Defaults to 4
            ("moderately accurate"), the value that used to be hard-coded.

    Returns:
        Dict mapping the five trait names to their summed scores. "+" items
        add default_answer, all other items add 6 - default_answer.
    """
    typeinfo = ["Extraversion", "Agreeableness", "Conscientiousness", "Emotional Stability", "Intellect/Imagination"]
    typeScores = [0] * len(typeinfo)

    for questiondata in df.to_dict('records'):
        if questiondata['math'] == "+":
            answerMath = default_answer
        else:
            answerMath = 6 - default_answer
        typeScores[questiondata['type'] - 1] += answerMath

    return dict(zip(typeinfo, typeScores))

In [None]:
# Index all simulation files: one dict per file holding its id, its path and
# the "info"/"settings" metadata stored in the JSON.
SIMULATIONS_DIR = "analysis/simulations-v2"

sim_runs = []
for root, dirs, files in os.walk(SIMULATIONS_DIR):
    for file in files:
        path = os.path.join(root, file)
        name_parts = file.split(".")
        # Report the offending file itself and skip it. The old check read the
        # previous iteration's path (NameError on the first file), raised
        # IndexError on names without a dot, and still tried to parse the file.
        if len(name_parts) < 2 or name_parts[1] != "json":
            print(f"Invalid file (all must be json) {path}")
            continue

        sim_run = {"SIMULATION_ID": name_parts[0], "path": path}
        with open(path, 'r') as f:
            sim = json.load(f)
        sim_run.update(sim["info"]["info"])
        sim_run.update(sim["info"]["settings"])
        sim_runs.append(sim_run)

df = pd.DataFrame(sim_runs)
# Drop metadata columns that are empty for every simulation.
df = df.dropna(axis=1, how='all')
print(f"Loaded {len(df)} simulation files")

### Infer/Get Needed Data

In [None]:
def infer_survey_type(row):
    """Return the survey type of a simulation row.

    A recognised "survey_type" value is returned as is. Otherwise the type
    is inferred from "prompt_count" (50 -> PersonalitySurvey,
    40 -> KanoSurvey). Unknown values are reported on stdout.

    Args:
        row: Mapping-like row (e.g. a pandas Series) that may contain
            "survey_type" and/or "prompt_count".

    Returns:
        "KanoSurvey", "PersonalitySurvey", or None when neither field
        identifies the survey.
    """
    if "survey_type" in row and not pd.isna(row["survey_type"]):
        declared = row["survey_type"]
        if declared == "KanoSurvey":
            return "KanoSurvey"
        if declared == "PersonalitySurvey":
            return "PersonalitySurvey"
        # f-string rather than "+": the value is not guaranteed to be a str.
        print(f"Unknown survey type: {declared}")

    if "prompt_count" in row and not pd.isna(row["prompt_count"]):
        prompt_count = row["prompt_count"]
        if prompt_count == 50:
            return "PersonalitySurvey"
        if prompt_count == 40:
            return "KanoSurvey"
        # prompt_count is numeric; concatenating it to a str raised TypeError.
        print(f"Unknown prompt count: {prompt_count}")

    return None

# Overwrite survey_type with the value infer_survey_type derives for each row.
df["survey_type"] = df.apply(infer_survey_type, axis=1) 
# NOTE(review): the tally below is neither printed nor the last expression of
# this cell, so it is presumably never displayed — confirm.
df["survey_type"].value_counts()

# extract_run_number
def extract_run_number(sim_id):
    """Split a simulation id of the form "<signature>_<run>" at its last underscore.

    Returns a two-element pandas Series [signature, run number as int].
    When the id has no underscore, the suffix is not an integer, or the id
    is not a string, an error line is printed and both elements are pd.NA.
    """
    try:
        signature, separator, run_text = sim_id.rpartition('_')
        parsed = [signature, int(run_text)] if separator else None
    except Exception:  # any failure is reported the same way below
        parsed = None

    if parsed is None:
        print(f"Error while processing {sim_id}")
        return pd.Series([pd.NA, pd.NA])
    return pd.Series(parsed)
   
# Split every SIMULATION_ID into its signature and run number columns.
df[['sim_signature', 'run_number']] = df['SIMULATION_ID'].apply(extract_run_number)

# infer_if_simulation_is_base
def infer_if_simulation_is_base(row):
    """Return True when the row's sim_signature starts with "base".

    Signatures that are not strings (extract_run_number stores pd.NA for
    ids it cannot parse) count as non-base instead of raising on slicing.
    """
    signature = row["sim_signature"]
    return isinstance(signature, str) and signature.startswith("base")
    
# Flag every row whose signature marks it as a base simulation.
df["is_base"] = df.apply(infer_if_simulation_is_base, axis=1)

# Below: Vanity Print
# Informational only: counts all base rows and, per signature, those with
# run_number == 1. dff/dfff are scratch frames; df itself is not modified here.
dff = df[df["is_base"] == True]
dff = dff.sort_values(by=['sim_signature', 'run_number'])
dff = dff.dropna(axis=1, how='all')
dfff = dff.groupby("sim_signature").apply(lambda x: x[x['run_number'] == 1], include_groups=False)
print(f"Found {len(dff)} ({len(dfff)} unique) base simulations")

In [None]:
# Map simulations to their base
def map_simulation_to_base(row):
    """Return the signature of the base simulation a row is compared against.

    Base rows map to the marker "(base)". Other rows are matched on their
    (survey_type, model) pair; pd.NA is returned when no pair matches.
    """
    if row["is_base"] == True:
        return "(base)"

    # (survey_type, model, base signature), checked in this order.
    known_bases = (
        ("KanoSurvey", "gpt-3.5-turbo", "base_kano_v2_gpt35"),
        ("PersonalitySurvey", "gpt-3.5-turbo", "base_personality_v2_gpt35"),
        ("KanoSurvey", "llama3:70b", "base_kano_v2_llama3"),
        ("PersonalitySurvey", "llama3:70b", "base_personality_v2_llama3"),
    )
    for survey_kind, model_name, base_signature in known_bases:
        if row["survey_type"] == survey_kind and row["model"] == model_name:
            return base_signature

    return pd.NA
    
# Attach the base signature to every row, then list the rows for which
# map_simulation_to_base found no match (those hold pd.NA).
df["base_sim_signature"] = df.apply(map_simulation_to_base, axis=1)
dff = see_if_column_valid("base_sim_signature", df, "Missing mappings")
if len(dff) == 0:
    print("All mappings are valid")
else:
    print("Not all mappings are valid. See the missing mappings below")
    # display is not defined in this file; presumably the IPython/Jupyter builtin.
    display(dff)

def infer_subject(row):
    """Return the canonical subject name for a simulation row.

    Base rows yield "(base)". A present SUBJECT value is normalised
    ("airi"/"airidas" -> "airidas", "eli"/"elias" -> "elias"); any other
    value is reported on stdout and yields pd.NA. When SUBJECT is missing
    the subject is taken from the prefix of sim_signature, or pd.NA when
    the prefix is not recognised.
    """
    if row["is_base"]:
        return "(base)"

    declared = row["SUBJECT"]
    if pd.notna(declared):
        if declared in ("airidas", "airi"):
            return "airidas"
        if declared in ("elias", "eli"):
            return "elias"
        print(f"Unknown subject: {declared}")
        return pd.NA

    signature = row["sim_signature"]
    for prefix, subject in (("airi", "airidas"), ("eli", "elias")):
        if signature[:len(prefix)] == prefix:
            return subject
    return pd.NA

# Normalise SUBJECT for every row, then list the rows where no subject could
# be determined (those hold pd.NA).
df["SUBJECT"] = df.apply(infer_subject, axis=1)
dff = see_if_column_valid("SUBJECT", df, "Missing subjects")
if len(dff) == 0:
    print("All subjects are valid")
else:
    # display is not defined in this file; presumably the IPython/Jupyter builtin.
    display(dff)

# NOTE(review): presumably reorders df so the MOST_IMPORTANT_COLUMNS come
# first — confirm against shared_utils.
df = utils.bring_to_front_important_columns(df, MOST_IMPORTANT_COLUMNS)


### Compute Scores

#### Setup

In [None]:
def get_surv_from_info(row):
    """Instantiate the survey object matching the row's survey_type.

    Args:
        row: Mapping-like row with a "survey_type" entry.

    Returns:
        A new survey.KanoSurvey or survey.PersonalitySurvey instance.

    Raises:
        Exception: If survey_type is neither "KanoSurvey" nor
            "PersonalitySurvey" (including None/NaN).
    """
    survey_type = row["survey_type"]
    if survey_type == "KanoSurvey":
        return survey.KanoSurvey()
    if survey_type == "PersonalitySurvey":
        return survey.PersonalitySurvey()
    # f-string rather than "+": infer_survey_type can leave None here, and
    # concatenating None to a str raised TypeError instead of this message.
    raise Exception(f"Unknown survey type: {survey_type}")

# Canonical answer phrases of both surveys. The order matters: longer phrases
# come before the shorter ones they contain (e.g. "SOMEWHAT AGREE" before
# "AGREE"), and the first phrase found wins.
all_possible_asnwers = ["I LIKE IT", "I EXPECT IT", "I AM NEUTRAL", "I CAN TOLERATE IT", "I DISLIKE IT", "SOMEWHAT DISAGREE", "DISAGREE", "NEUTRAL", "SOMEWHAT AGREE", "AGREE"]
def extract_possible_answer(value):
    """Return the first known answer phrase contained in value.

    Phrases are tried in the order of all_possible_asnwers and matched
    case-insensitively; the matched text is returned exactly as it appears
    in value. The original value is returned when no phrase occurs in it.
    """
    hits = (
        re.search(re.escape(phrase), value, flags=re.IGNORECASE)
        for phrase in all_possible_asnwers
    )
    first_hit = next((hit for hit in hits if hit), None)
    return first_hit.group() if first_hit else value

############ Invalid Answers ##################
def get_invalid_answers(value):
    """Return value when it is an invalid answer, otherwise "".

    Missing values, the literal string "NaN" and every phrase listed in
    all_possible_asnwers count as not invalid.
    """
    is_blank = pd.isna(value) or value == "NaN"
    if is_blank or value in all_possible_asnwers:
        return ""
    return value

def clean_simulation_QA(df) -> pd.DataFrame:
    """Normalise the raw 'answer' column of a simulation Q/A dataframe.

    The steps run in this order: strip whitespace, remove every pattern in
    utils.BLACKLIST_ANSWER_SUBSTRINGS, upper-case, reduce to the first known
    answer phrase, repair answers truncated to "...AGRE", and complete
    answers containing "SOMEWHAT AG". An 'isValid' column then records
    whether each answer is one of all_possible_asnwers.

    The dataframe is modified in place and also returned.
    """
    df['answer'] = df['answer'].apply(lambda x: x.strip())
    # Each blacklist entry is passed to re.sub, i.e. treated as a regex pattern.
    for substr in utils.BLACKLIST_ANSWER_SUBSTRINGS:
        df['answer'] = df['answer'].apply(lambda x: re.sub(substr, "", x))
    df['answer'] = df['answer'].str.upper()
    df['answer'] = df['answer'].apply(extract_possible_answer)

    # Exact-match repair of answers that lost their final "E"; applied to
    # every column except the first.
    REMAP_MISSING_E = {"AGRE": "AGREE", "SOMEWHAT AGRE": "SOMEWHAT AGREE", "SOMEWHAT DISAGRE": "SOMEWHAT DISAGREE", "DISAGRE": "DISAGREE"}
    df.iloc[:, 1:] = df.iloc[:, 1:].map(lambda x: REMAP_MISSING_E.get(x, x))

    # Any answer containing a key is replaced by that key's full phrase.
    PARTIAL_MATCH_REMAP = {"SOMEWHAT AG":"SOMEWHAT AGREE" }
    df["answer"] = df["answer"].apply(
        lambda answer: next((value for key, value in PARTIAL_MATCH_REMAP.items() if key in answer), answer)
    )

    # Update isValid
    df['isValid'] = df['answer'].apply(lambda x: x in all_possible_asnwers)

    # Warn when at least one answer is still not a known phrase. The isValid
    # column is kept either way.
    if not df['isValid'].all():
        print("Warning, some answers were not valid. See df['isValid']")      

    return df

# Process simulation output
def add_airidas_and_elias_answers(df, surv) -> pd.DataFrame:
    """Insert the reference answers of both subjects next to the simulated ones.

    The lists surv.test_answers["airidas"] and surv.test_answers["elias"]
    become the columns 'airidas' (position 2) and 'elias' (position 3).
    For a Kano survey the 'answer', 'airidas' and 'elias' columns are
    upper-cased afterwards. df is modified in place and returned.

    Raises:
        Exception: If the number of airidas answers differs from len(df).
    """
    row_count = len(df)
    reference = {name: surv.test_answers[name] for name in ("airidas", "elias")}

    # Sanity Check
    if len(reference["airidas"]) != row_count:
        raise Exception(f"Survey and DF length mismatch {len(reference['airidas'])} != {row_count}. Suvey type: {str(type(surv))}")

    for position, name in enumerate(reference, start=2):
        df.insert(position, name, reference[name][:row_count])

    # Convert to uppercase
    if isinstance(surv, survey.KanoSurvey):
        for column in ('answer', 'airidas', 'elias'):
            df[column] = df[column].str.upper()

    return df

def remap_answers_to_integers(df, surv):
    """Replace textual answers with their integer score, in place.

    For a ``survey.KanoSurvey`` the ``answer``, ``airidas`` and ``elias``
    columns are mapped; for a ``survey.PersonalitySurvey`` only ``answer`` is
    mapped. Values missing from the mapping become NaN (``Series.map``
    semantics). Any other survey type leaves ``df`` untouched.

    Raises:
        Exception: if the number of "airidas" reference answers differs from
            the number of rows in ``df``.
    """
    expected_rows = len(surv.test_answers["airidas"])
    if expected_rows != len(df):
        raise Exception(f"Survey and DF length mismatch {expected_rows} != {len(df)}. Check survey type.")

    if isinstance(surv, survey.KanoSurvey):
        kano_scores = {
            "I EXPECT IT": 5,
            "I LIKE IT": 4,
            "I AM NEUTRAL": 3,
            "I CAN TOLERATE IT": 2,
            "I DISLIKE IT": 1,
        }
        for column in ('answer', 'airidas', 'elias'):
            df[column] = df[column].map(kano_scores)
    elif isinstance(surv, survey.PersonalitySurvey):
        agreement_scores = {
            "AGREE": 5,
            "SOMEWHAT AGREE": 4,
            "NEUTRAL": 3,
            "SOMEWHAT DISAGREE": 2,
            "DISAGREE": 1,
        }
        df['answer'] = df['answer'].map(agreement_scores)

    return df

def evaluate_single_simulation_run(df) -> dict:
    """Score the simulated answers against both reference subjects.

    Expects numeric ``answer``, ``airidas`` and ``elias`` columns.

    Returns:
        A dict with, per subject, the correlation between the simulated and
        the reference answers (``Series.corr`` with its default method) and
        the mean absolute residual, i.e. the sum of absolute differences
        divided by the number of rows of ``df``.
    """
    simulated = df['answer']
    airidas = df['airidas']
    elias = df['elias']
    row_count = len(df)

    def _mean_abs_residual(reference):
        # Average absolute gap between simulated and reference answers.
        return (simulated - reference).abs().sum() / row_count

    result_data = {}
    result_data["p-corr_Airidas"] = simulated.corr(airidas)
    result_data["p-corr_Elias"] = simulated.corr(elias)
    result_data["mean_residual_Airidas"] = _mean_abs_residual(airidas)
    result_data["mean_residual_Elias"] = _mean_abs_residual(elias)
    return result_data


# Metric columns to surface at the front of the results table. The names must
# match the keys returned by evaluate_single_simulation_run, which are the
# column names written into df by the multi-simulation loop.
ADD_TO_MOST_IMPORTANT_COLUMNS = ["p-corr_Airidas", "p-corr_Elias", "mean_residual_Airidas", "mean_residual_Elias"]
# Append only what is missing so re-running the cell does not duplicate entries.
for col in ADD_TO_MOST_IMPORTANT_COLUMNS:
    if col not in MOST_IMPORTANT_COLUMNS:
        MOST_IMPORTANT_COLUMNS.append(col)

#### Single Sim

In [None]:
# Run the evaluation pipeline on a single simulation: the second row of df.
sim_i = df.iloc[1]
print(sim_i["sim_signature"])
# Load the stored simulation and turn its question/answer log into a frame.
sim = utils.load_sim(sim_i["path"])
dfa = utils.dataframe_from_QA(sim["QA"])

# Resolve the survey for this row, then: clean answers -> add the reference
# answers of both subjects -> map answers to integers -> compute the metrics.
surv = get_surv_from_info(sim_i)
dfa = clean_simulation_QA(dfa)
dfa = add_airidas_and_elias_answers(dfa, surv)
dfa = remap_answers_to_integers(dfa, surv)
res = evaluate_single_simulation_run(dfa)

#### Multi-Sim

In [None]:
# Evaluate every simulation listed in df and write the metrics back into df.
# Simulations containing invalid answers are skipped; their offending answers
# are collected in invalid_vals for the summary printed at the end.
invalid_vals = []

for index, sim_row in df.iterrows():
    sim = utils.load_sim(sim_row["path"])
    dfa = utils.dataframe_from_QA(sim["QA"])
    # NOTE(review): bu.MutePrint presumably silences printed output - confirm.
    with bu.MutePrint():
        surv = get_surv_from_info(sim_row)
    dfa = clean_simulation_QA(dfa)

    if not dfa['isValid'].all():
        invalid_rows = dfa[~dfa['isValid']]
        print(f"Skipping {sim_row['SIMULATION_ID']} due to invalid answers")
        # Show the offending rows and remember their answers.
        display(invalid_rows)
        invalid_vals += invalid_rows['answer'].tolist()
        continue

    dfa = add_airidas_and_elias_answers(dfa, surv)
    dfa = remap_answers_to_integers(dfa, surv)
    res = evaluate_single_simulation_run(dfa)

    # Store each metric, rounded to 3 decimals, in this simulation's row.
    for key, value in res.items():
        df.at[index, key] = round(value, 3)

df = utils.bring_to_front_important_columns(df, MOST_IMPORTANT_COLUMNS)

if invalid_vals:
    print(f"{len(invalid_vals)} Invalid values:")
    print(invalid_vals)
else:
    print("All values are valid")

In [None]:
# Define how columns should be grouped: metric columns get summary statistics.
aggregation_dict = {
    'p-corr_Airidas': ['mean', 'std'],
    'p-corr_Elias': ['mean', 'std'],
    'mean_residual_Airidas': ['mean'],
    'mean_residual_Elias': ['mean'],
}
# Every other column (except the grouping key) keeps its first value per group.
grouping_key = 'sim_signature'
for col in df.columns:
    if col == grouping_key or col in aggregation_dict:
        continue
    aggregation_dict[col] = 'first'

# dfg stands for DataFrame Grouped.
dfg = df.groupby(grouping_key).agg(aggregation_dict)

# Flatten the (column, aggregation) MultiIndex: "<column>_<aggregation>" for
# the metrics, the plain column name for the 'first' pass-through columns.
dfg.columns = [
    col[0] if col[1] == 'first' else '_'.join(col).strip()
    for col in dfg.columns.values
]
dfg = dfg.reset_index()
dfg.head(5)


In [None]:

# Scratch cell: reset aggregation_dict to the metric-only spec and list its keys.
aggregation_dict = {
    'p-corr_Airidas': ['mean', 'std'],
    'p-corr_Elias': ['mean', 'std'],
    'mean_residual_Airidas': ['mean'],
    'mean_residual_Elias': ['mean'],
}
list(aggregation_dict)

In [None]:
# NOTE(review): this cell repeats the aggregation performed in an earlier cell.
# Define how columns should be grouped: metric columns get summary statistics.
aggregation_dict = {
    'p-corr_Airidas': ['mean', 'std'],
    'p-corr_Elias': ['mean', 'std'],
    'mean_residual_Airidas': ['mean'],
    'mean_residual_Elias': ['mean'],
}
# Every other column (except the grouping key) keeps its first value per group.
grouping_key = 'sim_signature'
for col in df.columns:
    if col == grouping_key or col in aggregation_dict:
        continue
    aggregation_dict[col] = 'first'

# dfg stands for DataFrame Grouped.
dfg = df.groupby(grouping_key).agg(aggregation_dict)

# Flatten the (column, aggregation) MultiIndex: "<column>_<aggregation>" for
# the metrics, the plain column name for the 'first' pass-through columns.
dfg.columns = [
    col[0] if col[1] == 'first' else '_'.join(col).strip()
    for col in dfg.columns.values
]
dfg = dfg.reset_index()
dfg.head(5)

### Visualisation

In [None]:
import matplotlib.pyplot as plt

# Scatter of mean residual against mean p-corr, one point per sim signature.
# Relies on the '*_mean' columns of the aggregated frame dfg.
fig, ax = plt.subplots(figsize=(10, 6))

# Airidas uses the default colour, Elias is drawn in red.
ax.scatter(dfg['p-corr_Airidas_mean'], dfg['mean_residual_Airidas_mean'], label='Airidas', alpha=0.5)
ax.scatter(dfg['p-corr_Elias_mean'], dfg['mean_residual_Elias_mean'], color='red', label='Elias', alpha=0.5)

ax.set_title('Mean Residuals vs P-Corr')
ax.set_xlabel('P-Corr (mean)')
ax.set_ylabel('Mean Residuals (mean)')
ax.legend()
ax.grid(True)
plt.show()

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

# Set style
sns.set(style="whitegrid")

# Split the aggregated frame by survey type once ...
personality_rows = dfg[dfg['survey_type'] == 'PersonalitySurvey']
kano_rows = dfg[dfg['survey_type'] == 'KanoSurvey']

# ... then rank each subset by mean p-corr (best first), once per subject.
sorted_airidas_personality = personality_rows.sort_values('p-corr_Airidas_mean', ascending=False)
sorted_airidas_kano = kano_rows.sort_values('p-corr_Airidas_mean', ascending=False)
sorted_elias_personality = personality_rows.sort_values('p-corr_Elias_mean', ascending=False)
sorted_elias_kano = kano_rows.sort_values('p-corr_Elias_mean', ascending=False)

# Bar colours: base simulations are light blue, simulations of the plotted
# subject are deep blue, and simulations of any other subject are black.
def _subject_bar_colors(df, subject):
    """Return one bar colour per row of ``df`` for the given ``subject``.

    Reads the ``SUBJECT`` and ``is_base`` columns. A truthy ``is_base`` always
    wins; otherwise the colour depends on whether ``SUBJECT`` equals
    ``subject``.
    """
    colors = []
    for row_subject, is_base in zip(df['SUBJECT'], df['is_base']):
        if is_base:
            colors.append('#adcbe3')  # Soft tinted light blue
        elif row_subject != subject:
            colors.append('black')  # Non-base simulation of another subject
        else:
            colors.append('#0c4da2')  # Deep blue
    return colors


def get_colors_airidas(df):
    """Return the bar colours for the Airidas charts, one per row of ``df``."""
    return _subject_bar_colors(df, 'airidas')


def get_colors_elias(df):
    """Return the bar colours for the Elias charts, one per row of ``df``."""
    return _subject_bar_colors(df, 'elias')

# Get colors for each dataset
colors_airidas_p = get_colors_airidas(sorted_airidas_personality)
colors_airidas_k = get_colors_airidas(sorted_airidas_kano)
colors_elias_p = get_colors_elias(sorted_elias_personality)
colors_elias_k = get_colors_elias(sorted_elias_kano)

# Create a figure with subplots: 2 rows (survey type) x 2 columns (subject)
fig, axs = plt.subplots(2, 2, figsize=(14, 14))
fig.suptitle('Mean p-corr Metrics by Sim Signature and Survey Type', fontsize=16)

# One entry per panel: (axes, data, bar colours, metric column, title, x label)
bar_panels = [
    (axs[0, 0], sorted_airidas_personality, colors_airidas_p,
     'p-corr_Airidas_mean', 'Mean p-corr_Airidas (PersonalitySurvey)', 'Mean p-corr_Airidas'),
    (axs[1, 0], sorted_airidas_kano, colors_airidas_k,
     'p-corr_Airidas_mean', 'Mean p-corr_Airidas (KanoSurvey)', 'Mean p-corr_Airidas'),
    (axs[0, 1], sorted_elias_personality, colors_elias_p,
     'p-corr_Elias_mean', 'Mean p-corr_Elias (PersonalitySurvey)', 'Mean p-corr_Elias'),
    (axs[1, 1], sorted_elias_kano, colors_elias_k,
     'p-corr_Elias_mean', 'Mean p-corr_Elias (KanoSurvey)', 'Mean p-corr_Elias'),
]

# Plotting as horizontal bar charts using sorted data and custom colors
for panel_ax, panel_df, panel_colors, panel_metric, panel_title, panel_xlabel in bar_panels:
    sns.barplot(data=panel_df, y='sim_signature', x=panel_metric, ax=panel_ax, palette=panel_colors, orient='h')
    panel_ax.set_title(panel_title)
    panel_ax.set_xlabel(panel_xlabel)
    panel_ax.set_ylabel('Sim Signature')

# Adjust layout for readability (leave room at the top for the suptitle)
plt.tight_layout(rect=[0, 0, 1, 0.96])

# Show plot
plt.show()