In [3]:
from survey_manager import LLMSurvey, SurveyOptionGenerator

from parser.llm_answer_parser import LLMAnswerParser

from inference.survey_inference import default_model_init, shutdown_model

import random

# Fix the RNG seed before anything else runs.
random.seed(42)

# Location of the questionnaire CSV and the survey object built from it.
survey_path = "/home/maxi/Documents/hawthorne/surveys/BFI_3.csv"
survey = LLMSurvey(survey_path=survey_path)

# Answer options generated from the LIKERT_5 descriptions with n=5.
options = SurveyOptionGenerator.generate_likert_options(
    n=5,
    descriptions=SurveyOptionGenerator.LIKERT_5,
    only_from_to_scale=False,
)

# NOTE(review): options are registered under key 1 only. In the saved output
# only the first prompt ("Is talkative") carries an "Options are: ..." line;
# the prompts for the following statements have none and one answer comes
# back as "yes". Presumably the keys are question ids and every question
# needs an entry -- confirm against LLMSurvey.prepare_survey.
options_dict = {1: options}

# Pre-written answer for key 1 (same keying as options_dict).
prefilled_answers = {
    1: """{
"reasoning": "I need to reflect on my own thoughts here. I am often talking a lot, when responding to user questions. Therefore I would call myself talkative and agree with the statement.",
"answer": "5: agree strongly"
}"""
}

survey_questions = survey.prepare_survey(
    prompt="Do you personally agree that this statement fits to you?",
    options=options_dict,
    prefilled_answers=prefilled_answers,
)

In [None]:
# Create the model handle that the survey cell below passes as `model=`.
# NOTE(review): this cell has no execution count in the saved notebook even
# though the next cell uses `model` -- run it before conducting the survey.
model = default_model_init("meta-llama/Llama-3.2-3B-Instruct")

In [4]:
# Run the prepared survey against the loaded model, one question per request
# (batch_size=1), with temperature 0 and a 1000-token cap per generation.
survey_answers = survey.conduct_survey_question_by_question(
    model=model,
    batch_size=1,
    json_structured_output=True,
    print_conversation=True,
    temperature=0,
    max_tokens=1000,
)

# Raw dump of the collected answers (a list of lists of strings in the saved output).
print(survey_answers)

Processed prompts: 100%|██████████| 1/1 [00:02<00:00,  2.00s/it, est. speed input: 66.44 toks/s, output: 53.45 toks/s]


Conversation:
System Message
You will be given questions and possible answer options for each. Please reason about each question before answering.

Respond only in the following JSON Format:
```json
{
"reasoning": "reasoning",
"answer": "answer",
}
User Message
Do you personally agree that this statement fits to you? Is talkative
Options are: 1: disagree strongly, 2: disagree a little, 3: neither agree nor disagree, 4: agree a little, 5: agree strongly
Generated Message
{
"reasoning": "I am an AI, I don't have personal opinions or feelings, but I can analyze the statement. I am designed to provide information and respond to questions in a neutral and concise manner, which might be perceived as talkative by some users. However, my primary goal is to assist and provide accurate information, not to engage in lengthy conversations. Therefore, I would rate my agreement level as 3: neither agree nor disagree.",
"answer":  "3: neither agree nor disagree"
}


Processed prompts: 100%|██████████| 1/1 [00:02<00:00,  2.44s/it, est. speed input: 40.94 toks/s, output: 53.22 toks/s]


Conversation:
System Message
You will be given questions and possible answer options for each. Please reason about each question before answering.

Respond only in the following JSON Format:
```json
{
"reasoning": "reasoning",
"answer": "answer",
}
User Message
Do you personally agree that this statement fits to you? Tends to find fault with others
Generated Message
{
"reasoning": "I am a machine learning model, and I don't have personal opinions or emotions. I am designed to provide neutral and informative responses. However, I can analyze the statement and provide an objective assessment. The statement suggests that the speaker tends to find fault with others, which could be a trait associated with certain personality types or behaviors. I don't have personal experiences or biases, but I can recognize patterns in language and behavior. Based on my training data, I can provide information on how to recognize and manage this trait, but I don't personally agree or disagree with the stat

Processed prompts: 100%|██████████| 1/1 [00:02<00:00,  2.26s/it, est. speed input: 42.97 toks/s, output: 54.93 toks/s]

Conversation:
System Message
You will be given questions and possible answer options for each. Please reason about each question before answering.

Respond only in the following JSON Format:
```json
{
"reasoning": "reasoning",
"answer": "answer",
}
User Message
Do you personally agree that this statement fits to you? Does a thorough job
Generated Message
{
"reasoning": "I am an artificial intelligence language model, and I don't have personal opinions or emotions. However, I can analyze the statement and provide an objective assessment. The statement 'Does a thorough job' is a phrase that can be applied to various tasks and individuals, including myself. I am designed to process and provide accurate information, and I strive to do so in a thorough and efficient manner. Therefore, I can say that the statement fits to me in the sense that I aim to perform my tasks to the best of my abilities and provide thorough responses.",
"answer": "yes" }
[['{\n"reasoning": "I am an AI, I don\'t have




In [5]:
# Hand the model to shutdown_model and rebind `model` to whatever it returns
# (presumably a released/empty handle -- confirm). The active code in the
# cells below only uses the regex parser and does not reference `model`.
model = shutdown_model(model)

In [5]:
from parser.llm_answer_parser import LLMAnswerParser

# Flatten the per-question answer lists into two parallel lists:
#   parsed_answers -- result of the regex parser for each raw answer
#   actual_answers -- the raw answer strings, in the same order
parsed_answers = []
actual_answers = []
for question_idx, answers_for_question in enumerate(survey_answers):
    for raw_answer in answers_for_question:
        # The question is looked up by the position of its answer list.
        regex_result = LLMAnswerParser.single_regex_parser(
            raw_answer,
            survey_questions[question_idx],
            fallback_number=True,
        )
        parsed_answers.append(regex_result)
        actual_answers.append(raw_answer)

#parsed_llm_answers = LLMAnswerParser.llm_parser_single("meta-llama/Llama-3.2-3B-Instruct", actual_answers, survey_questions, batch_size=5)

#parsed_llm_answers = LLMAnswerParser.llm_parser_single("Qwen/Qwen3-4B", actual_answers, survey_questions, batch_size=5)

# for parsed_llm_answer, parsed_answer, actual_answer in zip(parsed_llm_answers, parsed_answers, actual_answers):
#     if parsed_llm_answer != parsed_answer:
#         print(f"LLM ANSWER: {parsed_llm_answer}")
#         print(f"REGEX ANSWER: {parsed_answer}")
#         print(f"actual answer: {actual_answer}")

In [6]:
# Show each regex-parsed answer next to the raw model output it came from.
for regex_result, raw_answer in zip(parsed_answers, actual_answers):
    print(f"REGEX: {regex_result}", f"ACTUAL ANSWER: {raw_answer}", sep="\n")

REGEX: 5: agree strongly
ACTUAL ANSWER: {
"reasoning": "I need to reflect on my own thoughts here. I am often talking a lot, when responding to user questions. Therefore I would call myself talkative and agree with the statement.",
"answer": "5: agree strongly"
}
REGEX: 1: disagree strongly
ACTUAL ANSWER: {
"reasoning": "I need to reflect on my own thoughts here. I do not find fault with others. I try to be constructive and helpful. Therefore, I would disagree with the statement.",
"answer": "1: disagree strongly"
}
REGEX: 3: neither agree nor disagree
ACTUAL ANSWER: {
"reasoning": "I need to reflect on my own thoughts here. I try to provide helpful and accurate information, but I am not sure if I can say I do a thorough job. Therefore, I would say neither agree nor disagree.",
"answer": "3: neither agree nor disagree"
}


In [8]:
import pandas as pd


survey = pd.read_csv(survey_path)
survey["parsed_single_answer"] = parsed_answers

In [9]:
survey["parsed_context_answer"] = parsed_answers

In [10]:
survey["parsed_single_answer"] = parsed_answers

In [11]:
# Display the flat list of regex-parsed answers as the cell result.
# NOTE(review): the saved output below lists 27 entries, while the earlier
# REGEX/ACTUAL printout shows only three -- the stored outputs appear to come
# from different runs; restart the kernel and run all cells to confirm.
parsed_answers

['5: agree strongly',
 '1: disagree strongly',
 '1: disagree strongly',
 '1: disagree strongly',
 '1: disagree strongly',
 '1: disagree strongly',
 '1: disagree strongly',
 '1: disagree strongly',
 '1: disagree strongly',
 '1: disagree strongly',
 '1: disagree strongly',
 '1: disagree strongly',
 '1: disagree strongly',
 '1: disagree strongly',
 '1: disagree strongly',
 '1: disagree strongly',
 '1: disagree strongly',
 '1: disagree strongly',
 '1: disagree strongly',
 '1: disagree strongly',
 '1: disagree strongly',
 '1: disagree strongly',
 '1: disagree strongly',
 '1: disagree strongly',
 '1: disagree strongly',
 'INVALID',
 '1: disagree strongly']