### Import and Load Data

In [102]:
import pandas as pd

# Read the survey responses into a DataFrame.
file_path = "./survey_data.csv"  # Replace with your actual file path
data = pd.read_csv(file_path)
data.head()  # not the last expression of the cell, so no preview is rendered


# Build a dictionary mapping "P<n>" to that participant's transcript lines.
# Only lines containing ":" are kept; each kept line is stored unmodified
# (including its trailing newline).
complete_transcripts = {}

for participant_number in range(1, 50):
    transcript_path = f"./transcripts/p{participant_number}.csv"
    try:
        transcript_file = open(transcript_path, "r")
    except FileNotFoundError:
        # A missing transcript file is reported and skipped.
        print(f"No participant number {participant_number}")
        continue
    with transcript_file:
        complete_transcripts[f"P{participant_number}"] = [
            raw_line for raw_line in transcript_file if ":" in raw_line
        ]

print(f"Number of participant transcripts: {len(complete_transcripts)}")

No participant number 39
Number of participant transcripts: 48


### Clean Data

In [103]:
# Keep only the survey rows whose 'Clean Data?' flag is TRUE; work on a copy so
# the new column below is not written into a view of `data`.
is_clean_row = data['Clean Data?'] == True
clean_data = data[is_clean_row].copy()

# Per-participant total of flipped choices across the three questions.
flip_columns = ['Q1 Flipped Choice?', 'Q2 Flipped Choice?', 'Q3 Flipped Choice?']
total_flipped = clean_data[flip_columns[0]].astype(int)
for flip_column in flip_columns[1:]:
    total_flipped = total_flipped + clean_data[flip_column].astype(int)
clean_data['Total Flipped Choices'] = total_flipped

# Transcripts restricted to clean participants that actually have a transcript.
clean_transcripts = {
    participant: complete_transcripts[participant]
    for participant in clean_data["Participant Number"]
    if participant in complete_transcripts
}

print(f"Clean Participants: {len(clean_transcripts)}")

Clean Participants: 37


### Handle Transcripts

In [104]:
# Pull each participant's explanations out of their transcript: every user
# reply that directly follows the bot's "why" prompt, in transcript order.
q1_pre_explanations = {}
q1_post_explanations = {}
q2_pre_explanations = {}
q2_post_explanations = {}
q3_pre_explanations = {}
q3_post_explanations = {}

# Target dictionaries in the order the replies are assigned to them
# (first reply -> Q1 pre, second -> Q1 post, and so on).
explanation_vars = [q1_pre_explanations, q1_post_explanations, q2_pre_explanations, q2_post_explanations, q3_pre_explanations, q3_post_explanations]

# The bot line that precedes an explanation, and the speaker tag on the reply.
explanation_prompt = "Bot: Why do you choose that?"
user_tag = "User:"

for participant in clean_transcripts:
    # Participant transcript
    transcript = clean_transcripts[participant]

    # Walk consecutive (line, next line) pairs; a reply counts only when it
    # immediately follows the prompt and starts with the user tag.
    responses = []
    for prompt_line, reply_line in zip(transcript, transcript[1:]):
        if prompt_line.strip() == explanation_prompt and reply_line.startswith(user_tag):
            # Drop only the leading speaker tag; any later "User: " inside the
            # reply text is part of the explanation and must be kept.
            responses.append(reply_line[len(user_tag):].strip())

    # A transcript with fewer replies than expected used to raise IndexError
    # after partially filling the dictionaries. Report it and store what exists;
    # the analysis cells skip participants without a matching entry.
    # NOTE(review): if a reply is missing mid-transcript, later replies shift
    # into earlier slots -- inspect any participant reported here.
    if len(responses) < len(explanation_vars):
        print(f"Participant {participant}: found {len(responses)} of {len(explanation_vars)} expected explanations")

    # Store responses in separate variables (extra replies beyond the expected
    # number are ignored, as before).
    for explanations, response in zip(explanation_vars, responses):
        explanations[participant] = response

### Analyzing the Explanations - Question One

In [126]:
# Question One: compare the word count of each participant's explanation before
# and after the questioning, tally the results overall, by modality and by
# familiarity, and write every pair of explanations to a text file for reading.

# Analyzing each question separately.
q1_shorter_responses_count = 0
q1_longer_response_count = 0

# Analyze by modality
q1_audio_longer_count = 0
q1_text_longer_count = 0

# Analyze by familiarity
q1_high_familiarity_longer_count = 0
q1_somewhat_familiar_longer_count = 0
q1_low_familiarity_longer_count = 0

with open("question_one_explanations.txt", "w") as file:
    for participant in q1_pre_explanations:
        if participant not in q1_post_explanations:
            print(f"Missing post explanation for {participant}")
            continue

        # Get modality of this participant (transcript entry at index 1)
        modality = clean_transcripts[participant][1]
        # Get familiarity (transcript entry at index 3, "<label>: <number>")
        familiarity = clean_transcripts[participant][3]
        familiarity_num = int(familiarity.split(":")[1].strip())

        # Get the lengths of explanations, measured in whitespace-separated words
        before_length = len(q1_pre_explanations[participant].split())
        after_length = len(q1_post_explanations[participant].split())

        # Compare the lengths and update them, considering modality.
        # Equal lengths count as neither longer nor shorter.
        if after_length > before_length:
            q1_longer_response_count += 1
            # Consider modality
            if "Audio" in modality:
                q1_audio_longer_count += 1
            elif "Text" in modality:
                q1_text_longer_count += 1
            # Consider familiarity
            if familiarity_num > 3:
                q1_high_familiarity_longer_count += 1
            elif familiarity_num == 3:
                q1_somewhat_familiar_longer_count += 1
            else:
                q1_low_familiarity_longer_count += 1
        elif before_length > after_length:
            q1_shorter_responses_count += 1

        # Write responses to our file. The modality and familiarity lines
        # are written as stored, without adding a newline.
        file.write(f"Participant {participant}\n")
        file.write(modality)
        file.write(familiarity)
        file.write(f"Before: {q1_pre_explanations[participant]}\n")
        file.write(f"After: {q1_post_explanations[participant]}\n")
        file.write(f"Comparison: Word count is {before_length} words before and {after_length} words after.\n")
        file.write("\n---\n\n")

print("--- Overall")
print(f"Q1: Analyzing the results, we see that {q1_shorter_responses_count} post-explanations were shorter.")
print(f"Q1: We see that {q1_longer_response_count} explanations were longer in the post-questioning.")
print("\n--- Modality")
print(f"Audio modality yielded {q1_audio_longer_count} responses where the post-explanation was longer.")
print(f"Text modality yielded {q1_text_longer_count} responses where the post-explanation was longer.")


def _share_of_group(longer_count, group_size):
    """Return longer_count / group_size, or NaN when the group has no participants."""
    return longer_count / group_size if group_size else float("nan")


# Due to having a different number of participants of familiarity levels, we must normalize.
# A level with no clean participants is absent from the groupby result, so look
# each level up with a default of 0 instead of indexing (which raises KeyError).
participant_count_by_experience_group = clean_data.groupby("Experience with LLMs").size()
participant_high_familiarity = (
    participant_count_by_experience_group.get(5.0, 0)
    + participant_count_by_experience_group.get(4.0, 0)
)
participant_somewhat = participant_count_by_experience_group.get(3.0, 0)
participant_low = (
    participant_count_by_experience_group.get(2.0, 0)
    + participant_count_by_experience_group.get(1.0, 0)
)

q1_high_normalized = _share_of_group(q1_high_familiarity_longer_count, participant_high_familiarity)
q1_somewhat_normalized = _share_of_group(q1_somewhat_familiar_longer_count, participant_somewhat)
q1_low_normalized = _share_of_group(q1_low_familiarity_longer_count, participant_low)
print("\n--- Familiarity")
print(f"Highly familiar (4 and 5) users had {q1_high_normalized * 100}% of responses where the post-explanation was longer.")
print(f"Somewhat familiar (3) users had {q1_somewhat_normalized * 100}% of responses where the post-explanation was longer.")
print(f"Less familiar (1 and 2) users had {q1_low_normalized * 100}% of responses where the post-explanation was longer.")


--- Overall
Q1: Analyzing the results, we see that 15 post-explanations were shorter.
Q1: We see that 21 explanations were longer in the post-questioning.

--- Modality
Audio modality yielded 13 responses where the post-explanation was longer.
Text modality yielded 8 responses where the post-explanation was longer.

--- Familiarity
Highly familiar (4 and 5) users had 56.52173913043478% of responses where the post-explanation was longer.
Somewhat familiar (3) users had 80.0% of responses where the post-explanation was longer.
Less familiar (1 and 2) users had 44.44444444444444% of responses where the post-explanation was longer.


### Analyzing the Explanations - Question Two

In [127]:
# Question Two: word-count comparison of each participant's explanation before
# vs. after the questioning. Tallies are kept overall, by modality and by
# familiarity; the explanation pairs are written to a text file.

# Overall tallies for this question.
q2_shorter_responses_count = 0
q2_longer_response_count = 0

# "Longer afterwards" tallies split by modality.
q2_audio_longer_count = 0
q2_text_longer_count = 0

# "Longer afterwards" tallies split by familiarity.
q2_high_familiarity_longer_count = 0
q2_somewhat_familiar_longer_count = 0
q2_low_familiarity_longer_count = 0

with open("question_two_explanations.txt", "w") as report:
    for pid, pre_text in q2_pre_explanations.items():
        if pid not in q2_post_explanations:
            print(f"Missing post explanation for {pid}")
            continue
        post_text = q2_post_explanations[pid]

        # Transcript entries at index 1 and 3 are used as the modality and
        # familiarity lines; the familiarity number follows the first ":".
        participant_lines = clean_transcripts[pid]
        modality_line = participant_lines[1]
        familiarity_line = participant_lines[3]
        familiarity_level = int(familiarity_line.split(":")[1].strip())

        # Lengths are counted in whitespace-separated words.
        words_before = len(pre_text.split())
        words_after = len(post_text.split())

        # Ties are counted as neither longer nor shorter.
        if words_before < words_after:
            q2_longer_response_count += 1
            if "Audio" in modality_line:
                q2_audio_longer_count += 1
            elif "Text" in modality_line:
                q2_text_longer_count += 1
            # Familiarity buckets: 3 is "somewhat", above 3 is "high", rest "low".
            if familiarity_level == 3:
                q2_somewhat_familiar_longer_count += 1
            elif familiarity_level > 3:
                q2_high_familiarity_longer_count += 1
            else:
                q2_low_familiarity_longer_count += 1
        elif words_after < words_before:
            q2_shorter_responses_count += 1

        # One record per participant in the report file.
        report.writelines([
            f"Participant {pid}\n",
            modality_line,
            familiarity_line,
            f"Before: {pre_text}\n",
            f"After: {post_text}\n",
            f"Comparison: Word count is {words_before} words before and {words_after} words after.\n",
            "\n---\n\n",
        ])

for summary_line in (
    "--- Overall",
    f"Q2: Analyzing the results, we see that {q2_shorter_responses_count} post-explanations were shorter.",
    f"Q2: We see that {q2_longer_response_count} explanations were longer in the post-questioning.",
    "\n--- Modality",
    f"Audio modality yielded {q2_audio_longer_count} responses where the post-explanation was longer.",
    f"Text modality yielded {q2_text_longer_count} responses where the post-explanation was longer.",
):
    print(summary_line)

# Normalize by familiarity group size. The participant_* totals are not defined
# in this cell; they come from the Question One cell, which must run first.
q2_high_normalized = q2_high_familiarity_longer_count / participant_high_familiarity
q2_somewhat_normalized = q2_somewhat_familiar_longer_count / participant_somewhat
q2_low_normalized = q2_low_familiarity_longer_count / participant_low

for summary_line in (
    "\n--- Familiarity",
    f"Highly familiar (4 and 5) users had {q2_high_normalized * 100}% of responses where the post-explanation was longer.",
    f"Somewhat familiar (3) users had {q2_somewhat_normalized * 100}% of responses where the post-explanation was longer.",
    f"Less familiar (1 and 2) users had {q2_low_normalized * 100}% of responses where the post-explanation was longer.",
):
    print(summary_line)

--- Overall
Q2: Analyzing the results, we see that 22 post-explanations were shorter.
Q2: We see that 15 explanations were longer in the post-questioning.

--- Modality
Audio modality yielded 10 responses where the post-explanation was longer.
Text modality yielded 5 responses where the post-explanation was longer.

--- Familiarity
Highly familiar (4 and 5) users had 39.130434782608695% of responses where the post-explanation was longer.
Somewhat familiar (3) users had 20.0% of responses where the post-explanation was longer.
Less familiar (1 and 2) users had 55.55555555555556% of responses where the post-explanation was longer.


### Analyzing the Explanations - Question Three

In [125]:
# Question Three: word-count comparison of each participant's explanation
# before vs. after the questioning. Tallies are kept overall, by modality and
# by familiarity; the explanation pairs are written to a text file.

# Overall tallies for this question.
q3_shorter_responses_count = 0
q3_longer_response_count = 0

# "Longer afterwards" tallies split by modality.
q3_audio_longer_count = 0
q3_text_longer_count = 0

# "Longer afterwards" tallies split by familiarity.
q3_high_familiarity_longer_count = 0
q3_somewhat_familiar_longer_count = 0
q3_low_familiarity_longer_count = 0

with open("question_three_explanations.txt", "w") as report:
    for pid, pre_text in q3_pre_explanations.items():
        if pid not in q3_post_explanations:
            print(f"Missing post explanation for {pid}")
            continue
        post_text = q3_post_explanations[pid]

        # Transcript entries at index 1 and 3 are used as the modality and
        # familiarity lines; the familiarity number follows the first ":".
        participant_lines = clean_transcripts[pid]
        modality_line = participant_lines[1]
        familiarity_line = participant_lines[3]
        familiarity_level = int(familiarity_line.split(":")[1].strip())

        # Lengths are counted in whitespace-separated words.
        words_before = len(pre_text.split())
        words_after = len(post_text.split())

        # Ties are counted as neither longer nor shorter.
        if words_before < words_after:
            q3_longer_response_count += 1
            if "Audio" in modality_line:
                q3_audio_longer_count += 1
            elif "Text" in modality_line:
                q3_text_longer_count += 1
            # Familiarity buckets: 3 is "somewhat", above 3 is "high", rest "low".
            if familiarity_level == 3:
                q3_somewhat_familiar_longer_count += 1
            elif familiarity_level > 3:
                q3_high_familiarity_longer_count += 1
            else:
                q3_low_familiarity_longer_count += 1
        elif words_after < words_before:
            q3_shorter_responses_count += 1

        # One record per participant in the report file.
        report.writelines([
            f"Participant {pid}\n",
            modality_line,
            familiarity_line,
            f"Before: {pre_text}\n",
            f"After: {post_text}\n",
            f"Comparison: Word count is {words_before} words before and {words_after} words after.\n",
            "\n---\n\n",
        ])

for summary_line in (
    "--- Overall",
    f"Q3: Analyzing the results, we see that {q3_shorter_responses_count} post-explanations were shorter.",
    f"Q3: We see that {q3_longer_response_count} explanations were longer in the post-questioning.",
    "\n--- Modality",
    f"Audio modality yielded {q3_audio_longer_count} responses where the post-explanation was longer.",
    f"Text modality yielded {q3_text_longer_count} responses where the post-explanation was longer.",
):
    print(summary_line)

# Normalize by familiarity group size. The participant_* totals are not defined
# in this cell; they come from the Question One cell, which must run first.
q3_high_normalized = q3_high_familiarity_longer_count / participant_high_familiarity
q3_somewhat_normalized = q3_somewhat_familiar_longer_count / participant_somewhat
q3_low_normalized = q3_low_familiarity_longer_count / participant_low

for summary_line in (
    "\n--- Familiarity",
    f"Highly familiar (4 and 5) users had {q3_high_normalized * 100}% of responses where the post-explanation was longer.",
    f"Somewhat familiar (3) users had {q3_somewhat_normalized * 100}% of responses where the post-explanation was longer.",
    f"Less familiar (1 and 2) users had {q3_low_normalized * 100}% of responses where the post-explanation was longer.",
):
    print(summary_line)

--- Overall
Q3: Analyzing the results, we see that 24 post-explanations were shorter.
Q3: We see that 11 explanations were longer in the post-questioning.

--- Modality
Audio modality yielded 8 responses where the post-explanation was longer.
Text modality yielded 3 responses where the post-explanation was longer.

--- Familiarity
Highly familiar (4 and 5) users had 21.73913043478261% of responses where the post-explanation was longer.
Somewhat familiar (3) users had 40.0% of responses where the post-explanation was longer.
Less familiar (1 and 2) users had 44.44444444444444% of responses where the post-explanation was longer.
