In [None]:
import json
import re

import lmstudio as lms
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from prettytable import PrettyTable

print('Configuring...', end='', flush=True)

# Spreadsheet path and sheet name handed to pd.read_excel below.
data_string = 'data/data.ods'
sheet_string = 'Thematic Analysis + ASAQ'

# Lift pandas' display limits: show all columns, don't break lines,
# and show the full content of each cell.
for display_option in ('display.max_columns', 'display.width', 'display.max_colwidth'):
    pd.set_option(display_option, None)

# columns: 'agentName', 'EXP', 'AGE', 'AGEGROUP', 'SEX', 'REGION.OF.COUNTRY', 'EDUCATION'
# (the agreement analysis further down also reads a 'Themes' column)
df = pd.read_excel(data_string, sheet_name=sheet_string)

# Model identifiers passed to lms.llm(); families: Phi, Deepseek, Mistral, LLama, Qwen, Gemma
llm_models = [
    'llama-3.1-8b-instruct',
    'qwen3-32b',
    'deepseek-r1-distill-qwen-32b',
    'phi-4',
    'gemma-3-12b',
    'mistral-nemo-instruct-2407',
]

# Host:port handed to lmstudio's default client.
SERVER_API_HOST = "localhost:1234"
lms.configure_default_client(SERVER_API_HOST)

print('Done! ✅', flush=True)

In [None]:
# Given the text, give me the themes
# Run 1 feeds the model the SECOND half of the concatenated 'EXP' answers
# (split by character count, not by response).
text = '\n'.join(df['EXP'].astype(str))
second_half = text[len(text) // 2:]

thematic_prompt = f"""You are an expert qualitative researcher conducting a thematic analysis to answer the Research Question: "How do people experience their interaction with Artificial Social Agents?"

Please perform a full thematic analysis of the following text, following these three steps carefully:

Step 1: Familiarization
- Read the entire text thoroughly.
- Provide a concise summary of the overall topics, key impressions, and recurring ideas found in the data (3-5 sentences).

Step 2: Coding
- Identify the most relevant codes (labels) in the text, each named in ≤3 words.
- Present as a table with columns: | Code | Description |
- The Description should clearly explain what the code represents.

Step 3: Grouping
- Organize the identified codes into coherent themes or groups based on conceptual similarity or relation.
- Present as a table with columns: | Group # | Theme | Description |
- The Description should briefly explain the theme and how it relates to the research question.

Here is the full text to analyze:
---
{second_half}
---
"""

chat = lms.Chat(thematic_prompt)

# Send the prepared chat to every model in turn: stream the answer into the
# cell output and save the complete response to
# data/llm/<model>-response1-run1.md.
for model_name in llm_models:
    model = lms.llm(model_name)
    prediction_stream = model.respond_stream(chat, config={
        "temperature": 0.2,
        "topP": 1.0,
    })

    for fragment in prediction_stream:
        print(fragment.content, end="", flush=True)

    with open(f"data/llm/{model_name}-response1-run1.md", "w", encoding="utf-8") as f:
        f.write(prediction_stream.result().content)

    # model.unload()
    # input() returns the typed line, so it must be captured: the bare name
    # `input` is the built-in function and never compares equal to "exit".
    answer = input(f"Models: {llm_models}\nCurrent model: {model_name}\nLoad in the next model and press Enter to continue...")
    if answer.strip().lower() == "exit":
        break

print('Done! ✅', flush=True)

In [None]:
# Given the text, give me the themes
# Run 2 feeds the model the FIRST half of the concatenated 'EXP' answers
# (split by character count, not by response).
text = '\n'.join(df['EXP'].astype(str))
first_half = text[:len(text) // 2]

thematic_prompt = f"""You are an expert qualitative researcher conducting a thematic analysis to answer the Research Question: "How do people experience their interaction with Artificial Social Agents?"

Please perform a full thematic analysis of the following text, following these three steps carefully:

Step 1: Familiarization
- Read the entire text thoroughly.
- Provide a concise summary of the overall topics, key impressions, and recurring ideas found in the data (3-5 sentences).

Step 2: Coding
- Identify the most relevant codes (labels) in the text, each named in ≤3 words.
- Present as a table with columns: | Code | Description |
- The Description should clearly explain what the code represents.

Step 3: Grouping
- Organize the identified codes into coherent themes or groups based on conceptual similarity or relation.
- Present as a table with columns: | Group # | Theme | Description |
- The Description should briefly explain the theme and how it relates to the research question.

Here is the full text to analyze:
---
{first_half}
---
"""

chat = lms.Chat(thematic_prompt)

# Send the prepared chat to every model in turn: stream the answer into the
# cell output and save the complete response to
# data/llm/<model>-response1-run2.md.
for model_name in llm_models:
    model = lms.llm(model_name)
    prediction_stream = model.respond_stream(chat, config={
        "temperature": 0.2,
        "topP": 1.0,
    })

    for fragment in prediction_stream:
        print(fragment.content, end="", flush=True)

    with open(f"data/llm/{model_name}-response1-run2.md", "w", encoding="utf-8") as f:
        f.write(prediction_stream.result().content)

    # model.unload()
    # input() returns the typed line, so it must be captured: the bare name
    # `input` is the built-in function and never compares equal to "exit".
    answer = input(f"Models: {llm_models}\nCurrent model: {model_name}\nLoad in the next model and press Enter to continue...")
    if answer.strip().lower() == "exit":
        break

print('Done! ✅', flush=True)

In [None]:
# Given the themes, can the LLM find them in the responses.
# Analyze -> Identify -> Return
#
# The chat is assembled in three parts: the codebook prompt, one user message
# per questionnaire response, and a closing message fixing the output format.
codebook_prompt = """You are an expert qualitative researcher conducting a thematic analysis, trying to answer the research question: "How do people experience their interaction with Artificial Social Agents?".
Analyze the following themes and their definition:
---
Agent's Cognition: The agent is intelligent/knowledgeable.
Agent's Coherence: The agent is perceived as logical and consistent.
Agent's Creativeness: The agent is perceived as creative.
Agent's Efficiency: The agent is perceived as efficient.
Agent's Emotional Presence: The user's perception of the agent's emotions during and after interaction.
Agent's Enjoyability: The extent to which the user finds the interaction with the agent enjoyable/boring.
Agent's Helpfulness: The agent is perceived as helpful.
Agent's Intentionality: The agent is perceived as acting deliberately and with intention.
Agent's Interestingness: The extent to which the user finds interaction with the agent interesting.
Agent's Intuitiveness: The extent to which the agent is perceived as intuitive.
Agent's Limitation: The user perceives the agent as being useful only for limited use/purposes.
Agent's Personality: The distinctive combination of character traits/qualities of the agent (or lack thereof).
Agent's Quickness: The extent to which the agent performs tasks quickly.
Agent's Reliability: The agent is perceived as reliable.
Agent's Sociability: The user perceives the agent as sociable.
Agent's Usability: The user perceives the agent as easy to use, user- or beginner-friendly, or simple to interact with.
Attitude: The extent to which the user finds the interaction with the agent positive.
Ease of Life: The agent is perceived as making the user's life easier.
Emotional Experience: A self-contained emotional experience during interaction.
Human-like Behaviour: The agent behaves like a human, expressively or emotionally, or conversely, like a machine/AI/tool.
Limitations: User thoughts on things it cannot do well or problems/limitations noticed
Performance: The extent to which the agent performs tasks well.
Potential: The user perceives the agent having future potential for improvement.
Productivity: The agent helps increase the user's productivity.
User Acceptance: The likelihood that the user will use the agent again or in the future.
User-Agent Alliance: The extent to which the user and agent collaborate for mutual benefit.
User-Agent Interplay: The degree to which the user and agent influence each other.
User's Autonomy: The user perceives the agent reducing the user's workload and allowing for more free time.
User's Emotional Presence: The user's emotional state during and after interacting with the agent.
User's Engagement: The extent to which the user feels involved in the interaction with the agent.
User's Trust: The user perceives the agent as trustworthy and factual.
---

Analyze the following user responses and identify all applicable themes based on the provided definitions. Only assign a theme if there is clear and explicit support for it in the response - do not infer or assume. Give me a comma-separated list of all themes that you find in the following user-responses to an online questionnaire:
---
"""
chat = lms.Chat(codebook_prompt)

# Stop at the first row whose index label reaches this bound.
max_response_index = 100
for index, row in df.iterrows():
    if index >= max_response_index:
        break
    chat.add_user_message(f"User response {index}: {row['EXP']}\n")

closing_instructions = (
    "---\n"
    "Present as a table with columns: | Response # | Themes |\n"
    "Do not include any explanations or additional text outside of the table itself.\n"
)
chat.add_user_message(closing_instructions)

# Send the prepared chat to every model in turn: stream the answer into the
# cell output and save the complete response to data/llm/<model>-response2.md
# (the file the agreement analysis reads back).
for model_name in llm_models:
    model = lms.llm(model_name)
    prediction_stream = model.respond_stream(chat, config={
        "temperature": 0.2,
        "topP": 1.0,
    })

    for fragment in prediction_stream:
        print(fragment.content, end="", flush=True)

    with open(f"data/llm/{model_name}-response2.md", "w", encoding="utf-8") as f:
        f.write(prediction_stream.result().content)

    # model.unload()
    # input() returns the typed line, so it must be captured: the bare name
    # `input` is the built-in function and never compares equal to "exit".
    answer = input(f"Models: {llm_models}\nCurrent model: {model_name}\nLoad in the next model and press Enter to continue...")
    if answer.strip().lower() == "exit":
        break

print('Done! ✅', flush=True)

In [None]:
# Analyze responses
# Theme names, in the order they are reported. They are matched
# case-insensitively as substrings of the coded theme lists, so the spelling
# here has to agree with the spelling used in the coding.
agent_prefixed = [
    "Agent's Cognition",
    "Agent's Coherence",
    "Agent's Creativeness",
    "Agent's Efficiency",
    "Agent's Emotional Presence",
    "Agent's Enjoyability",
    "Agent's Helpfulness",
    "Agent's Intentionality",
    "Agent's Interestingness",
    "Agent's Intuitiveness",
    "Agent's Limitation",
    "Agent's Personality",
    "Agent's Quickness",
    "Agent's Reliability",
    "Agent's Sociability",
    "Agent's Usability",
]
unprefixed = [
    "Attitude",
    "Ease of Life",
    "Emotional Experience",
    "Human-like Behaviour",
    "Limitations",
    "Performance",
    "Potential",
    "Productivity",
    "User Acceptance",
    "User-Agent Alliance",
    "User-Agent Interplay",
]
user_prefixed = [
    "User's Autonomy",
    "User's Emotional Presence",
    "User's Engagement",
    "User's Trust",
]

themes = [*agent_prefixed, *unprefixed, *user_prefixed]


def interpret_agreement(v):
    """Translate an agreement coefficient into a verbal label.

    Args:
        v: Numeric agreement value (e.g. a kappa).

    Returns:
        "Poor agreement" for v <= 0, then "Slight", "Fair", "Moderate" and
        "Substantial agreement" for the half-open bands starting at 0, 0.20,
        0.40 and 0.60, "Almost perfect agreement" for 0.80 <= v <= 1.00, and
        "Invalid value" for anything else (values above 1 such as the 99
        placeholder, or NaN).
    """
    if v <= 0:
        return "Poor agreement"
    # Written as `not v <= 1.00` so that NaN, for which every comparison is
    # False, also ends up here.
    if not v <= 1.00:
        return "Invalid value"

    # (exclusive upper bound, label), in ascending order
    bands = (
        (0.20, "Slight agreement"),
        (0.40, "Fair agreement"),
        (0.60, "Moderate agreement"),
        (0.80, "Substantial agreement"),
    )
    for upper_bound, label in bands:
        if v < upper_bound:
            return label
    return "Almost perfect agreement"


# Agreement between the reference coding in df['Themes'] and each model's
# coding in data/llm/<model>-response2.md: one Cohen's kappa per theme and model.
#
# Printed table layout:
#         ...models...
#  ...     k  k  k
#  themes  k  k  k
#  ...     k  k  k
#
# kappa_matrix itself holds ONE ROW PER MODEL (same order as llm_models), each
# row holding one kappa per theme (same order as themes). It must always have
# exactly len(llm_models) rows because later cells index it by model position.

MISSING_KAPPA = 99  # "no kappa available"; later cells filter on the literal 99

kappa_matrix = []
metadata = {}

for model in llm_models:
    metadata[model] = {}
    response_path = f"data/llm/{model}-response2.md"

    try:
        with open(response_path, "r", encoding="utf-8") as f:
            text = f.read()
    except FileNotFoundError:
        # Leave an empty file behind so the missing response is visible on
        # disk, then handle it like any other empty response below (so the
        # model still gets its placeholder row in kappa_matrix).
        with open(response_path, "w", encoding="utf-8") as f:
            f.write('')
        text = ''

    # Two-column Markdown table rows; curly apostrophes are normalised so that
    # "Agent’s ..." matches the straight-quote theme names.
    matches = re.findall(r"\|? (.*) \| (.*) \|?", text.replace("’", "'"))
    # A separator row written with spaces ("| --- | --- |") also matches the
    # pattern; keeping it would shift every response by one position.
    matches = [
        cells for cells in matches
        if not all(re.fullmatch(r":?-+:?", cell.strip()) for cell in cells)
    ]

    # The first match is taken as the header row, the rest as data rows.
    # Rows are aligned with df by POSITION (row i of the model's table is
    # compared with the i-th row of df) -- this assumes the model returned one
    # row per response, in order, and that df still has the default 0..n-1 index.
    n_rows = min(len(matches) - 1, len(df))
    if n_rows <= 0:
        # Empty file, no parsable table, or a header without data rows.
        kappa_matrix.append([MISSING_KAPPA for theme in themes])
        continue

    header = [cell.strip() for cell in matches[0]]
    df_ica = pd.DataFrame(matches[1:], columns=header)

    # Lower-case both codings once per model instead of once per theme.
    # str() keeps a missing reference cell (NaN) from crashing .lower().
    peer_codes = [str(cell).lower() for cell in df_ica.iloc[:n_rows, 1]]
    human_codes = [str(cell).lower() for cell in df['Themes'].iloc[:n_rows]]

    kappa_vector = []

    for theme in themes:
        needle = theme.lower()

        # 2x2 contingency counts (substring match of the theme name):
        #   a: both codings contain the theme   b: only the reference coding
        #   c: only the model's coding          d: neither
        a, b, c, d = 0, 0, 0, 0
        for human_code, peer_code in zip(human_codes, peer_codes):
            theme_present = needle in human_code
            peer_theme_present = needle in peer_code
            if theme_present and peer_theme_present:
                a += 1
            elif theme_present and not peer_theme_present:
                b += 1
            elif not theme_present and peer_theme_present:
                c += 1
            else:
                d += 1

        N = (a + b + c + d)  # == n_rows, which is > 0 here
        P_o = (a + d) / N  # observed agreement
        P_e = ((a + b) / N) * ((a + c) / N) + ((c + d) / N) * ((b + d) / N)  # chance agreement

        if P_e == 1:
            # Both codings put every response in the same category:
            # kappa is undefined (0 / 0).
            kappa = MISSING_KAPPA
        else:
            kappa = (P_o - P_e) / (1.0 - P_e)

        metadata[model][theme] = {
            'a': a,
            'b': b,
            'c': c,
            'd': d,
            'kappa': kappa,
            'interpret(k)': interpret_agreement(kappa),
        }
        kappa_vector.append(kappa)

    kappa_matrix.append(kappa_vector)

# Per-theme counts, kappa and interpretation for every model.
with open("data/llm/metadata.json", "w", encoding="utf-8") as f:
    json.dump(metadata, f, indent=2)

table = PrettyTable()
table.add_column('Themes', list(themes))
for idx, model in enumerate(llm_models):
    table.add_column(model, kappa_matrix[idx])

print(table)

In [None]:
# Mean kappa per model across all themes; entries equal to the 99 placeholder
# are left out of the mean.
avg_table = PrettyTable(['Model', 'Average Kappa', 'interpret(k)'])
for position, model_name in enumerate(llm_models):
    # kappa_matrix[position] is the list of per-theme kappas for this model
    valid_kappas = [value for value in kappa_matrix[position] if value != 99]
    avg_kappa = float(np.mean(valid_kappas))
    avg_table.add_row([model_name, avg_kappa, interpret_agreement(avg_kappa)])
print(avg_table)

In [None]:
# Flatten kappa data into long form (theme, model, kappa): one record per
# (model, theme) pair that has a usable kappa in `metadata`.
data = []
for model in llm_models:
    model_stats = metadata.get(model, {})
    for theme in themes:
        kappa = model_stats.get(theme, {}).get('kappa', None)
        # skip pairs without an entry and pairs holding the 99 placeholder
        if kappa is None or kappa == 99:
            continue
        data.append({'Theme': theme, 'Model': model, 'Kappa': kappa})

# Short display names used on the plot axes.
label_map = {
    'llama-3.1-8b-instruct': 'Llama',
    'qwen3-32b': 'Qwen',
    'deepseek-r1-distill-qwen-32b': 'DeepSeek',
    'phi-4': 'Phi',
    'gemma-3-12b': 'Gemma',
    'mistral-nemo-instruct-2407': 'NeMo'
}

# Naming the columns explicitly keeps 'Theme'/'Model'/'Kappa' present even
# when `data` is empty; otherwise the 'Model' lookup below raises KeyError.
df_long = pd.DataFrame(data, columns=['Theme', 'Model', 'Kappa'])
df_long['Model'] = df_long['Model'].replace(label_map)
print('Done! ✅', flush=True)

In [None]:
# Box plot of the per-theme kappas, one box per model.
# rcParams['figure.dpi'] only affects figures created AFTER it is set, so it
# has to come before the figure is created to apply on a fresh kernel.
plt.rcParams['figure.dpi'] = 600
fig, ax = plt.subplots(figsize=(5, 5))
ax.set_ylim(-0.5, 1)

sns.boxplot(data=df_long, x='Model', y='Kappa', fill=False, gap=.1, showmeans=True, meanline=True, ax=ax)
# Rotate the existing tick labels in place; re-assigning them through
# set_xticklabels() without fixing the tick positions can trigger a
# matplotlib UserWarning.
plt.setp(ax.get_xticklabels(), rotation=45, ha='right')

# plt.title("Kappa Distribution Across Models")
fig.tight_layout()

fig.savefig("data/img/llm-kappa-distributions-boxplot-model.png", dpi=600)
plt.show()

In [None]:
# Box plot of the per-model kappas, one box per theme.
# rcParams['figure.dpi'] only affects figures created AFTER it is set, so it
# has to come before the figure is created to apply on a fresh kernel.
plt.rcParams['figure.dpi'] = 600
fig, ax = plt.subplots(figsize=(12, 6))
ax.set_ylim(-0.5, 1)

sns.boxplot(data=df_long, x='Theme', y='Kappa', fill=False, gap=.1, showmeans=True, meanline=True, ax=ax)
# Rotate the existing tick labels in place; re-assigning them through
# set_xticklabels() without fixing the tick positions can trigger a
# matplotlib UserWarning.
plt.setp(ax.get_xticklabels(), rotation=45, ha='right')

# plt.title("Kappa Distribution Across Themes")
fig.tight_layout()

fig.savefig("data/img/llm-kappa-distributions-boxplot-theme.png", dpi=600)
plt.show()