In [1]:
import re
import json
from collections import defaultdict

# Inference log to parse; it holds the outputs for every persona in one file
FILENAME = "infer_out.txt"

# persona name -> list of (symptom, score) tuples collected while parsing
person_data = defaultdict(list)

# A line that mentions an interactions file opens a new persona section;
# group 1 is the persona name embedded in that file name.
persona_pattern = re.compile(
    r'\./interactions_run4_(\w+)\.json'
)

# A double-quoted span (the symptom, group 1) followed later on the same line
# by a number; group 2 captures at most the first two digits of that number.
symptom_score_pattern = re.compile(
    r'"([^"]+)".*?(\d{1,2})'
)

# Read and process the file

# First double-quoted span on a line; used when no numeric score was found.
_QUOTED_SYMPTOM = re.compile(r'"([^"]+)"')

# Phrase -> score fallback for lines whose explicit numeric score parsed as 0.
# Order matters: every phrase here contains the substring "relevant", so the
# bare "relevant" entry must be tested last or it would shadow
# "somewhat relevant" and "a little relevant".
_ZERO_SCORE_RELEVANCE = (
    (('very relevant',), 60),
    (('somewhat relevant',), 30),
    (('a little relevant',), 15),
    (('relevant',), 45),
)

# Phrase -> score fallback for lines that quote a symptom but carry no number.
# NOTE(review): this table intentionally mirrors the original heuristics, which
# differ from the zero-score table above - confirm the two should not be merged.
_UNSCORED_RELEVANCE = (
    (('very relevant',), 60),
    (('moderately relevant', 'significant contributor'), 45),
    (('central',), 30),
    (('likely contributing', 'likely reflects'), 15),
)

# Unscored lines containing one of these phrases are skipped silently
# (matched case-sensitively against the raw line).
_BOILERPLATE_MARKERS = ("Here is the message", "These are the symptoms")


def _clean_symptom(raw):
    """Normalise a quoted symptom: trim, drop commas/periods, capitalise."""
    return raw.strip().replace(',', '').replace('.', '').capitalize()


def _infer_score(line, table):
    """Return the score of the first table entry whose phrase occurs in *line*.

    Matching is case-insensitive. Returns None when no phrase is present.
    """
    lowered = line.lower()
    for phrases, score in table:
        if any(phrase in lowered for phrase in phrases):
            return score
    return None


with open(FILENAME, 'r') as f:
    current_persona = None
    for line in f:
        # A persona marker starts a new section; the marker line itself
        # carries no symptom data.
        persona_match = persona_pattern.search(line)
        if persona_match:
            current_persona = persona_match.group(1).capitalize()
            continue

        # Everything before the first persona marker is ignored.
        if current_persona is None:
            continue

        match = symptom_score_pattern.search(line)
        if match:
            symptom = _clean_symptom(match.group(1))
            score = int(match.group(2))
            if score == 0:
                # A zero score is not recorded as-is; fall back to the
                # relevance wording on the same line.
                score = _infer_score(line, _ZERO_SCORE_RELEVANCE)
        else:
            symptom_match = _QUOTED_SYMPTOM.search(line)
            if not symptom_match:
                continue
            if any(marker in line for marker in _BOILERPLATE_MARKERS):
                continue
            symptom = _clean_symptom(symptom_match.group(1))
            score = _infer_score(line, _UNSCORED_RELEVANCE)

        if score is None:
            # Neither a usable number nor a known phrase: flag for a human.
            print(f"[Manual check needed] {line.strip()}")
            continue
        person_data[current_persona].append((symptom, score))

#print(person_data)

# Build results: one summary record per persona.

# Divisor for the BDI score. Symptoms that were never extracted for a persona
# therefore contribute 0 to the mean.
BDI_ITEM_COUNT = 21
# Number of highest-scoring symptoms reported as "key-symptoms".
TOP_SYMPTOM_COUNT = 4

results = []
for persona, entries in person_data.items():
    print(persona)

    # Keep the highest score seen for each symptom (max, not average).
    # Keys stay in first-appearance order, which also decides ties below.
    max_symptoms = {}
    for symptom, score in entries:
        max_symptoms[symptom] = max(score, max_symptoms.get(symptom, score))
    print(max_symptoms)

    # Rank symptoms by their max score, highest first; sorted() is stable, so
    # equally scored symptoms keep their first-appearance order.
    top_symptoms_sorted = sorted(
        max_symptoms.items(), key=lambda item: item[1], reverse=True
    )[:TOP_SYMPTOM_COUNT]
    print(top_symptoms_sorted)

    # BDI score: sum of per-symptom max scores divided by the fixed item
    # count, rounded to the nearest integer.
    bdi_score = round(sum(max_symptoms.values()) / BDI_ITEM_COUNT)

    results.append({
        "LLM": persona,
        "bdi-score": bdi_score,
        "key-symptoms": [sym for sym, _ in top_symptoms_sorted],
    })

# Output the result as JSON
print(json.dumps(results, indent=4))

James
{'Sadness': 63, 'Irritability': 60, 'Guilty feelings': 60, 'Loss of interest in sex': 60}
[('Sadness', 63), ('Irritability', 60), ('Guilty feelings', 60), ('Loss of interest in sex', 60)]
Laura
{'Sadness': 63, 'Loss of interest in sex': 60, 'Guilty feelings': 60, 'Irritability': 60, 'Loss of focus': 63, 'Self-doubt almost constantly': 60}
[('Sadness', 63), ('Loss of focus', 63), ('Loss of interest in sex', 60), ('Guilty feelings', 60)]
Ethan
{'Sadness': 63, 'Loss of interest in sex': 60, 'Self-criticalness': 63, 'Guilty feelings': 63}
[('Sadness', 63), ('Self-criticalness', 63), ('Guilty feelings', 63), ('Loss of interest in sex', 60)]
Priya
{'Sadness': 63, 'Loss of interest in sex': 63, 'Changes in sleeping pattern': 60}
[('Sadness', 63), ('Loss of interest in sex', 63), ('Changes in sleeping pattern', 60)]
Linda
{'Sadness': 63, 'Loss of appetite': 63, 'Guilty feelings': 63, 'Constant sense of being worn down physically and emotionally': 63}
[('Sadness', 63), ('Loss of appetite'