In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# Inference device. Everything below follows this single setting.
device = torch.device("cpu")

model_name = "tiiuae/falcon-7b-instruct"

print("Loading model...")
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Use half precision only on GPU. `device` is a torch.device object, so its
# `.type` attribute is inspected; the object itself does not compare equal to
# the plain string "cuda", which made the float16 branch unreachable.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16 if device.type == "cuda" else torch.float32,
).to(device)
print("Model loaded.")

# The 21 symptom labels of the BDI-II questionnaire, in the order they are
# presented to the model inside the prompt.
bdi_symptoms = [
    "sadness",
    "pessimism",
    "past failure",
    "loss of pleasure",
    "guilty feelings",
    "punishment feelings",
    "self-dislike",
    "self-criticalness",
    "suicidal thoughts or wishes",
    "crying",
    "agitation",
    "loss of interest",
    "indecisiveness",
    "worthlessness",
    "loss of energy",
    "changes in sleeping pattern",
    "irritability",
    "changes in appetite",
    "concentration difficulty",
    "tiredness or fatigue",
    "loss of interest in sex",
]

In [None]:
# Shared output handle: every model generation in this cell is written here.
# Mode "w" truncates any previous run's output; the handle is closed at the
# end of the cell.
file2write = open("infer_out.txt", "w")

def _collect_exchanges(conversation):
    """Pair each user message with the reply that follows it.

    Entries are ignored up to and including the first one whose "message"
    contains the text "Example". After that, a "user" entry starts a new
    buffer and every non-user entry is appended to the buffer, which is then
    recorded as one exchange.

    Args:
        conversation: iterable of dicts with "role" and "message" keys.

    Returns:
        List of concatenated message strings, one per non-user entry seen
        after the "Example" marker.
    """
    exchanges = []
    marker_seen = False
    buffer = ""
    for turn in conversation:
        if not marker_seen:
            # Still in the preamble; the marker entry itself is skipped too.
            if "Example" in turn["message"]:
                marker_seen = True
            continue

        if turn["role"] == "user":
            buffer = turn["message"]
        else:
            # The buffer is deliberately not reset here, so consecutive
            # non-user entries keep accumulating onto the same text.
            buffer += turn["message"]
            exchanges.append(buffer)
    return exchanges


def analyze_conversation_bdi_from_file(file_path):
    """Run the symptom-rating prompt over every exchange in one conversation file.

    The file is read as JSON and its "conversation" list is turned into
    exchanges (see `_collect_exchanges`). For each exchange a prompt listing
    the BDI-II symptoms is sent to the module-level `model`, and the decoded
    generation is written to the module-level `file2write` handle, one
    generation per write, terminated by a newline.

    Args:
        file_path: path to a JSON file with a top-level "conversation" key.

    Returns:
        The decoded generation for the last exchange, or an empty string if
        the file yielded no exchanges.
    """
    # Imported here because this cell runs before the notebook's top-level
    # `import json`, which only appears in a later cell.
    import json

    with open(file_path, "r", encoding="utf-8") as f:
        conversation_text = json.load(f)["conversation"]

    msgs = _collect_exchanges(conversation_text)

    print(msgs)
    print("--------------------")

    # The prompt placeholder previously referenced an undefined name; the
    # symptom labels come from the module-level `bdi_symptoms` list.
    symptom_list = ", ".join(bdi_symptoms)

    # Defined up front so the final `return` is valid even when `msgs` is empty.
    decoded = ""
    for sent in msgs:
        prompt = f"""
        You are a compassionate mental health assistant. Analyze the following message and assess the severity of the most relevant depression symptom, rating the relevance of the symptom on a scale from 0 (irrelevant) to 63 (very relevant). Output only the symptom and the score.
These are the symptoms: {symptom_list}. 
Here is the message: {sent}.
        """

        inputs = tokenizer(prompt.strip(), return_tensors="pt", truncation=True, max_length=1024)
        # Keep the input tensors on the same device as the model weights.
        inputs = inputs.to(model.device)
        outputs = model.generate(**inputs, max_new_tokens=256)
        decoded = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
        # Newline-terminate each generation so the line-based parser that
        # reads this file does not see consecutive outputs fused together.
        file2write.write(decoded + "\n")

    return decoded

import glob

# Analyse every conversation JSON in the working directory and collect the
# generations in `file2write`. Files whose path contains "results" are not
# conversations and are skipped.
try:
    # Sorted so the order of sections in the output file is reproducible.
    for file_path in sorted(glob.glob("./*.json")):
        if "results" in file_path:
            continue
        print(file_path)
        # Write the path on a line of its own before this file's generations:
        # the parsing cell uses these path lines to decide which persona the
        # following output belongs to. Previously the path was written only
        # for the skipped "results" files, so analysed files had no header.
        # The leading newline guarantees the header starts a fresh line even
        # if the preceding write did not end with one.
        file2write.write(f"\n{file_path}\n")
        analyze_conversation_bdi_from_file(file_path)
finally:
    # Close (and flush) the output even if a file fails part-way through.
    file2write.close()

In [1]:
import re
import json
from collections import defaultdict

# Text file produced by the inference cell; holds every model generation.
FILENAME = 'infer_out.txt'

# Accumulates (symptom, score) tuples, keyed by persona name.
person_data = defaultdict(list)

# A line containing a conversation file path marks the start of a persona
# section; the captured group is the persona name.
persona_pattern = re.compile(r'\./interactions_run4_(\w+)\.json')

# A double-quoted symptom followed, later on the same line, by a one- or
# two-digit score.
symptom_score_pattern = re.compile(r'"([^"]+)".*?(\d{1,2})')

# Fallback scoring tables, used when a line has no usable numeric score.
# Each entry is (phrases, score); the first entry with a phrase present in
# the lower-cased line wins, so more specific phrases must come before the
# generic ones they contain.

# Used when a number was found but it was 0. "relevant" is listed last:
# it is a substring of the other phrases and would otherwise shadow them.
_ZERO_SCORE_PHRASES = (
    (("very relevant",), 60),
    (("somewhat relevant",), 30),
    (("a little relevant",), 15),
    (("relevant",), 45),
)

# Used when the line has a quoted symptom but no number at all.
_NO_SCORE_PHRASES = (
    (("very relevant",), 60),
    (("moderately relevant", "significant contributor"), 45),
    (("central",), 30),
    (("likely contributing", "likely reflects"), 15),
)

_quoted_symptom_pattern = re.compile(r'"([^"]+)"')


def _normalise_symptom(raw):
    """Strip whitespace, drop commas and periods, and capitalise a symptom label."""
    return raw.strip().replace(',', '').replace('.', '').capitalize()


def _score_from_phrases(line, phrase_table):
    """Return the score of the first table entry matching `line`, or None."""
    lowered = line.lower()
    for phrases, value in phrase_table:
        if any(phrase in lowered for phrase in phrases):
            return value
    return None


# Read the inference output line by line and fill `person_data`.
with open(FILENAME, 'r') as f:
    current_persona = None
    for line in f:
        # A file-path line starts a new persona section.
        persona_match = persona_pattern.search(line)
        if persona_match:
            current_persona = persona_match.group(1).capitalize()
            continue

        # Anything before the first persona header cannot be attributed.
        if not current_persona:
            continue

        match = symptom_score_pattern.search(line)
        if match:
            symptom, score = match.groups()
            score = int(score)
            if score != 0:
                person_data[current_persona].append((_normalise_symptom(symptom), score))
                continue
            # A zero score is treated as missing; fall back to wording.
            phrase_table = _ZERO_SCORE_PHRASES
        else:
            # Lines that echo the prompt text are not model judgements.
            if "Here is the message" in line or "These are the symptoms" in line:
                continue
            phrase_table = _NO_SCORE_PHRASES

        # Shared fallback: needs a quoted symptom and a recognised phrase.
        symptom_match = _quoted_symptom_pattern.search(line)
        if not symptom_match:
            continue
        symptom = _normalise_symptom(symptom_match.group(1))
        score = _score_from_phrases(line, phrase_table)
        if score is None:
            print(f"[Manual check needed] {line.strip()}")
            continue
        person_data[current_persona].append((symptom, score))

#print(person_data)

# Summarise each persona: one BDI-style score plus its four strongest symptoms.
results = []
for persona, entries in person_data.items():
    # Group every recorded score under its symptom label.
    scores_by_symptom = defaultdict(list)
    for label, value in entries:
        scores_by_symptom[label].append(value)
    print(persona)

    # Keep the highest score seen for each symptom (not the average).
    max_symptoms = {}
    for label, values in scores_by_symptom.items():
        max_symptoms[label] = max(values)
    print(max_symptoms)

    # Rank symptoms by that maximum score, highest first, and keep the top 4.
    ranked = sorted(max_symptoms.items(), key=lambda pair: pair[1], reverse=True)
    top_symptoms_sorted = ranked[:4]
    print(top_symptoms_sorted)

    # The score is the sum of per-symptom maxima divided by the fixed count of
    # 21 BDI-II symptoms, so symptoms never mentioned contribute zero.
    per_symptom_max = list(max_symptoms.values())
    if per_symptom_max:
        bdi_score = round(sum(per_symptom_max) / 21)
    else:
        bdi_score = 0

    key_symptoms = [label for label, _ in top_symptoms_sorted]
    results.append({
        "LLM": persona,
        "bdi-score": bdi_score,
        "key-symptoms": key_symptoms,
    })

# Emit the summary as pretty-printed JSON.
print(json.dumps(results, indent=4))