In [40]:
import os, sys
cwd = os.getcwd()
project_path = cwd[:cwd.find('pygents')+7]
if project_path not in sys.path: sys.path.append(project_path)
os.chdir(project_path)

import datetime as dt

import pandas as pd
import numpy as np


## Cursory check of LLM capacity to detect distortions

In [2]:
from langchain_ollama.chat_models import ChatOllama
default_chat_model = "llama3.2"

llm = ChatOllama(model=default_chat_model, base_url="http://localhost:11434")  # Explicitly set base_url

In [3]:
def evaluator_llm(text,threshold=0):
    query = f"Be concise. Does this text have cognitive distortions in it \"{text}\"?"
    r = llm.invoke(query).content
    return r.lower().startswith("yes")


In [4]:
texts = [
    "I am such a failure I never do anything right.",
    "I am a software developer doing coding.",
    "I am a man sitting on the chair behind the table.",
    "There is a chair behind the table."
]
for r in texts:
    q = f"Be concise. Does this text have cognitive distortions in it \"{r}\"?"
    d = evaluator_llm(r)
    print(f"{d}: {q}")
    
    

True: Be concise. Does this text have cognitive distortions in it "I am such a failure I never do anything right."?
True: Be concise. Does this text have cognitive distortions in it "I am a software developer doing coding."?
True: Be concise. Does this text have cognitive distortions in it "I am a man sitting on the chair behind the table."?
False: Be concise. Does this text have cognitive distortions in it "There is a chair behind the table."?


## Cursory compare different LLMs

In [39]:
texts = [
    "I am such a failure I never do anything right.",
    "I am a software developer doing coding.",
    "I am a man sitting on the chair behind the table.",
    "There is a chair behind the table."
]
llm_llama32 = ChatOllama(model="llama3.2", base_url="http://localhost:11434")  # Explicitly set base_url
def evaluator_llm_llama32(text,threshold=0):
    query = f"Be concise. Does this text have cognitive distortions in it \"{text}\"?"
    r = llm_llama32.invoke(query).content
    return r.lower().startswith("yes"), r
for r in texts:
    q = f"Be concise. Does this text have cognitive distortions in it \"{r}\"?"
    d, r = evaluator_llm_llama32(r)
    print(f"{d}: {q} ==> {r}")

True: Be concise. Does this text have cognitive distortions in it "I am such a failure I never do anything right."? ==> Yes, the text contains cognitive distortions. The phrase "never do anything right" is an absolute statement and not supported by evidence, which is a characteristic of all-or-nothing thinking (black-and-white thinking).
True: Be concise. Does this text have cognitive distortions in it "I am a software developer doing coding."? ==> Yes, there is an overgeneralization in the sentence. It implies that being a software developer means only coding, which isn't necessarily true. A developer may also work on project management, testing, design, etc.
True: Be concise. Does this text have cognitive distortions in it "I am a man sitting on the chair behind the table."? ==> Yes, it has two common cognitive distortions:

1. Euphemistic self-deception (downplaying one's true identity): The statement is a polite way of saying "I'm a male sitting on a chair."
2. Self-referential err

In [38]:
texts = [
    "I am such a failure I never do anything right.",
    "I am a software developer doing coding.",
    "I am a man sitting on the chair behind the table.",
    "There is a chair behind the table."
]
llm_qwen2 = ChatOllama(model="qwen2", base_url="http://localhost:11434")  # Explicitly set base_url
def evaluator_llm_qwen2(text,threshold=0):
    query = f"Be concise. Does this text have cognitive distortions in it \"{text}\"?"
    r = llm_qwen2.invoke(query).content
    return r.lower().startswith("yes"), r
for r in texts:
    q = f"Be concise. Does this text have cognitive distortions in it \"{r}\"?"
    d, r = evaluator_llm_qwen2(r)
    print(f"{d}: {q} ==> {r}")

True: Be concise. Does this text have cognitive distortions in it "I am such a failure I never do anything right."? ==> Yes, the text contains a cognitive distortion known as all-or-nothing thinking, which is characterized by seeing things in black-and-white categories, with no shades of gray. In this case, the statement suggests that there is absolute failure without considering any instances where the individual might have succeeded or at least tried successfully.
False: Be concise. Does this text have cognitive distortions in it "I am a software developer doing coding."? ==> No, the statement "I am a software developer doing coding" does not contain any obvious cognitive distortions. It is straightforward and directly states someone's profession and activity. Cognitive distortions are typically irrational or exaggerated thought patterns that can lead to emotional distress, but this sentence doesn't exhibit those characteristics.
False: Be concise. Does this text have cognitive disto

## Explore performance of "our out of the box" model with dataset 1 (original binary)

In [21]:
binary_dataset_file_path = "./data/corpora/English/distortions/halilbabacan/raw_Cognitive_distortions.csv" 
df = pd.read_csv(binary_dataset_file_path)
df.insert(1, "N/A text", value = np.nan)
df.insert(3, "N/A label", value = np.nan)
df.head(10)


Unnamed: 0,Text,N/A text,Label,N/A label
0,I'm such a failure I never do anything right.,,Distortion,
1,Nobody likes me because I'm not interesting.,,Distortion,
2,I can't try new things because I'll just mess...,,Distortion,
3,My boss didn't say 'good morning' she must be...,,Distortion,
4,My friend didn't invite me to the party I mus...,,Distortion,
5,I didn't get the job so I must be incompetent.,,Distortion,
6,I'm always unlucky. Good things only happen t...,,Distortion,
7,Everyone thinks I'm stupid because I made a m...,,Distortion,
8,I'll never be successful because I failed my ...,,Distortion,
9,Nobody cares about me because they didn't ask...,,Distortion,


In [172]:
from pygents.aigents_api import TextMetrics

def language_metrics(lang,metrics_list):
    metrics = {}
    for m in metrics_list:
        metrics[m] = './data/dict/' + lang + '/' + m + '.txt'
    return metrics


distortion_labels = ['positive','negative','rude',
'catastrophizing','dichotomous-reasoning','disqualifying-positive','emotional-reasoning','fortune-telling',
'labeling','magnification','mental-filtering','mindreading','overgeneralizing','personalizing','should-statement']
tm = TextMetrics(language_metrics('en',distortion_labels),debug=False)

def f1_from_counts(true_positive, true_negative, false_positive, false_negative):
    precision = true_positive / (true_positive + false_positive) if (true_positive + false_positive) > 0 else 0
    recall = true_positive / (true_positive + false_negative) if (true_positive + false_negative) > 0 else 0
    return 2 * precision * recall / (precision + recall) if precision > 0 or recall > 0 else 0 

def evaluate_df_counts(df,evaluator,threshold,debug=False):
    true_positive = 0
    true_negative = 0
    false_positive = 0
    false_negative = 0
    for _, row in df.iterrows():
        # Text definition: first, check the 2nd column; if NaN, take the text from the 1st column.
        text = row.iloc[1] if pd.notna(row.iloc[1]) else row.iloc[0]
        primary_distortion = row.iloc[2]  # The main cognitive distortion from the 3rd column
        secondary_distortion = row.iloc[3] if pd.notna(row.iloc[3]) else None  # The secondary distortion from the 4th column, if it exists
        ground_distortion = False if primary_distortion == 'No Distortion' else True
                       
        our_distortion = evaluator(text,threshold)
        
        # https://en.wikipedia.org/wiki/F-score
        if ground_distortion == True and our_distortion == True:
            true_positive += 1
        if ground_distortion == False and our_distortion == True:
            false_positive += 1
        if ground_distortion == False and our_distortion == False:
            true_negative += 1
        if ground_distortion == True and our_distortion == False:
            false_negative += 1

        if debug:
            print(ground_distortion,our_distortion,text[:20],metrics)

    return true_positive, true_negative, false_positive, false_negative


def evaluate_df(df,evaluator,threshold,debug=False):
    true_positive, true_negative, false_positive, false_negative = evaluate_df_counts(df,evaluator,threshold,debug)
    return f1_from_counts(true_positive, true_negative, false_positive, false_negative) 


def evaluate_df_acc_f1(df,evaluator,threshold,debug=False):
    true_positive, true_negative, false_positive, false_negative = evaluate_df_counts(df,evaluator,threshold,debug)
    return (true_positive + true_negative) / len(df), f1_from_counts(true_positive, true_negative, false_positive, false_negative) 


def our_evaluator_any(text,threshold):
    metrics = tm.get_sentiment_words(text)
    for m in metrics:
        if metrics[m] > threshold:
            return True
    return False

def our_evaluator_avg(text,threshold):
    metrics = tm.get_sentiment_words(text)
    l = list(metrics.values())
    avg = sum(l) / len(l) if  len(l) > 0 else 0
    if avg > threshold:
        return True
    return False
  

In [163]:
for threshold in [0.0,0.01,0.05,0.1,0.2,0.4,0.6,0.8]:
    f1 = evaluate_df(df,our_evaluator_any,threshold)
    print(threshold, f1)

0.0 0.8443643512450851
0.01 0.8443643512450851
0.05 0.8443643512450851
0.1 0.8443643512450851
0.2 0.8445027035883992
0.4 0.8444669365721997
0.6 0.5569898379566054
0.8 0.013021830716200687


In [24]:
for threshold in [0.0,0.01,0.05,0.1,0.2,0.4,0.6,0.8]:
    f1 = evaluate_df(df,our_evaluator_avg,threshold)
    print(threshold, f1)

0.0 0.8443643512450851
0.01 0.8443643512450851
0.05 0.8443643512450851
0.1 0.8443643512450851
0.2 0.8447795443369939
0.4 0.886836935166994
0.6 0.18404478656403078
0.8 0.007680491551459293


## Explore performance of LLM (llama3.2) with dataset 1 (original binary)

In [9]:
df[:5]

Unnamed: 0,Text,N/A text,Label,N/A label
0,I'm such a failure I never do anything right.,,Distortion,
1,Nobody likes me because I'm not interesting.,,Distortion,
2,I can't try new things because I'll just mess...,,Distortion,
3,My boss didn't say 'good morning' she must be...,,Distortion,
4,My friend didn't invite me to the party I mus...,,Distortion,


In [20]:
t0 = dt.datetime.now()

f1 = evaluate_df(df,evaluator_llm,0,debug=False)

t1 = dt.datetime.now()
delta = t1 - t0
print(f1,delta.total_seconds(),delta.total_seconds()/len(df))


0.8565737051792828 3474.569199 0.9851344482563085


## Explore performance of LLM (qwen2) with dataset 1 (original binary)

In [26]:
llm_qwen2 = ChatOllama(model="qwen2", base_url="http://localhost:11434")  # Explicitly set base_url
def evaluator_llm_qwen2(text,threshold=0):
    query = f"Be concise. Does this text have cognitive distortions in it \"{text}\"?"
    r = llm_qwen2.invoke(query).content
    return r.lower().startswith("yes")

t0 = dt.datetime.now()

f1 = evaluate_df(df,evaluator_llm_qwen2,0,debug=False)

t1 = dt.datetime.now()
delta = t1 - t0
print(f1,delta.total_seconds(),delta.total_seconds()/len(df))


0.8573050719152157 6282.644701 1.7812998868726964


## Explore performance of LLM (llama3.2 and qwen2) with dataset 3 (joint 1+2)


In [27]:
# Dataset: Unclassified distortions (halilbabacan)
# Paper: https://papers.ssrn.com/sol3/papers.cfm?abstract_id=4582307
# Data: https://huggingface.co/datasets/halilbabacan/autotrain-data-cognitive_distortions
# https://huggingface.co/datasets/halilbabacan/autotrain-data-cognitive_distortions/tree/main/raw
# https://huggingface.co/datasets/halilbabacan/autotrain-data-cognitive_distortions/blob/main/raw/Cognitive_distortions.csv
    
binary_dataset_file_path = "./data/corpora/English/distortions/halilbabacan/raw_Cognitive_distortions.csv" 

In [28]:
# Dataset: Multiple Distorions (sagarikashreevastava)
# Paper: https://aclanthology.org/2021.clpsych-1.17/
# Data: https://www.kaggle.com/datasets/sagarikashreevastava/cognitive-distortion-detetction-dataset

# !pip install kagglehub
import kagglehub
multiclass_dataset_path = kagglehub.dataset_download("sagarikashreevastava/cognitive-distortion-detetction-dataset")
print("Path to dataset files:", multiclass_dataset_path)
multiclass_dataset_file_path = multiclass_dataset_path + "/Annotated_data.csv"


Path to dataset files: C:\Users\anton\.cache\kagglehub\datasets\sagarikashreevastava\cognitive-distortion-detetction-dataset\versions\1


In [29]:
df1 = pd.read_csv(binary_dataset_file_path)
df1 = df1.rename(columns={'Text': 'Patient Question', 'Label': 'Dominant Distortion'})
df1.insert(1, "Distorted part", value = np.nan)
df1.insert(3, "Secondary Distortion (Optional)l", value = np.nan)
df1

Unnamed: 0,Patient Question,Distorted part,Dominant Distortion,Secondary Distortion (Optional)l
0,I'm such a failure I never do anything right.,,Distortion,
1,Nobody likes me because I'm not interesting.,,Distortion,
2,I can't try new things because I'll just mess...,,Distortion,
3,My boss didn't say 'good morning' she must be...,,Distortion,
4,My friend didn't invite me to the party I mus...,,Distortion,
...,...,...,...,...
3522,Since then whenever my mother is out alone I b...,,Distortion,
3523,My family hate him but they didn’t met him at ...,,Distortion,
3524,However I am not happy at the least only half ...,,Distortion,
3525,Now I am at university my peers around me all ...,,Distortion,


In [30]:
df2 = pd.read_csv(multiclass_dataset_file_path) 
df2 = df2.drop('Id_Number', axis=1) # delete columnb with id 
df2

Unnamed: 0,Patient Question,Distorted part,Dominant Distortion,Secondary Distortion (Optional)
0,"Hello, I have a beautiful,smart,outgoing and a...",The voice are always fimilar (someone she know...,Personalization,
1,Since I was about 16 years old I’ve had these ...,I feel trapped inside my disgusting self and l...,Labeling,Emotional Reasoning
2,So I’ve been dating on and off this guy for a...,,No Distortion,
3,My parents got divorced in 2004. My mother has...,,No Distortion,
4,I don’t really know how to explain the situati...,I refused to go because I didn’t know if it wa...,Fortune-telling,Emotional Reasoning
...,...,...,...,...
2525,I’m a 21 year old female. I spent most of my l...,,No Distortion,
2526,I am 21 female and have not had any friends fo...,Now I am at university my peers around me all ...,Overgeneralization,
2527,From the U.S.: My brother is 19 years old and ...,He claims he’s severely depressed and has outb...,Mental filter,Mind Reading
2528,From the U.S.: I am a 21 year old woman who ha...,,No Distortion,


In [31]:
df3 = pd.concat([df1, df2], ignore_index=True)
df3

Unnamed: 0,Patient Question,Distorted part,Dominant Distortion,Secondary Distortion (Optional)l,Secondary Distortion (Optional)
0,I'm such a failure I never do anything right.,,Distortion,,
1,Nobody likes me because I'm not interesting.,,Distortion,,
2,I can't try new things because I'll just mess...,,Distortion,,
3,My boss didn't say 'good morning' she must be...,,Distortion,,
4,My friend didn't invite me to the party I mus...,,Distortion,,
...,...,...,...,...,...
6052,I’m a 21 year old female. I spent most of my l...,,No Distortion,,
6053,I am 21 female and have not had any friends fo...,Now I am at university my peers around me all ...,Overgeneralization,,
6054,From the U.S.: My brother is 19 years old and ...,He claims he’s severely depressed and has outb...,Mental filter,,Mind Reading
6055,From the U.S.: I am a 21 year old woman who ha...,,No Distortion,,


In [43]:
llm_llama32 = ChatOllama(model="llama3.2", base_url="http://localhost:11434")  # Explicitly set base_url
def evaluator_llm_llama32(text,threshold=0):
    query = f"Be concise. Does this text have cognitive distortions in it \"{text}\"?"
    r = llm_llama32.invoke(query).content
    return r.lower().startswith("yes")

t0 = dt.datetime.now()

f1 = evaluate_df(df3,evaluator_llm_llama32,0,debug=False)

t1 = dt.datetime.now()
delta = t1 - t0
print(f1,delta.total_seconds(),delta.total_seconds()/len(df3))

0.8271139341008337 6202.05872 1.0239489384183589


In [45]:
llm_qwen2 = ChatOllama(model="qwen2", base_url="http://localhost:11434")  # Explicitly set base_url
def evaluator_llm_qwen2(text,threshold=0):
    query = f"Be concise. Does this text have cognitive distortions in it \"{text}\"?"
    r = llm_qwen2.invoke(query).content
    return r.lower().startswith("yes")

t0 = dt.datetime.now()

f1 = evaluate_df(df3,evaluator_llm_qwen2,0,debug=False)

t1 = dt.datetime.now()
delta = t1 - t0
print(f1,delta.total_seconds(),delta.total_seconds()/len(df3))

0.8087457952907255 11675.313834 1.9275736889549282


## Evaluate datasets in 3 splits

In [168]:
df3s = [ df3.iloc[:0,:].copy(), df3.iloc[:0,:].copy(), df3.iloc[:0,:].copy()]

row_n = 0 
for _, row in df3.iterrows():
    r3 = row_n % 3
    row_n += 1
    df = df3s[r3]
    df.loc[len(df)] = row

print(len(df3))
for df in df3s:
    print(len(df))

6057
2019
2019
2019


In [174]:
llm_llama32 = ChatOllama(model="llama3.2", base_url="http://localhost:11434")  # Explicitly set base_url
def evaluator_llm_llama32(text,threshold=0):
    query = f"Be concise. Does this text have cognitive distortions in it \"{text}\"?"
    r = llm_llama32.invoke(query).content
    return r.lower().startswith("yes")

t0 = dt.datetime.now()

f1 = [ 
    evaluate_df_acc_f1(df3s[0],evaluator_llm_llama32,0,debug=False),
    evaluate_df_acc_f1(df3s[1],evaluator_llm_llama32,0,debug=False),
    evaluate_df_acc_f1(df3s[2],evaluator_llm_llama32,0,debug=False),
]

t1 = dt.datetime.now()
delta = t1 - t0
print(f1,delta.total_seconds(),delta.total_seconds()/len(df3))

[(0.7236255572065379, 0.8345195729537366), (0.7052996532937098, 0.8227584152517128), (0.7107478949975236, 0.8257756563245823)] 6234.074192 1.0292346362885918


In [178]:
af = [(0.7236255572065379, 0.8345195729537366), (0.7052996532937098, 0.8227584152517128), (0.7107478949975236, 0.8257756563245823)]
[[a[0] for a in af],[a[1] for a in af]]

[[0.7236255572065379, 0.7052996532937098, 0.7107478949975236],
 [0.8345195729537366, 0.8227584152517128, 0.8257756563245823]]

In [175]:
llm_qwen2 = ChatOllama(model="qwen2", base_url="http://localhost:11434")  # Explicitly set base_url
def evaluator_llm_qwen2(text,threshold=0):
    query = f"Be concise. Does this text have cognitive distortions in it \"{text}\"?"
    r = llm_qwen2.invoke(query).content
    return r.lower().startswith("yes")

t0 = dt.datetime.now()

f1 = [ 
    evaluate_df_acc_f1(df3s[0],evaluator_llm_qwen2,0,debug=False),
    evaluate_df_acc_f1(df3s[1],evaluator_llm_qwen2,0,debug=False),
    evaluate_df_acc_f1(df3s[2],evaluator_llm_qwen2,0,debug=False),
]

t1 = dt.datetime.now()
delta = t1 - t0
print(f1,delta.total_seconds(),delta.total_seconds()/len(df3))

[(0.7345220406141655, 0.8089807555238775), (0.7379891035165924, 0.8086799276672695), (0.7340267459138187, 0.8053642624139181)] 11817.349794 1.951023575037147


In [179]:
af = [(0.7345220406141655, 0.8089807555238775), (0.7379891035165924, 0.8086799276672695), (0.7340267459138187, 0.8053642624139181)]
[[a[0] for a in af],[a[1] for a in af]]

[[0.7345220406141655, 0.7379891035165924, 0.7340267459138187],
 [0.8089807555238775, 0.8086799276672695, 0.8053642624139181]]

# Experiment with multi-class cognitive distorion detection

In [54]:
texts = [
    ("I think he thinks about me.", "Mind Reading"),
    ("It always happens every time over and over again", "Overgeneralization"),
    ("This is enourmously huge and terrific.", "Magnification"),
    ("I know it will be that way and I forecast it.", "Furtune-telling"),
    ("He is stupid, lazy and ignorant narciss.", "Labeling"),
    ("I am guilty and it is my failure.", "Personalization"),
    ("I am either excited and amazed or abused and upset.", "Emotional reasoning"), 
    ("I ruined the whole recital because of that one mistake.", "Mental filter"),
    ("I must do it, I should have that.", "Should statement"),
    ("A single mistake ruins the entire life. Either you love me or you hate me.", "All-or-nothing thinking")
]
dists = set([x[1] for x in texts])
dists

{'All-or-nothing thinking',
 'Emotional reasoning',
 'Furtune-telling',
 'Labeling',
 'Magnification',
 'Mental filter',
 'Mind Reading',
 'Overgeneralization',
 'Personalization',
 'Should statement'}

In [76]:
def evaluate_specific_llama32(text,dist,debug=False):
    dist = dist.replace('-',' ').lower()
    query = f'Be concise. Answer with yes or no. Act as a psycho-therapist. If the text "{text}" contains {dist}?'
    rep = llm_llama32.invoke(query).content
    res = rep.lower().startswith("yes")
    if debug:
        print(f"{res}: {query} => {r}")
    return res, rep
    
for t in texts:
    print(f"{t[1]}: {t[0]}")
    for d in dists:
        res, rep = evaluate_specific_llama32(t[0],d)
        print(f"{d}={res}: {rep}")
    print()

Mind Reading: I think he thinks about me.
Personalization=True: Yes, this statement is highly personalized and likely an expression of attachment or obsessive thoughts.
All-or-nothing thinking=True: Yes. This phrase often indicates all-or-nothing thinking, assuming that someone is either completely thinking about you or not thinking about you at all.
Mind Reading=False: No, it implies inference based on behavior and context, not literal mind reading.
Furtune-telling=False: No. It's an expression of possible unconscious thought patterns.
Overgeneralization=True: Yes. The statement implies that someone's thoughts about you are universal and constant, which is an overgeneralization of their subjective experience.
Emotional reasoning=True: Yes. Emotional reasoning is present, implying a possible projection of one's own thoughts and feelings onto someone else, often indicating unresolved emotional attachment or unmet needs in the relationship.
Should statement=True: Yes.
Labeling=True: Yes,

In [77]:
llm_qwen2 = ChatOllama(model="qwen2", base_url="http://localhost:11434")  # Explicitly set base_url
def evaluate_specific_qwen2(text,dist,debug=False):
    dist = dist.replace('-',' ').lower()
    query = f'Be concise. Answer with yes or no. Act as a psycho-therapist. If the text "{text}" contains {dist}?'
    rep = llm_qwen2.invoke(query).content
    res = rep.lower().startswith("yes")
    if debug:
        print(f"{res}: {query} => {r}")
    return res, rep
for t in texts:
    print(f"{t[1]}: {t[0]}")
    for d in dists:
        res, rep = evaluate_specific_qwen2(t[0],d)
        print(f"{d}={res}: {rep}")
    print()

Mind Reading: I think he thinks about me.
Personalization=True: Yes
All-or-nothing thinking=False: No
Mind Reading=True: Yes.
Furtune-telling=False: No.
Overgeneralization=True: Yes
Emotional reasoning=True: Yes
Should statement=False: No
Labeling=False: No.
Mental filter=True: Yes
Magnification=True: Yes.

Overgeneralization: It always happens every time over and over again
Personalization=True: Yes.
All-or-nothing thinking=True: Yes.
Mind Reading=False: No
Furtune-telling=False: No.
Overgeneralization=True: Yes
Emotional reasoning=True: Yes
Should statement=False: No
Labeling=True: Yes
Mental filter=True: Yes
Magnification=True: Yes

Magnification: This is enourmously huge and terrific.
Personalization=True: Yes.
All-or-nothing thinking=False: No.
Mind Reading=False: No.
Furtune-telling=False: No.
Overgeneralization=True: Yes.
Emotional reasoning=True: Yes.
Should statement=False: No.
Labeling=True: Yes.
Mental filter=False: No.
Magnification=True: Yes.

Furtune-telling: I know it wi

In [88]:
def evaluate_one_from_list_llama32(text,dists,debug=False):
    dist_list = ', '.join(dists)
    query = f'Act as a psycho-therapist. Answer with single item from the list "{dist_list}". What is the major cognitive distortion in "{text}"?'
    rep = llm_llama32.invoke(query).content
    return rep
    
for t in texts:
    print(f"{t[1]}: {t[0]}")
    res = evaluate_one_from_list_llama32(t[0],dists)
    print(res)
    print("---------")

Mind Reading: I think he thinks about me.
The major cognitive distortion in "I think he thinks about me" is:

Mind Reading

This involves making assumptions or jumping to conclusions about someone's thoughts or feelings without having any direct evidence. In this case, you're assuming that the person is thinking about you, simply because you've had a conversation with them or noticed their presence. This distortion can lead to feelings of anxiety, insecurity, and overthinking.
---------
Overgeneralization: It always happens every time over and over again
The major cognitive distortion in the phrase "It always happens every time over and over again" is:

Overgeneralization.

This phrase implies that a negative event or behavior will inevitably recur, without considering other possible explanations or exceptions. Overgeneralization involves making broad generalizations based on limited evidence, which can lead to unrealistic expectations and a lack of perspective. In this case, the speak

In [90]:
def evaluate_one_from_list_llama32(text,dists,debug=False):
    dist_list = ', '.join(dists)
    query = f'Answer with single quoted item from the list "{dist_list}" only. What is the major cognitive distortion in "{text}"?'
    rep = llm_llama32.invoke(query).content
    return rep
    
for t in texts:
    print(f"{t[1]}: {t[0]}")
    res = evaluate_one_from_list_llama32(t[0],dists)
    print(res)
    print("---------")

Mind Reading: I think he thinks about me.
'Mind Reading'
---------
Overgeneralization: It always happens every time over and over again
'Magnification'
---------
Magnification: This is enourmously huge and terrific.
'Magnification' is the major cognitive distortion in "This is enormously huge and terrific."
---------
Furtune-telling: I know it will be that way and I forecast it.
'Mental Filter'
---------
Labeling: He is stupid, lazy and ignorant narciss.
'Labeling'
---------
Personalization: I am guilty and it is my failure.
'Mental Filter' is the major cognitive distortion in "I am guilty and it is my failure".
---------
Emotional reasoning: I am either excited and amazed or abused and upset.
"All-or-nothing thinking".
---------
Mental filter: I ruined the whole recital because of that one mistake.
'Overgeneralization'
---------
Should statement: I must do it, I should have that.
'Mental Filter'
---------
All-or-nothing thinking: A single mistake ruins the entire life. Either you love

In [91]:
llm_qwen2 = ChatOllama(model="qwen2", base_url="http://localhost:11434")  # Explicitly set base_url
def evaluate_one_from_list_qwen2(text,dists,debug=False):
    dist_list = ', '.join(dists)
    query = f'Answer with single quoted item from the list "{dist_list}" only. What is the major cognitive distortion in "{text}"?'
    rep = llm_llama32.invoke(query).content
    return rep
    
for t in texts:
    print(f"{t[1]}: {t[0]}")
    res = evaluate_one_from_list_qwen2(t[0],dists)
    print(res)
    print("---------")


Mind Reading: I think he thinks about me.
'Mind Reading'
---------
Overgeneralization: It always happens every time over and over again
'Magnification'
---------
Magnification: This is enourmously huge and terrific.
The single 'quoted item from the list that best describes the major cognitive distortion in "This is enourmously huge and terrific" is: 

'Magnification'
---------
Furtune-telling: I know it will be that way and I forecast it.
'Mental Filter'
---------
Labeling: He is stupid, lazy and ignorant narciss.
'Mental Filter'
---------
Personalization: I am guilty and it is my failure.
'Mental filter'
---------
Emotional reasoning: I am either excited and amazed or abused and upset.
'All-or-nothing thinking'
---------
Mental filter: I ruined the whole recital because of that one mistake.
'Magnification'
---------
Should statement: I must do it, I should have that.
'Should statement'
---------
All-or-nothing thinking: A single mistake ruins the entire life. Either you love me or you

In [92]:
def evaluate_one_from_list_llama32(text,dists,debug=False):
    dist_list = ', '.join(dists)
    query = f'Act as a psycho-therapist. Answer with single quoted item from the list "{dist_list}" only. What is the major cognitive distortion in "{text}"?'
    rep = llm_llama32.invoke(query).content
    return rep
    
for t in texts:
    print(f"{t[1]}: {t[0]}")
    res = evaluate_one_from_list_llama32(t[0],dists)
    print(res)
    print("---------")

Mind Reading: I think he thinks about me.
The major cognitive distortion in "I think he thinks about me" is 'Mind Reading'. This is an illusion where a person assumes they know what another person is thinking or feeling without any evidence. In this case, the speaker is assuming that someone else's thoughts are focused on them, which is not necessarily true.
---------
Overgeneralization: It always happens every time over and over again
The major cognitive distortion in "It always happens every time over and over again" is: **Emotional Reasoning**.

This phrase suggests that a person believes that a certain situation (e.g., experiencing bad luck or failure) is inevitable, and that it will always happen in the same way. This belief leads to an emotional response, such as anxiety or despair, without considering alternative perspectives or evidence to contradict it.

As a psycho-therapist, I would help this person recognize that their thoughts are not supported by facts, but rather by thei

In [93]:
llm_qwen2 = ChatOllama(model="qwen2", base_url="http://localhost:11434")  # Explicitly set base_url
def evaluate_one_from_list_qwen2(text,dists,debug=False):
    dist_list = ', '.join(dists)
    query = f'Act as a psycho-therapist. Answer with single quoted item from the list "{dist_list}" only. What is the major cognitive distortion in "{text}"?'
    rep = llm_llama32.invoke(query).content
    return rep
    
for t in texts:
    print(f"{t[1]}: {t[0]}")
    res = evaluate_one_from_list_qwen2(t[0],dists)
    print(res)
    print("---------")

Mind Reading: I think he thinks about me.
The major cognitive distortion in "I think he thinks about me" is 'Mind Reading'.
---------
Overgeneralization: It always happens every time over and over again
The major cognitive distortion in "It always happens every time over and over again" is: **Overgeneralization**.

This type of thinking involves making a general rule or conclusion based on a single event, without considering alternative possibilities or exceptions. In this case, the person is assuming that the situation always occurs in the same way, without acknowledging that it might not happen next time.
---------
Magnification: This is enourmously huge and terrific.
The major cognitive distortion in "This is enormously huge and terrific" is 'Magnification'. This phrase exaggerates a situation, making it seem much bigger or more extreme than it actually is.
---------
Furtune-telling: I know it will be that way and I forecast it.
The major cognitive distortion in "I know it will be t

In [95]:
def evaluate_one_from_list_llama32(text,dists,debug=False):
    dist_list = ', '.join(dists)
    query = f'Act as a psycho-therapist. Answer with single quoted item from the list "{dist_list}" only. What is the major cognitive distortion in "{text}"?'
    rep = llm_llama32.invoke(query).content
    if debug:
        print(rep)
    dist = None
    for d in dists:
        if d in rep:
            return d
    return 'No Distortion'
    
for t in texts:
    print(f"{t[1]}: {t[0]}")
    res = evaluate_one_from_list_llama32(t[0],dists)
    print(res)
    print("---------")

Mind Reading: I think he thinks about me.
Mind Reading
---------
Overgeneralization: It always happens every time over and over again
All-or-nothing thinking
---------
Magnification: This is enourmously huge and terrific.
Magnification
---------
Furtune-telling: I know it will be that way and I forecast it.
Furtune-telling
---------
Labeling: He is stupid, lazy and ignorant narciss.
Labeling
---------
Personalization: I am guilty and it is my failure.
Should statement
---------
Emotional reasoning: I am either excited and amazed or abused and upset.
All-or-nothing thinking
---------
Mental filter: I ruined the whole recital because of that one mistake.
All-or-nothing thinking
---------
Should statement: I must do it, I should have that.
Should statement
---------
All-or-nothing thinking: A single mistake ruins the entire life. Either you love me or you hate me.
All-or-nothing thinking
---------


In [96]:
def evaluate_one_from_list_qwen2(text,dists,debug=False):
    dist_list = ', '.join(dists)
    query = f'Act as a psycho-therapist. Answer with single quoted item from the list "{dist_list}" only. What is the major cognitive distortion in "{text}"?'
    rep = llm_qwen2.invoke(query).content
    if debug:
        print(rep)
    dist = None
    for d in dists:
        if d in rep:
            return d
    return 'No Distortion'
    
for t in texts:
    print(f"{t[1]}: {t[0]}")
    res = evaluate_one_from_list_llama32(t[0],dists)
    print(res)
    print("---------")

Mind Reading: I think he thinks about me.
Mind Reading
---------
Overgeneralization: It always happens every time over and over again
Overgeneralization
---------
Magnification: This is enourmously huge and terrific.
Magnification
---------
Furtune-telling: I know it will be that way and I forecast it.
Furtune-telling
---------
Labeling: He is stupid, lazy and ignorant narciss.
Labeling
---------
Personalization: I am guilty and it is my failure.
Should statement
---------
Emotional reasoning: I am either excited and amazed or abused and upset.
All-or-nothing thinking
---------
Mental filter: I ruined the whole recital because of that one mistake.
All-or-nothing thinking
---------
Should statement: I must do it, I should have that.
Should statement
---------
All-or-nothing thinking: A single mistake ruins the entire life. Either you love me or you hate me.
All-or-nothing thinking
---------


## Explore accuracy for primary distortion in Multi-class dataset
- llama3.2 accuracy 0.22
- gwen2 accuracy 0.10
- aigents(baseline) accuracy 0.13

In [108]:
df2 = pd.read_csv(multiclass_dataset_file_path) 
df2 = df2.drop('Id_Number', axis=1) # delete columnb with id 
df2

Unnamed: 0,Patient Question,Distorted part,Dominant Distortion,Secondary Distortion (Optional)
0,"Hello, I have a beautiful,smart,outgoing and a...",The voice are always fimilar (someone she know...,Personalization,
1,Since I was about 16 years old I’ve had these ...,I feel trapped inside my disgusting self and l...,Labeling,Emotional Reasoning
2,So I’ve been dating on and off this guy for a...,,No Distortion,
3,My parents got divorced in 2004. My mother has...,,No Distortion,
4,I don’t really know how to explain the situati...,I refused to go because I didn’t know if it wa...,Fortune-telling,Emotional Reasoning
...,...,...,...,...
2525,I’m a 21 year old female. I spent most of my l...,,No Distortion,
2526,I am 21 female and have not had any friends fo...,Now I am at university my peers around me all ...,Overgeneralization,
2527,From the U.S.: My brother is 19 years old and ...,He claims he’s severely depressed and has outb...,Mental filter,Mind Reading
2528,From the U.S.: I am a 21 year old woman who ha...,,No Distortion,


In [155]:
def evaluate_df_multiclass(df,evaluator,dists,threshold,debug=False):
    trues = 0
    for _, row in df.iterrows():
        # Text definition: first, check the 2nd column; if NaN, take the text from the 1st column.
        text = row.iloc[1] if pd.notna(row.iloc[1]) else row.iloc[0]
        primary_distortion = row.iloc[2]  # The main cognitive distortion from the 3rd column
        secondary_distortion = row.iloc[3] if pd.notna(row.iloc[3]) else None  # The secondary distortion from the 4th column, if it exists
                       
        our_distortion = evaluator(text,dists,threshold)
        if debug:
            print(our_distortion == primary_distortion,our_distortion,primary_distortion,text,)
        
        if our_distortion == primary_distortion:
            trues += 1
            
    return float(trues) / len(df) 


In [110]:
evaluate_df_multiclass(df2[:10],evaluate_one_from_list_llama32,dists,0,True)


Personalization No Distortion
Labeling No Distortion
No Distortion Emotional reasoning
No Distortion Mental filter
Fortune-telling Overgeneralization
No Distortion All-or-nothing thinking
No Distortion All-or-nothing thinking
No Distortion All-or-nothing thinking
No Distortion No Distortion
Magnification Mind Reading


0.1

In [111]:
evaluate_df_multiclass(df2[:10],evaluate_one_from_list_qwen2,dists,0,True)


Personalization Labeling
Labeling Mind Reading
No Distortion Should statement
No Distortion Should statement
Fortune-telling Should statement
No Distortion Emotional reasoning
No Distortion Labeling
No Distortion Labeling
No Distortion Labeling
Magnification Emotional reasoning


0.0

In [117]:
t0 = dt.datetime.now()

acc = evaluate_df_multiclass(df2,evaluate_one_from_list_llama32,dists,0,False)

t1 = dt.datetime.now()
delta = t1 - t0
print(acc,delta.total_seconds(),delta.total_seconds()/len(df2))

0.22371541501976286 2180.28619 0.8617731976284584


In [118]:
t0 = dt.datetime.now()

acc = evaluate_df_multiclass(df2,evaluate_one_from_list_qwen2,dists,0,False)

t1 = dt.datetime.now()
delta = t1 - t0
print(acc,delta.total_seconds(),delta.total_seconds()/len(df2))

0.10118577075098814 398.713793 0.15759438458498023


In [119]:
dists

{'All-or-nothing thinking',
 'Emotional reasoning',
 'Furtune-telling',
 'Labeling',
 'Magnification',
 'Mental filter',
 'Mind Reading',
 'Overgeneralization',
 'Personalization',
 'Should statement'}

In [159]:
from pygents.aigents_api import TextMetrics
distortion_labels = ['positive','negative','rude',
'catastrophizing','dichotomous-reasoning','disqualifying-positive','emotional-reasoning','fortune-telling',
'labeling','magnification','mental-filtering','mindreading','overgeneralizing','personalizing','should-statement']
tm = TextMetrics(language_metrics('en',distortion_labels),debug=False)
aigents2dists_map = {
    'dichotomous-reasoning':'All-or-nothing thinking',
    'emotional-reasoning':'Emotional reasoning',
    'fortune-telling':'Furtune-telling',
    'labeling':'Labeling',
    'magnification':'Magnification',
    'mental-filtering':'Mental filter',
    'mindreading':'Mind Reading',
    'overgeneralizing':'Overgeneralization',
    'personalizing':'Personalization',
    'should-statement':'Should statement'}
def aigents2dists(metrics):
    max_val = 0
    best_match = None
    for m in metrics:
        if m in aigents2dists_map:
            if metrics[m] > max_val:
                max_val = metrics[m]
                best_match = m
    if not best_match is None:
        return aigents2dists_map[best_match]
    return 'No Distortion'

def evaluate_one_from_list_aigents(text,dists,debug=False):
    metrics = tm.get_sentiment_words(text)
    if debug:
       print(text,metrics)
    if len(metrics) > 0:
        return aigents2dists(metrics)
    return 'No Distortion'



In [161]:
for t in texts:
    m = evaluate_one_from_list_aigents(t[0],dists=None,debug=False)
    print(f"{m == t[1]} = {m} ? {t[1]} : {t[0]}")

True = Mind Reading ? Mind Reading : I think he thinks about me.
True = Overgeneralization ? Overgeneralization : It always happens every time over and over again
False = No Distortion ? Magnification : This is enourmously huge and terrific.
False = No Distortion ? Furtune-telling : I know it will be that way and I forecast it.
False = No Distortion ? Labeling : He is stupid, lazy and ignorant narciss.
False = No Distortion ? Personalization : I am guilty and it is my failure.
False = No Distortion ? Emotional reasoning : I am either excited and amazed or abused and upset.
False = No Distortion ? Mental filter : I ruined the whole recital because of that one mistake.
True = Should statement ? Should statement : I must do it, I should have that.
False = No Distortion ? All-or-nothing thinking : A single mistake ruins the entire life. Either you love me or you hate me.


In [162]:
for t in texts:
    m = evaluate_one_from_list_aigents(t[0],dists=None,debug=True)
    #print(f"{m == t[1]} = {m} ? {t[1]} : {t[0]}")

I think he thinks about me. {'mindreading': 0.74, 'positive': 0.59}
It always happens every time over and over again {'overgeneralizing': 0.77, 'dichotomous-reasoning': 0.54}
This is enourmously huge and terrific. {'positive': 0.74}
I know it will be that way and I forecast it. {'positive': 0.49}
He is stupid, lazy and ignorant narciss. {'negative': 0.77, 'rude': 0.54}
I am guilty and it is my failure. {'negative': 0.68}
I am either excited and amazed or abused and upset. {'positive': 0.64, 'negative': 0.64, 'contradictive': 0.64}
I ruined the whole recital because of that one mistake. {'negative': 0.64, 'positive': 0.5, 'contradictive': 0.57}
I must do it, I should have that. {'should-statement': 0.66, 'negative': 0.52}
A single mistake ruins the entire life. Either you love me or you hate me. {'negative': 0.69, 'positive': 0.42, 'contradictive': 0.54}


In [157]:
acc = evaluate_df_multiclass(df2[:10],evaluate_one_from_list_aigents,dists,0,debug=True)
print(acc)

False Mind Reading Personalization The voice are always fimilar (someone she knows) Im very concerned about this and hope it has nothing to do with my pregnancy while on active duty.
False No Distortion Labeling I feel trapped inside my disgusting self and like I’m willing to do anything to escape it.I’m just afraid sometimes that since I’m not really thinking during these times that I might do something to myself–especially since I think during these times that I don’t deserve to get help or that I’m not worth disturbing people by calling them.
False All-or-nothing thinking No Distortion  So I’ve been dating on and off this guy for almost 3 years. He is 3 years younger than me. During our relationship we had always had issues. First he wouldn’t call me, he would prefer to go out with his friends than me, he wouldn’t even remember the day of our anniversary, or things like that we kind of tried to work all that out but I’ve always felt that he really doesn’t love and care about me, eve

In [158]:
t0 = dt.datetime.now()

acc = evaluate_df_multiclass(df2,evaluate_one_from_list_aigents,dists,0,False)

t1 = dt.datetime.now()
delta = t1 - t0
print(acc,delta.total_seconds(),delta.total_seconds()/len(df2))


0.13359683794466404 1.557376 0.0006155636363636364
