In [1]:
import os, sys
cwd = os.getcwd()
project_path = cwd[:cwd.find('pygents')+7]
if project_path not in sys.path: sys.path.append(project_path)
os.chdir(project_path)

import datetime as dt

import pandas as pd
import numpy as np
import json
from langchain_ollama.chat_models import ChatOllama

# Experiment with multi-class cognitive distorion detection

In [2]:
texts = [
    ("Earth is round.", "No Distortion"),
    ("I think he thinks about me.", "Mind Reading"),
    ("It always happens every time over and over again", "Overgeneralization"),
    ("This is enourmously huge and terrific.", "Magnification"),
    ("I know it will be that way and I forecast it.", "Furtune-telling"),
    ("He is stupid, lazy and ignorant narciss.", "Labeling"),
    ("I am guilty and it is my failure.", "Personalization"),
    ("I am either excited and amazed or abused and upset.", "Emotional reasoning"), 
    ("I ruined the whole recital because of that one mistake.", "Mental filter"),
    ("I must do it, I should have that.", "Should statement"),
    ("A single mistake ruins the entire life. Either you love me or you hate me.", "All-or-nothing thinking")
]
dists = set([x[1] for x in texts])
dists

{'All-or-nothing thinking',
 'Emotional reasoning',
 'Furtune-telling',
 'Labeling',
 'Magnification',
 'Mental filter',
 'Mind Reading',
 'No Distortion',
 'Overgeneralization',
 'Personalization',
 'Should statement'}

In [3]:
llm_llama32 = ChatOllama(model="llama3.2", base_url="http://localhost:11434", temperature=0.0)  # Explicitly set base_url


In [4]:
def evaluate_specific_model(model,text,dists,debug=False):
    query = f'You are professional psycho-therapist experienced in cognitive-behavioral therapy. '\
f'You can label texts witn none or some of cognitive distortoions, represented by the following labels: {str(dists)}. '\
f'When labeling, return only JSON array in square brackets of strings in double quotes representing the labels. '\
f'Label this text for presense or absence of any cognitive distortions fom given list: "{text}"'
    if debug:
        print(query)
    rep = llm_llama32.invoke(query).content
    return rep

evaluate_specific_model(llm_llama32,"I think he thinks about me.",dists,True)


You are professional psycho-therapist experienced in cognitive-behavioral therapy. You can label texts witn none or some of cognitive distortoions, represented by the following labels: {'Should statement', 'Furtune-telling', 'Magnification', 'Overgeneralization', 'Personalization', 'All-or-nothing thinking', 'Emotional reasoning', 'Mental filter', 'Mind Reading', 'Labeling', 'No Distortion'}. When labeling, return only JSON array in square brackets of strings in double quotes representing the labels. Label this text for presense or absence of any cognitive distortions fom given list: "I think he thinks about me."


'["Should statement", "Mind Reading"]'

In [5]:
for t in texts:
    print(f"{t[1]}: {t[0]}")
    rep = evaluate_specific_model(llm_llama32,t,dists,False)
    print(rep)
    if "'\"" in rep:
        rep = rep.replace("'\"",'"')
        rep = rep.replace("\"'",'"')
    elif "'" in rep:
        rep = rep.replace("'",'"')
    print(rep)
    rep_list = json.loads(rep)
    print({value: 1.0 for index, value in enumerate(rep_list)})
    print()

No Distortion: Earth is round.
["No Distortion"]
["No Distortion"]
{'No Distortion': 1.0}

Mind Reading: I think he thinks about me.
["Mind Reading"]
["Mind Reading"]
{'Mind Reading': 1.0}

Overgeneralization: It always happens every time over and over again
["Overgeneralization"]
["Overgeneralization"]
{'Overgeneralization': 1.0}

Magnification: This is enourmously huge and terrific.
["Magnification"]
["Magnification"]
{'Magnification': 1.0}

Furtune-telling: I know it will be that way and I forecast it.
["Furtune-telling"]
["Furtune-telling"]
{'Furtune-telling': 1.0}

Labeling: He is stupid, lazy and ignorant narciss.
["Labeling"]
["Labeling"]
{'Labeling': 1.0}

Personalization: I am guilty and it is my failure.
["Personalization"]
["Personalization"]
{'Personalization': 1.0}

Emotional reasoning: I am either excited and amazed or abused and upset.
["All-or-nothing thinking", "Emotional reasoning"]
["All-or-nothing thinking", "Emotional reasoning"]
{'All-or-nothing thinking': 1.0, 'E

In [8]:
# Dataset: Multiple Distorions (sagarikashreevastava)
# Paper: https://aclanthology.org/2021.clpsych-1.17/
# Data: https://www.kaggle.com/datasets/sagarikashreevastava/cognitive-distortion-detetction-dataset

# !pip install kagglehub
import kagglehub
multiclass_dataset_path = kagglehub.dataset_download("sagarikashreevastava/cognitive-distortion-detetction-dataset")
print("Path to dataset files:", multiclass_dataset_path)
multiclass_dataset_file_path = multiclass_dataset_path + "/Annotated_data.csv"


Path to dataset files: C:\Users\anton\.cache\kagglehub\datasets\sagarikashreevastava\cognitive-distortion-detetction-dataset\versions\1


## Explore accuracy for primary distortion in Multi-class dataset


In [9]:
df2 = pd.read_csv(multiclass_dataset_file_path) 
df2 = df2.drop('Id_Number', axis=1) # delete columnb with id 
df2

Unnamed: 0,Patient Question,Distorted part,Dominant Distortion,Secondary Distortion (Optional)
0,"Hello, I have a beautiful,smart,outgoing and a...",The voice are always fimilar (someone she know...,Personalization,
1,Since I was about 16 years old I’ve had these ...,I feel trapped inside my disgusting self and l...,Labeling,Emotional Reasoning
2,So I’ve been dating on and off this guy for a...,,No Distortion,
3,My parents got divorced in 2004. My mother has...,,No Distortion,
4,I don’t really know how to explain the situati...,I refused to go because I didn’t know if it wa...,Fortune-telling,Emotional Reasoning
...,...,...,...,...
2525,I’m a 21 year old female. I spent most of my l...,,No Distortion,
2526,I am 21 female and have not had any friends fo...,Now I am at university my peers around me all ...,Overgeneralization,
2527,From the U.S.: My brother is 19 years old and ...,He claims he’s severely depressed and has outb...,Mental filter,Mind Reading
2528,From the U.S.: I am a 21 year old woman who ha...,,No Distortion,


In [10]:
from pygents.recognition_evaluators import evaluate_tm_df

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\anton\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping tokenizers\punkt.zip.


In [11]:
TODO next!

SyntaxError: invalid syntax (2503578606.py, line 1)

In [None]:
def evaluate_df_multiclass(df,evaluator,dists,threshold,debug=False):
    trues = 0
    for _, row in df.iterrows():
        # Text definition: first, check the 2nd column; if NaN, take the text from the 1st column.
        text = row.iloc[1] if pd.notna(row.iloc[1]) else row.iloc[0]
        primary_distortion = row.iloc[2]  # The main cognitive distortion from the 3rd column
        secondary_distortion = row.iloc[3] if pd.notna(row.iloc[3]) else None  # The secondary distortion from the 4th column, if it exists
                       
        our_distortion = evaluator(text,dists,threshold)
        if debug:
            print(our_distortion == primary_distortion,our_distortion,primary_distortion,text,)
        
        if our_distortion == primary_distortion:
            trues += 1
            
    return float(trues) / len(df) 


In [None]:
evaluate_df_multiclass(df2[:10],evaluate_one_from_list_llama32,dists,0,True)


In [None]:
evaluate_df_multiclass(df2[:10],evaluate_one_from_list_qwen2,dists,0,True)


In [None]:
t0 = dt.datetime.now()

acc = evaluate_df_multiclass(df2,evaluate_one_from_list_llama32,dists,0,False)

t1 = dt.datetime.now()
delta = t1 - t0
print(acc,delta.total_seconds(),delta.total_seconds()/len(df2))

In [None]:
t0 = dt.datetime.now()

acc = evaluate_df_multiclass(df2,evaluate_one_from_list_qwen2,dists,0,False)

t1 = dt.datetime.now()
delta = t1 - t0
print(acc,delta.total_seconds(),delta.total_seconds()/len(df2))

In [None]:
dists

In [None]:
from pygents.aigents_api import TextMetrics
distortion_labels = ['positive','negative','rude',
'catastrophizing','dichotomous-reasoning','disqualifying-positive','emotional-reasoning','fortune-telling',
'labeling','magnification','mental-filtering','mindreading','overgeneralizing','personalizing','should-statement']
tm = TextMetrics(language_metrics('en',distortion_labels),debug=False)
aigents2dists_map = {
    'dichotomous-reasoning':'All-or-nothing thinking',
    'emotional-reasoning':'Emotional reasoning',
    'fortune-telling':'Furtune-telling',
    'labeling':'Labeling',
    'magnification':'Magnification',
    'mental-filtering':'Mental filter',
    'mindreading':'Mind Reading',
    'overgeneralizing':'Overgeneralization',
    'personalizing':'Personalization',
    'should-statement':'Should statement'}
def aigents2dists(metrics):
    max_val = 0
    best_match = None
    for m in metrics:
        if m in aigents2dists_map:
            if metrics[m] > max_val:
                max_val = metrics[m]
                best_match = m
    if not best_match is None:
        return aigents2dists_map[best_match]
    return 'No Distortion'

def evaluate_one_from_list_aigents(text,dists,debug=False):
    metrics = tm.get_sentiment_words(text)
    if debug:
       print(text,metrics)
    if len(metrics) > 0:
        return aigents2dists(metrics)
    return 'No Distortion'



In [None]:
for t in texts:
    m = evaluate_one_from_list_aigents(t[0],dists=None,debug=False)
    print(f"{m == t[1]} = {m} ? {t[1]} : {t[0]}")

In [None]:
for t in texts:
    m = evaluate_one_from_list_aigents(t[0],dists=None,debug=True)
    #print(f"{m == t[1]} = {m} ? {t[1]} : {t[0]}")

In [None]:
acc = evaluate_df_multiclass(df2[:10],evaluate_one_from_list_aigents,dists,0,debug=True)
print(acc)

In [None]:
t0 = dt.datetime.now()

acc = evaluate_df_multiclass(df2,evaluate_one_from_list_aigents,dists,0,False)

t1 = dt.datetime.now()
delta = t1 - t0
print(acc,delta.total_seconds(),delta.total_seconds()/len(df2))
