In [1]:
import ast
import json
import os

import pandas as pd
import requests

# Measure constructs using Generative AI

In [2]:


def process_api_output(output_str):
  """
  Parse the text returned by the model into a dictionary.

  The whole string is first parsed as JSON. If that fails, the span from the
  first '{' to the last '}' is parsed instead, and any non-empty text that
  follows the closing brace is stored under the key 'Additional note'.
  Text that precedes the opening brace is discarded.

  Args:
    output_str: The string output from the API call.

  Returns:
    A dictionary containing the extracted data. The key 'Additional note' is
    only present when there is non-empty text after the JSON object.

  Raises:
    ValueError: If no JSON object can be located in the string, or if the
      string is valid JSON but not an object (e.g. a list or a number).
    json.JSONDecodeError: If the text between the braces is not valid JSON
      (this is a subclass of ValueError).

  Example:

    output_str1 = '{  "loneliness": [[1], ["No one cares about me"]] }'
    output_str2 = '{  "loneliness": [[1], ["No one cares about me"]] }Explanation: the text clearly expresses loneliness.'
    output_str3 = '{  "loneliness": [[1], ["No one cares about me"]] }\n'

    print(process_api_output(output_str1))  # no 'Additional note'
    print(process_api_output(output_str2))  # 'Additional note' holds the explanation
    print(process_api_output(output_str3))  # no 'Additional note' (only whitespace follows)
  """
  try:
    # Fast path: the whole string is valid JSON (surrounding whitespace is fine),
    # so there cannot be any extra text to report.
    data = json.loads(output_str)
    trailing_text = ''
  except json.JSONDecodeError:
    # Fall back to the outermost {...} span and keep whatever follows it.
    start_index = output_str.find('{')
    end_index = output_str.rfind('}')
    if start_index == -1 or end_index < start_index:
      raise ValueError("Invalid API output format.")
    data = json.loads(output_str[start_index:end_index + 1])
    trailing_text = output_str[end_index + 1:].strip()

  # Callers index the result by category name, so anything but an object is unusable.
  if not isinstance(data, dict):
    raise ValueError("Invalid API output format.")

  if trailing_text:
    data['Additional note'] = trailing_text

  return data



In [3]:
def _parse_model_content(content):
  """Turn the model's text reply into a dict, trying the strictest parser first."""
  # 1) Strict JSON.
  try:
    parsed = json.loads(content)
    if isinstance(parsed, dict):
      return parsed
  except (TypeError, ValueError):
    pass
  # 2) Python-literal dict (e.g. single-quoted keys). literal_eval only accepts
  #    literals, so unlike eval() it cannot execute code returned by the model.
  try:
    parsed = ast.literal_eval(content)
    if isinstance(parsed, dict):
      return parsed
  except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
    pass
  # 3) JSON object embedded in surrounding text.
  return process_api_output(content)


def openrouter_request(prompt, OPENROUTER_API_KEY, model = 'meta-llama/llama-3.1-405b-instruct:free',
                       temperature = 0, timeout = 120):
  """
  Send a single-message chat completion request to OpenRouter and parse the reply.

  free models: 20 requests per minute and 200 requests per day. See https://openrouter.ai/docs/limits

  Args:
    prompt: Text sent as the only user message.
    OPENROUTER_API_KEY: API key used in the Authorization header.
    model: OpenRouter model identifier.
    temperature: Sampling temperature passed to the model.
    timeout: Seconds to wait for the HTTP request before giving up.

  Returns:
    A tuple (final_result, metadata). metadata is the decoded JSON body of the
    HTTP response. final_result is the dictionary parsed from the model's
    message, or None when the response has no usable message or it could not
    be parsed (the problem is printed in that case).

  Raises:
    requests.exceptions.RequestException: If the HTTP request fails or times out.
    json.JSONDecodeError: If the HTTP response body is not JSON.
  """

  response = requests.post(
      url="https://openrouter.ai/api/v1/chat/completions",
      headers={
        "Authorization": f"Bearer {OPENROUTER_API_KEY}",
      },
      data=json.dumps({
        "model": model, # Optional
        "temperature": temperature,
        "response_format": { "type": "json_object" },
        "messages": [
          { "role": "user", "content": prompt}
        ]
      }),
      timeout=timeout,  # without a timeout a stalled connection would block forever
      )
  metadata = json.loads(response.text.strip())

  try:
    # Error responses may not contain 'choices'; that case is reported below as well.
    content = metadata['choices'][0]['message']['content']
    return _parse_model_content(content), metadata
  except Exception as e:
    print('Error:', e)
    print('Could not parse the response, probably because the model did not follow the instructions well. gpt-4o,gpt-4o-mini, gemini 1.5, claude 3.5  works well.')
    print("This is the response I could not parse:")
    print(metadata)
    # Keep the two-value shape so `result, metadata = openrouter_request(...)` still unpacks.
    return None, metadata
  

# Set API key and choose model
- The API key is associated with your account. You can add credit to use paid models.
- All models: https://openrouter.ai/models
- Paid models:
    - 'gpt-4o'
    - 'gpt-4o-mini'
    - 'anthropic/claude-3.5-sonnet'
- Free models: https://openrouter.ai/models?max_price=0
    - Free models are rate-limited: a limited number of requests per minute (e.g., 20) and per day (depends on the model). See https://openrouter.ai/docs/limits
    - "google/gemini-2.0-flash-exp:free"
    - "meta-llama/llama-3.1-405b-instruct:free"
    - "meta-llama/llama-3.1-70b-instruct:free"
    - "meta-llama/llama-3.1-8b-instruct:free"


In [4]:
# Look for the OpenRouter API key in two places, in this order:
#   1. a local `api_keys.py` module that defines `openrouter_key`
#   2. the OPENROUTER_API_KEY environment variable
# Never paste a key into the notebook itself: notebooks get shared and committed.
# NOTE(review): a key used to be hard-coded in this cell; it should be revoked at
# https://openrouter.ai/settings/keys
try:
    import api_keys
    OPENROUTER_API_KEY = api_keys.openrouter_key
except (ImportError, AttributeError):
    OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY", "")
    if not OPENROUTER_API_KEY:
        print("No OpenRouter API key found. Create api_keys.py with `openrouter_key = '...'` "
              "or set the OPENROUTER_API_KEY environment variable. "
              "Get a key from OpenRouter AI: https://openrouter.ai/settings/keys")

model  = "gpt-4o-mini" # "gpt-4o", "gpt-4o-mini" (cheaper), "meta-llama/llama-3.1-405b-instruct:free"   others: https://openrouter.ai/models

# Design a prompt
- consider using a different scale: 0 = Not at all, 1 = Mildly related, 2 = Somewhat related, 3 = Prototypical mention of the category
- consider changing "mentions" to "expresses" or "is suffering"

In [5]:
# Basic prompt template. `str.format` fills {context}, {categories} and {text};
# the doubled braces {{ }} are escapes that render as literal { } in the final prompt.
# NOTE(review): the example response format uses single quotes, which is not strict
# JSON - confirm that the parsing code copes with whatever quoting the model returns.
prompt_template = """Classify the text into one or more of the following {context} categories: {categories}.

Provide a score (between 0 and 1) as to whether the text clearly mentions the category and an explanation (words or phrases from the text that are clearly about the category).

Text: 
{text}

Structure your response in the following JSON format (no extra text):
{{'category_A': [[score], [words, phrases]], 'category_B': [[score], [words, phrases]], ...}}

JSON:
"""

In [6]:
# Category names the model is asked to score; inserted into the {categories} slot of the prompt.
categories = ['desire to escape', 'suicidal ideation', 'anger', 'loneliness']

In [7]:
# Example document to classify; inserted into the {text} slot of the prompt.
text = 'No one cares about me. I go to therapy, but it doesnt work. It wont get better. I want out. Im feeling hyperactive.'

In [8]:
# Fill the three slots of the basic template; no context word is used here.
prompt = prompt_template.format(text=text, categories=categories, context='')

# Echo the finished prompt so it can be checked before it is sent to the model.
print('Prompt:', prompt, sep='\n')

Prompt:
Classify the text into one or more of the following  categories: ['desire to escape', 'suicidal ideation', 'anger', 'loneliness'].

Provide a score (between 0 and 1) as to whether the text clearly mentions the category and an explanation (words or phrases from the text that are clearly about the category).

Text: 
No one cares about me. I go to therapy, but it doesnt work. It wont get better. I want out. Im feeling hyperactive.

Structure your response in the following JSON format (no extra text):
{'category_A': [[score], [words, phrases]], 'category_B': [[score], [words, phrases]], ...}

JSON:



### With definitions


In [9]:

# Prompt variant in which every category comes with a definition.
# `str.format` fills {context}, {categories} and {text}; the doubled braces {{ }}
# render as literal { } in the final prompt.
prompt_template_with_definitions = """Classify the text into one or more of the following {context} categories with their corresponding definitions:\n\n{categories}

Provide a score (between 0 and 1) as to whether the text clearly mentions the category and an explanation (words or phrases from the text that are very prototypical expressions of the category).
Text: 
{text}

Structure your response in the following JSON format (no extra text):
{{'category_A': [[score], [words, phrases]], 'category_B': [[score], [words, phrases]], ...}}

JSON:
"""

# Category name -> definition. Kept under its own name so that
# `categories_with_definitions` is only ever bound to the formatted string.
category_definitions = {'desire to escape': 'wanting to escape emotional pain',
              'suicidal ideation': "desire of not wanting to live",
              'anger': "negative high arousal with irritability and anger",
              'loneliness': "aversive state experienced when a discrepancy exists between the interpersonal relationships one wishes to have and those that one perceives they currently have. The perception that one's social relationships are not living up to some expectation",
              }

# One "name: definition" line per category, ready to be inserted into the prompt
categories_with_definitions = '\n'.join(f"{name}: {definition}" for name, definition in category_definitions.items())

# Insert into prompt
prompt_with_definitions = prompt_template_with_definitions.format(context = '',
              categories = categories_with_definitions,
              text= text
              )


print('Prompt:')
print(prompt_with_definitions)


Prompt:
Classify the text into one or more of the following  categories with their corresponding definitions:

desire to escape: wanting to escape emotional pain
suicidal ideation: desire of not wanting to live
anger: negative high arousal with irritability and anger
loneliness: aversive state experienced when a discrepancy exists between the interpersonal relationships one wishes to have and those that one perceives they currently have. The perception that one's social relationships are not living up to some expectation

Provide a score (between 0 and 1) as to whether the text clearly mentions the category and an explanation (words or phrases from the text that are very prototypical expressions of the category).
Text: 
No one cares about me. I go to therapy, but it doesnt work. It wont get better. I want out. Im feeling hyperactive.

Structure your response in the following JSON format (no extra text):
{'category_A': [[score], [words, phrases]], 'category_B': [[score], [words, phrases

# Submit prompt to OpenRouter


In [10]:
# Show the prompt that is about to be sent, then query the model with it.
# (Pass `prompt_with_definitions` instead of `prompt` to use the variant with definitions.)
print(prompt)
final_result, metadata = openrouter_request(
    prompt,
    OPENROUTER_API_KEY,
    model='anthropic/claude-3.5-sonnet',
    temperature=0,
)

Classify the text into one or more of the following  categories: ['desire to escape', 'suicidal ideation', 'anger', 'loneliness'].

Provide a score (between 0 and 1) as to whether the text clearly mentions the category and an explanation (words or phrases from the text that are clearly about the category).

Text: 
No one cares about me. I go to therapy, but it doesnt work. It wont get better. I want out. Im feeling hyperactive.

Structure your response in the following JSON format (no extra text):
{'category_A': [[score], [words, phrases]], 'category_B': [[score], [words, phrases]], ...}

JSON:



In [11]:
# Recap: display the example text that was classified by the request above
text

'No one cares about me. I go to therapy, but it doesnt work. It wont get better. I want out. Im feeling hyperactive.'

In [12]:
# Parsed reply: for each category, [[score], [supporting words/phrases]]
final_result

{'desire to escape': [[0.8], ['I want out']],
 'suicidal ideation': [[0.3], ['I want out', 'It wont get better']],
 'loneliness': [[1.0], ['No one cares about me']],
 'anger': [[0.0], []]}

In [13]:
# Full decoded API response for the request above (provider, model, raw message content, token usage)
metadata

{'id': 'gen-1737065561-6V64VIPtyvFMSOi6yqen',
 'provider': 'Amazon Bedrock',
 'model': 'anthropic/claude-3.5-sonnet',
 'object': 'chat.completion',
 'created': 1737065561,
 'choices': [{'logprobs': None,
   'finish_reason': 'end_turn',
   'index': 0,
   'message': {'role': 'assistant',
    'content': '{\n    "desire to escape": [[0.8], ["I want out"]],\n    "suicidal ideation": [[0.3], ["I want out", "It wont get better"]],\n    "loneliness": [[1.0], ["No one cares about me"]],\n    "anger": [[0.0], []]\n}',
    'refusal': ''}}],
 'usage': {'prompt_tokens': 169, 'completion_tokens': 84, 'total_tokens': 253}}

# Loop through text documents


In [14]:
# Where the documents come from:
#   "google": running on Colab, .txt files are read from Google Drive
#   "local":  .txt files are read from a folder on this computer
#   False:    download a small default sample of Reddit posts
server = False


if server == 'google':
    # ON GOOGLE DRIVE
    # Imported here because google.colab only exists inside Colab.
    from google.colab import drive
    drive.mount('/content/drive')
    input_dir = '/content/drive/MyDrive/your_folder/' # Or find using os.listdir('/content/')
    output_dir = '/content/drive/MyDrive/your_folder/'
elif server == 'local':
    # ON YOUR COMPUTER
    input_dir = './' # add path to input_dir
    output_dir = './'
else:
    # Load default data
    reddit_df = pd.read_csv('https://mair.sites.fas.harvard.edu/datasets/rmhd_27subreddits_1300posts_train.csv', index_col = 0)
    reddit_df_subset = reddit_df[reddit_df['subreddit'].isin(['suicidewatch', 'anxiety'])]
    # Shuffle with a fixed seed so the same five posts are selected on every run
    reddit_df_subset_random = reddit_df_subset.sample(frac=1, random_state=42)
    reddit_df_subset_random_5 =  reddit_df_subset_random[['subreddit', 'post']][:5]
    # Add an incrementing number to the beginning of the key and create a dictionary
    texts = {
        f"{i+1}_{row.subreddit}": row.post
        for i, row in enumerate(reddit_df_subset_random_5.itertuples(index=False))
    }

    output_dir = './data/output/'

os.makedirs(output_dir, exist_ok=True)
if server in ['google', 'local']:
    # Load every .txt file of input_dir into `texts`, keyed by file name
    print('current working directory: ', os.getcwd())
    texts = {}

    # sorted(): os.listdir returns entries in arbitrary order; sorting makes runs reproducible.
    # Loop names are chosen so they do not overwrite the example `text` defined earlier.
    for file_name in sorted(os.listdir(input_dir)):
        if file_name.endswith('.txt'):
            # Explicit encoding so the result does not depend on the platform default;
            # undecodable bytes are replaced instead of aborting the whole load.
            with open(os.path.join(input_dir, file_name), 'r', encoding='utf-8', errors='replace') as f:
                texts[file_name] = f.read()

texts



{'1_suicidewatch': "31 and ready to stop existing I've made so many mistakes, medicated or not that desire is still there. Husband decides he's ready to divorce mostly all of my fault. I don't see a way to ever feel happy again. The years of depression (18 years) have won.\n",
 '2_anxiety': "Freezing up I'm new to the sub, so I don't really know if this is the right place to ask for help, but I've been told that this is probably linked to some form of anxiety. I've recently gotten over depression through the help of a medication, and since I've started feeling better, I've been feeling a lot more on edge, and social situations and various other interactions have gotten a lot harder. There are times where I'll be talking with somebody (this can be any person, talking about anything. I've had it happen with teachers, relatives, and even some of my closest friends, so I don't think its related to who I'm talking to), and some kind of response is prompted, I come up with an answer/response

In [15]:
# Loop through texts and apply openrouter
results = {}
failed_files = []  # documents for which no usable response was obtained

# The loop uses its own names (file_name, document, document_prompt) so that it
# does not overwrite the example `text` and `prompt` defined in earlier cells.
for file_name, document in texts.items():
    document_prompt = prompt_template_with_definitions.format(context = 'mental health',
              categories = categories_with_definitions,
              text= document
              )

    print(document_prompt)

    # Make request. WARNING: this costs some money depending on model
    outcome = openrouter_request(document_prompt, OPENROUTER_API_KEY, model = model, temperature=0)
    print('---------------------')

    # A failed request must not abort the whole loop: note the document and move on.
    if outcome is None or outcome[0] is None:
        print(f'No usable response for {file_name}; skipping it.')
        failed_files.append(file_name)
        print('\n========================\n')
        continue

    response, metadata = outcome
    print(response)
    results[file_name] = response
    print('\n========================\n')

if failed_files:
    print('Documents without a result:', failed_files)


Classify the text into one or more of the following mental health categories with their corresponding definitions:

desire to escape: wanting to escape emotional pain
suicidal ideation: desire of not wanting to live
anger: negative high arousal with irritability and anger
loneliness: aversive state experienced when a discrepancy exists between the interpersonal relationships one wishes to have and those that one perceives they currently have. The perception that one's social relationships are not living up to some expectation

Provide a score (between 0 and 1) as to whether the text clearly mentions the category and an explanation (words or phrases from the text that are very prototypical expressions of the category).
Text: 
31 and ready to stop existing I've made so many mistakes, medicated or not that desire is still there. Husband decides he's ready to divorce mostly all of my fault. I don't see a way to ever feel happy again. The years of depression (18 years) have won.


Structure

In [16]:
# Parsed model output per document: {document key: {category: [[score], [phrases]]}}
results

{'1_suicidewatch': {'suicidal ideation': [[0.9],
   ['ready to stop existing', "I don't see a way to ever feel happy again"]],
  'desire to escape': [[0.7],
   ['that desire is still there', 'years of depression have won']],
  'loneliness': [[0.5],
   ["Husband decides he's ready to divorce", 'mostly all of my fault']]},
 '2_anxiety': {'desire to escape': [[0.3],
   ['wanting to escape emotional pain',
    'avoid certain interactions',
    "fear that it's going to happen again"]],
  'suicidal ideation': [[0], []],
  'anger': [[0], []],
  'loneliness': [[0.5],
   ['social situations',
    'various other interactions have gotten a lot harder',
    'some of my closest friends']]},
 '3_anxiety': {'desire to escape': [[0.3],
   ['wanting to escape emotional pain',
    "I don't know what to do, I don't know how to cope"]],
  'suicidal ideation': [[0], []],
  'anger': [[0], []],
  'loneliness': [[0.4], ['worried my husband is going to start hating me']]},
 '4_anxiety': {'desire to escape': [[

In [17]:
# Inspect the raw API response of the last request made in the loop above
metadata


{'id': 'gen-1737065577-lyodkOZ1rQjnycKewj7T',
 'provider': 'OpenAI',
 'model': 'openai/gpt-4o-mini',
 'object': 'chat.completion',
 'created': 1737065577,
 'choices': [{'logprobs': None,
   'finish_reason': 'stop',
   'index': 0,
   'message': {'role': 'assistant',
    'content': '{\n  "suicidal ideation": [[0.9], ["Would drinking isopropyl alcohol kill me?", "Would it be any better than bleach?"]],\n  "desire to escape": [[0.5], ["drinking isopropyl alcohol", "bleach"]]\n}',
    'refusal': ''}}],
 'system_fingerprint': 'fp_72ed7ab54c',
 'usage': {'prompt_tokens': 209, 'completion_tokens': 62, 'total_tokens': 271}}

# Turn JSON results into a csv
With each document as a row and each category as a column

In [18]:
def _as_scalar_score(score):
    """Return the number inside a one-element list like [0.9]; pass scalars through."""
    if isinstance(score, (list, tuple)):
        return score[0] if score else None
    return score


# Build one row per document with `<category>_score` and `<category>_explanation` columns.
rows = []
# `category_results` (not `categories`) so the list of category names defined earlier is not overwritten.
for post_id, category_results in results.items():
    row = {'sentence': post_id}
    if not isinstance(category_results, dict):
        # Nothing usable was stored for this document; keep the row so the document still appears.
        rows.append(row)
        continue
    for category, value in category_results.items():
        if category == 'Additional note':
            # Free text that followed the JSON in the model's reply; it is not a category.
            row['Additional note'] = value
            continue
        if not (isinstance(value, (list, tuple)) and len(value) == 2):
            print(f'Skipping malformed entry for {post_id!r} / {category!r}: {value!r}')
            continue
        score, explanation = value
        row[f'{category}_score'] = _as_scalar_score(score)
        row[f'{category}_explanation'] = explanation
    rows.append(row)

# Create DataFrame
df = pd.DataFrame(rows)
display(df)
# os.path.join works whether or not output_dir ends with a path separator
df.to_csv(os.path.join(output_dir, 'openrouter_results.csv'), index=False)


Unnamed: 0,sentence,suicidal ideation_score,suicidal ideation_explanation,desire to escape_score,desire to escape_explanation,loneliness_score,loneliness_explanation,anger_score,anger_explanation
0,1_suicidewatch,0.9,"[ready to stop existing, I don't see a way to ...",0.7,"[that desire is still there, years of depressi...",0.5,"[Husband decides he's ready to divorce, mostly...",,
1,2_anxiety,0.0,[],0.3,"[wanting to escape emotional pain, avoid certa...",0.5,"[social situations, various other interactions...",0.0,[]
2,3_anxiety,0.0,[],0.3,"[wanting to escape emotional pain, I don't kno...",0.4,[worried my husband is going to start hating me],0.0,[]
3,4_anxiety,0.0,[],0.2,"[wanting to escape emotional pain, trying to e...",0.0,[],0.0,[]
4,5_suicidewatch,0.9,"[Would drinking isopropyl alcohol kill me?, Wo...",0.5,"[drinking isopropyl alcohol, bleach]",,,,
