## **Import Libraries**

In [1]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_cohere import ChatCohere
import cohere

import pandas as pd
pd.set_option("display.max_colwidth", None)

In [2]:
from getpass import getpass
import os
co_api_key = getpass("Enter Cohere API key: ")
os.environ['COHERE_API_KEY'] = co_api_key
cohere_client = cohere.Client(api_key=co_api_key)

## **Read Dataset**

In [4]:
df_co = pd.read_csv('Data/Labeled/ClimateOffensive_Labeled.csv')
df_cc = pd.read_csv('Data/Labeled/ClimateChange_Labeled.csv')
df_st = pd.read_csv('Data/Labeled/Sustainability_Labeled.csv')
df_ax = pd.read_csv('Data/Labeled/Anxietyhelp_Labeled.csv')

In [8]:
df_sample = pd.concat([df_co,df_cc,df_st,df_ax], ignore_index=True)

## **Define Tagging Chain for Classifying Comments**

In [10]:
class YouthTextClassification(BaseModel):
    youth_likelihood: int = Field(description="Likelihood of the text being generated by youth, on a scale from 1 to 10")
    anxiety_level: int = Field(description="Level of anxiety expressed in the text, on a scale from 1 to 10")
    stress_level: int = Field(description="Level of stress expressed in the text, on a scale from 1 to 10")
    youth_indicators: str = Field(description="Key words or phrases used in the text that indicate youth language")
    anxiety_stress_indicators: str = Field(description="Key words or phrases used in the text that indicate anxiety or stress")


llm = ChatCohere(model="command-r-plus", temperature=0).with_structured_output(YouthTextClassification)

prompt = ChatPromptTemplate.from_template(
"""
Extract the properties mentioned in the 'YouthTextClassification' function from the following passage.

Analyze the text and provide a classification with the following fields:
1. youth_likelihood: Likelihood of the text being generated by youth, on a scale from 1 to 10
2. anxiety_level: Level of anxiety expressed in the text, on a scale from 1 to 10
3. stress_level: Level of stress expressed in the text, on a scale from 1 to 10
4. youth_indicators: Key words or phrases used in the text that indicate youth language (comma-separated list)
5. anxiety_stress_indicators: Key words or phrases used in the text that indicate anxiety or stress (comma-separated list)

Passage:
{text}
"""
)

tag_chain = prompt | llm

In [None]:
import time

def process(text, max_retries=5, retry_delay=5):
    results = []
    for attempt in range(max_retries):
        try:
            result = tag_chain.invoke({'text':text})
            results.append(result)
            break
        except Exception as e:
            if attempt == max_retries -1:
                print(f"Failed to process text after {max_retries} attempts: {e}")
                results.append(None)
            else:
                wait_time = retry_delay * (2 ** attempt)
                print(f"Error occurred: {e}. Retrying in {wait_time} seconds...")
                time.sleep(wait_time)
    return results

new_data = []
for i in range(339,len(df_sample)):
    text = df_sample.loc[i,'body']
    result = process(text)
    print('i= ',i, ', result: ',result)

    if result:
        new_data.append({
            'body': text,
            'youth_likelihood': result[0].youth_likelihood,
            'anxiety_level': result[0].anxiety_level,
            'stress_level': result[0].stress_level,
            'youth_indicators': result[0].youth_indicators,
            'anxiety_stress_indicators': result[0].anxiety_stress_indicators
        })
    else:
        new_data.append({
            'body': text,
            'youth_likelihood': None,
            'anxiety_level': None,
            'stress_level': None,
            'youth_indicators': None,
            'anxiety_stress_indicators': None
        })

# Create a new DataFrame with the results
results_df = pd.DataFrame(new_data)

In [119]:
results_df[results_df['youth_likelihood']>5].sample(n=3)

Unnamed: 0,body,youth_likelihood,anxiety_level,stress_level,youth_indicators,anxiety_stress_indicators
297,"Try getting CBD oil, it really lowered my anxiety and had fewer panic attacks since I started taking it. Also, stress levels lowered as-well.",7,7,7,"CBD oil, panic attacks","anxiety, panic attacks, stress"
36,It's so fucking easy! Why the hell does everyone do so much mental gymnastics instead of just facing reality.\n\nEating meat when you don't need to is bad for the planet and it's animal cruelty and there are no two ways about that.,7,5,5,"fuck, fucking, hell","anxiety, stress, worry, nervous, scared, afraid, panic, phobia, fear, overwhelmed, upset, sad, depressed, crying, tearful, tear, mental health, mental gymnastics"
61,"&gt;I disagree that green energy is cheaper, because if it was, people would buy it.\n\nExcept it is cheaper. You are ignoring the whole conversation we had where I explained to you why you are misusing the concept. The term you are looking for is ""market price"".",6,2,3,"disagree, ignoring",none


In [120]:
results_df[results_df['youth_likelihood']>5].sample(n=3)

Unnamed: 0,body,youth_likelihood,anxiety_level,stress_level,youth_indicators,anxiety_stress_indicators
36,It's so fucking easy! Why the hell does everyone do so much mental gymnastics instead of just facing reality.\n\nEating meat when you don't need to is bad for the planet and it's animal cruelty and there are no two ways about that.,7,5,5,"fuck, fucking, hell","anxiety, stress, worry, nervous, scared, afraid, panic, phobia, fear, overwhelmed, upset, sad, depressed, crying, tearful, tear, mental health, mental gymnastics"
25,"We're here to do something about climate change. We're not here to talk about why it's happening, how bad it is, or who to blame. We're here to brainstorm, organize, and act. Use this space to find resources, connect with others, and learn more about how you can make a difference. Please keep in mind [the sub's mission](https://new.reddit.com/r/ClimateOffensive/wiki/rules#wiki_our_mission) as you vote and comment, beware of [inactivism](https://www.scientificamerican.com/article/climate-deniers-shift-tactics-to-inactivism/), and follow [Reddiquette](https://www.reddithelp.com/hc/en-us/articles/205926439). \n\n\n*I am a bot, and this action was performed automatically. Please [contact the moderators of this subreddit](/message/compose/?to=/r/ClimateOffensive) if you have any questions or concerns.*",7,5,5,"brainstorm, organize, act, resources, connect, learn, vote, comment, beware, follow","climate change, climate deniers, inactivism"
330,"I have read a great book, ""Anxiety Busters"", which has really helped me to calm down my anxiety thoughts.",6,5,3,"book, busters","anxiety, calm down"


In [95]:
# Merge the results with the original DataFrame
df_sample = pd.concat([df_sample.reset_index(drop=True), results_df.drop('body', axis=1)], axis=1)

In [121]:
df_sample.to_csv('Data/First_results.csv', index=False)