In [2]:
# import the necessary libraries
import nltk
import pandas as pd
import re
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer

In [3]:
# Make sure NLTK's 'stopwords' corpus is available locally; stemming() needs it
# for stopwords.words('english'). Returns True when the package is present.
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/abhaydhek/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [4]:
# Single Porter stemmer instance shared by every stemming() call.
ps = PorterStemmer()

_ENGLISH_STOPWORDS = None  # filled on first use by _english_stopwords()


def _english_stopwords():
    """Return NLTK's English stop words as a frozenset, built only once."""
    global _ENGLISH_STOPWORDS
    if _ENGLISH_STOPWORDS is None:
        _ENGLISH_STOPWORDS = frozenset(stopwords.words('english'))
    return _ENGLISH_STOPWORDS


def stemming(content, stop_words=None, stemmer=None):
    """Normalise a comment into a space-separated string of stemmed keywords.

    Every character that is not an ASCII letter becomes a space, the text is
    lower-cased and split on whitespace, stop words are dropped, and each
    remaining word is stemmed.

    Parameters
    ----------
    content : str
        Raw comment text. Any non-string value (for example the float NaN
        pandas uses for a missing cell) yields an empty string instead of
        raising inside ``re.sub``.
    stop_words : container of str, optional
        Words to discard. Defaults to NLTK's English stop-word list.
    stemmer : object with a ``stem(word)`` method, optional
        Defaults to the notebook-level ``ps`` Porter stemmer.

    Returns
    -------
    str
        The stemmed words joined by single spaces; '' when nothing survives.
    """
    if not isinstance(content, str):
        return ''

    # Previously stopwords.words('english') was evaluated once per *word* and
    # searched as a list; a cached set makes the membership test O(1).
    if stop_words is None:
        stop_words = _english_stopwords()
    if stemmer is None:
        stemmer = ps

    letters_only = re.sub('[^a-zA-Z]', ' ', content).lower()
    # str.split() with no argument already collapses runs of whitespace and
    # ignores leading/trailing blanks, so no separate clean-up pass is needed.
    return ' '.join(
        stemmer.stem(word) for word in letters_only.split() if word not in stop_words
    )

In [5]:
# Load the raw Reddit comments; the path is relative to the working directory.
df=pd.read_csv("Reddit_Comments.csv")

In [6]:
# List the available columns; 'Comment' is the one cleaned later.
df.columns


Index(['Username', 'Post Title', 'Comment', 'Timestamp'], dtype='object')

In [10]:
# Add a 'Cleaned Text' column: each comment run through stemming().
df['Cleaned Text']= df['Comment'].apply(stemming)

In [None]:
# Preview the frame with the new 'Cleaned Text' column next to the raw comment.
df

Unnamed: 0,Username,Post Title,Comment,Timestamp,Cleaned Text
0,Deleted,What's the most useless advice you've heard ab...,[removed],2024-05-30 15:36:42,remov
1,NoResource9942,What's the most useless advice you've heard ab...,“Everyone has depression nowadays because of t...,2024-05-30 16:06:06,everyon depress nowaday state world invalid go...
2,PocketShebee,What's the most useless advice you've heard ab...,"Worst advice: pull yourself together, everyone...",2024-05-30 16:09:51,worst advic pull togeth everyon problem
3,pingpongcat_,What's the most useless advice you've heard ab...,i've been told it gets better with time or get...,2024-05-30 15:37:24,told get better time get tf get wors time unas...
4,JDMWeeb,What's the most useless advice you've heard ab...,My parents gaslighting me that I was fine and ...,2024-05-30 15:40:05,parent gaslight fine refus therapi help year
...,...,...,...,...,...
42592,Puzzled-Response-629,Do you think psych meds are worth it if they h...,That's good if they helped you.\n\nThe thing t...,2024-11-14 13:11:03,good help thing wonder chemic imbal idea would...
42593,Dazzling-Fire_Sign,Do you think psych meds are worth it if they h...,"right, taking care of your mental health is ju...",2024-11-14 15:15:17,right take care mental health import physic he...
42594,AmenoFPS,Do you think psych meds are worth it if they h...,"You can't really know, it's mostly just a theo...",2024-11-14 15:21:13,realli know mostli theori chemic imbal theori ...
42595,Unortha,Do you think psych meds are worth it if they h...,Research the symptoms you think you have in co...,2024-11-14 17:20:49,research symptom think comparison symptom ment...


In [11]:
# Placeholder comments ("[removed]" / "[deleted]") must be detected on the raw
# 'Comment' column. stemming() strips the brackets and stems the word (e.g.
# "[removed]" becomes "remov"), so a bracketed pattern can never match
# 'Cleaned Text' and such rows would otherwise slip through as real content.
is_placeholder = df['Comment'].str.strip().isin(['[removed]', '[deleted]'])

# Keep rows from non-deleted users whose comment is real and still has content
# after cleaning, and retain only the columns needed downstream.
df1 = df.loc[
    (df['Username'] != "Deleted")
    & ~is_placeholder
    & df['Cleaned Text'].notnull()
    & (df['Cleaned Text'].str.strip() != ''),
    ['Username', 'Cleaned Text', 'Timestamp'],
].copy()

In [12]:
# Preview the filtered frame; it keeps the original row labels, so the index has gaps.
df1

Unnamed: 0,Username,Cleaned Text,Timestamp
1,NoResource9942,everyon depress nowaday state world invalid go...,2024-05-30 16:06:06
2,PocketShebee,worst advic pull togeth everyon problem,2024-05-30 16:09:51
3,pingpongcat_,told get better time get tf get wors time unas...,2024-05-30 15:37:24
4,JDMWeeb,parent gaslight fine refus therapi help year,2024-05-30 15:40:05
5,pxtatosoup,ex tell smart depress depress unintellig peopl...,2024-05-30 18:13:19
...,...,...,...
42592,Puzzled-Response-629,good help thing wonder chemic imbal idea would...,2024-11-14 13:11:03
42593,Dazzling-Fire_Sign,right take care mental health import physic he...,2024-11-14 15:15:17
42594,AmenoFPS,realli know mostli theori chemic imbal theori ...,2024-11-14 15:21:13
42595,Unortha,research symptom think comparison symptom ment...,2024-11-14 17:20:49


In [13]:
# Persist the filtered frame (Username, Cleaned Text, Timestamp) for the
# labelling stage. index=False omits the row labels from the file, so the rows
# are renumbered from 0 when it is read back.
df1.to_csv('CLMentalHealth.csv', index=False)

In [14]:
import pandas as pd
from transformers import pipeline

In [15]:
# Hugging Face checkpoint used for labelling (an SST-2 sentiment fine-tune of
# DistilBERT, per its name). Change it here to try a different model.
model_name = "distilbert-base-uncased-finetuned-sst-2-english"

# device=0 requests accelerator index 0.
# NOTE(review): presumably this raises on a machine with no GPU/accelerator —
# confirm, and consider device=-1 (CPU) as a fallback for reproducibility.
classifier = pipeline("text-classification", model=model_name, device=0)

# Smoke test: a clearly negative sentence.
text = "I feel hopeless and like there's no way out."

# Run the pipeline on the single example.
result = classifier(text)

# The pipeline returns a list holding one {'label', 'score'} dict for the input.
print(result)

[{'label': 'NEGATIVE', 'score': 0.999643087387085}]


In [16]:
# Reload the cleaned comments written earlier in the notebook. All three columns
# hold text, so pandas' default NA inference is switched off: otherwise a
# cleaned comment or username that is literally "nan", "null" or "NA" would be
# read back as a float NaN instead of the original string.
df = pd.read_csv('CLMentalHealth.csv', keep_default_na=False)

def classify_with_model(text, model=None, depressive_labels=('NEGATIVE',), max_char_limit=512):
    """Return 1 when the classifier gives `text` a depressive label, else 0.

    Parameters
    ----------
    text : str
        Text to classify. Any non-string value (for example the float NaN
        pandas produces for a missing cell) is labelled 0 without calling the
        model, instead of raising on the slice below.
    model : callable, optional
        Called as ``model(text, truncation=True)``; must return a list whose
        first item is a dict with a 'label' key. Defaults to the notebook-level
        ``classifier`` pipeline.
    depressive_labels : container of str, optional
        Model labels that count as depressive. Adjust for models that use a
        different label set.
    max_char_limit : int, optional
        Number of leading characters of `text` passed to the model.

    Returns
    -------
    int
        1 if the top label is in `depressive_labels`, otherwise 0.
    """
    if not isinstance(text, str):
        return 0
    if model is None:
        model = classifier

    truncated_text = text[:max_char_limit]

    # The character cap is only a cheap pre-filter: the model's limit is counted
    # in tokens, and punctuation- or symbol-heavy text can produce about one
    # token per character. truncation=True lets the tokenizer enforce the real
    # limit so such input cannot overflow the model.
    result = model(truncated_text, truncation=True)
    label = result[0]['label']

    return 1 if label in depressive_labels else 0

In [18]:
# Label every row: 1 when classify_with_model flags the text as depressive, else 0.
# NOTE(review): the model is fed the stemmed, stop-word-stripped 'Cleaned Text',
# while the sanity-check sentences in this notebook are raw English — confirm
# the classifier behaves acceptably on stemmed input.
df['Depressive'] = df['Cleaned Text'].apply(classify_with_model)

# Write the labelled frame to disk without the index column.
df.to_csv('CLLMentalHealth.csv', index=False)
print("Labeled dataset saved to 'CLLMentalHealth.csv'")

Labeled dataset saved to 'CLLMentalHealth.csv'


In [17]:
# Hand-written sanity check: raw English sentences alternating between clearly
# negative and clearly positive sentiment.
test_statements = [
    "I feel so lonely and hopeless.",
    "Life is beautiful, and I am grateful.",
    "I can't stop crying; everything feels wrong.",
    "Today was a great day at work!",
    "I hate myself for failing again.",
    "I am so proud of what I achieved.",
    "Nothing makes sense anymore; I'm drowning.",
    "I feel loved and cared for by my family."
]

# Print the 0/1 label assigned to each sentence so it can be checked by eye.
for statement in test_statements:
    label = classify_with_model(statement)
    print(f"Statement: '{statement}' -> Label: {label}")


Statement: 'I feel so lonely and hopeless.' -> Label: 1
Statement: 'Life is beautiful, and I am grateful.' -> Label: 0
Statement: 'I can't stop crying; everything feels wrong.' -> Label: 1
Statement: 'Today was a great day at work!' -> Label: 0
Statement: 'I hate myself for failing again.' -> Label: 1
Statement: 'I am so proud of what I achieved.' -> Label: 0
Statement: 'Nothing makes sense anymore; I'm drowning.' -> Label: 1
Statement: 'I feel loved and cared for by my family.' -> Label: 0


In [19]:
# Inspect the text column that is passed to the classifier.
df['Cleaned Text']

0        everyon depress nowaday state world invalid go...
1                  worst advic pull togeth everyon problem
2        told get better time get tf get wors time unas...
3             parent gaslight fine refus therapi help year
4        ex tell smart depress depress unintellig peopl...
                               ...                        
37682    good help thing wonder chemic imbal idea would...
37683    right take care mental health import physic he...
37684    realli know mostli theori chemic imbal theori ...
37685    research symptom think comparison symptom ment...
37686                                              exactli
Name: Cleaned Text, Length: 37687, dtype: object

In [20]:
# Reload the labelled file from disk and preview it; this rebinds `df` to the
# saved result. The notebook display elides the middle rows of the 1000 requested.
df = pd.read_csv('CLLMentalHealth.csv')  
df.head(1000)

Unnamed: 0,Username,Cleaned Text,Timestamp,Depressive
0,NoResource9942,everyon depress nowaday state world invalid go...,2024-05-30 16:06:06,1
1,PocketShebee,worst advic pull togeth everyon problem,2024-05-30 16:09:51,1
2,pingpongcat_,told get better time get tf get wors time unas...,2024-05-30 15:37:24,1
3,JDMWeeb,parent gaslight fine refus therapi help year,2024-05-30 15:40:05,0
4,pxtatosoup,ex tell smart depress depress unintellig peopl...,2024-05-30 18:13:19,1
...,...,...,...,...
995,mentalhealth-ModTeam,thank post r mentalhealth remov post determin ...,2024-09-12 13:55:07,1
996,ShatteredExistence_,thank thought comment share experi realli mean...,2024-09-12 11:12:24,1
997,mentalhealth-ModTeam,thank post r mentalhealth remov post determin ...,2024-09-12 13:55:24,1
998,scrumdiddlyanxious,want jump conclus sinc trauma came differ sour...,2024-09-12 14:42:51,1
