In [26]:
#This project is to analyze whether the posts on Reddit:r/mentalhealth are suicidal
#1) We start by web-scrape the content of the post (title, link, paragraphs) on Reddit:r/mentalhealth
#2) Put into a text-classification model to analyze if the content is suicidal

In [24]:
#Download libraries
import selenium
from selenium import webdriver
from selenium.webdriver.common.by import By
import pandas as pd
import time
import torch
import matplotlib.pyplot as plt

In [3]:
#Open the chrome browser with selenium driver
driver = webdriver.Chrome()
driver.get('https://www.reddit.com/r/mentalhealth/')

In [4]:
#Scroll down to the bottom of the website to load more content
n = 0
#You could decide how much content to load, but be careful cause we are going to put the paragraphs into the pipeline analysis, too many posts at the same time may cause a CPU or Memory problem on the PC
#looping 10 times can get around 270 posts
while (n<10):
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    time.sleep(1)
    n = n + 1

In [8]:
#Create an empty dictionary to store the posts scraped
DICT = {"titles":[], "links":[], "paragraphs": []}

#web scrape all the Reddit posts loaded on the website
elements = driver.find_elements(By.TAG_NAME, 'shreddit-post')

#Putting different elements into the dictionary
for element in elements:
    DICT["links"].append(element.get_attribute('content-href'))
    DICT["titles"].append(element.get_attribute('post-title'))
    #Each post elements contains a 'p' element:
    paragraphs_elements = element.find_elements(By.TAG_NAME,'p')
    paragraph_text = ""
    #Consolidate all the paragraphs together
    for para in paragraphs_elements:
        paragraph_text += para.text
    DICT["paragraphs"].append(paragraph_text)
    paragraph_text = ""
        
reddit_df = pd.DataFrame(DICT)

In [9]:
#Import the pipeline of model 'sentinet/suicidality' as a high-level helper
#Could find the website of the model at https://huggingface.co/sentinet/suicidality
from transformers import pipeline
pipe = pipeline("text-classification", model="sentinet/suicidality")

  from .autonotebook import tqdm as notebook_tqdm





In [10]:
#Load model directly
from transformers import AutoTokenizer, AutoModelForSequenceClassification

tokenizer = AutoTokenizer.from_pretrained("sentinet/suicidality")
model = AutoModelForSequenceClassification.from_pretrained("sentinet/suicidality")

In [11]:
#Putting the titles of each post into the pipeline model to analysis the suicidality of the title
reddit_df['title_suicidal_result'] = reddit_df['titles'].apply(lambda x: pipe(x))

#Extracting the Label and the Score from the results
reddit_df['title_suicidal_indicator'] = reddit_df['title_suicidal_result'].apply(lambda x: 1 if x[0]['label'] == 'LABEL_1' else 0)
reddit_df['title_suicidal_indicator_score'] = reddit_df['title_suicidal_result'].apply(lambda x: x[0]['score'])

In [12]:
#Putting the paragraphs of each post into the model (Since the number of tensors of each post content could be large, we could not simply use the high level pipeline as we did in title analysis:
#The paragraphs could be large, so it's a high memory running event, it maybe a little bit slow procress depending on the number of posts scrapped
reddit_df['paragraphs_suicidal_result'] = reddit_df['paragraphs'].apply(lambda x: model(**tokenizer.encode_plus(x, padding="longest", truncation=True, return_tensors="pt")).logits)

#Consolidte the results and extracting the label and the score from the results to another two columns
reddit_df['paragraphs_suicidal_result'] = reddit_df['paragraphs_suicidal_result'].apply(lambda x: (x.softmax(dim=1)))
reddit_df['paragraphs_suicidal_result'] = reddit_df['paragraphs_suicidal_result'].apply(lambda x: list(x[0]))
reddit_df['paragraph_suicidal_indicator'] = reddit_df['paragraphs_suicidal_result'].apply(lambda x: 0 if x[0].item()>x[1].item() else 1)
reddit_df['paragraph_suicidal_indicator_score'] = reddit_df['paragraphs_suicidal_result'].apply(lambda x: x[0].item() if x[0].item()>x[1].item() else x[1].item())

In [13]:
#Print the dataframe
reddit_df

Unnamed: 0,titles,links,paragraphs,title_suicidal_result,title_suicidal_indicator,title_suicidal_indicator_score,paragraphs_suicidal_result,paragraph_suicidal_indicator,paragraph_suicidal_indicator_score
0,Surveys & Research Studies Changes,https://www.reddit.com/r/mentalhealth/comments...,,"[{'label': 'LABEL_0', 'score': 0.9974748492240...",0,0.997475,"[tensor(0.9899, grad_fn=<UnbindBackward0>), te...",0,0.989944
1,How to limited sensitive ads on reddit,https://www.reddit.com/r/mentalhealth/comments...,,"[{'label': 'LABEL_0', 'score': 0.9989472031593...",0,0.998947,"[tensor(0.9899, grad_fn=<UnbindBackward0>), te...",0,0.989944
2,I think my girlfriend is becoming schizophrenic.,https://www.reddit.com/r/mentalhealth/comments...,My girlfriend and I live together and she is c...,"[{'label': 'LABEL_0', 'score': 0.9950187206268...",0,0.995019,"[tensor(0.0622, grad_fn=<UnbindBackward0>), te...",1,0.937793
3,How to stop crying because it's emotionally ma...,https://www.reddit.com/r/mentalhealth/comments...,I know this sounds like a weird question but w...,"[{'label': 'LABEL_0', 'score': 0.9572092890739...",0,0.957209,"[tensor(0.9399, grad_fn=<UnbindBackward0>), te...",0,0.939931
4,How tf do I lose weight w out starving tf outt...,https://www.reddit.com/r/mentalhealth/comments...,I'm this isn't the right sub but I rlly need h...,"[{'label': 'LABEL_0', 'score': 0.9986096620559...",0,0.998610,"[tensor(0.0616, grad_fn=<UnbindBackward0>), te...",1,0.938365
...,...,...,...,...,...,...,...,...,...
273,A tooth cavity made me have a breakdown,https://www.reddit.com/r/mentalhealth/comments...,"I have been depressed for a few years, and my ...","[{'label': 'LABEL_0', 'score': 0.9952375888824...",0,0.995238,"[tensor(0.0780, grad_fn=<UnbindBackward0>), te...",1,0.922014
274,Why do I go through random sleep phases?,https://www.reddit.com/r/mentalhealth/comments...,"I usually sleep around 5-7 hours a day, but at...","[{'label': 'LABEL_0', 'score': 0.9948146939277...",0,0.994815,"[tensor(0.0460, grad_fn=<UnbindBackward0>), te...",1,0.953992
275,I think I'm developing Schizophrenia and I'm s...,https://www.reddit.com/r/mentalhealth/comments...,"So to preface, my mother has Bipolar Disorder ...","[{'label': 'LABEL_0', 'score': 0.9282981753349...",0,0.928298,"[tensor(0.0455, grad_fn=<UnbindBackward0>), te...",1,0.954493
276,A good friend's mental health has spiraled so ...,https://www.reddit.com/r/mentalhealth/comments...,I'm unsure what to do here. The friendship sta...,"[{'label': 'LABEL_1', 'score': 0.9004208445549...",1,0.900421,"[tensor(0.0577, grad_fn=<UnbindBackward0>), te...",1,0.942321


In [18]:
#Print the post that both the titles and the paragraphs are suicidal
suicidal_posts = reddit_df[(reddit_df['title_suicidal_indicator'] == 1) & (reddit_df['paragraph_suicidal_indicator'] == 1)] 
suicidal_posts 

Unnamed: 0,titles,links,paragraphs,title_suicidal_result,title_suicidal_indicator,title_suicidal_indicator_score,paragraphs_suicidal_result,paragraph_suicidal_indicator,paragraph_suicidal_indicator_score
5,I think killing myself is the last option i have,https://www.reddit.com/r/mentalhealth/comments...,I am 24 living away from family to support the...,"[{'label': 'LABEL_1', 'score': 0.9941534399986...",1,0.994153,"[tensor(0.0027, grad_fn=<UnbindBackward0>), te...",1,0.997298
10,Is there any way to kinda give yourself therapy?,https://www.reddit.com/r/mentalhealth/comments...,I'm in my first day of my psych101 class for t...,"[{'label': 'LABEL_1', 'score': 0.9302034378051...",1,0.930203,"[tensor(0.0179, grad_fn=<UnbindBackward0>), te...",1,0.982074
11,"Resources for people who aren't mentally ill, ...",https://www.reddit.com/r/mentalhealth/comments...,Hello! I am searching for any resources that w...,"[{'label': 'LABEL_1', 'score': 0.9323724508285...",1,0.932372,"[tensor(0.0615, grad_fn=<UnbindBackward0>), te...",1,0.938528
17,Do you think someone with a mental health issu...,https://www.reddit.com/r/mentalhealth/comments...,"I might be bipolar, my therapist pointed it ou...","[{'label': 'LABEL_1', 'score': 0.8799483776092...",1,0.879948,"[tensor(0.0527, grad_fn=<UnbindBackward0>), te...",1,0.947336
20,Nothing works,https://www.reddit.com/r/mentalhealth/comments...,"I am on Zoloft 300mg, Duloxetine 60mg, remeron...","[{'label': 'LABEL_1', 'score': 0.9710419178009...",1,0.971042,"[tensor(0.0030, grad_fn=<UnbindBackward0>), te...",1,0.997004
...,...,...,...,...,...,...,...,...,...
264,I need help.,https://www.reddit.com/r/mentalhealth/comments...,I don't know if this is the right place to ask...,"[{'label': 'LABEL_1', 'score': 0.9664091467857...",1,0.966409,"[tensor(0.0124, grad_fn=<UnbindBackward0>), te...",1,0.987621
266,I wish i could make it stop,https://www.reddit.com/r/mentalhealth/comments...,Im so tierd of my brain being cruel... any tim...,"[{'label': 'LABEL_1', 'score': 0.9839653968811...",1,0.983965,"[tensor(0.1251, grad_fn=<UnbindBackward0>), te...",1,0.874920
267,Why is it my job to make sure everyone else is...,https://www.reddit.com/r/mentalhealth/comments...,I know I’m fucked up. I have so many fucking c...,"[{'label': 'LABEL_1', 'score': 0.9487884640693...",1,0.948788,"[tensor(0.0291, grad_fn=<UnbindBackward0>), te...",1,0.970930
270,I'm so scared and I need help,https://www.reddit.com/r/mentalhealth/comments...,I'm scared there is something extremely wrong ...,"[{'label': 'LABEL_1', 'score': 0.9122234582901...",1,0.912223,"[tensor(0.0090, grad_fn=<UnbindBackward0>), te...",1,0.990976


In [22]:
#print the correlation between the title_suicidal_indicator and the paragraph_suicidal_indicator
print(reddit_df['title_suicidal_indicator'].corr(reddit_df['paragraph_suicidal_indicator']))

0.278840629684456


In [None]:
# In conclusion, we scraped 278 posts from Reddit and then found out 68 posts are indicated to be suicidal (both the title and paragraphs are suicidal based on the model)
# The correlation between the titles and paragraphs is not that high which also makes sense at some points as the title are just some short descriptions.