In [None]:
import tensorflow as tf
from tensorflow import keras
from keras.models import load_model

## Getting data

In [None]:
# !git clone https://huggingface.co/datasets/dair-ai/emotion

In [None]:
import gzip
import shutil

# Decompress every gzipped JSONL split of the emotion dataset into /content.
for partition in ("train", "test", "validation"):
  with gzip.open(f"/content/emotion/data/{partition}.jsonl.gz", 'rb') as compressed, \
       open(f"/content/{partition}.jsonl", 'wb') as extracted:
    # Stream the bytes across instead of loading the whole file in memory.
    shutil.copyfileobj(compressed, extracted)

In [None]:
# !mkdir /root/.kaggle
# !cp kaggle.json /root/.kaggle
# !kaggle datasets download -d nikhileswarkomati/suicide-watch

In [None]:
# import zipfile

# with zipfile.ZipFile("/content/suicide-watch.zip") as f:
#   f.extractall()

The download and extraction cells above are commented out to avoid re-running them.

## Preparing data

In [None]:
import pandas as pd

In [None]:
# Read the three extracted JSON-lines splits (train, test, validation) into frames.
emotionDF, emotionDFTest, emotionDFValid = (
    pd.read_json(path, lines=True)
    for path in (
        "/content/train.jsonl",
        "/content/test.jsonl",
        "/content/validation.jsonl",
    )
)

In [None]:
# Merge the three emotion splits, drop the rows labelled 0 (the "sad" posts per
# the note below), rename `label` to `class`, and mark every remaining post as
# class 0.
#
# Written as a single chain that returns a new frame. The previous version
# filtered with a boolean mask and then assigned a column into that result,
# which is the chained-assignment pattern pandas reports with
# SettingWithCopyWarning; it also ran a Python-level apply to write a constant.
#
# NOTE: this cell rebinds emotionDF, so re-running it without first re-running
# the loading cell appends the test/validation rows a second time.
emotionDF = (
    pd.concat([emotionDF, emotionDFTest, emotionDFValid])
    .loc[lambda frame: frame["label"] != 0]
    .rename(columns={'label': 'class'})
    .assign(**{"class": 0})  # "class" is a Python keyword, hence the dict form
)

Load the emotion dataset, remove the sad posts (label 0), rename the `label` column to `class`, and set every remaining post to class 0 (not suicidal).

In [None]:
# Load the suicide-detection CSV, keep only the `text` and `class` columns, and
# encode the class as an integer: 1 when it equals "suicide", 0 otherwise.
#
# Built as one chain so the new column is never assigned into a column-subset
# slice (the pattern behind SettingWithCopyWarning), and the comparison is
# vectorised instead of a per-row Python lambda.
suicideDF = (
    pd.read_csv("/content/Suicide_Detection.csv")
    .loc[:, ["text", "class"]]
    .assign(**{"class": lambda frame: frame["class"].eq("suicide").astype("int64")})
)

Load the suicide-detection dataset, keep only the `text` and `class` columns, and convert the class from a string to an integer (1 for `suicide`, 0 otherwise).

In [None]:
# Give both frames a fresh 0..n-1 index, discarding the old one.
emotionDF, suicideDF = (
    frame.reset_index(drop=True) for frame in (emotionDF, suicideDF)
)

In [None]:
# Preview the cleaned emotion frame; every row carries class 0 at this point.
emotionDF

Unnamed: 0,text,class
0,im grabbing a minute to post i feel greedy wrong,0
1,i am ever feeling nostalgic about the fireplac...,0
2,i am feeling grouchy,0
3,ive been taking or milligrams or times recomme...,0
4,i feel as confused about life as a teenager or...,0
...,...,...
14198,i feel a bit rude leaving you hanging there fr...,0
14199,i constantly worry about their fight against n...,0
14200,i feel its important to share this info for th...,0
14201,i truly feel that if you are passionate enough...,0


In [None]:
# Preview the suicide-detection frame (class 1 = "suicide", 0 = anything else).
suicideDF

Unnamed: 0,text,class
0,Ex Wife Threatening SuicideRecently I left my ...,1
1,Am I weird I don't get affected by compliments...,0
2,Finally 2020 is almost over... So I can never ...,0
3,i need helpjust help me im crying so hard,1
4,"I’m so lostHello, my name is Adam (16) and I’v...",1
...,...,...
232069,If you don't like rock then your not going to ...,0
232070,You how you can tell i have so many friends an...,0
232071,pee probably tastes like salty tea😏💦‼️ can som...,0
232072,The usual stuff you find hereI'm not posting t...,1


In [None]:
# Combine both sources into one frame and shuffle the rows.
#
# ignore_index=True is required here: emotionDF and suicideDF were each reset to
# a 0-based index, so a plain concat produces duplicate index labels. The
# label-based df.drop(indices_to_drop) further down would then delete not only
# the chosen class-1 rows but also every class-0 emotion row sharing one of
# those labels. The class-0 count recorded below (126044) reflects that loss.
df = pd.concat([emotionDF, suicideDF], ignore_index=True)
# Seeded so the row order, and therefore which rows are dropped later, is repeatable.
df = df.sample(frac=1, random_state=42)

Concatenate both datasets and shuffle the rows.

In [None]:
# Index labels of the first 60% of class-1 rows, taken in the frame's current order.
suicidal_labels = df.index[(df['class'] == 1).to_numpy()]
indices_to_drop = suicidal_labels[:int(len(suicidal_labels) * 0.6)]

In [None]:
# Remove the selected rows by index label and rebind df to the trimmed frame.
df = df.drop(index=indices_to_drop)

In [None]:
# Renumber the remaining rows 0..n-1 after the drop.
df = df.reset_index(drop=True)

In [None]:
# Class balance after dropping part of the class-1 rows.
df["class"].value_counts()

0    126044
1     46415
Name: class, dtype: int64

Drop 60% of the suicidal posts (class 1) to make the model perform better.

In [None]:
# 80/20 train/validation split over a shuffled copy of df.
# The shuffle is seeded so that, for a given df, the same rows land in each
# split on every run; unseeded, the split (and every metric computed on it)
# changed with each execution of this cell.
splitPoint = round(len(df) * 0.8)
shuffled = df.sample(frac=1, random_state=42)
trainDF = shuffled.iloc[:splitPoint].reset_index(drop=True)
validDF = shuffled.iloc[splitPoint:].reset_index(drop=True)

In [None]:
# Preview the training split (the first 80% of the shuffled rows).
trainDF

Unnamed: 0,text,class
0,Our english teacher gave us a task where one o...,0
1,The irony.When I was a kid I never really unde...,1
2,hey guys i am bored pls entertain me thanks,0
3,When you wake up in the morning and the very f...,1
4,I have everything.- beautiful caring gf. Carin...,1
...,...,...
137962,i hurt and feel suspicious and definitely get ...,0
137963,Paste whatever you have copied in the replies ...,0
137964,I just saw someone jerking off on zoom All of ...,0
137965,Pedophilia shouldn't be considered a mental il...,0


In [None]:
# Preview the validation split (the remaining rows after the split point).
validDF

Unnamed: 0,text,class
0,What should I do with lifeDear ppl of Reddit\n...,1
1,I feel empty as my balls after I nut I just wa...,0
2,"Finally told my crush I love him Well, I told ...",0
3,How do I not think about suicide?I thought eve...,1
4,Is anyone interested in seeing some hand Drawn...,0
...,...,...
34487,My mental illnesses make me feel worthless and...,1
34488,What's the craziest shit that happened in your...,0
34489,i hope i never graduate high school cause i do...,0
34490,Weird dream I had So my first dream was that t...,0


In [None]:
# Class balance of the training split.
trainDF["class"].value_counts()

0    100864
1     37103
Name: class, dtype: int64

In [None]:
# Class balance of the validation split.
validDF["class"].value_counts()

0    25180
1     9312
Name: class, dtype: int64

Train/validation split (80% / 20%).

In [None]:
from tensorflow.data import Dataset

In [None]:
def prepareData(data, batch=100):
  """Build a batched Dataset of (text, class) pairs from a DataFrame.

  The rows are shuffled once, unseeded, before the dataset is built, so the
  order differs between calls to this function.

  Args:
    data: frame with a "text" column (features) and a "class" column (labels).
    batch: number of examples per batch.

  Returns:
    The Dataset produced by slicing the (features, labels) arrays and
    batching them.
  """
  shuffled = data.sample(frac=1)
  pairs = (shuffled["text"].values, shuffled["class"].values)
  return Dataset.from_tensor_slices(pairs).batch(batch)

In [None]:
# Batch both splits with prepareData's default batch size (train first, then validation).
trainData, validData = (prepareData(frame) for frame in (trainDF, validDF))

In [None]:
# Print only the first validation batch as a sanity check of the tensors.
first_batch = next(iter(validData), None)
if first_batch is not None:
  x, y = first_batch
  print(x, y)

tf.Tensor(
[b'I failed \xf0\x9f\x98\x94 I hit 10 days but I fucking failed fuck me'
 b'I am going to sleep now, do you want to tell me a thing, I will read that in the morning Good Night its 00:57 here in India.'
 b"This has never been asked in this sub today, but I'm curious what do you think about your future?"
 b'I think today I realised how much of a British accent I got I asked my mum if she wanted some water\n\n\nAs in watoah or however it sounds like'
 b'We all complain about not having a gf/bf So here is an idea put in the comments \n\nAge \n\nGender\n\nCountry and state \n\nAnd boom we hook up with each other depending on that'
 b'The concept of life is unbearable\n\nThe world\xe2\x80\x99s climate will raise to an unbearable temperature while I am still alive. Our favorite restaurants and businesses we grew up will and are closing. With climate change, we don\xe2\x80\x99t even have that long to live, I\xe2\x80\x99m twenty and I probably won\xe2\x80\x99t die from natural causes

## Defining model

In [None]:
from keras.layers import TextVectorization, Dense, Embedding, Bidirectional, LSTM, Dropout
from keras.models import Sequential

In [None]:
# Learn a vocabulary capped at 2000 tokens from a sample of 5000 posts.
# The sample is drawn from trainDF rather than df so that validation text does
# not influence the vocabulary, and it is seeded so the vocabulary is repeatable.
vectorizer = TextVectorization(max_tokens=2000)
vectorizer.adapt(trainDF["text"].sample(5000, random_state=42).values)

In [None]:
# Binary text classifier: raw strings -> token ids -> embeddings -> BiLSTM -> probability.
model = Sequential([
    vectorizer,
    # input_dim matches the max_tokens=2000 given to the vectorizer.
    Embedding(input_dim=2000, output_dim=300),
    Dropout(rate=0.5),
    Bidirectional(LSTM(units=100)),
    # Single sigmoid unit, paired with binary cross-entropy at compile time.
    Dense(units=1, activation="sigmoid"),
])

In [None]:
# Print the layer-by-layer output shapes and parameter counts.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 text_vectorization (TextVe  (None, None)              0         
 ctorization)                                                    
                                                                 
 embedding (Embedding)       (None, None, 300)         600000    
                                                                 
 dropout (Dropout)           (None, None, 300)         0         
                                                                 
 bidirectional (Bidirection  (None, 200)               320800    
 al)                                                             
                                                                 
 dense (Dense)               (None, 1)                 201       
                                                                 
Total params: 921001 (3.51 MB)
Trainable params: 921001 

## Training Model

In [None]:
from keras.callbacks import EarlyStopping
from keras.metrics import Recall, Precision

In [None]:
# Stop training when validation loss stops improving and roll the model back to
# the best epoch seen. monitor and patience are the library defaults, spelled
# out for readability. With the default restore_best_weights=False, the model
# kept the weights of the final epoch, which is the one whose val_loss had just
# got worse.
earlyStopper = EarlyStopping(
    monitor="val_loss",
    patience=0,
    restore_best_weights=True,
)

In [None]:
# Adam with binary cross-entropy for the single sigmoid output; accuracy,
# recall and precision are tracked alongside the loss.
model.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy", Recall(), Precision()],
)

In [None]:
# Train for at most 100 epochs; earlyStopper may end the run sooner.
history = model.fit(
    x=trainData,
    validation_data=validData,
    epochs=100,
    callbacks=[earlyStopper],
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100


In [None]:
# Persist the trained model under the given directory.
# NOTE(review): the path looks like a Google Drive mount in Colab, but no cell in
# this notebook mounts it — confirm /content/drive exists before running.
model.save("/content/drive/MyDrive/SuicidePreventionSecondAttempt")