<a href="https://colab.research.google.com/github/Twishanu/Depression-Anxiety-Suicide-detection/blob/main/AnxietyDepression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

In [3]:
# Location of the dataset CSV (an absolute path under /content).
DATASET_PATH = '/content/Suicide_Detection.csv'

# Read the whole CSV into a DataFrame; this untouched copy is kept as `raw_data`.
raw_data = pd.read_csv(DATASET_PATH)

In [4]:
# Preview the first rows to confirm the file loaded and to see which columns it has.
raw_data.head()

Unnamed: 0.1,Unnamed: 0,text,class
0,2,Ex Wife Threatening SuicideRecently I left my ...,suicide
1,3,Am I weird I don't get affected by compliments...,non-suicide
2,4,Finally 2020 is almost over... So I can never ...,non-suicide
3,8,i need helpjust help me im crying so hard,suicide
4,9,"I’m so lostHello, my name is Adam (16) and I’v...",suicide


In [5]:
# Plain-text dump of the frame. pandas elides the middle rows, so this mainly
# shows the first/last records and the range of the row index.
print(raw_data)

        Unnamed: 0                                               text  \
0                2  Ex Wife Threatening SuicideRecently I left my ...   
1                3  Am I weird I don't get affected by compliments...   
2                4  Finally 2020 is almost over... So I can never ...   
3                8          i need helpjust help me im crying so hard   
4                9  I’m so lostHello, my name is Adam (16) and I’v...   
...            ...                                                ...   
232069      348103  If you don't like rock then your not going to ...   
232070      348106  You how you can tell i have so many friends an...   
232071      348107  pee probably tastes like salty tea😏💦‼️ can som...   
232072      348108  The usual stuff you find hereI'm not posting t...   
232073      348110  I still haven't beaten the first boss in Hollo...   

              class  
0           suicide  
1       non-suicide  
2       non-suicide  
3           suicide  
4           s

In [6]:
# Drop the leftover index column(s) from the CSV export: build a boolean mask of
# every column whose name starts with 'Unnamed' and keep only the other columns.
unnamed_mask = raw_data.columns.str.contains('^Unnamed')
data = raw_data.loc[:, ~unnamed_mask]

In [7]:
# Check the frame after the 'Unnamed' columns were filtered out.
data.head()

Unnamed: 0,text,class
0,Ex Wife Threatening SuicideRecently I left my ...,suicide
1,Am I weird I don't get affected by compliments...,non-suicide
2,Finally 2020 is almost over... So I can never ...,non-suicide
3,i need helpjust help me im crying so hard,suicide
4,"I’m so lostHello, my name is Adam (16) and I’v...",suicide


In [8]:
# Replace every missing cell with an empty string so that later text processing
# never receives NaN/None. Non-missing cells are left untouched.
data = data.mask(data.isna(), '')

In [9]:
# (rows, columns) of the cleaned frame.
data.shape

(232074, 2)

In [13]:
# Encode the string labels as numbers, in place:
#   'suicide'     -> 1
#   'non-suicide' -> 0
# Rows whose label is anything else are left as they are.
label_codes = {'suicide': 1, 'non-suicide': 0}
for label_text, label_code in label_codes.items():
    data.loc[data['class'] == label_text, 'class'] = label_code

In [14]:
# Verify that 'class' now holds 1/0 instead of the original string labels.
data.head()

Unnamed: 0,text,class
0,Ex Wife Threatening SuicideRecently I left my ...,1
1,Am I weird I don't get affected by compliments...,0
2,Finally 2020 is almost over... So I can never ...,0
3,i need helpjust help me im crying so hard,1
4,"I’m so lostHello, my name is Adam (16) and I’v...",1


In [15]:
# Separate the frame into model inputs and targets:
#   X - the raw text of each record
#   Y - its encoded class label
X, Y = data['text'], data['class']

In [16]:
# Inspect the input Series (raw text, one entry per record).
print(X)

0         Ex Wife Threatening SuicideRecently I left my ...
1         Am I weird I don't get affected by compliments...
2         Finally 2020 is almost over... So I can never ...
3                 i need helpjust help me im crying so hard
4         I’m so lostHello, my name is Adam (16) and I’v...
                                ...                        
232069    If you don't like rock then your not going to ...
232070    You how you can tell i have so many friends an...
232071    pee probably tastes like salty tea😏💦‼️ can som...
232072    The usual stuff you find hereI'm not posting t...
232073    I still haven't beaten the first boss in Hollo...
Name: text, Length: 232074, dtype: object


In [17]:
# Inspect the label Series. The values are 0/1, but the Series is still
# object dtype at this point; it is cast to int later, before training.
print(Y)

0         1
1         0
2         0
3         1
4         1
         ..
232069    0
232070    0
232071    0
232072    1
232073    0
Name: class, Length: 232074, dtype: object


In [18]:
# Hold out 20% of the records for testing; the fixed random_state makes the
# shuffle (and therefore the split) repeatable from run to run.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=3,
)

In [19]:
# Number of records that ended up in the training split.
X_train.shape

(185659,)

In [20]:
# Feature extraction: turn each text into a TF-IDF weighted term vector.
# Text is lower-cased, English stop words are removed, and a term is kept
# if it appears in at least one document (min_df=1).
feature_extraction = TfidfVectorizer(
    min_df=1,
    stop_words='english',
    lowercase=True,
)

# Learn the vocabulary and IDF weights from the training split only, then
# reuse that fitted vectorizer to project the test split into the same space.
X_train_features = feature_extraction.fit_transform(X_train)
X_test_features = feature_extraction.transform(X_test)

# The labels are stored with object dtype; cast both splits to integers.
Y_train, Y_test = Y_train.astype('int'), Y_test.astype('int')

In [21]:
# Sparse-matrix printout: each line is (row index, vocabulary column index)
# followed by the TF-IDF weight stored at that position.
print(X_train_features)

  (0, 77369)	0.08300866901514829
  (0, 79434)	0.1388597376620859
  (0, 139095)	0.1353903084919808
  (0, 93385)	0.338128888578077
  (0, 100271)	0.17910297308262996
  (0, 13810)	0.3375364702473572
  (0, 85946)	0.28408736078289504
  (0, 2545)	0.41576045827630964
  (0, 131054)	0.22946590465689704
  (0, 39506)	0.15696464600651697
  (0, 24393)	0.1167631194141496
  (0, 80153)	0.0747732991925332
  (0, 53492)	0.08876743075655497
  (0, 56974)	0.10562562447663325
  (0, 44630)	0.07698089644387945
  (0, 122610)	0.244851764986864
  (0, 113385)	0.12126826529414376
  (0, 59499)	0.12668887020131064
  (0, 75746)	0.13213175107890265
  (0, 119565)	0.43344095447240966
  (0, 76820)	0.11838217792137506
  (0, 60526)	0.10031157056578235
  (1, 64829)	0.12340468863809001
  (1, 122381)	0.1460953548314486
  (1, 69983)	0.09894073745626038
  :	:
  (185658, 29766)	0.09403166363661049
  (185658, 81814)	0.10617342872485004
  (185658, 60658)	0.11227049164439265
  (185658, 114667)	0.1462239443908233
  (185658, 57406)	0.0

In [22]:
# Create the classifier: a logistic regression with scikit-learn's default
# hyperparameters (no arguments are passed).
model = LogisticRegression()

In [23]:
# Train the logistic regression model on the TF-IDF features of the training
# split and their integer labels.
model.fit(X_train_features, Y_train)

In [24]:
# Evaluate the fitted model on the data it was trained on.
# Step 1: predict a label for every training record.
prediction_on_training_data = model.predict(X_train_features)

# Step 2: accuracy = share of predictions that match the true labels.
accuracy_on_training_data = accuracy_score(
    y_true=Y_train,
    y_pred=prediction_on_training_data,
)

In [25]:
# Fraction of training records that the model labels correctly.
print(accuracy_on_training_data)

0.9439994829229932


In [26]:
# Evaluate the fitted model on the held-out test split.
# Step 1: predict a label for every test record.
prediction_on_test_data = model.predict(X_test_features)

# Step 2: accuracy = share of predictions that match the true labels.
accuracy_on_test_data = accuracy_score(
    y_true=Y_test,
    y_pred=prediction_on_test_data,
)

In [27]:
# Fraction of held-out test records labelled correctly; comparing it with the
# training accuracy shows how far the two scores diverge.
print(accuracy_on_test_data)

0.9335559625121189


In [31]:
# Ad-hoc text(s) to classify. Kept as a list because the vectorizer expects
# an iterable of documents.
input_data = [
    "I am very happy yayyyy. I want to explore the world.",
]

# Vectorize with the already-fitted TF-IDF vectorizer (transform only, no refit).
input_data_features = feature_extraction.transform(input_data)

In [32]:
# Classify every text in input_data and print one verdict per text.
#
# model.predict returns an array with one label per input row. Testing that
# whole array in a single `if` only works while it holds exactly one element;
# with two or more texts NumPy raises "truth value of an array ... is
# ambiguous". Iterating over the array handles any number of inputs and prints
# the same single line as before for a single input.
prediction = model.predict(input_data_features)
for predicted_label in prediction:
  # Label encoding used in this notebook: 0 = non-suicide, 1 = suicide.
  if predicted_label == 0:
    print("non-suicidal")
  else:
    print("suicidal")

non-suicidal


In [33]:
from sklearn.metrics import confusion_matrix

In [34]:
# Confusion matrix on the test split. By scikit-learn's convention rows are the
# true classes and columns the predicted classes, in sorted label order (0, 1),
# so the off-diagonal cells are the misclassified records.
confusion_matrix(Y_test, prediction_on_test_data)

array([[21956,  1312],
       [ 1772, 21375]])