In [1]:
import nltk
from nltk.stem import PorterStemmer
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import CountVectorizer
import pandas as pd
import numpy as np 

import warnings
warnings.filterwarnings('ignore')

In [2]:
import re

In [3]:
df = pd.read_csv('Combined Data.csv')
df = df.iloc[:,1:]
df = df[df['statement'].isnull()==False]
df.reset_index()
df.head()

Unnamed: 0,statement,status
0,oh my gosh,Anxiety
1,"trouble sleeping, confused mind, restless hear...",Anxiety
2,"All wrong, back off dear, forward doubt. Stay ...",Anxiety
3,I've shifted my focus to something else but I'...,Anxiety
4,"I'm restless and restless, it's been a month n...",Anxiety


In [4]:
lemmatizer = WordNetLemmatizer()
stemmer = PorterStemmer()

In [5]:
stop_words = set(stopwords.words('english'))

def process_text(text):
    cleaned_text = re.sub('[^a-zA-Z]', " ", text)
    cleaned_text = cleaned_text.lower()
    words = cleaned_text.split()
    processed_words = [stemmer.stem(word) for word in words if word not in stop_words]
    processed_text = " ".join(processed_words)
    return processed_text

In [None]:
batch_size = 10000
corpus = []

for start in range(0, len(df), batch_size):
    end = start + batch_size
    batch = df.iloc[start:end]
    batch['processed_text'] = batch['statement'].apply(process_text)
    corpus.extend(batch['processed_text'].tolist())

In [None]:
corpus

In [32]:
from sklearn.feature_extraction.text import CountVectorizer
CV = CountVectorizer(binary = True,ngram_range=(2,3))
X = CV.fit_transform(corpus)

In [33]:
### using TFIDF

from sklearn.feature_extraction.text import TfidfVectorizer
CV1 = TfidfVectorizer(ngram_range=(2,3))
X1 = CV1.fit_transform(corpus)
y = df['status']

In [34]:
from sklearn.model_selection import train_test_split,GridSearchCV
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score,classification_report

#Train Test Split

X_train , X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state = 9)
tf_X_train , tf_X_test, tf_y_train, tf_y_test = train_test_split(X1, y, test_size=0.2, random_state = 9)

# Modelling
**Navie Bayes Classifier with default parameters**

In [35]:
Emotion_classifier_model = MultinomialNB(alpha= 1.0)
Emotion_classifier_model.fit(X_train,y_train)

#Prediction
ypred_test = Emotion_classifier_model.predict(X_test)
ypred_train = Emotion_classifier_model.predict(X_train)

# Evaluation

print("Train Accuracy", accuracy_score(y_train,ypred_train))
print("Test Accuracy", accuracy_score(y_test,ypred_test))
report = classification_report(y_test, ypred_test)
print("Classification Report:\n", report)

Train Accuracy 0.9720719438116933
Test Accuracy 0.6384170067381608
Classification Report:
                       precision    recall  f1-score   support

             Anxiety       0.71      0.68      0.69       781
             Bipolar       0.70      0.62      0.66       547
          Depression       0.52      0.62      0.56      2986
              Normal       0.88      0.70      0.78      3321
Personality disorder       0.72      0.38      0.50       229
              Stress       0.63      0.33      0.43       507
            Suicidal       0.53      0.67      0.59      2166

            accuracy                           0.64     10537
           macro avg       0.67      0.57      0.60     10537
        weighted avg       0.67      0.64      0.64     10537



In [36]:
tf_Emotion_classifier_model = MultinomialNB(alpha= 1.0)
tf_Emotion_classifier_model.fit(tf_X_train,tf_y_train)

#Prediction
tf_ypred_test = Emotion_classifier_model.predict(tf_X_test)
tf_ypred_train = Emotion_classifier_model.predict(tf_X_train)

# Evaluation

print("Train Accuracy TF", accuracy_score(tf_y_train,tf_ypred_train))
print("Test Accuracy TF", accuracy_score(tf_y_test,tf_ypred_test))
tf_report = classification_report(tf_y_test, tf_ypred_test)
print("Classification Report:\n", tf_report)

Train Accuracy TF 0.9754650721336371
Test Accuracy TF 0.6266489513144159
Classification Report:
                       precision    recall  f1-score   support

             Anxiety       0.89      0.48      0.62       781
             Bipolar       0.96      0.35      0.52       547
          Depression       0.51      0.64      0.57      2986
              Normal       0.70      0.81      0.75      3321
Personality disorder       0.97      0.29      0.45       229
              Stress       0.97      0.23      0.37       507
            Suicidal       0.58      0.58      0.58      2166

            accuracy                           0.63     10537
           macro avg       0.80      0.48      0.55     10537
        weighted avg       0.67      0.63      0.62     10537



In [57]:
# Trying it on the new text

# New sentence to classify
new_sentence = input("Enter your new sentence here: ")

# Preprocess the new sentence
def preprocess_single_sentence(sentence):
    cleaned_sentence = re.sub('[^a-zA-Z]', " ", sentence)
    cleaned_sentence = cleaned_sentence.lower()
    words = cleaned_sentence.split()
    processed_words = [stemmer.stem(word) for word in words if word not in stop_words]
    processed_sentence = " ".join(processed_words)
    return processed_sentence

processed_sentence = preprocess_single_sentence(new_sentence)

# Transform the preprocessed sentence using the fitted CountVectorizer
sentence_vector = CV.transform([processed_sentence])

# Make prediction using the trained model
prediction = tf_Emotion_classifier_model.predict(sentence_vector)
prediction_proba = tf_Emotion_classifier_model.predict_proba(sentence_vector)

# Map prediction to emotions
mental_state = {
    0: "Anxiety",
    1: "Bipolar",
    2: "Depression",
    3: "Normal",
    4: "Personality disorder",
    5: "Stress",
    6: "Suicidal"
}

for index, proba in enumerate(prediction_proba[0]):
    emotion = mental_state[index]
    print(f"{emotion}: {proba * 100:.2f}%")
print("Prediction probabilities of your current mental state is  :", prediction)

Enter your new sentence here:  I feel abundance, happy and successful 


Anxiety: 6.19%
Bipolar: 4.52%
Depression: 36.47%
Normal: 26.27%
Personality disorder: 1.73%
Stress: 4.22%
Suicidal: 20.59%
Prediction probabilities of your current mental state is  : ['Depression']
