In [1]:

import pandas as pd
import re
import string
import matplotlib.pyplot as plt
from wordcloud import WordCloud
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report
import nltk

# These NLTK corpora must be downloaded once before running this notebook:
# nltk.download("stopwords")
# nltk.download("wordnet")


# Load the dataset; everything below relies on a "text" column (input
# documents) and a "label" column (prediction target).
df = pd.read_csv("stress.csv")

required_columns = ("text", "label")
if not all(column in df.columns for column in required_columns):
    raise KeyError("Columns 'text' and 'label' not found in dataset!")


# Shared NLP helpers, built once and reused by preprocess_text for every row.
stop_words = set(stopwords.words("english"))
lemmatizer = WordNetLemmatizer()


def preprocess_text(text, stopword_set=None, lemmatize=None):
    """Normalise one document for TF-IDF vectorisation.

    The text is lowercased and split on whitespace; each token then has its
    punctuation removed, stop words are dropped, and the remaining words are
    lemmatized and re-joined with single spaces.

    Stop words are matched *before* inner punctuation is removed as well as
    after. Stripping punctuation first would turn apostrophe stop words such
    as "don't" into "dont", which is not in the stop-word list and would
    therefore leak into the features.

    Args:
        text: The raw document. Anything that is not a ``str`` is treated as
            missing.
        stopword_set: Optional collection of lowercase stop words. Defaults to
            the module-level ``stop_words``.
        lemmatize: Optional callable mapping a word to its lemma. Defaults to
            ``lemmatizer.lemmatize`` from the module-level lemmatizer.

    Returns:
        The cleaned, space-joined text, or ``""`` when ``text`` is not a
        string or nothing survives the filtering.
    """
    if not isinstance(text, str):  # e.g. NaN/None from a missing cell
        return ""

    if stopword_set is None:
        stopword_set = stop_words
    if lemmatize is None:
        lemmatize = lemmatizer.lemmatize

    drop_punctuation = str.maketrans("", "", string.punctuation)

    kept = []
    for token in text.lower().split():
        # Only trim the edges first, so "don't," is still recognised as the
        # stop word "don't".
        trimmed = token.strip(string.punctuation)
        if trimmed in stopword_set:
            continue

        # Remove any remaining (inner) punctuation, e.g. "well-being".
        bare = trimmed.translate(drop_punctuation)
        if not bare or bare in stopword_set:
            continue  # punctuation-only token, or a stop word once cleaned

        kept.append(lemmatize(bare))

    return " ".join(kept)


# Clean every document. Missing values are replaced with an empty string
# *before* the cast to str; otherwise NaN becomes the literal token "nan" and
# enters the vocabulary as if it were a real word.
df["Processed_Text"] = df["text"].fillna("").astype(str).apply(preprocess_text)

y = df["label"]  # Target (Stress / No Stress)


# Split the cleaned documents first so that the vectorizer's vocabulary and
# IDF weights are learned from the training portion only. Fitting TF-IDF on
# the full dataset would leak test-set statistics into the features.
# The same random_state yields the same row assignment as splitting the
# matrix would.
text_train, text_test, y_train, y_test = train_test_split(
    df["Processed_Text"], y, test_size=0.2, random_state=42
)

vectorizer = TfidfVectorizer()
X_train = vectorizer.fit_transform(text_train)  # learn vocabulary + IDF here only
X_test = vectorizer.transform(text_test)  # reuse the fitted vocabulary

# Features for the whole dataset, kept under the original name for any later
# code that still refers to X.
X = vectorizer.transform(df["Processed_Text"])

# Multinomial Naive Bayes classifier trained on the TF-IDF training features.
# fit() returns the estimator itself, so construction and training can chain.
model = MultinomialNB().fit(X_train, y_train)


# Predicted labels for the test split; consumed by the optional report below.
y_pred = model.predict(X_test)

# Model evaluation (uncomment to print the metrics)
# print("Accuracy:", accuracy_score(y_test, y_pred))
# print(classification_report(y_test, y_pred))


# Classify one sentence typed by the user with the fitted vectorizer and model.
user_input = input("Enter a sentence to predict stress level: ")
processed_input = preprocess_text(user_input)  # Same cleaning as the training text
input_vector = vectorizer.transform([processed_input])  # Single-row TF-IDF matrix

if input_vector.nnz == 0:
    # Empty input, only stop words, or only words outside the learned
    # vocabulary: the feature row is all zeros, so the classifier would just
    # echo its class prior. Report that instead of a misleading label.
    prediction = None
    print("Prediction: unavailable (no known words in the input)")
else:
    prediction = model.predict(input_vector)  # Predict using trained model
    # NOTE(review): assumes label value 1 encodes "stress" — confirm against the dataset.
    print("Prediction:", "Stress" if prediction[0] == 1 else "No Stress")


Enter a sentence to predict stress level:  i am going to kill myself


Prediction: Stress
