In [42]:
# Import necessary libraries
import pandas as pd
import re
import string
import matplotlib.pyplot as plt
from wordcloud import WordCloud
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report
import nltk

# nltk resources which we need to download before-hand
# nltk.download("stopwords")
# nltk.download("wordnet")


df = pd.read_csv("stress.csv")


if "text" not in df.columns or "label" not in df.columns:
    raise KeyError("Columns 'text' and 'label' not found in dataset!")


lemmatizer = WordNetLemmatizer()
stop_words = set(stopwords.words("english"))


def preprocess_text(text):
    if not isinstance(text, str):  # Ensure it's a string
        return ""
    
    text = text.lower()  # Convert to lowercase
    text = re.sub(f"[{re.escape(string.punctuation)}]", "", text)  # Remove punctuation
    words = text.split()  # Split by spaces (instead of word_tokenize)
    
    # Remove stopwords & apply lemmatization
    words = [lemmatizer.lemmatize(word) for word in words if word not in stop_words]
    
    return " ".join(words)


df["Processed_Text"] = df["text"].astype(str).apply(preprocess_text)

# Convert text into numerical form using TF-IDF
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(df["Processed_Text"])  # Features
y = df["label"]  # Target (Stress / No Stress)

# Split data into train & test sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train Naïve Bayes model
model = MultinomialNB()
model.fit(X_train, y_train)

# Predict on test data
y_pred = model.predict(X_test)

# Model evaluation
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

# Take user input and predict
user_input = input("Enter a sentence to predict stress level: ")
processed_input = preprocess_text(user_input)  # Preprocess user input
input_vector = vectorizer.transform([processed_input])  # Convert to numerical form
prediction = model.predict(input_vector)  # Predict using trained model

# Print result
print("Prediction:", "Stress" if prediction[0] == 1 else "No Stress")


Accuracy: 0.6883802816901409
              precision    recall  f1-score   support

           0       0.83      0.41      0.55       263
           1       0.65      0.93      0.76       305

    accuracy                           0.69       568
   macro avg       0.74      0.67      0.66       568
weighted avg       0.73      0.69      0.66       568



Enter a sentence to predict stress level:  help me


Prediction: Stress
