In [1]:
import pandas as pd
import re
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report

In [2]:
file_path = "/content/Stress.csv"
df = pd.read_csv(file_path)

In [3]:
df.head()

Unnamed: 0,subreddit,post_id,sentence_range,text,label,confidence,social_timestamp
0,ptsd,8601tu,"(15, 20)","He said he had not felt that way before, sugge...",1,0.8,1521614353
1,assistance,8lbrx9,"(0, 5)","Hey there r/assistance, Not sure if this is th...",0,1.0,1527009817
2,ptsd,9ch1zh,"(15, 20)",My mom then hit me with the newspaper and it s...,1,0.8,1535935605
3,relationships,7rorpp,"[5, 10]","until i met my new boyfriend, he is amazing, h...",1,0.6,1516429555
4,survivorsofabuse,9p2gbc,"[0, 5]",October is Domestic Violence Awareness Month a...,1,0.8,1539809005


In [4]:
df = df[['text', 'label']]

In [5]:
def preprocess_text(text):
    text = text.lower()  # Convert to lowercase
    text = re.sub(r'\W', ' ', text)  # Remove special characters
    tokens = text.split()  # Simple tokenization (split by space)
    return " ".join(tokens)

In [6]:
# text preprocessing
df['clean_text'] = df['text'].astype(str).apply(preprocess_text)

In [7]:
# Convert text data into numerical features using TF-IDF
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(df['clean_text'])
y = df['label']

In [8]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [9]:
nb_model = MultinomialNB()
nb_model.fit(X_train, y_train)

In [10]:
y_pred = nb_model.predict(X_test)

In [11]:
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")
print("Classification Report:\n", classification_report(y_test, y_pred))

Accuracy: 0.69
Classification Report:
               precision    recall  f1-score   support

           0       0.90      0.38      0.53       263
           1       0.64      0.96      0.77       305

    accuracy                           0.69       568
   macro avg       0.77      0.67      0.65       568
weighted avg       0.76      0.69      0.66       568



In [12]:
def predict_stress(sentence):
    processed_sentence = preprocess_text(sentence)
    vectorized_sentence = vectorizer.transform([processed_sentence])
    prediction = nb_model.predict(vectorized_sentence)[0]
    return "Stressful" if prediction == 1 else "Not Stressful"

In [13]:
user_input = input("Enter a sentence: ")
print("Prediction:", predict_stress(user_input))

Enter a sentence: I can’t sleep at night and my mind feels constantly overwhelmed.
Prediction: Stressful
