In [None]:
import pandas as pd
import numpy as np
import re
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [None]:
# Location of the dataset CSV. "/content/" is presumably a Colab upload
# directory -- adjust the path when running elsewhere.
file_path = "/content/Stress.csv"
# Read the whole file into memory; later cells use its 'text' and 'label' columns.
df = pd.read_csv(filepath_or_buffer=file_path)

In [None]:
def clean_text(text):
    """Normalize raw text before tokenization.

    Lowercases the input, replaces every run of non-alphanumeric characters
    (punctuation, symbols, underscores, whitespace) with a single space, and
    trims leading/trailing spaces.

    Args:
        text: The text to clean. ``None`` is treated as an empty string; any
            other non-string value is converted with ``str()``.

    Returns:
        str: The lowercase, single-spaced text (possibly empty).
    """
    if text is None:
        return ""
    if not isinstance(text, str):
        text = str(text)
    # `\W` on its own keeps "_" because underscore counts as a word character,
    # so it is listed explicitly. Whitespace is already matched by `\W`, and
    # the `+` collapses each run into one space, so no second pass is needed.
    return re.sub(r'[\W_]+', ' ', text.lower()).strip()

In [None]:
# Text preprocessing: build a cleaned copy of the raw 'text' column.
# Missing values are replaced with an empty string first; a bare astype(str)
# would turn NaN into the literal word "nan", which the tokenizer would then
# learn as if it were a real token.
df['clean_text'] = df['text'].fillna('').astype(str).apply(clean_text)

In [None]:
# Encode the class labels as integers. The fitted encoder is kept so the
# original label values remain recoverable from label_encoder.classes_.
label_encoder = LabelEncoder()
# The 'label' column is replaced by its encoded form; downstream cells read
# the integer version.
df = df.assign(label=label_encoder.fit_transform(df['label']))

In [None]:
# Hold out 20% of the rows for testing. The split is stratified on the label so
# both partitions keep the same class balance, and seeded (random_state=42) so
# it is reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    df['clean_text'],
    df['label'],
    test_size=0.2,
    random_state=42,
    stratify=df['label'],
)

In [None]:
# Fit the vocabulary on the training texts only, so nothing from the test split
# leaks into it. With num_words=5000 Keras emits word indices 1..4999 (index 0
# is reserved for padding), which stays inside the Embedding layer's input range.
# Index 1 is the out-of-vocabulary token: words that are unseen or outside the
# kept range map to it instead of being silently dropped from the sequence.
tokenizer = Tokenizer(num_words=5000, oov_token="<OOV>")
tokenizer.fit_on_texts(X_train)

In [None]:
# Turn each split's texts into lists of integer word indices, using the
# vocabulary fitted on the training data for both.
X_train_seq, X_test_seq = (
    tokenizer.texts_to_sequences(texts) for texts in (X_train, X_test)
)

In [None]:
# Every sequence is padded with trailing zeros (padding='post') up to the
# length of the longest *training* sequence. Test sequences longer than that
# are cut down to max_len (pad_sequences' default truncation removes tokens
# from the start of the sequence).
max_len = max(map(len, X_train_seq))
X_train_pad, X_test_pad = (
    pad_sequences(seqs, maxlen=max_len, padding='post')
    for seqs in (X_train_seq, X_test_seq)
)

In [None]:
# Binary text classifier: embedding -> two stacked LSTMs -> small dense head.
model = Sequential([
    # Declare the input shape explicitly; the Embedding `input_length`
    # argument used for this previously is deprecated in Keras 3.
    tf.keras.Input(shape=(max_len,)),
    # input_dim is taken from the tokenizer so the two cannot drift apart; it
    # must cover every index the tokenizer can emit (0..num_words-1).
    # mask_zero=True marks index 0 (the padding value) so the LSTMs skip the
    # padded timesteps. Without it, post-padded inputs end in long runs of
    # zeros that the LSTM must read after the real text, which is a likely
    # cause of training stalling near chance accuracy.
    Embedding(input_dim=tokenizer.num_words, output_dim=128, mask_zero=True),
    LSTM(128, return_sequences=True),  # full sequence output feeds the next LSTM
    Dropout(0.3),
    LSTM(64),  # returns only its last output as the text representation
    Dense(32, activation='relu'),
    Dropout(0.3),
    Dense(1, activation='sigmoid')  # single probability output (binary)
])



In [None]:
# Training setup: Adam optimizer, binary cross-entropy (pairs with the single
# sigmoid output unit), and accuracy as the reported metric.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train for 5 epochs with mini-batches of 32. Validation uses the last 10% of
# the training data rather than the test split, so the test set stays unseen
# until the final model.evaluate call. The History object is kept so the
# per-epoch curves can be inspected (and so its repr is not echoed as the
# cell's output).
history = model.fit(
    X_train_pad,
    np.asarray(y_train),  # plain array so validation_split can slice it
    epochs=5,
    batch_size=32,
    validation_split=0.1,
)

Epoch 1/5
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m66s[0m 844ms/step - accuracy: 0.5220 - loss: 0.6922 - val_accuracy: 0.5370 - val_loss: 0.6913
Epoch 2/5
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m74s[0m 733ms/step - accuracy: 0.5201 - loss: 0.6932 - val_accuracy: 0.5370 - val_loss: 0.6926
Epoch 3/5
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m83s[0m 751ms/step - accuracy: 0.5171 - loss: 0.6929 - val_accuracy: 0.5370 - val_loss: 0.6908
Epoch 4/5
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m80s[0m 728ms/step - accuracy: 0.5306 - loss: 0.6918 - val_accuracy: 0.5370 - val_loss: 0.6908
Epoch 5/5
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m54s[0m 759ms/step - accuracy: 0.5231 - loss: 0.6929 - val_accuracy: 0.5370 - val_loss: 0.6914


<keras.src.callbacks.history.History at 0x7d0230287a90>

In [None]:
# Score the trained model on the held-out test split. evaluate returns the
# loss followed by the compiled metrics (accuracy here).
loss, accuracy = model.evaluate(x=X_test_pad, y=y_test)
print(f"Test Accuracy: {accuracy:.2f}")

[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 174ms/step - accuracy: 0.5440 - loss: 0.6910
Test Accuracy: 0.54


In [None]:
def predict_stress(sentence):
    """Classify a single sentence with the trained model.

    The sentence goes through the same steps as the training data: it is
    cleaned with ``clean_text``, converted to word indices with the fitted
    ``tokenizer``, and post-padded to ``max_len`` before being passed to
    ``model``.

    Args:
        sentence: The raw text to classify.

    Returns:
        str: "Stressful" when the model's output is above 0.5, otherwise
        "Not Stressful".
    """
    cleaned = clean_text(sentence)
    # The tokenizer and pad_sequences work on batches, so wrap the sentence in
    # a one-element list and unwrap the single score afterwards.
    token_ids = tokenizer.texts_to_sequences([cleaned])
    model_input = pad_sequences(token_ids, maxlen=max_len, padding='post')
    score = model.predict(model_input)[0][0]
    if score > 0.5:
        return "Stressful"
    return "Not Stressful"

In [None]:
# Try the predictor on one hand-written example sentence.
new_sentence = "I can’t sleep at night and my mind feels constantly overwhelmed."
# predict_stress returns the label string, which is interpolated into the printed line.
print(f"Prediction: {predict_stress(new_sentence)}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 394ms/step
Prediction: Stressful
