In [1]:
import pandas as pd
import numpy as np
import re
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout, Bidirectional, Conv1D, MaxPooling1D, Flatten, Dense, Dropout, GlobalMaxPooling1D, Input, Concatenate
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [2]:
file_path = "/content/Stress.csv"
df = pd.read_csv(file_path)

In [3]:
def clean_text(text):
    text = text.lower()  # Convert to lowercase
    text = re.sub(r'\W', ' ', text)  # Remove special characters
    text = re.sub(r'\s+', ' ', text).strip()  # Remove extra spaces
    return text

In [4]:
# text preprocessing
df['clean_text'] = df['text'].astype(str).apply(clean_text)

In [5]:
label_encoder = LabelEncoder()
df['label'] = label_encoder.fit_transform(df['label'])

In [6]:
X_train, X_test, y_train, y_test = train_test_split(df['clean_text'], df['label'], test_size=0.2, random_state=42)

In [7]:
tokenizer = Tokenizer(num_words=5000)  # uses the top 5000 words
tokenizer.fit_on_texts(X_train)

In [8]:
X_train_seq = tokenizer.texts_to_sequences(X_train)
X_test_seq = tokenizer.texts_to_sequences(X_test)

In [9]:
max_len = max(len(seq) for seq in X_train_seq)  # Find max length
X_train_pad = pad_sequences(X_train_seq, maxlen=max_len, padding='post')
X_test_pad = pad_sequences(X_test_seq, maxlen=max_len, padding='post')

In [10]:
#pure bilstm
model1 = Sequential([
    Embedding(input_dim=5000, output_dim=128, input_length=max_len),
    Bidirectional(LSTM(128, return_sequences=True)),
    Bidirectional(LSTM(64)),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')
])



In [11]:
model1.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [12]:
model1.fit(X_train_pad, y_train, epochs=5, batch_size=32, validation_data=(X_test_pad, y_test))

Epoch 1/5
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m135s[0m 2s/step - accuracy: 0.5379 - loss: 0.6848 - val_accuracy: 0.6479 - val_loss: 0.6205
Epoch 2/5
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m142s[0m 2s/step - accuracy: 0.7848 - loss: 0.4963 - val_accuracy: 0.6813 - val_loss: 0.5904
Epoch 3/5
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m140s[0m 2s/step - accuracy: 0.8811 - loss: 0.3136 - val_accuracy: 0.7042 - val_loss: 0.7279
Epoch 4/5
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m145s[0m 2s/step - accuracy: 0.9291 - loss: 0.1993 - val_accuracy: 0.6937 - val_loss: 0.9270
Epoch 5/5
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m141s[0m 2s/step - accuracy: 0.9676 - loss: 0.1150 - val_accuracy: 0.6849 - val_loss: 0.9929


<keras.src.callbacks.history.History at 0x7b3b30d5d0d0>

In [13]:
loss, accuracy = model1.evaluate(X_test_pad, y_test)
print(f"Test Accuracy: {accuracy:.2f}")

[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 402ms/step - accuracy: 0.6678 - loss: 1.0313
Test Accuracy: 0.68


In [14]:
#pure cnn
model2 = Sequential([
    Embedding(input_dim=5000, output_dim=128, input_length=max_len),  # embedding layer
    Conv1D(filters=128, kernel_size=5, activation='relu'),  # convolution layer
    MaxPooling1D(pool_size=2),  # max pooling
    Dropout(0.3),
    Flatten(),  # flatten to feed into dense layers
    Dense(64, activation='relu'),
    Dropout(0.3),
    Dense(1, activation='sigmoid')  # output layer (binary)
])

In [15]:
model2.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [36]:
model2.fit(X_train_pad, y_train, epochs=5, batch_size=32, validation_data=(X_test_pad, y_test))

Epoch 1/5
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 220ms/step - accuracy: 0.9995 - loss: 0.0086 - val_accuracy: 0.7570 - val_loss: 1.0478
Epoch 2/5
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 141ms/step - accuracy: 0.9992 - loss: 0.0079 - val_accuracy: 0.7430 - val_loss: 1.0439
Epoch 3/5
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 161ms/step - accuracy: 0.9964 - loss: 0.0075 - val_accuracy: 0.7342 - val_loss: 1.1527
Epoch 4/5
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 151ms/step - accuracy: 0.9978 - loss: 0.0058 - val_accuracy: 0.7482 - val_loss: 1.2276
Epoch 5/5
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 141ms/step - accuracy: 0.9980 - loss: 0.0046 - val_accuracy: 0.7289 - val_loss: 1.3312


<keras.src.callbacks.history.History at 0x7b3b1e432250>

In [37]:
loss, accuracy = model2.evaluate(X_test_pad, y_test)
print(f"Test Accuracy: {accuracy:.2f}")

[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 33ms/step - accuracy: 0.7218 - loss: 1.3623
Test Accuracy: 0.73


In [39]:
# cnn -> bilstm
model3 = Sequential([
    Embedding(input_dim=5000, output_dim=128, input_length=max_len),
    Conv1D(128, 5, activation='relu'),
    Bidirectional(LSTM(64, return_sequences=True)),
    Bidirectional(LSTM(32)),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')
])

In [40]:
model3.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [41]:
model3.fit(X_train_pad, y_train, epochs=5, batch_size=32, validation_data=(X_test_pad, y_test))

Epoch 1/5
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m66s[0m 795ms/step - accuracy: 0.5775 - loss: 0.6776 - val_accuracy: 0.7113 - val_loss: 0.5833
Epoch 2/5
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 784ms/step - accuracy: 0.7814 - loss: 0.4812 - val_accuracy: 0.7183 - val_loss: 0.5611
Epoch 3/5
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 785ms/step - accuracy: 0.8860 - loss: 0.3118 - val_accuracy: 0.7201 - val_loss: 0.6367
Epoch 4/5
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 769ms/step - accuracy: 0.9360 - loss: 0.1884 - val_accuracy: 0.6743 - val_loss: 0.9874
Epoch 5/5
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m80s[0m 748ms/step - accuracy: 0.9631 - loss: 0.1204 - val_accuracy: 0.7077 - val_loss: 0.8165


<keras.src.callbacks.history.History at 0x7b3b17952ed0>

In [42]:
loss, accuracy = model3.evaluate(X_test_pad, y_test)
print(f"Test Accuracy: {accuracy:.2f}")

[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 156ms/step - accuracy: 0.6983 - loss: 0.8300
Test Accuracy: 0.71


In [43]:
# bilstm -> cnn
model4 = Sequential([
    Embedding(input_dim=5000, output_dim=128, input_length=max_len),
    Bidirectional(LSTM(128, return_sequences=True)),
    Conv1D(128, 5, activation='relu'),
    GlobalMaxPooling1D(),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')
])

In [44]:
model4.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [45]:
model4.fit(X_train_pad, y_train, epochs=5, batch_size=32, validation_data=(X_test_pad, y_test))

Epoch 1/5
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m101s[0m 1s/step - accuracy: 0.5499 - loss: 0.6907 - val_accuracy: 0.7130 - val_loss: 0.5523
Epoch 2/5
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m142s[0m 1s/step - accuracy: 0.7865 - loss: 0.4833 - val_accuracy: 0.7394 - val_loss: 0.5543
Epoch 3/5
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m142s[0m 1s/step - accuracy: 0.8927 - loss: 0.2784 - val_accuracy: 0.7377 - val_loss: 0.6451
Epoch 4/5
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m140s[0m 1s/step - accuracy: 0.9611 - loss: 0.1201 - val_accuracy: 0.7165 - val_loss: 0.9216
Epoch 5/5
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m141s[0m 1s/step - accuracy: 0.9765 - loss: 0.0825 - val_accuracy: 0.7095 - val_loss: 0.9610


<keras.src.callbacks.history.History at 0x7b3b27a0ce50>

In [46]:
loss, accuracy = model4.evaluate(X_test_pad, y_test)
print(f"Test Accuracy: {accuracy:.2f}")

[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 379ms/step - accuracy: 0.6978 - loss: 0.9853
Test Accuracy: 0.71


In [47]:
# parallel cnn and bilstm
input_layer = Input(shape=(max_len,))
embedding = Embedding(input_dim=5000, output_dim=128, input_length=max_len)(input_layer)

# CNN Branch
cnn_branch = Conv1D(128, 5, activation='relu')(embedding)
cnn_branch = GlobalMaxPooling1D()(cnn_branch)

# BiLSTM Branch
lstm_branch = Bidirectional(LSTM(128, return_sequences=False))(embedding)

# Concatenate both branches
merged = Concatenate()([cnn_branch, lstm_branch])
dense = Dense(64, activation='relu')(merged)
dropout = Dropout(0.5)(dense)
output = Dense(1, activation='sigmoid')(dropout)

model5 = Model(inputs=input_layer, outputs=output)

In [48]:
model5.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [49]:
model5.fit(X_train_pad, y_train, epochs=5, batch_size=32, validation_data=(X_test_pad, y_test))

Epoch 1/5
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m88s[0m 1s/step - accuracy: 0.5562 - loss: 0.6785 - val_accuracy: 0.7042 - val_loss: 0.5771
Epoch 2/5
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m84s[0m 1s/step - accuracy: 0.7727 - loss: 0.5036 - val_accuracy: 0.7095 - val_loss: 0.5530
Epoch 3/5
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m138s[0m 1s/step - accuracy: 0.8446 - loss: 0.3785 - val_accuracy: 0.7218 - val_loss: 0.6104
Epoch 4/5
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 1s/step - accuracy: 0.9367 - loss: 0.1968 - val_accuracy: 0.7148 - val_loss: 0.6415
Epoch 5/5
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m87s[0m 1s/step - accuracy: 0.9723 - loss: 0.0993 - val_accuracy: 0.7306 - val_loss: 0.7902


<keras.src.callbacks.history.History at 0x7b3b09f7d910>

In [50]:
loss, accuracy = model5.evaluate(X_test_pad, y_test)
print(f"Test Accuracy: {accuracy:.2f}")

[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 257ms/step - accuracy: 0.7285 - loss: 0.8125
Test Accuracy: 0.73


In [51]:
# stacked cnn and bilstm
model6 = Sequential([
    Embedding(input_dim=5000, output_dim=128, input_length=max_len),
    Conv1D(128, 5, activation='relu'),
    Conv1D(64, 3, activation='relu'),
    Bidirectional(LSTM(64, return_sequences=True)),
    Bidirectional(LSTM(32)),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')
])

In [52]:
model6.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [53]:
model6.fit(X_train_pad, y_train, epochs=5, batch_size=32, validation_data=(X_test_pad, y_test))

Epoch 1/5
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m63s[0m 745ms/step - accuracy: 0.5272 - loss: 0.6869 - val_accuracy: 0.7183 - val_loss: 0.6098
Epoch 2/5
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m51s[0m 723ms/step - accuracy: 0.7003 - loss: 0.5929 - val_accuracy: 0.6919 - val_loss: 0.5890
Epoch 3/5
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 718ms/step - accuracy: 0.8431 - loss: 0.3899 - val_accuracy: 0.6620 - val_loss: 0.6321
Epoch 4/5
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 719ms/step - accuracy: 0.9230 - loss: 0.2433 - val_accuracy: 0.6725 - val_loss: 0.8589
Epoch 5/5
[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 701ms/step - accuracy: 0.9608 - loss: 0.1372 - val_accuracy: 0.6796 - val_loss: 0.9743


<keras.src.callbacks.history.History at 0x7b3b0dccd4d0>

In [57]:
loss, accuracy = model6.evaluate(X_test_pad, y_test)
print(f"Test Accuracy: {accuracy:.2f}")

[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 156ms/step - accuracy: 0.6705 - loss: 1.0078
Test Accuracy: 0.68


In [None]:
def predict_stress(sentence):
    sentence = clean_text(sentence)
    sequence = tokenizer.texts_to_sequences([sentence])
    padded_seq = pad_sequences(sequence, maxlen=max_len, padding='post')
    # change the model name accordingly.
    prediction = model5.predict(padded_seq)[0][0]
    return "Stressful" if prediction > 0.5 else "Not Stressful"

In [56]:
new_sentence = "I can’t sleep at night and my mind feels constantly overwhelmed."
print(f"Prediction: {predict_stress(new_sentence)}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 749ms/step
Prediction: Stressful
