# Stress & Anxiety Detection from Chat - AI Project

In [None]:
# STEP 1: Import Required Packages
import pandas as pd
import numpy as np
import re
import nltk
from nltk.corpus import stopwords

from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

# Fetch the NLTK 'stopwords' corpus so that stopwords.words('english'),
# called by the text-preprocessing step, has a word list to load.
nltk.download('stopwords')

In [None]:
# STEP 2: Upload CSV File
# NOTE(review): google.colab is presumably only importable inside a Google
# Colab runtime, which would make this cell Colab-specific — confirm before
# running the notebook elsewhere.
from google.colab import files
# NOTE(review): `uploaded` is not read by any later cell shown here; the load
# step opens the hard-coded name 'stress_chat_dataset.csv', so the uploaded
# file presumably has to carry exactly that name — confirm.
uploaded = files.upload()

In [None]:
# STEP 3: Load and View Dataset
# Read the chat dataset from the working directory into a DataFrame and
# preview its first rows (the bare expression is rendered by the notebook).
DATASET_FILE = 'stress_chat_dataset.csv'
df = pd.read_csv(DATASET_FILE)
df.head()

In [None]:
# STEP 4: Preprocess Text Data
# Compiled once: matches every character that is neither a lowercase ASCII
# letter nor whitespace.
_NON_LETTER_RE = re.compile(r'[^a-z\s]')

# English stop words; loaded lazily on first use and reused afterwards so the
# NLTK corpus is not re-read for every message.
_STOP_WORDS = None


def _get_stop_words():
    """Return the NLTK English stop-word set, loading the corpus only once."""
    global _STOP_WORDS
    if _STOP_WORDS is None:
        _STOP_WORDS = frozenset(stopwords.words('english'))
    return _STOP_WORDS


def preprocess(text):
    """Normalize a chat message before vectorization.

    The text is lowercased, every character other than ``a``-``z`` and
    whitespace is removed, the result is split on whitespace, English stop
    words are dropped, and the remaining tokens are joined with single spaces.

    Args:
        text: The raw message. ``None`` and float NaN (what pandas produces
            for an empty CSV cell) are treated as an empty message; any other
            non-string value is converted with ``str()`` first.

    Returns:
        The cleaned message as a single space-separated string; ``""`` when
        nothing is left after cleaning.
    """
    # Missing values would otherwise crash on ``.lower()``. NaN is the only
    # float that is not equal to itself.
    if text is None or (isinstance(text, float) and text != text):
        return ""
    if not isinstance(text, str):
        text = str(text)

    tokens = _NON_LETTER_RE.sub('', text.lower()).split()
    if not tokens:
        # Nothing to filter, so the stop-word list is not needed at all.
        return ""

    stop_words = _get_stop_words()
    return " ".join(word for word in tokens if word not in stop_words)

# Store a cleaned copy of every message in a new column, then preview the raw
# text, the cleaned text and the label side by side.
df['clean_text'] = df['text'].map(preprocess)
preview_columns = ['text', 'clean_text', 'label']
df[preview_columns].head()

In [None]:
# STEP 5: Convert Text to TF-IDF Features
# Targets come straight from the 'label' column; features are the TF-IDF
# representation of the cleaned messages.
y = df['label']
corpus = df['clean_text']
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(corpus)

In [None]:
# STEP 6: Split Data into Training and Test Sets
# Split the cleaned text rather than an already-vectorized matrix, then fit
# the TF-IDF vectorizer on the training rows only. Fitting it on the whole
# dataset lets test-set vocabulary and IDF statistics leak into the features,
# which can bias the evaluation in STEP 8.
text_train, text_test, y_train, y_test = train_test_split(
    df['clean_text'], y, test_size=0.2, random_state=42
)
X_train = vectorizer.fit_transform(text_train)  # (re)fit on training rows only
X_test = vectorizer.transform(text_test)        # reuse the training vocabulary

In [None]:
# STEP 7: Train the Logistic Regression Model
# Train a default-configured logistic-regression classifier on the training
# split. fit() hands back the estimator, so construction and training are
# chained; the bare `model` keeps the notebook's display of the fitted model.
model = LogisticRegression().fit(X_train, y_train)
model

In [None]:
# STEP 8: Evaluate Model Performance
# Predict labels for the held-out rows and print the classification report
# comparing them with the true labels.
y_pred = model.predict(X_test)
report = classification_report(y_test, y_pred)
print(report)

In [None]:
# STEP 9: Predict Stress from New Chat Input
def predict_message(message):
    """Classify one chat message and return a display string.

    The message is cleaned with ``preprocess``, turned into features with the
    fitted ``vectorizer`` and passed to ``model``. A predicted label equal to
    ``1`` yields the "Stressed" string; any other label yields "Normal".
    """
    features = vectorizer.transform([preprocess(message)])
    predicted_label = model.predict(features)[0]
    if predicted_label == 1:
        return "😟 Stressed"
    return "🙂 Normal"

# Try the classifier on two sample messages
sample_messages = (
    "I feel so tired and worried",
    "Everything is awesome today!",
)
for sample in sample_messages:
    print(predict_message(sample))