<a href="https://colab.research.google.com/github/anfalsiddiqui45/mental-health-models/blob/main/baseline_lr_tfdf.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from google.colab import drive

# Mount Google Drive into the Colab filesystem; the data paths used below
# live underneath this mount point.
drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)


In [None]:
import pandas as pd

# Folder on the mounted Drive that holds train.csv / test.csv (and later results/).
BASE_PATH = "/content/drive/MyDrive"

train_df = pd.read_csv(f"{BASE_PATH}/train.csv")
test_df  = pd.read_csv(f"{BASE_PATH}/test.csv")

# read_csv parses empty cells as NaN, and TfidfVectorizer refuses NaN documents.
# Coerce the text column to plain strings (missing -> "") instead of dropping
# rows, so features stay aligned one-to-one with the label column.
X_train = train_df["statement"].fillna("").astype(str)
y_train = train_df["label"]

X_test = test_df["statement"].fillna("").astype(str)
y_test = test_df["label"]


In [None]:

from sklearn.feature_extraction.text import TfidfVectorizer

# Vectorizer settings: unigrams + bigrams, vocabulary capped at 20000 terms,
# built-in English stop-word list.
tfidf_params = {
    "max_features": 20000,
    "ngram_range": (1, 2),
    "stop_words": "english",
}
tfidf = TfidfVectorizer(**tfidf_params)

# The vocabulary / IDF weights are learned from the training text only;
# the test text is projected with that same fitted vectorizer.
X_train_tfidf = tfidf.fit_transform(X_train)
X_test_tfidf = tfidf.transform(X_test)


In [None]:
from sklearn.linear_model import LogisticRegression

# Baseline classifier on the TF-IDF features. class_weight="balanced"
# reweights classes inversely to their frequency in y_train.
lr_params = {
    "max_iter": 1000,
    "class_weight": "balanced",
    "n_jobs": -1,
}
lr = LogisticRegression(**lr_params)

# Left as the cell's last expression so the fitted estimator is displayed.
lr.fit(X_train_tfidf, y_train)


In [None]:
# Predict a label for every test statement with the classifier fitted above.
y_pred = lr.predict(X_test_tfidf)


In [None]:
# The model is already fitted and `y_pred` already computed in the cells above.
# The verbatim copy of those two cells that used to live here only retrained
# the same model on the same data, so it has been removed.
# Cheap sanity check instead: one prediction per test row.
assert len(y_pred) == len(y_test), "y_pred and y_test differ in length"


In [None]:
from sklearn.metrics import classification_report

# Display names for the classes.
# NOTE(review): assumes the sorted label values in `label` correspond to this
# order (e.g. 0=Normal, 1=Depression, 2=Anxiety, 3=Stress) — confirm against
# the preprocessing that produced train.csv / test.csv.
class_names = ["Normal", "Depression", "Anxiety", "Stress"]

report_text = classification_report(y_test, y_pred, target_names=class_names)
print(report_text)


In [None]:
import os

import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix

# Output folder on the mounted Drive; created on first run, reused afterwards.
RESULTS_PATH = f"{BASE_PATH}/results"
os.makedirs(RESULTS_PATH, exist_ok=True)

# Single source for both axes' tick labels.
# NOTE(review): assumes the sorted label values map to this order — confirm
# against the label encoding used when the CSVs were built.
class_names = ["Normal", "Depression", "Anxiety", "Stress"]

# Rows are true labels, columns are predicted labels.
cm = confusion_matrix(y_test, y_pred)

fig, ax = plt.subplots(figsize=(6, 5))
sns.heatmap(
    cm,
    annot=True,
    fmt="d",
    cmap="Blues",
    xticklabels=class_names,
    yticklabels=class_names,
    ax=ax,
)

ax.set_xlabel("Predicted")
ax.set_ylabel("True")
# Title previously contained a mis-decoded en dash ("â€“"); restored to "–".
ax.set_title("Confusion Matrix – LR + TF-IDF")
fig.tight_layout()
fig.savefig(f"{RESULTS_PATH}/confusion_matrix_lr_tfidf.png")
plt.show()


In [None]:
from sklearn.metrics import classification_report

# Same report as the printed one, but as a nested dict so it can be tabulated.
report = classification_report(
    y_test, y_pred,
    target_names=["Normal", "Depression", "Anxiety", "Stress"],
    output_dict=True,
)

# One row per class / aggregate, one column per metric, saved next to the figure.
report_df = pd.DataFrame(report).T
report_csv_path = f"{RESULTS_PATH}/lr_tfidf_classification_report.csv"
report_df.to_csv(report_csv_path)
