## Imports

In [1]:
# The needed libraries were imported.
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import LabelEncoder

## Load Dataset

In [2]:
# Reading the dataset.
df = pd.read_csv('Mental Health Sentiments.csv')

print("Columns in the dataset:", df.columns)

Columns in the dataset: Index(['ID', 'Statement', 'Status'], dtype='object')


## Data Preprocessing

In [3]:
# Handle missing values in the 'Statement' column (replacing with NaN with a placeholder).
df['Statement'] = df['Statement'].fillna('Missing Statement') 

# Encode target labels if they are not already numeric.
label_encoder = LabelEncoder()

# Apply label encoding while handling NaN values properly.
df['Status'] = label_encoder.fit_transform(df['Status'].fillna('Unknown'))

# Drop rows with 'Unknown' after encoding if necessary.
df = df[df['Status'] != label_encoder.transform(['Unknown'])[0]]

## Target Variable Setup

In [4]:
X = df['Statement']  
y = df['Status']    

## Text Data Vectorization

In [5]:
# Convert text data to numerical features by using the TfidfVectorizer.
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(X) 

## Train and Test Splittings

In [6]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

## Training the Logistic Regression Model

In [7]:
# Fitting.
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

print("\nModel trained successfully.")


Model trained successfully.


## Evaluating the Model

In [8]:
# Model on test data.
y_pred = model.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
print("\nAccuracy of the model:", accuracy * 100, "%")

# Printing a detailed classification report (precision, recall, F1-score).
print("\nClassification Report:")
print(classification_report(y_test, y_pred))


Accuracy of the model: 77.53793948534263 %

Classification Report:
              precision    recall  f1-score   support

           0       0.81      0.76      0.78       777
           1       0.90      0.69      0.78       588
           2       0.71      0.75      0.73      3041
           3       0.86      0.96      0.91      3348
           4       0.64      0.48      0.55       234
           5       0.73      0.45      0.55       533
           6       0.71      0.66      0.69      2088

    accuracy                           0.78     10609
   macro avg       0.77      0.68      0.71     10609
weighted avg       0.77      0.78      0.77     10609



## User Input Prediction

In [9]:
custom_input = ["Im good, feeling okay bro."]

# Preprocess the custom input using the trained TfidfVectorizer.
custom_input_transformed = vectorizer.transform(custom_input)

# Predict using the trained model.
custom_prediction = model.predict(custom_input_transformed)

# Decode the label to get the original category name.
predicted_label = label_encoder.inverse_transform(custom_prediction)

print("\nPredicted class for the custom input:", predicted_label)


Predicted class for the custom input: ['Normal']
