## Imports

In [9]:
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn import svm
from sklearn.metrics import accuracy_score, f1_score, classification_report

## Loading the Dataset

In [10]:
df = pd.read_csv("Mental Health Dataset.csv")
print("Dataset loaded. Shape:", df.shape)

Dataset loaded. Shape: (5000, 20)


## Preprocessing the Data

In [11]:
# Creating a new column for 'Sentiment' based on 'Productivity_Change'.
df['Sentiment'] = np.where(df['Productivity_Change'].str.lower().isin(['increase', 'stable']), 'Positive', 'Negative')

# Combining text-based columns and transform numerical features into text.
df['Combined_Text'] = (
    "Age: " + df['Age'].astype(str) + ", Stress Level: " + df['Stress_Level'] + ", Mental Health Condition: " +
    df['Mental_Health_Condition'].astype(str) + ", Work-Life Balance: " + df['Work_Life_Balance_Rating'].astype(str) +
    ", Productivity Change: " + df['Productivity_Change'] + ", Satisfaction: " + df['Satisfaction_with_Remote_Work']
)

## Shuffling and Splitting Data

In [12]:
df = df.sample(frac=1, random_state=42).reset_index(drop=True)

# Spliting the data into training and test sets.
x_train, x_test, y_train, y_test = train_test_split(df['Combined_Text'], df['Sentiment'], test_size=0.2, random_state=42)

## Feature Extraction with TF-IDF Vectorizer

In [13]:
# Feature Extraction.
vectorizer = TfidfVectorizer(min_df=5, ngram_range=(1, 2))
x_train_vec = vectorizer.fit_transform(x_train)
x_test_vec = vectorizer.transform(x_test)

# Model Training with SVM and adjust the weights per the supervisor's instruction.
param_grid = {'C': [0.1, 1, 10], 'kernel': ['linear', 'rbf'], 'gamma': ['scale', 'auto']}
clf_svm = RandomizedSearchCV(
    svm.SVC(class_weight='balanced'),
    param_grid,
    scoring='f1_macro',
    cv=3,
    n_iter=10,
    random_state=42
)
clf_svm.fit(x_train_vec, y_train)

## Best Model Check and Results

In [14]:
# Best Model Evaluation.
best_svm = clf_svm.best_estimator_
y_pred = best_svm.predict(x_test_vec)

# Results.
accuracy = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred, average='macro')
print("Accuracy:", accuracy * 100, "%")
print("F1 Score:", f1)
print("\nClassification Report:\n", classification_report(y_test, y_pred))

Accuracy: 100.0 %
F1 Score: 1.0

Classification Report:
               precision    recall  f1-score   support

    Negative       1.00      1.00      1.00       659
    Positive       1.00      1.00      1.00       341

    accuracy                           1.00      1000
   macro avg       1.00      1.00      1.00      1000
weighted avg       1.00      1.00      1.00      1000



## Function for Prediction

In [15]:
def predict_review_sentiment(reviews):
    # Transforming the user input reviews using the trained TF-IDF vectorizer.
    reviews_vec = vectorizer.transform(reviews)
    
    # Predict sentiment
    predictions = best_svm.predict(reviews_vec)
    
    return predictions

## Testing user inputs ma nigga 

In [16]:
# Testing the function with user inputs.

# User Inputs.
review_inputs = [
    "Age: 30, Stress Level: Low, Mental Health Condition: None, Work-Life Balance: 5, Productivity Change: Increase, Satisfaction: Very Satisfied",
    "Age: 35, Stress Level: Moderate, Mental Health Condition: None, Work-Life Balance: 4, Productivity Change: Increase, Satisfaction: Satisfied",
    "Age: 25, Stress Level: Low, Mental Health Condition: None, Work-Life Balance: 4, Productivity Change: Stable, Satisfaction: Very Satisfied",
    "Age: 50, Stress Level: Low, Mental Health Condition: None, Work-Life Balance: 5, Productivity Change: Increase, Satisfaction: Satisfied",
    "Age: 40, Stress Level: Low, Mental Health Condition: None, Work-Life Balance: 5, Productivity Change: Increase, Satisfaction: Extremely Satisfied",
    "Age: 28, Stress Level: High, Mental Health Condition: Anxiety, Work-Life Balance: 1, Productivity Change: Decrease, Satisfaction: Unsatisfied",
    "Age: 45, Stress Level: High, Mental Health Condition: Depression, Work-Life Balance: 2, Productivity Change: Decrease, Satisfaction: Very Unsatisfied",
    "Age: 32, Stress Level: High, Mental Health Condition: Anxiety, Work-Life Balance: 1, Productivity Change: Decrease, Satisfaction: Unsatisfied",
    "Age: 40, Stress Level: Moderate, Mental Health Condition: None, Work-Life Balance: 2, Productivity Change: Decrease, Satisfaction: Dissatisfied",
    "Age: 50, Stress Level: High, Mental Health Condition: None, Work-Life Balance: 1, Productivity Change: Decrease, Satisfaction: Very Unsatisfied",
    "Age: 28, Stress Level: High, Mental Health Condition: Anxiety, Work-Life Balance: 1, Productivity Change: Decrease, Satisfaction: Unsatisfied",
    "Age: 45, Stress Level: High, Mental Health Condition: Depression, Work-Life Balance: 2, Productivity Change: Decrease, Satisfaction: Very Unsatisfied",
    "Age: 32, Stress Level: High, Mental Health Condition: Anxiety, Work-Life Balance: 1, Productivity Change: Decrease, Satisfaction: Unsatisfied",
    "Age: 40, Stress Level: Moderate, Mental Health Condition: None, Work-Life Balance: 2, Productivity Change: Decrease, Satisfaction: Dissatisfied",
    "Age: 50, Stress Level: High, Mental Health Condition: None, Work-Life Balance: 1, Productivity Change: Decrease, Satisfaction: Very Unsatisfied"
]

# Predict sentiments by the created function in the previous cell.
predictions = predict_review_sentiment(review_inputs)

# Displaying the results.
print("\n==== Review Sentiment Predictions ====\n")
for i, (review, prediction) in enumerate(zip(review_inputs, predictions), 1):
    sentiment = "Positive" if prediction == "Positive" else "Negative"
    print(f"Review #{i}:")
    print(f"  Input: {review}")
    print(f"  Predicted Sentiment: {sentiment}\n" + "-"*50)


==== Review Sentiment Predictions ====

Review #1:
  Input: Age: 30, Stress Level: Low, Mental Health Condition: None, Work-Life Balance: 5, Productivity Change: Increase, Satisfaction: Very Satisfied
  Predicted Sentiment: Positive
--------------------------------------------------
Review #2:
  Input: Age: 35, Stress Level: Moderate, Mental Health Condition: None, Work-Life Balance: 4, Productivity Change: Increase, Satisfaction: Satisfied
  Predicted Sentiment: Positive
--------------------------------------------------
Review #3:
  Input: Age: 25, Stress Level: Low, Mental Health Condition: None, Work-Life Balance: 4, Productivity Change: Stable, Satisfaction: Very Satisfied
  Predicted Sentiment: Negative
--------------------------------------------------
Review #4:
  Input: Age: 50, Stress Level: Low, Mental Health Condition: None, Work-Life Balance: 5, Productivity Change: Increase, Satisfaction: Satisfied
  Predicted Sentiment: Positive
-----------------------------------------