**Load libraries and dataset**

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report

In [None]:
# Location of the raw CSV; the absolute /content path must be adjusted when the
# notebook is run from a different working environment.
DATA_PATH = '/content/mental_health_dataset.csv'
data = pd.read_csv(DATA_PATH)

**Preprocessing**

In [None]:
# Per-column count of missing values (isna is the same check as isnull).
data.isna().sum()

Unnamed: 0,0
User_ID,0
Age,0
Gender,0
Occupation,0
Country,0
Mental_Health_Condition,0
Severity,501
Consultation_History,0
Stress_Level,0
Sleep_Hours,0


In [None]:
# Impute missing values in every object-dtype column with that column's most
# frequent value.
#
# The result is assigned back to the column instead of calling
# `data[col].fillna(..., inplace=True)`: that form is chained assignment, which
# pandas warns about and which no longer updates `data` from pandas 3.0 on.
#
# NOTE(review): the null count above reports 501 missing `Severity` values;
# mode imputation gives all of them the most common severity - confirm that
# this is the intended treatment.
for col in data.select_dtypes(include='object').columns:
    modes = data[col].mode()
    # mode() is empty when the column holds only missing values; leave such a
    # column untouched rather than failing on modes[0].
    if modes.empty:
        continue
    data[col] = data[col].fillna(modes[0])

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  data[i].fillna(data[i].mode()[0], inplace = True)


In [None]:
# Feature columns that are treated as categorical and converted to integer codes.
categorical_columns = [
    'Gender',
    'Occupation',
    'Country',
    'Severity',
    'Consultation_History',
    'Stress_Level',
]

# Single encoder instance; every fit_transform call refits it from scratch.
encoder = LabelEncoder()

In [None]:
# Replace each categorical column with integer codes. Values are cast to str
# first so the encoder always sees one uniform type.
# Note: the columns are overwritten in `data`, so re-running this cell encodes
# the already-encoded integers again (as strings).
for col in categorical_columns:
    column_as_text = data[col].astype(str)
    data[col] = encoder.fit_transform(column_as_text)

In [None]:
# Build the feature matrix X and the integer-encoded target y.
#
# Columns that are not predictive features are removed from X:
#   - 'User_ID'                 : identifier
#   - 'Mental_Health_Condition' : the target itself
#   - 'Unnamed: 0'              : listed in the null-count output above;
#                                 presumably a row index written to the CSV -
#                                 TODO confirm. Leaving it in would feed a
#                                 row counter to the model as a feature.
non_feature_columns = ['User_ID', 'Mental_Health_Condition']
if 'Unnamed: 0' in data.columns:
    non_feature_columns.append('Unnamed: 0')

X = data.drop(columns=non_feature_columns)
# Refits the shared encoder on the target, so afterwards `encoder.classes_`
# holds the target class names.
y = encoder.fit_transform(data['Mental_Health_Condition'])

In [None]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Standardise the features. The scaler is fitted on the training split only and
# then applied unchanged to the test split, so no test statistics leak into it.
scaler = StandardScaler()
X_train, X_test = scaler.fit_transform(X_train), scaler.transform(X_test)

In [None]:
# Baseline SVM classifier with an RBF kernel ('rbf' is also SVC's default kernel).
svm_model = SVC(
    kernel='rbf',
    C=1.0,
    gamma='scale',
    random_state=42,
)
svm_model.fit(X_train, y_train)

In [None]:
# Predict labels for the held-out test split with the baseline model.
y_pred = svm_model.predict(X=X_test)

In [None]:
# Report overall accuracy followed by the per-class precision/recall/F1 table.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Accuracy:", accuracy)
report = classification_report(y_true=y_test, y_pred=y_pred)
print("Classification Report:\n", report)

Accuracy: 0.5
Classification Report:
               precision    recall  f1-score   support

           0       0.51      0.38      0.44       102
           1       0.49      0.62      0.55        98

    accuracy                           0.50       200
   macro avg       0.50      0.50      0.49       200
weighted avg       0.50      0.50      0.49       200



In [None]:
from sklearn.model_selection import GridSearchCV

# Hyper-parameter search for the SVM using 3-fold cross-validation on the
# training split, scored by accuracy.
#
# The grid is a list of two sub-grids because `gamma` is a kernel coefficient
# that the linear kernel does not use: crossing it with kernel='linear' would
# refit the identical linear model once per gamma value.
param_grid = [
    {
        'kernel': ['rbf'],
        'C': [0.1, 1, 10, 100],
        'gamma': [1, 0.1, 0.01, 0.001, 'scale'],
    },
    {
        'kernel': ['linear'],
        'C': [0.1, 1, 10, 100],
    },
]
grid = GridSearchCV(SVC(), param_grid, cv=3, scoring='accuracy')
grid.fit(X_train, y_train)
print("Best Parameters:", grid.best_params_)

# Evaluate the best estimator (refitted on the full training split) on the test split.
y_pred = grid.best_estimator_.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
# Neutral label: the tuned model is not guaranteed to beat the baseline.
print("Accuracy after grid search:", accuracy)
# zero_division=0 reports 0.0 for a class that is never predicted without
# emitting an undefined-metric warning for it.
print(
    "Classification Report:\n",
    classification_report(y_test, y_pred, zero_division=0),
)

Best Parameters: {'C': 0.1, 'gamma': 1, 'kernel': 'rbf'}
Improved Accuracy: 0.49
Classification Report:
               precision    recall  f1-score   support

           0       0.00      0.00      0.00       102
           1       0.49      1.00      0.66        98

    accuracy                           0.49       200
   macro avg       0.24      0.50      0.33       200
weighted avg       0.24      0.49      0.32       200



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


**Data Balancing (SMOTE) and PCA**

In [None]:
from sklearn.decomposition import PCA
from imblearn.over_sampling import SMOTE

In [None]:
# Oversample with SMOTE on the training split only; the test split is not resampled.
sm = SMOTE(random_state=42)
X_train_res, y_train_res = sm.fit_resample(
    X_train,
    y_train,
)

In [None]:
# Keep as many principal components as needed to retain 95% of the variance.
# PCA is fitted on the resampled training data and then applied to the test data.
pca = PCA(n_components=0.95)
X_train_pca, X_test_pca = pca.fit_transform(X_train_res), pca.transform(X_test)

In [None]:
# Train an RBF SVM (C=10) on the balanced, PCA-reduced training data and
# predict on the PCA-transformed test split. This rebinds `svm_model` and
# `y_pred` from the earlier cells.
svm_model = SVC(
    kernel='rbf',
    C=10,
    gamma='scale',
    random_state=42,
).fit(X_train_pca, y_train_res)
y_pred = svm_model.predict(X_test_pca)

In [None]:
# Report accuracy and the per-class metrics for the SMOTE + PCA model.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Accuracy after balancing and PCA:", accuracy)
report = classification_report(y_true=y_test, y_pred=y_pred)
print("Classification Report:\n", report)

Accuracy after balancing and PCA: 0.555
Classification Report:
               precision    recall  f1-score   support

           0       0.57      0.55      0.56       102
           1       0.54      0.56      0.55        98

    accuracy                           0.56       200
   macro avg       0.56      0.56      0.55       200
weighted avg       0.56      0.56      0.56       200

