In [1]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import OneHotEncoder, MinMaxScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [2]:
# Load Students.csv into a DataFrame.
# NOTE(review): this is an absolute, machine-specific path; the notebook only
# runs on a machine where this exact file exists.
DATA_PATH = '/home/breno-luiz/Documents/Planilha/Students.csv'
df = pd.read_csv(DATA_PATH)

In [3]:
# Separate the prediction target from the feature columns.
target_column = 'Depression'
y = df[target_column]
X = df.drop(target_column, axis=1)

In [4]:
# Partition the features by dtype: object columns form the categorical frame,
# every other column forms the numeric frame.
X_cat = X.select_dtypes(include='object')
X_num = X.select_dtypes(exclude='object')

In [5]:
one_hot_encoder = OneHotEncoder()
X_cat_encoded = one_hot_encoder.fit_transform(X_cat).toarray()

minmax = MinMaxScaler()
X_num_scaled = minmax.fit_transform(X_num)

In [6]:
X_all = np.concatenate((X_num_scaled, X_cat_encoded), axis=1)

In [7]:
X_train, X_test, y_train, y_test = train_test_split(X_all, y, test_size=2/3, random_state=1)

In [8]:
# Hyperparameter search spaces, one per classifier.
param_grid_rf = dict(
    n_estimators=[50, 100, 200, 300, 400, 500],
    max_depth=[None, 10, 20, 30, 40, 50],
    max_leaf_nodes=[None, 10, 20, 30, 40, 50],
)

param_grid_knn = dict(
    n_neighbors=[3, 5, 7, 10, 15, 20],
    weights=['uniform', 'distance'],
)

param_grid_svc = dict(
    C=[0.1, 1, 10, 100, 1000],
    kernel=['linear', 'rbf'],
)

In [9]:
def run_grid_search(estimator, param_grid, X, y):
    """Tune `estimator` over `param_grid` with 5-fold cross-validated grid search.

    Parameters
    ----------
    estimator : unfitted scikit-learn classifier
    param_grid : dict mapping parameter names to lists of candidate values
    X, y : training features and labels

    Returns
    -------
    The fitted GridSearchCV object; candidates are ranked by accuracy.
    """
    # n_jobs=-1 runs the candidate fits in parallel on all available cores.
    search = GridSearchCV(estimator, param_grid, cv=5, scoring='accuracy', n_jobs=-1)
    search.fit(X, y)
    return search


# The forest is seeded so that the selected hyperparameters and the reported
# scores are the same on every Restart & Run All; unseeded, they change
# between runs.
grid_rf = run_grid_search(RandomForestClassifier(random_state=1), param_grid_rf, X_train, y_train)
best_rf = grid_rf.best_estimator_

grid_knn = run_grid_search(KNeighborsClassifier(), param_grid_knn, X_train, y_train)
best_knn = grid_knn.best_estimator_

grid_svc = run_grid_search(SVC(), param_grid_svc, X_train, y_train)
best_svc = grid_svc.best_estimator_

In [10]:
# Tuned classifiers keyed by the label used in the printed report.
models = dict(zip(('Random Forest', 'KNN', 'SVC'), (best_rf, best_knn, best_svc)))

# (report label, metric function, extra keyword arguments) in print order.
weighted = {'average': 'weighted'}
metric_specs = (
    ('Accuracy', accuracy_score, {}),
    ('Precision', precision_score, weighted),
    ('Recall', recall_score, weighted),
    ('F1 Score', f1_score, weighted),
)

for name, model in models.items():
    # Best effort per model: a failure is reported and the loop moves on to
    # the next classifier.
    try:
        y_pred = model.predict(X_test)
        for label, metric, extra in metric_specs:
            print(f"{name} - {label}: {metric(y_test, y_pred, **extra):.4f}")
        print("\n")
    except Exception as e:
        print(f"Erro ao processar o modelo {name}: {e}")

# Report the winning parameter combination of each grid search.
for title, search in (('Random Forest', grid_rf), ('KNN', grid_knn), ('SVC', grid_svc)):
    print(f"Best Hyperparameters for {title}:", search.best_params_)

Random Forest - Accuracy: 0.8687
Random Forest - Precision: 0.8691
Random Forest - Recall: 0.8687
Random Forest - F1 Score: 0.8686


KNN - Accuracy: 0.7821
KNN - Precision: 0.7834
KNN - Recall: 0.7821
KNN - F1 Score: 0.7819


SVC - Accuracy: 0.9522
SVC - Precision: 0.9528
SVC - Recall: 0.9522
SVC - F1 Score: 0.9522


Best Hyperparameters for Random Forest: {'max_depth': 20, 'max_leaf_nodes': None, 'n_estimators': 400}
Best Hyperparameters for KNN: {'n_neighbors': 5, 'weights': 'uniform'}
Best Hyperparameters for SVC: {'C': 1, 'kernel': 'linear'}
