In [2]:
import pandas as pd
from sklearn.neural_network import MLPClassifier

In [3]:
import os

df_labels = pd.read_csv('data/innovaid_hackathon_anima/output_classes.csv')

X = []
y = []
for file in os.listdir('preprocessed'):
    df = pd.read_csv(os.path.join('preprocessed', file))
    if df.shape[0] != 50:
        continue
    X.append(df[['pos', 'neg', 'neu']].to_numpy().reshape(150))
    y.append(list(df_labels[df_labels['sid'] == file.split('.')[0]]['has_depression'])[0])

In [4]:
len(X), len(y)

(2963, 2963)

In [5]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

### MLP Classifier

In [5]:
model = MLPClassifier(hidden_layer_sizes=(100,), max_iter=1000, activation='relu', random_state=42)

model.fit(X_train, y_train)

In [6]:
model.score(X_test, y_test)

0.7403035413153457

### SVM

In [7]:
from sklearn.svm import SVC

model = SVC(random_state=42)
model.fit(X_train, y_train)

In [8]:
model.score(X_test, y_test)

0.8043844856661045

---

In [6]:
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.colors import ListedColormap

from sklearn.datasets import make_circles, make_classification, make_moons
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.inspection import DecisionBoundaryDisplay
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier



names = [
    "Nearest Neighbors",
    "Linear SVM",
    "RBF SVM",
    "Gaussian Process",
    "Decision Tree",
    "Random Forest",
    "Neural Net",
    "AdaBoost",
    "Naive Bayes",
    "QDA",
]

classifiers = [
    KNeighborsClassifier(3),
    SVC(kernel="linear", C=0.025, random_state=42),
    SVC(gamma=2, C=1, random_state=42),
    GaussianProcessClassifier(1.0 * RBF(1.0), random_state=42),
    DecisionTreeClassifier(max_depth=5, random_state=42),
    RandomForestClassifier(
        max_depth=5, n_estimators=10, max_features=1, random_state=42
    ),
    MLPClassifier(alpha=1, max_iter=1000, random_state=42),
    AdaBoostClassifier(random_state=42),
    GaussianNB(),
    QuadraticDiscriminantAnalysis(),
]


for name, clf in zip(names, classifiers):
        clf = make_pipeline(StandardScaler(), clf)
        clf.fit(X_train, y_train)
        score = clf.score(X_test, y_test)
        print(f'{name}: {round(score, 5)}')

Nearest Neighbors: 0.76391
Linear SVM: 0.80438
RBF SVM: 0.80438
Gaussian Process: 0.80438
Decision Tree: 0.76391
Random Forest: 0.80438
Neural Net: 0.74874
AdaBoost: 0.77572
Naive Bayes: 0.49916
QDA: 0.67622
