### Initial Setup

#### Import Libraries

In [None]:
import pandas as pd

from os import getcwd

from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import MinMaxScaler

from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier

from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer

from sklearn.model_selection import train_test_split

from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

#### Load Dataset (`data/Student Depression Dataset.csv`)

In [None]:
RANDOM_STATE = 513  # seed passed to train_test_split further down

cwd = getcwd()
data = "data"
raw_data_file = "Student Depression Dataset.csv"

# Read the CSV from <cwd>/data, treating "?" cells as missing values,
# then discard every row that contains at least one missing value.
csv_path = "/".join((cwd, data, raw_data_file))
df = pd.read_csv(csv_path, na_values="?")
df = df.dropna()

df.info()
df.head()

#### Cleaning Data

In [None]:
# The id column is not needed for training.
df = df.drop(columns="id")

# Encode the yes/no answers as integers: "Yes" -> 1, any other value -> 0.
for yes_no_column in (
    "Have you ever had suicidal thoughts ?",
    "Family History of Mental Illness",
):
    df[yes_no_column] = df[yes_no_column].eq("Yes").astype("int64")

df.info()
df.head()

#### Train-Test Split

In [None]:
# Separate the label column from the feature columns.
target = df["Depression"]
attr = df.drop(columns="Depression")

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
attr_train, attr_test, target_train, target_test = train_test_split(
    attr,
    target,
    test_size=0.2,
    random_state=RANDOM_STATE,
)

#### Preprocessor Configuration

In [None]:
# Decide each feature's treatment by asking pandas whether its dtype is numeric
# instead of comparing the dtype with the literal "object". With the literal
# comparison, text columns stored under a non-object dtype (e.g. pandas'
# dedicated string dtype or a categorical) would be classed as numerical and
# handed to MinMaxScaler.
is_numeric = [pd.api.types.is_numeric_dtype(dtype) for dtype in attr.dtypes]
categorical_columns = [
    column for column, numeric in zip(attr.columns, is_numeric) if not numeric
]
numerical_columns = [
    column for column, numeric in zip(attr.columns, is_numeric) if numeric
]
print(f"categorical columns = {categorical_columns}")
print(f"numerical columns = {numerical_columns}")

# Non-numeric features are one-hot encoded; numeric features are rescaled
# with MinMaxScaler. Columns not listed in either group are dropped by
# ColumnTransformer's default `remainder` setting.
preprocessor = ColumnTransformer(transformers=[
    ('categorical', OneHotEncoder(handle_unknown="infrequent_if_exist"), categorical_columns),
    ('numerical', MinMaxScaler(), numerical_columns)
])

### Random Forest Classification

#### Model Configuration - Random Forest

In [None]:
# Preprocessing followed by a random forest. The forest is seeded with the
# same RANDOM_STATE used for the train/test split so that repeated runs of the
# notebook produce the same model and the same reported metrics.
model = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', RandomForestClassifier(random_state=RANDOM_STATE))
])

#### Model Fitting & Predictions - Random Forest

In [None]:
# Fit the pipeline on the training split, then label the held-out rows.
target_pred = model.fit(attr_train, target_train).predict(attr_test)

#### Metrics & Accuracy - Random Forest

In [None]:
# Compare the held-out labels with the random forest predictions.
acc = accuracy_score(y_true=target_test, y_pred=target_pred)
cm = confusion_matrix(y_true=target_test, y_pred=target_pred)
cr = classification_report(y_true=target_test, y_pred=target_pred)

# Accuracy, confusion matrix and per-class report, one after another.
print(acc, cm, cr, sep="\n")

### CART Classification

#### Model Configuration - CART

In [None]:
# Preprocessing followed by a single CART decision tree. The tree is seeded
# with the same RANDOM_STATE used for the train/test split so that repeated
# runs of the notebook produce the same tree and the same reported metrics.
model = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', DecisionTreeClassifier(random_state=RANDOM_STATE))
])

#### Model Fitting & Predictions - CART

In [None]:
# Train the CART pipeline and predict labels for the test features.
fitted_model = model.fit(attr_train, target_train)
target_pred = fitted_model.predict(attr_test)

#### Metrics & Accuracy - CART

In [None]:
# Score the CART predictions against the held-out labels.
acc = accuracy_score(y_true=target_test, y_pred=target_pred)
cm = confusion_matrix(y_true=target_test, y_pred=target_pred)
cr = classification_report(y_true=target_test, y_pred=target_pred)

# Print accuracy, then the confusion matrix, then the classification report.
for metric in (acc, cm, cr):
    print(metric)

### KNN Classification

#### Model Configuration - KNN

In [None]:
K = 5  # number of neighbours to use; change this value to experiment

# Preprocessing followed by a K-nearest-neighbours classifier.
knn_classifier = KNeighborsClassifier(n_neighbors=K)
model = Pipeline(
    steps=[
        ("preprocessor", preprocessor),
        ("classifier", knn_classifier),
    ]
)

#### Model Fitting & Predictions - KNN

In [None]:
# Fit the KNN pipeline on the training data and predict the test labels.
target_pred = model.fit(attr_train, target_train).predict(attr_test)

#### Metrics & Accuracy - KNN

In [None]:
# Evaluate the KNN predictions on the held-out split.
acc = accuracy_score(y_true=target_test, y_pred=target_pred)
cm = confusion_matrix(y_true=target_test, y_pred=target_pred)
cr = classification_report(y_true=target_test, y_pred=target_pred)

# Emit accuracy, confusion matrix and classification report on separate lines.
print(acc, cm, cr, sep="\n")