# Training & Evaluation
* We are going to break down training and evaluation into multiple notebooks, one for each algorithm that we train and evaluate.
* In this first notebook, we'll create baseline models that generate predictions using the `stratified` and `most frequent` class strategies.

## Install Libraries

In [1]:
# Install scikit-learn via the %pip magic; this notebook imports
# sklearn.compose.ColumnTransformer in the import cell below.
%pip install scikit-learn

Note: you may need to restart the kernel to use updated packages.


## Import Libraries

In [8]:
import os
import sys
import pandas as pd
import numpy as np
from sklearn.compose import ColumnTransformer
from pathlib import Path

# Make the `src` package importable: take the directory one level above the
# current working directory as an absolute path and register it on sys.path.
module_path = os.path.abspath('..')

# Appending only when missing keeps re-runs of this cell from adding duplicates.
already_registered = module_path in sys.path
if not already_registered:
    sys.path.append(module_path)
    
from src.utils import hello_world
from src.utils import categorical_preprocessing, numerical_preprocessing

## Initialize Directories

In [None]:
# Project directories, expressed relative to the current working directory.
# data_root_dir is where the training CSVs are read from in the next section.
data_root_dir = Path("..") / "data/"
# models_root_dir is not used in the cells shown here.
models_root_dir = Path("..") / "models/"

## Read Data

In [9]:
# Load the training features and the training target from the data directory.
# Both are read with pandas' default CSV settings, so each is a DataFrame.
X_train = pd.read_csv(data_root_dir / "X_train.csv")
y_train = pd.read_csv(data_root_dir / "y_train.csv")

In [10]:



# Column-wise preprocessing plan. Every entry is
# (step name, transformer, list of input columns); the step name becomes the
# prefix of the generated feature names (e.g. "preprocess_gender__...").
column_steps = [
    # --- categorical features ---
    ("preprocess_gender", categorical_preprocessing.gender_pipeline, ["gender"]),
    ("preprocess_profession", categorical_preprocessing.profession_pipeline, ["profession"]),
    (
        "sleep_duration_pipeline",
        categorical_preprocessing.make_sleep_duration_pipeline_fn(encoding="ordinal"),
        ["sleep_duration"],
    ),
    (
        "dietary_habits_pipeline",
        categorical_preprocessing.make_dietary_habits_pipeline_fn(encoding="onehot"),
        ["dietary_habits"],
    ),
    ("degree_pipeline", categorical_preprocessing.degree_pipeline, ["degree"]),
    (
        "suicidal_thoughts_pipeline",
        categorical_preprocessing.suicidal_thoughts_pipeline,
        ["suicidal_thoughts"],
    ),
    (
        "family_history_pipeline",
        categorical_preprocessing.family_history_pipeline,
        ["family_history"],
    ),
    ("city_pipeline", categorical_preprocessing.city_pipeline, ["city"]),
    # --- numerical features ---
    ("age_pipeline", numerical_preprocessing.age_pipeline, ["age"]),
    ("cgpa_pipeline", numerical_preprocessing.cgpa_pipeline, ["cgpa"]),
    ("hours_pipeline", numerical_preprocessing.hours_pipeline, ["work_study_hours"]),
    (
        "ratings_column_pipeline",
        numerical_preprocessing.rating_columns_pipeline,
        ["academic_pressure", "study_satisfaction", "financial_stress"],
    ),
]

# NOTE: despite its name, `preprocessed_data` is the (unfitted, then fitted)
# ColumnTransformer itself, not the transformed data.
preprocessed_data = ColumnTransformer(column_steps)

# Fit on the training set and transform it in one pass; the feature names are
# only requested after fitting.
transformed_train = preprocessed_data.fit_transform(X_train, y_train)
preprocessed_data_df = pd.DataFrame(
    transformed_train,
    columns=preprocessed_data.get_feature_names_out(),
)

# Preview the first rows of the transformed training frame.
preprocessed_data_df.head()

Unnamed: 0,preprocess_gender__gender_female,preprocess_gender__gender_male,preprocess_profession__profession_student,preprocess_profession__profession_working,sleep_duration_pipeline__sleep_duration,dietary_habits_pipeline__dietary_habits_healthy,dietary_habits_pipeline__dietary_habits_moderate,dietary_habits_pipeline__dietary_habits_unhealthy,degree_pipeline__encode_degree_field__degree_field_architecture,degree_pipeline__encode_degree_field__degree_field_arts,...,age_pipeline__encode_age_range__age_range_gte_33,cgpa_pipeline__encode_cgpa_range__cgpa_range_4_to_7,cgpa_pipeline__encode_cgpa_range__cgpa_range_gte_7,cgpa_pipeline__encode_cgpa_range__cgpa_range_lt_4,hours_pipeline__encode_hours_range__hours_range_4_to_8,hours_pipeline__encode_hours_range__hours_range_gte_8,hours_pipeline__encode_hours_range__hours_range_lt_4,ratings_column_pipeline__academic_pressure,ratings_column_pipeline__study_satisfaction,ratings_column_pipeline__financial_stress
0,0.0,1.0,1.0,0.0,2.0,0.0,1.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.620758,-1.427009,1.298596
1,0.0,1.0,1.0,0.0,2.0,0.0,1.0,0.0,1.0,0.0,...,0.0,1.0,0.0,0.0,0.0,1.0,0.0,-0.104993,-0.692052,-1.486784
2,0.0,1.0,1.0,0.0,2.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,1.0,0.0,1.0,0.0,0.0,-0.104993,-0.692052,1.298596
3,0.0,1.0,1.0,0.0,2.0,0.0,1.0,0.0,0.0,0.0,...,1.0,0.0,1.0,0.0,0.0,1.0,0.0,-0.104993,1.51282,-0.094094
4,0.0,1.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,1.0,0.0,-0.104993,0.777863,1.298596
