In [None]:
import sys
from pathlib import Path

# Put the sibling ``src`` directory at the front of the module search path so
# the package imports that follow can be resolved. The location is relative to
# the current working directory; adjust it if the notebook lives elsewhere.
src_path = Path('..', 'src').resolve()
_src_dir = str(src_path)  # sys.path holds plain strings, so convert once
if _src_dir not in sys.path:
    sys.path.insert(0, _src_dir)

# Now you can import your package
from diabetes_mellitus.load_clean_data.load_data import load_data
from diabetes_mellitus.load_clean_data.remove_nan import remove_nan
from diabetes_mellitus.load_clean_data.fill_nan_mean import fill_nan_mean
from diabetes_mellitus.load_clean_data.generate_dummies import generate_dummies
from diabetes_mellitus.load_clean_data.binary_encoding_gender import binary_encoding_gender
from diabetes_mellitus.load_clean_data.updated_train_test_split import updated_train_test_split

from diabetes_mellitus.train_model.fit_model import fit_model

from diabetes_mellitus.metrics.updated_roc_auc_score import updated_roc_auc_score


# --- Pipeline configuration --------------------------------------------------
DATA_FILE = '../data/sample_diabetes_mellitus_data.csv'
TARGET_COLUMN = 'diabetes_mellitus'

# Column groups are stored as tuples so the shared definitions cannot be
# modified by accident; every call below receives a fresh list built from them.
REQUIRED_COLUMNS = ('age', 'gender', 'ethnicity')
MEAN_FILLED_COLUMNS = ('height', 'weight')
DUMMY_COLUMNS = ('ethnicity',)

# Columns handed to the model. Edit this single tuple to change the regressors
# included for a different use or case.
MODEL_FEATURES = (
    'age',
    'height',
    'weight',
    'aids',
    'cirrhosis',
    'hepatic_failure',
    'immunosuppression',
    'leukemia',
    'lymphoma',
)

# --- Load and split ----------------------------------------------------------
data = load_data(DATA_FILE)
X_train, X_test, y_train, y_test = updated_train_test_split(data, TARGET_COLUMN)

# --- Cleaning and feature engineering ----------------------------------------
# Each helper is applied to the training split first and then to the test
# split, and each split is processed on its own.
(X_train, y_train), (X_test, y_test) = [
    remove_nan(features, target, list(REQUIRED_COLUMNS))
    for features, target in ((X_train, y_train), (X_test, y_test))
]

# NOTE(review): if fill_nan_mean derives the means from the frame it is given,
# the test split is imputed with its own statistics rather than the training
# ones - confirm this is intended.
X_train, X_test = [
    fill_nan_mean(split, list(MEAN_FILLED_COLUMNS))
    for split in (X_train, X_test)
]
X_train, X_test = [
    generate_dummies(split, list(DUMMY_COLUMNS))
    for split in (X_train, X_test)
]
X_train, X_test = [binary_encoding_gender(split) for split in (X_train, X_test)]

# --- Feature selection and model fitting -------------------------------------
# Restrict both splits to the same model features before fitting.
X_train = X_train[list(MODEL_FEATURES)]
X_test = X_test[list(MODEL_FEATURES)]

# fit_model receives both feature sets plus the training target and returns an
# updated version of each split (presumably carrying the model's predictions -
# confirm against fit_model).
updated_X_train, updated_X_test = fit_model(X_train, X_test, y_train)

# --- Model metrics -----------------------------------------------------------
# One ROC AUC value per split, computed from the updated splits and the targets.
train_roc_auc, test_roc_auc = updated_roc_auc_score(
    updated_X_train, updated_X_test, y_train, y_test
)