In [None]:
# notebooks/diabetes_predictor_notebook.ipynb

import pandas as pd
from diabetes_predictor.data_loader import DataLoader
from diabetes_predictor.preprocessors.drop_na_preprocessor import DropNaPreprocessor
from diabetes_predictor.preprocessors.fill_na_preprocessor import FillNaPreprocessor
from diabetes_predictor.feature_extractors.feature_transformer_1 import AgeBinningFeatureExtractor
from diabetes_predictor.feature_extractors.feature_transformer_2 import BMIExtractor
from diabetes_predictor.models.model import DiabetesModel
from sklearn.metrics import roc_auc_score

# ---------------------------------------------------------------------------
# End-to-end run: load -> preprocess -> add features -> train -> score.
# Every stage is applied to the train split first and the test split second.
# ---------------------------------------------------------------------------

# 1) Loading: load_data() yields two objects, unpacked as the train/test splits.
data_loader = DataLoader('../data/dataset.csv')
train_data, test_data = data_loader.load_data()

# 2) Preprocessing.
# The first preprocessor is configured to check the demographic columns.
drop_na = DropNaPreprocessor(columns_to_check=['age', 'gender', 'ethnicity'])
train_data, test_data = [
    drop_na.preprocess(split) for split in (train_data, test_data)
]

# The second preprocessor is configured to fill the body-measurement columns.
# NOTE(review): each split is processed by a separate preprocess() call;
# confirm how the fill value is derived (per split vs. learned from train).
fill_na = FillNaPreprocessor(columns_to_fill=['height', 'weight'])
train_data, test_data = [
    fill_na.preprocess(split) for split in (train_data, test_data)
]

# 3) Feature extraction.
# Presumably these add the 'age_binned' and 'BMI' columns consumed by the
# model below -- verify against the extractor implementations.
age_binner = AgeBinningFeatureExtractor()
train_data, test_data = [
    age_binner.transform(split) for split in (train_data, test_data)
]

bmi_extractor = BMIExtractor()
train_data, test_data = [
    bmi_extractor.transform(split) for split in (train_data, test_data)
]

# 4) Training: the model is told which columns are inputs and which is the label.
feature_columns, target_column = ['age_binned', 'BMI'], 'diabetes'
model = DiabetesModel(feature_columns, target_column)
model.train(train_data)

# 5) Prediction: model output is stored alongside the test rows.
test_data['predictions'] = model.predict(test_data)

# 6) Evaluation on the held-out split.
# NOTE(review): ROC AUC is most informative on continuous scores; confirm
# whether predict() returns probabilities or hard class labels.
roc_auc = roc_auc_score(
    y_true=test_data[target_column],
    y_score=test_data['predictions'],
)
print(f'ROC AUC Score: {roc_auc}')
