### Configure Notebook

#### Load Python Libraries

In [30]:
import logging
import os
import sys
import numpy as np

import joblib
import pandas as pd

import joblib

#### Define Global Variables

In [14]:
# Folder holding the dataset CSV, given relative to the working directory.
data_dir = "../../data/processed/"

# Columns selected from the dataset as pipeline inputs, in this order.
feature_columns = [
    "gender", "age", "hypertension", "heart_disease", "ever_married",
    "work_type", "residence_type", "avg_glucose_level", "bmi", "smoking_status",
]

# Seed value kept with the other settings for reproducibility.
random_state = 42

#### Configure Logger

In [15]:
# Log line layout: timestamp, process id, logger name, line number, level, message.
log_format = "[%(asctime)s] - p%(process)s %(name)s %(lineno)d - %(levelname)s:%(message)s"

# Send INFO-and-above records to stdout so they appear in the cell output.
logging.basicConfig(
    format=log_format,
    datefmt="%Y-%m-%d %H:%M:%S",
    stream=sys.stdout,
    level=logging.INFO,
)

# No name is passed, so this is the root logger.
logger = logging.getLogger()

### Load Processed Dataset

In [16]:
# Read stroke_records.csv from data_dir into a DataFrame and log its row count.
dataset_path = os.path.join(data_dir, "stroke_records.csv")
df_processed = pd.read_csv(dataset_path)
logger.info(f"Raw Dataset Number of Records: {len(df_processed)}")
# NOTE(review): the message says "Raw" although the result is stored in
# df_processed — confirm which wording is intended before changing the string.

[2022-06-02 23:45:18] - p96628 root 4 - INFO:Raw Dataset Number of Records: 5109


In [17]:
# Take the first row of the dataset and keep only the feature columns.
# NOTE(review): the recorded output of the next cell shows the key 'Residence_type',
# while feature_columns lists "residence_type" — confirm the CSV's column name.
example_data = df_processed.head(1)[feature_columns]

In [35]:
# Show the single selected row as a plain {column: value} dict.
first_record = example_data.to_dict(orient="records")[0]
print(first_record)

{'gender': 'Male', 'age': 67.0, 'hypertension': 0, 'heart_disease': 1, 'ever_married': 'Yes', 'work_type': 'Private', 'Residence_type': 'Urban', 'avg_glucose_level': 228.69, 'bmi': 36.6, 'smoking_status': 'formerly smoked'}


### Generate Pipeline Predictions

In [32]:
# Load the serialized pipeline from a hard-coded path, resolved relative to the
# current working directory.
# NOTE(security): joblib.load unpickles the file, which can execute arbitrary code;
# only load artifacts that come from a trusted source.
pipeline = joblib.load("src/pipelines/RF_A_0.9507.joblib")

# example_data holds a single row, so the first element is its prediction.
prediction = pipeline.predict(example_data)[0]

try:
    # Largest per-class probability reported for the single row.
    prediction_probability = pipeline.predict_proba(example_data)[0].max()
except AttributeError:
    # Raised when the loaded object does not expose predict_proba.
    logger.error("Method predict_proba is not available for provided architecture.")
    prediction_probability = None  # No probability available for this pipeline.

# np.round(None, 5) raises TypeError, so only round when a probability exists;
# otherwise the fallback above would still crash the cell.
rounded_probability = (
    None if prediction_probability is None else np.round(prediction_probability, 5)
)

logger.info(f"Pipeline Prediction: {prediction}")
logger.info(f"Pipeline Prediction Probability: {rounded_probability}")

[2022-06-02 23:49:15] - p96628 root 9 - INFO:Pipeline Prediction: 0
[2022-06-02 23:49:15] - p96628 root 10 - INFO:Pipeline Prediction Probability: 0.73375
