# Predicting Cardiovascular Disease Interface


This is the user interface designed to predict the probability of cardiovascular disease based on various characteristics provided by the user, including:
- Age
- Height
- Weight
- Gender
- Systolic blood pressure
- Diastolic blood pressure
- Cholesterol levels
- Glucose levels
- Smoking status
- Alcohol intake
- Physical activity
  
The values entered for these characteristics are analyzed by an artificial neural network. The network is trained on the dataset available at https://www.kaggle.com/datasets/sulianova/cardiovascular-disease-dataset and achieves an accuracy of approximately 72%. Note that this prediction has limitations and is not a substitute for a comprehensive diagnosis by medical professionals. For an accurate diagnosis of cardiovascular disease, please consult healthcare experts.


In [83]:
import pandas as pd
import ipywidgets as widgets

from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
import ipywidgets as widgets
from ipywidgets import interact
from datetime import datetime
from IPython.display import clear_output


In [81]:
# Load the cardiovascular-disease training data (semicolon-separated CSV).
cardio = pd.read_csv('./cardio_train.csv', sep=';')

# Converting age from days to years
cardio['age'] = cardio['age'] / 365

# Filtering out improbable data: drop non-positive blood pressure readings,
# then keep only rows whose age and height lie within the 5th-95th
# percentiles (both quantile pairs are computed on the same frame).
cardio = cardio[(cardio['ap_hi'] > 0) & (cardio['ap_lo'] > 0)]
age_low, age_high = cardio['age'].quantile(0.05), cardio['age'].quantile(0.95)
height_low, height_high = cardio['height'].quantile(0.05), cardio['height'].quantile(0.95)
# .copy() makes the result an independent frame so that adding the 'bmi'
# column below does not write into a slice (SettingWithCopyWarning).
cardio = cardio[
    (cardio['age'] >= age_low) & (cardio['age'] <= age_high)
    & (cardio['height'] >= height_low) & (cardio['height'] <= height_high)
].copy()

# Calculate BMI (Body Mass Index): weight in kg / (height in m) ** 2
cardio['bmi'] = cardio['weight'] / ((cardio['height'] / 100) ** 2)

# Filter outliers in BMI (keep the 5th-95th percentile range) and drop any
# remaining rows that contain missing values.
bmi_low, bmi_high = cardio['bmi'].quantile(0.05), cardio['bmi'].quantile(0.95)
cardio_filtered = cardio[(cardio['bmi'] >= bmi_low) & (cardio['bmi'] <= bmi_high)]
cardio = cardio_filtered.dropna()

# Move 'bmi' from the end to the second-to-last position, i.e. directly
# before the column that was last in the original data.
cols = list(cardio.columns)
cols.remove('bmi')
cols.insert(-1, 'bmi')
cardio = cardio[cols]

# Transforming columns: standardize the numeric features and one-hot encode
# the categorical codes; categories unseen during fit are ignored at predict time.
numeric_cols = ['age', 'height', 'weight', 'ap_hi', 'ap_lo', 'bmi']
cat_cols = ['gender', 'cholesterol', 'gluc', 'smoke', 'alco', 'active']
preproc = ColumnTransformer(
    transformers=[
        ('standard_scaler', StandardScaler(), numeric_cols),
        ('categorical_cols', OneHotEncoder(handle_unknown='ignore'), cat_cols),
    ]
)

# Getting feature and response columns
dataset = cardio[numeric_cols + cat_cols]
result = cardio['cardio']

# Splitting the data into training (80%) and testing (20%) sets
X_train, X_test, y_train, y_test = train_test_split(
    dataset,
    result,
    test_size=0.2,
    random_state=1,
)

# Artificial Neural Network: preprocessing followed by a multi-layer
# perceptron. The seed is fixed so that repeated runs train the same model.
pl_nn = Pipeline([
    ('preprocessor', preproc),
    ('nn', MLPClassifier(random_state=1)),
])
print(X_train.head())
pl_nn.fit(X_train, y_train)

             age  height  weight  ap_hi  ap_lo        bmi  gender  \
65105  49.156164     169    71.0    130     80  24.859074       2   
11706  50.306849     166    63.0    120     60  22.862534       1   
68605  54.323288     165    60.0    120    100  22.038567       1   
16692  47.564384     165    75.0    120     80  27.548209       2   
46091  50.320548     168    78.0    150    100  27.636054       1   

       cholesterol  gluc  smoke  alco  active  
65105            2     1      0     0       1  
11706            1     1      0     0       0  
68605            1     1      0     0       0  
16692            1     1      0     0       1  
46091            1     1      0     0       1  
[1 0 0 ... 0 1 0]
Training accuracy: 0.7311561417019639
Testing accuracy: 0.7288217173661917


In [94]:
@interact
def display_form():
    """Print the introductory instructions and accuracy disclaimer for the form.

    Takes no arguments and returns nothing; it is wrapped with ``interact``.
    """
    notice = "Please fill out the following form to assess your likelihood of developing Cardiovascular Disease. Please keep in mind that the prediction results may not be entirely accurate, and it's advisable to consult with medical professionals for a more comprehensive evaluation. The Machine Learning model may take some time to predict your likelihood for Cardiovascular Disease."
    print(notice)

# Output area; the submit handler prints its messages inside this widget.
output_widget = widgets.Output()
# Style settings shared by the form widgets created below.
style = dict(description_width='initial')

def assess_form(birth_date, height, weight, Gender, systolic, diastolic, Cholesterol, Glucose, Smoking, Alcohol, Physical):
    """Echo the current form selections, one line per field.

    Each argument is the current value of the corresponding form widget and
    is printed as-is; nothing is validated or returned.
    """
    summary_lines = (
        f"Selected birthday: {birth_date}",
        f"Selected height: {height} centimeters",
        f"Selected weight: {weight} kilograms",
        f"Selected Gender: {Gender}",
        f"Blood Pressure: {systolic} / {diastolic}",
        f"Selected Cholesterol: {Cholesterol}",
        f"Selected Glucose: {Glucose}",
        f"Selected Smoking: {Smoking}",
        f"Selected Alcohol: {Alcohol}",
        f"Selected Physical: {Physical}",
    )
    for line in summary_lines:
        print(line)

# Form widgets. In every dropdown the value -1 is the "nothing selected yet"
# sentinel that the submit handler rejects; the other values are the
# categorical codes fed to the model.
birth_date_widget = widgets.DatePicker(description='Birthday', style=style)
height_widget = widgets.IntSlider(
    value=136, min=0, max=272,
    description='Height in Centimeters', style=style,
)
weight_widget = widgets.FloatSlider(
    value=60, min=2, max=635, step=0.1,
    description='Weight in Kilograms', readout_format='.1f', style=style,
)
# The training data's 'gender' column uses 1 for women and 2 for men (per the
# published dataset description), so the labels must map to those codes.
# The previous mapping (Man -> 1, Woman -> 2) was inverted.
gender_widget = widgets.Dropdown(
    options=[('Select', -1), ('Man', 2), ('Woman', 1)],
    style=style,
)
systolic_widget = widgets.IntText(value=0, description='Systolic')
diastolic_widget = widgets.IntText(value=0, description='Diastolic')
cholesterol_widget = widgets.Dropdown(
    options=[('Select', -1), ('Normal', 1), ('Above Normal', 2), ('Well Above Normal', 3)],
    style=style,
)
glucose_widget = widgets.Dropdown(
    options=[('Select', -1), ('Normal', 1), ('Above Normal', 2), ('Well Above Normal', 3)],
    style=style,
)
smoking_widget = widgets.Dropdown(
    options=[('Select', -1), ('I have smoked', 1), ('I have never smoked', 0)],
    style=style,
)
alcohol_widget = widgets.Dropdown(
    options=[('Select', -1), ('I consume alcohol', 1), ('I do not consume alcohol', 0)],
    style=style,
)
physical_widget = widgets.Dropdown(
    options=[
        ('Select', -1),
        ('I engage in regular physical activity', 1),
        ('I do not engage in regular physical activity', 0),
    ],
    style=style,
)
submit_button = widgets.Button(description='Submit', style=style)

def on_submit_button_clicked(button):
    """Validate the form, run the trained pipeline and print the outcome.

    Used as the click callback of the submit button. Everything is printed
    inside ``output_widget`` after clearing its previous content. If any
    field is missing or invalid (including a birth date in the future), a
    prompt to complete the form is printed instead of a prediction.

    Args:
        button: The object passed by the click callback; not used.
    """
    invalid_message = "Please fill out all fields with a valid input for the best results."

    with output_widget:
        clear_output(wait=True)

        birth_date = birth_date_widget.value
        height = height_widget.value
        weight = weight_widget.value
        ap_hi = systolic_widget.value
        ap_lo = diastolic_widget.value
        # Dropdown selections; -1 means the user left the entry on 'Select'.
        categorical = {
            'gender': gender_widget.value,
            'cholesterol': cholesterol_widget.value,
            'gluc': glucose_widget.value,
            'smoke': smoking_widget.value,
            'alco': alcohol_widget.value,
            'active': physical_widget.value,
        }

        # All form fields should be filled. A zero height is rejected here as
        # well, which also protects the BMI division below.
        if (not birth_date or not height or not weight
                or ap_hi <= 0 or ap_lo <= 0
                or any(choice == -1 for choice in categorical.values())):
            print(invalid_message)
            return

        # A birth date in the future would yield a negative age.
        today = datetime.today().date()
        if birth_date > today:
            print(invalid_message)
            return

        # Age in years, using the same days / 365 conversion as the training data.
        age = (today - birth_date).days / 365
        bmi = weight / ((height / 100) ** 2)

        # Single-row frame with the column names the pipeline was fitted on.
        X_new = pd.DataFrame({
            'age': [age],
            'height': [height],
            'weight': [weight],
            'ap_hi': [ap_hi],
            'ap_lo': [ap_lo],
            'bmi': [bmi],
            **{name: [choice] for name, choice in categorical.items()},
        })

        # Predict the class (1 = indication of cardiovascular disease).
        prediction = pl_nn.predict(X_new)
        if prediction[0] == 1:
            print("Based on the provided statistics, there is an indication of a potential risk of Cardiovascular Disease.")
        else:
            print("Based on the provided statistics, there doesn't seem to be an indication of a potential risk of Cardiovascular Disease.")
        print("It's essential to remember that this prediction is not entirely accurate. For a precise assessment, we recommend consulting with a medical professional.")


# Run the prediction handler whenever the submit button is clicked.
submit_button.on_click(on_submit_button_clicked)

# Bind each widget to the assess_form parameter of the same name so the
# current selections are echoed back as they change.
form_controls = {
    'birth_date': birth_date_widget,
    'height': height_widget,
    'weight': weight_widget,
    'Gender': gender_widget,
    'systolic': systolic_widget,
    'diastolic': diastolic_widget,
    'Cholesterol': cholesterol_widget,
    'Glucose': glucose_widget,
    'Smoking': smoking_widget,
    'Alcohol': alcohol_widget,
    'Physical': physical_widget,
}
interact(assess_form, **form_controls)

# Show the submit button followed by the area that receives the result.
for element in (submit_button, output_widget):
    display(element)




interactive(children=(Output(),), _dom_classes=('widget-interact',))

interactive(children=(DatePicker(value=None, description='Birthday', step=1, style=DescriptionStyle(descriptio…

Button(description='Submit', style=ButtonStyle())

Output()