# IoT Hackathon - Stroke Prediction App

# Part III - Prediction & Visualization


## 1. Imports

In [1]:
# %pip installs into the environment of the running kernel. The versions are
# pinned to the ones this notebook was developed against (see the install log
# below) so that a fresh kernel reproduces the same Dash API.
%pip install jupyter-dash==0.4.0 dash==2.0.0

Collecting jupyter-dash
  Downloading jupyter_dash-0.4.0-py3-none-any.whl (20 kB)
Collecting dash
  Downloading dash-2.0.0-py3-none-any.whl (7.3 MB)
[K     |████████████████████████████████| 7.3 MB 8.3 MB/s 
Collecting ansi2html
  Downloading ansi2html-1.6.0-py3-none-any.whl (14 kB)
Collecting plotly>=5.0.0
  Downloading plotly-5.5.0-py2.py3-none-any.whl (26.5 MB)
[K     |████████████████████████████████| 26.5 MB 61.8 MB/s 
[?25hCollecting dash-table==5.0.0
  Downloading dash_table-5.0.0.tar.gz (3.4 kB)
Collecting dash-html-components==2.0.0
  Downloading dash_html_components-2.0.0.tar.gz (3.8 kB)
Collecting dash-core-components==2.0.0
  Downloading dash_core_components-2.0.0.tar.gz (3.4 kB)
Collecting flask-compress
  Downloading Flask_Compress-1.10.1-py3-none-any.whl (7.9 kB)
Collecting tenacity>=6.2.0
  Downloading tenacity-8.0.1-py3-none-any.whl (24 kB)
Collecting brotli
  Downloading Brotli-1.0.9-cp37-cp37m-manylinux1_x86_64.whl (357 kB)
[K     |███████████████████████████████

In [2]:
# %pip targets the running kernel's environment; the version is pinned to the
# release recorded in the install log below.
%pip install dash-pivottable==0.0.2

Collecting dash-pivottable
  Downloading dash_pivottable-0.0.2-py3-none-any.whl (5.9 MB)
[K     |████████████████████████████████| 5.9 MB 7.3 MB/s 
Installing collected packages: dash-pivottable
Successfully installed dash-pivottable-0.0.2


In [3]:
import sklearn
import pandas as pd
import numpy as np

from jupyter_dash import JupyterDash

import plotly.express as px
import dash
from dash import dcc
from dash import html
import dash_pivottable
from dash.dependencies import Input, Output
import plotly.graph_objects as go

In [13]:
# Load the stroke records; the path is relative to the notebook's working
# directory.
df = pd.read_csv('../data/elderly_strokes.csv')
# Preview the first rows to check that the columns were parsed as expected.
df.head()

Unnamed: 0.1,Unnamed: 0,id,gender,age,hypertension,heart_disease,ever_married,work_type,Residence_type,avg_glucose_level,bmi,smoking_status,stroke
0,1,9046,Male,67,0,1,Yes,Private,Urban,228.69,36.6,formerly smoked,1
1,2,51676,Female,61,0,0,Yes,Self-employed,Rural,202.21,,never smoked,1
2,3,31112,Male,80,0,1,Yes,Private,Rural,105.92,32.5,never smoked,1
3,4,1665,Female,79,1,0,Yes,Self-employed,Rural,174.12,24.0,never smoked,1
4,5,56669,Male,81,0,0,Yes,Private,Urban,186.21,29.0,formerly smoked,1


In [14]:
# Row/column count of the freshly loaded frame, before any cleaning.
df.shape

(1376, 13)

In [15]:
# Per-column dtypes and non-null counts, used to spot columns with missing values.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1376 entries, 0 to 1375
Data columns (total 13 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Unnamed: 0         1376 non-null   int64  
 1   id                 1376 non-null   int64  
 2   gender             1376 non-null   object 
 3   age                1376 non-null   int64  
 4   hypertension       1376 non-null   int64  
 5   heart_disease      1376 non-null   int64  
 6   ever_married       1376 non-null   object 
 7   work_type          1376 non-null   object 
 8   Residence_type     1376 non-null   object 
 9   avg_glucose_level  1376 non-null   float64
 10  bmi                1285 non-null   float64
 11  smoking_status     1376 non-null   object 
 12  stroke             1376 non-null   int64  
dtypes: float64(2), int64(6), object(5)
memory usage: 139.9+ KB


In [16]:
# List the distinct values found in each column, one printed line per column.
for column_name in df.columns:
    distinct_values = df[column_name].unique()
    print(column_name, ':', distinct_values)

Unnamed: 0 : [   1    2    3 ... 1374 1375 1376]
id : [ 9046 51676 31112 ... 68398 18234 44873]
gender : ['Male' 'Female']
age : [67 61 80 79 81 74 69 78 64 75 60 71 82 65 72 63 76 77 73 70 66 68 62]
hypertension : [0 1]
heart_disease : [1 0]
ever_married : ['Yes' 'No']
work_type : ['Private' 'Self-employed' 'Govt_job']
Residence_type : ['Urban' 'Rural']
avg_glucose_level : [228.69 202.21 105.92 ...  71.97  83.75 125.2 ]
bmi : [36.6  nan 32.5 24.  29.  27.4 22.8 24.2 29.7 36.8 28.2 37.5 25.8 37.8
 22.4 26.6 27.2 23.5 28.3 22.2 30.5 26.5 33.7 23.1 23.9 26.4 20.2 33.6
 27.7 31.4 36.5 33.2 32.8 25.3 47.5 20.3 28.9 28.1 31.1 21.7 27.  24.1
 22.9 27.3 32.3 25.6 29.8 26.3 26.2 24.4 28.  28.8 34.6 19.4 22.6 27.1
 30.9 31.3 31.  31.7 28.4 20.1 26.7 38.7 29.9 34.9 25.  21.8 30.  27.5
 24.6 26.1 31.9 34.1 36.9 45.7 34.2 23.6 22.3 32.9 37.1 45.  25.5 37.4
 34.5 27.9 29.5 46.  26.9 45.5 28.5 31.5 32.  30.8 33.  23.4 30.7 21.5
 28.6 42.2 35.4 16.9 34.4 26.8 39.3 32.6 35.9 21.2 42.4 40.5 29.3 19.6
 

In [17]:
# Drop every row that contains at least one missing value, then check how
# many rows are left.
df = df.dropna()
df.shape

(1285, 13)

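- Missing values: rows containing nulls are dropped (only `bmi` has missing entries)
- The `Unnamed: 0` and `id` columns carry no useful information and are dropped
- hypertension, heart_disease, stroke: change 0 -> 'No', 1 -> 'Yes'
- work_type: possible encoding 'Private' -> 0, 'Self-employed' -> 1, 'Govt_job' -> 2 (not applied in the cells below; the text labels are kept)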


In [18]:
# Remove the saved index column and the patient id, neither of which is used
# further on. errors='ignore' keeps the cell re-runnable: running it again
# after the columns are gone is a no-op instead of raising KeyError.
df = df.drop(columns=['Unnamed: 0', 'id'], errors='ignore')

In [19]:
# Confirm that the column count went down after the drop.
df.shape

(1285, 11)

In [20]:
# Preview the frame after removing the identifier columns.
df.head()

Unnamed: 0,gender,age,hypertension,heart_disease,ever_married,work_type,Residence_type,avg_glucose_level,bmi,smoking_status,stroke
0,Male,67,0,1,Yes,Private,Urban,228.69,36.6,formerly smoked,1
2,Male,80,0,1,Yes,Private,Rural,105.92,32.5,never smoked,1
3,Female,79,1,0,Yes,Self-employed,Rural,174.12,24.0,never smoked,1
4,Male,81,0,0,Yes,Private,Urban,186.21,29.0,formerly smoked,1
5,Male,74,1,1,Yes,Private,Rural,70.09,27.4,never smoked,1


In [21]:
# Recode the 0/1 indicator columns (including the stroke target) as
# 'No'/'Yes' labels so they read naturally in the dashboard.
yes_no_labels = {0: 'No', 1: 'Yes'}
for flag_column in ('hypertension', 'heart_disease', 'stroke'):
    df[flag_column] = df[flag_column].replace(yes_no_labels)

In [22]:
# Check that the indicator columns now hold 'Yes'/'No' labels.
df.head()

Unnamed: 0,gender,age,hypertension,heart_disease,ever_married,work_type,Residence_type,avg_glucose_level,bmi,smoking_status,stroke
0,Male,67,No,Yes,Yes,Private,Urban,228.69,36.6,formerly smoked,Yes
2,Male,80,No,Yes,Yes,Private,Rural,105.92,32.5,never smoked,Yes
3,Female,79,Yes,No,Yes,Self-employed,Rural,174.12,24.0,never smoked,Yes
4,Male,81,No,No,Yes,Private,Urban,186.21,29.0,formerly smoked,Yes
5,Male,74,Yes,Yes,Yes,Private,Rural,70.09,27.4,never smoked,Yes


In [23]:
# Give every column a human-readable title; these titles are what the
# dashboard shows in its dropdown and on the chart axes.
df = df.rename(
    columns={
        'gender': 'Gender',
        'age': 'Age',
        'hypertension': 'Hypertension',
        'heart_disease': 'Heart disease',
        'ever_married': 'Ever married',
        'work_type': 'Work type',
        'Residence_type': 'Residence type',
        'avg_glucose_level': 'Average glucose level',
        'bmi': 'BMI',
        'smoking_status': 'Smoking status',
        'stroke': 'Ever had stroke',
    }
)

In [24]:
# Check the renamed column titles.
df.head()

Unnamed: 0,Gender,Age,Hypertension,Heart disease,Ever married,Work type,Residence type,Average glucose level,BMI,Smoking status,Ever had stroke
0,Male,67,No,Yes,Yes,Private,Urban,228.69,36.6,formerly smoked,Yes
2,Male,80,No,Yes,Yes,Private,Rural,105.92,32.5,never smoked,Yes
3,Female,79,Yes,No,Yes,Self-employed,Rural,174.12,24.0,never smoked,Yes
4,Male,81,No,No,Yes,Private,Urban,186.21,29.0,formerly smoked,Yes
5,Male,74,Yes,Yes,Yes,Private,Rural,70.09,27.4,never smoked,Yes


In [25]:
# Predictor columns in display order; the target 'Ever had stroke' is not
# part of this list.
feature_names = [
    'Gender',
    'Age',
    'Hypertension',
    'Heart disease',
    'Ever married',
    'Work type',
    'Residence type',
    'Average glucose level',
    'BMI',
    'Smoking status',
]

In [26]:
# Predictors split by kind: numeric columns versus label-valued columns.
num_features = ['Age', 'Average glucose level', 'BMI']
cat_features = [
    'Gender',
    'Hypertension',
    'Heart disease',
    'Ever married',
    'Work type',
    'Residence type',
    'Smoking status',
]

In [27]:
# Choices offered by the form dropdowns. The data-driven lists keep the
# order in which each label first appears in the frame.
gender_values, work_values, residence_values, smoke_values = (
    df[column_title].unique().tolist()
    for column_title in ('Gender', 'Work type', 'Residence type', 'Smoking status')
)
# Fixed choice list shared by the three yes/no dropdowns.
binary_values = ['Yes', 'No']

In [28]:
# Third-party JavaScript that can be handed to Dash through the
# `external_scripts=` constructor argument: either a plain URL or a dict of
# <script> attributes.
#
# The former `https://cdn.polyfill.io/v2/polyfill.min.js` entry was removed.
# That domain changed ownership and was reported serving malicious code in
# 2024, and the entry carried no `integrity` hash that would have blocked a
# tampered payload.
# NOTE(review): this list is not passed to the JupyterDash(...) call visible
# in this notebook - confirm whether it is still needed at all.
external_scripts = [
    'https://www.google-analytics.com/analytics.js',
    {
        'src': 'https://cdnjs.cloudflare.com/ajax/libs/lodash.js/4.17.10/lodash.core.js',
        'integrity': 'sha256-Qqd/EfdABZUcAxjOkMi8eGEivtdTkh3b65xCZL4qAQA=',
        'crossorigin': 'anonymous'
    }
]

In [29]:
# External CSS stylesheets: each entry is either a plain URL or a dict of
# <link> attributes.
_bootstrap_stylesheet = {
    'href': 'https://stackpath.bootstrapcdn.com/bootstrap/4.1.3/css/bootstrap.min.css',
    'rel': 'stylesheet',
    'integrity': 'sha384-MCw98/SFnGE8fJT3GXwEOngsV7Zt27NXFoaoApmYm81iuXoPkFOJwJ8ERdknLPMO',
    'crossorigin': 'anonymous',
}
external_stylesheets = [
    'https://codepen.io/chriddyp/pen/bWLwgP.css',
    _bootstrap_stylesheet,
]

# Placeholder for a self-hosted assets location:
# 'http://your-external-assets-folder-url/'

In [62]:
from typing_extensions import Annotated
# Create the Dash application for use from inside the notebook. The viewport
# meta tag asks browsers to lay the page out at the device width.
app = JupyterDash(__name__,
                  meta_tags=[{"name": "viewport", "content": "width=device-width, initial-scale=1"}],)
# Disabled option: add external_stylesheets=external_stylesheets to the call above to load the CDN styles.
app.title = "Stroke Prevention App"

# Keep the app's underlying server object under a module-level name.
server = app.server

# Do not raise when a callback refers to a component id that is missing from the initial layout.
app.config.suppress_callback_exceptions = True

def _text_field(label, component_id):
    """Return [caption paragraph, empty free-text input] for one typed-in patient field."""
    return [html.P(children=label),
            dcc.Input(id=component_id, value='', type='text')]


def _select_field(label, component_id, choices):
    """Return [caption paragraph, dropdown] for one choice field.

    The dropdown offers every item of ``choices`` (label == value), starts on
    the first item and cannot be cleared, so its callbacks always receive one
    of the listed values.
    """
    return [html.P(children=label),
            dcc.Dropdown(id=component_id,
                         options=[{"label": x, "value": x} for x in choices],
                         value=choices[0],
                         clearable=False)]


# Dash expects camelCased CSS property names in `style` dictionaries
# (flexDirection, not flex-direction).
_TWO_COLUMN_ROW = {'display': 'flex', 'flexDirection': 'row', 'flexBasis': 'fill'}
# Right-hand column that takes up the remaining width of a row.
_FLEXIBLE_COLUMN = {'padding': 20, 'flex': 1}

# Page structure: header, then the prediction form with its results, then
# the single-feature explorer. All component ids are the ones the callbacks
# below are wired to.
app.layout = html.Div(
    id="app-container",
    children=[
        # Header
        html.Div(
            id="header",
            children=[
                html.H1(children='Stroke Prevention App',
                        style={'textAlign': 'center', 'color': 'purple'}),
                html.H2(children='IoT and AI for Healthcare\n',
                        style={'textAlign': 'center', 'color': 'blue'}),
                html.P(children='Authors: Jorge Acosta, Laura Arocas, Clotide Braye, Farah Benayad, Ana Marín, Blanca Rodrigo, Moritz Schillinger',
                       style={'textAlign': 'center'}),
            ]
        ),
        html.Hr(),
        # Predictions
        html.Div(
            id="predictions",
            children=[
                # Left column: patient data entry form
                html.Div(
                    children=(
                        [html.H2(children='Make Predictions'),
                         html.H4(children='Insert patient data')]
                        + _text_field('Age', 'age_in')
                        + _text_field('BMI', 'bmi_in')
                        + _text_field('Glucose level', 'glucose_in')
                        + _select_field('Gender', 'gender_select', gender_values)
                        + _select_field('Has hypertension', 'hypertension_select', binary_values)
                        + _select_field('Has heart disease', 'heart_disease_select', binary_values)
                        + _select_field('Is married', 'married_select', binary_values)
                        + _select_field('Work type', 'work_select', work_values)
                        + _select_field('Residence type', 'residence_select', residence_values)
                        + _select_field('Smoke status', 'smoke_select', smoke_values)
                    ),
                    style={'width': '600px', 'padding': '20px'}),

                # Right column: callback-filled prediction and recommendations
                html.Div(
                    children=[
                        html.H4(children='Risk prediction algorithm'),
                        html.Div(id='prediction'),
                        html.P(children='\n'),
                        # The recommendation text separates its items with
                        # '\n'; pre-line makes the browser show those as line
                        # breaks instead of collapsing them into spaces.
                        html.P(id='recommendations', style={'whiteSpace': 'pre-line'}),
                    ],
                    style=_FLEXIBLE_COLUMN),
            ],
            style=_TWO_COLUMN_ROW
        ),
        html.Hr(),
        # Feature analysis
        html.Div(
            id="single_features",
            children=[
                # Left column: explanation and list of available features
                html.Div(
                    children=[
                        html.H2(children='Analyse single features'),
                        html.P(children='You can analyse the different features here.'),
                        html.P(children='The dataset features are:'),
                        # A <ul> is not valid inside a <p>, so the list is a
                        # direct child of the column.
                        html.Ul(id='feature_names',
                                children=[html.Li(i) for i in feature_names]),
                    ],
                    style={'width': '250px', 'padding': '20px'}),
                # Right column: feature picker and its chart
                html.Div(
                    children=(
                        _select_field('Select Feature', 'feature_select', feature_names)
                        + [dcc.Graph(id="bar-chart")]
                    ),
                    style=_FLEXIBLE_COLUMN),
            ],
            style=_TWO_COLUMN_ROW),
    ])


@app.callback(
    Output('prediction', 'children'),
    Input('gender_select', 'value'),
    Input('age_in', 'value'),
    Input('hypertension_select', 'value'),
    Input('heart_disease_select', 'value'),
    Input('married_select', 'value'),
    Input('work_select', 'value'),
    Input('residence_select', 'value'),
    Input('glucose_in', 'value'),
    Input('bmi_in', 'value'),
    Input('smoke_select', 'value')
)
def update_prediction(gender, age, hypertension, heart_disease, married, work, residence, glucose, bmi, smoke):
    """Build the risk sentence shown in the 'prediction' element.

    Placeholder rule: the risk is reported as HIGH when ``gender`` is
    'Male' and as LOW for any other value. Only ``gender`` and ``age`` end up
    in the text; the remaining form values are accepted but not used.

    Returns:
        str: the sentence with the gender and age filled in.
    """
    high_risk_text = u'Gender is {}, age is {} the risk is HIGH'
    low_risk_text = u'Gender is {}, age is {} the risk is LOW'
    chosen_text = high_risk_text if gender == 'Male' else low_risk_text
    return chosen_text.format(gender, age)
    

@app.callback(
    Output('recommendations', 'children'),
    Input('hypertension_select', 'value'),
    Input('heart_disease_select', 'value'),
    Input('glucose_in', 'value'),
    Input('bmi_in', 'value'),
    Input('smoke_select', 'value')
)
def update_recommendation(hypertension, heart_disease, glucose, bmi, smoke):
    """Build the recommendation text shown in the 'recommendations' element.

    One piece of advice is added for each of: ``hypertension == 'Yes'``,
    ``heart_disease == 'Yes'`` and ``smoke == 'smokes'``. When none of them
    applies, a single 'No recommended actions.' line is added instead.
    ``glucose`` and ``bmi`` are received from the form but are not used yet.

    Returns:
        str: a header line followed by the advice lines, joined with '\\n'.
        Whether the separators show up as line breaks depends on the CSS
        ``white-space`` setting of the element that displays the text.
    """
    message = ['Recommended actions for the patient:']
    if hypertension == 'Yes':
        message.append('Eat healthy foods, decrease the salt in your diet, increase physical activity, limit alcohol, don\'t smoke.')
    if heart_disease == 'Yes':
        message.append('Maintain a reasonable body weight, stay physically active, check your cholesterol level regularly, avoid smoking tobacco, using intravenous drugs, or taking street drugs.')
    if smoke == 'smokes':
        message.append('Some recommendations if you are trying to quit or reduce smoking tobacco: Some ways to help you, you can find them in this link: https://www.nhs.uk/live-well/quit-smoking/10-self-help-tips-to-stop-smoking/')
    # Fall back whenever nothing was added above, rather than only when both
    # flags are exactly 'No'; otherwise any other value (e.g. None) would
    # leave the header with nothing under it.
    if len(message) == 1:
        message.append('No recommended actions.')

    return '\n'.join(message)


@app.callback(
    Output('bar-chart', 'figure'),
    Input('feature_select', 'value')
)
def update_bar_chart(feature_x):
    """Return the figure for the feature chosen in the 'feature_select' dropdown.

    Columns listed in ``cat_features`` are drawn as a grouped count histogram
    and columns listed in ``num_features`` as a box plot; both are coloured by
    the 'Ever had stroke' column of ``df``.

    Args:
        feature_x: column title of ``df`` to plot.

    Returns:
        A Plotly figure. For a name found in neither list an empty figure is
        returned; previously that case failed with UnboundLocalError because
        no figure had been assigned.
    """
    if feature_x in cat_features:
        return px.histogram(df, x=feature_x,
                            color="Ever had stroke",
                            barmode='group',
                            histfunc='count')

    if feature_x in num_features:
        return px.box(df, y=feature_x,
                      color="Ever had stroke")

    # Unknown (or missing) feature name: show an empty chart.
    return go.Figure()



# Start serving the app when this code runs as the main module, which is the
# case inside a notebook kernel.
if __name__ == "__main__":
    # debug=True switches on Dash's development tools.
    app.run_server(debug=True)

Dash app running on http://127.0.0.1:8050/


In [45]:
# Scratch cell: builds a sample recommendation list (same header as in
# update_recommendation) to try out how the lines can be joined for display.
message =['Recommended actions for the patient:']
message.append('   Some recommendations if you had hypertension are: Eat healthy foods, decrease the salt in your diet, increase physical activity,limit alcohol, dont smoke.')

In [46]:
# Scratch: show the raw list, i.e. one string per recommendation line.
print(message)

['Recommended actions for the patient:', '   Some recommendations if you had hypertension are: Eat healthy foods, decrease the salt in your diet, increase physical activity,limit alcohol, dont smoke.']


In [47]:
# Scratch: show the same list joined into one text with CRLF line separators.
print("\r\n".join(message))

Recommended actions for the patient:
   Some recommendations if you had hypertension are: Eat healthy foods, decrease the salt in your diet, increase physical activity,limit alcohol, dont smoke.
