In [13]:
import pandas as pd
import numpy as np
import dash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output, State
from jupyter_dash import JupyterDash
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.utils.validation import check_is_fitted
from category_encoders import OneHotEncoder


## Import

In [14]:
def wrangle(filepath):
    """Read the maternity data spreadsheet into a cleaned ``DataFrame``.

    Parameters
    ----------
    filepath : str
        Location of the Excel file (it is read with ``pd.read_excel``).

    Returns
    -------
    pd.DataFrame
        The data without the ``BWT`` column, with the remaining nine
        columns renamed by position and the coded categorical columns
        replaced by text labels.

    Notes
    -----
    Codes that are absent from a mapping become ``NaN``, which is how
    ``Series.map`` treats keys missing from a dict.
    """
    # Descriptive names, assigned by position once ``BWT`` has been removed.
    # NOTE(review): this relies on the spreadsheet's column order - confirm
    # against the data dictionary if the source file ever changes.
    column_names = [
        "Birth_weight", "Age", "Mother_weight", "Race", "Smoking_status",
        "History_of_premature_labor", "History_of_Hypertension",
        "Presence_of_uterine_irritability", "Physician_visits",
    ]
    race_labels = {1: "White", 2: "Black", 3: "Others"}
    yes_no_labels = {0: "No", 1: "Yes"}
    yes_no_columns = [
        "Smoking_status",
        "History_of_Hypertension",
        "Presence_of_uterine_irritability",
    ]

    # Drop the leaky column so it can never be used as a feature.
    df = pd.read_excel(filepath).drop(columns="BWT")

    # Rename columns
    df.columns = column_names

    # Replace numeric codes with readable labels in the categorical columns.
    df["Race"] = df["Race"].map(race_labels)
    for column in yes_no_columns:
        df[column] = df[column].map(yes_no_labels)

    return df

In [15]:
# Load the spreadsheet and apply the cleaning steps defined in `wrangle`.
df = wrangle(filepath="data/low_birth_weight.xls")

## Split

In [16]:
# Separate the label column from the feature matrix.
target = "Birth_weight"
y = df[target]
X = df.drop(columns=[target])

In [17]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

## Build Model

In [18]:
# Baseline: the accuracy obtained by always predicting the most frequent
# class in the training labels.
class_shares = y_train.value_counts(normalize=True)
acc_baseline = class_shares.max()
print("Baseline Accuracy:", round(acc_baseline, 2))

Baseline Accuracy: 0.66


## Iterate

In [19]:
# Assemble the pipeline step by step: one-hot encode the categorical
# columns, then fit a logistic regression on the encoded features.
encoder = OneHotEncoder(use_cat_names=True)
classifier = LogisticRegression(max_iter=1000)
model = make_pipeline(encoder, classifier)

# Train on the training split; the fitted pipeline is the cell's output.
model.fit(X_train, y_train)

  elif pd.api.types.is_categorical(cols):


Pipeline(steps=[('onehotencoder',
                 OneHotEncoder(cols=['Race', 'Smoking_status',
                                     'History_of_Hypertension',
                                     'Presence_of_uterine_irritability'],
                               use_cat_names=True)),
                ('logisticregression', LogisticRegression(max_iter=1000))])

## Build Dashboard

In [20]:
# Instantiate the JupyterDash application; the layout and the prediction
# callback defined later in the notebook are attached to this object.
app = JupyterDash(__name__)

In [21]:
def _labelled_options(*labels):
    """Build dropdown options whose value is the same text as the label."""
    return [{"label": text, "value": text} for text in labels]


def _stepped_number_input(component_id, lowest, highest, hint):
    """Build a numeric input bounded by ``lowest``/``highest`` with a step of 1."""
    return dcc.Input(
        id=component_id,
        type="number",
        min=lowest,
        max=highest,
        step=1,
        placeholder=hint,
    )


# Form layout: one control per model feature (in the same order as the
# callback's State arguments), followed by the submit button and the
# paragraph that displays the prediction text.
app.layout = html.Div(
    children=[
        _stepped_number_input("age", 13, 75, "Age"),
        _stepped_number_input("weight", 50, 600, "Weight in pounds"),
        dcc.Dropdown(
            id="race-id",
            options=_labelled_options("White", "Black", "Others"),
            placeholder="Race",
        ),
        dcc.Dropdown(
            id="smoke-id",
            options=_labelled_options("Yes", "No"),
            placeholder="Smoking status",
        ),
        dcc.Dropdown(
            id="prem-id",
            # Labels are words, but the submitted values are integer counts.
            options=[
                {"label": "None", "value": 0},
                {"label": "One", "value": 1},
                {"label": "Two", "value": 2},
                {"label": "Three and above", "value": 3},
            ],
            placeholder="History of premature labor",
        ),
        dcc.Dropdown(
            id="hyper-id",
            options=_labelled_options("Yes", "No"),
            placeholder="History of hypertension",
        ),
        dcc.Dropdown(
            id="uterine-id",
            options=_labelled_options("Yes", "No"),
            placeholder="Presence of uterine irritability",
        ),
        _stepped_number_input("physician-visit", 0, 20, "No. of physician visits"),
        html.Button("Submit", id="submit-val", n_clicks=0),
        html.P(id="result-text"),
    ]
)

In [22]:
@app.callback(
    Output("result-text", "children"),
    Input("submit-val", "n_clicks"),
    State("age", "value"),
    State("weight", "value"),
    State("race-id", "value"),
    State("smoke-id", "value"),
    State("prem-id", "value"),
    State("hyper-id", "value"),
    State("uterine-id", "value"),
    State("physician-visit", "value")
)
def make_prediction(n_clicks, Age, Mother_weight, Race, Smoking_status, History_of_premature_labor, History_of_Hypertension, Presence_of_uterine_irritability,Physician_visits):
    """Predict the birth-weight class for the values entered in the form.

    Parameters
    ----------
    n_clicks : int
        Click count of the Submit button; it is the callback's only trigger.
    Age, Mother_weight, Race, Smoking_status, History_of_premature_labor,
    History_of_Hypertension, Presence_of_uterine_irritability, Physician_visits
        Current values of the form controls. A control that has not been
        filled in arrives as ``None``.

    Returns
    -------
    str
        An empty string before the first click, a prompt when any field is
        still empty, otherwise the sentence describing the predicted outcome.
    """
    # Keys use the training column names so the pipeline's encoder
    # recognises the single-row frame built below.
    features = {
        "Age": Age,
        "Mother_weight": Mother_weight,
        "Race": Race,
        "Smoking_status": Smoking_status,
        "History_of_premature_labor": History_of_premature_labor,
        "History_of_Hypertension": History_of_Hypertension,
        "Presence_of_uterine_irritability": Presence_of_uterine_irritability,
        "Physician_visits": Physician_visits,
    }

    # Dash also invokes the callback when the page first loads (n_clicks is
    # 0 and every field is None); there is nothing to predict yet.
    if not n_clicks:
        return ""

    # Compare against None explicitly: 0 is a legitimate value for the
    # premature-labor and physician-visit fields.
    if any(value is None for value in features.values()):
        return "Please fill in every field before pressing Submit."

    observation = pd.DataFrame(features, index=[0])
    prediction = model.predict(observation)

    # Assumes the target is coded 0/1, matching the labels used here.
    outcome_labels = {
        0: "Normal Birth Weight",
        1: "Low Birth Weight",
    }
    return f"The predicted outcome is: {outcome_labels[int(prediction[0])]}"

In [23]:
# Start serving the app with Dash's debug mode switched on.
app.run_server(debug=True)

Dash app running on http://127.0.0.1:8050/
