In [1]:
#Import libraries

import pandas as pd
import numpy as np

from dash import Input, Output, dcc, html
from jupyter_dash import JupyterDash

from kmodes.kprototypes import KPrototypes

In [2]:
#Import dataset, all scaled and wrangled from second notebook

doctor_data = pd.read_csv("Data/doctor_data_merge")
doctor_data.head()

Unnamed: 0,Complaint Type,Qty,Region,Category,Rank,Incidence rate,R rate,Experience,Purchases
0,No Complaints,-0.404304,4 15,Specialist,Ambassador,10.941947,-0.29716,1.137682,2.796951
1,No Complaints,-0.404304,1 8 T4,General Practitioner,Ambassador,7.958297,-1.528994,-0.920733,1.975615
2,No Complaints,-0.404304,1 9 T4,Specialist,Ambassador,6.963747,0.565124,-0.920733,1.676947
3,No Complaints,-0.404304,1 10 T3,Specialist,Ambassador,5.720559,1.249476,-0.097367,1.303612
4,No Complaints,-0.404304,1 14 T4,Specialist,Ambassador,4.477371,-0.215038,0.365776,0.930277


In [3]:
#Instantiate application
app = JupyterDash(__name__)

In [11]:
#Begin to create layout

app.layout = html.Div(
    [
        #Create H1 header for title of app
        html.H1("Doctors Clusters according to selected features"),
        #Create H2 Header for subtitle for error_metric
        html.H2("Error metric"),
        #radio item for switching btw rank and w/o rank
        html.H3("With or without Rank"),
        dcc.RadioItems(
            options = [{"label": "Include Rank features", "value": True},
                      {"label": "Without Rank feature", "value": False}],
            value = True,
            id = "On/Off Rank"
        ),
        #Create dcc Slider for switching number of clusters
        html.H3("Slider for controlling number of clusters"),
        dcc.Slider(min = 2, max = 16, step = 1, value =2, id = "slider"),
        html.H3("Error Metric below"),
        html.H5("Note: It takes a little while to load up"),
        html.Div(id = "error-metric")
    ]
    
)

In [5]:
#Define business layer to generate column names based on with/without rank

def business_rank(include_rank = True):
    """Returns features with/without rank column
    
    Parameters
    ----------
    include_rank: bool, default=True 
    
    If ``True`` return feat names with the rank
    column, If ``False`` return feat names without rank
    """
    
    if include_rank:
        feat_names = doctor_data.columns.to_list()
        #Generate categorical index for algorithm
        cat_index = [0, 2, 3, 4]
        #Create dataset
        X = doctor_data[feat_names]
        X = X.values
        X[:, 1] = X[:, 1].astype(float)
        X[:, 5] = X[:, 5].astype(float)
        X[:, 6] = X[:, 6].astype(float)
        X[:, 7] = X[:, 7].astype(float)
        X[:, 8] = X[:, 8].astype(float)
        
    else:
        feat_names = doctor_data.drop(columns = "Rank")
        feat_names = feat_names.columns.to_list()
        cat_index = [0, 2, 3]
        #Create dataset
        X = doctor_data[feat_names]
        X = X.values
        X[:, 1] = X[:, 1].astype(float)
        X[:, 4] = X[:, 4].astype(float)
        X[:, 5] = X[:, 5].astype(float)
        X[:, 6] = X[:, 6].astype(float)
        X[:, 7] = X[:, 7].astype(float)
    return X, cat_index

In [6]:
def business_model(include_rank=True, k = 2, return_error = False):
    """
    Rrturns kprototype model or error from model
    
    Parameters
    ----------
    include_rank: bool, default=True
    If ``True`` it will include rank column
    and categorical index needed for algorithm 
    If ``False`` exclude the rank column
    and categorical index needed for algorithm
    
    return_error: bool, default=False
    If ``True`` returns model errors
    If ``False`` returns only the model
    
    k: int, default=2
    Number of clusters that model should use
    for training model
    """
    #Extract features and cat_index
    data, cat_index = business_rank(include_rank=include_rank)
    
    #Fit predict model with k
    model = KPrototypes(n_clusters=k, init="Huang", random_state = 42)
    clusters = model.fit_predict(data, categorical = cat_index)
    
    if return_error:
        error = model.cost_
        return error
    else:
        return clusters

In [7]:
business_model(include_rank=False, return_error=True)

2378.349799664885

In [8]:
@app.callback(
    Output("error-metric", "children"),
    Input("On/Off Rank", "value"),
    Input("slider", "value")
)
def print_error(include_rank=True, k = 2):
    """Print the errot metric from KP model
    
    Parameter
    --------
    include_rank: bool, default:True
    ``If True`` includes the rank column
    ``If False`` does not include rank column
    k: int, deafult:2
    The number of clusters that a KP 
    algorithm will use.
    """
    error = business_model(k=k, include_rank=include_rank, return_error=True)
    
    return html.H3(error)

In [9]:
print_error()

H3(2555.242209089429)

In [10]:
app.run_server(host = "localhost", mode = "external")

Dash app running on http://localhost:8050/
