# Dashboard

In [1]:
import warnings
import time

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objs as go

from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.pipeline import make_pipeline
from sklearn.decomposition import PCA

import dash
from jupyter_dash import JupyterDash
import dash_html_components as html
import dash_core_components as dcc
from dash.dependencies import Input, Output

warnings.simplefilter(action="ignore", category=FutureWarning)

ModuleNotFoundError: No module named 'dash'

In [2]:
def wrangle(filepath, frac=0.1, random_state=42):
    """Load the consumption CSV and return a reproducible random sample.

    Parameters
    ----------
    filepath : str or path-like
        Location of a CSV file that contains a ``customer_identifier`` column.
    frac : float, default 0.1
        Fraction of rows to keep in the returned sample.
    random_state : int, default 42
        Seed forwarded to ``DataFrame.sample`` so repeated runs pick the
        same rows.

    Returns
    -------
    pandas.DataFrame
        The sampled rows, indexed by ``customer_identifier``.
    """
    # Build a new indexed frame instead of mutating in place, so the function
    # is a single side-effect-free expression chain.
    indexed = pd.read_csv(filepath).set_index("customer_identifier")

    return indexed.sample(frac=frac, random_state=random_state)

In [3]:
# Load the sampled consumption table; `df` is the module-level frame that
# get_model() and get_pca_labels() read directly.
df = wrangle("water_cons_data.csv")

# Sanity check: print the dimensions, then display the first rows.
print(df.shape)
df.head()

(20148, 24)


Unnamed: 0_level_0,2019-Jul,2019-Aug,2019-Sep,2019-Oct,2019-Nov,2019-Dec,2020-Jan,2020-Feb,2020-Mar,2020-Apr,...,2020-Sep,2020-Oct,2020-Nov,2020-Dec,2021-Jan,2021-Feb,2021-Mar,2021-Apr,2021-May,2021-Jun
customer_identifier,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
204712316,10,8,10,11,6,7,5,6,0,16,...,26,7,6,11,5,6,3,4,4,6
15E512623,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
07N311499,7,6,7,7,2,2,6,5,8,7,...,10,13,10,5,5,6,5,8,6,9
254610305,9,8,6,10,5,9,11,9,0,21,...,9,8,9,7,11,9,5,21,11,20
05C115614,14,9,7,7,9,6,6,4,4,3,...,5,0,0,1,4,4,5,2,16,11


In [4]:
# Create the Dash application object; the layout and callbacks below attach to it.
app = JupyterDash(__name__)

In [5]:
# Page layout: a centered title, then the K-means controls and the PCA scatter.
app.layout = html.Div(
    [
        html.Div(
            [
                html.H1("Apparent Loss in Water")
            ],
            # Dash style dictionaries take camelCased CSS property names
            # ("textAlign"), not the hyphenated CSS spelling.
            style={"textAlign": "center"},
        ),
        html.H2("K-means Clustering"),
        # The slider value (number of clusters) is the Input of both callbacks
        # registered on "k-slider".
        dcc.Slider(min=2, max=8, step=1, value=2, id="k-slider"),
        # Placeholders filled in by the callbacks via their component ids.
        html.Div(id="k-text"),
        dcc.Graph(id="pca-scatter"),
    ]
)

In [6]:
def get_model(k=2):
    """Fit a standardize-then-cluster pipeline on the module-level ``df``.

    Parameters
    ----------
    k : int, default 2
        Number of clusters passed to ``KMeans``.

    Returns
    -------
    sklearn.pipeline.Pipeline
        The fitted pipeline; its steps are a ``StandardScaler`` followed by a
        ``KMeans`` seeded with ``random_state=42``.
    """
    scaler = StandardScaler()
    clusterer = KMeans(n_clusters=k, random_state=42)
    pipeline = make_pipeline(scaler, clusterer)

    # Pipeline.fit returns the pipeline itself, so the fitted object is returned.
    return pipeline.fit(df)

In [7]:
def get_pca_labels(k=2):
    """Project ``df`` onto two principal components and tag each row with its cluster.

    The PCA is fitted on ``df`` as-is, while the cluster labels come from the
    pipeline built by ``get_model`` (which standardizes before clustering).

    Parameters
    ----------
    k : int, default 2
        Number of clusters forwarded to ``get_model``.

    Returns
    -------
    pandas.DataFrame
        Columns ``PCA1``, ``PCA2`` and ``labels`` (cluster ids as strings),
        with rows ordered by ``labels``.
    """
    components = PCA(n_components=2, random_state=42).fit_transform(df)
    projected = pd.DataFrame(components, columns=["PCA1", "PCA2"])

    kmeans = get_model(k=k).named_steps["kmeans"]
    # Labels are cast to strings and attached positionally to the projected rows.
    projected["labels"] = kmeans.labels_.astype(str)

    return projected.sort_values("labels")

In [8]:
@app.callback(
    Output("k-text", "children"),
    Input("k-slider", "value"),
)
def serve_k_selected(k=2):
    """Build the heading that echoes the currently selected number of clusters.

    Parameters
    ----------
    k : int, default 2
        Value received from the ``k-slider`` component.

    Returns
    -------
    list
        A one-element list holding an ``html.H3`` for the ``k-text`` container.
    """
    heading = html.H3(f"Number of clusters (K): {k}")

    return [heading]

In [9]:
@app.callback(
    Output("pca-scatter", "figure"),
    Input("k-slider", "value"),
)
def get_scatter(k=2):
    """Draw the PCA scatter plot colored by cluster label for the chosen ``k``.

    Parameters
    ----------
    k : int, default 2
        Value received from the ``k-slider`` component; forwarded to
        ``get_pca_labels``.

    Returns
    -------
    plotly figure
        Scatter of ``PCA1`` against ``PCA2``, one color per cluster label.
    """
    projected = get_pca_labels(k=k)
    scatter = px.scatter(
        data_frame=projected,
        x="PCA1",
        y="PCA2",
        color="labels",
        title="PCA Representation of Clusters",
    )
    scatter.update_layout(xaxis_title="PCA1", yaxis_title="PCA2")

    return scatter

In [10]:
# Start the Dash server only when this code runs as the main module
# (true inside a notebook kernel, false when imported).
if __name__=="__main__":
    # The recorded output shows the app being served at http://127.0.0.1:8050/.
    # NOTE(review): newer Dash releases may expose a different launch API —
    # confirm against the installed version.
    app.run_server(mode="external")

Dash app running on http://127.0.0.1:8050/
