In [13]:
import dash 
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output
import dash_bootstrap_components as dbc
import dash_table
import pandas as pd
import numpy as np 
from sklearn.cluster import KMeans
import seaborn as sns 
import matplotlib.pyplot as plt
from sklearn import preprocessing
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
import plotly.express as px
import plotly.graph_objs as go
import plotly as py
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import base64
import datetime
import io
#import cufflinks as cf

In [14]:
#app = dash.Dash(external_stylesheets=[dbc.themes.SPACELAB]) #SIMPLEX
#app = dash.Dash(external_stylesheets=[dbc.themes.DARKLY])
#app = dash.Dash(external_stylesheets=[dbc.themes.BOOTSTRAP])
#SOLAR
#SKETCHY
#CERULEAN

In [15]:
colors = {
    "graphBackground": "#F5F5F5",
    "background": "#ffffff",
    "text": "#000000"}

In [None]:
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']

#app = dash.Dash(__name__, external_stylesheets=external_stylesheets)
app = dash.Dash(external_stylesheets=[dbc.themes.CERULEAN])
server = app.server


colors = {
    "graphBackground": "#F5F5F5",
    "background": "#ffffff",
    "text": "#000000"
}

app.layout = html.Div([
    dcc.Store(id='intermediate-value'),
    dcc.Tabs([ 
        dcc.Tab(label = 'Data Import', children=[
            html.H1(children='Please Import Data in a CSV or Xlsx Format', style = {'text-align': 'center'}), 
                dcc.Upload(
                    id='upload-data',
                    children=html.Div([
                        'Drag and Drop or ',
                        html.A('Select Files')
                    ]),
                    style={
                        'width': '100%',
                        'height': '60px',
                        'lineHeight': '60px',
                        'borderWidth': '1px',
                        'borderStyle': 'dashed',
                        'borderRadius': '5px',
                        'textAlign': 'center',
                        'margin': '10px'
                    },
                    # Allow multiple files to be uploaded
                    multiple=True
                ),
                html.Div(id='output-data-upload')
                ]),
        dcc.Tab(label='Kmeans Exploration', children = [
            html.H1('Kmeans Pre-Processing', style = {'text-align': 'center'}),
            #html.H3('''Elbow Plot: Optimal Clustering Centeroids''', style = {'text-align': 'left'}),
            html.Br(),
            dcc.Slider(
              id='elbow-range-slider',
              min=1,
              max=20,
              value=3,
              marks = {
                  1: {'label': '1 Cluster'},
                  5: {'label': '5 Clusters'},
                  10:{'label': '10 Clusters'},
                  15:{'label': '15 Clusters'},
                  20:{'label': '20 Clusters'}
              }
            ),
            html.Br(),
            html.Div(id = 'slider-output-container', children =[]),
            html.Br(),
            #html.Div(id = 'slider-output-container', children =[]),
            html.H3('''Elbow Plot: Optimal Clustering Centeroids''', style = {'text-align': 'left'}),
            dcc.Graph(id = 'Elbow Plot Container', figure = {}),
            html.Br()
        ]),
        dcc.Tab(label='Kmeans Predictions', children = [
            html.H1('Kmeans Market Segmentation', style = {'text-align': 'center'}),
            html.H3('''Kmeans Output Table''', style = {'text-align': 'left'}),
            html.Br(),
            html.H5('''Please select the amount of clusters you would like to segment your data by:''', style = {'text-align': 'left'}),
            dcc.Dropdown(id='n-cluster',
                         options=[
                             {'label': '1', 'value': 1},
                             {'label': '2', 'value': 2},
                             {'label': '3', 'value': 3},
                             {'label': '4', 'value': 4},
                             {'label': '5', 'value': 5},
                             {'label': '6', 'value': 6},
                             {'label': '7', 'value': 7},
                             {'label': '8', 'value': 8},
                             {'label': '9', 'value': 9},
                             {'label': '10', 'value': 10}
                         ],
                         value=3
                        ),
            html.Br(),
            html.Div(id = 'n-cluster-container', children = []),
            html.Br(),
            html.H3('''Kmeans Centroids Plot ''', style = {'text-align': 'left'}),
            dcc.Graph(id = 'centroid-container', figure = {}),
            html.Br(),
            #html.H3('''Kmeans Centroids Plot ''', style = {'text-align': 'left'}),
            html.Div(id='kmeans-table'),
            # Hidden div inside the app that stores the intermediate value
            #html.Div(id='intermediate-value', style={'display': 'none'})      
        ])
    ])
])

def parse_data(contents, filename):
    content_type, content_string = contents.split(',')

    decoded = base64.b64decode(content_string)
    try:
        if 'csv' in filename:
            # Assume that the user uploaded a CSV or TXT file
            df = pd.read_csv(
                io.StringIO(decoded.decode('utf-8')))
        elif 'xls' in filename:
            # Assume that the user uploaded an excel file
            df = pd.read_excel(io.BytesIO(decoded))
        elif 'txt' or 'tsv' in filename:
            # Assume that the user upl, delimiter = r'\s+'oaded an excel file
            df = pd.read_csv(
                io.StringIO(decoded.decode('utf-8')), delimiter = r'\s+')
    except Exception as e:
        print(e)
        return html.Div([
            'There was an error processing this file.'
        ])

    return df



def preprocess(table):
    result = table
    result = result.select_dtypes(include=[np.number])
    #to_be_dropped=pd.DataFrame(result.categorical).columns
    #result= result.drop(to_be_dropped,axis=1)
    
    for feature_name in result.columns:
        max_value = result[feature_name].max()
        min_value = result[feature_name].min()
        result[feature_name] = (result[feature_name] - min_value) / (max_value - min_value)
        
    
    return result


@app.callback([Output('output-data-upload', 'children'),
               Output('intermediate-value', 'data')],
            [
                Input('upload-data', 'contents'),
                Input('upload-data', 'filename')
            ])


def update_table(contents, filename):
    table = html.Div()
    
    if contents:
        
        contents = contents[0]
        filename = filename[0]
        df = parse_data(contents, filename)
        df = preprocess(df)
        

        table = html.Div([
            html.H5(filename),
            dash_table.DataTable(
                data=df.to_dict('rows'),
                columns=[{'name': i, 'id': i} for i in df.columns]
            ),
            html.Hr(),
            html.Div('Raw Content'),
            html.Pre(contents[0:200] + '...', style={
                'whiteSpace': 'pre-wrap',
                'wordBreak': 'break-all'
            })
        ])
        
        
    return table, df.to_json(date_format='iso', orient='split')


#to_be_dropped=pd.DataFrame(table.categorical).columns
#table = table.drop(to_be_dropped,axis=1)


@app.callback(

    #Output(component_id = 'kmeans-table', )
    [Output(component_id = 'slider-output-container', component_property = 'children'),
    Output(component_id = 'Elbow Plot Container', component_property = 'figure')],
    [Input(component_id = 'elbow-range-slider', component_property = 'value'),
     Input('intermediate-value', 'data')]
)

def update_graph(cluster_slct, jsonified_cleaned_data): #need to figure out how to use the table from the previous callback as an input 
    print(cluster_slct)
    print(type(cluster_slct))
    
    
    df_1 = pd.read_json(jsonified_cleaned_data, orient='split')
    #df = table
    
    
    container = 'The range of chosen clusters is: 1 to {}'.format(cluster_slct)
    
    #df_copy = df_normalized.copy()
    
    
    distortions = []
    k = list(range(1,cluster_slct))
    for clusters in k:
        kmeanModel = KMeans(n_clusters = clusters)
        kmeanModel.fit(df_1)
        distortions.append(kmeanModel.inertia_)
        
    fig = px.line(x=k , y=distortions)
    #fig = go.Figure()
    #fig.add_trace(go.Scatter(x=k, y=distortions,
                    #mode='lines',
                    #name='lines'))
    
    return container, fig





###testing 
@app.callback(

    #Output(component_id = 'kmeans-table', )
    [Output(component_id = 'n-cluster-container', component_property = 'children'),
    #Output(component_id = 'centroid container', component_property = 'figure'),
    Output(component_id = 'kmeans-table', component_property = 'data'),
    Output(component_id = 'centroid-container', component_property = 'figure')],
    [Input(component_id = 'n-cluster', component_property = 'value'),
     Input('intermediate-value', 'data'),
     Input('upload-data', 'contents'),
     Input('upload-data', 'filename')]
)

def update_table(n_cluster_slct, jsonified_cleaned_data, contents, filename):
    print(n_cluster_slct)
    print(type(n_cluster_slct))
    
    #testing 
    if contents:
        
        contents = contents[0]
        filename = filename[0]
        df = parse_data(contents, filename)
    #testing 
    
    
    n_cluster_container = 'The chosen cluster is: {}'.format(n_cluster_slct)

    dff = pd.read_json(jsonified_cleaned_data, orient='split')
    df_copy = pd.read_json(jsonified_cleaned_data, orient='split')
    #df_kmeans_table = df_normalized.copy()
    #df_prep_copy = df_prep.copy()
    #kmeans - produce kmeans segmentation model 
    kmeanModel = KMeans(n_clusters= n_cluster_slct)
    kmeanModel.fit(dff)
    
    dff['k_means_cluster']=kmeanModel.predict(dff)

    #append model results back to original prep dataframe for interpretability 
    df['k_means_cluster'] = dff['k_means_cluster']

    #data_1 = df.to_dict('rows')
    
    data_table1 = df.to_dict('rows')
    columns_1 = [{"name": i, "id": i} for i in df.columns]
    
    
    
    #return dash_table.DataTable(data=data_table1, columns=columns_1)
    label = df['k_means_cluster']
    #centroids_check = kmeans.cluster_centers_
    u_labels = np.unique(label)
 
    #plotting the results:
    #fig_2 = px.scatter(x=kmeanModel.cluster_centers_[:, 0], y=kmeanModel.cluster_centers_[:, 1]) -- could add this back in?
    #maybe add in a .describe() table?
    X = dff

    pca = PCA(len(X.columns))
    components = pca.fit_transform(X)

    fig_2 = px.scatter(components, x=0, y=1, color=dff['k_means_cluster']) #add an option to view in wither 2d or 3d
    #total_var = pca.explained_variance_ratio_.sum() * 100
    #fig_2 = px.scatter_3d(
    #components, x=0, y=1, z=2, color=df_kmeans_table['k_means_cluster'],
    #title=f'Total Explained Variance: {total_var:.2f}%'

        
    return  html.Div([dash_table.DataTable(
                data=df.to_dict('rows'),
                columns=[{'name': i, 'id': i} for i in df.columns],
                style_header={'backgroundColor': "#87CEEB",
                              'fontWeight': 'bold',
                              'textAlign': 'center',},
                style_table={'overflowX': 'scroll'},  
                style_cell={'minWidth': '180px', 'width': '180px',
                        'maxWidth': '180px','whiteSpace': 'normal'},                        
                 row_selectable="multi",
                 editable = False,
                 page_size = 5,
                 sort_mode = 'multi',
                 sort_action = 'multi',
                 filter_action='native'),
               html.Hr()
        ]), n_cluster_container, fig_2


###testing 




if __name__ == '__main__':
    app.run_server()

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
   Use a production WSGI server instead.
 * Debug mode: off


 * Running on http://127.0.0.1:8050/ (Press CTRL+C to quit)
127.0.0.1 - - [22/Sep/2021 18:46:54] "[37mGET / HTTP/1.1[0m" 200 -
127.0.0.1 - - [22/Sep/2021 18:46:54] "[37mGET /_dash-layout HTTP/1.1[0m" 200 -
127.0.0.1 - - [22/Sep/2021 18:46:54] "[37mGET /_dash-dependencies HTTP/1.1[0m" 200 -
127.0.0.1 - - [22/Sep/2021 18:46:54] "[37mGET /_dash-component-suites/dash_core_components/async-upload.js HTTP/1.1[0m" 200 -


Exception on /_dash-update-component [POST]
Traceback (most recent call last):
  File "C:\Users\Willie\anaconda3\lib\site-packages\flask\app.py", line 2447, in wsgi_app
    response = self.full_dispatch_request()
  File "C:\Users\Willie\anaconda3\lib\site-packages\flask\app.py", line 1952, in full_dispatch_request
    rv = self.handle_user_exception(e)
  File "C:\Users\Willie\anaconda3\lib\site-packages\flask\app.py", line 1821, in handle_user_exception
    reraise(exc_type, exc_value, tb)
  File "C:\Users\Willie\anaconda3\lib\site-packages\flask\_compat.py", line 39, in reraise
    raise value
  File "C:\Users\Willie\anaconda3\lib\site-packages\flask\app.py", line 1950, in full_dispatch_request
    rv = self.dispatch_request()
  File "C:\Users\Willie\anaconda3\lib\site-packages\flask\app.py", line 1936, in dispatch_request
    return self.view_functions[rule.endpoint](**req.view_args)
  File "C:\Users\Willie\anaconda3\lib\site-packages\dash\dash.py", line 1096, in dispatch
    respons

127.0.0.1 - - [22/Sep/2021 18:46:54] "[35m[1mPOST /_dash-update-component HTTP/1.1[0m" 500 -


33
<class 'int'>

<class 'int'>
Exception on /_dash-update-component [POST]
Traceback (most recent call last):
  File "C:\Users\Willie\anaconda3\lib\site-packages\flask\app.py", line 2447, in wsgi_app
    response = self.full_dispatch_request()
  File "C:\Users\Willie\anaconda3\lib\site-packages\flask\app.py", line 1952, in full_dispatch_request
    rv = self.handle_user_exception(e)
  File "C:\Users\Willie\anaconda3\lib\site-packages\flask\app.py", line 1821, in handle_user_exception
    reraise(exc_type, exc_value, tb)
  File "C:\Users\Willie\anaconda3\lib\site-packages\flask\_compat.py", line 39, in reraise
    raise value
  File "C:\Users\Willie\anaconda3\lib\site-packages\flask\app.py", line 1950, in full_dispatch_request
    rv = self.dispatch_request()
  File "C:\Users\Willie\anaconda3\lib\site-packages\flask\app.py", line 1936, in dispatch_request
    return self.view_functions[rule.endpoint](**req.view_args)
  File "C:\Users\Willie\anaconda3\lib\site-packages\dash\dash.py", li

Exception on /_dash-update-component [POST]
Traceback (most recent call last):
  File "C:\Users\Willie\anaconda3\lib\site-packages\flask\app.py", line 2447, in wsgi_app
    response = self.full_dispatch_request()
  File "C:\Users\Willie\anaconda3\lib\site-packages\flask\app.py", line 1952, in full_dispatch_request
    rv = self.handle_user_exception(e)
  File "C:\Users\Willie\anaconda3\lib\site-packages\flask\app.py", line 1821, in handle_user_exception
    reraise(exc_type, exc_value, tb)
  File "C:\Users\Willie\anaconda3\lib\site-packages\flask\_compat.py", line 39, in reraise
    raise value
  File "C:\Users\Willie\anaconda3\lib\site-packages\flask\app.py", line 1950, in full_dispatch_request
    rv = self.dispatch_request()
  File "C:\Users\Willie\anaconda3\lib\site-packages\flask\app.py", line 1936, in dispatch_request
    return self.view_functions[rule.endpoint](**req.view_args)
  File "C:\Users\Willie\anaconda3\lib\site-packages\dash\dash.py", line 1096, in dispatch
    respons

127.0.0.1 - - [22/Sep/2021 18:46:54] "[35m[1mPOST /_dash-update-component HTTP/1.1[0m" 500 -


Exception on /_dash-update-component [POST]
Traceback (most recent call last):
  File "C:\Users\Willie\anaconda3\lib\site-packages\flask\app.py", line 2447, in wsgi_app
    response = self.full_dispatch_request()
  File "C:\Users\Willie\anaconda3\lib\site-packages\flask\app.py", line 1952, in full_dispatch_request
    rv = self.handle_user_exception(e)
  File "C:\Users\Willie\anaconda3\lib\site-packages\flask\app.py", line 1821, in handle_user_exception
    reraise(exc_type, exc_value, tb)
  File "C:\Users\Willie\anaconda3\lib\site-packages\flask\_compat.py", line 39, in reraise
    raise value
  File "C:\Users\Willie\anaconda3\lib\site-packages\flask\app.py", line 1950, in full_dispatch_request
    rv = self.dispatch_request()
  File "C:\Users\Willie\anaconda3\lib\site-packages\flask\app.py", line 1936, in dispatch_request
    return self.view_functions[rule.endpoint](**req.view_args)
  File "C:\Users\Willie\anaconda3\lib\site-packages\dash\dash.py", line 1096, in dispatch
    respons

127.0.0.1 - - [22/Sep/2021 18:46:54] "[35m[1mPOST /_dash-update-component HTTP/1.1[0m" 500 -


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

127.0.0.1 - - [22/Sep/2021 18:47:04] "[37mPOST /_dash-update-component HTTP/1.1[0m" 200 -
127.0.0.1 - - [22/Sep/2021 18:47:04] "[37mGET /_dash-component-suites/dash_table/async-highlight.js HTTP/1.1[0m" 200 -
127.0.0.1 - - [22/Sep/2021 18:47:04] "[37mGET /_dash-component-suites/dash_table/async-table.js HTTP/1.1[0m" 200 -




127.0.0.1 - - [22/Sep/2021 18:47:04] "[37mPOST /_dash-update-component HTTP/1.1[0m" 200 -

KMeans is known to have a memory leak on Windows with MKL, when there are less chunks than available threads. You can avoid it by setting the environment variable OMP_NUM_THREADS=1.



3
<class 'int'>
3
<class 'int'>


127.0.0.1 - - [22/Sep/2021 18:47:04] "[37mPOST /_dash-update-component HTTP/1.1[0m" 200 -
127.0.0.1 - - [22/Sep/2021 18:47:09] "[37mGET /_dash-component-suites/dash_core_components/async-slider.js HTTP/1.1[0m" 200 -
127.0.0.1 - - [22/Sep/2021 18:47:09] "[37mGET /_dash-component-suites/dash_core_components/async-graph.js HTTP/1.1[0m" 200 -
127.0.0.1 - - [22/Sep/2021 18:47:09] "[37mGET /_dash-component-suites/dash_core_components/async-plotlyjs.js HTTP/1.1[0m" 200 -

KMeans is known to have a memory leak on Windows with MKL, when there are less chunks than available threads. You can avoid it by setting the environment variable OMP_NUM_THREADS=1.

127.0.0.1 - - [22/Sep/2021 18:47:13] "[37mPOST /_dash-update-component HTTP/1.1[0m" 200 -


5
<class 'int'>


127.0.0.1 - - [22/Sep/2021 18:47:23] "[37mGET /_dash-component-suites/dash_core_components/async-dropdown.js HTTP/1.1[0m" 200 -




127.0.0.1 - - [22/Sep/2021 18:47:26] "[37mPOST /_dash-update-component HTTP/1.1[0m" 200 -


2
<class 'int'>



KMeans is known to have a memory leak on Windows with MKL, when there are less chunks than available threads. You can avoid it by setting the environment variable OMP_NUM_THREADS=1.

127.0.0.1 - - [22/Sep/2021 18:57:46] "[37mPOST /_dash-update-component HTTP/1.1[0m" 200 -


2
<class 'int'>



KMeans is known to have a memory leak on Windows with MKL, when there are less chunks than available threads. You can avoid it by setting the environment variable OMP_NUM_THREADS=1.

127.0.0.1 - - [22/Sep/2021 18:58:28] "[37mPOST /_dash-update-component HTTP/1.1[0m" 200 -


5
<class 'int'>
