In [2]:
import dash
import dash_bootstrap_components as dbc
from dash_bootstrap_templates import load_figure_template
from dash import Input, Output, dcc, html, dash_table
import base64

In [3]:
#Import Library
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import datetime as dt
import plotly.express as px

In [4]:
#Import Library
from sklearn.neighbors import LocalOutlierFactor
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
from sklearn.metrics import silhouette_samples
from yellowbrick.cluster import KElbowVisualizer
from yellowbrick.cluster import SilhouetteVisualizer, InterclusterDistance

In [5]:
# Import data
# The raw survey responses are loaded into a DataFrame.
# NOTE(review): the frame displayed below includes personal columns (full names,
# e-mail addresses, phone numbers) — consider clearing this output before sharing.
data = pd.read_csv('dataset/datasetFull.csv')
data

Unnamed: 0,Timestamp,EmailAddress,NamaLengkap,NIM,Angkatan,GolonganUKT,NoHP,IPGanjil,OrganisasiGanjil,KerjaGanjil,IPGenap,OrganisasiGenap,KerjaGenap
0,23/09/2022 16:57,all4yandaru@gmail.com,Liek Allyandaru,123180054,2018,4,82136564484,3.83,Ya,Tidak,3.96,Tidak,Tidak
1,23/09/2022 18:16,isnan.rizqikurniawan@gmail.com,Isnan Rizqi Kurniawan,123170041,2017,4,81215255331,3.83,Ya,Tidak,3.80,Ya,Tidak
2,23/09/2022 18:20,michelpiercetahya@gmail.com,Michel Pierce Tahya,123210103,2021,5,85156441564,3.90,Tidak,Tidak,3.70,Ya,Tidak
3,23/09/2022 18:20,indahyaniomel@gmail.com,Indahyani,123180013,2018,1,-,3.60,Tidak,Tidak,2.83,Tidak,Tidak
4,23/09/2022 18:20,dilaajengm@gmail.com,Dila Ajeng Meiliawati,123180009,2018,4,81275270774,3.62,Tidak,Tidak,3.70,Tidak,Tidak
...,...,...,...,...,...,...,...,...,...,...,...,...,...
205,12/10/2022 10:52,anggita.setiyani@gmail.com,Anggita Setiyani Putri,124190037,2019,2,-,3.82,Ya,Tidak,3.48,Tidak,Ya
206,12/10/2022 11:11,123210098@student.upnyk.ac.id,Aditya Prayoga,123210098,2021,3,Gak usah mas,3.86,Ya,Tidak,3.60,Tidak,Tidak
207,12/10/2022 13:13,aliyahalmairah1506@gmail.com,A. Aliyah Almairah Syarif Putri,123190003,2019,4,89606347306,3.65,Ya,Tidak,3.11,Ya,Tidak
208,12/10/2022 15:32,nikolasnanda@gmail.com,Nicholas Nanda Sulaksana,123180049,2018,4,6.28E+12,3.83,Ya,Tidak,4.00,Ya,Tidak


In [6]:
# Remove the columns that are not used in the analysis, then renumber the rows
columns = ['NamaLengkap','Timestamp','EmailAddress','Angkatan','NoHP']
data = data.drop(columns=columns).reset_index(drop=True)
data


Unnamed: 0,NIM,GolonganUKT,IPGanjil,OrganisasiGanjil,KerjaGanjil,IPGenap,OrganisasiGenap,KerjaGenap
0,123180054,4,3.83,Ya,Tidak,3.96,Tidak,Tidak
1,123170041,4,3.83,Ya,Tidak,3.80,Ya,Tidak
2,123210103,5,3.90,Tidak,Tidak,3.70,Ya,Tidak
3,123180013,1,3.60,Tidak,Tidak,2.83,Tidak,Tidak
4,123180009,4,3.62,Tidak,Tidak,3.70,Tidak,Tidak
...,...,...,...,...,...,...,...,...
205,124190037,2,3.82,Ya,Tidak,3.48,Tidak,Ya
206,123210098,3,3.86,Ya,Tidak,3.60,Tidak,Tidak
207,123190003,4,3.65,Ya,Tidak,3.11,Ya,Tidak
208,123180049,4,3.83,Ya,Tidak,4.00,Ya,Tidak


In [7]:
# Store NIM as text: it is used further down as a lookup key for flagged rows, not as a quantity
data['NIM'] = data['NIM'].astype(str)

In [8]:
# Feature frame: every column except the NIM identifier
columns = ['NIM']
dataFilter = data.drop(columns=columns).reset_index(drop=True)

In [9]:
# Keep only the three *Genap attributes used for clustering
dataFilter = dataFilter.loc[:, ['IPGenap', 'OrganisasiGenap', 'KerjaGenap']]
dataFilter

Unnamed: 0,IPGenap,OrganisasiGenap,KerjaGenap
0,3.96,Tidak,Tidak
1,3.80,Ya,Tidak
2,3.70,Ya,Tidak
3,2.83,Tidak,Tidak
4,3.70,Tidak,Tidak
...,...,...,...
205,3.48,Tidak,Ya
206,3.60,Tidak,Tidak
207,3.11,Ya,Tidak
208,4.00,Ya,Tidak


In [10]:
# Encode the Ya/Tidak answers as 1/0 (replace returns a new frame, dataFilter is untouched)
yesNoIndex = {'Ya': 1, 'Tidak': 0}
dataGenap = dataFilter.replace(yesNoIndex)
dataGenap

Unnamed: 0,IPGenap,OrganisasiGenap,KerjaGenap
0,3.96,0,0
1,3.80,1,0
2,3.70,1,0
3,2.83,0,0
4,3.70,0,0
...,...,...,...
205,3.48,0,1
206,3.60,0,0
207,3.11,1,0
208,4.00,1,0


In [11]:
# NIMs flagged as outliers are accumulated here by the LOF passes
outliers = []

# Full data set (NIM + encoded features) that will later receive the Inlier/Outlier label
fixOutlier = (
    data[['NIM', 'IPGenap', 'OrganisasiGenap', 'KerjaGenap']]
    .rename(columns={
        'IPGenap': 'IP',
        'OrganisasiGenap': 'Organisasi',
        'KerjaGenap': 'Kerja',
    })
    .replace(yesNoIndex)
)

# Working copy from which flagged rows are removed
fixOutlierDummy = fixOutlier.copy()

In [12]:
# Same encoded features under the shorter column names used from here on
dataFinal = dataGenap[['IPGenap', 'OrganisasiGenap', 'KerjaGenap']].rename(
    columns={
        'IPGenap': 'IP',
        'OrganisasiGenap': 'Organisasi',
        'KerjaGenap': 'Kerja',
    }
)
dataFinal

Unnamed: 0,IP,Organisasi,Kerja
0,3.96,0,0
1,3.80,1,0
2,3.70,1,0
3,2.83,0,0
4,3.70,0,0
...,...,...,...
205,3.48,0,1
206,3.60,0,0
207,3.11,1,0
208,4.00,1,0


In [13]:
# Apply the Local Outlier Factor method (first pass)
clf = LocalOutlierFactor(n_neighbors=20, contamination="auto")
X = dataFinal[['IP','Organisasi','Kerja']].to_numpy()
dataX = dataFinal[['IP']].to_numpy()
dataY = dataFinal[['Organisasi']].to_numpy()
dataZ = dataFinal[['Kerja']].to_numpy()

y_pred = clf.fit_predict(X)
X_scores = clf.negative_outlier_factor_
# Round to two decimals, then flip the sign so the scores become positive
round_off_values = np.round(X_scores, 2)
new = -round_off_values

In [14]:
# Collect the rows whose (positive) LOF score reaches the 1.5 cut-off.
datas = pd.DataFrame(new)

# `datas` has a fresh 0..n-1 index, so its labels line up with the
# reset indexes of dataFinal / fixOutlierDummy.
outlier = datas.index[datas[0] >= 1.5].tolist()

# Remember the NIMs of the flagged rows so the full data set can be labelled later
outliers.extend(fixOutlierDummy["NIM"].loc[outlier])

print(len(outlier))

30


In [15]:
# Remove the flagged rows from both frames and renumber what is left
dataFinal = dataFinal.drop(index=outlier).reset_index(drop=True)
fixOutlierDummy = fixOutlierDummy.drop(index=outlier).reset_index(drop=True)

In [16]:
# Apply the Local Outlier Factor method again on the data left after the first pass
clf = LocalOutlierFactor(n_neighbors=20, contamination="auto")
X = dataFinal[['IP','Organisasi','Kerja']].to_numpy()
dataX = dataFinal[['IP']].to_numpy()
dataY = dataFinal[['Organisasi']].to_numpy()
dataZ = dataFinal[['Kerja']].to_numpy()

y_pred = clf.fit_predict(X)
X_scores = clf.negative_outlier_factor_
# Round to two decimals, then flip the sign so the scores become positive
round_off_values = np.round(X_scores, 2)
new = -round_off_values

In [17]:
# Collect the rows whose (positive) LOF score reaches the 1.5 cut-off in the second pass.
datas = pd.DataFrame(new)

# `datas` has a fresh 0..n-1 index, so its labels line up with the
# reset indexes of dataFinal / fixOutlierDummy.
outlier = datas.index[datas[0] >= 1.5].tolist()

# Add the newly flagged NIMs to the ones found in the first pass
outliers.extend(fixOutlierDummy["NIM"].loc[outlier])

print(len(outlier))

8


In [18]:
# Remove the rows flagged in the second pass and renumber what is left
dataFinal = dataFinal.drop(index=outlier).reset_index(drop=True)
fixOutlierDummy = fixOutlierDummy.drop(index=outlier).reset_index(drop=True)

In [19]:
# Label every row of the full data set by whether its NIM was flagged in either LOF pass
x = ["Outlier" if nim in outliers else "Inlier" for nim in fixOutlier["NIM"]]
fixOutlier["Outlier"] = x

In [20]:
# Rows that remain after both outlier-removal passes
fixOutlierDummy

Unnamed: 0,NIM,IP,Organisasi,Kerja
0,123180054,3.96,0,0
1,123170041,3.80,1,0
2,123210103,3.70,1,0
3,123180013,2.83,0,0
4,123180009,3.70,0,0
...,...,...,...,...
167,123210146,3.40,0,0
168,124200064,3.94,1,0
169,123210098,3.60,0,0
170,123180049,4.00,1,0


In [21]:
# 3-D scatter of all rows, coloured by their Inlier/Outlier label
fig_lof = px.scatter_3d(
    fixOutlier,
    x='IP',
    y='Organisasi',
    z='Kerja',
    color='Outlier',
    opacity=0.7,
)
fig_lof.update_layout(margin={'l': 0, 'r': 0, 'b': 0, 't': 0})

In [22]:
# NOTE(review): despite the name, no scaling is applied here — this is a plain copy of dataFinal
dataScaled = dataFinal.copy()
dataScaled

Unnamed: 0,IP,Organisasi,Kerja
0,3.96,0,0
1,3.80,1,0
2,3.70,1,0
3,2.83,0,0
4,3.70,0,0
...,...,...,...
167,3.40,0,0
168,3.94,1,0
169,3.60,0,0
170,4.00,1,0


In [23]:
# Modelling
# Elbow method: fit K-Means for each candidate k and record the inertia (SSE).

ssd = []
range_n_clusters = [2, 3, 4, 5, 6, 7, 8]
for num_clusters in range_n_clusters:
    # Fixed seed so the curve is the same on every run
    kmeans = KMeans(n_clusters=num_clusters, max_iter=300, random_state=56)
    kmeans.fit(dataScaled)

    ssd.append(kmeans.inertia_)

# Plot the SSE for each number of clusters
fig_elbow = px.line(x=range_n_clusters, y=ssd, labels={'x' :'Cluster', 'y' :'Elbow SSE(Sum of Square Error)'} )
fig_elbow

In [24]:
# K-Means with three clusters and a fixed seed

kmeans = KMeans(n_clusters=3, max_iter=300, random_state=56).fit(dataScaled)
labels = kmeans.predict(dataScaled)
# Centroid coordinates of the cluster each sample was assigned to
samplesCentroids = kmeans.cluster_centers_[labels]

In [25]:
# Attach the cluster label (as a string) to the feature frames and to the NIM-keyed result frame.
# NOTE: DataFrame.insert raises if the column already exists, so this cell cannot be
# re-run without first rebuilding dataScaled / dataFinal.
fixData = fixOutlierDummy.copy()
dataScaled.insert(3,"Cluster", kmeans.labels_.astype(str))
dataFinal.insert(3,"Cluster", kmeans.labels_.astype(str))
fixData.insert(4,"Cluster", kmeans.labels_.astype(str))
fixData

Unnamed: 0,NIM,IP,Organisasi,Kerja,Cluster
0,123180054,3.96,0,0,0
1,123170041,3.80,1,0,1
2,123210103,3.70,1,0,1
3,123180013,2.83,0,0,0
4,123180009,3.70,0,0,0
...,...,...,...,...,...
167,123210146,3.40,0,0,0
168,124200064,3.94,1,0,1
169,123210098,3.60,0,0,0
170,123180049,4.00,1,0,1


In [26]:
# Number of rows assigned to each cluster
dataFinal["Cluster"].value_counts()

0    99
1    51
2    22
Name: Cluster, dtype: int64

In [27]:
# Feature-only view of dataScaled (cluster label removed) for plotting and scoring
dataPlot = dataScaled.drop('Cluster', axis='columns')
dataPlot

Unnamed: 0,IP,Organisasi,Kerja
0,3.96,0,0
1,3.80,1,0
2,3.70,1,0
3,2.83,0,0
4,3.70,0,0
...,...,...,...
167,3.40,0,0
168,3.94,1,0
169,3.60,0,0
170,4.00,1,0


In [28]:
# 3-D scatter of the clustered rows, coloured by cluster label
scatter_fig = px.scatter_3d(
    dataPlot,
    x="IP",
    y="Organisasi",
    z="Kerja",
    color=dataScaled['Cluster'],
    opacity=0.7,
)
scatter_fig.update_layout(margin={'l': 0, 'r': 0, 'b': 0, 't': 0})
scatter_fig

In [29]:
# fig, ax = plt.subplots(2, 2, figsize=(15,25))
# for i in [2, 3, 4, 5]:
#     '''
#     Create KMeans instance for different number of clusters
#     '''
#     km = KMeans(n_clusters=i, max_iter=300, random_state=56)
#     q, mod = divmod(i, 2)
#     '''
#     Create SilhouetteVisualizer instance with KMeans instance
#     Fit the visualizer
#     '''
#     visualizer = SilhouetteVisualizer(km, colors='yellowbrick', ax=ax[q-1][mod])
#     visualizer.fit(dataPlot) 

In [30]:
# Silhouette score for each candidate number of clusters

range_n_clusters = [2, 3, 4, 5, 6, 7, 8]

s_cluster = 0
# Silhouette scores lie in [-1, 1]; start below any attainable value so the
# best k is recorded even when every score is zero or negative.
max_score = float("-inf")

results_sil = {}

for num_clusters in range_n_clusters:

    # Initialise K-Means with a fixed seed so the scores are the same on every run
    kmeans = KMeans(n_clusters=num_clusters, max_iter=50, random_state=56)
    kmeans.fit(dataPlot)

    cluster_labels = kmeans.labels_

    # Mean silhouette coefficient over all samples
    silhouette_avg = silhouette_score(dataPlot, cluster_labels)

    results_sil[num_clusters] = silhouette_avg

    # Track the best-scoring number of clusters
    if silhouette_avg > max_score:
        max_score = silhouette_avg
        s_cluster = num_clusters

# Visualize
fig_silhouette = px.line( x= list(results_sil.keys()), y=list(results_sil.values()), labels={'x' :'Number of Clusters', 'y' :'Silhouette Score'})
fig_silhouette

# sil_cluster = "Optimal Cluster = {}, with {} score.".format(s_cluster, max_score)

In [59]:
# Comparison run on the encoded data as it was before the LOF passes (dataGenap is never filtered).
# NOTE(review): "WL" presumably stands for "without LOF" — confirm.
testWL = dataGenap.copy()
testWL

Unnamed: 0,IPGenap,OrganisasiGenap,KerjaGenap
0,3.96,0,0
1,3.80,1,0
2,3.70,1,0
3,2.83,0,0
4,3.70,0,0
...,...,...,...
205,3.48,0,1
206,3.60,0,0
207,3.11,1,0
208,4.00,1,0


In [60]:
# Elbow method on the comparison data (testWL).
# The figure gets its own name: reusing `fig_elbow` here would replace the
# cleaned-data curve that the dashboard layout displays when the notebook is
# run top to bottom.
ssd = []
range_n_clusters = [2, 3, 4, 5, 6, 7, 8]
for num_clusters in range_n_clusters:
    # Fixed seed so the curve is the same on every run
    kmeans = KMeans(n_clusters=num_clusters, max_iter=300, random_state=56)
    kmeans.fit(testWL)

    ssd.append(kmeans.inertia_)

# Plot the SSE for each number of clusters
fig_elbow_wl = px.line(x=range_n_clusters, y=ssd, labels={'x' :'Cluster', 'y' :'Elbow SSE(Sum of Square Error)'} )
fig_elbow_wl

In [61]:
# Silhouette score on the comparison data (testWL).
# The figure gets its own name: reusing `fig_silhouette` here would replace the
# cleaned-data curve that the dashboard layout displays when the notebook is
# run top to bottom.

range_n_clusters = [2, 3, 4, 5, 6, 7, 8]

s_cluster = 0
# Silhouette scores lie in [-1, 1]; start below any attainable value so the
# best k is recorded even when every score is zero or negative.
max_score = float("-inf")

results_sil = {}

for num_clusters in range_n_clusters:

    # Initialise K-Means with a fixed seed so the scores are the same on every run
    kmeans = KMeans(n_clusters=num_clusters, max_iter=50, random_state=56)
    kmeans.fit(testWL)

    cluster_labels = kmeans.labels_

    # Mean silhouette coefficient over all samples
    silhouette_avg = silhouette_score(testWL, cluster_labels)

    results_sil[num_clusters] = silhouette_avg

    # Track the best-scoring number of clusters
    if silhouette_avg > max_score:
        max_score = silhouette_avg
        s_cluster = num_clusters

# Visualize
fig_silhouette_wl = px.line( x= list(results_sil.keys()), y=list(results_sil.values()), labels={'x' :'Number of Clusters', 'y' :'Silhouette Score'})
fig_silhouette_wl

# sil_cluster = "Optimal Cluster = {}, with {} score.".format(s_cluster, max_score)

In [62]:
# K-Means on the comparison data with six clusters and a fixed seed

kmeans = KMeans(n_clusters=6, max_iter=300, random_state=56).fit(testWL)
labels = kmeans.predict(testWL)
# Centroid coordinates of the cluster each sample was assigned to
samplesCentroids = kmeans.cluster_centers_[labels]

In [63]:
# Attach the cluster label (as a string) to the comparison frame.
# NOTE: DataFrame.insert raises if the column already exists, so re-running this cell
# requires rebuilding testWL first.
testWL.insert(3,"Cluster", kmeans.labels_.astype(str))
testWL

Unnamed: 0,IPGenap,OrganisasiGenap,KerjaGenap,Cluster
0,3.96,0,0,5
1,3.80,1,0,1
2,3.70,1,0,1
3,2.83,0,0,0
4,3.70,0,0,5
...,...,...,...,...
205,3.48,0,1,2
206,3.60,0,0,5
207,3.11,1,0,1
208,4.00,1,0,1


In [64]:
# 3-D scatter of the comparison data, coloured by cluster label
scatter_fig = px.scatter_3d(
    testWL,
    x="IPGenap",
    y="OrganisasiGenap",
    z="KerjaGenap",
    color=testWL['Cluster'],
    opacity=0.7,
)
scatter_fig.update_layout(margin={'l': 0, 'r': 0, 'b': 0, 't': 0})
scatter_fig

In [32]:
# CSS for the sidebar: pinned to the left edge and spanning the full height.
SIDEBAR_STYLE = {
    "position": "fixed",
    "top": 0,
    "left": 0,
    "bottom": 0,
    "width": "16rem",
    "padding": "2rem 1rem",
    "background-color": "#221E40",
}

# CSS for the main content, which sits to the right of the sidebar;
# the left margin clears the fixed 16rem-wide sidebar.
CONTENT_STYLE = {
    "margin-left": "18rem",
    "margin-right": "2rem",
    "padding": "2rem 1rem",
}

In [33]:
# The page layouts are injected into "page-content" by a callback, so the
# table ids used by the other callbacks are not part of the initial layout.
# suppress_callback_exceptions tells Dash not to report those ids as missing.
app = dash.Dash(
    external_stylesheets=[dbc.themes.BOOTSTRAP],
    suppress_callback_exceptions=True,
)

In [34]:
# Number of rows shown per table page
PAGE_SIZE = 10

# "Raw Data" page: heading, card title and a table of `data`.
# Paging and filtering are both set to 'custom', i.e. the rows are supplied by a callback.
home = html.Div([
    html.H2('Raw Data', style={'font-weight': 'bold'}),
    html.Br(),
    dbc.Col([
        dbc.Card(
            [html.H6(id="hael"), html.H4("Data Asli")],
            className="p-4 mr-2 shadow-sm  ",
        ),
        html.Div(
            dash_table.DataTable(
                id='table-filtering',
                columns=[{"name": col, "id": col} for col in data.columns],
                style_header=dict(backgroundColor='rgb(50, 50, 50)', color='white'),
                style_cell=dict(padding='5px'),
                style_cell_conditional=[dict(textAlign='center')],
                page_current=0,
                page_size=PAGE_SIZE,
                page_action='custom',
                filter_action='custom',
                filter_query='',
            )
        ),
    ]),
])

# Operators accepted in a DataTable filter clause. Each entry lists the
# spellings of one operator; the first spelling (stripped) is the name that
# split_filter_part reports back.
operators = [['ge ', '>='],
             ['le ', '<='],
             ['lt ', '<'],
             ['gt ', '>'],
             ['ne ', '!='],
             ['eq ', '='],
             ['contains '],
             ['datestartswith ']]


def split_filter_part(filter_part):
    """Parse one clause of a DataTable ``filter_query`` string.

    Parameters
    ----------
    filter_part : str
        A single clause such as ``'{IP} ge 3.5'``.

    Returns
    -------
    tuple or list
        ``(column_name, operator_name, value)`` where ``operator_name`` is the
        first spelling of the matched operator and ``value`` is the unquoted
        string for quoted input, a float when the text parses as a number, or
        the raw text otherwise. ``[None, None, None]`` is returned when no
        operator is found or when the clause has no value yet.
    """
    for operator_type in operators:
        for operator in operator_type:
            if operator in filter_part:
                name_part, value_part = filter_part.split(operator, 1)
                # The column name is whatever sits between the outermost braces
                name = name_part[name_part.find('{') + 1: name_part.rfind('}')]

                value_part = value_part.strip()
                if not value_part:
                    # Operator present but no value: treat the clause as
                    # "no filter" instead of failing on value_part[0].
                    return [None] * 3

                v0 = value_part[0]
                if (v0 == value_part[-1] and v0 in ("'", '"', '`')):
                    # Quoted value: drop the quotes and unescape embedded ones
                    value = value_part[1: -1].replace('\\' + v0, v0)
                else:
                    try:
                        value = float(value_part)
                    except ValueError:
                        value = value_part
                return name, operator_type[0].strip(), value

    return [None] * 3


@app.callback(
    Output('table-filtering', "data"),
    Input('table-filtering', "page_current"),
    Input('table-filtering', "page_size"),
    Input('table-filtering', "filter_query"))
def update_table(page_current,page_size, filter):
    """Return the requested page of ``data`` after applying the filter query.

    Parameters
    ----------
    page_current : int
        Zero-based index of the page to show.
    page_size : int
        Number of rows per page.
    filter : str
        The table's ``filter_query``; clauses are joined with ``' && '``.

    Returns
    -------
    list of dict
        One dict per visible row.
    """
    dff = data
    for filter_part in filter.split(' && '):
        col_name, operator, filter_value = split_filter_part(filter_part)
        if operator in ('eq', 'ne', 'lt', 'le', 'gt', 'ge'):
            # these operators match pandas series operator method names
            dff = dff.loc[getattr(dff[col_name], operator)(filter_value)]
        elif operator == 'contains':
            # Whole numbers are parsed as floats ("123" -> 123.0); search for "123".
            # Quoted text is already a string and is used as typed.
            if isinstance(filter_value, float) and str(filter_value).endswith(".0"):
                filter_value = str(round(filter_value))
            else:
                filter_value = str(filter_value)
            # Plain substring match: user text must not be interpreted as a regex
            dff = dff.loc[dff[col_name].astype(str).str.contains(filter_value, regex=False)]
        elif operator == 'datestartswith':
            # this is a simplification of the front-end filtering logic,
            # only works with complete fields in standard format
            filter_value = str(filter_value)
            dff = dff.loc[dff[col_name].astype(str).str.startswith(filter_value)]

    # 'records' is the documented orient name; the old 'rows' spelling is
    # rejected by pandas 2.x.
    return dff.iloc[
        page_current*page_size:(page_current+ 1)*page_size
    ].to_dict('records')

In [35]:
# "Data Cleansing" page: one table of dataFilter and one of its encoded version, dataGenap.
# Both tables use 'custom' paging/filtering, i.e. their rows are supplied by callbacks.
page1 = html.Div([
    html.H2('Data Cleansing', style={'font-weight': 'bold'}),
    html.Br(),
    dbc.Col([
        dbc.Card(
            [html.H6(id="hael"), html.H4("Remove Unused and Null Data")],
            className="p-4 mr-2 shadow-sm  ",
        ),
        html.Div(
            dash_table.DataTable(
                id='table2-filtering',
                columns=[{"name": col, "id": col} for col in dataFilter.columns],
                style_header=dict(backgroundColor='rgb(50, 50, 50)', color='white'),
                style_cell=dict(padding='5px'),
                style_cell_conditional=[dict(textAlign='center')],
                page_current=0,
                page_size=PAGE_SIZE,
                page_action='custom',
                filter_action='custom',
                filter_query='',
            )
        ),
    ]),
    html.Br(),
    dbc.Col([
        dbc.Card(
            [html.H6(id="hael"), html.H4("Encoding Data")],
            className="p-4 mr-2 shadow-sm  ",
        ),
        html.Div(
            dash_table.DataTable(
                id='table2-encoding',
                columns=[{"name": col, "id": col} for col in dataGenap.columns],
                style_header=dict(backgroundColor='rgb(50, 50, 50)', color='white'),
                style_cell=dict(padding='5px'),
                style_cell_conditional=[dict(textAlign='center')],
                page_current=0,
                page_size=PAGE_SIZE,
                page_action='custom',
                filter_action='custom',
                filter_query='',
            )
        ),
    ]),
])

@app.callback(
    Output('table2-filtering', "data"),
    Input('table2-filtering', "page_current"),
    Input('table2-filtering', "page_size"),
    Input('table2-filtering', "filter_query"))
def update_table(page_current,page_size, filter):
    """Return the requested page of ``dataFilter`` after applying the filter query.

    Parameters
    ----------
    page_current : int
        Zero-based index of the page to show.
    page_size : int
        Number of rows per page.
    filter : str
        The table's ``filter_query``; clauses are joined with ``' && '``.

    Returns
    -------
    list of dict
        One dict per visible row.
    """
    dff = dataFilter
    for filter_part in filter.split(' && '):
        col_name, operator, filter_value = split_filter_part(filter_part)
        if operator in ('eq', 'ne', 'lt', 'le', 'gt', 'ge'):
            # these operators match pandas series operator method names
            dff = dff.loc[getattr(dff[col_name], operator)(filter_value)]
        elif operator == 'contains':
            # Whole numbers are parsed as floats ("123" -> 123.0); search for "123".
            # Quoted text is already a string and is used as typed.
            if isinstance(filter_value, float) and str(filter_value).endswith(".0"):
                filter_value = str(round(filter_value))
            else:
                filter_value = str(filter_value)
            # Plain substring match: user text must not be interpreted as a regex
            dff = dff.loc[dff[col_name].astype(str).str.contains(filter_value, regex=False)]
        elif operator == 'datestartswith':
            # this is a simplification of the front-end filtering logic,
            # only works with complete fields in standard format
            filter_value = str(filter_value)
            dff = dff.loc[dff[col_name].astype(str).str.startswith(filter_value)]

    # 'records' is the documented orient name; the old 'rows' spelling is
    # rejected by pandas 2.x.
    return dff.iloc[
        page_current*page_size:(page_current+ 1)*page_size
    ].to_dict('records')


@app.callback(
    Output('table2-encoding', "data"),
    Input('table2-encoding', "page_current"),
    Input('table2-encoding', "page_size"),
    Input('table2-encoding', "filter_query"))
def update_table(page_current,page_size, filter):
    """Return the requested page of ``dataGenap`` after applying the filter query.

    Parameters
    ----------
    page_current : int
        Zero-based index of the page to show.
    page_size : int
        Number of rows per page.
    filter : str
        The table's ``filter_query``; clauses are joined with ``' && '``.

    Returns
    -------
    list of dict
        One dict per visible row.
    """
    dff = dataGenap
    for filter_part in filter.split(' && '):
        col_name, operator, filter_value = split_filter_part(filter_part)
        if operator in ('eq', 'ne', 'lt', 'le', 'gt', 'ge'):
            # these operators match pandas series operator method names
            dff = dff.loc[getattr(dff[col_name], operator)(filter_value)]
        elif operator == 'contains':
            # Whole numbers are parsed as floats ("123" -> 123.0); search for "123".
            # Quoted text is already a string and is used as typed.
            if isinstance(filter_value, float) and str(filter_value).endswith(".0"):
                filter_value = str(round(filter_value))
            else:
                filter_value = str(filter_value)
            # Plain substring match: user text must not be interpreted as a regex
            dff = dff.loc[dff[col_name].astype(str).str.contains(filter_value, regex=False)]
        elif operator == 'datestartswith':
            # this is a simplification of the front-end filtering logic,
            # only works with complete fields in standard format
            filter_value = str(filter_value)
            dff = dff.loc[dff[col_name].astype(str).str.startswith(filter_value)]

    # 'records' is the documented orient name; the old 'rows' spelling is
    # rejected by pandas 2.x.
    return dff.iloc[
        page_current*page_size:(page_current+ 1)*page_size
    ].to_dict('records')

In [36]:
# "Data Preparation" page: table of fixOutlier (with the Inlier/Outlier label)
# followed by the LOF 3-D scatter plot.
page2 = html.Div([
    html.H2('Data Preparation', style={'font-weight': 'bold'}),
    html.Br(),
    dbc.Col([
        dbc.Card(
            [html.H6(id="hael"), html.H4("Outlier Detection")],
            className="p-4 mr-2 shadow-sm  ",
        ),
        html.Div(
            dash_table.DataTable(
                id='table2-outlier',
                columns=[{"name": col, "id": col} for col in fixOutlier.columns],
                style_header=dict(backgroundColor='rgb(50, 50, 50)', color='white'),
                style_cell=dict(padding='5px'),
                style_cell_conditional=[dict(textAlign='center')],
                page_current=0,
                page_size=PAGE_SIZE,
                page_action='custom',
                filter_action='custom',
                filter_query='',
            )
        ),
        html.Br(),
        dcc.Graph(figure=fig_lof),
    ]),
])

@app.callback(
    Output('table2-outlier', "data"),
    Input('table2-outlier', "page_current"),
    Input('table2-outlier', "page_size"),
    Input('table2-outlier', "filter_query"))
def update_table(page_current,page_size, filter):
    """Return the requested page of ``fixOutlier`` after applying the filter query.

    Parameters
    ----------
    page_current : int
        Zero-based index of the page to show.
    page_size : int
        Number of rows per page.
    filter : str
        The table's ``filter_query``; clauses are joined with ``' && '``.

    Returns
    -------
    list of dict
        One dict per visible row.
    """
    dff = fixOutlier
    for filter_part in filter.split(' && '):
        col_name, operator, filter_value = split_filter_part(filter_part)
        if operator in ('eq', 'ne', 'lt', 'le', 'gt', 'ge'):
            # these operators match pandas series operator method names
            dff = dff.loc[getattr(dff[col_name], operator)(filter_value)]
        elif operator == 'contains':
            # Whole numbers are parsed as floats ("123" -> 123.0); search for "123".
            # Quoted text is already a string and is used as typed.
            if isinstance(filter_value, float) and str(filter_value).endswith(".0"):
                filter_value = str(round(filter_value))
            else:
                filter_value = str(filter_value)
            # Plain substring match: user text must not be interpreted as a regex
            dff = dff.loc[dff[col_name].astype(str).str.contains(filter_value, regex=False)]
        elif operator == 'datestartswith':
            # this is a simplification of the front-end filtering logic,
            # only works with complete fields in standard format
            filter_value = str(filter_value)
            dff = dff.loc[dff[col_name].astype(str).str.startswith(filter_value)]

    # 'records' is the documented orient name; the old 'rows' spelling is
    # rejected by pandas 2.x.
    return dff.iloc[
        page_current*page_size:(page_current+ 1)*page_size
    ].to_dict('records')

In [37]:
# "Data Modelling" page: elbow and silhouette charts side by side, then a
# table of fixData (the clustering result).
page3 = html.Div([
    html.H2('Data Modelling', style={'font-weight': 'bold'}),
    html.Br(),
    dbc.Row(
        [
            dbc.Col([
                dbc.Card(
                    [html.H6(id="hael"), html.H4("Elbow Method")],
                    className="p-4 mr-2 shadow-sm  ",
                ),
                dcc.Graph(figure=fig_elbow),
            ]),
            dbc.Col([
                dbc.Card(
                    [html.H6(id="hael"), html.H4("Silhouette Coefficient")],
                    className="p-4 mr-2 shadow-sm  ",
                ),
                dcc.Graph(figure=fig_silhouette),
            ]),
        ],
        className="m-2 mb-4",
    ),
    html.Br(),
    dbc.Col([
        dbc.Card(
            [html.H6(id="sr"), html.H4("Clustering Result")],
            className="p-4 mr-2 shadow-sm  ",
        ),
        html.Div(
            dash_table.DataTable(
                id='table-cluster',
                columns=[{"name": col, "id": col} for col in fixData.columns],
                style_header=dict(backgroundColor='rgb(50, 50, 50)', color='white'),
                style_cell=dict(padding='5px'),
                style_cell_conditional=[dict(textAlign='center')],
                page_current=0,
                page_size=PAGE_SIZE,
                page_action='custom',
                filter_action='custom',
                filter_query='',
            )
        ),
    ]),
])

@app.callback(
    Output('table-cluster', "data"),
    Input('table-cluster', "page_current"),
    Input('table-cluster', "page_size"),
    Input('table-cluster', "filter_query"))
def update_table(page_current,page_size, filter):
    """Return the requested page of ``fixData`` after applying the filter query.

    Parameters
    ----------
    page_current : int
        Zero-based index of the page to show.
    page_size : int
        Number of rows per page.
    filter : str
        The table's ``filter_query``; clauses are joined with ``' && '``.

    Returns
    -------
    list of dict
        One dict per visible row.
    """
    dff = fixData
    for filter_part in filter.split(' && '):
        col_name, operator, filter_value = split_filter_part(filter_part)
        if operator in ('eq', 'ne', 'lt', 'le', 'gt', 'ge'):
            # these operators match pandas series operator method names
            dff = dff.loc[getattr(dff[col_name], operator)(filter_value)]
        elif operator == 'contains':
            # Whole numbers are parsed as floats ("123" -> 123.0); search for "123".
            # Quoted text is already a string and is used as typed.
            if isinstance(filter_value, float) and str(filter_value).endswith(".0"):
                filter_value = str(round(filter_value))
            else:
                filter_value = str(filter_value)
            # Plain substring match: user text must not be interpreted as a regex
            dff = dff.loc[dff[col_name].astype(str).str.contains(filter_value, regex=False)]
        elif operator == 'datestartswith':
            # this is a simplification of the front-end filtering logic,
            # only works with complete fields in standard format
            filter_value = str(filter_value)
            dff = dff.loc[dff[col_name].astype(str).str.startswith(filter_value)]

    # 'records' is the documented orient name; the old 'rows' spelling is
    # rejected by pandas 2.x.
    return dff.iloc[
        page_current*page_size:(page_current+ 1)*page_size
    ].to_dict('records')

In [38]:
# Sidebar: application title plus one navigation link per page
sidebar = html.Div(
    [
        html.H3(
            "Sistem Klasterisasi Performa Akademik Mahasiswa",
            style={'color': 'white', 'font-family':'sans-serif', 'size':12},
        ),
        html.Hr(),
        dbc.Nav(
            [
                dbc.NavLink(link_text, href=link_href, active="exact", style={'color': 'white'})
                for link_text, link_href in (
                    ("Raw Data", "/"),
                    ("Data Cleansing", "/page-1"),
                    ("Data Preparation", "/page-2"),
                    ("Modelling", "/page-3"),
                )
            ],
            vertical=True,
            pills=True,
            style={'marginTop': "3rem"},
        ),
    ],
    style=SIDEBAR_STYLE,
)

# Placeholder that receives the layout of the selected page
content = html.Div(id="page-content", style=CONTENT_STYLE)

# Root layout: URL tracker, sidebar and the page placeholder
app.layout = html.Div([dcc.Location(id="url"), sidebar, content])


@app.callback(Output("page-content", "children"), [Input("url", "pathname")])
def render_page_content(pathname):
    """Return the layout that belongs to the current URL path.

    Parameters
    ----------
    pathname : str
        The ``pathname`` of the ``dcc.Location`` component.

    Returns
    -------
    The layout of the matching page, or a "404: Not found" panel for any
    other path.
    """
    if pathname == "/":
        return home
    elif pathname == "/page-1":
        return page1
    elif pathname == "/page-2":
        return page2
    elif pathname == "/page-3":
        return page3
    # If the user tries to reach a different page, return a 404 message.
    # dbc.Jumbotron no longer exists in dash-bootstrap-components 1.x, so the
    # panel is built from a plain Div with Bootstrap utility classes instead.
    return html.Div(
        [
            html.H1("404: Not found", className="text-danger"),
            html.Hr(),
            html.P(f"The pathname {pathname} was not recognised..."),
        ],
        className="p-3 bg-light rounded-3",
    )

In [39]:
# Start the Dash server on port 4050 (this call blocks until the server is stopped)
if __name__ == "__main__":
    app.run_server(port=4050)

Dash is running on http://127.0.0.1:4050/

 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:4050
Press CTRL+C to quit
