# Install the required libraries

In [7]:
!pip install dash



In [8]:
!pip install dash-bootstrap-components



# Import the libraries

In [10]:
import dash
import dash_bootstrap_components as dbc
from dash import dcc, html, Input, Output
import pandas as pd
import plotly.express as px
from sklearn.cluster import KMeans, DBSCAN
from sklearn.preprocessing import StandardScaler
import numpy as np
from dash import dash_table
import io
import base64

In [11]:
# Load the dataset, discard rows with missing values, and keep a view of the
# numeric spending columns that are used for clustering.
num_features = ['Fresh', 'Milk', 'Grocery', 'Frozen', 'Detergents_Paper', 'Delicassen']
df = pd.read_csv("/content/Wholesale customers data.csv")
df = df.dropna()
data = df.loc[:, num_features]

In [12]:
# Standardize the clustering features; the fitted scaler stays available at
# module level as `scaler`.
scaler = StandardScaler()
scaler.fit(data)
data_scaled = scaler.transform(data)

In [13]:
# Function to perform clustering
def perform_clustering(data, algorithm, n_clusters=3, eps=0.5, min_samples=5):
    """Assign a cluster label to every row of ``data``.

    Args:
        data: 2-D array-like of samples. It is passed straight to the
            scikit-learn estimator; only ``data.shape[0]`` is read here.
        algorithm: ``"KMeans"`` or ``"DBSCAN"``. Any other value (including
            ``None``) puts every row into a single cluster labelled ``0``.
        n_clusters: Number of clusters, used by KMeans only.
        eps: Neighbourhood radius, used by DBSCAN only.
        min_samples: Minimum neighbourhood size, used by DBSCAN only.

    Returns:
        A 1-D integer NumPy array with one label per row.
    """
    if algorithm == "KMeans":
        # n_init is pinned explicitly: scikit-learn changed its default between
        # releases, which would otherwise change the clustering for the same
        # random_state (and emits a FutureWarning on some versions).
        model = KMeans(n_clusters=n_clusters, n_init=10, random_state=42)
    elif algorithm == "DBSCAN":
        model = DBSCAN(eps=eps, min_samples=min_samples)
    else:
        # Integer dtype keeps the fallback labels consistent with the labels
        # returned by the estimators ("0" rather than "0.0" once stringified).
        return np.zeros(data.shape[0], dtype=int)

    return model.fit_predict(data)

# Dashboard Functions

In [14]:
def generate_csv(df):
    """Serialize ``df`` to CSV (index omitted) and wrap it in a base64 data URI.

    Args:
        df: Object exposing ``to_csv(buffer, index=False)``, e.g. a DataFrame.

    Returns:
        A ``data:text/csv;base64,...`` string containing the encoded CSV text.
    """
    with io.StringIO() as buffer:
        df.to_csv(buffer, index=False)
        csv_text = buffer.getvalue()

    encoded = base64.b64encode(csv_text.encode()).decode()
    return f"data:text/csv;base64,{encoded}"

# Dash application styled with the Bootstrap theme.
app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])

# Page layout: a title row, then a control panel (3/12 width) next to the
# results column (9/12 width) holding the plot, summary table and download.
app.layout = dbc.Container([
    dbc.Row([
        dbc.Col(html.H1("Interactive Clustering Dashboard", className="text-center text-primary mb-4"), width=12)
    ]),

    dbc.Row([
        # --- Control panel -------------------------------------------------
        dbc.Col([
            html.Label("Select Clustering Algorithm"),
            dcc.Dropdown(
                id='algorithm-dropdown',
                options=[
                    {'label': 'KMeans', 'value': 'KMeans'},
                    {'label': 'DBSCAN', 'value': 'DBSCAN'}
                ],
                value='KMeans',
                className="mb-3"
            ),
            html.Label("Select Features for Scatter Plot"),
            # X-axis feature
            dcc.Dropdown(
                id='x-feature',
                options=[{'label': col, 'value': col} for col in num_features],
                value='Fresh',
                className="mb-3"
            ),
            # Y-axis feature
            dcc.Dropdown(
                id='y-feature',
                options=[{'label': col, 'value': col} for col in num_features],
                value='Milk',
                className="mb-3"
            ),
            html.Label("Select Number of Clusters"),
            dcc.Slider(
                id='n-clusters-slider',
                min=2,
                max=10,
                step=1,
                value=3,
                marks={i: str(i) for i in range(2, 11)},
                className="mb-4"
            ),
            html.Label("DBSCAN: Epsilon (eps)"),
            dcc.Slider(
                id='eps-slider',
                min=0.1,
                max=2.0,
                step=0.1,
                value=0.5,
                # Whole-number positions must be int keys: a float key such as
                # 1.0 is serialized as "1.0", which does not match the slider
                # position 1, so the labels at 1.0 and 2.0 would not be drawn.
                marks={(i // 10 if i % 10 == 0 else i / 10): str(i / 10) for i in range(1, 21)},
                className="mb-4"
            ),
            html.Label("DBSCAN: Minimum Samples"),
            dcc.Slider(
                id='min-samples-slider',
                min=1,
                max=10,
                step=1,
                value=5,
                marks={i: str(i) for i in range(1, 11)},
                className="mb-4"
            )
        ], width=3, className="p-4 bg-light shadow rounded"),

        # --- Results -------------------------------------------------------
        dbc.Col([
            dcc.Graph(id='cluster-plot', config={'scrollZoom': True, 'displaylogo': False}),
            html.H4("Summary Table"),
            # Rows are supplied by the update_dashboard callback via the `data` property.
            dash_table.DataTable(
                id='summary-table',
                style_table={'overflowX': 'auto'},
                style_cell={'textAlign': 'center'}
            ),
            html.Button("Download Cluster Data", id="download-button", className="btn btn-primary mt-3"),
            dcc.Download(id="download-cluster-data")
        ], width=9)
    ])
], fluid=True)

@app.callback(
    [Output('cluster-plot', 'figure'), Output('summary-table', 'data')],
    [Input('algorithm-dropdown', 'value'), Input('n-clusters-slider', 'value'),
     Input('eps-slider', 'value'), Input('min-samples-slider', 'value'),
     Input('x-feature', 'value'), Input('y-feature', 'value')]
)
def update_dashboard(algorithm, n_clusters, eps, min_samples, x_feature, y_feature):
    """Re-cluster the scaled data and refresh the scatter plot and summary table.

    Args:
        algorithm: Value of the algorithm dropdown ("KMeans", "DBSCAN" or None).
        n_clusters: Value of the cluster-count slider (KMeans).
        eps: Value of the epsilon slider (DBSCAN).
        min_samples: Value of the minimum-samples slider (DBSCAN).
        x_feature: Column of ``df`` plotted on the x axis.
        y_feature: Column of ``df`` plotted on the y axis.

    Returns:
        A ``(figure, records)`` tuple: the Plotly scatter figure coloured by
        cluster, and one record per cluster holding the per-column means
        rounded to two decimals.

    Raises:
        dash.exceptions.PreventUpdate: If either axis dropdown has been cleared.
    """
    # A cleared dropdown delivers None; keep the current outputs instead of
    # plotting against a missing column.
    if x_feature is None or y_feature is None:
        raise dash.exceptions.PreventUpdate

    clusters = perform_clustering(data_scaled, algorithm, n_clusters=n_clusters, eps=eps, min_samples=min_samples)

    # Labels are stored as strings so they are coloured as discrete categories.
    # NOTE: this writes to the module-level frame; download_csv exports that
    # same frame, so the column has to live there.
    df['Cluster'] = clusters.astype(str)

    # Sort labels numerically before stringifying so that "10" follows "9"
    # rather than "1", and so legend/colour assignment does not depend on the
    # order in which clusters happen to appear in the data.
    ordered_labels = np.unique(clusters).astype(str).tolist()

    fig = px.scatter(df, x=x_feature, y=y_feature, color='Cluster',
                     category_orders={'Cluster': ordered_labels},
                     title='Clustering Results', color_discrete_sequence=px.colors.qualitative.Set1)

    # numeric_only guards against non-numeric columns, which newer pandas
    # refuses to average.
    summary = df.groupby('Cluster').mean(numeric_only=True).round(2).reset_index().to_dict('records')
    rank = {label: position for position, label in enumerate(ordered_labels)}
    summary.sort(key=lambda row: rank[row['Cluster']])

    return fig, summary

@app.callback(
    Output("download-cluster-data", "data"),
    Input("download-button", "n_clicks"),
    prevent_initial_call=True
)
def download_csv(n_clicks):
    """Send the module-level ``df`` to the browser as ``cluster_data.csv``.

    Args:
        n_clicks: Click count of the download button; it only triggers the
            callback and is not otherwise used.

    Returns:
        The payload built by ``dcc.send_data_frame`` for the Download component.
    """
    csv_writer = df.to_csv
    payload = dcc.send_data_frame(csv_writer, "cluster_data.csv", index=False)
    return payload

if __name__ == '__main__':
    # `run_server` is deprecated and no longer available in recent Dash
    # releases; prefer `run` and fall back only on older installations that
    # do not provide it yet.
    start_server = getattr(app, 'run', None) or app.run_server
    start_server(debug=True)


<IPython.core.display.Javascript object>