In [None]:
!pip install dash
!pip install jupyter-dash

Collecting jupyter-dash
  Downloading jupyter_dash-0.4.0-py3-none-any.whl (20 kB)
Collecting ansi2html
  Downloading ansi2html-1.6.0-py3-none-any.whl (14 kB)
Installing collected packages: ansi2html, jupyter-dash
Successfully installed ansi2html-1.6.0 jupyter-dash-0.4.0


In [None]:
import dash
from dash import dcc
from dash import html
import plotly.express as px
import pandas as pd

In [None]:
# Mount Google Drive (Colab only) so files under /content/drive, such as the
# CSV this notebook reads, are accessible.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
!pip install dash_bootstrap_components



In [None]:
import dash
import dash_bootstrap_components as dbc
import pandas as pd
import plotly.graph_objs as go
import plotly.express as px
from dash import dcc, html
from dash.dependencies import Input, Output
from sklearn import datasets
from sklearn.cluster import KMeans
from jupyter_dash import JupyterDash

# Load the holdings CSV from the mounted Drive and shorten the column names
# that the dashboard callbacks refer to (Value, Share, Manager, Stock).
stock_data = (
    pd.read_csv('/content/drive/My Drive/Analytics in Practice APG/data_all.csv')
    .rename(
        columns={
            'VALUE (x$1000)': 'Value',
            'SHRS OR PRN AMT': 'Share',
            'Manager Name': 'Manager',
            "NAME OF ISSUER": 'Stock',
        }
    )
)


In [None]:
# Ask JupyterDash to detect proxy settings from the notebook environment.
# NOTE(review): presumably only needed when the notebook is served behind a
# Jupyter server proxy (e.g. JupyterHub/Binder) - confirm whether it is
# required (or harmful) on Colab, where Drive is mounted above.
JupyterDash.infer_jupyter_proxy_config()

In [None]:
# Notebook-embedded Dash application styled with the Bootstrap theme.
app = JupyterDash(external_stylesheets=[dbc.themes.BOOTSTRAP])

# Card holding the only user control: the number of k-means clusters
# (numeric input, starts at 3, minimum 1, whole-number steps).
controls = dbc.Card(
    children=[
        html.Div(
            children=[
                dbc.Label("Cluster count"),
                dbc.Input(id="cluster-count", type="number", value=3, min=1, step=1),
            ]
        ),
    ],
    body=True,
)

# Page layout: title, rule, then one row split 8/4 between the cluster views
# (control card + three stacked graphs) and the manager table.
app.layout = dbc.Container(
    children=[
        html.H1("Stock k-means clustering"),
        html.Hr(),
        dbc.Row(
            children=[
                dbc.Col(
                    children=[
                        dbc.Row(children=[dbc.Col(controls, md=12)]),
                        dbc.Row(
                            children=[
                                # One full-width column per cluster graph; the ids
                                # are the callback output targets defined below.
                                dbc.Col(dcc.Graph(id=graph_id), md=12)
                                for graph_id in (
                                    "cluster-graph1",
                                    "cluster-graph2",
                                    "cluster-graph3",
                                )
                            ],
                            align="center",
                        ),
                    ],
                    md=8,
                ),
                dbc.Col(
                    children=[dbc.Col(dcc.Graph(id="manager_table"), md=12)],
                    md=4,
                ),
            ]
        ),
    ],
    fluid=True,
)


@app.callback(
    Output("cluster-graph1", "figure"),
    [
        Input("cluster-count", "value"),
    ],
)
def make_graph1(n_clusters):
    """Build the log-log scatter of managers (Value vs. Share), one trace per cluster.

    Args:
        n_clusters: Cluster count from the "cluster-count" input; passed
            straight to ``make_kmeans``.

    Returns:
        A ``go.Figure`` with one marker trace per cluster label found in the
        k-means result, with both axes on a log scale.
    """
    df = make_kmeans(n_clusters)

    # Iterate over the labels actually present in the result rather than
    # range(n_clusters): the drawn traces then always match the clustering,
    # even when the requested count was adjusted (e.g. 0 -> 1 cluster).
    labels = sorted(df["cluster"].unique(), key=int)

    data = []
    for label in labels:
        members = df.loc[df["cluster"] == label]
        data.append(
            go.Scatter(
                x=members['Value'],
                y=members['Share'],
                mode="markers",
                # Hover text must come from the same subset as x/y; using the
                # full index would label points with the wrong manager.
                hovertext=members.index,
                marker={"size": 8},
                name="Cluster {}".format(label),
            )
        )

    layout = {"xaxis": {"title": 'Value'}, "yaxis": {"title": 'Share'}}

    return go.Figure(data=data, layout=layout).update_xaxes(type="log").update_yaxes(type="log")

@app.callback(
    Output("cluster-graph2", "figure"),
    [Input("cluster-count", "value")],
)
def make_graph2(n_clusters):
    """Build the grouped bar chart of the five highest-value stocks per cluster.

    Each holding row in ``stock_data`` is tagged with its manager's cluster,
    values are summed per (cluster, stock), and the top five stocks by summed
    value are kept for every cluster.

    Args:
        n_clusters: Cluster count from the "cluster-count" input.

    Returns:
        A plotly express bar figure (Stock on y, Value on a log x axis,
        coloured by cluster).
    """
    manager_clusters = make_kmeans(n_clusters)[['cluster']]

    # Attach each manager's cluster label to every holding row.
    holdings = stock_data.join(manager_clusters, on='Manager')

    value_by_stock = (
        holdings[['cluster', 'Stock', 'Value']]
        .groupby(['cluster', 'Stock'], as_index=False)
        .agg('sum')
    )

    # Sort by value first so head(5) keeps the largest five rows per cluster.
    ranked = value_by_stock.sort_values('Value', ascending=False)
    top_stocks = ranked.groupby(['cluster']).head(5).sort_values('cluster')

    fig = px.bar(
        top_stocks,
        y="Stock",
        x="Value",
        color="cluster",
        barmode="group",
        log_x=True,
    )
    fig.update_xaxes(rangeselector_yanchor='bottom')

    return fig


@app.callback(
    Output("cluster-graph3", "figure"),
    [Input("cluster-count", "value")],
)
def make_graph3(n_clusters):
    """Build the grouped bar chart of the five highest-value managers per cluster.

    Args:
        n_clusters: Cluster count from the "cluster-count" input.

    Returns:
        A plotly express bar figure (Manager on y, Value on a log x axis,
        coloured by cluster).
    """
    clustered = make_kmeans(n_clusters)

    # Turn the manager index into a regular column; the rename only has an
    # effect if reset_index produced a column literally called 'index'.
    managers = clustered.reset_index().rename(columns={'index': 'Manager'})

    # Sort by value first so head(5) keeps the largest five rows per cluster.
    ranked = managers.sort_values('Value', ascending=False)
    top_managers = ranked.groupby(['cluster']).head(5).sort_values('cluster')

    fig = px.bar(
        top_managers,
        y="Manager",
        x="Value",
        color="cluster",
        barmode="group",
        log_x=True,
    )
    fig.update_xaxes(rangeselector_yanchor='bottom')

    return fig


@app.callback(
    Output("manager_table", "figure"),
    [Input("cluster-count", "value")],
)
def make_table(n_clusters):
    """Build a table of the ten managers with the most NEW_CUSIP entries.

    Args:
        n_clusters: Cluster count from the "cluster-count" input. It is not
            used in the computation; it only triggers the callback.

    Returns:
        A ``go.Figure`` containing a single ``go.Table`` with the manager name
        and its NEW_CUSIP count.
    """
    cusip_counts = stock_data.groupby(['Manager']).agg({"NEW_CUSIP": "count"})
    top_fund = (
        cusip_counts
        .sort_values("NEW_CUSIP", ascending=False)
        .head(10)
        .reset_index()
    )

    table = go.Table(
        header={"values": list(top_fund.columns)},
        cells={
            "values": [
                list(top_fund.iloc[:, 0]),
                list(top_fund.iloc[:, 1]),
            ]
        },
    )
    return go.Figure(data=[table])


def make_kmeans(n_clusters):
    """Cluster managers by their total Value and Share with k-means.

    Sums ``Value`` and ``Share`` per ``Manager`` from the module-level
    ``stock_data`` frame and fits ``KMeans`` (``random_state=123``) on those
    two totals.

    Args:
        n_clusters: Requested number of clusters. ``None`` or a non-numeric
            value (e.g. when the number input is cleared) is treated as 1.
            The value is clamped to the range [1, number of managers]
            because ``KMeans`` cannot fit more clusters than samples.

    Returns:
        DataFrame indexed by Manager with columns ``Value``, ``Share`` and
        ``cluster`` (the k-means label as a string).
    """
    df = stock_data[['Manager', 'Value', 'Share']].groupby('Manager').agg('sum')

    # Nothing to cluster: return the empty frame with the expected column.
    if df.empty:
        return df.assign(cluster=[]).astype({"cluster": str})

    try:
        requested = int(n_clusters)
    except (TypeError, ValueError, OverflowError):
        # None / NaN / inf / junk from the input box: fall back to one cluster.
        requested = 1
    k = min(max(requested, 1), len(df))

    km = KMeans(n_clusters=k, random_state=123)
    km.fit(df.values)
    df["cluster"] = km.labels_
    # String labels make plotly express treat clusters as discrete categories.
    df = df.astype({"cluster": str})
    return df


# Start the app and render it inline in this cell's output.
# Standalone-script alternative, kept for reference:
#   if __name__ == "__main__":
#       app.run_server(debug=True, port=8888)
# NOTE(review): the recorded output of this cell is a ConnectionError, so the
# inline server presumably could not be reached in the last run - confirm the
# run environment before relying on this cell.
app.run_server(mode="inline", )

ConnectionError: ignored

In [None]:
# Debugging aid: re-print the most recent traceback. Leftover from
# investigating a failed run; safe to delete from the finished notebook.
%tb

SystemExit: ignored