In [150]:
import numpy as np
import pandas as pd

from sklearn.datasets import load_boston
from sklearn.linear_model import LinearRegression


import plotly.graph_objects as go
import plotly.express as px

from dash import Dash, dcc, html, Input, Output, State, dash_table

data = load_boston()
df = pd.DataFrame(data["data"])
df.columns = data["feature_names"]
df["PRICE"] = data["target"]
df.head()

app = Dash()
app.layout = html.Div(
    [
        html.H2("탐색적데이터 분석"),
        html.H3("데이터 확인"),
        html.Hr(),
        html.Button("결측치탐색실행",id="null-search",n_clicks=0),
        html.Div(id="null-search-result"),
        html.Hr(),
        html.Button("데이터타입확인",id="type-search",n_clicks=0),
        html.Div(id="type-search-result"),
        html.Hr(),
        html.H3("통계정보확인"),
        html.Button("통계량계산", id="stat-search",n_clicks=0),
        html.Div(id="stat-result"),
        html.Hr(),
        html.H3("시각화정보"),
        html.Hr(),
        html.H4("1. 변수별 히스토그램"),
        html.Hr(),
        dcc.Dropdown(id="histogram-dropdown",
                    options = [
                        {"label":i, "value":i} for i in df.columns
                    ]),
        dcc.Graph(id="histo-result"),
        html.Hr(),
        html.H4("2.변수별 박스플롯"),
        dcc.Checklist(id="boxplot-checklist",
                      options = [{"label":i,"value":i} for i in df.columns],
                      value = df.columns,
                      labelStyle={'display': 'inline-block'}
                     ),
        html.Div(id="boxplot-result"),
        html.Hr(),
        html.H4("3.변수별 상관관계"),
        html.Label("변수선택"),
        dcc.Checklist(id="corr-checklist",
                      options = [{"label":i,"value":i} for i in df.columns],
                      value = df.columns,
                      labelStyle={'display': 'inline-block'}
                     ),
        html.Label("상관계수선택"),
        dcc.RadioItems(id="corr-radio",
                      options = [{"label":"pearson","value":"pearson"},
                                 {"label":"spearman","value":"spearman"},
                                 {"label":"kendall","value":"kendall"},]),
        html.Div(id="corr-result"),
        
    ]
)
@app.callback(
    Output("null-search-result","children"),
    Input("null-search","n_clicks"),
)
def update_null_search(n):
    if n > 0:
        dt = pd.DataFrame(df.isna().sum().values.reshape(1,-1),columns=df.columns)
        return html.Div([
                            html.H5("결측치정보"),
                            html.Hr(),
                            dash_table.DataTable(
                                    data = dt.to_dict("record"),
                                    columns = [{"name":str(i),"id":str(i)} for i in dt.columns],
                            )
                        ]
                        )   

@app.callback(
    Output("type-search-result","children"),
    Input("type-search","n_clicks"),
)
def update_type_search(n):
    if n > 0:
        dt = df.dtypes.reset_index()
        dt.columns = ["변수명","데이터타입"]
        dt["사용가능여부"] = np.where(dt.데이터타입 == "object","X","O")
        dt["데이터타입"] = dt["데이터타입"].astype("str")
        return html.Div([
                            html.H5("변수 정보"),
                            html.Hr(),
                            dash_table.DataTable(
                                data = dt.to_dict("record"),
                                columns = [{"name":i,"id":i} for i in dx.columns]
                            )
                        ]
        )

@app.callback(
    Output("stat-result","children"),
    Input("stat-search","n_clicks"),

)        
def update_stat(n):
    if n >0:
        dt = df.describe().loc[["mean","std","min","max"],:].T.reset_index()
        dt.columns = ["변수명","평균","표준편차","최대","최소"]
        return html.Div([
            html.H5("통계정보"),
            html.Hr(),
            dash_table.DataTable(
                data = dt.to_dict("r"),
                columns = [{"name":i,"id":i} for i in dt.columns ]
            )
        ])
        
@app.callback(
    Output("histo-result","figure"),
    Input("histogram-dropdown","value"),
)
def update_histo(n):
    x = df.loc[:,n]
    fig =  go.Figure(data=[go.Histogram(x=x)])
    return fig

@app.callback(
    Output("boxplot-result","children"),
    Input("boxplot-checklist","value"),
)
def update_boxplot(n):
    fig = go.Figure()
    for i in range(len(n)):
        y = df.loc[:,n[i]]
        fig.add_trace(go.Box(y=y))
    return html.Div(
        dcc.Graph(figure=fig)
    )
@app.callback(
    Output("corr-result","children"),
    Input("corr-checklist","value"),
    Input("corr-radio","value"),
)
def update_corr(corr_var,corr_method):
    dt = df.loc[:,corr_var]
    corr_df = dt.corr(method=corr_method).reset_index().rename({"index":"columns"},axis=1)
    
    z = dt.corr(method=corr_method)
    z1 = np.rot90(z,1)
    fig1 = go.Figure(data=go.Heatmap(
                    z=z))
    fig2 = go.Figure(data=go.Heatmap(
                    z=z1))
    return html.Div(
        [dash_table.DataTable(
            data = corr_df.to_dict("r"),
            columns = [{"name":i,"id":i} for i in corr_df.columns]
        ),
         dcc.Graph(figure=fig1),
         dcc.Graph(figure=fig2)
        ]
    )


app.run_server(port=7777)

Dash is running on http://127.0.0.1:7777/

Dash is running on http://127.0.0.1:7777/

Dash is running on http://127.0.0.1:7777/

Dash is running on http://127.0.0.1:7777/

Dash is running on http://127.0.0.1:7777/

Dash is running on http://127.0.0.1:7777/

Dash is running on http://127.0.0.1:7777/

Dash is running on http://127.0.0.1:7777/

Dash is running on http://127.0.0.1:7777/

Dash is running on http://127.0.0.1:7777/

Dash is running on http://127.0.0.1:7777/

Dash is running on http://127.0.0.1:7777/

Dash is running on http://127.0.0.1:7777/

Dash is running on http://127.0.0.1:7777/

Dash is running on http://127.0.0.1:7777/

Dash is running on http://127.0.0.1:7777/

Dash is running on http://127.0.0.1:7777/

Dash is running on http://127.0.0.1:7777/

Dash is running on http://127.0.0.1:7777/

Dash is running on http://127.0.0.1:7777/

Dash is running on http://127.0.0.1:7777/

Dash is running on http://127.0.0.1:7777/

Dash is running on http://127.0.0.1:7777/

Dash is run

 * Running on http://127.0.0.1:7777/ (Press CTRL+C to quit)
127.0.0.1 - - [05/Nov/2021 17:44:20] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [05/Nov/2021 17:44:20] "GET /_dash-layout HTTP/1.1" 200 -
127.0.0.1 - - [05/Nov/2021 17:44:20] "GET /_dash-dependencies HTTP/1.1" 200 -
127.0.0.1 - - [05/Nov/2021 17:44:20] "GET /_dash-component-suites/dash/dcc/async-dropdown.js HTTP/1.1" 200 -
127.0.0.1 - - [05/Nov/2021 17:44:20] "GET /_dash-component-suites/dash/dcc/async-graph.js HTTP/1.1" 200 -
127.0.0.1 - - [05/Nov/2021 17:44:20] "GET /_dash-component-suites/dash/dcc/async-plotlyjs.js HTTP/1.1" 200 -
127.0.0.1 - - [05/Nov/2021 17:44:20] "POST /_dash-update-component HTTP/1.1" 200 -
127.0.0.1 - - [05/Nov/2021 17:44:20] "POST /_dash-update-component HTTP/1.1" 200 -
127.0.0.1 - - [05/Nov/2021 17:44:20] "POST /_dash-update-component HTTP/1.1" 200 -


Exception on /_dash-update-component [POST]
Traceback (most recent call last):
  File "C:\Anaconda3\lib\site-packages\pandas\core\indexes\base.py", line 2657, in get_loc
    return self._engine.get_loc(key)
  File "pandas/_libs/index.pyx", line 108, in pandas._libs.index.IndexEngine.get_loc
  File "pandas/_libs/index.pyx", line 132, in pandas._libs.index.IndexEngine.get_loc
  File "pandas/_libs/hashtable_class_helper.pxi", line 1601, in pandas._libs.hashtable.PyObjectHashTable.get_item
  File "pandas/_libs/hashtable_class_helper.pxi", line 1608, in pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: None

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "C:\Anaconda3\lib\site-packages\flask\app.py", line 2446, in wsgi_app
    response = self.full_dispatch_request()
  File "C:\Anaconda3\lib\site-packages\flask\app.py", line 1951, in full_dispatch_request
    rv = self.handle_user_exception(e)
  File "C:\Anaconda3\lib\

127.0.0.1 - - [05/Nov/2021 17:44:20] "POST /_dash-update-component HTTP/1.1" 200 -


Exception on /_dash-update-component [POST]
Traceback (most recent call last):
  File "C:\Anaconda3\lib\site-packages\pandas\core\indexes\base.py", line 2657, in get_loc
    return self._engine.get_loc(key)
  File "pandas/_libs/index.pyx", line 108, in pandas._libs.index.IndexEngine.get_loc
  File "pandas/_libs/index.pyx", line 132, in pandas._libs.index.IndexEngine.get_loc
  File "pandas/_libs/hashtable_class_helper.pxi", line 1601, in pandas._libs.hashtable.PyObjectHashTable.get_item
  File "pandas/_libs/hashtable_class_helper.pxi", line 1608, in pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: None

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "C:\Anaconda3\lib\site-packages\flask\app.py", line 2446, in wsgi_app
    response = self.full_dispatch_request()
  File "C:\Anaconda3\lib\site-packages\flask\app.py", line 1951, in full_dispatch_request
    rv = self.handle_user_exception(e)
  File "C:\Anaconda3\lib\

127.0.0.1 - - [05/Nov/2021 17:44:20] "POST /_dash-update-component HTTP/1.1" 500 -


Exception on /_dash-update-component [POST]
Traceback (most recent call last):
  File "C:\Anaconda3\lib\site-packages\flask\app.py", line 2446, in wsgi_app
    response = self.full_dispatch_request()
  File "C:\Anaconda3\lib\site-packages\flask\app.py", line 1951, in full_dispatch_request
    rv = self.handle_user_exception(e)
  File "C:\Anaconda3\lib\site-packages\flask\app.py", line 1820, in handle_user_exception
    reraise(exc_type, exc_value, tb)
  File "C:\Anaconda3\lib\site-packages\flask\_compat.py", line 39, in reraise
    raise value
  File "C:\Anaconda3\lib\site-packages\flask\app.py", line 1949, in full_dispatch_request
    rv = self.dispatch_request()
  File "C:\Anaconda3\lib\site-packages\flask\app.py", line 1935, in dispatch_request
    return self.view_functions[rule.endpoint](**req.view_args)
  File "C:\Anaconda3\lib\site-packages\dash\dash.py", line 1336, in dispatch
    response.set_data(func(*args, outputs_list=outputs_list))
  File "C:\Anaconda3\lib\site-packages\d

127.0.0.1 - - [05/Nov/2021 17:44:20] "POST /_dash-update-component HTTP/1.1" 500 -
127.0.0.1 - - [05/Nov/2021 17:44:23] "POST /_dash-update-component HTTP/1.1" 200 -
127.0.0.1 - - [05/Nov/2021 17:44:23] "GET /_dash-component-suites/dash/dash_table/async-highlight.js HTTP/1.1" 200 -
127.0.0.1 - - [05/Nov/2021 17:44:23] "GET /_dash-component-suites/dash/dash_table/async-table.js HTTP/1.1" 200 -
127.0.0.1 - - [05/Nov/2021 17:44:40] "POST /_dash-update-component HTTP/1.1" 200 -
127.0.0.1 - - [05/Nov/2021 17:44:41] "POST /_dash-update-component HTTP/1.1" 200 -
127.0.0.1 - - [05/Nov/2021 17:44:42] "POST /_dash-update-component HTTP/1.1" 200 -
127.0.0.1 - - [05/Nov/2021 17:44:42] "POST /_dash-update-component HTTP/1.1" 200 -
127.0.0.1 - - [05/Nov/2021 17:44:43] "POST /_dash-update-component HTTP/1.1" 200 -
127.0.0.1 - - [05/Nov/2021 17:44:44] "POST /_dash-update-component HTTP/1.1" 200 -
127.0.0.1 - - [05/Nov/2021 17:44:44] "POST /_dash-update-component HTTP/1.1" 200 -
127.0.0.1 - - [05/Nov/2

In [102]:
dt = df.describe().loc[["mean","std","min","max"],:].T.reset_index()
dt.columns = ["변수명","평균","표준편차","최대","최소"]

Unnamed: 0,var,mean,std,min,max
0,CRIM,3.613524,8.601545,0.00632,88.9762
1,ZN,11.363636,23.322453,0.0,100.0
2,INDUS,11.136779,6.860353,0.46,27.74
3,CHAS,0.06917,0.253994,0.0,1.0
4,NOX,0.554695,0.115878,0.385,0.871
5,RM,6.284634,0.702617,3.561,8.78
6,AGE,68.574901,28.148861,2.9,100.0
7,DIS,3.795043,2.10571,1.1296,12.1265
8,RAD,9.549407,8.707259,1.0,24.0
9,TAX,408.237154,168.537116,187.0,711.0
