In [1]:
import numpy as np
import pandas as pd
from collections import Counter

import matplotlib.pyplot as plt
import seaborn as sns

import scipy.stats as st
from scipy.cluster import hierarchy
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans

In [2]:
from dash import dash, dcc, html, Input, Output, State
import dash_bootstrap_components as dbc
from dash.exceptions import PreventUpdate
import plotly.express as px

In [3]:
data_frame = pd.read_csv("executor_prepared_data.csv").drop("Unnamed: 0", axis=1)

In [6]:
app = dash.Dash(external_stylesheets=[dbc.themes.BOOTSTRAP])

app.layout = html.Div([
    dcc.Textarea(
        placeholder='Wynik',
        disabled = 1,
        id = "result"),
    
    # CODE_GENDER 1
    dcc.Dropdown(
        data_frame["CODE_GENDER"].unique(),
        #["M", "F"],
        placeholder = "CODE_GENDER",
        id = "gender"),
    
    # FLAG_OWN_CAR 2
    dcc.Dropdown(
        data_frame["FLAG_OWN_CAR"].unique(),
        placeholder = "FLAG_OWN_CAR",
        id = "car"),
    
    # FLAG_OWN_REALTY 3
    dcc.Dropdown(
        data_frame["FLAG_OWN_REALTY"].unique(),
        placeholder = "FLAG_OWN_REALTY",
        id = "realty"),
    
    # NAME_INCOME_TYPE 4
    dcc.Dropdown(
        data_frame["NAME_INCOME_TYPE"].unique(),
        placeholder = "NAME_INCOME_TYPE",
        id = "income_type"),
    
    # NAME_EDUCATION_TYPE 5
    dcc.Dropdown(
        data_frame["NAME_EDUCATION_TYPE"].unique(),
        placeholder = "NAME_EDUCATION_TYPE",
        id = "education_type"),
    
    # NAME_FAMILY_STATUS 6
    dcc.Dropdown(
        data_frame["NAME_FAMILY_STATUS"].unique(),
        placeholder = "NAME_FAMILY_STATUS",
        id = "family_status"),
    
    # NAME_HOUSING_TYPE 7
    dcc.Dropdown(
        data_frame["NAME_HOUSING_TYPE"].unique(),
        placeholder = "NAME_HOUSING_TYPE",
        id = "housing_type"),
    
    # FLAG_WORK_PHONE 8
    dcc.Dropdown(
        data_frame["FLAG_WORK_PHONE"].unique(),
        placeholder = "FLAG_WORK_PHONE",
        id = "work_phone"),
    
    # FLAG_PHONE 9
    dcc.Dropdown(
        data_frame["FLAG_PHONE"].unique(),
        placeholder = "FLAG_PHONE",
        id = "phone"),
    
    # FLAG_EMAIL 10
    dcc.Dropdown(
        data_frame["FLAG_EMAIL"].unique(),
        placeholder = "FLAG_EMAIL",
        id = "email"),
    
    # OCCUPATION_TYPE 11
    dcc.Dropdown(
        data_frame["OCCUPATION_TYPE"].unique(),
        placeholder = "OCCUPATION_TYPE",
        id = "occupation"),
    
    # CHILDREN 12
    dcc.Dropdown(
        data_frame["CHILDREN"].unique(),
        placeholder = "CHILDREN",
        id = "children"),
    
    # MEMBERS 13
    dcc.Dropdown(
        data_frame["MEMBERS"].unique(),
        placeholder = "MEMBERS",
        id = "members"),
    
    # AGE 14
    dcc.Dropdown(
        data_frame["AGE"].unique(),
        placeholder = "AGE",
        id = "age"),
    
    # YEARS_EMPLOYED 15
    dcc.Dropdown(
        data_frame["YEARS_EMPLOYED"].unique(),
        placeholder = "YEARS_EMPLOYED",
        id = "employed"),
    
    # INCOME 16
    dcc.Dropdown(
        data_frame["INCOME"].unique(),
        placeholder = "INCOME",
        id = "income")
])


@app.callback(
    Output("result", "value"),
    Input("gender", "value"),
    Input("car", "value"),
    Input("realty", "value"),
    Input("income_type", "value"),
    Input("education_type", "value"),
    Input("family_status", "value"),
    Input("housing_type", "value"),
    Input("work_phone", "value"),
    Input("phone", "value"),
    Input("email", "value"),
    Input("occupation", "value"),
    Input("children", "value"), 
    Input("members", "value"),
    Input("age", "value"),
    Input("employed", "value"),
    Input("income", "value"))


def dash_function(gender, car, realty, income_type, education_type, family_status, housing_type,
                  work_phone, phone, email, occupation, children, members, age, employed, income):

    personal_data = {'CODE_GENDER': gender,
                     'FLAG_OWN_CAR': car,
                     'FLAG_OWN_REALTY': realty,
                     'NAME_INCOME_TYPE': income_type,
                     'NAME_EDUCATION_TYPE': education_type,
                     'NAME_FAMILY_STATUS': family_status,
                     'NAME_HOUSING_TYPE': housing_type,
                     'FLAG_WORK_PHONE': work_phone,
                     'FLAG_PHONE': phone,
                     'FLAG_EMAIL': email,
                     'OCCUPATION_TYPE': occupation,
                     'CHILDREN': children,
                     'MEMBERS': members,
                     'AGE': age,
                     'YEARS_EMPLOYED': employed,
                     'INCOME': income}
    
    main_features = ['OCCUPATION_TYPE',
                     'AGE',
                     'INCOME',
                     'FLAG_EMAIL',
                     'NAME_FAMILY_STATUS',
                     'NAME_EDUCATION_TYPE',
                     'NAME_INCOME_TYPE',
                     'MEMBERS']
    
    extra_features = ['FLAG_OWN_CAR',
                      'FLAG_OWN_REALTY',
                      'NAME_HOUSING_TYPE',
                      'FLAG_WORK_PHONE',
                      'FLAG_PHONE',
                      'CHILDREN',
                      'YEARS_EMPLOYED',
                      'CODE_GENDER']

    main_data = {feature: personal_data[feature] for feature in main_features}
    extra_data = {feature: personal_data[feature] for feature in extra_features}
    
    
    def conditional_probability(data_frame, personal_data, split_method, threshold):

        if split_method == "standard":
            good_clients = data_frame[(data_frame["GOOD"] == 1) | (data_frame["SUFFICIENT"] == 1)]
            bad_clients = data_frame[(data_frame["GOOD"] == 0) & (data_frame["SUFFICIENT"] == 0)]
        elif split_method == "good":
            good_clients = data_frame[data_frame["GOOD"] == 1]
            bad_clients = data_frame[data_frame["GOOD"] == 0]
        elif split_method == "sufficient":
            data_frame = data_frame[data_frame["GOOD"] == 0]
            good_clients = data_frame[data_frame["SUFFICIENT"] == 1]
            bad_clients = data_frame[data_frame["SUFFICIENT"] == 0]

        for i in range(1, len(personal_data)+1):
            records = 0

            for key, value in personal_data.items():
                conditional_records = len(data_frame[data_frame[key] == value])
                if conditional_records > records:
                    records = conditional_records
                    specific_key = key
                    specific_value = value

            good_records = len(good_clients[good_clients[specific_key] == specific_value])
            bad_records = len(bad_clients[bad_clients[specific_key] == specific_value])
            client_records = good_records + bad_records

            if client_records >= threshold and bad_records > 0:
                personal_data = {key:personal_data[key] for key in personal_data if key!=specific_key}
                good_clients = good_clients[good_clients[specific_key] == specific_value]
                bad_clients = bad_clients[bad_clients[specific_key] == specific_value]
                probability = round(bad_records/(bad_records + good_records), 5)
            else:
                break
                
        return probability, personal_data


    def executor(data_frame, personal_data, main_data, extra_data,
                 split_method = "standard", threshold = 1000, second_threshold = 500):
    
        result = conditional_probability(data_frame, main_data, split_method, threshold)
        probability, data = result

        if len(data) != 0:
            result = conditional_probability(data_frame, main_data, split_method, second_threshold)
            new_probability, new_data = result

            if len(new_data) < len(data) and new_probability <= probability:
                probability = new_probability
                data = new_data

            extra_data = dict(extra_data, **data)    

        for key, value in extra_data.items():
            single_dict = {key: value}
            new_probability = conditional_probability(data_frame, dict(main_data, **single_dict),
                                                      split_method, second_threshold)[0]

            if new_probability <= probability:
                probability = new_probability
                main_data = dict(main_data, **single_dict)

        return probability
     
    
    x = conditional_probability(data_frame, main_data, "standard", 1000)[0]
    y = executor(data_frame, personal_data, main_data, extra_data)
    
    
    return x, y

In [None]:
if __name__ == '__main__':
    app.run_server()

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
   Use a production WSGI server instead.
 * Debug mode: off


 * Running on http://127.0.0.1:8050/ (Press CTRL+C to quit)
127.0.0.1 - - [03/Apr/2022 12:33:20] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [03/Apr/2022 12:33:21] "GET /_dash-dependencies HTTP/1.1" 200 -
127.0.0.1 - - [03/Apr/2022 12:33:21] "GET /_dash-layout HTTP/1.1" 200 -
127.0.0.1 - - [03/Apr/2022 12:33:21] "GET /_dash-component-suites/dash/dcc/async-dropdown.js HTTP/1.1" 200 -


Exception on /_dash-update-component [POST]
Traceback (most recent call last):
  File "D:\Programy\Anaconda\lib\site-packages\flask\app.py", line 2446, in wsgi_app
    response = self.full_dispatch_request()
  File "D:\Programy\Anaconda\lib\site-packages\flask\app.py", line 1951, in full_dispatch_request
    rv = self.handle_user_exception(e)
  File "D:\Programy\Anaconda\lib\site-packages\flask\app.py", line 1820, in handle_user_exception
    reraise(exc_type, exc_value, tb)
  File "D:\Programy\Anaconda\lib\site-packages\flask\_compat.py", line 39, in reraise
    raise value
  File "D:\Programy\Anaconda\lib\site-packages\flask\app.py", line 1949, in full_dispatch_request
    rv = self.dispatch_request()
  File "D:\Programy\Anaconda\lib\site-packages\flask\app.py", line 1935, in dispatch_request
    return self.view_functions[rule.endpoint](**req.view_args)
  File "D:\Programy\Anaconda\lib\site-packages\dash\dash.py", line 1383, in dispatch
    response.set_data(func(*args, outputs_list

127.0.0.1 - - [03/Apr/2022 12:33:21] "POST /_dash-update-component HTTP/1.1" 500 -


Exception on /_dash-update-component [POST]
Traceback (most recent call last):
  File "D:\Programy\Anaconda\lib\site-packages\flask\app.py", line 2446, in wsgi_app
    response = self.full_dispatch_request()
  File "D:\Programy\Anaconda\lib\site-packages\flask\app.py", line 1951, in full_dispatch_request
    rv = self.handle_user_exception(e)
  File "D:\Programy\Anaconda\lib\site-packages\flask\app.py", line 1820, in handle_user_exception
    reraise(exc_type, exc_value, tb)
  File "D:\Programy\Anaconda\lib\site-packages\flask\_compat.py", line 39, in reraise
    raise value
  File "D:\Programy\Anaconda\lib\site-packages\flask\app.py", line 1949, in full_dispatch_request
    rv = self.dispatch_request()
  File "D:\Programy\Anaconda\lib\site-packages\flask\app.py", line 1935, in dispatch_request
    return self.view_functions[rule.endpoint](**req.view_args)
  File "D:\Programy\Anaconda\lib\site-packages\dash\dash.py", line 1383, in dispatch
    response.set_data(func(*args, outputs_list

127.0.0.1 - - [03/Apr/2022 12:33:22] "POST /_dash-update-component HTTP/1.1" 500 -


Exception on /_dash-update-component [POST]
Traceback (most recent call last):
  File "D:\Programy\Anaconda\lib\site-packages\flask\app.py", line 2446, in wsgi_app
    response = self.full_dispatch_request()
  File "D:\Programy\Anaconda\lib\site-packages\flask\app.py", line 1951, in full_dispatch_request
    rv = self.handle_user_exception(e)
  File "D:\Programy\Anaconda\lib\site-packages\flask\app.py", line 1820, in handle_user_exception
    reraise(exc_type, exc_value, tb)
  File "D:\Programy\Anaconda\lib\site-packages\flask\_compat.py", line 39, in reraise
    raise value
  File "D:\Programy\Anaconda\lib\site-packages\flask\app.py", line 1949, in full_dispatch_request
    rv = self.dispatch_request()
  File "D:\Programy\Anaconda\lib\site-packages\flask\app.py", line 1935, in dispatch_request
    return self.view_functions[rule.endpoint](**req.view_args)
  File "D:\Programy\Anaconda\lib\site-packages\dash\dash.py", line 1383, in dispatch
    response.set_data(func(*args, outputs_list

127.0.0.1 - - [03/Apr/2022 12:33:23] "POST /_dash-update-component HTTP/1.1" 500 -


Exception on /_dash-update-component [POST]
Traceback (most recent call last):
  File "D:\Programy\Anaconda\lib\site-packages\flask\app.py", line 2446, in wsgi_app
    response = self.full_dispatch_request()
  File "D:\Programy\Anaconda\lib\site-packages\flask\app.py", line 1951, in full_dispatch_request
    rv = self.handle_user_exception(e)
  File "D:\Programy\Anaconda\lib\site-packages\flask\app.py", line 1820, in handle_user_exception
    reraise(exc_type, exc_value, tb)
  File "D:\Programy\Anaconda\lib\site-packages\flask\_compat.py", line 39, in reraise
    raise value
  File "D:\Programy\Anaconda\lib\site-packages\flask\app.py", line 1949, in full_dispatch_request
    rv = self.dispatch_request()
  File "D:\Programy\Anaconda\lib\site-packages\flask\app.py", line 1935, in dispatch_request
    return self.view_functions[rule.endpoint](**req.view_args)
  File "D:\Programy\Anaconda\lib\site-packages\dash\dash.py", line 1383, in dispatch
    response.set_data(func(*args, outputs_list

127.0.0.1 - - [03/Apr/2022 12:33:24] "POST /_dash-update-component HTTP/1.1" 500 -
127.0.0.1 - - [03/Apr/2022 12:33:29] "POST /_dash-update-component HTTP/1.1" 200 -
127.0.0.1 - - [03/Apr/2022 12:33:34] "POST /_dash-update-component HTTP/1.1" 200 -
127.0.0.1 - - [03/Apr/2022 12:33:35] "POST /_dash-update-component HTTP/1.1" 200 -
127.0.0.1 - - [03/Apr/2022 12:33:36] "POST /_dash-update-component HTTP/1.1" 200 -
127.0.0.1 - - [03/Apr/2022 12:33:43] "POST /_dash-update-component HTTP/1.1" 200 -
127.0.0.1 - - [03/Apr/2022 12:33:43] "POST /_dash-update-component HTTP/1.1" 200 -
127.0.0.1 - - [03/Apr/2022 12:33:45] "POST /_dash-update-component HTTP/1.1" 200 -
127.0.0.1 - - [03/Apr/2022 12:33:46] "POST /_dash-update-component HTTP/1.1" 200 -
127.0.0.1 - - [03/Apr/2022 12:33:48] "POST /_dash-update-component HTTP/1.1" 200 -
127.0.0.1 - - [03/Apr/2022 12:33:50] "POST /_dash-update-component HTTP/1.1" 200 -
127.0.0.1 - - [03/Apr/2022 12:33:53] "POST /_dash-update-component HTTP/1.1" 200 -
127.