In [1]:
#File description & paths to csv's
"""
Code is divided into the following sections:
1. Import data & create dataframes:
    -credit_records_o contains credit_records from csv file, same structure as in the file (long form)
    -application_records_o contains data from application csv, file same structure as in the file
    -credit_records_t is wide form of credit_records_o 
2. Prepare data in application records (i.e. fill in blanks in occupation type, standardize values and more)
3. Enrich credit data with start, end, duration, count of individual statues (0_count, 1_count... 5 ratio) and count all statuses (all_late_count),
ratio of individual status (0_ratio, 1_ratio... 5_ratio) and all late statuse ratio (all_later_ratio)
    - credit_records_t_label
4. Enrich credit data with Active & TOTAL_STATUS & GOOD_BAD CLIENT measures calculated wiht Mateusz method
5. Merge app & labelled credit data and calculate bins for discrete variables (birth date, employment date & amount income)
    - app_and_credit_label_2
6. Functions for GUI in IPWIDGET that take & store user input from ipywidgets
5. 
"""

#read script location and look for folder called data in which there should be files to import
import os
data_dir=os.path.join(os.path.abspath(''),"data")
path_to_credit_records_csv=os.path.join(data_dir,"credit_record.csv")
path_to_application_records_csv=os.path.join(data_dir,"application_record.csv")

In [24]:
### 1. Import of data & dataframe def 
import pandas as pd

#create credit_records_o from data in csv
file=open(path_to_credit_records_csv) 
credit_records_o=pd.read_csv(file) 
file.close()

#create application_records_o from data in csv
file=open(path_to_application_records_csv) 
application_records_o=pd.read_csv(file) 
file.close()

#create wide version of credit_records
credit_records_t=pd.pivot(credit_records_o, index='ID',columns='MONTHS_BALANCE',values='STATUS') 
credit_records_t = credit_records_t[credit_records_t.columns.sort_values(ascending=False)] #reorder months balance column in descending order

In [25]:
### 2. Applcation Data Preparation Asia

import numpy as np
application_records_t=application_records_o.copy()
application_records_t.drop("FLAG_MOBIL", axis=1, inplace=True) #dropping as its 1 across all rows
application_records_t["FLAG_OWN_CAR"].replace(["Y","N"],["1","0"],inplace=True) #standardize to same format as other flags i.e. 1 for True and 0 for False
application_records_t["FLAG_OWN_REALTY"].replace(["Y","N"],["1","0"],inplace=True) #standardize to same format as other flags i.e. 1 for True and 0 for False
application_records_t["FLAG_OWN_CAR"]=application_records_t["FLAG_OWN_CAR"].astype(int) #standardize to int format
application_records_t["FLAG_OWN_REALTY"]=application_records_t["FLAG_OWN_REALTY"].astype(int) #standardize to int format
application_records_t["OCCUPATION_TYPE"].fillna(value="Not provided", inplace=True) #fill in blanks in occupation type

# files categorizing
conditions=[
    (application_records_t["CNT_CHILDREN"] == 0),
    (application_records_t["CNT_CHILDREN"] == 1),
    (application_records_t["CNT_CHILDREN"] == 2) & (application_records_t["CNT_CHILDREN"] ==3 ),
    (application_records_t["CNT_CHILDREN"] >3)
]
values=["No children", "1 child", "2-3 children", "Many children"]
application_records_t["CNT_CHILDREN_CATEGORIZED"]=np.select(conditions,values)
conditions1=[
    (application_records_t["CNT_FAM_MEMBERS"] == 1),
    (application_records_t["CNT_FAM_MEMBERS"] == 2),
    (application_records_t["CNT_FAM_MEMBERS"] == 3),
    (application_records_t["CNT_FAM_MEMBERS"] == 4),
    (application_records_t["CNT_FAM_MEMBERS"] >4)
]
values1=["1 person household", "couple", "2+1", "2+2","big family"]
application_records_t["CNT_FAM_MEMBERS_CATEGORIZED"]=np.select(conditions1,values1)
conditions2=[
    (application_records_t["AMT_INCOME_TOTAL"] <= 50000),
    (application_records_t["AMT_INCOME_TOTAL"] > 50000)&(application_records_t["AMT_INCOME_TOTAL"]<=100000),
    (application_records_t["AMT_INCOME_TOTAL"] > 100000)& (application_records_t["AMT_INCOME_TOTAL"]<=200000),
    (application_records_t["AMT_INCOME_TOTAL"] > 200000),
]
values2=["very low income", "medium income", "high income", "very high income"]
application_records_t["AMT_INCOME_TOTAL_CATEGORIZED"]=np.select(conditions2,values2)
conditions3=[
    (application_records_t["DAYS_BIRTH"] < 0 ) & (application_records_t["DAYS_BIRTH"]>=-7300),
    (application_records_t["DAYS_BIRTH"] < -7300) & (application_records_t["DAYS_BIRTH"]>=-10950),
    (application_records_t["DAYS_BIRTH"] < -10950) & (application_records_t["DAYS_BIRTH"]>=-14600),
    (application_records_t["DAYS_BIRTH"] < -14600) & (application_records_t["DAYS_BIRTH"]>=-18250),
    (application_records_t["DAYS_BIRTH"] < -18250) & (application_records_t["DAYS_BIRTH"]>=-21900),
    (application_records_t["DAYS_BIRTH"] < -21900) & (application_records_t["DAYS_BIRTH"]>=-25201)
]
values3=["under 20ties", "under 30ties", "under 40ties", "under 50ties","under 60ties","over 60ties"]
application_records_t["DAYS_BIRTH_CATEGORIZED"]=np.select(conditions3,values3)
conditions4=[
    (application_records_t["DAYS_EMPLOYED"] > 0),
    (application_records_t["DAYS_EMPLOYED"] > 0),
    (application_records_t["DAYS_EMPLOYED"] < 0 ) & (application_records_t["DAYS_EMPLOYED"]>=-1825),
    (application_records_t["DAYS_EMPLOYED"] < -1825) & (application_records_t["DAYS_EMPLOYED"]>=-3650),
    (application_records_t["DAYS_EMPLOYED"] < -3650) & (application_records_t["DAYS_EMPLOYED"]>=-5475),
    (application_records_t["DAYS_EMPLOYED"] < -5475),
             ]
values4=["pensioner","no working experience", "under 5 years", "under 10 years", "under 15 years", "more than 15 years"]
application_records_t["DAYS_EMPLOYED_CATEGORIZED"]=np.select(conditions4,values4)


Unnamed: 0,ID,CODE_GENDER,FLAG_OWN_CAR,FLAG_OWN_REALTY,CNT_CHILDREN,AMT_INCOME_TOTAL,NAME_INCOME_TYPE,NAME_EDUCATION_TYPE,NAME_FAMILY_STATUS,NAME_HOUSING_TYPE,...,FLAG_WORK_PHONE,FLAG_PHONE,FLAG_EMAIL,OCCUPATION_TYPE,CNT_FAM_MEMBERS,CNT_CHILDREN_CATEGORIZED,CNT_FAM_MEMBERS_CATEGORIZED,AMT_INCOME_TOTAL_CATEGORIZED,DAYS_BIRTH_CATEGORIZED,DAYS_EMPLOYED_CATEGORIZED
0,5008804,M,1,1,0,427500.0,Working,Higher education,Civil marriage,Rented apartment,...,1,0,0,Not provided,2.0,No children,couple,very high income,under 40ties,under 15 years
1,5008805,M,1,1,0,427500.0,Working,Higher education,Civil marriage,Rented apartment,...,1,0,0,Not provided,2.0,No children,couple,very high income,under 40ties,under 15 years
2,5008806,M,1,1,0,112500.0,Working,Secondary / secondary special,Married,House / apartment,...,0,0,0,Security staff,2.0,No children,couple,high income,under 60ties,under 5 years
3,5008808,F,0,1,0,270000.0,Commercial associate,Secondary / secondary special,Single / not married,House / apartment,...,0,1,1,Sales staff,1.0,No children,1 person household,very high income,under 60ties,under 10 years
4,5008809,F,0,1,0,270000.0,Commercial associate,Secondary / secondary special,Single / not married,House / apartment,...,0,1,1,Sales staff,1.0,No children,1 person household,very high income,under 60ties,under 10 years
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
438552,6840104,M,0,1,0,135000.0,Pensioner,Secondary / secondary special,Separated,House / apartment,...,0,0,0,Not provided,1.0,No children,1 person household,high income,over 60ties,pensioner
438553,6840222,F,0,0,0,103500.0,Working,Secondary / secondary special,Single / not married,House / apartment,...,0,0,0,Laborers,1.0,No children,1 person household,high income,under 50ties,under 10 years
438554,6841878,F,0,0,0,54000.0,Commercial associate,Higher education,Single / not married,With parents,...,1,0,0,Sales staff,1.0,No children,1 person household,medium income,under 30ties,under 5 years
438555,6842765,F,0,1,0,72000.0,Pensioner,Secondary / secondary special,Married,House / apartment,...,0,0,0,Not provided,2.0,No children,couple,medium income,under 60ties,pensioner


In [26]:
### 3. Enrich credit data and define labels 

from collections import Counter
p=credit_records_t.copy() 

#add colulmns with val_count(count of statuses), start (first month of loan), end (last month of loan), duration (loan duration)
p['val_count']=p.apply(lambda x: Counter(x),axis=1) 
grouped_id=credit_records_o.groupby('ID')
p['start_mth']=grouped_id['MONTHS_BALANCE'].min() #month at which the credit started for client
p['end_mth']=grouped_id['MONTHS_BALANCE'].max() #month at which the credit ended for client
p['duration']=abs(p['start_mth']-p['end_mth'])+1 #duration of the credit for client

#add columns with 0,1,2,3,4,5 statuses count and their ratios per client
p['0_count']=p['val_count'].apply(lambda x: x.get('0',0)) 
p['0_ratio']=p['0_count']/p['duration'] 
p['1_count']=p['val_count'].apply(lambda x: x.get('1',0)) 
p['1_ratio']=p['1_count']/p['duration'] 
p['2_count']=p['val_count'].apply(lambda x: x.get('2',0))
p['2_ratio']=p['2_count']/p['duration'] 
p['3_count']=p['val_count'].apply(lambda x: x.get('3',0))
p['3_ratio']=p['3_count']/p['duration'] 
p['4_count']=p['val_count'].apply(lambda x: x.get('4',0))
p['4_ratio']=p['4_count']/p['duration'] 
p['5_count']=p['val_count'].apply(lambda x: x.get('5',0))
p['5_ratio']=p['5_count']/p['duration'] 
# p['all_late_count']=p['val_count'].apply(lambda x: x.get('0',0)+x.get('1',0)+x.get('2',0)+x.get('3',0)+x.get('4',0)+x.get('5',0)) 
p['all_late_count']=p['val_count'].apply(lambda x: x.get('2',0)+x.get('3',0)+x.get('4',0)+x.get('5',0)) 
p['all_late_ratio']=p['all_late_count']/p['duration'] 
p['all_late_flag']=p['all_late_ratio'].apply(lambda x: 1 if x>0.20 else 0)

credit_records_t_label=p.copy()


In [27]:
### 4. Enrich credit data with Mateusz method
df=credit_records_o.copy() 

df["MONTHS_BALANCE"] = df["MONTHS_BALANCE"].abs()

df_last = df.groupby(by="ID")["MONTHS_BALANCE"].min().reset_index(name="LAST_RECORD")
df = pd.merge(df, df_last, on="ID", how="left")

df_all = df.groupby(by="ID")["MONTHS_BALANCE"].count().reset_index(name="NUMBER_OF_RECORDS")
df = pd.merge(df, df_all, on="ID", how="left")

df["MONTHS"] = df["MONTHS_BALANCE"] - df["LAST_RECORD"]

def account_activity(ID, last_record):
    
    """
    Funkcja zwracająca obecny stan konta klienta.
    Konto aktywne - 1
    Konto nieaktywne - 0
    """
        
    if last_record == 0:
        active = 1
    else:
        active = 0
        
        
    return active

df_upgrade = df.copy()
df_upgrade["ACTIVE"] = df_upgrade.apply(lambda df: account_activity(df["ID"], df["LAST_RECORD"]), axis=1)

def month_weight(month, number_of_records):
    
    """
    Funkcja przyporządkowująca wagę danemu miesiącu (month) z uwzględnieniem liczby wszystkich miesięcy (number_of_months).
    Wagi maleją w sposób liniowy, a ich suma daje 1.
    """
    
    k = abs(month) + 1
    n = number_of_records
    
    if n < 4:
        weight = 0
    elif 4 <= n <= 24:
        weight = -2*k/(n*(n+1))+2/n
    else:
        n = 24
        if k <= 24:
            weight = -2*k/(n*(n+1))+2/n
        else:
            weight = 0
              
                
    return weight

def status2number(month, status, number_of_records):
    
    """
    W oparciu o funkcję month_weight, status2number przekształca status z wybranego miesiąca na liczbę z przedziału <-1,1>.

    """
    
    if month <= 23:
        weight = month_weight(month, number_of_records)

        if status is "5":
            value = -1
        elif status is "X":
            value = 0
        elif status is "C":
            value = 1
        else:
            n = int(status)
            value = 1/2**(n+1)-1
    else:
        weight = 1
        value = 0
    
    
    return weight * value

df_upgrade["PARTIAL_STATUS"] = df_upgrade.apply(lambda df: status2number(df["MONTHS"], df["STATUS"], df["NUMBER_OF_RECORDS"]), axis=1)

df_status = df_upgrade.groupby(by = "ID")["PARTIAL_STATUS"].sum().reset_index(name="TOTAL_STATUS")

df_credit = pd.merge(df_upgrade, df_status, on="ID", how="left")

def good_bad_function(ID, status):
    
    """
    Funkcja określająca czy dany klient może otrzymać kredyt.
    """
    
    if status >= 0:
        judgment = 1
    else:
        judgment = 0
    
    
    return judgment

df_credit["GOOD_BAD"] = df_credit.apply(lambda df: good_bad_function(df["ID"], df["TOTAL_STATUS"]), axis=1)

df_result = df_credit[["ID", "ACTIVE", "TOTAL_STATUS", "GOOD_BAD"]].drop_duplicates().copy()


"is" with a literal. Did you mean "=="?


"is" with a literal. Did you mean "=="?


"is" with a literal. Did you mean "=="?


"is" with a literal. Did you mean "=="?


"is" with a literal. Did you mean "=="?


"is" with a literal. Did you mean "=="?


"is" with a literal. Did you mean "=="?


"is" with a literal. Did you mean "=="?


"is" with a literal. Did you mean "=="?



In [35]:
### 5. Merge app & labelled credit data & enrich it with age, empl in year, income, age, emp bins
app_and_credit_label_2=application_records_t.merge(credit_records_t_label,on='ID') #merege of app & credit labelled data
app_and_credit_label_2=app_and_credit_label_2.merge(df_result,on='ID')                 #merege of Mateusz's labells
# app_and_credit_label_2['GOOD_BAD_INV']=app_and_credit_label_2['GOOD_BAD'].apply(lambda x: 1 if x==0 else 0) #this calculater measure if client is bad

app_and_credit_label_2['income_bin']=pd.cut(app_and_credit_label_2['AMT_INCOME_TOTAL'],[50000, 75000,100000,150000,200000,250000,300000,float("inf")])
app_and_credit_label_2['age']=round((app_and_credit_label_2['DAYS_BIRTH']/365),0)
app_and_credit_label_2['age_bin']=pd.cut(app_and_credit_label_2['age'],[-70,-60,-50,-40,-30,-20])
app_and_credit_label_2['length_of_empl']=round((app_and_credit_label_2['DAYS_EMPLOYED']/365),0)
app_and_credit_label_2['length_of_empl_bin']=pd.cut(app_and_credit_label_2['length_of_empl'],[-30,-20,-10,-5,0])


In [20]:
### 6. DASH APP taking features and 2 tagets to compare targets relation

# We use here dash core components https://dash.plotly.com/dash-core-components like graph that shows graph and dropdowns, sliders where users provide input. 
# We use here dash bootstrap components https://dash-bootstrap-components.opensource.faculty.ai/docs/components/  like tabs, card, form, col, row etc. to organize
# everything into 2 tabs, where 1st tab has a side bar and content (rightbar) and all elements are packed into that. 

# This is just renaming columns to more friendly format so it displays nicely
# app_and_credit_label_2.rename(columns={'CODE_GENDER':'Gender', 'FLAG_OWN_CAR': 'Own Car','FLAG_OWN_REALTY':'Own Realty', 'CNT_CHILDREN':'Children Count', 'AMT_INCOME_TOTAL':'Yearly Income','NAME_INCOME_TYPE':'Income Type', 'NAME_EDUCATION_TYPE':'Education Type','NAME_FAMILY_STATUS':'Family Status', 'NAME_HOUSING_TYPE': 'Housing Type','DAYS_BIRTH':'Days Birth', 'DAYS_EMPLOYED':'Days Employed', 'FLAG_MOBIL':'Mobil', 'FLAG_WORK_PHONE':'Work Phone', 'FLAG_PHONE': 'Phone Flag', 'FLAG_EMAIL':'Email.Flag', 'OCCUPATION_TYPE':'Occupation Type', 'CNT_FAM_MEMBERS':'Family Members #','GOOD_BAD':'bad clients 1', 'all_late_flag':'bad clients 2'},inplace=True)

#we import here the necessary modules for dash
from dash import dash, dcc, html, Input, Output, State 
import dash_bootstrap_components as dbc
import plotly.express as px

#we set under the app a dash application instance
app = dash.Dash(external_stylesheets=[dbc.themes.BOOTSTRAP])

#Our app is built of tabs. below is exemplary tab. Its made up of 3 rows horizontally and 3 columns in row 2. You can define as many rows and columns as you wish this way you define layout. 
# All other components like dropdowns, buttons etc. need to go directly into row or column wrappers. You can use dash bootstrap components, dash core controls components and html components. 
# You can copy ane experiment with the code into either JW_tab, MR_tab, MK_tab,JB_tab

example_tab = html.Div(
    [
        dbc.Row(dbc.Col(html.H1('This is row 1')),style={"border-style": "ridge"}),
        dbc.Row(
            [
                html.H2('This is Row 2. there are 3 columns in it.'),
                dbc.Col([html.Div("This is column 1"),dbc.Button('Button in column1')],style={"border-style": "ridge"}),
                dbc.Col([html.Div("This is column 2"),dbc.Input(type="email", placeholder="example@internet.com")],style={"border-style": "ridge"}),
                dbc.Col([html.Div("This is column 3"),html.H1('Big Heading in column3'),html.H2('Medium Heading in column3'),html.H3('Smaller Heading in column3')],style={"border-style": "ridge"}),
            ],style={"border-style": "ridge"}
        ),
        dbc.Row(html.H1('This is Row 3'),style={"border-style": "ridge"})
    ]
)

JW_tab = html.Div()

#first tab starting app
MR_tab = html.Div([

    dbc.Card(
        [
            dbc.CardImg(src="https://raw.githubusercontent.com/infoshareacademy/jdszr6-slytherin_group/master/Slytherin_ClearBG.webp", top=True,),
            dbc.CardBody(
                [html.H5("SLYTHERIN BANK", className="card_title"),
                html.P(
                    "Welcome in our bank. Please select if you are "
                    "our client or advisor.",
                    className = "card_text",
                ),
                dbc.DropdownMenu(
                    label = "selection",
                    color = "success",
                    children=[
                        dbc.DropdownMenuItem("Client", id="client"),
                        dbc.DropdownMenuItem("Advisor", id="advisor")
                    ]
                )
                ]
            )
        ],
        style={
            "width": "18rem",
            "left": 650},
    )
    ]
)

#dropdown for client/advisor list
list_ca_tab = dbc.Form([
        dbc.CardHeader('Birth date'),
        dbc.Card(dcc.Input(value="yyyy-mm-dd", id='b_date', type="text")),
        dbc.CardHeader('Ocupation type'),
        dbc.Card(dcc.Dropdown(app_and_credit_label_2["OCCUPATION_TYPE"].unique(),app_and_credit_label_2["OCCUPATION_TYPE"].unique(),id='ocupation_type')),
        dbc.CardHeader('Yearly income'),
    #    dbc.Card(dcc.Dropdown(app_and_credit_label_2["AMT_INCOME_TOTAL_CATEGORIZED"].unique(),app_and_credit_label_2["AMT_INCOME_TOTAL_CATEGORIZED"].unique(),id='yerly_income')),
    #    dbc.CardHeader('Incom type'),
        dbc.Card(dcc.Dropdown(app_and_credit_label_2["NAME_INCOME_TYPE"].unique(),app_and_credit_label_2["NAME_INCOME_TYPE"].unique(),id='yerly_income')),
        dbc.CardHeader('Incom type'),
        dbc.Card(dcc.Dropdown(app_and_credit_label_2["NAME_INCOME_TYPE"].unique(),app_and_credit_label_2["NAME_INCOME_TYPE"].unique(),id='incom_type')),
        dbc.CardHeader('Education type'),
        dbc.Card(dcc.Dropdown(app_and_credit_label_2["NAME_EDUCATION_TYPE"].unique(),app_and_credit_label_2["NAME_EDUCATION_TYPE"].unique(),id='education_type')),
        dbc.CardHeader('Family status'),
        dbc.Card(dcc.Dropdown(app_and_credit_label_2["NAME_FAMILY_STATUS"].unique(),app_and_credit_label_2["NAME_FAMILY_STATUS"].unique(),id='family_status')),
      #  dbc.CardHeader('Family members'),
      #  dbc.Card(dcc.Dropdown(app_and_credit_label_2["CNT_FAM_MEMBERS_CATEGORIZED"].unique(),app_and_credit_label_2["CNT_FAM_MEMBERS_CATEGORIZED"].unique(),id='family_members')), 
        dbc.CardHeader('Family members'),
        dbc.Card(dcc.Dropdown(app_and_credit_label_2["NAME_FAMILY_STATUS"].unique(),app_and_credit_label_2["NAME_FAMILY_STATUS"].unique(),id='family_members')),       
        ])

app_and_credit_label_2
#secound tab with information about client
list_of_content = html.Div([

    dbc.Card(
        [
            dbc.CardImg(src="https://raw.githubusercontent.com/infoshareacademy/jdszr6-slytherin_group/master/Slytherin_ClearBG.webp", top=True,),
            dbc.CardBody(
                [html.H5("SLYTHERIN BANK", className="card_title")]
                
            ),
            html.Br(),
            dcc.Input(id="email", value="e-mail", type="text", ),
            html.Br(),
            dcc.Input(value="phone", type="text"),
            html.Hr(),
            list_ca_tab

        ],
        style={
            "width": "18rem",
            "left": 650,
            },
    )

]
)



MK_tab = html.Div()
JB_tab = html.Div()


#this defines what are the selections possibel in user inputs
l1=app_and_credit_label_2.columns[1:17]
t1=app_and_credit_label_2.columns[-21:-3]
p1=[*range(-60,1,1)]

#this defines what are the elements in rows
el1=dbc.Col(dcc.Graph(id='graph1'), md=6)
el2=dbc.Col(dcc.Graph(id='graph2'), md=6)
el3=dbc.Col(dcc.Graph(id='graph3'), md=6)
el4=dbc.Col(dcc.Graph(id='graph4'), md=6)

#this defines what is in rows
row0=dbc.Form([dbc.CardHeader('Select Period'), dcc.RangeSlider(-60, 0, 6, value=[-60, 0], id='start-end')])
row1=dbc.Row([el1,el2])
row2=dbc.Row([el3,el4])

#this defines the sidebar style
SIDEBAR_STYLE = {
    "position": "fixed",
    "top": 100,
    "left": 0,
    "bottom": 0,
    "width": "16rem",
    "padding": "2rem 1rem",
    "background-color": "#f8f9fa",
}

# this defines the style of content
CONTENT_STYLE = {
    "margin-left": "18rem",
    "margin-right": "2rem",
    "padding": "2rem 1rem",
}

TEXT_STYLE={}

#this defines that controls object is a form consisting of couple cards with couple dash core components called dropdawn. See dash core components here: https://dash.plotly.com/dash-core-components
controls = dbc.Form([
        dbc.CardHeader('Select Feature 1'),
        dbc.Card(dcc.Dropdown(l1,l1,id='l1_drop')),
        dbc.CardHeader('Select Feature 2'),
        dbc.Card(dcc.Dropdown(l1,l1,id='l2_drop')),
        dbc.CardHeader('Select target'),
        dbc.Card(dcc.Dropdown(t1,t1,id='l3_drop')),
        dbc.CardHeader('Select target 2'),
        dbc.Card(dcc.Dropdown(t1,t1,id='l4_drop'))
        ])

#this defines that what appears on the left side will have html headings (H2,H5) and controls object
sidebar = html.Div(
    [
        html.H2('Parameters', style=TEXT_STYLE),
        html.H5('Enter all parameters to see all visualisations'),
        html.Hr(),
        controls
    ],
    style=SIDEBAR_STYLE,
)

#this defines that what appears on the right side is built from 3 rows
content = html.Div(
    [
    row0,
    row1,
    row2,
    ],
    style=CONTENT_STYLE
)

#this defines that tab1 content is consisting of sidebar and content
tab1_content=html.Div([sidebar,content])

# Below defines that TABS object is built from Tabs and Tab components from dash bootstrap components https://dash-bootstrap-components.opensource.faculty.ai/docs/components/. Essentially
# its a list of tabs that the app will be divided into. Its easy to remove a tab (just remove element of the list) or add another tab just copy the line and change label.
TABS = dbc.Tabs(
    [
        dbc.Tab(tab1_content, label="Intro"), #information on project
        dbc.Tab(example_tab, label="Data Eploration"), #data exploration
        dbc.Tab(example_tab, label="Application simulator"), #yes or no card 
        dbc.Tab(example_tab, label="Profit & Loss simulator"), #board simulation 
        dbc.Tab(example_tab, label="Example tab" ),
        dbc.Tab(JW_tab, label="JW tab"),  #tab to for JW to experiment with dash
        dbc.Tab(MR_tab, label="MR tab"),  #tab for MR to to experiment with dash
        dbc.Tab(list_of_content, label="MR2 tab"),
        dbc.Tab(MK_tab, label="MK tab"),  #tab for MK to to experiment with dash
        dbc.Tab(JB_tab, label="JB tab"),  #tab for JB to to experiment with dash
    ]
)

#this defines that app layout will consiste of tabs object
app.layout = TABS

#this code binds defines inputs and outputs. 
@app.callback(
    Output('graph1', 'figure'),
    Output('graph2', 'figure'),
    Output('graph3', 'figure'),
    Output('graph4', 'figure'),
    Input('l1_drop', 'value'),
    Input('l2_drop','value'),
    Input('l3_drop','value'),
    Input('l4_drop','value'),
    [Input('start-end', 'value')]
    )

#this is function that is called by dash whenever user changes input in my case it draws charts 
def update_figure(l1_val,l2_val,t_val,t_val1,ss):
    lst=[]
    data=app_and_credit_label_2[(app_and_credit_label_2['start_mth']>=ss[0]) & (app_and_credit_label_2['start_mth']<=ss[1])]
    df=pd.pivot_table(data,index=l1_val,columns=l2_val,values=t_val,aggfunc='mean',margins=True).round(4)
    lst.append(px.bar(df,barmode='group',text_auto=True,title="{} bad clients % by {} and {}".format(t_val,l1_val,l2_val))),
    lst.append(px.imshow(df,color_continuous_scale='RdBu_r',text_auto=".2%",title="{} bad clients % by {} and {}".format(t_val,l1_val,l2_val))),
    df=pd.pivot_table(data,index=l1_val,columns=l2_val,values=t_val1,aggfunc='mean',margins=True).round(4)
    lst.append(px.bar(df,barmode='group',text_auto=True,title="{} bad clients % by {} and {}".format(t_val1,l1_val,l2_val))),
    lst.append(px.imshow(df,color_continuous_scale='RdBu_r',text_auto=True,title="{} bad clients % by {} and {}".format(t_val1,l1_val,l2_val)))
    for i in range(len(lst)):
        lst[i].update_layout(transition_duration=500)
        lst[i].layout.yaxis.tickformat = ',.2%' #show things as percentage with 2 decimal places
    lst[2].update_xaxes(type='category') #heatmap charts weren't working correclty withotu making axes categorical
    return lst

#command below will start the web application (in output cell there should be address which needs to be copied into webrowser to access app, likely: http://127.0.0.1:8050/ )
if __name__ == '__main__':
    app.run_server()

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

 * Serving Flask app '__main__' (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: off


 * Running on http://127.0.0.1:8050/ (Press CTRL+C to quit)
127.0.0.1 - - [03/Apr/2022 12:09:46] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [03/Apr/2022 12:09:46] "GET /_dash-layout HTTP/1.1" 200 -
127.0.0.1 - - [03/Apr/2022 12:09:46] "GET /_dash-dependencies HTTP/1.1" 200 -
127.0.0.1 - - [03/Apr/2022 12:09:46] "GET /_favicon.ico?v=2.3.0 HTTP/1.1" 200 -
127.0.0.1 - - [03/Apr/2022 12:09:46] "GET /_dash-component-suites/dash/dcc/async-dropdown.js HTTP/1.1" 200 -
127.0.0.1 - - [03/Apr/2022 12:09:46] "GET /_dash-component-suites/dash/dcc/async-slider.js HTTP/1.1" 200 -
127.0.0.1 - - [03/Apr/2022 12:09:46] "GET /_dash-component-suites/dash/dcc/async-graph.js HTTP/1.1" 200 -
127.0.0.1 - - [03/Apr/2022 12:09:46] "GET /_dash-component-suites/dash/dcc/async-plotlyjs.js HTTP/1.1" 200 -


Exception on /_dash-update-component [POST]
Traceback (most recent call last):
  File "C:\Users\User\anaconda3\envs\Dash_Env\lib\site-packages\flask\app.py", line 2073, in wsgi_app
    response = self.full_dispatch_request()
  File "C:\Users\User\anaconda3\envs\Dash_Env\lib\site-packages\flask\app.py", line 1518, in full_dispatch_request
    rv = self.handle_user_exception(e)
  File "C:\Users\User\anaconda3\envs\Dash_Env\lib\site-packages\flask\app.py", line 1516, in full_dispatch_request
    rv = self.dispatch_request()
  File "C:\Users\User\anaconda3\envs\Dash_Env\lib\site-packages\flask\app.py", line 1502, in dispatch_request
    return self.ensure_sync(self.view_functions[rule.endpoint])(**req.view_args)
  File "C:\Users\User\anaconda3\envs\Dash_Env\lib\site-packages\dash\dash.py", line 1345, in dispatch
    response.set_data(func(*args, outputs_list=outputs_list))
  File "C:\Users\User\anaconda3\envs\Dash_Env\lib\site-packages\dash\_callback.py", line 151, in add_context
    outpu

127.0.0.1 - - [03/Apr/2022 12:09:46] "POST /_dash-update-component HTTP/1.1" 500 -


In [49]:
### 6. DASH APP taking features and 2 tagets to compare targets relation

# We use here dash core components https://dash.plotly.com/dash-core-components like graph that shows graph and dropdowns, sliders where users provide input. 
# We use here dash bootstrap components https://dash-bootstrap-components.opensource.faculty.ai/docs/components/  like tabs, card, form, col, row etc. to organize
# everything into 2 tabs, where 1st tab has a side bar and content (rightbar) and all elements are packed into that. 

# This is just renaming columns to more friendly format so it displays nicely
# app_and_credit_label_2.rename(columns={'CODE_GENDER':'Gender', 'FLAG_OWN_CAR': 'Own Car','FLAG_OWN_REALTY':'Own Realty', 'CNT_CHILDREN':'Children Count', 'AMT_INCOME_TOTAL':'Yearly Income','NAME_INCOME_TYPE':'Income Type', 'NAME_EDUCATION_TYPE':'Education Type','NAME_FAMILY_STATUS':'Family Status', 'NAME_HOUSING_TYPE': 'Housing Type','DAYS_BIRTH':'Days Birth', 'DAYS_EMPLOYED':'Days Employed', 'FLAG_MOBIL':'Mobil', 'FLAG_WORK_PHONE':'Work Phone', 'FLAG_PHONE': 'Phone Flag', 'FLAG_EMAIL':'Email.Flag', 'OCCUPATION_TYPE':'Occupation Type', 'CNT_FAM_MEMBERS':'Family Members #','GOOD_BAD':'bad clients 1', 'all_late_flag':'bad clients 2'},inplace=True)

#we import here the necessary modules for dash
from dash import dash, dcc, html, Input, Output, State 
import dash_bootstrap_components as dbc
from dash.exceptions import PreventUpdate
import plotly.express as px

#we set under the app a dash application instance
app = dash.Dash(external_stylesheets=[dbc.themes.BOOTSTRAP])

#Our app is built of tabs. below is exemplary tab. Its made up of 3 rows horizontally and 3 columns in row 2. You can define as many rows and columns as you wish this way you define layout. 
# All other components like dropdowns, buttons etc. need to go directly into row or column wrappers. You can use dash bootstrap components, dash core controls components and html components. 
# You can copy ane experiment with the code into either JW_tab, MR_tab, MK_tab,JB_tab

example_tab = html.Div(
    [
        dbc.Row(dbc.Col(html.H1('This is row 1')),style={"border-style": "ridge"}),
        dbc.Row(
            [
                html.H2('This is Row 2. there are 3 columns in it.'),
                dbc.Col([html.Div("This is column 1"),dbc.Button('Button in column1')],style={"border-style": "ridge"}),
                dbc.Col([html.Div("This is column 2"),dbc.Input(type="email", placeholder="example@internet.com")],style={"border-style": "ridge"}),
                dbc.Col([html.Div("This is column 3"),html.H1('Big Heading in column3'),html.H2('Medium Heading in column3'),html.H3('Smaller Heading in column3')],style={"border-style": "ridge"}),
            ],style={"border-style": "ridge"}
        ),
        dbc.Row(html.H1('This is Row 3'),style={"border-style": "ridge"})
    ]
)

JW_tab = html.Div()
MR_tab = html.Div()
MK_tab = html.Div()
JB_tab = html.Div()

### START PL tab #############################################
CONTENT_STYLE2 = {
    # "padding-top":"300",
    "margin-left": "25rem",
    "margin-right": "2rem",
    "margin-top:": "25rem"
    # "padding": "2rem 1rem",
}

client_count=list(range(0,100000,1000))
default_prob=[x/100 for x in range(0,10,1)]
interest=[x/100 for x in range(0,15,1)]
average_credit=list(range(0,100000,1000))

PL_controls = dbc.Form([
        dbc.CardHeader('Number of clients'),
        dbc.Card(dcc.Slider(0, 500000, 1000,value=0,marks=None, tooltip={"placement": "top", "always_visible": True},id='PL_dropdown1')),
        # dbc.Card(dcc.Dropdown(client_count,client_count,id='PL_dropdown1')),
        dbc.CardHeader('Accepted level of default probability'),
        dbc.Card(dcc.Slider(0, 0.15, 0.01,value=0,marks=None, tooltip={"placement": "top", "always_visible": True},id='PL_dropdown2')),
        # dcc.Input(type="range"),
        dbc.CardHeader('Interest rate'),
        dbc.Card(dcc.Slider(0, 0.15, 0.01,value=0,marks=None, tooltip={"placement": "top", "always_visible": True},id='PL_dropdown3')),
        dbc.CardHeader('Average credit amount'),
        # dbc.Card(dcc.Dropdown(average_credit,average_credit, id='PL_dropdown4')),
        dbc.Card(dcc.Slider(0, 100000, 1000,value=0,marks=None, tooltip={"placement": "top", "always_visible": True},id='PL_dropdown4')),
        ])

PL_side_bar = html.Div(
    [
        html.H5('Simulation parameters'),
        html.Hr(),
        PL_controls
        ],
    style=SIDEBAR_STYLE,
)

profit_capital_name=dbc.Col(dbc.Alert("Profit \ Capital Return",color="success"),width={"size": 2})
profit_income_name=dbc.Col(dbc.Alert("Profit \ Income Ratio",color="success"),width={"size": 2})
profit_name=dbc.Col(dbc.Alert("Profit",color="success"),width={"size": 2})
income_name=dbc.Col(dbc.Alert("Income",color="success"),width={"size": 2})
loss_name=dbc.Col(dbc.Alert("Loss",color="success"),width={"size": 2})

profit_capital_val=dbc.Col(dbc.Alert(color="success",id="profit_capital"),width={"size": 2})
profit_income_val=dbc.Col(dbc.Alert(color="success",id="profit_income"),width={"size": 2})
profit_val=dbc.Col(dbc.Alert(color="success",id="profit_output"),width={"size": 2})
income_val=dbc.Col(dbc.Alert(color="success",id="income_output"),width={"size": 2},)
loss_val=dbc.Col(dbc.Alert(color="success",id="loss_output"),width={"size": 2})

row1=dbc.Row([profit_capital_name,profit_income_name,profit_name,income_name,loss_name])
row2=dbc.Row([profit_capital_val,profit_income_val,profit_val,income_val,loss_val])

# kontener1=dbc.Container([row1,row2])
# kontener2=dbc.Container([row1,row2])
# kontener3=dbc.Container([kontener1,kontener2])

# PL_content=html.Div([dbc.Row(),dbc.Row(),row1,dbc.Row(),row2],style=CONTENT_STYLE)
PL_content=html.Div([row1,row2],style=CONTENT_STYLE2)
PL_tab = html.Div([PL_side_bar,PL_content],style={"margin-top":"5rem"})

@app.callback(
    Output('profit_capital','children'),
    Output('profit_income','children'),
    Output('profit_output','children'),
    Output('income_output','children'),
    Output('loss_output','children'),
    Input('PL_dropdown1','value'),
    Input('PL_dropdown2','value'),
    Input('PL_dropdown3','value'),
    Input('PL_dropdown4','value'),prevent_initial_call=True
    )
def update_figure2(clients,default_probability,interest_rate,credit_amount):
        bad_clients=round(clients*default_probability)
        good_clients=clients-bad_clients
        income=round(good_clients*interest_rate*credit_amount)
        loss=round(bad_clients*credit_amount)
        profit=income-loss
        profit_income=profit/income
        profit_capital=profit/(clients*credit_amount)
        return profit_capital,profit_income,profit,income,loss

### END PL tab #############################################

MR_tab = html.Div([

    dbc.Card(
        [
            dbc.CardImg(src="https://raw.githubusercontent.com/infoshareacademy/jdszr6-slytherin_group/master/Slytherin_ClearBG.webp", top=True,),
            dbc.CardBody(
                [html.H5("SLYTHERIN BANK", className="card_title"),
                html.P(
                    "Welcome in our bank. Please select if you are "
                    "our client or advisor.",
                    className = "card_text",
                ),
                dbc.DropdownMenu(
                    label = "selection",
                    color = "success",
                    children=[
                        dbc.DropdownMenuItem("Client", id="client"),
                        dbc.DropdownMenuItem("Advisor", id="advisor")
                    ]
                )
                ]
            )
        ],
        style={
            "width": "18rem",
            "left": 650},
    )
    ]
)

#dropdown for client/advisor list
list_ca_tab = dbc.Form([
        # dbc.CardHeader('Birth date'),
        # dbc.Card(dcc.Input(value="yyyy-mm-dd", id='b_date', type="text")),
        # dbc.CardHeader('Ocupation type'),
        # dbc.Card(dcc.Dropdown(app_and_credit_label_2["OCCUPATION_TYPE"].unique(),app_and_credit_label_2["OCCUPATION_TYPE"].unique(),id='ocupation_type')),
        # dbc.CardHeader('Yearly income'),
        # dbc.Card(dcc.Dropdown(app_and_credit_label_2["AMT_INCOME_TOTAL_CATEGORIZED"].unique(),app_and_credit_label_2["AMT_INCOME_TOTAL_CATEGORIZED"].unique(),id='yerly_income')),
        # dbc.CardHeader('Incom type'),
        # dbc.Card(dcc.Dropdown(app_and_credit_label_2["NAME_INCOME_TYPE"].unique(),app_and_credit_label_2["NAME_INCOME_TYPE"].unique(),id='incom_type')),
        # dbc.CardHeader('Education type'),
        # dbc.Card(dcc.Dropdown(app_and_credit_label_2["NAME_EDUCATION_TYPE"].unique(),app_and_credit_label_2["NAME_EDUCATION_TYPE"].unique(),id='education_type')),
        # dbc.CardHeader('Family status'),
        # dbc.Card(dcc.Dropdown(app_and_credit_label_2["NAME_FAMILY_STATUS"].unique(),app_and_credit_label_2["NAME_FAMILY_STATUS"].unique(),id='family_status')),
        # dbc.CardHeader('Family members'),
        # dbc.Card(dcc.Dropdown(app_and_credit_label_2["CNT_FAM_MEMBERS_CATEGORIZED"].unique(),app_and_credit_label_2["CNT_FAM_MEMBERS_CATEGORIZED"].unique(),id='family_members')),  
        dcc.Dropdown(
        app_and_credit_label_2["CODE_GENDER"].unique(),
        #["M", "F"],
        placeholder = "CODE_GENDER",
        id = "gender"),
    # FLAG_OWN_CAR 2
    dcc.Dropdown(
        app_and_credit_label_2["FLAG_OWN_CAR"].unique(),
        placeholder = "FLAG_OWN_CAR",
        id = "car"),
    # FLAG_OWN_REALTY 3
    dcc.Dropdown(
        app_and_credit_label_2["FLAG_OWN_REALTY"].unique(),
        placeholder = "FLAG_OWN_REALTY",
        id = "realty"),
    # NAME_INCOME_TYPE 4
    dcc.Dropdown(
        app_and_credit_label_2["NAME_INCOME_TYPE"].unique(),
        placeholder = "NAME_INCOME_TYPE",
        id = "income_type"),
    # NAME_EDUCATION_TYPE 5
    dcc.Dropdown(
        app_and_credit_label_2["NAME_EDUCATION_TYPE"].unique(),
        placeholder = "NAME_EDUCATION_TYPE",
        id = "education_type2"),
    # NAME_FAMILY_STATUS 6
    dcc.Dropdown(
        app_and_credit_label_2["NAME_FAMILY_STATUS"].unique(),
        placeholder = "NAME_FAMILY_STATUS",
        id = "family_status2"),
    # NAME_HOUSING_TYPE 7
    dcc.Dropdown(
        app_and_credit_label_2["NAME_HOUSING_TYPE"].unique(),
        placeholder = "NAME_HOUSING_TYPE",
        id = "housing_type"),
    # FLAG_WORK_PHONE 8
    dcc.Dropdown(
        app_and_credit_label_2["FLAG_WORK_PHONE"].unique(),
        placeholder = "FLAG_WORK_PHONE",
        id = "work_phone"),
    # FLAG_PHONE 9
    dcc.Dropdown(
        app_and_credit_label_2["FLAG_PHONE"].unique(),
        placeholder = "FLAG_PHONE",
        id = "phone"),
    # FLAG_EMAIL 10
    dcc.Dropdown(
        app_and_credit_label_2["FLAG_EMAIL"].unique(),
        placeholder = "FLAG_EMAIL",
        id = "email"),
    # OCCUPATION_TYPE 11
    dcc.Dropdown(
        app_and_credit_label_2["OCCUPATION_TYPE"].unique(),
        placeholder = "OCCUPATION_TYPE",
        id = "occupation"),
    # CHILDREN 12
    dcc.Dropdown(
        app_and_credit_label_2["CNT_CHILDREN_CATEGORIZED"].unique(),
        placeholder = "CNT_CHILDREN_CATEGORIZED",
        id = "children"),
    # MEMBERS 13
    dcc.Dropdown(
        app_and_credit_label_2["CNT_FAM_MEMBERS_CATEGORIZED"].unique(),
        placeholder = "CNT_FAM_MEMBERS_CATEGORIZED",
        id = "members"),
    # AGE 14
    dcc.Dropdown(
        app_and_credit_label_2["DAYS_BIRTH_CATEGORIZED"].unique(),
        placeholder = "DAYS_BIRTH_CATEGORIZED",
        id = "age"),
    # YEARS_EMPLOYED 15
    dcc.Dropdown(
        app_and_credit_label_2["DAYS_EMPLOYED_CATEGORIZED"].unique(),
        placeholder = "DAYS_EMPLOYED_CATEGORIZED",
        id = "employed"),
    # INCOME 16
    dcc.Dropdown(
        app_and_credit_label_2["AMT_INCOME_TOTAL_CATEGORIZED"].unique(),
        placeholder = "AMT_INCOME_TOTAL_CATEGORIZED",
        id = "income")
        ])

#secound tab with information about client
list_of_content = html.Div([

    dbc.Card(
        [
            dbc.CardImg(src="https://raw.githubusercontent.com/infoshareacademy/jdszr6-slytherin_group/master/Slytherin_ClearBG.webp", top=True,),
            dbc.CardBody(
                [html.H5("SLYTHERIN BANK", className="card_title")]
                
            ),
            html.Br(),
            dcc.Input(id="email1", value="e-mail", type="text", ),
            html.Br(),
            dcc.Input(value="phone1", type="text"),
            html.Hr(),
            list_ca_tab,
            html.Button("Apply", id="aplu button"),
            html.Div(id="show_data")

        ],
        style={
            "width": "18rem",
            "left": 650,
            },
    )

]
)

#callback for inserted information from client

@app.callback(
    Output("result", "value"),
    Output('test123','value'),
    Input("gender", "value"),
    Input("car", "value"),
    Input("realty", "value"),
    Input("income_type", "value"),
    Input("education_type", "value"),
    Input("family_status", "value"),
    Input("housing_type", "value"),
    Input("work_phone", "value"),
    Input("phone", "value"),
    Input("email", "value"),
    Input("occupation", "value"),
    Input("children", "value"),
    Input("members", "value"),
    Input("age", "value"),
    Input("employed", "value"),
    Input("income", "value"))
def dash_function(gender, car, realty, income_type, education_type, family_status, housing_type,
                  work_phone, phone, email, occupation, children, members, age, employed, income):
    personal_data = {'CODE_GENDER': gender,
                     'FLAG_OWN_CAR': car,
                     'FLAG_OWN_REALTY': realty,
                     'NAME_INCOME_TYPE': income_type,
                     'NAME_EDUCATION_TYPE': education_type,
                     'NAME_FAMILY_STATUS': family_status,
                     'NAME_HOUSING_TYPE': housing_type,
                     'FLAG_WORK_PHONE': work_phone,
                     'FLAG_PHONE': phone,
                     'FLAG_EMAIL': email,
                     'OCCUPATION_TYPE': occupation,
                     'CHILDREN': children,
                     'MEMBERS': members,
                     'AGE': age,
                     'YEARS_EMPLOYED': employed,
                     'INCOME': income}
    main_features = ['OCCUPATION_TYPE',
                     'AGE',
                     'INCOME',
                     'FLAG_EMAIL',
                     'NAME_FAMILY_STATUS',
                     'NAME_EDUCATION_TYPE',
                     'NAME_INCOME_TYPE',
                     'MEMBERS']
    extra_features = ['FLAG_OWN_CAR',
                      'FLAG_OWN_REALTY',
                      'NAME_HOUSING_TYPE',
                      'FLAG_WORK_PHONE',
                      'FLAG_PHONE',
                      'CHILDREN',
                      'YEARS_EMPLOYED',
                      'CODE_GENDER']
    main_data = {feature: personal_data[feature] for feature in main_features}
    extra_data = {feature: personal_data[feature] for feature in extra_features}
    def conditional_probability(data_frame, personal_data, split_method, threshold):
        if split_method == "standard":
            good_clients = data_frame[(data_frame["GOOD"] == 1) | (data_frame["SUFFICIENT"] == 1)]
            bad_clients = data_frame[(data_frame["GOOD"] == 0) & (data_frame["SUFFICIENT"] == 0)]
        elif split_method == "good":
            good_clients = data_frame[data_frame["GOOD"] == 1]
            bad_clients = data_frame[data_frame["GOOD"] == 0]
        elif split_method == "sufficient":
            data_frame = data_frame[data_frame["GOOD"] == 0]
            good_clients = data_frame[data_frame["SUFFICIENT"] == 1]
            bad_clients = data_frame[data_frame["SUFFICIENT"] == 0]
        for i in range(1, len(personal_data)+1):
            records = 0
            for key, value in personal_data.items():
                conditional_records = len(data_frame[data_frame[key] == value])
                if conditional_records > records:
                    records = conditional_records
                    specific_key = key
                    specific_value = value
            good_records = len(good_clients[good_clients[specific_key] == specific_value])
            bad_records = len(bad_clients[bad_clients[specific_key] == specific_value])
            client_records = good_records + bad_records
            if client_records >= threshold and bad_records > 0:
                personal_data = {key:personal_data[key] for key in personal_data if key!=specific_key}
                good_clients = good_clients[good_clients[specific_key] == specific_value]
                bad_clients = bad_clients[bad_clients[specific_key] == specific_value]
                probability = round(bad_records/(bad_records + good_records), 5)
            else:
                break
        return probability, personal_data
    def executor(data_frame, personal_data, main_data, extra_data,
                 split_method = "standard", threshold = 1000, second_threshold = 500):
        result = conditional_probability(data_frame, main_data, split_method, threshold)
        probability, data = result
        if len(data) != 0:
            result = conditional_probability(data_frame, main_data, split_method, second_threshold)
            new_probability, new_data = result
            if len(new_data) < len(data) and new_probability <= probability:
                probability = new_probability
                data = new_data
            extra_data = dict(extra_data, **data)
        for key, value in extra_data.items():
            single_dict = {key: value}
            new_probability = conditional_probability(data_frame, dict(main_data, **single_dict),
                                                      split_method, second_threshold)[0]
            if new_probability <= probability:
                probability = new_probability
                main_data = dict(main_data, **single_dict)
        return probability
    x = conditional_probability(data_frame, main_data, "standard", 1000)[0]
    y = executor(data_frame, personal_data, main_data, extra_data)
    return gender #personal_data,x, y

MK_tab=html.Div(dbc.Textarea(id='test123'))

#this defines what are the selections possible in user inputs
l1=app_and_credit_label_2.columns[1:17]
t1=app_and_credit_label_2.columns[-21:-3]
p1=[*range(-60,1,1)]

#this defines what are the elements in rows
el1=dbc.Col(dcc.Graph(id='graph1'), md=6)
el2=dbc.Col(dcc.Graph(id='graph2'), md=6)
el3=dbc.Col(dcc.Graph(id='graph3'), md=6)
el4=dbc.Col(dcc.Graph(id='graph4'), md=6)

#this defines what is in rows
row0=dbc.Form([dbc.CardHeader('Select Period'), dcc.RangeSlider(-60, 0, 6, value=[-60, 0], id='start-end')])
row1=dbc.Row([el1,el2])
row2=dbc.Row([el3,el4])

#this defines the sidebar style
SIDEBAR_STYLE = {
    "position": "fixed",
    "top": 100,
    "left": 0,
    "bottom": 0,
    "width": "16rem",
    "padding": "2rem 1rem",
    "background-color": "#f8f9fa",
}

# this defines the style of content
CONTENT_STYLE = {
    "margin-left": "18rem",
    "margin-right": "2rem",
    "padding": "2rem 1rem",
}

TEXT_STYLE={}

#this defines that controls object is a form consisting of couple cards with couple dash core components called dropdawn. See dash core components here: https://dash.plotly.com/dash-core-components
controls = dbc.Form([
        dbc.CardHeader('Select Feature 1'),
        dbc.Card(dcc.Dropdown(l1,l1,id='l1_drop')),
        dbc.CardHeader('Select Feature 2'),
        dbc.Card(dcc.Dropdown(l1,l1,id='l2_drop')),
        dbc.CardHeader('Select target'),
        dbc.Card(dcc.Dropdown(t1,t1,id='l3_drop')),
        dbc.CardHeader('Select target 2'),
        dbc.Card(dcc.Dropdown(t1,t1,id='l4_drop'))
        ])

#this defines that what appears on the left side will have html headings (H2,H5) and controls object
sidebar = html.Div(
    [
        html.H2('Parameters', style=TEXT_STYLE),
        html.H5('Enter all parameters to see all visualisations'),
        html.Hr(),
        controls
    ],
    style=SIDEBAR_STYLE,
)

#this defines that what appears on the right side is built from 3 rows
content = html.Div(
    [
    row0,
    row1,
    row2,
    ],
    style=CONTENT_STYLE
)

#this defines that tab1 content is consisting of sidebar and content
tab1_content=html.Div([sidebar,content])

# Below defines that TABS object is built from Tabs and Tab components from dash bootstrap components https://dash-bootstrap-components.opensource.faculty.ai/docs/components/. Essentially
# its a list of tabs that the app will be divided into. Its easy to remove a tab (just remove element of the list) or add another tab just copy the line and change label.
TABS = dbc.Tabs(
    [
        dbc.Tab(tab1_content, label="Intro"), #information on project
        # dbc.Tab(example_tab, label="Data Eploration"), #data exploration
        # dbc.Tab(example_tab, label="Application simulator"), #yes or no card 
        dbc.Tab(PL_tab, label="Profit & Loss simulator"), #board simulation 
        # dbc.Tab(example_tab, label="Example tab" ),
        dbc.Tab(JW_tab, label="JW tab"),  #tab to for JW to experiment with dash
        dbc.Tab(MR_tab, label="MR tab"),  #tab for MR to to experiment with dash
        dbc.Tab(list_of_content, label="MR2 tab"),
        dbc.Tab(MK_tab, label="MK tab"),  #tab for MK to to experiment with dash
        # dbc.Tab(JB_tab, label="JB tab"),  #tab for JB to to experiment with dash
    ]
)

#this defines that app layout will consiste of tabs object
app.layout = TABS

#this code binds defines inputs and outputs. 
@app.callback(
    Output('graph1', 'figure'),
    Output('graph2', 'figure'),
    Output('graph3', 'figure'),
    Output('graph4', 'figure'),
    Input('l1_drop', 'value'),
    Input('l2_drop','value'),
    Input('l3_drop','value'),
    Input('l4_drop','value'),
    [Input('start-end', 'value')]
    )
#this is function that is called by dash whenever user changes input in my case it draws charts 
def update_figure(l1_val,l2_val,t_val,t_val1,ss):
    lst=[]
    data=app_and_credit_label_2[(app_and_credit_label_2['start_mth']>=ss[0]) & (app_and_credit_label_2['start_mth']<=ss[1])]
    df=pd.pivot_table(data,index=l1_val,columns=l2_val,values=t_val,aggfunc='mean',margins=True).round(4)
    lst.append(px.bar(df,barmode='group',text_auto=True,title="{} bad clients % by {} and {}".format(t_val,l1_val,l2_val))),
    lst.append(px.imshow(df,color_continuous_scale='RdBu_r',text_auto=".2%",title="{} bad clients % by {} and {}".format(t_val,l1_val,l2_val))),
    df=pd.pivot_table(data,index=l1_val,columns=l2_val,values=t_val1,aggfunc='mean',margins=True).round(4)
    lst.append(px.bar(df,barmode='group',text_auto=True,title="{} bad clients % by {} and {}".format(t_val1,l1_val,l2_val))),
    lst.append(px.imshow(df,color_continuous_scale='RdBu_r',text_auto=True,title="{} bad clients % by {} and {}".format(t_val1,l1_val,l2_val)))
    for i in range(len(lst)):
        lst[i].update_layout(transition_duration=500)
        lst[i].layout.yaxis.tickformat = ',.2%' #show things as percentage with 2 decimal places
    lst[2].update_xaxes(type='category') #heatmap charts weren't working correclty withotu making axes categorical
    return lst

#command below will start the web application (in output cell there should be address which needs to be copied into webrowser to access app, likely: http://127.0.0.1:8050/ )
if __name__ == '__main__':
    app.run_server()

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is run

 * Running on http://127.0.0.1:8050/ (Press CTRL+C to quit)


In [7]:
### 7. Matrix 
import numpy as np

#files merging
new_file=pd.merge(application_records_t,df_credit,on="ID",how="inner")
new_file

Unnamed: 0,ID,CODE_GENDER,FLAG_OWN_CAR,FLAG_OWN_REALTY,CNT_CHILDREN,AMT_INCOME_TOTAL,NAME_INCOME_TYPE,NAME_EDUCATION_TYPE,NAME_FAMILY_STATUS,NAME_HOUSING_TYPE,...,CNT_FAM_MEMBERS,MONTHS_BALANCE,STATUS,LAST_RECORD,NUMBER_OF_RECORDS,MONTHS,ACTIVE,PARTIAL_STATUS,TOTAL_STATUS,GOOD_BAD
0,5008804,M,1,1,0,427500.0,Working,Higher education,Civil marriage,Rented apartment,...,2.0,0,C,0,16,0,1,0.117647,0.931985,1
1,5008804,M,1,1,0,427500.0,Working,Higher education,Civil marriage,Rented apartment,...,2.0,1,C,0,16,1,1,0.110294,0.931985,1
2,5008804,M,1,1,0,427500.0,Working,Higher education,Civil marriage,Rented apartment,...,2.0,2,C,0,16,2,1,0.102941,0.931985,1
3,5008804,M,1,1,0,427500.0,Working,Higher education,Civil marriage,Rented apartment,...,2.0,3,C,0,16,3,1,0.095588,0.931985,1
4,5008804,M,1,1,0,427500.0,Working,Higher education,Civil marriage,Rented apartment,...,2.0,4,C,0,16,4,1,0.088235,0.931985,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
777710,5150337,M,0,1,0,112500.0,Working,Secondary / secondary special,Single / not married,Rented apartment,...,1.0,9,0,0,14,9,1,-0.023810,-0.521429,0
777711,5150337,M,0,1,0,112500.0,Working,Secondary / secondary special,Single / not married,Rented apartment,...,1.0,10,2,0,14,10,1,-0.033333,-0.521429,0
777712,5150337,M,0,1,0,112500.0,Working,Secondary / secondary special,Single / not married,Rented apartment,...,1.0,11,1,0,14,11,1,-0.021429,-0.521429,0
777713,5150337,M,0,1,0,112500.0,Working,Secondary / secondary special,Single / not married,Rented apartment,...,1.0,12,0,0,14,12,1,-0.009524,-0.521429,0


In [33]:
list[app_and_credit_label_2.columns]

list[Index([                 'ID',         'CODE_GENDER',        'FLAG_OWN_CAR',
           'FLAG_OWN_REALTY',        'CNT_CHILDREN',    'AMT_INCOME_TOTAL',
          'NAME_INCOME_TYPE', 'NAME_EDUCATION_TYPE',  'NAME_FAMILY_STATUS',
         'NAME_HOUSING_TYPE',
       ...
            'all_late_ratio',       'all_late_flag',              'ACTIVE',
              'TOTAL_STATUS',            'GOOD_BAD',          'income_bin',
                       'age',             'age_bin',      'length_of_empl',
        'length_of_empl_bin'],
      dtype='object', length=106)]

In [8]:
# files categorizing
conditions=[
    (new_file["CNT_CHILDREN"] == 0),
    (new_file["CNT_CHILDREN"] == 1),
    (new_file["CNT_CHILDREN"] == 2) & (new_file["CNT_CHILDREN"] ==3 ),
    (new_file["CNT_CHILDREN"] >3)
]
values=["No children", "1 child", "2-3 children", "Many children"]
new_file["CNT_CHILDREN_CATEGORIZED"]=np.select(conditions,values)
conditions1=[
    (new_file["CNT_FAM_MEMBERS"] == 1),
    (new_file["CNT_FAM_MEMBERS"] == 2),
    (new_file["CNT_FAM_MEMBERS"] == 3),
    (new_file["CNT_FAM_MEMBERS"] == 4),
    (new_file["CNT_FAM_MEMBERS"] >4)
]
values1=["1 person household", "couple", "2+1", "2+2","big family"]
new_file["CNT_FAM_MEMBERS_CATEGORIZED"]=np.select(conditions1,values1)
conditions2=[
    (new_file["AMT_INCOME_TOTAL"] <= 50000),
    (new_file["AMT_INCOME_TOTAL"] > 50000)&(new_file["AMT_INCOME_TOTAL"]<=100000),
    (new_file["AMT_INCOME_TOTAL"] > 100000)& (new_file["AMT_INCOME_TOTAL"]<=200000),
    (new_file["AMT_INCOME_TOTAL"] > 200000),
]
values2=["very low income", "medium income", "high income", "very high income"]
new_file["AMT_INCOME_TOTAL_CATEGORIZED"]=np.select(conditions2,values2)
conditions3=[
    (new_file["DAYS_BIRTH"] < 0 ) & (new_file["DAYS_BIRTH"]>=-7300),
    (new_file["DAYS_BIRTH"] < -7300) & (new_file["DAYS_BIRTH"]>=-10950),
    (new_file["DAYS_BIRTH"] < -10950) & (new_file["DAYS_BIRTH"]>=-14600),
    (new_file["DAYS_BIRTH"] < -14600) & (new_file["DAYS_BIRTH"]>=-18250),
    (new_file["DAYS_BIRTH"] < -18250) & (new_file["DAYS_BIRTH"]>=-21900),
    (new_file["DAYS_BIRTH"] < -21900) & (new_file["DAYS_BIRTH"]>=-25201)
]
values3=["under 20ties", "under 30ties", "under 40ties", "under 50ties","under 60ties","over 60ties"]
new_file["DAYS_BIRTH_CATEGORIZED"]=np.select(conditions3,values3)
conditions4=[
    (new_file["DAYS_EMPLOYED"] > 0),
    (new_file["DAYS_EMPLOYED"] > 0),
    (new_file["DAYS_EMPLOYED"] < 0 ) & (new_file["DAYS_EMPLOYED"]>=-1825),
    (new_file["DAYS_EMPLOYED"] < -1825) & (new_file["DAYS_EMPLOYED"]>=-3650),
    (new_file["DAYS_EMPLOYED"] < -3650) & (new_file["DAYS_EMPLOYED"]>=-5475),
    (new_file["DAYS_EMPLOYED"] < -5475),
             ]
values4=["pensioner","no working experience", "under 5 years", "under 10 years", "under 15 years", "more than 15 years"]
new_file["DAYS_EMPLOYED_CATEGORIZED"]=np.select(conditions4,values4)
new_file

Unnamed: 0,ID,CODE_GENDER,FLAG_OWN_CAR,FLAG_OWN_REALTY,CNT_CHILDREN,AMT_INCOME_TOTAL,NAME_INCOME_TYPE,NAME_EDUCATION_TYPE,NAME_FAMILY_STATUS,NAME_HOUSING_TYPE,...,MONTHS,ACTIVE,PARTIAL_STATUS,TOTAL_STATUS,GOOD_BAD,CNT_CHILDREN_CATEGORIZED,CNT_FAM_MEMBERS_CATEGORIZED,AMT_INCOME_TOTAL_CATEGORIZED,DAYS_BIRTH_CATEGORIZED,DAYS_EMPLOYED_CATEGORIZED
0,5008804,M,1,1,0,427500.0,Working,Higher education,Civil marriage,Rented apartment,...,0,1,0.117647,0.931985,1,No children,couple,very high income,under 40ties,under 15 years
1,5008804,M,1,1,0,427500.0,Working,Higher education,Civil marriage,Rented apartment,...,1,1,0.110294,0.931985,1,No children,couple,very high income,under 40ties,under 15 years
2,5008804,M,1,1,0,427500.0,Working,Higher education,Civil marriage,Rented apartment,...,2,1,0.102941,0.931985,1,No children,couple,very high income,under 40ties,under 15 years
3,5008804,M,1,1,0,427500.0,Working,Higher education,Civil marriage,Rented apartment,...,3,1,0.095588,0.931985,1,No children,couple,very high income,under 40ties,under 15 years
4,5008804,M,1,1,0,427500.0,Working,Higher education,Civil marriage,Rented apartment,...,4,1,0.088235,0.931985,1,No children,couple,very high income,under 40ties,under 15 years
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
777710,5150337,M,0,1,0,112500.0,Working,Secondary / secondary special,Single / not married,Rented apartment,...,9,1,-0.023810,-0.521429,0,No children,1 person household,high income,under 30ties,under 5 years
777711,5150337,M,0,1,0,112500.0,Working,Secondary / secondary special,Single / not married,Rented apartment,...,10,1,-0.033333,-0.521429,0,No children,1 person household,high income,under 30ties,under 5 years
777712,5150337,M,0,1,0,112500.0,Working,Secondary / secondary special,Single / not married,Rented apartment,...,11,1,-0.021429,-0.521429,0,No children,1 person household,high income,under 30ties,under 5 years
777713,5150337,M,0,1,0,112500.0,Working,Secondary / secondary special,Single / not married,Rented apartment,...,12,1,-0.009524,-0.521429,0,No children,1 person household,high income,under 30ties,under 5 years


In [40]:
new_file.drop("FLAG_OWN_CAR", axis="columns", inplace=True)
new_file.drop("FLAG_OWN_REALTY", axis="columns", inplace=True)
new_file.drop("CNT_CHILDREN", axis="columns", inplace=True)
new_file.drop("AMT_INCOME_TOTAL", axis="columns", inplace=True)
new_file.drop("NAME_HOUSING_TYPE", axis="columns", inplace=True)

In [41]:
new_file.drop("CODE_GENDER", axis="columns", inplace=True)
new_file.drop("DAYS_BIRTH", axis="columns", inplace=True)
new_file.drop("DAYS_EMPLOYED", axis="columns", inplace=True)
new_file.drop("FLAG_WORK_PHONE", axis="columns", inplace=True)

In [42]:
new_file.drop("FLAG_PHONE", axis="columns", inplace=True)
new_file.drop("CNT_FAM_MEMBERS", axis="columns", inplace=True)
new_file.drop("MONTHS_BALANCE", axis="columns", inplace=True)
new_file.drop("STATUS", axis="columns", inplace=True)

In [43]:
new_file.drop("LAST_RECORD", axis="columns", inplace=True)
new_file.drop("MONTHS", axis="columns", inplace=True)
new_file.drop("ACTIVE", axis="columns", inplace=True)
new_file.drop("PARTIAL_STATUS", axis="columns", inplace=True)
new_file.drop("TOTAL_STATUS", axis="columns", inplace=True)
new_file.drop("CNT_CHILDREN_CATEGORIZED", axis="columns", inplace=True)
new_file.drop("DAYS_EMPLOYED_CATEGORIZED", axis="columns", inplace=True)

In [53]:
#matrix preparation

matrix=new_file.groupby(["OCCUPATION_TYPE","DAYS_BIRTH_CATEGORIZED","AMT_INCOME_TOTAL_CATEGORIZED","FLAG_EMAIL","NAME_FAMILY_STATUS","NAME_EDUCATION_TYPE",
                     "NAME_INCOME_TYPE", "CNT_FAM_MEMBERS_CATEGORIZED","GOOD_BAD"]).count()

matrix

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Unnamed: 7_level_0,Unnamed: 8_level_0,ID,NUMBER_OF_RECORDS
OCCUPATION_TYPE,DAYS_BIRTH_CATEGORIZED,AMT_INCOME_TOTAL_CATEGORIZED,FLAG_EMAIL,NAME_FAMILY_STATUS,NAME_EDUCATION_TYPE,NAME_INCOME_TYPE,CNT_FAM_MEMBERS_CATEGORIZED,GOOD_BAD,Unnamed: 9_level_1,Unnamed: 10_level_1
Accountants,over 60ties,high income,0,Married,Higher education,State servant,couple,0,26,26
Accountants,over 60ties,high income,0,Married,Higher education,State servant,couple,1,289,289
Accountants,over 60ties,high income,0,Married,Higher education,Working,couple,0,132,132
Accountants,over 60ties,high income,0,Married,Higher education,Working,couple,1,205,205
Accountants,over 60ties,high income,0,Married,Secondary / secondary special,Commercial associate,couple,1,52,52
...,...,...,...,...,...,...,...,...,...,...
Waiters/barmen staff,under 60ties,high income,0,Separated,Secondary / secondary special,State servant,1 person household,1,9,9
Waiters/barmen staff,under 60ties,very high income,0,Married,Secondary / secondary special,State servant,couple,0,50,50
Waiters/barmen staff,under 60ties,very high income,0,Married,Secondary / secondary special,State servant,couple,1,74,74
Waiters/barmen staff,under 60ties,very high income,0,Widow,Secondary / secondary special,Working,1 person household,0,4,4


In [54]:
matrix["cumsum"]=matrix.groupby(["OCCUPATION_TYPE","DAYS_BIRTH_CATEGORIZED","AMT_INCOME_TOTAL_CATEGORIZED","FLAG_EMAIL","NAME_FAMILY_STATUS","NAME_EDUCATION_TYPE",
                     "NAME_INCOME_TYPE", "CNT_FAM_MEMBERS_CATEGORIZED"]).sum()["ID"]
matrix

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Unnamed: 7_level_0,Unnamed: 8_level_0,ID,NUMBER_OF_RECORDS,cumsum
OCCUPATION_TYPE,DAYS_BIRTH_CATEGORIZED,AMT_INCOME_TOTAL_CATEGORIZED,FLAG_EMAIL,NAME_FAMILY_STATUS,NAME_EDUCATION_TYPE,NAME_INCOME_TYPE,CNT_FAM_MEMBERS_CATEGORIZED,GOOD_BAD,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Accountants,over 60ties,high income,0,Married,Higher education,State servant,couple,0,26,26,315
Accountants,over 60ties,high income,0,Married,Higher education,State servant,couple,1,289,289,315
Accountants,over 60ties,high income,0,Married,Higher education,Working,couple,0,132,132,337
Accountants,over 60ties,high income,0,Married,Higher education,Working,couple,1,205,205,337
Accountants,over 60ties,high income,0,Married,Secondary / secondary special,Commercial associate,couple,1,52,52,52
...,...,...,...,...,...,...,...,...,...,...,...
Waiters/barmen staff,under 60ties,high income,0,Separated,Secondary / secondary special,State servant,1 person household,1,9,9,27
Waiters/barmen staff,under 60ties,very high income,0,Married,Secondary / secondary special,State servant,couple,0,50,50,124
Waiters/barmen staff,under 60ties,very high income,0,Married,Secondary / secondary special,State servant,couple,1,74,74,124
Waiters/barmen staff,under 60ties,very high income,0,Widow,Secondary / secondary special,Working,1 person household,0,4,4,92


In [55]:
matrix["Percentage"]=matrix["ID"]/matrix["cumsum"]*100

In [56]:
matrix

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Unnamed: 7_level_0,Unnamed: 8_level_0,ID,NUMBER_OF_RECORDS,cumsum,Percentage
OCCUPATION_TYPE,DAYS_BIRTH_CATEGORIZED,AMT_INCOME_TOTAL_CATEGORIZED,FLAG_EMAIL,NAME_FAMILY_STATUS,NAME_EDUCATION_TYPE,NAME_INCOME_TYPE,CNT_FAM_MEMBERS_CATEGORIZED,GOOD_BAD,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Accountants,over 60ties,high income,0,Married,Higher education,State servant,couple,0,26,26,315,8.253968
Accountants,over 60ties,high income,0,Married,Higher education,State servant,couple,1,289,289,315,91.746032
Accountants,over 60ties,high income,0,Married,Higher education,Working,couple,0,132,132,337,39.169139
Accountants,over 60ties,high income,0,Married,Higher education,Working,couple,1,205,205,337,60.830861
Accountants,over 60ties,high income,0,Married,Secondary / secondary special,Commercial associate,couple,1,52,52,52,100.000000
...,...,...,...,...,...,...,...,...,...,...,...,...
Waiters/barmen staff,under 60ties,high income,0,Separated,Secondary / secondary special,State servant,1 person household,1,9,9,27,33.333333
Waiters/barmen staff,under 60ties,very high income,0,Married,Secondary / secondary special,State servant,couple,0,50,50,124,40.322581
Waiters/barmen staff,under 60ties,very high income,0,Married,Secondary / secondary special,State servant,couple,1,74,74,124,59.677419
Waiters/barmen staff,under 60ties,very high income,0,Widow,Secondary / secondary special,Working,1 person household,0,4,4,92,4.347826


In [None]:
8. #MK tab

data_frame = pd.read_csv("executor_prepared_data.csv").drop("Unnamed: 0", axis=1)
app = dash.Dash(external_stylesheets=[dbc.themes.BOOTSTRAP])
app.layout = html.Div([
    dcc.Textarea(
        placeholder='Wynik',
        disabled = 1,
        id = "result"),
    # CODE_GENDER 1
    dcc.Dropdown(
        data_frame["CODE_GENDER"].unique(),
        #["M", "F"],
        placeholder = "CODE_GENDER",
        id = "gender"),
    # FLAG_OWN_CAR 2
    dcc.Dropdown(
        data_frame["FLAG_OWN_CAR"].unique(),
        placeholder = "FLAG_OWN_CAR",
        id = "car"),
    # FLAG_OWN_REALTY 3
    dcc.Dropdown(
        data_frame["FLAG_OWN_REALTY"].unique(),
        placeholder = "FLAG_OWN_REALTY",
        id = "realty"),
    # NAME_INCOME_TYPE 4
    dcc.Dropdown(
        data_frame["NAME_INCOME_TYPE"].unique(),
        placeholder = "NAME_INCOME_TYPE",
        id = "income_type"),
    # NAME_EDUCATION_TYPE 5
    dcc.Dropdown(
        data_frame["NAME_EDUCATION_TYPE"].unique(),
        placeholder = "NAME_EDUCATION_TYPE",
        id = "education_type"),
    # NAME_FAMILY_STATUS 6
    dcc.Dropdown(
        data_frame["NAME_FAMILY_STATUS"].unique(),
        placeholder = "NAME_FAMILY_STATUS",
        id = "family_status"),
    # NAME_HOUSING_TYPE 7
    dcc.Dropdown(
        data_frame["NAME_HOUSING_TYPE"].unique(),
        placeholder = "NAME_HOUSING_TYPE",
        id = "housing_type"),
    # FLAG_WORK_PHONE 8
    dcc.Dropdown(
        data_frame["FLAG_WORK_PHONE"].unique(),
        placeholder = "FLAG_WORK_PHONE",
        id = "work_phone"),
    # FLAG_PHONE 9
    dcc.Dropdown(
        data_frame["FLAG_PHONE"].unique(),
        placeholder = "FLAG_PHONE",
        id = "phone"),
    # FLAG_EMAIL 10
    dcc.Dropdown(
        data_frame["FLAG_EMAIL"].unique(),
        placeholder = "FLAG_EMAIL",
        id = "email"),
    # OCCUPATION_TYPE 11
    dcc.Dropdown(
        data_frame["OCCUPATION_TYPE"].unique(),
        placeholder = "OCCUPATION_TYPE",
        id = "occupation"),
    # CHILDREN 12
    dcc.Dropdown(
        data_frame["CHILDREN"].unique(),
        placeholder = "CHILDREN",
        id = "children"),
    # MEMBERS 13
    dcc.Dropdown(
        data_frame["MEMBERS"].unique(),
        placeholder = "MEMBERS",
        id = "members"),
    # AGE 14
    dcc.Dropdown(
        data_frame["AGE"].unique(),
        placeholder = "AGE",
        id = "age"),
    # YEARS_EMPLOYED 15
    dcc.Dropdown(
        data_frame["YEARS_EMPLOYED"].unique(),
        placeholder = "YEARS_EMPLOYED",
        id = "employed"),
    # INCOME 16
    dcc.Dropdown(
        data_frame["INCOME"].unique(),
        placeholder = "INCOME",
        id = "income")
])
@app.callback(
    Output("result", "value"),
    Input("gender", "value"),
    Input("car", "value"),
    Input("realty", "value"),
    Input("income_type", "value"),
    Input("education_type", "value"),
    Input("family_status", "value"),
    Input("housing_type", "value"),
    Input("work_phone", "value"),
    Input("phone", "value"),
    Input("email", "value"),
    Input("occupation", "value"),
    Input("children", "value"),
    Input("members", "value"),
    Input("age", "value"),
    Input("employed", "value"),
    Input("income", "value"))
def dash_function(gender, car, realty, income_type, education_type, family_status, housing_type,
                  work_phone, phone, email, occupation, children, members, age, employed, income):
    personal_data = {'CODE_GENDER': gender,
                     'FLAG_OWN_CAR': car,
                     'FLAG_OWN_REALTY': realty,
                     'NAME_INCOME_TYPE': income_type,
                     'NAME_EDUCATION_TYPE': education_type,
                     'NAME_FAMILY_STATUS': family_status,
                     'NAME_HOUSING_TYPE': housing_type,
                     'FLAG_WORK_PHONE': work_phone,
                     'FLAG_PHONE': phone,
                     'FLAG_EMAIL': email,
                     'OCCUPATION_TYPE': occupation,
                     'CHILDREN': children,
                     'MEMBERS': members,
                     'AGE': age,
                     'YEARS_EMPLOYED': employed,
                     'INCOME': income}
    main_features = ['OCCUPATION_TYPE',
                     'AGE',
                     'INCOME',
                     'FLAG_EMAIL',
                     'NAME_FAMILY_STATUS',
                     'NAME_EDUCATION_TYPE',
                     'NAME_INCOME_TYPE',
                     'MEMBERS']
    extra_features = ['FLAG_OWN_CAR',
                      'FLAG_OWN_REALTY',
                      'NAME_HOUSING_TYPE',
                      'FLAG_WORK_PHONE',
                      'FLAG_PHONE',
                      'CHILDREN',
                      'YEARS_EMPLOYED',
                      'CODE_GENDER']
    main_data = {feature: personal_data[feature] for feature in main_features}
    extra_data = {feature: personal_data[feature] for feature in extra_features}
    def conditional_probability(data_frame, personal_data, split_method, threshold):
        if split_method == "standard":
            good_clients = data_frame[(data_frame["GOOD"] == 1) | (data_frame["SUFFICIENT"] == 1)]
            bad_clients = data_frame[(data_frame["GOOD"] == 0) & (data_frame["SUFFICIENT"] == 0)]
        elif split_method == "good":
            good_clients = data_frame[data_frame["GOOD"] == 1]
            bad_clients = data_frame[data_frame["GOOD"] == 0]
        elif split_method == "sufficient":
            data_frame = data_frame[data_frame["GOOD"] == 0]
            good_clients = data_frame[data_frame["SUFFICIENT"] == 1]
            bad_clients = data_frame[data_frame["SUFFICIENT"] == 0]
        for i in range(1, len(personal_data)+1):
            records = 0
            for key, value in personal_data.items():
                conditional_records = len(data_frame[data_frame[key] == value])
                if conditional_records > records:
                    records = conditional_records
                    specific_key = key
                    specific_value = value
            good_records = len(good_clients[good_clients[specific_key] == specific_value])
            bad_records = len(bad_clients[bad_clients[specific_key] == specific_value])
            client_records = good_records + bad_records
            if client_records >= threshold and bad_records > 0:
                personal_data = {key:personal_data[key] for key in personal_data if key!=specific_key}
                good_clients = good_clients[good_clients[specific_key] == specific_value]
                bad_clients = bad_clients[bad_clients[specific_key] == specific_value]
                probability = round(bad_records/(bad_records + good_records), 5)
            else:
                break
        return probability, personal_data
    def executor(data_frame, personal_data, main_data, extra_data,
                 split_method = "standard", threshold = 1000, second_threshold = 500):
        result = conditional_probability(data_frame, main_data, split_method, threshold)
        probability, data = result
        if len(data) != 0:
            result = conditional_probability(data_frame, main_data, split_method, second_threshold)
            new_probability, new_data = result
            if len(new_data) < len(data) and new_probability <= probability:
                probability = new_probability
                data = new_data
            extra_data = dict(extra_data, **data)
        for key, value in extra_data.items():
            single_dict = {key: value}
            new_probability = conditional_probability(data_frame, dict(main_data, **single_dict),
                                                      split_method, second_threshold)[0]
            if new_probability <= probability:
                probability = new_probability
                main_data = dict(main_data, **single_dict)
        return probability
    x = conditional_probability(data_frame, main_data, "standard", 1000)[0]
    y = executor(data_frame, personal_data, main_data, extra_data)
    return x, y
