In [1]:
#File description & paths to csv's
"""
Code is divided into the following sections:
1. Import data & create dataframes:
    -credit_records_o contains credit_records from csv file, same structure as in the file (long form)
    -application_records_o contains data from application csv, file same structure as in the file
    -credit_records_t is wide form of credit_records_o 
2. Prepare data in application records (i.e. fill in blanks in occupation type, standardize values and more)
3. Enrich credit data with start, end, duration, count of individual statues (0_count, 1_count... 5 ratio) and count all statuses (all_late_count),
ratio of individual status (0_ratio, 1_ratio... 5_ratio) and all late statuse ratio (all_later_ratio)
    - credit_records_t_label
4. Enrich credit data with Active & TOTAL_STATUS & GOOD_BAD CLIENT measures calculated wiht Mateusz method
5. Merge app & labelled credit data and calculate bins for discrete variables (birth date, employment date & amount income)
    - app_and_credit_label_2
6. Functions for GUI in IPWIDGET that take & store user input from ipywidgets
5. 
"""

#read script location and look for folder called data in which there should be files to import
import os
data_dir=os.path.join(os.path.abspath(''),"data")
path_to_credit_records_csv=os.path.join(data_dir,"credit_record.csv")
path_to_application_records_csv=os.path.join(data_dir,"application_record.csv")

Final version of app for exploring clients default probability

In [2]:
### 1. Import of data & dataframe def 
import pandas as pd

#create credit_records_o from data in csv
file=open(path_to_credit_records_csv) 
credit_records_o=pd.read_csv(file) 
file.close()

#create application_records_o from data in csv
file=open(path_to_application_records_csv) 
application_records_o=pd.read_csv(file) 
file.close()

#create wide version of credit_records
credit_records_t=pd.pivot(credit_records_o, index='ID',columns='MONTHS_BALANCE',values='STATUS') 
credit_records_t = credit_records_t[credit_records_t.columns.sort_values(ascending=False)] #reorder months balance column in descending order

In [3]:
### 2. Application Data Preparation Asia

import numpy as np
application_records_t=application_records_o.copy()
application_records_t.drop("FLAG_MOBIL", axis=1, inplace=True) #dropping as its 1 across all rows
application_records_t["FLAG_OWN_CAR"].replace(["Y","N"],["1","0"],inplace=True) #standardize to same format as other flags i.e. 1 for True and 0 for False
application_records_t["FLAG_OWN_REALTY"].replace(["Y","N"],["1","0"],inplace=True) #standardize to same format as other flags i.e. 1 for True and 0 for False
application_records_t["FLAG_OWN_CAR"]=application_records_t["FLAG_OWN_CAR"].astype(int) #standardize to int format
application_records_t["FLAG_OWN_REALTY"]=application_records_t["FLAG_OWN_REALTY"].astype(int) #standardize to int format
application_records_t["OCCUPATION_TYPE"].fillna(value="Not provided", inplace=True) #fill in blanks in occupation type

# files categorizing
conditions=[
    (application_records_t["CNT_CHILDREN"] == 0),
    (application_records_t["CNT_CHILDREN"] == 1),
    (application_records_t["CNT_CHILDREN"] == 2) & (application_records_t["CNT_CHILDREN"] ==3 ),
    (application_records_t["CNT_CHILDREN"] >3)
]
values=["No children", "1 child", "2-3 children", "Many children"]
application_records_t["CNT_CHILDREN_CATEGORIZED"]=np.select(conditions,values)
conditions1=[
    (application_records_t["CNT_FAM_MEMBERS"] == 1),
    (application_records_t["CNT_FAM_MEMBERS"] == 2),
    (application_records_t["CNT_FAM_MEMBERS"] == 3),
    (application_records_t["CNT_FAM_MEMBERS"] == 4),
    (application_records_t["CNT_FAM_MEMBERS"] >4)
]
values1=["1 person household", "couple", "2+1", "2+2","big family"]
application_records_t["CNT_FAM_MEMBERS_CATEGORIZED"]=np.select(conditions1,values1)
conditions2=[
    (application_records_t["AMT_INCOME_TOTAL"] <= 50000),
    (application_records_t["AMT_INCOME_TOTAL"] > 50000)&(application_records_t["AMT_INCOME_TOTAL"]<=100000),
    (application_records_t["AMT_INCOME_TOTAL"] > 100000)& (application_records_t["AMT_INCOME_TOTAL"]<=200000),
    (application_records_t["AMT_INCOME_TOTAL"] > 200000),
]
values2=["very low income", "medium income", "high income", "very high income"]
application_records_t["AMT_INCOME_TOTAL_CATEGORIZED"]=np.select(conditions2,values2)
conditions3=[
    (application_records_t["DAYS_BIRTH"] < 0 ) & (application_records_t["DAYS_BIRTH"]>=-7300),
    (application_records_t["DAYS_BIRTH"] < -7300) & (application_records_t["DAYS_BIRTH"]>=-10950),
    (application_records_t["DAYS_BIRTH"] < -10950) & (application_records_t["DAYS_BIRTH"]>=-14600),
    (application_records_t["DAYS_BIRTH"] < -14600) & (application_records_t["DAYS_BIRTH"]>=-18250),
    (application_records_t["DAYS_BIRTH"] < -18250) & (application_records_t["DAYS_BIRTH"]>=-21900),
    (application_records_t["DAYS_BIRTH"] < -21900) & (application_records_t["DAYS_BIRTH"]>=-25201)
]
values3=["under 20ties", "under 30ties", "under 40ties", "under 50ties","under 60ties","over 60ties"]
application_records_t["DAYS_BIRTH_CATEGORIZED"]=np.select(conditions3,values3)
conditions4=[
    (application_records_t["DAYS_EMPLOYED"] > 0),
    (application_records_t["DAYS_EMPLOYED"] > 0),
    (application_records_t["DAYS_EMPLOYED"] < 0 ) & (application_records_t["DAYS_EMPLOYED"]>=-1825),
    (application_records_t["DAYS_EMPLOYED"] < -1825) & (application_records_t["DAYS_EMPLOYED"]>=-3650),
    (application_records_t["DAYS_EMPLOYED"] < -3650) & (application_records_t["DAYS_EMPLOYED"]>=-5475),
    (application_records_t["DAYS_EMPLOYED"] < -5475),
             ]
values4=["pensioner","no working experience", "under 5 years", "under 10 years", "under 15 years", "more than 15 years"]
application_records_t["DAYS_EMPLOYED_CATEGORIZED"]=np.select(conditions4,values4)


In [4]:
### 3. Enrich credit data and define labels 

from collections import Counter
p=credit_records_t.copy() 

#add colulmns with val_count(count of statuses), start (first month of loan), end (last month of loan), duration (loan duration)
p['val_count']=p.apply(lambda x: Counter(x),axis=1) 
grouped_id=credit_records_o.groupby('ID')
p['start_mth']=grouped_id['MONTHS_BALANCE'].min() #month at which the credit started for client
p['end_mth']=grouped_id['MONTHS_BALANCE'].max() #month at which the credit ended for client
p['duration']=abs(p['start_mth']-p['end_mth'])+1 #duration of the credit for client

#add columns with 0,1,2,3,4,5 statuses count and their ratios per client
p['0_count']=p['val_count'].apply(lambda x: x.get('0',0)) 
p['0_ratio']=p['0_count']/p['duration'] 
p['1_count']=p['val_count'].apply(lambda x: x.get('1',0)) 
p['1_ratio']=p['1_count']/p['duration'] 
p['2_count']=p['val_count'].apply(lambda x: x.get('2',0))
p['2_ratio']=p['2_count']/p['duration'] 
p['3_count']=p['val_count'].apply(lambda x: x.get('3',0))
p['3_ratio']=p['3_count']/p['duration'] 
p['4_count']=p['val_count'].apply(lambda x: x.get('4',0))
p['4_ratio']=p['4_count']/p['duration'] 
p['5_count']=p['val_count'].apply(lambda x: x.get('5',0))
p['5_ratio']=p['5_count']/p['duration'] 
# p['all_late_count']=p['val_count'].apply(lambda x: x.get('0',0)+x.get('1',0)+x.get('2',0)+x.get('3',0)+x.get('4',0)+x.get('5',0)) 
p['all_late_count']=p['val_count'].apply(lambda x: x.get('2',0)+x.get('3',0)+x.get('4',0)+x.get('5',0)) 
p['all_late_ratio']=p['all_late_count']/p['duration'] 
p['all_late_flag']=p['all_late_ratio'].apply(lambda x: 1 if x>0.20 else 0)

credit_records_t_label=p.copy()


In [5]:
# ### 4. Enrich credit data with Mateusz method
# df=credit_records_o.copy() 

# df["MONTHS_BALANCE"] = df["MONTHS_BALANCE"].abs()

# df_last = df.groupby(by="ID")["MONTHS_BALANCE"].min().reset_index(name="LAST_RECORD")
# df = pd.merge(df, df_last, on="ID", how="left")

# df_all = df.groupby(by="ID")["MONTHS_BALANCE"].count().reset_index(name="NUMBER_OF_RECORDS")
# df = pd.merge(df, df_all, on="ID", how="left")

# df["MONTHS"] = df["MONTHS_BALANCE"] - df["LAST_RECORD"]

# def account_activity(ID, last_record):
    
#     """
#     Funkcja zwracająca obecny stan konta klienta.
#     Konto aktywne - 1
#     Konto nieaktywne - 0
#     """
        
#     if last_record == 0:
#         active = 1
#     else:
#         active = 0
        
        
#     return active

# df_upgrade = df.copy()
# df_upgrade["ACTIVE"] = df_upgrade.apply(lambda df: account_activity(df["ID"], df["LAST_RECORD"]), axis=1)

# def month_weight(month, number_of_records):
    
#     """
#     Funkcja przyporządkowująca wagę danemu miesiącu (month) z uwzględnieniem liczby wszystkich miesięcy (number_of_months).
#     Wagi maleją w sposób liniowy, a ich suma daje 1.
#     """
    
#     k = abs(month) + 1
#     n = number_of_records
    
#     if n < 4:
#         weight = 0
#     elif 4 <= n <= 24:
#         weight = -2*k/(n*(n+1))+2/n
#     else:
#         n = 24
#         if k <= 24:
#             weight = -2*k/(n*(n+1))+2/n
#         else:
#             weight = 0
              
                
#     return weight

# def status2number(month, status, number_of_records):
    
#     """
#     W oparciu o funkcję month_weight, status2number przekształca status z wybranego miesiąca na liczbę z przedziału <-1,1>.

#     """
    
#     if month <= 23:
#         weight = month_weight(month, number_of_records)

#         if status is "5":
#             value = -1
#         elif status is "X":
#             value = 0
#         elif status is "C":
#             value = 1
#         else:
#             n = int(status)
#             value = 1/2**(n+1)-1
#     else:
#         weight = 1
#         value = 0
    
    
#     return weight * value

# df_upgrade["PARTIAL_STATUS"] = df_upgrade.apply(lambda df: status2number(df["MONTHS"], df["STATUS"], df["NUMBER_OF_RECORDS"]), axis=1)

# df_status = df_upgrade.groupby(by = "ID")["PARTIAL_STATUS"].sum().reset_index(name="TOTAL_STATUS")

# df_credit = pd.merge(df_upgrade, df_status, on="ID", how="left")

# def good_bad_function(ID, status):
    
#     """
#     Funkcja określająca czy dany klient może otrzymać kredyt.
#     """
    
#     if status >= 0:
#         judgment = 1
#     else:
#         judgment = 0
    
    
#     return judgment

# df_credit["GOOD_BAD"] = df_credit.apply(lambda df: good_bad_function(df["ID"], df["TOTAL_STATUS"]), axis=1)

# df_result = df_credit[["ID", "ACTIVE", "TOTAL_STATUS", "GOOD_BAD"]].drop_duplicates().copy()

In [6]:
### 4. Enrich credit data with Matuesz method 2
import numpy as np
import pandas as pd

def account_activity(ID, last_record):
    
    """
    Funkcja zwracająca obecny stan konta klienta.
    Konto aktywne - 1
    Konto nieaktywne - 0
    """
        
    if last_record == 0:
        active = 1
    else:
        active = 0
        
        
    return active

def stat2int(status):
    
    """
    text
    text
    """
    
    if status is "5":
        value = 6
    elif status is "4":
        value = 5
    elif status is "3":
        value = 4
    elif status is "2":
        value = 3
    elif status is "1":
        value = 2
    elif status is "0":
        value = 1
    else:
        value = 0
    
    
    return value

def delay_calculator(status_list, historical_range, method):
    
    """
    text
    text
    
    """
    
    status_list = status_list[:historical_range][::-1]
    length = len(status_list)

    delay_list = []

    for i in range(length):
        if i == 0:
            delay_list.append(30*status_list[i])
        else:
            if status_list[i] - 1 == status_list[i-1]:
                delay_list.append(30)
            elif status_list[i] == status_list[i-1]:
                delay_list.append(30*status_list[i])   
            else:
                delay_list.append(30*status_list[i]) 

    if method == "mean":
        delay = np.mean(delay_list)       
    elif method == "median":
        delay = np.median(delay_list)     
    elif method == "median-class":
        value = np.median(delay_list)
        if value in delay_list:
            delay = value
        else:
            while value not in delay_list:
                value = value - 15
            delay = value
            
    return delay

def month_weight(month, number_of_records, mnor, historical_range):
    
    """
    Funkcja przyporządkowująca wagę danemu miesiącu (month) z uwzględnieniem liczby wszystkich miesięcy (number_of_months).
    Wagi maleją w sposób liniowy, a ich suma daje 1.
    """
    
    k = abs(month) + 1
    n = number_of_records
    
    if n < mnor:
        weight = 0
    elif mnor <= n <= historical_range:
        weight = -2*k/(n*(n+1))+2/n
    else:
        n = historical_range
        if k <= historical_range:
            weight = -2*k/(n*(n+1))+2/n
        else:
            weight = 0
              
                
    return weight

def status2number(month, status, number_of_records, mnor, historical_range):
    
    """
    W oparciu o funkcję month_weight, status2number przekształca status z wybranego miesiąca na liczbę z przedziału <-1,1>.
    
    """
    
    if month <= historical_range-1:
        weight = month_weight(month,
                              number_of_records,
                              mnor,
                              historical_range)

        if status is "5":
            value = -1
        elif status is "X":
            value = 0
        elif status is "C":
            value = 1
        else:
            n = int(status)
            value = 1/2**(n+1)-1
    else:
        weight = 1
        value = 0
    
    
    return weight * value

def identify_good(status, threshold):
    
    """
    Funkcja określająca czy dany klient może otrzymać kredyt.
    """
    
    if status >= threshold:
        judgment = 1
    else:
        judgment = 0
    
    
    return judgment

def identify_sufficient(status, threshold, second_threshold):

    """
    
    """
    
    if second_threshold <= status < threshold:
        judgment = 1
    else:
        judgment = 0
    
    
    return judgment

def main(data_frame,
         minimum_number_of_records = 4,
         historical_range = 24,
         threshold = 0,
         second_threshold = 0,
         method = "mean"):
    
    """
    text
    text
    
    """
    
    data_frame["MONTHS_BALANCE"] = data_frame["MONTHS_BALANCE"].abs()
    
    extra_frame = data_frame.groupby(by="ID")["MONTHS_BALANCE"].min().reset_index(name="LAST_RECORD")
    data_frame = pd.merge(data_frame, extra_frame, on="ID", how="left")
    
    extra_frame = data_frame.groupby(by="ID")["MONTHS_BALANCE"].count().reset_index(name="NUMBER_OF_RECORDS")
    data_frame = pd.merge(data_frame, extra_frame, on="ID", how="left")
    
    data_frame["MONTHS"] = data_frame["MONTHS_BALANCE"] - data_frame["LAST_RECORD"]
    

    data_frame["ACTIVITY"] = data_frame.apply(lambda df: account_activity(df["ID"],
                                                                        df["LAST_RECORD"]), axis=1)
    
    data_frame["INTEGER_STATUS"] = data_frame.apply(lambda df: stat2int(df["STATUS"]), axis=1)

    status_frame = data_frame.groupby(by="ID")["INTEGER_STATUS"].apply(list).reset_index(name="STATUS_LIST")
    data_frame = pd.merge(data_frame, status_frame, on="ID", how="left")

    data_frame["DELAY_PAYMENT"] = data_frame.apply(lambda df: delay_calculator(df["STATUS_LIST"],
                                                                               historical_range,
                                                                               method), axis=1)
    
    data_frame["PARTIAL_STATUS"] = data_frame.apply(lambda df: status2number(df["MONTHS"],
                                                                             df["STATUS"],
                                                                             df["NUMBER_OF_RECORDS"],
                                                                             minimum_number_of_records,
                                                                             historical_range), axis=1)
    
    extra_frame = data_frame.groupby(by = "ID")["PARTIAL_STATUS"].sum().reset_index(name="TOTAL_STATUS")
    data_frame = pd.merge(data_frame, extra_frame, on="ID", how="left")
    
    data_frame["GOOD"] = data_frame.apply(lambda df: identify_good(df["TOTAL_STATUS"],
                                                                   threshold), axis=1)
    
    if second_threshold != threshold:
        data_frame["SUFFICIENT"] = data_frame.apply(lambda df: identify_sufficient(df["TOTAL_STATUS"],
                                                                                   threshold,
                                                                                   second_threshold), axis=1)

    average_frame = round(data_frame.groupby(by="GOOD")["DELAY_PAYMENT"].mean(), 2).reset_index(name="AVERAGE_DELAY")
    data_frame = pd.merge(data_frame,
                          average_frame,
                          on="GOOD",
                          how="left")
    
    std_frame = round(data_frame.groupby(by="GOOD")["DELAY_PAYMENT"].std(), 2).reset_index(name="STD_DELAY")
    data_frame = pd.merge(data_frame,
                          std_frame,
                          on="GOOD",
                          how="left")
    
    median_frame = round(data_frame.groupby(by="GOOD")["DELAY_PAYMENT"].median(), 2).reset_index(name="MEDIAN_DELAY")
    data_frame = pd.merge(data_frame,
                          median_frame,
                          on="GOOD",
                          how="left")
    
    
    return data_frame

    # credit_record = pd.read_csv("credit_record.csv")

data = main(credit_records_o, threshold = 0, second_threshold = -0.5)
data = data[["ID", "ACTIVITY", "GOOD", "SUFFICIENT"]].drop_duplicates(subset="ID")

  if status is "5":
  elif status is "4":
  elif status is "3":
  elif status is "2":
  elif status is "1":
  elif status is "0":
  if status is "5":
  elif status is "X":
  elif status is "C":


In [7]:
### 5. Merge app & labelled credit data & enrich it with age, empl in year, income, age, emp bins
app_and_credit_label_2=application_records_t.merge(credit_records_t_label,on='ID') #merege of app & credit labelled data
# app_and_credit_label_2=app_and_credit_label_2.merge(df_result,on='ID')                 #merege of Mateusz's labells
app_and_credit_label_2=app_and_credit_label_2.merge(data,on='ID')                 #merege of Mateusz's labells

app_and_credit_label_2['income_bin']=pd.cut(app_and_credit_label_2['AMT_INCOME_TOTAL'],[50000, 75000,100000,150000,200000,250000,300000,float("inf")])
app_and_credit_label_2['age']=round((app_and_credit_label_2['DAYS_BIRTH']/365),0)
app_and_credit_label_2['age_bin']=pd.cut(app_and_credit_label_2['age'],[-70,-60,-50,-40,-30,-20])
app_and_credit_label_2['length_of_empl']=round((app_and_credit_label_2['DAYS_EMPLOYED']/365),0)
app_and_credit_label_2['length_of_empl_bin']=pd.cut(app_and_credit_label_2['length_of_empl'],[-30,-20,-10,-5,0])


In [8]:
#6 DASH APP Profit & Loss Tab

from dash import dash, dcc, html, Input, Output, State 
import dash_bootstrap_components as dbc
from dash.exceptions import PreventUpdate
import plotly.express as px

### START PL tab #############################################

client_count=list(range(0,100000,1000))
default_prob=[x/100 for x in range(0,10,1)]
interest=[x/100 for x in range(0,15,1)]
average_credit=list(range(0,100000,1000))

PL_controls = dbc.Form([
        dbc.Card([dbc.CardHeader('Number of clients'),
        dbc.CardBody(dcc.Slider(0, 500000, 1000,value=0,marks=None, tooltip={"placement": "top", "always_visible": True},id='PL_dropdown1'))]),
        dbc.Card([dbc.CardHeader('Accepted level of default probability'),
        dbc.CardBody(dcc.Slider(0, 0.15, 0.01,value=0,marks=None, tooltip={"placement": "top", "always_visible": True},id='PL_dropdown2'))]),
        dbc.Card([dbc.CardHeader('Interest rate'),
        dbc.CardBody(dcc.Slider(0, 0.15, 0.01,value=0,marks=None, tooltip={"placement": "top", "always_visible": True},id='PL_dropdown3'))]),
        dbc.Card([dbc.CardHeader('Average credit amount'),
        dbc.CardBody(dcc.Slider(0, 100000, 1000,value=0,marks=None, tooltip={"placement": "top", "always_visible": True},id='PL_dropdown4'))]),
        ])

PL_side_bar = html.Div(
    [
        dbc.CardHeader('Enter parameters'),
        html.Br(),
        PL_controls
        ],
)

profit_capital_name=dbc.Col(dbc.Alert("Profit \ Capital Return",color="success"),width={"size": 2})
profit_income_name=dbc.Col(dbc.Alert("Profit \ Income Ratio",color="success"),width={"size": 2})
profit_name=dbc.Col(dbc.Alert("Profit",color="success"),width={"size": 2})
income_name=dbc.Col(dbc.Alert("Income",color="success"),width={"size": 2})
loss_name=dbc.Col(dbc.Alert("Loss",color="success"),width={"size": 2})

profit_capital_val=dbc.Col(dbc.Alert(color="success",id="profit_capital"),width={"size": 2})
profit_income_val=dbc.Col(dbc.Alert(color="success",id="profit_income"),width={"size": 2})
profit_val=dbc.Col(dbc.Alert(color="success",id="profit_output"),width={"size": 2})
income_val=dbc.Col(dbc.Alert(color="success",id="income_output"),width={"size": 2},)
loss_val=dbc.Col(dbc.Alert(color="success",id="loss_output"),width={"size": 2})

row1=dbc.Row([profit_capital_name,profit_income_name,profit_name,income_name,loss_name])
row2=dbc.Row([profit_capital_val,profit_income_val,profit_val,income_val,loss_val])

PL_content=html.Div([row1,row2])
PL_tab = html.Div(dbc.Row([dbc.Col(PL_side_bar, width=2),dbc.Col([html.Br(),html.Br(),html.Br(),PL_content])]))



In [9]:
#6 DASH APP INTRO TAB

##### START INTRO TAB  ######
#this defines what are the selections possible in user inputs
l1=app_and_credit_label_2.columns[1:17]
t1=app_and_credit_label_2.columns[-21:-3]
p1=[*range(-60,1,1)]

#this defines what are the elements in rows
el1=dbc.Col(dcc.Graph(id='graph1'), md=6)
el2=dbc.Col(dcc.Graph(id='graph2'), md=6)
el3=dbc.Col(dcc.Graph(id='graph3'), md=6)
el4=dbc.Col(dcc.Graph(id='graph4'), md=6)

#this defines what is in rows
row0=dbc.Form([dbc.CardHeader('Select Period'), dcc.RangeSlider(-60, 0, 6, value=[-60, 0], id='start-end')])
row1=dbc.Row([el1,el2])
row2=dbc.Row([el3,el4])

#this defines that controls object is a form consisting of couple cards with couple dash core components called dropdawn. See dash core components here: https://dash.plotly.com/dash-core-components
controls = dbc.Form([
        dbc.Card([dbc.CardHeader('Select Feature 1'),
        dbc.CardBody(dcc.Dropdown(l1,l1,id='l1_drop'))]),
        dbc.Card([dbc.CardHeader('Select Feature 2'),
        dbc.CardBody(dcc.Dropdown(l1,l1,id='l2_drop'))]),
        dbc.Card([dbc.CardHeader('Select target'),
        dbc.CardBody(dcc.Dropdown(t1,t1,id='l3_drop'))]),
        dbc.Card([dbc.CardHeader('Select target 2'),
        dbc.CardBody(dcc.Dropdown(t1,t1,id='l4_drop'))])
        ])

#this defines that what appears on the left side will have html headings (H2,H5) and controls object
sidebar = html.Div(
    [
        dbc.CardHeader('Select visualisation parameters'),
        controls
    ],
)

#this defines that what appears on the right side is built from 3 rows
content = html.Div(
    [
    row0,
    row1,
    row2,
    ],
)

#this defines that tab1 content is consisting of sidebar and content
tab1_content=html.Div(dbc.Row([dbc.Col(sidebar,width=2),dbc.Col(content)]))
##### END INTRO TAB  ######

In [30]:
### 6. DASH APP taking features and 2 tagets to compare targets relation

# We use here dash core components https://dash.plotly.com/dash-core-components like graph that shows graph and dropdowns, sliders where users provide input. 
# We use here dash bootstrap components https://dash-bootstrap-components.opensource.faculty.ai/docs/components/  like tabs, card, form, col, row etc. to organize
# everything into 2 tabs, where 1st tab has a side bar and content (rightbar) and all elements are packed into that. 

# This is just renaming columns to more friendly format so it displays nicely
# app_and_credit_label_2.rename(columns={'CODE_GENDER':'Gender', 'FLAG_OWN_CAR': 'Own Car','FLAG_OWN_REALTY':'Own Realty', 'CNT_CHILDREN':'Children Count', 'AMT_INCOME_TOTAL':'Yearly Income','NAME_INCOME_TYPE':'Income Type', 'NAME_EDUCATION_TYPE':'Education Type','NAME_FAMILY_STATUS':'Family Status', 'NAME_HOUSING_TYPE': 'Housing Type','DAYS_BIRTH':'Days Birth', 'DAYS_EMPLOYED':'Days Employed', 'FLAG_MOBIL':'Mobil', 'FLAG_WORK_PHONE':'Work Phone', 'FLAG_PHONE': 'Phone Flag', 'FLAG_EMAIL':'Email.Flag', 'OCCUPATION_TYPE':'Occupation Type', 'CNT_FAM_MEMBERS':'Family Members #','GOOD_BAD':'bad clients 1', 'all_late_flag':'bad clients 2'},inplace=True)

#we import here the necessary modules for dash
from dash import dash, dcc, html, Input, Output, State
from dash._callback_context import callback_context
import dash_bootstrap_components as dbc
from dash.exceptions import PreventUpdate
import plotly.express as px

#we set under the app a dash application instance
# app = dash.Dash(external_stylesheets=[dbc.themes.BOOTSTRAP])
app = dash.Dash(__name__,external_stylesheets=[dbc.themes.BOOTSTRAP])

#Our app is built of tabs. below is exemplary tab. Its made up of 3 rows horizontally and 3 columns in row 2. You can define as many rows and columns as you wish this way you define layout. 
# All other components like dropdowns, buttons etc. need to go directly into row or column wrappers. You can use dash bootstrap components, dash core controls components and html components. 
# You can copy ane experiment with the code into either JW_tab, MR_tab, MK_tab,JB_tab

JW_tab = html.Div()
JB_tab = html.Div()

########## START Profit & Loss callback function
@app.callback(
    Output('profit_capital','children'),
    Output('profit_income','children'),
    Output('profit_output','children'),
    Output('income_output','children'),
    Output('loss_output','children'),
    Input('PL_dropdown1','value'),
    Input('PL_dropdown2','value'),
    Input('PL_dropdown3','value'),
    Input('PL_dropdown4','value'),prevent_initial_call=True
    )
def update_figure2(clients,default_probability,interest_rate,credit_amount):
        bad_clients=round(clients*default_probability)
        good_clients=clients-bad_clients
        income=round(good_clients*interest_rate*credit_amount)
        loss=round(bad_clients*credit_amount)
        profit=income-loss
        profit_income=profit/income
        profit_capital=profit/(clients*credit_amount)
        return profit_capital,profit_income,profit,income,loss
########## END Profit & Loss callback function

########## START WELCOME PAGE 
el10=html.Div([

    dbc.Card(
        [
            # dbc.CardImg(src="https://raw.githubusercontent.com/infoshareacademy/jdszr6-slytherin_group/master/Slytherin_ClearBG.webp", top=True,),
            dbc.CardBody(
                [
                    # html.H5("SLYTHERIN BANK", className="card_title"),
                html.P(
                    "Welcome in our bank. Please select if you are "
                    "our client or advisor.",
                    className = "card_text",
                ),
                # dbc.DropdownMenu(
                #     label = "selection",
                #     color = "success",
                #     children=[
                #         dbc.DropdownMenuItem("Client", id="client"),
                #         dbc.DropdownMenuItem("Advisor", id="advisor")])
                dcc.Dropdown(id="special_dropdown",options=["Client","Advisor"],value=["Client","Advisor"])
                ]
            )
        ],
    )
    ]
)

logo_pic=        dbc.Card([
            dbc.CardImg(src="https://raw.githubusercontent.com/infoshareacademy/jdszr6-slytherin_group/master/Slytherin_ClearBG.webp", top=True),
            dbc.CardBody(html.H5("SLYTHERIN BANK", className="card_title")
            )]) 

MR_tab=html.Div(dbc.Row([dbc.Col([html.Br(),logo_pic],width=2),dbc.Col([html.Br(),el10],width=2)]))

# MR_tab = (dbc.Row([dbc.Col(dbc.Card(dbc.CardImg(src="https://raw.githubusercontent.com/infoshareacademy/jdszr6-slytherin_group/master/Slytherin_ClearBG.webp", top=True,)),width=2),dbc.Col(el10)]))

######### END WELCOME PAGE

########## START SELECTION PAGE 
#dropdown for client/advisor list
data_frame = app_and_credit_label_2
list_ca_tab1 = dbc.Form([
       # NAME_INCOME_TYPE 4
    dcc.Dropdown(
        data_frame["NAME_INCOME_TYPE"].unique(),
        placeholder = "NAME_INCOME_TYPE",
        id = "income_type"),
        # NAME_EDUCATION_TYPE 5
    dcc.Dropdown(
        data_frame["NAME_EDUCATION_TYPE"].unique(),
        placeholder = "NAME_EDUCATION_TYPE",
        id = "education_type"),
    # NAME_FAMILY_STATUS 6
    dcc.Dropdown(
        data_frame["NAME_FAMILY_STATUS"].unique(),
        placeholder = "NAME_FAMILY_STATUS",
        id = "family_status"),
    # FLAG_EMAIL 10
    dcc.Dropdown(
        data_frame["FLAG_EMAIL"].unique(),
        placeholder = "FLAG_EMAIL",
        id = "email"),
    # OCCUPATION_TYPE 11
    dcc.Dropdown(
        data_frame["OCCUPATION_TYPE"].unique(),
        placeholder = "OCCUPATION_TYPE",
        id = "occupation"),
    # MEMBERS 13
    dcc.Dropdown(
        data_frame["CNT_FAM_MEMBERS_CATEGORIZED"].unique(),
        placeholder = "MEMBERS",
        id = "members"),
    # AGE 14
    dcc.Dropdown(
        data_frame["DAYS_BIRTH_CATEGORIZED"].unique(),
        placeholder = "AGE",
        id = "age"),
    # INCOME 16
    dcc.Dropdown(
        data_frame["AMT_INCOME_TOTAL_CATEGORIZED"].unique(),
        placeholder = "INCOME",
        id = "income")
    ])
list_ca_tab2 = dbc.Form([
    # CODE_GENDER 1
    dcc.Dropdown(
        data_frame["CODE_GENDER"].unique(),
        #["M", "F"],
        placeholder = "CODE_GENDER",
        id = "gender"),
    # FLAG_OWN_CAR 2
    dcc.Dropdown(
        data_frame["FLAG_OWN_CAR"].unique(),
        placeholder = "FLAG_OWN_CAR",
        id = "car"),
    # FLAG_OWN_REALTY 3
    dcc.Dropdown(
        data_frame["FLAG_OWN_REALTY"].unique(),
        placeholder = "FLAG_OWN_REALTY",
        id = "realty"),
    # NAME_HOUSING_TYPE 7
    dcc.Dropdown(
        data_frame["NAME_HOUSING_TYPE"].unique(),
        placeholder = "NAME_HOUSING_TYPE",
        id = "housing_type"),
    # FLAG_WORK_PHONE 8
    dcc.Dropdown(
        data_frame["FLAG_WORK_PHONE"].unique(),
        placeholder = "FLAG_WORK_PHONE",
        id = "work_phone"),
    # FLAG_PHONE 9
    dcc.Dropdown(
        data_frame["FLAG_PHONE"].unique(),
        placeholder = "FLAG_PHONE",
        id = "phone"),
    # CHILDREN 12
    dcc.Dropdown(
        data_frame["CNT_CHILDREN_CATEGORIZED"].unique(),
        placeholder = "CHILDREN",
        id = "kids"),
    # YEARS_EMPLOYED 15
    dcc.Dropdown(
        data_frame["DAYS_EMPLOYED_CATEGORIZED"].unique(),
        placeholder = "YEARS_EMPLOYED",
        id = "employed"),
    ])
@app.callback(
    Output("textarea_id5", "children"), #first output goes to tab Decision Client
    Output("textarea_id3", "children"), #second output goes to tab Decision Page Advisor
    Output("textarea_id4", "children"), #second output goes to tab Decision Page Advisor
    Input("gender", "value"),
    Input("car","value"),
    Input("realty", "value"),
    Input("income_type", "value"),
    Input("education_type", "value"),
    Input("family_status", "value"),
    Input("housing_type", "value"),
    Input("work_phone", "value"),
    Input("phone", "value"),
    Input("email", "value"),
    Input("occupation", "value"),
    Input("kids", "value"),
    Input("members", "value"),
    Input("age", "value"),
    Input("employed", "value"),
    Input("income", "value")

#     ['ID',
#  'CODE_GENDER',
#  'FLAG_OWN_CAR',
#  'FLAG_OWN_REALTY',
#  'CNT_CHILDREN',
#  'AMT_INCOME_TOTAL',
#  'NAME_INCOME_TYPE',
#  'NAME_EDUCATION_TYPE',
#  'NAME_FAMILY_STATUS',
#  'NAME_HOUSING_TYPE',
#  'DAYS_BIRTH',
#  'DAYS_EMPLOYED',
#  'FLAG_WORK_PHONE',
#  'FLAG_PHONE',
#  'FLAG_EMAIL',
#  'OCCUPATION_TYPE',
#  'CNT_FAM_MEMBERS',
#  'CNT_CHILDREN_CATEGORIZED',
#  'CNT_FAM_MEMBERS_CATEGORIZED',
#  'AMT_INCOME_TOTAL_CATEGORIZED',
#  'DAYS_BIRTH_CATEGORIZED',
#  'DAYS_EMPLOYED_CATEGORIZED',
)
def dash_function(gender,car,realty,income_type,education_type, family_status, housing_type,
                  work_phone, phone,email, occupation, children, members, age, employed, income):
    personal_data = {'CODE_GENDER': gender,
                     'FLAG_OWN_CAR': car,
                     'FLAG_OWN_REALTY': realty,
                     'NAME_INCOME_TYPE': income_type,
                     'NAME_EDUCATION_TYPE': education_type,
                     'NAME_FAMILY_STATUS': family_status,
                     'NAME_HOUSING_TYPE': housing_type,
                     'FLAG_WORK_PHONE': work_phone,
                     'FLAG_PHONE': phone,
                     'FLAG_EMAIL': email,
                     'OCCUPATION_TYPE': occupation,
                     'CNT_CHILDREN_CATEGORIZED': children,
                     'CNT_FAM_MEMBERS_CATEGORIZED': members,
                     'DAYS_BIRTH_CATEGORIZED': age,
                     'DAYS_EMPLOYED_CATEGORIZED': employed,
                     'AMT_INCOME_TOTAL_CATEGORIZED': income
    }
    z=personal_data.copy()
    main_features = ['OCCUPATION_TYPE',
                     'DAYS_BIRTH_CATEGORIZED',
                     'AMT_INCOME_TOTAL_CATEGORIZED',
                     'FLAG_EMAIL',
                     'NAME_FAMILY_STATUS',
                     'NAME_EDUCATION_TYPE',
                     'NAME_INCOME_TYPE',
                     'CNT_FAM_MEMBERS_CATEGORIZED']
    extra_features = ['FLAG_OWN_CAR',
                      'FLAG_OWN_REALTY',
                      'NAME_HOUSING_TYPE',
                      'FLAG_WORK_PHONE',
                      'FLAG_PHONE',
                      'CNT_CHILDREN_CATEGORIZED',
                      'DAYS_BIRTH_CATEGORIZED',
                      'CODE_GENDER']
    main_data = {feature: personal_data[feature] for feature in main_features}
    extra_data = {feature: personal_data[feature] for feature in extra_features}
    
    def conditional_probability(data_frame, personal_data, split_method, threshold):
        
        """
        TEXT
        TEXT
        """
        
        if split_method == "standard":
            good_clients = data_frame[(data_frame["GOOD"] == 1) | (data_frame["SUFFICIENT"] == 1)]
            bad_clients = data_frame[(data_frame["GOOD"] == 0) & (data_frame["SUFFICIENT"] == 0)]
        elif split_method == "good":
            good_clients = data_frame[data_frame["GOOD"] == 1]
            bad_clients = data_frame[data_frame["GOOD"] == 0]
        elif split_method == "sufficient":
            data_frame = data_frame[data_frame["GOOD"] == 0]
            good_clients = data_frame[data_frame["SUFFICIENT"] == 1]
            bad_clients = data_frame[data_frame["SUFFICIENT"] == 0]
        
        for i in range(1, len(personal_data)+1):
            records = 0

            for key, value in personal_data.items():
                conditional_records = len(data_frame[data_frame[key] == value])
                if conditional_records > records:
                    records = conditional_records
                    specific_key = key
                    specific_value = value
                    
            good_records = len(good_clients[good_clients[specific_key] == specific_value])
            bad_records = len(bad_clients[bad_clients[specific_key] == specific_value])
            client_records = good_records + bad_records

            if client_records >= threshold and bad_records > 0:
                personal_data = {key:personal_data[key] for key in personal_data if key!=specific_key}
                good_clients = good_clients[good_clients[specific_key] == specific_value]
                bad_clients = bad_clients[bad_clients[specific_key] == specific_value]
                probability = round(bad_records/(bad_records + good_records), 5)
            else:
                break

            
        return probability, personal_data

    def executor(data_frame, personal_data, main_data, extra_data, split_method = "standard", threshold = 1000, second_threshold = 500):
        result = conditional_probability(data_frame, main_data, split_method, threshold)
        probability, data = result
        if len(data) != 0:
            result = conditional_probability(data_frame, main_data, split_method, second_threshold)
            new_probability, new_data = result
        if len(new_data) < len(data) and new_probability <= probability:
            probability = new_probability
            data = new_data
            extra_data = dict(extra_data, **data)
        for key, value in extra_data.items():
            single_dict = {key: value}
            new_probability = conditional_probability(data_frame, dict(main_data, **single_dict),split_method, second_threshold)[0]
        if new_probability <= probability:
            probability = new_probability
            main_data = dict(main_data, **single_dict)
        return probability
    personal_data = data_frame.drop(["ID", "GOOD", "SUFFICIENT"], axis=1).sample().to_dict("record")[0]
    x = conditional_probability(data_frame, main_data, "standard", 1000)[0]
    y = executor(data_frame, personal_data, main_data, extra_data)

    inflation = 0.1
    def decision(probability, inflation):
        if probability >= 0.04:
            interest_rate = round(((1 + inflation/12)/(1 - probability) - 1)* 100,2)
            statement_cl = "We are not able to provide you with an offer. Please contact client advisor."
            statement_ad = "Client above accepted default probability treshold. Accept only with interest rate {}%.".format(interest_rate)
        else:
            interest_rate = round(((1 + inflation/12)/(1 - probability) - 1)* 100,2)
            statement_cl = "We can offer you loan with: {}% interest rate.".format(interest_rate)
            statement_ad = "Client within accepted default probability treshold. Propose interest rate {}%.".format(interest_rate)
        return statement_cl,statement_ad
    
    z_cl,z_ad=decision(y,0.1)
           
    return [html.H6(str("Decision: {}".format(z_cl)))],[html.H6(str("Probability of default based on key characteriscs: {}".format(x))),html.H6(str("Probability of default based on all characteriscs: {}".format(y)))],[html.H6(str("Decision: {}".format(z_ad)))]



el11= dbc.Card([dbc.CardHeader("Key Characteristics"),
            list_ca_tab1,
        ])
el12= dbc.Card([dbc.CardHeader("Optional Characteristics"),
            list_ca_tab2,
        ]) 

button= dbc.Card([html.Button("Apply", id="apply_button")])

list_of_content = html.Div([dbc.Row([
    dbc.Col([html.Br(),logo_pic],width=2),
        dbc.Col([dbc.Row([
            dbc.Col([html.Br(),el11],width=2),
            dbc.Col([html.Br(),el12],width=2)]),
            dbc.Col([html.Br(),button],width={"size": 2, "offset": 1})])])
            ])

# This callback enables switch of tabs with apply button on selection page and dropdown menu on welcome page
@app.callback(
    Output("tabs", "active_tab"),
    Input("apply_button", "n_clicks"),
    Input("special_dropdown", "value"))
def tab_select(click,dropdown):
    ctx = callback_context
    if not ctx.triggered:
        triger_id = None
        return "tab2"
    else:
        triger_id = ctx.triggered[0]['prop_id'].split('.')[0]
        print(triger_id)
    if triger_id=="apply_button" and click:
        if dropdown=='Client':
            return "tab4"
        elif dropdown=='Advisor':
            return "tab5"
        else:
            return "tab3"
    elif triger_id=="special_dropdown":
        if dropdown == "Client":
            return "tab3"
        elif dropdown == "Advisor":
            return "tab3"
######## END SELECTION PAGE

######## START DECISION PAGE ADVISOR  #########
el13 =html.Div(
[      
        dbc.Card([dbc.CardHeader('Highlevel Default Probability \ Exact Default Probability'),dbc.CardBody(id="textarea_id3")]),
        dbc.Card([dbc.CardHeader('Credit Guidelines'),dbc.CardBody(id='textarea_id4')]),
        dbc.Card([dbc.CardHeader('Put down your notes here'),dbc.CardBody(
        [dcc.Textarea(
            id="textarea_id1",
            value="Notes",
            style={"height": 100},
                ),
            dcc.Clipboard(
            target_id="textarea_id2",
            title="copy",
            style={
                "display": "inline-block",
                "fontSize": 20,
                "verticalAlign": "top",
                    },
                    )
                                ])
        ]),
        dbc.Card([
            dbc.CardHeader('Download final offer here'), dbc.CardBody(dbc.Button(id='btn1',
            children=[html.I(className="fa fa-download mr-1"), "Download offer"],
            color="info",
            className="mt-1"
        ),)
                    ]),
        dcc.Download(id="download-component1"),
])

MK_tab=html.Div(dbc.Row([dbc.Col([html.Br(),logo_pic],width=2),dbc.Col([html.Br(),el13],width=2)]))
    
@app.callback(
    Output("download-component1", "data"),
    Input("btn1", "n_clicks"),
    prevent_initial_call=True,
)
def func(n_clicks):
    return dict(content="Credit card approved", filename="offer.txt")

@app.callback(
    Output('output-container-range-slider', 'children'),
    [Input('my-range-slider', 'value')])
def update_output(value):
    return 'You have selected "{}"'.format(value)
@app.callback(
    Output('textarea-state-example-output', 'children'),
    Input('textarea-state-example-button', 'n_clicks'),
    State('textarea-state-example', 'value')
)
def update_output(n_clicks, value):
    if n_clicks > 0:
        return 'You have entered: \n{}'.format(value)       
######### END DECISION PAGE ADVISOR ##########


##### START DECISION PAGE CLIENT ######
from dash import dash_table

df_to_dict_exp = pd.DataFrame([["abc@mail.com","123 456 789","2000-01-01","Sales staff",
                                "high income","Working","Higher education","Married","2+1"]],
                                columns = ["E-Mail","Phone","Birth date","Occupation type","Yearly income",
                                "Income type","Education type","Family status","Family members"])
# dict_exp = df_to_dict_exp.to_dict()
# table_tab
# dcit_exp=z.to_dict
# print(dict_exp)
el14 = html.Div(
[      
        dbc.Card([dbc.CardHeader('Our proposal for credit'),dbc.CardBody(id='textarea_id5')]),
        dbc.Card([dbc.CardHeader('Put down your notes here'),dbc.CardBody(
        [dcc.Textarea(
            id="textarea_id6",
            value="Notes",
            style={"height": 100},
                ),
            dcc.Clipboard(
            target_id="textarea_id7",
            title="copy",
            style={
                "display": "inline-block",
                "fontSize": 20,
                "verticalAlign": "top",
                    },
                    )
                                ])
        ]),
        dbc.Card([
            dbc.CardHeader('Download final offer here'), dbc.CardBody(dbc.Button(id='btn8',
            children=[html.I(className="fa fa-download mr-1"), "Download offer"],
            color="info",
            className="mt-1"
        ),)
                    ]),
        dcc.Download(id="download-component2"),
])

JW_tab=html.Div(dbc.Row([dbc.Col([html.Br(),logo_pic],width=2),dbc.Col([html.Br(),el14],width=2)]))
##### END DECISION PAGE CLIENT ######

# Below defines that TABS object is built from Tabs and Tab components from dash bootstrap components https://dash-bootstrap-components.opensource.faculty.ai/docs/components/. Essentially
# its a list of tabs that the app will be divided into. Its easy to remove a tab (just remove element of the list) or add another tab just copy the line and change label.
TABS = dbc.Tabs(
    [
        # dbc.Tab(example_tab, label="Data Eploration"), #data exploration
        # dbc.Tab(example_tab, label="Application simulator"), #yes or no card 
        dbc.Tab(MR_tab, label="Welcome Page",tab_id="tab2"),  #tab for MR to to experiment with dash
        dbc.Tab(list_of_content, label="Selection Page",tab_id="tab3"),
        # dbc.Tab(example_tab, label="Example tab" ),
        dbc.Tab(JW_tab, label="Decision Page Client",tab_id="tab4"),  #tab to for JW to experiment with dash
        dbc.Tab(MK_tab, label="Decision Page Advisor",tab_id="tab5"),  #tab for MK to to experiment with dash
        # dbc.Tab(JB_tab, label="JB tab"),  #tab for JB to to experiment with dash
        dbc.Tab(PL_tab, label="Profit & Loss simulator",tab_id="tab6"), #board simulation 
        dbc.Tab(tab1_content, label="Explore Historical Data",tab_id="tab1"), #information on project
    ], id="tabs"
)

#this defines that app layout will consiste of tabs object
app.layout = html.Div([TABS,dcc.Store(id='intermediate-value')])

####### START INTRO TAB CALLBACK
#this code binds defines inputs and outputs. 
@app.callback(
    Output('graph1', 'figure'),
    Output('graph2', 'figure'),
    Output('graph3', 'figure'),
    Output('graph4', 'figure'),
    Input('l1_drop', 'value'),
    Input('l2_drop','value'),
    Input('l3_drop','value'),
    Input('l4_drop','value'),
    [Input('start-end', 'value')]
    )
#this is function that is called by dash whenever user changes input in my case it draws charts 
def update_figure(l1_val,l2_val,t_val,t_val1,ss):
    lst=[]
    data=app_and_credit_label_2[(app_and_credit_label_2['start_mth']>=ss[0]) & (app_and_credit_label_2['start_mth']<=ss[1])]
    df=pd.pivot_table(data,index=l1_val,columns=l2_val,values=t_val,aggfunc='mean',margins=True).round(4)
    lst.append(px.bar(df,barmode='group',text_auto=True,title="{} bad clients % by {} and {}".format(t_val,l1_val,l2_val))),
    lst.append(px.imshow(df,color_continuous_scale='RdBu_r',text_auto=".2%",title="{} bad clients % by {} and {}".format(t_val,l1_val,l2_val))),
    df=pd.pivot_table(data,index=l1_val,columns=l2_val,values=t_val1,aggfunc='mean',margins=True).round(4)
    lst.append(px.bar(df,barmode='group',text_auto=True,title="{} bad clients % by {} and {}".format(t_val1,l1_val,l2_val))),
    lst.append(px.imshow(df,color_continuous_scale='RdBu_r',text_auto=True,title="{} bad clients % by {} and {}".format(t_val1,l1_val,l2_val)))
    for i in range(len(lst)):
        lst[i].update_layout(transition_duration=500)
        lst[i].layout.yaxis.tickformat = ',.2%' #show things as percentage with 2 decimal places
    lst[2].update_xaxes(type='category') #heatmap charts weren't working correclty withotu making axes categorical
    return lst
####### END INTRO TAB CALLBACK


#command below will start the web application (in output cell there should be address which needs to be copied into webrowser to access app, likely: http://127.0.0.1:8050/ )
if __name__ == '__main__':
    app.run_server()

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

 * Serving Flask app '__main__' (lazy loading)
 * Environment: production
[2m   Use a production

 * Running on http://127.0.0.1:8050/ (Press CTRL+C to quit)
127.0.0.1 - - [08/Apr/2022 19:08:42] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [08/Apr/2022 19:08:42] "GET /assets/style.css?m=1649023007.9403305 HTTP/1.1" 304 -
127.0.0.1 - - [08/Apr/2022 19:08:42] "GET /_dash-layout HTTP/1.1" 200 -
127.0.0.1 - - [08/Apr/2022 19:08:42] "GET /_dash-dependencies HTTP/1.1" 200 -
127.0.0.1 - - [08/Apr/2022 19:08:42] "GET /_favicon.ico?v=2.3.0 HTTP/1.1" 200 -
127.0.0.1 - - [08/Apr/2022 19:08:42] "GET /_dash-component-suites/dash/dcc/async-dropdown.js HTTP/1.1" 200 -
127.0.0.1 - - [08/Apr/2022 19:08:42] "GET /_dash-component-suites/dash/dcc/async-slider.js HTTP/1.1" 200 -
127.0.0.1 - - [08/Apr/2022 19:08:42] "GET /_dash-component-suites/dash/dcc/async-graph.js HTTP/1.1" 200 -
127.0.0.1 - - [08/Apr/2022 19:08:42] "GET /_dash-component-suites/dash/dcc/async-plotlyjs.js HTTP/1.1" 200 -
127.0.0.1 - - [08/Apr/2022 19:08:42] "POST /_dash-update-component HTTP/1.1" 200 -


Exception on /_dash-update-component [POST]
Traceback (most recent call last):
  File "C:\Users\User\anaconda3\envs\Dash_Env\lib\site-packages\flask\app.py", line 2073, in wsgi_app
    response = self.full_dispatch_request()
  File "C:\Users\User\anaconda3\envs\Dash_Env\lib\site-packages\flask\app.py", line 1518, in full_dispatch_request
    rv = self.handle_user_exception(e)
  File "C:\Users\User\anaconda3\envs\Dash_Env\lib\site-packages\flask\app.py", line 1516, in full_dispatch_request
    rv = self.dispatch_request()
  File "C:\Users\User\anaconda3\envs\Dash_Env\lib\site-packages\flask\app.py", line 1502, in dispatch_request
    return self.ensure_sync(self.view_functions[rule.endpoint])(**req.view_args)
  File "C:\Users\User\anaconda3\envs\Dash_Env\lib\site-packages\dash\dash.py", line 1345, in dispatch
    response.set_data(func(*args, outputs_list=outputs_list))
  File "C:\Users\User\anaconda3\envs\Dash_Env\lib\site-packages\dash\_callback.py", line 151, in add_context
    outpu





Exception on /_dash-update-component [POST]
Traceback (most recent call last):
  File "C:\Users\User\anaconda3\envs\Dash_Env\lib\site-packages\flask\app.py", line 2073, in wsgi_app
    response = self.full_dispatch_request()
  File "C:\Users\User\anaconda3\envs\Dash_Env\lib\site-packages\flask\app.py", line 1518, in full_dispatch_request
    rv = self.handle_user_exception(e)
  File "C:\Users\User\anaconda3\envs\Dash_Env\lib\site-packages\flask\app.py", line 1516, in full_dispatch_request
    rv = self.dispatch_request()
  File "C:\Users\User\anaconda3\envs\Dash_Env\lib\site-packages\flask\app.py", line 1502, in dispatch_request
    return self.ensure_sync(self.view_functions[rule.endpoint])(**req.view_args)
  File "C:\Users\User\anaconda3\envs\Dash_Env\lib\site-packages\dash\dash.py", line 1345, in dispatch
    response.set_data(func(*args, outputs_list=outputs_list))
  File "C:\Users\User\anaconda3\envs\Dash_Env\lib\site-packages\dash\_callback.py", line 151, in add_context
    outpu

127.0.0.1 - - [08/Apr/2022 19:08:42] "

Exception on /_dash-update-component [POST]
Traceback (most recent call last):
  File "C:\Users\User\anaconda3\envs\Dash_Env\lib\site-packages\flask\app.py", line 2073, in wsgi_app
    response = self.full_dispatch_request()
  File "C:\Users\User\anaconda3\envs\Dash_Env\lib\site-packages\flask\app.py", line 1518, in full_dispatch_request
    rv = self.handle_user_exception(e)
  File "C:\Users\User\anaconda3\envs\Dash_Env\lib\site-packages\flask\app.py", line 1516, in full_dispatch_request
    rv = self.dispatch_request()
  File "C:\Users\User\anaconda3\envs\Dash_Env\lib\site-packages\flask\app.py", line 1502, in dispatch_request
    return self.ensure_sync(self.view_functions[rule.endpoint])(**req.view_args)
  File "C:\Users\User\anaconda3\envs\Dash_Env\lib\site-packages\dash\dash.py", line 1345, in dispatch
    response.set_data(func(*args, outputs_list=outputs_list))
  File "C:\Users\User\anaconda3\envs\Dash_Env\lib\site-packages\dash\_callback.py", line 151, in add_context
    outpu

POST /_dash-update-component HTTP/1.1" 500 -


Exception on /_dash-update-component [POST]
Traceback (most recent call last):
  File "C:\Users\User\anaconda3\envs\Dash_Env\lib\site-packages\flask\app.py", line 2073, in wsgi_app
    response = self.full_dispatch_request()
  File "C:\Users\User\anaconda3\envs\Dash_Env\lib\site-packages\flask\app.py", line 1518, in full_dispatch_request
    rv = self.handle_user_exception(e)
  File "C:\Users\User\anaconda3\envs\Dash_Env\lib\site-packages\flask\app.py", line 1516, in full_dispatch_request
    rv = self.dispatch_request()
  File "C:\Users\User\anaconda3\envs\Dash_Env\lib\site-packages\flask\app.py", line 1502, in dispatch_request
    return self.ensure_sync(self.view_functions[rule.endpoint])(**req.view_args)
  File "C:\Users\User\anaconda3\envs\Dash_Env\lib\site-packages\dash\dash.py", line 1345, in dispatch
    response.set_data(func(*args, outputs_list=outputs_list))
  File "C:\Users\User\anaconda3\envs\Dash_Env\lib\site-packages\dash\_callback.py", line 151, in add_context
    outpu

127.0.0.1 - - [08/Apr/2022 19:08:42] "POST /_dash-update-component HTTP/1.1" 500 -


In [None]:
pd.DataFrame({'z':'1','b':'2'},index=['1'])

Unnamed: 0,z,b
1,1,2


In [None]:
### 7. Matrix 
import numpy as np

#files merging
new_file=pd.merge(application_records_t,df_credit,on="ID",how="inner")
new_file

new_file.drop("FLAG_OWN_CAR", axis="columns", inplace=True)
new_file.drop("FLAG_OWN_REALTY", axis="columns", inplace=True)
new_file.drop("CNT_CHILDREN", axis="columns", inplace=True)
new_file.drop("AMT_INCOME_TOTAL", axis="columns", inplace=True)
new_file.drop("NAME_HOUSING_TYPE", axis="columns", inplace=True)

new_file.drop("CODE_GENDER", axis="columns", inplace=True)
new_file.drop("DAYS_BIRTH", axis="columns", inplace=True)
new_file.drop("DAYS_EMPLOYED", axis="columns", inplace=True)
new_file.drop("FLAG_WORK_PHONE", axis="columns", inplace=True)

new_file.drop("FLAG_PHONE", axis="columns", inplace=True)
new_file.drop("CNT_FAM_MEMBERS", axis="columns", inplace=True)
new_file.drop("MONTHS_BALANCE", axis="columns", inplace=True)
new_file.drop("STATUS", axis="columns", inplace=True)

new_file.drop("LAST_RECORD", axis="columns", inplace=True)
new_file.drop("MONTHS", axis="columns", inplace=True)
new_file.drop("ACTIVE", axis="columns", inplace=True)
new_file.drop("PARTIAL_STATUS", axis="columns", inplace=True)
new_file.drop("TOTAL_STATUS", axis="columns", inplace=True)
new_file.drop("CNT_CHILDREN_CATEGORIZED", axis="columns", inplace=True)
new_file.drop("DAYS_EMPLOYED_CATEGORIZED", axis="columns", inplace=True)

#matrix preparation

matrix=new_file.groupby(["OCCUPATION_TYPE","DAYS_BIRTH_CATEGORIZED","AMT_INCOME_TOTAL_CATEGORIZED","FLAG_EMAIL","NAME_FAMILY_STATUS","NAME_EDUCATION_TYPE",
                     "NAME_INCOME_TYPE", "CNT_FAM_MEMBERS_CATEGORIZED","GOOD_BAD"]).count()

matrix["cumsum"]=matrix.groupby(["OCCUPATION_TYPE","DAYS_BIRTH_CATEGORIZED","AMT_INCOME_TOTAL_CATEGORIZED","FLAG_EMAIL","NAME_FAMILY_STATUS","NAME_EDUCATION_TYPE",
                     "NAME_INCOME_TYPE", "CNT_FAM_MEMBERS_CATEGORIZED"]).sum()["ID"]

matrix["Percentage"]=matrix["ID"]/matrix["cumsum"]*100


NameError: name 'df_credit' is not defined