## Notes

• All of the following calculations are conditional on race/ethnicity. That is, the calcu-
lation described below is carried out separately for each race.

• The figures/tables containing the data can be found here:
https://www.federalreserve.gov/boarddocs/rptcongress/creditscore/figures.htm

## Calculations

In [21]:
import pandas as pd
import numpy as np
import sys
totalData = pd.read_csv("Figure7A.csv")
CumulativeData = pd.read_csv("Figure3A.csv")

In [22]:
%matplotlib inline

### Step 1) Calculating π

In [2]:
# Get the dataframe that holds the cumulative percentage, by demographic group
def getPD(data, col, raceName):
    pd = data["Score"].to_frame(name="Score")
    race = np.full(len(data), raceName)
    pd["Demographic"] = race
    pd["Percentage"] = data[col]
    return pd

whites = getPD(CumulativeData, "White", "white")
blacks = getPD(CumulativeData, "Black", "black")
asians = getPD(CumulativeData, "Asian", "asian")
hispanics = getPD(CumulativeData, "Hispanic", "hispanic")
pds = [whites, blacks, asians, hispanics]
cumulative = pd.concat(pds)

In [32]:
good = totalData.set_index("Score")["Asian (Good)"]
bad = totalData.set_index("Score")["Asian (Bad)"]

total = asians.set_index("Score").Percentage

In [34]:
((total - bad) / (good - bad))

0.7858001718623332

In [3]:
# Example of finding pi with whites at s = 40.0
total = cumulative[cumulative["Demographic"] == "white"].loc[40].Percentage.copy()
good = totalData[totalData["Demographic"] == "white"].loc[40].Good.copy()
bad = totalData[totalData["Demographic"] == "white"].loc[40].Bad.copy()
pi = (total - bad) / (good - bad)
pi

0.82316240982647693

In [4]:
#sanity check for pi with whites
total = cumulative[cumulative["Demographic"] == "white"].loc[80].Percentage.copy()
good = totalData[totalData["Demographic"] == "white"].loc[80].Good.copy()
bad = totalData[totalData["Demographic"] == "white"].loc[80].Bad.copy()
(total - bad) / (good - bad)

0.8887006606810095

In [5]:
#sanity check for pi with blacks
total = cumulative[cumulative["Demographic"] == "black"].loc[10].Percentage.copy()
good = totalData[totalData["Demographic"] == "black"].loc[208].Good.copy()
bad = totalData[totalData["Demographic"] == "black"].loc[208].Bad.copy()
(total - bad) / (good - bad)

0.38817159516867977

In [6]:
#sanity check for pi with another value from blacks
total = cumulative[cumulative["Demographic"] == "black"].loc[102].Percentage.copy()
good = totalData[totalData["Demographic"] == "black"].loc[300].Good.copy()
bad = totalData[totalData["Demographic"] == "black"].loc[300].Bad.copy()
(total - bad) / (good - bad)

0.29741863075196429

### Step 2) Convert Cumulative Probabilities into Equalities

In [7]:
whitesEq = totalData[totalData["Demographic"] == "white"].copy()
blacksEq = totalData[totalData["Demographic"] == "black"].copy()
asiansEq = totalData[totalData["Demographic"] == "asian"].copy()
hispanicsEq = totalData[totalData["Demographic"] == "hispanic"].copy()

In [8]:
whiteTotal = cumulative[cumulative["Demographic"] == "white"].copy()
blackTotal = cumulative[cumulative["Demographic"] == "black"].copy()
asianTotal = cumulative[cumulative["Demographic"] == "asian"].copy()
hispanicTotal = cumulative[cumulative["Demographic"] == "hispanic"].copy()

In [9]:
whitesEq["Good"] = whitesEq["Good"].diff().fillna(value=0).copy()
whitesEq["Bad"] = whitesEq["Bad"].diff().fillna(value=0).copy()
whitesEq["Both"] = whiteTotal["Percentage"].diff().fillna(value=0).copy()

In [10]:
whitesEq.head()

Unnamed: 0,TransRisk Score,Demographic,Good,Bad,Both
0,0.0,white,0.0,0.0,0.0
1,0.5,white,0.03,1.68,0.25
2,1.0,white,0.19,5.41,0.9
3,1.5,white,0.04,1.59,0.27
4,2.0,white,0.09,1.73,0.37


In [11]:
blacksEq["Good"] = blacksEq["Good"].diff().fillna(value=0).copy()
blacksEq["Bad"] = blacksEq["Bad"].diff().fillna(value=0).copy()
blacksEq["Both"] = blackTotal["Percentage"].diff().fillna(value=0).copy()

In [12]:
## This is what I'd like the blacksEq["Both"] to be, but it isn't setting it correctly
#blackTotal["Percentage"].diff().fillna(value=0).copy()

In [13]:
blacksEq.head()

Unnamed: 0,TransRisk Score,Demographic,Good,Bad,Both
198,0.0,black,0.0,0.0,
199,0.5,black,0.08,2.06,
200,1.0,black,0.64,6.58,
201,1.5,black,0.13,2.0,
202,2.0,black,0.19,2.5,


In [14]:
asiansEq["Good"] = asiansEq["Good"].diff().fillna(value=0).copy()
asiansEq["Bad"] = asiansEq["Bad"].diff().fillna(value=0).copy()
asiansEq["Both"] = asianTotal["Percentage"].diff().fillna(value=0).copy()

In [15]:
hispanicsEq["Good"] = hispanicsEq["Good"].diff().fillna(value=0).copy()
hispanicsEq["Bad"] = hispanicsEq["Bad"].diff().fillna(value=0).copy()
hispanicsEq["Both"] = hispanicTotal["Percentage"].diff().fillna(value=0).copy()

### Step 3) Calculate the probability using Bayes' Rule

In [16]:
# Calculating an example of whites just for the transrisk score being 10
total = whitesEq.loc[20].Both
good = whitesEq.loc[20].Good
bad = whitesEq.loc[20].Bad
total, good, bad

(0.35000000000000053, 0.14999999999999991, 1.1000000000000014)

In [17]:
finalProbability = (pi * good) / ((pi * good) + ((1 - pi)*(bad)))
finalProbability

0.38828939301042265

### Final Step) Calculate Probabilities for all scores / demographics

In [18]:
def getProbability(data, pi):
    probabilities = []
    for index, row in data.iterrows():
        total = data.loc[index].Both
        good = data.loc[index].Good
        bad = data.loc[index].Bad
        probability = (pi * good) / ((pi * good) + ((1 - pi)*(bad)))
        probabilities.append(probability)
        series = pd.Series(probabilities).fillna(value=0)
    return series

In [19]:
whitesEq["Final Probability of Good"] = getProbability(whitesEq, pi)
blacksEq["Final Probability of Good"] = getProbability(blacksEq, pi)
asiansEq["Final Probability of Good"] = getProbability(asiansEq, pi)
hispanicsEq["Final Probability of Good"] = getProbability(hispanicsEq, pi)

  import sys


In [20]:
whitesEq.head()

Unnamed: 0,TransRisk Score,Demographic,Good,Bad,Both,Final Probability of Good
0,0.0,white,0.0,0.0,0.0,0.0
1,0.5,white,0.03,1.68,0.25,0.076744
2,1.0,white,0.19,5.41,0.9,0.14051
3,1.5,white,0.04,1.59,0.27,0.104829
4,2.0,white,0.09,1.73,0.37,0.194953


*** Encountering a problem where the rest of the demographics aren't correctly calculating the "Both" field - unsure how to solve ***

In [21]:
blacksEq.head()

Unnamed: 0,TransRisk Score,Demographic,Good,Bad,Both,Final Probability of Good
198,0.0,black,0.0,0.0,,
199,0.5,black,0.08,2.06,,
200,1.0,black,0.64,6.58,,
201,1.5,black,0.13,2.0,,
202,2.0,black,0.19,2.5,,
