In [1]:
#2018 HMDA Edit Testing File Generator
from collections import OrderedDict
import json
import pandas as pd
import random

import lar_constraints
import lar_generator



In [2]:
LAR_field_names = [
    "record_id","lei","uli","app_date","loan_type","loan_purpose",
    "preapproval","const_method","occ_type","loan_amount","action_taken",
    "action_date","street_address","city","state","zip_code","county",
    "tract","app_eth_1","app_eth_2","app_eth_3","app_eth_4","app_eth_5",
    "app_eth_code_14","app_eth_text","co_app_eth_1","co_app_eth_2",
    "co_app_eth_3","co_app_eth_4","co_app_eth_5","co_app_eth_code_14",
    "app_eth_basis","co_app_eth_basis","app_race_1","app_race_2",
    "app_race_3","app_race_4","app_race_5","app_race_code_1",
    "app_race_code_27","app_race_code_44","co_app_race_1","co_app_race_2",
    "co_app_race_3","co_app_race_4","co_app_race_5","co_app_race_code_1",
    "co_app_race_code_27","co_app_race_code_44","app_race_basis",
    "co_app_race_basis","app_sex","co_app_sex","app_sex_basis",
    "co_app_sex_basis","app_age","co_app_age","income","purchaser_type",
    "rate_spread","hoepa","lien","app_credit_score","co_app_credit_score",
    "app_score_name","app_score_code_8","co_app_score_name",
    "co_app_score_code_8","denial_1","denial_2","denial_3","denial_4",
    "denial_code_9","loan_costs","points_fees","origination_fee",
    "discount_points","lender_credits","interest_rate","prepayment_penalty",
    "dti","cltv","loan_term","intro_rate","balloon","int_only_pmts",
    "neg_amort","non_amort_features","property_value","manufactured_type",
    "manufactured_interest","total_units","affordable_units",
    "submission_type","initially_payable","mlo_id","aus_1","aus_2",
    "aus_3","aus_4","aus_5","aus_code_5","aus_result_1","aus_result_2",
    "aus_result_3","aus_result_4","aus_result_5","aus_code_16",
    "reverse_mortgage","open_end_credit","business_purpose"]

TS_field_names= [
    "record_id","inst_name","calendar_year","calendar_quarter",
    "contact_name","contact_tel","contact_email","contact_street_address",
    "office_city","office_state","office_zip","federal_agency",
    "lar_entries","tax_id","lei"]

In [3]:
#data lists used in the construction of LARs


In [4]:
#2018 Filing Instruction Guide: https://www.consumerfinance.gov/data-research/hmda/static/for-filers/2018/2018-HMDA-FIG.pdf
#Note: Add a format field? use to check format of entered data?

#Note: add valid value list/function to the valid vals list field
#IE list of ages 1 to n (range(200))
#IE rate_spread float generator


use_cols = ['name', 'metDivName', 'countyFips', 'geoIdMsa', 'metDivFp', 'smallCounty', 'tracts']
cbsa_cols = ['name', 'metDivName', 'state', 'countyFips', 'county', 'tracts','geoIdMsa', 'metDivFp', 'smallCounty', 
             'stateCode', 'tractDecimal']
cbsas = pd.read_csv('../dependancies/tract_to_cbsa_2015.txt', usecols=use_cols, delimiter='|', 
                    header=None, names=cbsa_cols, dtype=str) #load tract to CBSA data from platform file
cbsas["tractFips"] = cbsas.countyFips + cbsas.tracts
counties = list(cbsas.countyFips)
tracts = list(cbsas.tractFips)



In [5]:

lar_schema_df = pd.DataFrame(json.load(open("../schemas/lar_schema.json", "r")))
ts_schema_df = pd.DataFrame(json.load(open("../schemas/ts_schema.json", "r")))

lar_schema_df.head()
ts_schema_df.head()

Unnamed: 0,data_type,dtypes_,field,valid_vals
0,numeric,[int],record_id,[1]
1,alphanumeric,[string],inst_name,[]
2,numeric,[int],calendar_year,[]
3,numeric,[int],calendar_quarter,[4]
4,alphanumeric,[string],contact_name,[]


In [79]:

boop = lar_generator.lar_gen(lar_schema_df, ts_schema_df, counties=counties, tracts=tracts) #instantiate generator
darp = lar_constraints.lar_constraints(counties=counties, tracts=tracts)#instantiate constraints

#test = lar_generator.lar_gen(lar_schema_df, ts_schema_df)
row = boop.make_row() #create test row
row2 = boop.make_row()

#run constraint functions on row
diff = [1]

while len(diff) >0:
    row_base = row.copy()
    for func in darp.constraint_funcs: 
        row = getattr(darp, func)(row)


        initial_row = set(row_base.items())
        changed_row = set(row.items())

        print(len(initial_row), len(changed_row))
        print("changes from {func}:".format(func=str(func)))
        diff = (initial_row - changed_row)
        if len(diff) > 0:
            print(diff)
        print("changed values:",len(diff))






110 110
changes from v612_const:
changed values: 0
110 110
changes from v610_const:
changed values: 0
110 110
changes from v613_const:
{('action_taken', '1')}
changed values: 1
110 110
changes from v614_const:
{('action_taken', '1')}
changed values: 1
110 110
changes from v615_const:
{('action_taken', '1')}
changed values: 1
110 110
changes from v619_const:
{('action_taken', '1')}
changed values: 1
110 110
changes from v622_const:
{('action_taken', '1')}
changed values: 1
110 110
changes from v627_const:
{('action_taken', '1')}
changed values: 1
110 110
changes from v628_const:
{('action_taken', '1'), ('app_eth_3', ''), ('app_eth_4', ''), ('app_eth_5', '')}
changed values: 4
110 110
changes from v629_const:
{('app_eth_3', ''), ('app_eth_4', ''), ('app_eth_1', ''), ('action_taken', '1'), ('app_eth_5', '')}
changed values: 5
110 110
changes from v630_const:
{('app_eth_3', ''), ('app_eth_4', ''), ('app_eth_1', ''), ('action_taken', '1'), ('app_eth_5', '')}
changed values: 5
110 110
change

In [78]:
diff = [1]
while len(diff) > 0:
    row_base = row.copy()
    for func in darp.constraint_funcs: 
        row = getattr(darp, func)(row)


        initial_row = set(row_base.items())
        changed_row = set(row.items())

        print(len(initial_row), len(changed_row))
        print("changes from {func}:".format(func=str(func)))
        diff = (initial_row - changed_row)
        if len(diff) > 0:
            print(diff)
        print("changed values:",len(diff))


111 111
changes from v612_const:
changed values: 0
111 111
changes from v610_const:
changed values: 0
111 111
changes from v613_const:
{('action_taken', '3')}
changed values: 1
111 111
changes from v614_const:
{('action_taken', '3')}
changed values: 1
111 111
changes from v615_const:
{('action_taken', '3')}
changed values: 1
111 111
changes from v619_const:
{('action_taken', '3')}
changed values: 1
111 111
changes from v622_const:
{('action_taken', '3')}
changed values: 1
111 111
changes from v627_const:
{('action_taken', '3')}
changed values: 1
111 111
changes from v628_const:
{('action_taken', '3')}
changed values: 1
111 111
changes from v629_const:
{('action_taken', '3')}
changed values: 1
111 111
changes from v630_const:
{('action_taken', '3')}
changed values: 1
111 111
changes from v631_const:
{('action_taken', '3')}
changed values: 1
111 111
changes from v632_const:
{('action_taken', '3')}
changed values: 1
111 111
changes from v633_const:
{('action_taken', '3')}
changed values: 

In [65]:
print(row["app_credit_score"], row["app_score_name"],row["app_score_code_8"])
print(row_base["app_credit_score"], row_base["app_score_name"],row_base["app_score_code_8"])

8888 9 
498 3 7S4SUIVRWRHMZHPALJI6VJJC4F9RZTH55C1NRY4FHGG12NW7XHMV06DD2CP738FBF45IE7METFDTL


In [59]:
for key, value in row.items():
    print(key,value)

record_id 2
lei DHW2NL34P4WYGYOSUBJZ
uli DHW2NL34P4WYGYOSUBJZIAMP9NRCKDJUFM3EB52RQBWJE
app_date 20180212
loan_type 4
loan_purpose 31
preapproval 2
const_method 2
occ_type 3
loan_amount 2001
action_taken 3
action_date 20181003
street_address 1234 Hocus Potato Way
city Tatertown
state UT
zip_code 84096
county 41039
tract 21067000200
app_eth_1 2
app_eth_2 
app_eth_3 
app_eth_4 
app_eth_5 
app_eth_code_14 110PRGO0SLDDG9GPNS7GF4YEIGYASFE3W1ZURMJ566GRHZ6TS
co_app_eth_1 1
co_app_eth_2 2
co_app_eth_3 
co_app_eth_4 
co_app_eth_5 
co_app_eth_code_14 4620QV059600R6A33
app_eth_basis 1
co_app_eth_basis 1
app_race_1 2
app_race_2 1
app_race_3 4
app_race_4 25
app_race_5 2
app_race_code_1 S3BK6JWYVLHL
app_race_code_27 DYNDTWTRNGHY12PV4ACOGHSYCA2Z65BAUTPKZ4G4430S69WG4P3YYQSV25D204VGUZST54WAIESS3XRP3I68RLZO
app_race_code_44 HYO86GVR9RG8H36BJSRK5HMYSJXD89M0O9YSA4HTJ7TEQXRPR0EV18CPL5S34GHHS75MNDLMG8W2YKT834XXHX2OK7X37SO
co_app_race_1 8
co_app_race_2 21
co_app_race_3 42
co_app_race_4 23
co_app_race_5 22
co_

In [7]:
#Quality and Macro field interrelationship constraints:

#set file length parameters
#LAR rows (small=200, medium=1,000, large=10,000)
lar_small = 200
lar_medium = 1000
lar_large = 10000

#Notes: increase chance for enum added to range
# free text not valid without certain selections
# mlo_id needs NA option




#set dummy values for TS row
ts_row_small = OrderedDict()

ts_row_small["record_id"]="1"
ts_row_small["inst_name"]="Ficus Bank"
ts_row_small["calendar_year"]=str(2018)
ts_row_small["calendar_quarter"]=str(TS_df.valid_vals[TS_df.field=="calendar_quarter"].iloc[0][0])
ts_row_small["contact_name"]="Mr. Smug Pockets"
ts_row_small["contact_tel"]="555-555-5555"
ts_row_small["contact_email"]="pockets@ficus.com"
ts_row_small["contact_street_address"]="1234 Ficus Lane"
ts_row_small["office_city"]="Ficusville"
ts_row_small["office_state"]="UT"
ts_row_small["office_zip"]="84096"
ts_row_small["federal_agency"]=str(TS_df.valid_vals[TS_df.field=="federal_agency"].iloc[0][0])
ts_row_small["lar_entries"]=str(lar_small)
ts_row_small["tax_id"]="01-0123456"
ts_row_small["lei"]="12345678901234567890"

#create TS medium row
ts_row_medium = ts_row_small.copy()
ts_row_medium["lar_entries"] = str(lar_medium)

#create TS large row
ts_row_large = ts_row_small.copy()
ts_row_large["lar_entries"] = str(lar_large)

