In [1]:
#2018 HMDA Edit Testing File Generator
from collections import OrderedDict
from io import StringIO
import json
import os
import pandas as pd
import random

#custom imports
import lar_constraints
import lar_generator
from rules_engine import rules_engine


In [2]:
#2018 Filing Instruction Guide: https://www.consumerfinance.gov/data-research/hmda/static/for-filers/2018/2018-HMDA-FIG.pdf

#names assigned to the columns of the tract to CBSA file, which is read without a header row
cbsa_cols = ['name', 'metDivName', 'state', 'countyFips', 'county', 'tracts','geoIdMsa', 'metDivFp', 'smallCounty', 
             'stateCode', 'tractDecimal']
#subset of the named columns that is actually loaded
use_cols = ['name', 'metDivName', 'countyFips', 'geoIdMsa', 'metDivFp', 'smallCounty', 'tracts']
#load tract to CBSA data from platform file, reading every value as a string
cbsas = pd.read_csv(
    '../dependancies/tract_to_cbsa_2015.txt',
    delimiter='|',
    header=None,
    names=cbsa_cols,
    usecols=use_cols,
    dtype=str,
)
#tract identifier is the county FIPS string followed by the tract string
cbsas["tractFips"] = cbsas["countyFips"] + cbsas["tracts"]
#plain Python lists handed to the generator and constraints classes
counties = cbsas["countyFips"].tolist()
tracts = cbsas["tractFips"].tolist()

In [3]:
#load schemas for LAR and transmittal sheet
#the with blocks close each schema file as soon as it has been parsed
with open("../schemas/lar_schema.json", "r") as lar_schema_file:
    lar_schema_df = pd.DataFrame(json.load(lar_schema_file))
with open("../schemas/ts_schema.json", "r") as ts_schema_file:
    ts_schema_df = pd.DataFrame(json.load(ts_schema_file))

In [4]:
#instantiate generator from the LAR and TS schemas plus the county and tract lists
lar_gen = lar_generator.lar_gen(
    lar_schema_df,
    ts_schema_df,
    counties=counties,
    tracts=tracts,
)
#instantiate constraints with the same county and tract lists
lar_const = lar_constraints.lar_constraints(
    counties=counties,
    tracts=tracts,
)


In [5]:
#run constraint functions on row
file_length = 5 #set number of rows in test file
lei = None #LEI taken from the first generated row and passed to every later make_row call
row_frames = [] #one single-row dataframe per finished LAR row

for i in range(0, file_length):
    print("making new row\n\n\n\n\n\n\n")
    if lei:
        row = lar_gen.make_row(lei=lei)
    else:
        row = lar_gen.make_row() #create new row
    lei = row["lei"]
    diff = [1] #initialize diff for loop
    iters = 0 #number of full passes over the constraint functions for this row
    #repeat full passes until a pass leaves the row unchanged
    while len(diff) > 0:
        row_base = row.copy() #copy row to enable diff
        #convert the copied row to a set once per pass; it does not change inside the pass
        initial_row = set(row_base.items())
        #apply constraint functions to LAR row
        for func in lar_const.constraint_funcs: 
            print("starting constraints\n\n\n\n\n")
            row = getattr(lar_const, func)(row)
            #convert the current row to a set for diff
            changed_row = set(row.items())
            #subtract row sets to show changes from constraint funcs;
            #the diff is cumulative since the start of the pass, so its value after the
            #last function decides whether another pass is needed
            diff = (initial_row - changed_row)
            #show readout of number of changes made to new LAR row
            print("changes from {func}: {vals}".format(func=str(func), vals=len(diff)))
            print(diff)
        iters+=1
    #keep the finished row; the dataframe is assembled once after the loop
    row_frames.append(pd.DataFrame(row, index=[1]))
#single concat instead of growing the dataframe inside the loop;
#ignore_index replaces the per-row index with a fresh 0..n-1 index
lar_frame = pd.concat(row_frames, axis=0, ignore_index=True)
print(iters)

making new row







starting constraints





changes from v612_const: 1
{('loan_purpose', '2')}
starting constraints





changes from v610_const: 1
{('loan_purpose', '2')}
starting constraints





changes from v613_const: 2
{('action_taken', '4'), ('loan_purpose', '2')}
starting constraints





changes from v614_const: 3
{('preapproval', '1'), ('action_taken', '4'), ('loan_purpose', '2')}
starting constraints





changes from v615_const: 3
{('preapproval', '1'), ('action_taken', '4'), ('loan_purpose', '2')}
starting constraints





changes from v619_const: 4
{('preapproval', '1'), ('action_taken', '4'), ('action_date', '20180824'), ('loan_purpose', '2')}
starting constraints





changes from v622_const: 4
{('preapproval', '1'), ('action_taken', '4'), ('action_date', '20180824'), ('loan_purpose', '2')}
starting constraints





changes from v627_const: 4
{('preapproval', '1'), ('action_taken', '4'), ('action_date', '20180824'), ('loan_purpose', '2')}
starting constraints





c

set()
starting constraints





changes from v610_const: 0
set()
starting constraints





changes from v613_const: 0
set()
starting constraints





changes from v614_const: 0
set()
starting constraints





changes from v615_const: 0
set()
starting constraints





changes from v619_const: 0
set()
starting constraints





changes from v622_const: 0
set()
starting constraints





changes from v627_const: 0
set()
starting constraints





changes from v628_const: 0
set()
starting constraints





changes from v629_const: 0
set()
starting constraints





changes from v630_const: 0
set()
starting constraints





changes from v631_const: 0
set()
starting constraints





changes from v632_const: 0
set()
starting constraints





changes from v633_const: 0
set()
starting constraints





changes from v634_const: 0
set()
starting constraints





changes from v635_const: 1
{('app_race_5', '1')}
starting constraints





changes from v636_const: 1
{('app_race_5', '1')}
starting constrain

starting constraints





changes from v662_const: 1
{('co_app_eth_1', '3')}
starting constraints





changes from v663_const: 1
{('co_app_eth_1', '3')}
starting constraints





changes from v664_const: 1
{('co_app_eth_1', '3')}
starting constraints





changes from v666_const: 1
{('co_app_eth_1', '3')}
starting constraints





changes from v667_const: 1
{('co_app_eth_1', '3')}
starting constraints





changes from v668_const: 1
{('co_app_eth_1', '3')}
starting constraints





changes from v669_const: 2
{('co_app_eth_1', '3'), ('denial_4', '9')}
starting constraints





changes from v670_const: 2
{('co_app_eth_1', '3'), ('denial_4', '9')}
starting constraints





changes from v671_const: 1
{('co_app_eth_1', '3')}
starting constraints





changes from v672_const: 1
{('co_app_eth_1', '3')}
starting constraints





changes from v673_const: 1
{('co_app_eth_1', '3')}
starting constraints





changes from v674_const: 1
{('co_app_eth_1', '3')}
starting constraints





changes from

starting constraints





changes from v703_const: 0
set()
starting constraints





changes from v704_const: 0
set()
starting constraints





changes from v705_const: 0
set()
making new row







starting constraints





changes from v612_const: 1
{('loan_purpose', '2')}
starting constraints





changes from v610_const: 1
{('loan_purpose', '2')}
starting constraints





changes from v613_const: 2
{('action_taken', '4'), ('loan_purpose', '2')}
starting constraints





changes from v614_const: 3
{('preapproval', '1'), ('action_taken', '4'), ('loan_purpose', '2')}
starting constraints





changes from v615_const: 4
{('preapproval', '1'), ('action_taken', '4'), ('const_method', '1'), ('loan_purpose', '2')}
starting constraints





changes from v619_const: 5
{('preapproval', '1'), ('const_method', '1'), ('action_date', '20180427'), ('action_taken', '4'), ('loan_purpose', '2')}
starting constraints





changes from v622_const: 5
{('preapproval', '1'), ('const_method', '1'), ('actio

changes from v703_const: 1
{('app_race_5', '5')}
starting constraints





changes from v704_const: 1
{('app_race_5', '5')}
starting constraints





changes from v705_const: 1
{('app_race_5', '5')}
starting constraints





changes from v612_const: 0
set()
starting constraints





changes from v610_const: 0
set()
starting constraints





changes from v613_const: 0
set()
starting constraints





changes from v614_const: 0
set()
starting constraints





changes from v615_const: 0
set()
starting constraints





changes from v619_const: 0
set()
starting constraints





changes from v622_const: 0
set()
starting constraints





changes from v627_const: 0
set()
starting constraints





changes from v628_const: 0
set()
starting constraints





changes from v629_const: 0
set()
starting constraints





changes from v630_const: 0
set()
starting constraints





changes from v631_const: 0
set()
starting constraints





changes from v632_const: 0
set()
starting constraints





changes 

In [6]:
#Quality and Macro field interrelationship constraints:
#iters is reset for each generated row and incremented once per pass over the
#constraint functions, so this shows the pass count of the last row generated
print(iters)

3


In [7]:
#Create a sample TS row
#Note: this will need to be more robust to include other federal agencies
# mlo_id needs NA option
#set dummy values for TS row
#every value is a string; the insertion order is the field order of the row
ts_row_small = OrderedDict()
ts_row_small["record_id"]="1"
ts_row_small["inst_name"]="Ficus Bank"
ts_row_small["calendar_year"]=str(2018)
ts_row_small["calendar_quarter"]="4"
ts_row_small["contact_name"]="Mr. Smug Pockets"
ts_row_small["contact_tel"]="555-555-5555"
ts_row_small["contact_email"]="pockets@ficus.com"
ts_row_small["contact_street_address"]="1234 Ficus Lane"
ts_row_small["office_city"]="Ficusville"
ts_row_small["office_state"]="UT"
ts_row_small["office_zip"]="84096"
ts_row_small["federal_agency"]="9"
#number of LAR rows in the generated dataframe
ts_row_small["lar_entries"]= str(len(lar_frame))
ts_row_small["tax_id"]="01-0123456"
#take the LEI from the first LAR row; .at replaces DataFrame.get_value, which was
#deprecated in pandas 0.21 and removed in pandas 1.0
ts_row_small["lei"]=lar_frame.at[0, "lei"]

In [8]:
#join LAR and TS rows to make an output file
def write_file(ts_input=None, lar_input=None, directory="../edits_files/", name="passes_all.txt"):
    """Takes a TS row as a dictionary and LAR data as a dataframe. Writes LAR data to file and 
    re-reads it to combine with TS data to make a full file.

    Parameters:
        ts_input: dictionary holding the TS row; its values are written in iteration
            order, pipe-delimited, as the first line of the output file.
        lar_input: dataframe of LAR rows; written pipe-delimited without header or index.
        directory: output directory, with or without a trailing path separator.
            Created if it does not exist.
        name: file name of the combined TS and LAR file inside directory.

    Side effects:
        Writes <directory>/file_parts/lar_data.txt (LAR rows only) and
        <directory>/<name> (TS row followed by the LAR rows).
    """
    #build paths with os.path.join so a directory without a trailing slash still works
    parts_dir = os.path.join(directory, "file_parts")
    #creates directory as well as the file_parts sub-directory; no error if they exist
    os.makedirs(parts_dir, exist_ok=True)

    #write LAR dataframe to file
    lar_path = os.path.join(parts_dir, "lar_data.txt")
    lar_input.to_csv(lar_path, sep="|", header=False, index=False, index_label=False)
    #load LAR data as file rows
    with open(lar_path, 'r') as lar_data:
        lar = lar_data.readlines()

    with open(os.path.join(directory, name), 'w') as final_file:
        #str() lets non-string TS values (e.g. an integer row count) be joined
        final_file.write("|".join(str(value) for value in ts_input.values())+"\n")
        #LAR lines keep their own line endings from the parts file
        final_file.writelines(lar)

In [9]:
#write the TS row and the LAR dataframe to disk using the default directory and file name
write_file(ts_row_small, lar_frame)
#validator engine uses the default: path="../edits_files/", data_file="passes_all.txt" for data files
#instantiate edits rules engine from the LAR and TS schemas
validator = rules_engine(lar_schema_df, ts_schema_df)

In [10]:
#split TS and LAR using validator function
#validator creates class objects of each of these internally as well
ts_df, lar_df = validator.split_ts_row(
    path="../edits_files/",
    data_file="passes_all.txt",
)

In [11]:
#select validator attributes whose name is "s" or "v" followed by three digits (e.g. s300, v610_1)
edit_names = [
    attr for attr in dir(validator)
    if attr[:1] in ("s", "v") and attr[1:4].isdigit()
]
#call each selected attribute in the order dir() returned it
for edit_name in edit_names:
    print("applying:", edit_name)
    getattr(validator, edit_name)()
#last expression of the cell: display the validator's results
validator.results

applying: s300
applying: s301
applying: s302
applying: s304
applying: s305
applying: v600
applying: v601
applying: v602
applying: v603
applying: v604
applying: v605
applying: v606
applying: v607
applying: v608
applying: v609
applying: v610_1
applying: v610_2
applying: v611
applying: v612_1
applying: v612_2


{'s300': OrderedDict([('row_type', 'TS/LAR'),
              ('record_id_ts', 'passed'),
              ('record_id_lar', 'passed')]),
 's301': OrderedDict([('row_type', 'LAR'), ('LEI', 'passed')]),
 's302': OrderedDict([('row_type', 'TS'), ('calendar_year', 'passed')]),
 's304': OrderedDict([('row_type', 'TS/LAR'), ('lar_entries', 'passed')]),
 's305': OrderedDict([('row_type', 'LAR'), ('all', 'passed')]),
 'v600': OrderedDict([('row_type', 'LAR'), ('LEI', 'passed')]),
 'v601': OrderedDict([('row_type', 'TS'),
              ('contact_name', 'passed'),
              ('contact_tel', 'passed'),
              ('contact_street_address', 'passed'),
              ('office_city', 'passed'),
              ('contact_email', 'passed')]),
 'v602': OrderedDict([('row_type', 'TS'), ('calendar_quarter', 'passed')]),
 'v603': OrderedDict([('row_type', 'TS'), ('contact_tel', 'passed')]),
 'v604': OrderedDict([('row_type', 'TS'), ('office_state', 'passed')]),
 'v605': OrderedDict([('row_type', 'TS'), ('o

In [12]:
#columns of the validator's LAR dataframe to display
inspect_cols = ["action_taken", "app_date", "denial_1", "submission_type"]
validator.lar_df[inspect_cols]

Unnamed: 0,action_taken,app_date,denial_1,submission_type
0,3,20180921.0,5,1
1,6,,10,3
2,5,20181204.0,10,1
3,4,20181101.0,10,2
4,5,20180809.0,10,2
