In [14]:
import yaml
import json
import pandas as pd
import os
from os import listdir
from os.path import isfile, join

from rules_engine import rules_engine
import utils

pd.options.display.max_columns = 999

In [15]:
with open('config.yaml') as f:
	# use safe_load instead load
	data_map = yaml.safe_load(f)
file_length = data_map['file_length']['value']


In [16]:
use_cols = ['name', 'metDivName', 'countyFips', 'geoIdMsa', 'metDivFp', 'smallCounty', 'tracts']
cbsa_cols = ['name', 'metDivName', 'state', 'countyFips', 'county', 'tracts','geoIdMsa', 'metDivFp', 'smallCounty', 
			 'stateCode', 'tractDecimal']
cbsas = pd.read_csv('../dependencies/tract_to_cbsa_2015.txt', usecols=use_cols, delimiter='|', 
					header=None, names=cbsa_cols, dtype=str) #load tract to CBSA data from platform file
cbsas["tractFips"] = cbsas.countyFips + cbsas.tracts
counties = list(cbsas.countyFips)
tracts = list(cbsas.tractFips)
small_counties = list(cbsas.countyFips[cbsas.smallCounty=="1"])
#load schemas for LAR and transmittal sheet
#schemas contain valid enumerations, including NA values, for each field in the dataset
lar_schema_df = pd.DataFrame(json.load(open("../schemas/lar_schema.json", "r")))
ts_schema_df = pd.DataFrame(json.load(open("../schemas/ts_schema.json", "r")))
!pwd

/Users/roellk/hmda/hmda-test-files/python


In [17]:
path = "../edits_files/quality/"
file = "q603.txt"
#lar validator checks a dataframe and returns a JSON with generate_error_files
checker = rules_engine(lar_schema=lar_schema_df, ts_schema=ts_schema_df, tracts=tracts, 
                       counties=counties, small_counties=small_counties) 
#load data to checker
ts_df, lar_df = utils.read_data_file(path=path, data_file=file)
checker.load_data_frames(ts_df, lar_df)
for func in dir(checker):
    if func[:1] in ("s", "v", "q") and func[1:4].isdigit()==True:
        #print("applying:", func)
        getattr(checker, func)()
res_df = pd.DataFrame(checker.results)
res_df[(res_df.status=="failed")&(res_df.edit_name==file[:-4])]

Unnamed: 0,edit_name,fail_count,fields,row_ids,row_type,status
3,q603,94.0,County/Census Tract,[64TL7LOMOR0RNWE9CGHM2JI1IXE8B6RJQVRUO4WRXVH23...,LAR,failed


In [18]:
#get all edit test files in single list
val_path = "../edits_files/validity/"
syn_path = "../edits_files/syntax/"
val_files = [f for f in listdir(val_path) if isfile(join(val_path, f)) and f[0]=="v"] #get list of only files
syn_files = [f for f in listdir(syn_path) if isfile(join(syn_path, f)) and f[0]=="s"]

In [19]:
#check validity test files for error rate
for file in val_files:
    print(file)
    #lar validator checks a dataframe and returns a JSON with generate_error_files
    checker = rules_engine(lar_schema=lar_schema_df, ts_schema=ts_schema_df, tracts=tracts, counties=counties)
    #load data to checker
    ts_df, lar_df = utils.read_data_file(path=val_path, data_file=file)
    checker.load_data_frames(ts_df, lar_df)
    for func in dir(checker):
        if func[:1] in ("s", "v") and func[1:4].isdigit()==True:
            getattr(checker, func)()
    res_df = pd.DataFrame(checker.results)
    res_df = res_df[(res_df.edit_name==file[:-4])] #set res_df for only the edit in the file name
    if res_df.row_type.iloc[0]=="TS" and res_df.status.iloc[0]=="failed":
        print("file is good")
        
    if res_df.row_type[(res_df.edit_name==file[:-4])].iloc[0] == "LAR" and \
       res_df.fail_count[(res_df.edit_name==file[:-4])].iloc[0] != file_length:
        print("WARNING BOOOOOP", len(res_df.fail_count[res_df.edit_name==file[:-4]]))
    elif res_df.row_type[(res_df.edit_name==file[:-4])].iloc[0] == "LAR" and fail_count == file_length:
        print("file is good")
    print()

v600.txt


NameError: name 'fail_count' is not defined

In [None]:
#check syntax test files for error rate
for file in syn_files:
    print(file)
    #lar validator checks a dataframe and returns a JSON with generate_error_files
    checker = rules_engine(lar_schema=lar_schema_df, ts_schema=ts_schema_df, tracts=tracts, counties=counties)
    #load data to checker
    ts_df, lar_df = utils.read_data_file(path=syn_path, data_file=file)
    checker.load_data_frames(ts_df, lar_df)
    for func in dir(checker):
        if func[:1] in ("s", "v") and func[1:4].isdigit()==True:
            getattr(checker, func)()
    res_df = pd.DataFrame(checker.results)
    res_df = res_df[(res_df.edit_name==file[:-4])] #set res_df for only the edit in the file name
    
    if res_df.row_type.iloc[0]=="TS" and res_df.status.iloc[0]=="failed":
        print("file is good")
    if res_df.row_type[(res_df.edit_name==file[:-4])].iloc[0] == "LAR":
        fail_count = res_df.fail_count[(res_df.edit_name==file[:-4])].iloc[0]
    
    if row_type == "LAR" and int(fail_count) != file_length:
        print("WARNING BOOOOOP\n", "*"*100)
    else:
        print("file is good")
    print()

In [None]:
#load CBSA data for geography testing edits
use_cols = ['name', 'metDivName', 'countyFips', 'geoIdMsa', 'metDivFp', 'smallCounty', 'tracts']
cbsa_cols = ['name', 'metDivName', 'state', 'countyFips', 'county', 'tracts','geoIdMsa', 'metDivFp', 'smallCounty', 
'stateCode', 'tractDecimal']
cbsa_data = pd.read_csv('../dependencies/tract_to_cbsa_2015.txt', usecols=use_cols, delimiter='|', 
header=None, names=cbsa_cols, dtype=object) #load tract to CBSA data from platform file


In [20]:
list(cbsa_data.countyFips[cbsa_data.smallCounty!="1"])

NameError: name 'cbsa_data' is not defined

In [21]:
big_counties = cbsas.countyFips[(cbsas.smallCounty!="1")]
small_counties = cbsas.countyFips[cbsas.smallCounty=="1"]

In [22]:
for county in big_counties:
    if county in small_counties:
        print('boop')

In [23]:
lar_df[lar_df.county.isin(small_counties)]

field,record_id,lei,uli,app_date,loan_type,loan_purpose,preapproval,const_method,occ_type,loan_amount,action_taken,action_date,street_address,city,state,zip_code,county,tract,app_eth_1,app_eth_2,app_eth_3,app_eth_4,app_eth_5,app_eth_free,co_app_eth_1,co_app_eth_2,co_app_eth_3,co_app_eth_4,co_app_eth_5,co_app_eth_free,app_eth_basis,co_app_eth_basis,app_race_1,app_race_2,app_race_3,app_race_4,app_race_5,app_race_native_text,app_race_asian_text,app_race_islander_text,co_app_race_1,co_app_race_2,co_app_race_3,co_app_race_4,co_app_race_5,co_app_race_native_text,co_app_race_asian_text,co_app_race_islander_text,app_race_basis,co_app_race_basis,app_sex,co_app_sex,app_sex_basis,co_app_sex_basis,app_age,co_app_age,income,purchaser_type,rate_spread,hoepa,lien,app_credit_score,co_app_credit_score,app_score_name,app_score_code_8,co_app_score_name,co_app_score_code_8,denial_1,denial_2,denial_3,denial_4,denial_code_9,loan_costs,points_fees,origination_fee,discount_points,lender_credits,interest_rate,prepayment_penalty,dti,cltv,loan_term,intro_rate,balloon,int_only_pmts,neg_amort,non_amort_features,property_value,manufactured_type,manufactured_interest,total_units,affordable_units,app_submission,initially_payable,mlo_id,aus_1,aus_2,aus_3,aus_4,aus_5,aus_code_5,aus_result_1,aus_result_2,aus_result_3,aus_result_4,aus_result_5,aus_code_16,reverse_mortgage,open_end_credit,business_purpose
11,2,1XH57Q2X2Z,64TL7LOMOR0RNWE9CGHMU7N9WQ7V3P9Z8XJ0K4WEQ6R58,,4,1,2,2,3,4119,6,20181125,1234 Hocus Potato Way,Tatertown,WI,33679,72133,72133953600,1,,,,,2582DV0C0U4UONT1X3ZEIMG5A,1,2.0,,,,U8G638P82DNMM7LOHQZ3JDP3MNS9TVEEI20OU6,1,1,7,,,,,PBL1FJGH1UAXQ5A14E5ZML8LO5578PYIUTBK53IVWN5WGI...,VJV8HKAL6Y27A86ONEHMK8I37MA1DQRT1M2RQMLPR32GIX...,9YS94UYJWSHKUOKJJNDTH79IHXPNRZA8NUMH3KRQGX1YNM...,1,,,,,RE1I2XBHHYYZV21DV1EMGCHBH7XIFDR7OXE,L6OCPMWHVR16RYZ0GCDW77KR5WJI3YAPP9RB57Y6D2IVND...,48CZ6JYDCD8CYZJM2VOZR,3,1,1,1,1,1,32,41,,0,,3,1,8888,8888,9,,9,,10,,,,,,,,,,18.18,,,,,17,2,2,2,1,5704.0,3,5,20,1.0,3,3,,6,,,,,,17,,,,,,1,1,2
19,2,1XH57Q2X2Z,64TL7LOMOR0RNWE9CGHM4C60QW2EYHT2CMBSY6YGP2027,20180321.0,1,4,2,2,3,8485,4,20180419,1234 Hocus Potato Way,Tatertown,AZ,8045,49017,49017000400,4,,,,,L4ODW664RTQJG5VWQKG8JLJ53DCE94GHLXEAY4EDTHVGEC...,1,2.0,,,,94AXKUYHMLMBGS,3,1,2,3.0,,44.0,,KTO74MRQAGXZUQVXGK5SEVEEUSVS3,N14YFARX0WQ6IBG0WMP3AX8R7KDUZMRSNRM8CAE02XORVY...,JZ4Y0V96RUHIAP514IJOW7W1HGFZUQUPF7HVJ5VHXZPF4C...,3,,,,,EHWE07AHBK9CD39B328TQBTL5WBDTPO2COPJKPRN1BMBTPEL,DR0HYZORIKYL,RBNS0Y9MAL40UA92UQFTWRLW59MP4TDFYMZRATRB8VHD4Y...,2,1,1,5,2,4,74,104,,0,,3,2,8888,8888,9,,9,,10,,,,,,,,,,,,,,,18,1,2,1,2,,3,5,26,8.0,1,2,,3,,,,,,6,,,,,,1,1,2
39,2,1XH57Q2X2Z,64TL7LOMOR0RNWE9CGHMZ7UVUUIU12YLVSDZLMKSI1455,,1,2,2,2,3,19437,6,20181028,1234 Hocus Potato Way,Tatertown,AZ,10573,38097,38097970100,2,,,,,K9TH1KGICH7RL9AB4CGD,5,,,,,CJ541P6SGZRCEGXMA8JQD5NRJUBM11RX5L3DJNH83LJKWP...,1,4,7,,,,,JWZKYN65AIK58L,ZFSZPK7JQO3Y9E1J9VTI1G9K,3669K1S8JC8JDZ3Z6FGJF6ZOQUJZ8W2IFCP0P1ZTX95,41,,,,,U610V9YDHPCAJRI2CVLC61OGQ2D1M4ZMPRPSJQUDKMTEZY...,XTN1PQ48IU7RH1NL46XIYNJJ2OHNRGLI6Z7XZBKKPQFHG1...,UEIW8THCM276CEHNIOTPF46TG0GT96E1WYMB7WO9R4ZK9L...,3,2,1,1,2,2,41,30,1084.0,1,,2,2,8888,8888,9,,9,,10,,,,,,,4967.0,2895.0,4894.0,8.08,,,,302.0,14,2,2,1,1,10493.0,2,2,17,,3,3,,6,,,,,,17,,,,,,2,2,2
67,2,1XH57Q2X2Z,64TL7LOMOR0RNWE9CGHMJCK8SCOLPIBN6T3F1473KA619,20180428.0,1,32,2,2,3,6799,3,20180711,1234 Hocus Potato Way,Tatertown,NV,850,31155,31155968400,3,,,,,QB9MU4P55EAQKYZB5BCW9HDZ9GUIW6JAA674IEDL0ELF1T...,12,,,,,D0C89G9PXUQTNLCSS1LKEDWY0A7G80C4N42C8IAAQNKICS...,3,2,4,23.0,,,,8K0ZJ6FK80LOWWO0YWNL3UNH752JVF3D7CPC4O3UQSFJ1K...,QP35VP,3TBNQDTA03TF5KY4PX96O6MBOYY16JYPOPU2F8796Z8W3H...,2,,,,,JNL1KWSZ9E89X2WUYIX4QVI0LX7FOTPK022HIDIHBNVQ1L...,JV033K21BL7618A5ZLTXJIQ7QADH1VOI6ZU8MR8VD60O0G7K,D30BZMWMM0431P62DTJBEDHQSCB5P,2,2,1,2,1,1,65,100,,0,,3,1,445,350,3,,3,,7,3.0,5.0,6.0,,,,,,,,,,137.0,,10,2,2,1,1,6551.0,3,5,23,11.0,2,1,,6,,,,,,17,,,,,,1,2,2
83,2,1XH57Q2X2Z,64TL7LOMOR0RNWE9CGHMSBBGBA4F5VSEIZCAUC6OV5414,20181010.0,3,31,2,2,2,6856,2,20181010,1234 Hocus Potato Way,Tatertown,MN,65231,2185,2185000100,1,,,,,8I4FK,5,,,,,Z8ZLKR1WOHOD8VEDKNS,1,4,5,1.0,4.0,3.0,2.0,UTDD32XGVGK8O59KZ3IB3SY5J52326SENINYJ9,NTD5,730M2JFK40HFD1ZODSKWA1YC5E96YVETBZTH9TGHLDG4ZE...,7,,,,,SHAJT6U7IBNM2ZKTT9AA,M5RVYCSQI12U6MZHZL9ZI0QXN5XRIDNEXJ5M0U05HYSHQR...,I1J87ML28RGIOHA63GVD0Q2M1ORVGQG1BHJHUO1W3T5O38...,1,3,2,2,1,1,103,68,,0,,3,2,729,635,6,,2,,10,,,,,,,,,,24.24,,,197.0,,31,1,2,2,1,18978.0,3,5,23,6.0,2,2,,2,,,,,,10,,,,,,1,2,2
89,2,1XH57Q2X2Z,64TL7LOMOR0RNWE9CGHMR9I3JYU85CJ201H60TERIXE55,20180416.0,1,4,2,2,2,3068,4,20180927,1234 Hocus Potato Way,Tatertown,SC,90670,13209,13209950300,11,,,,,GEWGAG0Z1ZWKWFMABE1DQXWQ9LM0DL6LUC6OF19O2NVB5Z...,5,,,,,V0SU88N80AA28I3Q1BX3TNZ7NJTE1MO3NUC28F8T7D,2,4,25,,,,,5GIO96RFN425FEBI3WYS05GX2O9JD7E2BEZTTH488,GXF0HCK50J43K6BG2NQNK,Z71MWVRN,6,,,,,XY0WQDZ2LK11WECI0CBYYU7NAMMVJX1E9J87IZVTMJZ,FFS8HZRPO80TOXOL7W95L,NQ13VMSPZYJ0LRJNFDRN5HGWDRIKY6NB920W4BDATAHKDF...,2,3,1,5,1,4,62,5,,0,,3,1,8888,8888,9,,9,,10,,,,,,,,,,,13.0,,,309.0,26,1,1,2,1,,3,5,8,8.0,1,3,,6,,,,,,17,,,,,,2,2,2


In [28]:
'72133' in cbsas.countyFips

False

0        06037
1        41005
2        04013
3        42017
4        37183
5        36103
6        48201
7        48113
8        25017
9        40095
10       32003
11       02275
12       06085
13       36061
14       29189
15       08013
16       36081
17       09009
18       13215
19       06065
20       26163
21       36081
22       36005
23       72053
24       39035
25       50007
26       36081
27       13067
28       31153
29       42071
         ...  
73972    47037
73973    06067
73974    29095
73975    27151
73976    13067
73977    54033
73978    17031
73979    06019
73980    17111
73981    36059
73982    06037
73983    13277
73984    25013
73985    13169
73986    45013
73987    06037
73988    13297
73989    06075
73990    18071
73991    04003
73992    48113
73993    41031
73994    36005
73995    06073
73996    17031
73997    06037
73998    06073
73999    24027
74000    37051
74001    27005
Name: countyFips, Length: 74002, dtype: object