In [41]:
import numpy as np
import pandas as pd
import sqlite3

## Categorical

In [42]:
# Action-taken codes -> human-readable labels.
actions = {
    1: 'Loan Originated',
    2: 'Application approved but not accepted',
    3: 'Application denied',
    4: 'Application withdrawn by applicant',
    5: 'File closed for incompleteness',
    6: 'Purchased loan',
    7: 'Preapproval request denied',
    8: 'Preapproval request approved but not accepted',
}


def actions_l(k):
    """Translate a raw action-taken code into its label.

    Parameters
    ----------
    k : str or int
        Code as read from the file; it is coerced with ``int()``.

    Returns
    -------
    str
        The matching entry of ``actions``.

    Raises
    ------
    ValueError
        If ``k`` cannot be parsed as an integer.
    KeyError
        If the parsed code is not a key of ``actions``.
    """
    # Look the code up in ``actions``. The earlier version indexed
    # ``purchasers`` here, which labelled action 1 as 'Fannie Mae' and
    # raised KeyError for action 7.
    return actions[int(k)]

# Purchaser-type codes -> human-readable labels.
purchasers = {
    0: 'NA',
    1: 'Fannie Mae',
    2: 'Ginnie Mae',  # label previously misspelled as 'Ginnia Mae'
    3: 'Freddie Mac',
    4: 'Farmer Mac',
    5: 'Private securitizer',
    6: 'Commercial bank, savings bank, or savings association',
    71: 'Credit union, mortgage company, or finance company',
    72: 'Life insurance company',
    8: 'Affiliate institution',
    9: 'Other type of purchaser',
}


def purchasers_l(k):
    """Translate a raw purchaser-type code into its label.

    Parameters
    ----------
    k : str or int
        Code as read from the file; it is coerced with ``int()``.

    Returns
    -------
    str
        The matching entry of ``purchasers``.

    Raises
    ------
    ValueError
        If ``k`` cannot be parsed as an integer.
    KeyError
        If the parsed code is not a key of ``purchasers``.
    """
    return purchasers[int(k)]

# Each table below maps an integer code to its label; the matching ``*_l``
# function coerces the raw value with int() and indexes the table, so an
# unparseable value raises ValueError and an unknown code raises KeyError.

preapproval = {
    1: 'Preapproval requested',
    2: 'Preapproval not requested',
}


def preapproval_l(k):
    """Return the ``preapproval`` label for code ``k``."""
    return preapproval[int(k)]


loanTypes = {
    1: 'Conventional (not insured or guaranteed by FHA, VA, RHS, or FSA)',
    2: 'Federal Housing Administration insured (FHA)',
    3: 'Veterans Affairs guaranteed (VA)',
    4: 'USDA Rural Housing Service or Farm Service Agency guaranteed (RHS or FSA)',
}


def loanTypes_l(k):
    """Return the ``loanTypes`` label for code ``k``."""
    return loanTypes[int(k)]


loanPurposes = {
    1: 'Home purchase',
    2: 'Home improvement',
    31: 'Refinancing',
    32: 'Cash-out refinancing',
    4: 'Other purpose',
    5: 'NA',
}


def loanPurposes_l(k):
    """Return the ``loanPurposes`` label for code ``k``."""
    return loanPurposes[int(k)]


liens = {
    1: 'First lien',
    2: 'Subordinate lien',
}


def liens_l(k):
    """Return the ``liens`` label for code ``k``."""
    return liens[int(k)]

# Loan-feature flags. Each table maps an integer code to its label and each
# ``*_l`` function coerces the raw value with int() before indexing the
# table (ValueError on unparseable input, KeyError on an unknown code).

reverse = {
    1: 'Yes',
    2: 'No',
    1111: 'Exempt',
}


def reverse_l(k):
    """Return the ``reverse`` label for code ``k``."""
    return reverse[int(k)]


openLine = {
    1: 'Open-end line of credit',
    2: 'Not an open-end line of credit',
    1111: 'Exempt',
}


def openLine_l(k):
    """Return the ``openLine`` label for code ``k``."""
    return openLine[int(k)]


commercial = {
    1: 'Primarily for a business or commercial purpose',
    2: 'Not primarily for a business or commercial purpose',
    1111: 'Exempt',
}


def commercial_l(k):
    """Return the ``commercial`` label for code ``k``."""
    return commercial[int(k)]


hoepas = {
    1: 'High-cost mortgage',
    2: 'Not high-cost mortgage',
    3: 'NA',
}


def hoepas_l(k):
    """Return the ``hoepas`` label for code ``k``."""
    return hoepas[int(k)]


negativeAmos = {
    1: 'Negative amortization',
    2: 'No negative amortization',
    1111: 'Exempt',
}


def negativeAmos_l(k):
    """Return the ``negativeAmos`` label for code ``k``."""
    return negativeAmos[int(k)]


interestOnlys = {
    1: 'Interest-only payments',
    2: 'No interest-only payments',
    1111: 'Exempt',
}


def interestOnlys_l(k):
    """Return the ``interestOnlys`` label for code ``k``."""
    return interestOnlys[int(k)]


balloons = {
    1: 'Balloon payment',
    2: 'No balloon payment',
    1111: 'Exempt',
}


def balloons_l(k):
    """Return the ``balloons`` label for code ``k``."""
    return balloons[int(k)]


otherNonAmos = {
    1: 'Other non-fully amortizing features',
    2: 'No other non-fully amortizing features',
    1111: 'Exempt',
}


def otherNonAmos_l(k):
    """Return the ``otherNonAmos`` label for code ``k``."""
    return otherNonAmos[int(k)]

# Property-related code tables. Each ``*_l`` function coerces the raw value
# with int() and indexes the table of the same name (ValueError on
# unparseable input, KeyError on an unknown code).

constructions = {
    1: 'Site-built',
    2: 'Manufactured home',
}


def constructions_l(k):
    """Return the ``constructions`` label for code ``k``."""
    return constructions[int(k)]


occupancies = {
    1: 'Principal residence',
    2: 'Second residence',
    3: 'Investment property',
}


def occupancies_l(k):
    """Return the ``occupancies`` label for code ``k``."""
    return occupancies[int(k)]


manufacturedSecured = {
    1: 'Manufactured home and land',
    2: 'Manufactured home and not land',
    3: 'NA',
    1111: 'Exempt',
}


def manufacturedSecured_l(k):
    """Return the ``manufacturedSecured`` label for code ``k``."""
    return manufacturedSecured[int(k)]


manufacturedLand = {
    1: 'Direct ownership',
    2: 'Indirect ownership',
    3: 'Paid leasehold',
    4: 'Unpaid leasehold',
    5: 'NA',
    1111: 'Exempt',
}


def manufacturedLand_l(k):
    """Return the ``manufacturedLand`` label for code ``k``."""
    return manufacturedLand[int(k)]

# Credit-scoring-model codes -> labels.
creditScoreType = {
    1: 'Equifax Beacon 5.0',
    2: 'Experian Fair Isaac',
    3: 'FICO Risk Score Classic 04',
    4: 'FICO Risk Score Classic 98',
    5: 'VantageScore 2.0',
    6: 'VantageScore 3.0',
    7: 'More than one credit scoring model',
    8: 'Other credit scoring model',
    9: 'NA',
    10: 'No co-applicant',
    # NOTE(review): both 11 and 1111 map to 'Exempt'; every other table in
    # this notebook uses only 1111 for that label -- confirm code 11.
    11: 'Exempt',
    1111: 'Exempt',
}


def creditScoreType_l(k):
    """Return the ``creditScoreType`` label for code ``k``.

    ``k`` is coerced with int(); an unparseable value raises ValueError and
    an unknown code raises KeyError.
    """
    return creditScoreType[int(k)]

# Ethnicity codes -> labels.
ethnicities = {
    1: 'Hispanic or Latino',  # label previously misspelled as 'Hispannc'
    11: 'Mexican',
    12: 'Puerto Rican',
    13: 'Cuban',
    14: 'Other Hispanic or Latino',
    2: 'Not Hispanic or Latino',
    3: 'Information not provided by applicant',
    4: 'NA',
}


def ethnicities_l(k):
    """Translate a raw ethnicity code into its label.

    Parameters
    ----------
    k : str or int
        Code as read from the file; it is coerced with ``int()``.

    Returns
    -------
    str
        The matching entry of ``ethnicities``.

    Raises
    ------
    ValueError
        If ``k`` cannot be parsed as an integer.
    KeyError
        If the parsed code is not a key of ``ethnicities``.
    """
    return ethnicities[int(k)]

# How a demographic field was collected. The co-applicant variant carries
# one extra code (4) for records without a co-applicant.

ethnicityObsers = {
    1: 'Collected on the basis of visual observation or surname',
    2: 'Not collected on the basis of visual observation or surname',
    3: 'NA',
}


def ethnicityObsers_l(k):
    """Return the ``ethnicityObsers`` label for code ``k`` (coerced with int())."""
    return ethnicityObsers[int(k)]


coEthObs = {
    1: 'Collected on the basis of visual observation or surname',
    2: 'Not collected on the basis of visual observation or surname',
    3: 'NA',
    4: 'No co-applicant',
}


def coEthObs_l(k):
    """Return the ``coEthObs`` label for code ``k`` (coerced with int())."""
    return coEthObs[int(k)]

# Race codes -> labels. Two-digit codes are sub-categories of the
# single-digit code they start with.
races = {
    1: 'American Indian or Alaska Native',
    2: 'Asian',
    21: 'Asian-Indian',
    22: 'Chinese',
    23: 'Filipino',
    24: 'Japanese',
    25: 'Korean',
    26: 'Vietnamese',
    27: 'Other Asian',
    3: 'Black or African American',
    # Label previously truncated to '... Other Pacific Island'.
    4: 'Native Hawaiian or Other Pacific Islander',
    41: 'Native Hawaiian',
    42: 'Guamanian or Chamorro',
    43: 'Samoan',
    44: 'Other Pacific Islander',
    5: 'White',
    6: 'Information not provided by applicant',
    7: 'NA',
}


def races_l(k):
    """Translate a raw race code into its label.

    Parameters
    ----------
    k : str or int
        Code as read from the file; it is coerced with ``int()``.

    Returns
    -------
    str
        The matching entry of ``races``.

    Raises
    ------
    ValueError
        If ``k`` cannot be parsed as an integer.
    KeyError
        If the parsed code is not a key of ``races``.
    """
    return races[int(k)]

# Sex codes -> labels.
sexes = {
    1: 'Male',
    2: 'Female',
    3: 'Information not provided by applicant',  # previously misspelled 'appliant'
    4: 'NA',
    5: 'No co-applicant',
    6: 'Selected both male and female',
}


def sexes_l(k):
    """Translate a raw sex code into its label.

    Parameters
    ----------
    k : str or int
        Code as read from the file; it is coerced with ``int()``.

    Returns
    -------
    str
        The matching entry of ``sexes``.

    Raises
    ------
    ValueError
        If ``k`` cannot be parsed as an integer.
    KeyError
        If the parsed code is not a key of ``sexes``.
    """
    return sexes[int(k)]


def above62_l(x):
    """Map 'Yes'/'No' to a boolean and pass any other value through.

    Parameters
    ----------
    x : object
        Raw field value.

    Returns
    -------
    bool or object
        ``True`` for ``'Yes'``, ``False`` for ``'No'``, otherwise ``x``
        itself, unchanged.
    """
    if x == 'Yes':
        return True
    if x == 'No':
        return False
    return x

# Application-channel, underwriting-system and denial-reason code tables.
# Each ``*_l`` function coerces the raw value with int() and indexes the
# table of the same name (ValueError on unparseable input, KeyError on an
# unknown code).

submissionTypes = {
    1: 'Submitted directly to your institution',
    2: 'Not submitted directly to your institution',
    3: 'NA',
    1111: 'Exempt',
}


def submissionTypes_l(k):
    """Return the ``submissionTypes`` label for code ``k``."""
    return submissionTypes[int(k)]


payableTypes = {
    1: 'Initially payable to your institution',
    2: 'Not initially payable to your institution',
    3: 'NA',
    1111: 'Exempt',
}


def payableTypes_l(k):
    """Return the ``payableTypes`` label for code ``k``."""
    return payableTypes[int(k)]


underwriterSystems = {
    1: 'Desktop Underwriter',
    2: 'Loan Prospector (LP) or Loan Product Advisor',
    3: 'Technology Open to Approved Lenders (TOTAL) Scorecard',
    4: 'Guaranteed Underwriting System (GUS)',
    5: 'Other',
    6: 'NA',
    7: 'Internal Proprietary System',
    1111: 'Exempt',
}


def underwriterSystems_l(k):
    """Return the ``underwriterSystems`` label for code ``k``."""
    return underwriterSystems[int(k)]


denials = {
    1: 'Debt-to-income ratio',
    2: 'Employment history',
    3: 'Credit history',
    4: 'Collateral',
    5: 'Insufficient cash (downpayment, closing costs)',
    6: 'Unverifiable information',
    7: 'Credit application incomplete',
    8: 'Mortgage insurance denied',
    9: 'Other',
    10: 'NA',
}


def denials_l(k):
    """Return the ``denials`` label for code ``k``."""
    return denials[int(k)]

In [43]:
# Column name -> decoding function, passed to pd.read_csv(converters=...).
# Columns that share a decoder are grouped with dict.fromkeys; the key
# order is the same as a flat literal listing would give.
# NOTE(review): keys must match the file's header text exactly -- confirm
# the mix of '-' and '_' separators against the actual column names.
converters = {
    'action_taken': actions_l,
    'purchaser_type': purchasers_l,
    'preapproval': preapproval_l,
    'loan_type': loanTypes_l,
    'loan_purpose': loanPurposes_l,
    'lien_status': liens_l,
    'reverse_mortgage': reverse_l,
    'open_end_line_of_credit': openLine_l,
    'business_or_commercial_purpose': commercial_l,
    'hoepa_status': hoepas_l,
    'negative_amortization': negativeAmos_l,
    'interest_only_payment': interestOnlys_l,
    'balloon_payment': balloons_l,
    'other_nonamortizing_features': otherNonAmos_l,
    'construction_method': constructions_l,
    'occupancy_type': occupancies_l,
    'manufactured_home_secured_property_type': manufacturedSecured_l,
    'manufactured_home_land_property_interest': manufacturedLand_l,
    **dict.fromkeys(
        [
            'applicant_credit_score_type',
            'co_applicant_credit_score_type',
        ],
        creditScoreType_l,
    ),
    **dict.fromkeys(
        [
            'applicant_ethnicity-1',
            'applicant_ethnicity-2',
            'applicant_ethnicity-3',
            'applicant_ethnicity-4',
            'applicant_ethnicity-5',
            'co_applicant_ethnicity-1',
            'co_applicant_ethnicity-2',
            'co_applicant_ethnicity-3',
            'co_applicant_ethnicity-4',
            'co_applicant_ethnicity-5',
        ],
        ethnicities_l,
    ),
    'applicant_ethnicity_observed': ethnicityObsers_l,
    'co_applicant_ethnicity_observed': coEthObs_l,
    **dict.fromkeys(
        [
            'applicant_race-1',
            'applicant_race-2',
            'applicant_race-3',
            'applicant_race-4',
            'applicant_race-5',
            'co_applicant_race-1',
            'co_applicant_race-2',
            'co_applicant_race-3',
            'co_applicant_race-4',
            'co_applicant_race-5',
        ],
        races_l,
    ),
    # The race/sex "observed" flags reuse the ethnicity-observed decoders.
    'applicant_race_observed': ethnicityObsers_l,
    'co_applicant_race_observed': coEthObs_l,
    **dict.fromkeys(['applicant_sex', 'co_applicant_sex'], sexes_l),
    'applicant_sex_observed': ethnicityObsers_l,
    'co_applicant_sex_observed': coEthObs_l,
    **dict.fromkeys(
        ['applicant_age_above_62', 'co_applicant_age_above_62'],
        above62_l,
    ),
    'submission_of_application': submissionTypes_l,
    **dict.fromkeys(
        ['aus_1', 'aus_2', 'aus_3', 'aus_4', 'aus_5'],
        underwriterSystems_l,
    ),
    **dict.fromkeys(
        [
            'denial_reason_1',
            'denial_reason_2',
            'denial_reason_3',
            'denial_reason_4',
        ],
        denials_l,
    ),
}

In [44]:
# Columns in which the literal text 'Exempt' is to be read as missing.
exempts = [
    'combined_loan_to_value_ratio',
    'interest_rate',
    'rate_spread',
    'total_loan_costs',
    'total_points_and_fees',
    'origination_charges',
    'discount_points',
    'lender_credits',
    'loan_term',
    'prepayment_penalty_term',
    'intro_rate_period',
    'property_value',
    'total_units',
    'multifamily_affordable_units',
    'debt_to_income_ratio',
]
# Per-column NA marker mapping: every listed column -> 'Exempt'.
exemptNulls = dict.fromkeys(exempts, 'Exempt')

In [45]:
# Location of the pipe-delimited LAR text file (absolute, machine-specific).
data = "C:\\Users\\WilliamRobinson\\OneDrive - HW Publishing LLC\\Documents\\Data\\HMDA\\2022\\2022_lar.txt"

# Decode the categorical columns while parsing, read 'Exempt' as missing in
# the columns listed in exemptNulls, and keep three columns as text.
df = pd.read_csv(
    data,
    sep='|',
    converters=converters,
    na_values=exemptNulls,
    dtype=dict.fromkeys(
        ['state_code', 'applicant_age', 'co_applicant_age'],
        'str',
    ),
)

  df = pd.read_csv(data, delimiter='|',


## To SQL

In [46]:
# Write the frame to the 'Annual LAR' table of the SQLite database.
# A sqlite3 connection used as a context manager only commits (or rolls
# back) the transaction; it does not close the connection. Close it
# explicitly so the database file handle is released even if the write fails.
conn = sqlite3.connect("C:\\Users\\WilliamRobinson\\OneDrive - HW Publishing LLC\\Documents\\Data\\HMDA\\HMDA.sqlite")
try:
    with conn:
        # to_sql defaults to if_exists='fail', so this raises ValueError when
        # the table already exists (e.g. on a second run of this cell).
        df.to_sql('Annual LAR', conn, index=False)
finally:
    conn.close()