# Ingesting Modified Loan Application Register Data

In [1]:
import pandas as pd
import sqlite3 as sql
import os

CFPB's schema for MLAR data:

In [2]:
# Take the first HTML table on the CFPB modified-LAR schema page, key it by
# field number, and blank out the index name.
schema = (
    pd.read_html('https://ffiec.cfpb.gov/documentation/publications/modified-lar/modified-lar-schema')[0]
    .set_index('Data Field Number')
    .rename_axis('')
)
schema.head()

Unnamed: 0,Data Field Name,Data Field Type,Field Type Description,Valid Values,Descriptions and Examples,Data Point Name
,,,,,,
1.0,Activity Year,Numeric,Integer,2017 2018 2019 2020 2021 2022 2023,,
2.0,Legal Entity Identifier (LEI),Alphanumeric. Width is 20 characters,String,,Example: 12121212121212121212,Legal Entity Identifier (LEI)
3.0,Loan Type,Numeric,Integer,1 2 3 4,Descriptions: 1. Conventional (not insured or...,Loan Type
4.0,Loan Purpose,Numeric,Integer,1 2 31 32 4 5,Descriptions: 1. Home purchase 2. Home impro...,Loan Purpose
5.0,Preapproval,Numeric,Integer,1 2,Descriptions: 1. Preapproval requested 2. Pr...,Preapproval


Use schema to create SQLite table:

In [3]:
# Derive a SQLite column type for every schema field from its two type-description columns.
field_type = schema['Data Field Type']
type_desc = schema['Field Type Description']
is_width_20 = field_type == 'Alphanumeric. Width is 20 characters'
nullable_int_labels = ['Integer or Blank', 'Integer or NA', 'Integer or NA or Exempt']

# Fields matched by no rule keep an empty type.
schema['row_type'] = ''

# Rules are applied in order; a later rule overwrites an earlier one on any row both match.
type_rules = [
    (is_width_20, 'CHAR(20)'),
    (type_desc.isin(nullable_int_labels), 'INT'),
    (type_desc == 'Integer', 'INT NOT NULL'),
    (type_desc.str.contains('Double'), 'FLOAT'),
    ((type_desc == 'String') & ~is_width_20, 'VARCHAR'),
]
for rule_mask, column_type in type_rules:
    schema.loc[rule_mask, 'row_type'] = column_type

In [4]:
# Assemble the CREATE TABLE statement for MLAR: one column per schema row, using the
# field name as the column name followed by its row_type.
# Names are wrapped in double quotes (SQL identifier quoting) with any embedded double
# quote doubled. Single quotes denote string literals in SQL; SQLite accepts them as
# identifiers only as a compatibility quirk, and a field name containing an apostrophe
# would otherwise yield a malformed statement.
quoted_names = schema['Data Field Name'].map(lambda name: '"' + str(name).replace('"', '""') + '"')
column_defs = quoted_names + " " + schema['row_type']
schema_str = "CREATE TABLE MLAR(\n" + ',\n'.join(column_defs) + ");"

In [5]:
# (Re)create an empty MLAR table in the SQLite database from the generated DDL.
os.makedirs('./cleaned', exist_ok=True)  # sqlite3 does not create missing parent directories
connection = sql.connect('./cleaned/hmda.db')
try:
    cursor = connection.cursor()
    cursor.execute("DROP TABLE IF EXISTS MLAR")
    cursor.execute(schema_str)
    connection.commit()
finally:
    # Close even when a statement fails so the database file is not left open.
    connection.close()

Raw data was downloaded from <a href="https://ffiec.cfpb.gov/data-publication/modified-lar/2023">here</a> and is read from the `./raw` directory.

Read data and save to SQLite:

In [8]:
# Columns read as text instead of letting pandas infer a numeric type.
text_columns = [
    'State',
    'Census Tract',
    'Age of Applicant or Borrower',
    'Age of Applicant >= 62',
    'Age of Co-Applicant or Co-Borrower',
    'Age of Co-Applicant >= 62',
    'Debt-to-Income Ratio',
    'Total Units',
]

# for every file in raw directory (sorted so the load order is the same on every run)
for filename in sorted(os.listdir('./raw')):
    filepath = f'./raw/{filename}'

    # skip hidden entries (e.g. checkpoint folders) and anything that is not a regular file
    if filename.startswith('.') or not os.path.isfile(filepath):
        continue

    # read data into Pandas DataFrame; the files carry no header row, so column
    # names come from the schema, and 'NA' / 'Exempt' are treated as missing
    print(f'Reading {filename[0:4]}...')
    df = pd.read_csv(
        filepath,
        delimiter='|',
        na_values=['NA', 'Exempt'],
        header=None,
        names=schema['Data Field Name'],
        dtype={column: 'str' for column in text_columns},
    )

    # save every row to the SQLite database (previously only df.head() was written)
    print('Saving to SQLite...')
    connection = sql.connect('./cleaned/hmda.db')
    try:
        # `with connection` commits or rolls back the transaction but does not close
        # the connection, so it is closed explicitly in the finally clause below
        with connection:
            df.to_sql(name='MLAR', con=connection, index=False, if_exists='append')
    finally:
        connection.close()
        # delete Pandas DataFrame to remove from active memory before the next file
        del df

print('Done.')
        


Reading 2022...
Saving to SQLite...
Reading 2023...
Saving to SQLite...
Done.
