## Pricing Population Generator

In [6]:
from faker import Faker
from datetime import datetime, timedelta
import random
import pandas as pd
import ipywidgets as widgets
from IPython.display import display

fake = Faker()

def custom_HmdaActionDate():
    """Return a random calendar date in 2023 formatted as 'YYYY-MM-DD'.

    The date is 1 January 2023 plus a whole-day offset drawn uniformly from
    0 to 364 inclusive, so the latest possible result is 31 December 2023.
    """
    offset = timedelta(days=random.randint(0, 364))
    return (datetime(2023, 1, 1) + offset).strftime('%Y-%m-%d')

def generate_random_AIP(mean=100, std_dev=10):
    """Draw a Gaussian AIP value, pin it to the 90-110 band, round to 2 places.

    Args:
        mean: Centre of the Gaussian draw.
        std_dev: Standard deviation of the Gaussian draw.

    Returns:
        The draw rounded to two decimals; draws above 110 come back as 110 and
        draws below 90 come back as 90 (out-of-band values are not redrawn).
    """
    draw = random.gauss(mean, std_dev)
    if draw > 110:
        draw = 110
    elif draw < 90:
        draw = 90
    return round(draw, 2)

# Branch name -> MSA code lookup used when building a record. Keys are city
# names used as branch names; values are 5-digit codes kept as strings.
# NOTE(review): the codes are reproduced exactly as found and have not been
# checked against an official list. Several city names are shared by more than
# one metro area (e.g. Springfield, Columbia, Portland) -- confirm before
# relying on them for anything beyond synthetic data.
branch_to_msa = {
    'New York': '35614',
    'Los Angeles': '31080',
    'Chicago': '16980',
    'Dallas': '19100',
    'Houston': '26420',
    'Washington': '47780',
    'Philadelphia': '37964',
    'Atlanta': '12060',
    'Miami': '33060',
    'Phoenix': '38060',
    'Boston': '14454',
    'San Francisco': '41860',
    'Riverside': '40140',
    'Detroit': '19804',
    'Seattle': '42644',
    'Minneapolis': '33460',
    'Tampa': '45300',
    'San Diego': '41740',
    'Denver': '19740',
    'Baltimore': '12580',
    'St. Louis': '41180',
    'Orlando': '36740',
    'Charlotte': '16740',
    'San Antonio': '41700',
    'Portland': '38860',
    'Sacramento': '40900',
    'Pittsburgh': '38300',
    'Las Vegas': '29780',
    'Cincinnati': '17140',
    'Kansas City': '28140',
    'Columbus': '18020',
    'Indianapolis': '26900',
    'Cleveland': '17380',
    'Nashville': '34980',
    'San Jose': '41940',
    'Virginia Beach': '47260',
    'Jacksonville': '27260',
    'Providence': '39300',
    'Milwaukee': '33340',
    'Raleigh': '39580',
    'Oklahoma City': '36420',
    'Richmond': '39980',
    'Memphis': '32820',
    'Louisville': '31140',
    'Salt Lake City': '41620',
    'New Orleans': '35380',
    'Hartford': '25540',
    'Buffalo': '15380',
    'Birmingham': '13820',
    'Rochester': '40340',
    'Grand Rapids': '24330',
    'Tucson': '46060',
    'Fresno': '23420',
    'Tulsa': '46140',
    'Honolulu': '46520',
    'Bridgeport': '14860',
    'Worcester': '49340',
    'Albuquerque': '10740',
    'Omaha': '36540',
    'Albany': '10500',
    'New Haven': '35300',
    'Bakersfield': '12540',
    'Knoxville': '28940',
    'Greenville': '24740',
    'Ventura': '37100',
    'Allentown': '10900',
    'El Paso': '21340',
    'Baton Rouge': '12940',
    'Dayton': '19420',
    'McAllen': '32580',
    'Columbia': '17860',
    'Greensboro': '24660',
    'Akron': '10420',
    'North Port': '35840',
    'Little Rock': '30780',
    'Stockton': '44700',
    'Charleston': '16620',
    'Syracuse': '45060',
    'Colorado Springs': '17820',
    'Winston-Salem': '49180',
    'Wichita': '48620',
    'Springfield': '44100',
    'Fort Myers': '15980',
    'Boise City': '14260',
    'Toledo': '45780',
    'Madison': '31500',
    'Lakeland': '29460',
    'Ogden': '36260',
    'Des Moines': '19780',
    'Jackson': '27100',
    'Augusta': '12260',
    'Scranton': '42540',
    'Youngstown': '49660',
    'Harrisburg': '25420',
    'Provo': '39340',
    'Palm Bay': '37340',
    'Chattanooga': '16860',
    'Spokane': '44060',
    'Lancaster': '29540',
    'Modesto': '33700',
}

def generate_fake_record():
    """Build one synthetic originated-loan record with demographically weighted pricing.

    Rate_Spread, APR, APOR and Interest_Rate are each a uniform draw multiplied
    by a pricing factor. The factor starts at 1 and gains 0.25 for each of:
    race other than 'White', 'Hispanic' ethnicity, 'Female' sex, and age 62 or
    over; it is capped at 2.

    Returns:
        dict: column name -> value for a single loan. 'Class' repeats the
        ethnicity value, 'Age' holds the age band rather than the numeric age,
        and 'HmdaActionTaken' is always 'Loan Originated'.
    """
    action_date = custom_HmdaActionDate()
    # Pick the branch and its MSA code together from the lookup table.
    branch, msa = random.choice(list(branch_to_msa.items()))
    sex = fake.random_element(elements=('Male', 'Female'))
    age_years = random.randint(21, 100)
    age_band = '62 Yrs or Over' if age_years >= 62 else 'Under 62 Yrs Old'
    ethnicity = fake.random_element(elements=('NonHispanic', 'Hispanic'))
    race = fake.random_element(elements=('White', 'Black or African American', 'Asian', 'American Alaska or Indian', 'Native Hawaiian or Other Pacific Islander'))

    # Each true flag adds 0.25 to the base factor of 1; four flags reach the cap of 2.
    surcharge_flags = (
        race != 'White',
        ethnicity == 'Hispanic',
        sex == 'Female',
        age_years >= 62,
    )
    factor = min(1 + 0.25 * sum(surcharge_flags), 2)

    # Draw the four pricing fields in a fixed order, each scaled by the factor.
    rate_spread, apr, apor, interest_rate = (
        round(random.uniform(low, high) * factor, 3)
        for low, high in ((0, 2), (2, 5), (1, 4), (2, 5))
    )

    record = {
        'Ethnicity': ethnicity,
        'Class': ethnicity,
        'LoanId': fake.random_number(digits=7, fix_len=True),
        'Race': race,
        'AIP': generate_random_AIP(),
        'Rate_Spread': rate_spread,
        'APR': apr,
        'APOR': apor,
        'Interest_Rate': interest_rate,
        'HmdaActionTaken': 'Loan Originated',
        'HmdaActionDate': action_date,
        'branchname': branch,
        'HmdaMsa': msa,
        'LoanOfficer': fake.name(),
        'State': fake.state_abbr(),
        'Purpose': fake.random_element(elements=('Refinance', 'Purchase')),
        'Program': fake.random_element(elements=('CONVENTIONAL FIXED 30-YEAR', 'HomeReady 30 Year Fixed')),
        'Occupancy': fake.random_element(elements=('Owner', 'Second Home', 'Investment')),
        'LoanAmount': fake.random_number(digits=6, fix_len=True),
        'DebtRatio': round(random.uniform(20, 50), 3),
        'LTV': round(random.uniform(30, 100), 3),
        'CLTV': round(random.uniform(30, 100), 3),
        'LoanFICO': random.randint(300, 850),
        'LoanTerm': random.choice([240, 360]),
        'LoanType': fake.random_element(elements=('Conventional', 'FHA', 'VA', 'USDA')),
        'Sex': sex,
        'Age': age_band,
    }
    return record

def generate_and_export_data(b):
    """Generate ``population_size`` fake records and export them to a CSV file.

    Used as the click handler of the "Generate Data" button. The record count
    is read from the module-level ``population_size``. If that name has not
    been set yet, or does not hold a positive integer, a hint is printed and
    nothing is written -- previously this failed with ``NameError`` inside the
    widget callback. A failure to write the file is reported with a printed
    message instead of an exception.

    Args:
        b: The object passed in by the click event; not used.
    """
    try:
        size = population_size
    except NameError:
        # Button clicked before any number was entered.
        size = None
    if not isinstance(size, int) or size < 1:
        print("Enter a positive whole number in 'Population Size' before generating data.")
        return

    records = [generate_fake_record() for _ in range(size)]
    df = pd.DataFrame(records)
    # NOTE(review): absolute, machine-specific output path -- adjust when running elsewhere.
    csv_file_path = r'C:\Users\colby\OneDrive\Documents\Data Analysis\Python_Project_Fair_Lending_Analysis\datasets\fake_pricing_data.csv'
    try:
        df.to_csv(csv_file_path, index=False)
    except OSError as err:
        print(f"Could not write CSV file at {csv_file_path}: {err}")
        return
    print(f"Data exported to CSV file successfully at {csv_file_path}")

# "Generate Data" button shown under the text box.
generate_button = widgets.Button(
    description='Generate Data',
    disabled=False,
    button_style='',
    tooltip='Click to generate data',
    icon='check'
)

# Clicking the button calls generate_and_export_data, which builds the records
# and writes the CSV file.
generate_button.on_click(generate_and_export_data)

# Free-text box for the number of records to generate. It starts empty, so
# population_size is not assigned until something is typed into it.
population_input = widgets.Text(
    value='',
    description='Population Size:',
    disabled=False
)

def update_population_size(change):
    """Store the integer typed into the population-size box in ``population_size``.

    Meant to be registered as an observer of the text widget's ``value``, so
    it runs whenever that value changes. Text that is not a valid integer
    (for example the empty string left after clearing the box) is ignored and
    the last valid size is kept, instead of raising ``ValueError`` from inside
    the callback.

    Args:
        change: Change notification whose ``new`` attribute holds the current
            text of the box.
    """
    global population_size
    try:
        population_size = int(change.new)
    except ValueError:
        # Incomplete or non-numeric input: keep the previous value untouched.
        pass

# Call update_population_size whenever the text box's `value` changes, so the
# module-level population_size is refreshed from the typed text.
population_input.observe(update_population_size, names='value')

# Show the text box and the button as this cell's output.
display(population_input, generate_button)


Text(value='', description='Population Size:')

Button(description='Generate Data', icon='check', style=ButtonStyle(), tooltip='Click to generate data')

Data exported to CSV file successfully at C:\Users\colby\OneDrive\Documents\Data Analysis\Python_Project_Fair_Lending_Analysis\datasets\fake_pricing_data.csv
Data exported to CSV file successfully at C:\Users\colby\OneDrive\Documents\Data Analysis\Python_Project_Fair_Lending_Analysis\datasets\fake_pricing_data.csv


In [1]:
from faker import Faker
from datetime import datetime, timedelta
import random
import pandas as pd
import ipywidgets as widgets
from IPython.display import display

fake = Faker()

def custom_HmdaActionDate():
    """Pick a random day of 2023 and return it as a 'YYYY-MM-DD' string.

    A whole-day offset between 0 and 364 (inclusive) is added to 1 January
    2023, which spans the year through 31 December.
    """
    picked = datetime(2023, 1, 1) + timedelta(days=random.randint(0, 364))
    return picked.date().isoformat()

def generate_random_AIP(mean=100, std_dev=10):
    """Return a Gaussian AIP draw limited to 90..110 and rounded to 2 decimals.

    Args:
        mean: Centre of the Gaussian draw.
        std_dev: Standard deviation of the Gaussian draw.

    Returns:
        The rounded draw, with anything above 110 replaced by 110 and anything
        below 90 replaced by 90.
    """
    lower, upper = 90, 110
    draw = random.gauss(mean, std_dev)
    bounded = upper if draw > upper else (lower if draw < lower else draw)
    return round(bounded, 2)

# Branch name -> MSA code lookup used when building a record. Keys are city
# names used as branch names; values are 5-digit codes kept as strings.
# NOTE(review): the codes are reproduced exactly as found and have not been
# checked against an official list. Several city names are shared by more than
# one metro area (e.g. Springfield, Columbia, Portland) -- confirm before
# relying on them for anything beyond synthetic data.
branch_to_msa = {
    'New York': '35614',
    'Los Angeles': '31080',
    'Chicago': '16980',
    'Dallas': '19100',
    'Houston': '26420',
    'Washington': '47780',
    'Philadelphia': '37964',
    'Atlanta': '12060',
    'Miami': '33060',
    'Phoenix': '38060',
    'Boston': '14454',
    'San Francisco': '41860',
    'Riverside': '40140',
    'Detroit': '19804',
    'Seattle': '42644',
    'Minneapolis': '33460',
    'Tampa': '45300',
    'San Diego': '41740',
    'Denver': '19740',
    'Baltimore': '12580',
    'St. Louis': '41180',
    'Orlando': '36740',
    'Charlotte': '16740',
    'San Antonio': '41700',
    'Portland': '38860',
    'Sacramento': '40900',
    'Pittsburgh': '38300',
    'Las Vegas': '29780',
    'Cincinnati': '17140',
    'Kansas City': '28140',
    'Columbus': '18020',
    'Indianapolis': '26900',
    'Cleveland': '17380',
    'Nashville': '34980',
    'San Jose': '41940',
    'Virginia Beach': '47260',
    'Jacksonville': '27260',
    'Providence': '39300',
    'Milwaukee': '33340',
    'Raleigh': '39580',
    'Oklahoma City': '36420',
    'Richmond': '39980',
    'Memphis': '32820',
    'Louisville': '31140',
    'Salt Lake City': '41620',
    'New Orleans': '35380',
    'Hartford': '25540',
    'Buffalo': '15380',
    'Birmingham': '13820',
    'Rochester': '40340',
    'Grand Rapids': '24330',
    'Tucson': '46060',
    'Fresno': '23420',
    'Tulsa': '46140',
    'Honolulu': '46520',
    'Bridgeport': '14860',
    'Worcester': '49340',
    'Albuquerque': '10740',
    'Omaha': '36540',
    'Albany': '10500',
    'New Haven': '35300',
    'Bakersfield': '12540',
    'Knoxville': '28940',
    'Greenville': '24740',
    'Ventura': '37100',
    'Allentown': '10900',
    'El Paso': '21340',
    'Baton Rouge': '12940',
    'Dayton': '19420',
    'McAllen': '32580',
    'Columbia': '17860',
    'Greensboro': '24660',
    'Akron': '10420',
    'North Port': '35840',
    'Little Rock': '30780',
    'Stockton': '44700',
    'Charleston': '16620',
    'Syracuse': '45060',
    'Colorado Springs': '17820',
    'Winston-Salem': '49180',
    'Wichita': '48620',
    'Springfield': '44100',
    'Fort Myers': '15980',
    'Boise City': '14260',
    'Toledo': '45780',
    'Madison': '31500',
    'Lakeland': '29460',
    'Ogden': '36260',
    'Des Moines': '19780',
    'Jackson': '27100',
    'Augusta': '12260',
    'Scranton': '42540',
    'Youngstown': '49660',
    'Harrisburg': '25420',
    'Provo': '39340',
    'Palm Bay': '37340',
    'Chattanooga': '16860',
    'Spokane': '44060',
    'Lancaster': '29540',
    'Modesto': '33700',
}

def generate_fake_record():
    """Build one synthetic originated-loan record with unweighted pricing fields.

    Fields are drawn independently of one another, with two exceptions:
    'HmdaMsa' is looked up from the randomly chosen branch, and 'Class' repeats
    the 'Ethnicity' value. Previously 'Class' came from its own random draw,
    so a record could say 'Hispanic' in one column and 'NonHispanic' in the
    other.

    Returns:
        dict: column name -> value for a single loan. 'HmdaActionTaken' is
        always 'Loan Originated'.
    """
    hmdaactiondate = custom_HmdaActionDate()
    branchname = random.choice(list(branch_to_msa))  # Randomly select a branch name
    msa_code = branch_to_msa[branchname]  # Get the corresponding MSA code
    sex = fake.random_element(elements=('Male', 'Female'))
    # NOTE(review): the other generators in this file emit an age band string
    # in 'Age'; this one emits the raw integer -- confirm which the consumer expects.
    age = random.randint(21, 100)
    # A single draw feeds both 'Ethnicity' and 'Class' so the columns cannot disagree.
    ethnicity = fake.random_element(elements=('NonHispanic', 'Hispanic'))
    return {
        'Ethnicity': ethnicity,
        'LoanId': fake.random_number(digits=7, fix_len=True),
        'Race': fake.random_element(elements=('White', 'Black or African American', 'Asian', 'American Alaska or Indian', 'Native Hawaiian or Other Pacific Islander')),
        'AIP': generate_random_AIP(),
        'Rate_Spread': round(random.uniform(0, 2), 3),
        'APR': round(random.uniform(2, 5), 3),
        'APOR': round(random.uniform(1, 4), 3),
        'Interest_Rate': round(random.uniform(2, 5), 3),
        'HmdaActionTaken': 'Loan Originated',
        'HmdaActionDate': hmdaactiondate,
        #'Region': fake.state(),
        'branchname': branchname,
        'HmdaMsa': msa_code,
        'LoanOfficer': fake.name(),
        'State': fake.state_abbr(),
        'Purpose': fake.random_element(elements=('Refinance', 'Purchase')),
        'Program': fake.random_element(elements=('CONVENTIONAL FIXED 30-YEAR', 'HomeReady 30 Year Fixed')),
        #'ProgramCategory': None,
        #'Channel': 'Banked - Retail',
        'Occupancy': fake.random_element(elements=('Owner', 'Second Home', 'Investment')),
        'LoanAmount': fake.random_number(digits=6, fix_len=True),
        'DebtRatio': round(random.uniform(20, 50), 3),
        'LTV': round(random.uniform(30, 100), 3),
        'CLTV': round(random.uniform(30, 100), 3),
        'LoanFICO': random.randint(300, 850),
        #'MortgageInsurance': round(random.uniform(0, 50), 2) if random.choice([True, False]) else None,
        #'HmdaDiscountPoints': round(random.uniform(0, 5000), 2) if random.choice([True, False]) else None,
        'LoanTerm': random.choice([240, 360]),
        'LoanType': fake.random_element(elements=('Conventional', 'FHA', 'VA', 'USDA')),
        'Sex': sex,
        'Age': age,
        'Class': ethnicity,
    }
    
## New way to generate fake data that includes a button to generate the data after typing in the population size.
def generate_and_export_data(b):
    """Generate ``population_size`` fake records and export them to a CSV file.

    Used as the click handler of the "Generate Data" button. The record count
    is read from the module-level ``population_size``. If that name has not
    been set yet, or does not hold a positive integer, a hint is printed and
    nothing is written -- previously this failed with ``NameError`` inside the
    widget callback. A failure to write the file is reported with a printed
    message instead of an exception.

    Args:
        b: The object passed in by the click event; not used.
    """
    try:
        size = population_size
    except NameError:
        # Button clicked before any number was entered.
        size = None
    if not isinstance(size, int) or size < 1:
        print("Enter a positive whole number in 'Population Size' before generating data.")
        return

    # Generate the fake records and store them in a list
    records = [generate_fake_record() for _ in range(size)]

    # Convert the list of dictionaries to a pandas DataFrame
    df = pd.DataFrame(records)

    # Export the DataFrame to a CSV file
    # NOTE(review): absolute, machine-specific output path -- adjust when running elsewhere.
    csv_file_path = r'C:\Users\colby\OneDrive\Documents\Data Analysis\Python_Project_Fair_Lending_Analysis\datasets\fake_pricing_data.csv'
    try:
        df.to_csv(csv_file_path, index=False)
    except OSError as err:
        print(f"Could not write CSV file at {csv_file_path}: {err}")
        return

    print(f"Data exported to CSV file successfully at {csv_file_path}")

# "Generate Data" button shown under the text box.
generate_button = widgets.Button(
    description='Generate Data',
    disabled=False,
    button_style='',  # one of 'success', 'info', 'warning', 'danger', or '' as used here
    tooltip='Click to generate data',
    icon='check'  # FontAwesome icon name (without the 'fa-' prefix)
)

# Clicking the button calls generate_and_export_data, which builds the records
# and writes the CSV file.
generate_button.on_click(generate_and_export_data)

# Free-text box for the number of records to generate.
population_input = widgets.Text(
    value='',  # starts empty, so population_size is not assigned until something is typed
    description='Population Size:',
    disabled=False
)

def update_population_size(change):
    """Store the integer typed into the population-size box in ``population_size``.

    Meant to be registered as an observer of the text widget's ``value``, so
    it runs whenever that value changes. Text that is not a valid integer
    (for example the empty string left after clearing the box) is ignored and
    the last valid size is kept, instead of raising ``ValueError`` from inside
    the callback.

    Args:
        change: Change notification whose ``new`` attribute holds the current
            text of the box.
    """
    global population_size
    try:
        population_size = int(change.new)
    except ValueError:
        # Incomplete or non-numeric input: keep the previous value untouched.
        pass

# Call update_population_size whenever the text box's `value` changes, so the
# module-level population_size is refreshed from the typed text.
population_input.observe(update_population_size, names='value')

# Show the text box and the button as this cell's output.
display(population_input, generate_button)

Text(value='', description='Population Size:')

Button(description='Generate Data', icon='check', style=ButtonStyle(), tooltip='Click to generate data')

---

## UW Population Generator

In [3]:
from faker import Faker
from datetime import datetime, timedelta
import random
import pandas as pd
import ipywidgets as widgets
from IPython.display import display

fake = Faker()

def custom_HmdaActionDate():
    """Return a random date within 2023 as a 'YYYY-MM-DD' string.

    The result is 1 January 2023 shifted forward by a uniformly drawn number
    of days from 0 to 364 inclusive.
    """
    year_start = datetime(2023, 1, 1)
    shift_days = random.randint(0, 364)
    return f"{year_start + timedelta(days=shift_days):%Y-%m-%d}"

def generate_random_AIP(mean=100, std_dev=10):
    """Return a Gaussian AIP draw kept within 90..110, rounded to 2 decimals.

    Args:
        mean: Centre of the Gaussian draw.
        std_dev: Standard deviation of the Gaussian draw.

    Returns:
        90 when the draw falls below 90, 110 when it exceeds 110, otherwise
        the draw rounded to two decimal places.
    """
    value = random.gauss(mean, std_dev)
    if value < 90:
        return 90
    if value > 110:
        return 110
    return round(value, 2)

def generate_fake_record():
    """Build one synthetic underwriting record (application denied or loan originated).

    The outcome is weighted by demographics: a record that is non-White,
    Hispanic, Female, or aged 62 or over gets a 70% chance of
    'Application denied'; every other record gets a 30% chance.

    Returns:
        dict: column name -> value for a single application. 'Class' repeats
        the ethnicity value and 'ApplicationDate' is always '2023-01-01'.
    """
    race = fake.random_element(elements=('White', 'Black or African American', 'Asian', 'American Alaska or Indian', 'Native Hawaiian or Other Pacific Islander'))
    ethnicity = fake.random_element(elements=('NonHispanic', 'Hispanic'))
    sex = fake.random_element(elements=('Male', 'Female'))
    age = random.randint(21, 100)
    age_category = 'Under 62 Yrs Old' if age < 62 else '62 Yrs or Over'

    # The ethnicity test must use 'Hispanic', the label actually drawn above.
    # The earlier comparison against 'Hispanic or Latino' could never match, so
    # ethnicity had no effect on the outcome weights.
    in_weighted_group = (
        race != 'White'
        or ethnicity == 'Hispanic'
        or sex == 'Female'
        or age_category == '62 Yrs or Over'
    )
    # Weights are ordered [denied, originated] to match the choices list below.
    if in_weighted_group:
        hmda_action_taken_weights = [0.7, 0.3]  # Higher chance of denial for specified demographics
    else:
        hmda_action_taken_weights = [0.3, 0.7]  # Lower chance of denial for others

    hmda_action_taken = random.choices(
        ['Application denied', 'Loan Originated'],
        weights=hmda_action_taken_weights,
        k=1
    )[0]

    return {
        'Ethnicity': ethnicity,
        'Class': ethnicity,
        'Race': race,
        'Sex': sex,
        'Age': age_category,
        'HmdaActionTaken': hmda_action_taken,
        'HmdaActionDate': custom_HmdaActionDate(),
        'ApplicationDate': '2023-01-01',
        'LoanId': fake.random_number(digits=7, fix_len=True),
        'AIP': generate_random_AIP(),
        'Rate_Spread': round(random.uniform(0, 2), 3),
        'APR': round(random.uniform(2, 5), 3),
        'APOR': round(random.uniform(1, 4), 3),
        'Interest_Rate': round(random.uniform(2, 5), 3),
        'LoanOfficer': fake.name(),
        'State': fake.state_abbr(),
        'Purpose': fake.random_element(elements=('Refinance', 'Purchase')),
        'Program': fake.random_element(elements=('CONVENTIONAL FIXED 30-YEAR', 'HomeReady 30 Year Fixed')),
        'Occupancy': fake.random_element(elements=('Owner', 'Second Home', 'Investment')),
        'LoanAmount': fake.random_number(digits=6, fix_len=True),
        'DebtRatio': round(random.uniform(20, 50), 3),
        'LTV': round(random.uniform(30, 100), 3),
        'CLTV': round(random.uniform(30, 100), 3),
        'LoanFICO': random.randint(300, 850),
        'LoanTerm': random.choice([240, 360]),
        'LoanType': fake.random_element(elements=('Conventional', 'FHA', 'VA', 'FmHA')),
    }

def generate_and_export_data(b):
    """Generate ``population_size`` fake records and export them to a CSV file.

    Used as the click handler of the "Generate Data" button. The record count
    is read from the module-level ``population_size``. If that name has not
    been set yet, or does not hold a positive integer, a hint is printed and
    nothing is written -- previously this failed with ``NameError`` inside the
    widget callback. A failure to write the file is reported with a printed
    message instead of an exception.

    Args:
        b: The object passed in by the click event; not used.
    """
    try:
        size = population_size
    except NameError:
        # Button clicked before any number was entered.
        size = None
    if not isinstance(size, int) or size < 1:
        print("Enter a positive whole number in 'Population Size' before generating data.")
        return

    records = [generate_fake_record() for _ in range(size)]
    df = pd.DataFrame(records)
    # NOTE(review): absolute, machine-specific output path -- adjust when running elsewhere.
    csv_file_path = r'C:\Users\colby\OneDrive\Documents\Data Analysis\Python_Project_Fair_Lending_Analysis\datasets\fake_UW_data.csv'
    try:
        df.to_csv(csv_file_path, index=False)
    except OSError as err:
        print(f"Could not write CSV file at {csv_file_path}: {err}")
        return
    print(f"Data exported to CSV file successfully at {csv_file_path}")

# "Generate Data" button shown under the text box.
generate_button = widgets.Button(
    description='Generate Data',
    disabled=False,
    button_style='',
    tooltip='Click to generate data',
    icon='check'
)

# Clicking the button calls generate_and_export_data, which builds the records
# and writes the CSV file.
generate_button.on_click(generate_and_export_data)

# Free-text box for the number of records to generate. It starts empty, so
# population_size is not assigned until something is typed into it.
population_input = widgets.Text(
    value='',
    description='Population Size:',
    disabled=False
)

def update_population_size(change):
    """Store the integer typed into the population-size box in ``population_size``.

    Meant to be registered as an observer of the text widget's ``value``, so
    it runs whenever that value changes. Text that is not a valid integer
    (for example the empty string left after clearing the box) is ignored and
    the last valid size is kept, instead of raising ``ValueError`` from inside
    the callback.

    Args:
        change: Change notification whose ``new`` attribute holds the current
            text of the box.
    """
    global population_size
    try:
        population_size = int(change.new)
    except ValueError:
        # Incomplete or non-numeric input: keep the previous value untouched.
        pass

# Call update_population_size whenever the text box's `value` changes, so the
# module-level population_size is refreshed from the typed text.
population_input.observe(update_population_size, names='value')

# Show the text box and the button as this cell's output.
display(population_input, generate_button)


Text(value='', description='Population Size:')

Button(description='Generate Data', icon='check', style=ButtonStyle(), tooltip='Click to generate data')

Data exported to CSV file successfully at C:\Users\colby\OneDrive\Documents\Data Analysis\Python_Project_Fair_Lending_Analysis\datasets\fake_UW_data.csv


---