# **Pricing Analysis - National Level**

### *Race*

In [15]:
# New Analysis
from ipyfilechooser import FileChooser
from IPython.display import clear_output, display
from ipywidgets import Output

# File picker for the CSV to analyse; its title is assigned further down in
# this cell, after the widget has been displayed.
fc = FileChooser()

# Output widget that on_file_selected clears at the start of every run and
# that is displayed at the end of this cell.
output = Output()

# Define a function that will be called when a file is selected
def on_file_selected(filechooser):
    """Load the selected CSV and show the interactive race pricing analysis.

    Used as the FileChooser callback. The file is read with pandas, the
    filter widgets are built, and ``interact`` re-runs ``apply_filters``
    every time one of the widgets changes. The interactive UI is rendered
    inside the module-level ``output`` widget so that choosing another file
    replaces the previous analysis instead of stacking a second one.

    Args:
        filechooser: Object whose ``selected`` attribute holds the path of
            the CSV file to analyse.

    Notes:
        The CSV must provide the columns LoanType, HmdaActionTaken, Program,
        AIP, Rate_Spread, Race, branchname and Occupancy.
    """
    # Function-scope imports, as in the original cell.
    import warnings

    import pandas as pd
    import seaborn as sns
    import matplotlib.pyplot as plt
    import ipywidgets as widgets
    from bioinfokit.analys import stat
    from ipywidgets import interact
    from IPython.display import display, HTML
    from plotly.offline import init_notebook_mode
    from tqdm.notebook import tqdm

    warnings.filterwarnings('ignore', category=FutureWarning)
    init_notebook_mode(connected=True)

    # Drop whatever a previous selection rendered; wait=True defers the clear
    # until the replacement content arrives.
    output.clear_output(wait=True)

    file_name = filechooser.selected

    # HTML fragments used to format the results section
    BOLD = '<b>'
    UNDERLINE = '<u>'
    ITALIC = '<i>'
    END = '</b></u></i>'
    INDENT = "&nbsp;" * 6

    # Each protected race is compared against this reference group
    REFERENCE_RACE = 'White'
    PROTECTED_RACES = [
        'Black or African American',
        'American Alaska or Indian',
        'Native Hawaiian or Other Pacific Islander',
        'Asian',
    ]
    # Minimum combined loan count (both groups) before a t-test is attempted
    MIN_TOTAL_LOANS = 30

    all_my_data = pd.read_csv(file_name)

    # Get the unique loan types from the data
    loan_types = all_my_data['LoanType'].unique()

    def show(markup):
        """Render an HTML fragment in the current output area."""
        display(HTML(markup))

    def parse_ttest_summary(summary):
        """Return (mean_diff, p_value) parsed from a bioinfokit t-test summary.

        The values are taken from fixed rows of the printed table (last token
        of line 4 and of line 8), rounded to 2 and 4 decimals respectively.
        NOTE(review): the sign of the mean difference presumably follows the
        order in which the two race levels are encountered - confirm.
        """
        lines = summary.split('\n')
        mean_diff = float(lines[4].split()[-1])
        p_value = float(lines[8].split()[-1])
        return round(mean_diff, 2), round(p_value, 4)

    def apply_filters(stat_sign, mean_diff_range, min_observations, show_stats, show_charts):
        """Run the White-vs-protected-race t-tests for every loan type.

        Args:
            stat_sign: Largest p-value that is still reported.
            mean_diff_range: Smallest mean AIP difference that is reported.
            min_observations: Minimum loan count required in each race group.
            show_stats: 'Include' to print the full t-test summary.
            show_charts: One of 'None ', 'Strip Chart ', 'Boxplot ' or
                'List of Loans ' (trailing space included).
        """
        for loan_type in tqdm(loan_types):
            show("<br>")
            show("<br>")
            show(UNDERLINE + BOLD + f'Loan Type: {loan_type}' + END)
            show("<br>")

            # Only originated loans of this loan type
            originated = all_my_data[
                (all_my_data['LoanType'] == loan_type)
                & (all_my_data['HmdaActionTaken'] == 'Loan Originated')
            ]
            # Exclude HECM programs; na=True also drops rows with a missing
            # Program, which is what the previous `== False` comparison did.
            non_hecm = originated[~originated['Program'].str.contains("HECM", na=True)]

            # Keep only the fields used by the analysis and the loan listing
            race_all_data = non_hecm[["AIP", "Rate_Spread", "Race", "branchname", "Occupancy"]]

            res = stat()

            for race in PROTECTED_RACES:
                show("<br>")
                show("&nbsp;&nbsp;&nbsp;" + ITALIC + BOLD + f'{race} Analysis' + END)

                filtered_data = race_all_data[race_all_data['Race'].isin([REFERENCE_RACE, race])]

                # A two-sample test needs both groups to be present
                if filtered_data['Race'].nunique() != 2:
                    continue

                # Loan count for each race
                loan_counts = filtered_data['Race'].value_counts()

                # Honour the widget's per-group minimum (previously hard-coded
                # to 10) in addition to the overall sample-size floor.
                enough_loans = (
                    loan_counts.min() >= min_observations
                    and len(filtered_data) >= MIN_TOTAL_LOANS
                )
                if not enough_loans:
                    continue

                res.ttest(df=filtered_data, xfac='Race', res='AIP', evar=False, test_type=2)
                mean_diff, p_value = parse_ttest_summary(res.summary)

                # Written as a negated conjunction so NaN values are skipped
                if not (p_value <= stat_sign and mean_diff >= mean_diff_range):
                    continue

                # Headline numbers
                show(INDENT + f'Loan Type: {loan_type}')
                show(INDENT + f'BPS Diff: {BOLD}{mean_diff}{END}')
                show(INDENT + f'p-value: {p_value}')
                show("<br>")
                for race_name, count in loan_counts.items():
                    show(INDENT + f"Loan Count ({race_name}): {count}")
                show("<br>")

                if show_stats == 'Include':
                    show(BOLD + UNDERLINE + 'Statistical Results' + END)
                    show(f'<pre>{res.summary}</pre>')
                    show("<br>")

                if show_charts == 'Strip Chart ':
                    # Seaborn strip chart of AIP grouped by Race
                    show(INDENT + BOLD + UNDERLINE + 'Strip Chart' + END)
                    plt.figure(figsize=(10, 6))
                    sns.set_style('darkgrid')
                    sns.stripplot(x='Race', y='AIP', hue='Race', data=filtered_data, jitter=0.4, dodge=True, linewidth=1, palette="Dark2", legend=False, size=5)
                    plt.title(f'Race: {race} ({loan_type} Loans)')
                    plt.show()
                    show("<br>")

                elif show_charts == 'Boxplot ':
                    # Seaborn boxplot of AIP grouped by Race with the
                    # individual loans drawn on top
                    show(INDENT + BOLD + UNDERLINE + 'Boxplot' + END)
                    plt.figure(figsize=(10, 6))
                    sns.set_style('darkgrid')
                    # RGB (234, 234, 242) scaled to 0-1, plus an alpha of 0
                    box_color = tuple(channel / 255 for channel in (234, 234, 242)) + (0,)
                    ax = sns.boxplot(x='Race', y='AIP', data=filtered_data, color=box_color, showmeans=True, fliersize=0, meanprops={"marker":"o","markerfacecolor":"white", "markeredgecolor":"black", "markersize":"10"})
                    for patch in ax.artists:
                        patch.set_zorder(1)
                    strip = sns.stripplot(x='Race', y='AIP', hue='Race', data=filtered_data, jitter=0.4, dodge=False, linewidth=1, palette="Dark2", legend=False, size=5)
                    # Keep the points above the boxes
                    for collection in strip.collections:
                        collection.set_zorder(2)
                    plt.title(f'Race: {race} ({loan_type} Loans)')
                    plt.show()
                    show("<br>")

                elif show_charts == 'List of Loans ':
                    # Table of the loans included in the test, sorted by Race
                    loans = filtered_data.sort_values('Race', ascending=True)
                    show(INDENT + BOLD + UNDERLINE + 'List of Loans' + END)
                    # Inline CSS shrinks the table font; the index is omitted
                    html_table = '<style> table {font-size: 0.9em;} </style>' + loans.to_html(index=False)
                    show(html_table)
                    show("<br>")

    # Toggle buttons for the p-value threshold
    stat_sign_widget = widgets.ToggleButtons(
        options=[0.05, 0.01, 0.2, 1.0],
        description='Statistical Significance:',
        disabled=False,
        button_style='', # 'success', 'info', 'warning', 'danger' or ''
        tooltips=['95% (Default)', '99%', '80%', '100%'],
    )

    # Toggle buttons for the minimum mean difference
    mean_diff_widget = widgets.ToggleButtons(
        options=[0.25, 0.20, 0.15, 0.10],
        description='Mean Diff (BPS):',
        disabled=False,
        button_style='', # 'success', 'info', 'warning', 'danger' or ''
        tooltips=['25 BPS (Default)', '20 BPS', '15 BPS', '10 BPS'],
    )

    # Toggle buttons for the minimum number of loans per race group
    min_observations_widget = widgets.ToggleButtons(
        options=[10, 20, 30],
        description='Min Observations (Loan Count):',
        disabled=False,
        button_style='', # 'success', 'info', 'warning', 'danger' or ''
        tooltips=['10 Loans (Default)', '20 Loans', '30 Loans'],
    )

    # Toggle buttons for showing the full t-test summary
    show_stats_widget = widgets.ToggleButtons(
        options=["Exclude", "Include"],
        description='Show Statistical Results:',
        disabled=False,
        button_style='', # 'success', 'info', 'warning', 'danger' or ''
        tooltips=['Exclude the stats (Default)', 'Include the stats'],
    )

    # Toggle buttons choosing the optional chart or loan list. The option
    # strings keep their trailing space because apply_filters compares
    # against them verbatim; one tooltip is supplied per option.
    show_charts_widget = widgets.ToggleButtons(
        options=["None ", "Strip Chart ", "Boxplot ", "List of Loans "],
        description='Charts and List of Loans:',
        disabled=False,
        button_style='', # 'success', 'info', 'warning', 'danger' or ''
        tooltips=[
            'No chart or list (Default)',
            'Show the strip chart',
            'Show the boxplot',
            'Show the list of loans',
        ],
        icons=['times', 'area-chart', 'bar-chart', 'list']
    )

    # Render the interactive UI inside the output widget so the clear above
    # actually replaces the previous analysis.
    with output:
        interact(
            apply_filters,
            stat_sign=stat_sign_widget,
            mean_diff_range=mean_diff_widget,
            min_observations=min_observations_widget,
            show_stats=show_stats_widget,
            show_charts=show_charts_widget,
        )

# Set the function to be called when a file is selected
fc.register_callback(on_file_selected)

# Display the FileChooser widget
display(fc)
# The title is assigned after the widget is displayed.
# NOTE(review): presumably the displayed widget picks the new title up live - confirm.
fc.title = '<b>Select File (.csv) for Race National Analysis</b>'

# Display the output widget (the one on_file_selected clears on every selection)
display(output)


FileChooser(path='C:\Users\colby.kellersberger\Documents\Fair Lending Analysis\Pricing', filename='', title=''…

Output()

interactive(children=(ToggleButtons(description='Statistical Significance:', options=(0.05, 0.01, 0.2, 1.0), t…

In [None]:
# File Selection for analysis
from ipyfilechooser import FileChooser

# Create and display a FileChooser widget; the analysis cell below reads
# fc.selected from it.
# NOTE(review): `display` is not imported in this cell - presumably it is
# provided by the notebook environment; confirm.
fc = FileChooser()
display(fc)


In [None]:
# Analysis
import pandas as pd
import numpy as np
import seaborn as sns
import plotly.express as px
import matplotlib.pyplot as plt
import textwrap
from scipy import stats as st
from bioinfokit.analys import stat
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
from IPython.display import display, HTML
#from IPython.display import clear_output
from plotly.offline import iplot, init_notebook_mode
init_notebook_mode(connected=True)
import warnings
warnings.filterwarnings('ignore', category=FutureWarning)
from tqdm.notebook import tqdm

# HTML fragments used to format the results section
BOLD = '<b>'
UNDERLINE = '<u>'
ITALIC = '<i>'
END = '</b></u></i>'

# Stop with a clear message when nothing has been chosen. Previously the
# message was only printed and execution continued, which either failed with
# a NameError on file_name or silently reused the file from an earlier run.
if fc.selected is None:
    raise RuntimeError("No file selected. Please select a file.")
file_name = fc.selected

# Read the selected CSV
all_my_data = pd.read_csv(file_name)

# Strip only a trailing ".csv"; splitting on '.csv' would cut the name at the
# first occurrence anywhere in the path.
if file_name.endswith('.csv'):
    file_name_no_ext = file_name[:-len('.csv')]
else:
    file_name_no_ext = file_name

# Show which file is being analysed
display(HTML(BOLD + UNDERLINE + file_name_no_ext + END))

# Get the unique loan types from the data
loan_types = all_my_data['LoanType'].unique()

def apply_filters(stat_sign, mean_diff_range, min_observations, show_stats, show_stripplot, show_boxplot, print_list):
    """Run the White-vs-protected-race t-tests for every loan type and show the hits.

    For each loan type the data is limited to originated, non-HECM loans.
    Each protected race is then compared with 'White' on AIP using a
    two-sample t-test with unequal variances. A comparison is reported only
    when both groups are present, each has at least ``min_observations``
    loans, the p-value is at most ``stat_sign`` and the mean difference is at
    least ``mean_diff_range``.

    Reads the notebook-level ``all_my_data`` and ``loan_types`` and the HTML
    formatting constants; all results are rendered with ``display``.

    Args:
        stat_sign: Largest p-value that is still reported.
        mean_diff_range: Smallest mean AIP difference that is reported.
        min_observations: Minimum loan count required in each race group.
        show_stats: 'Include' to print the full t-test summary.
        show_stripplot: 'Include' to draw the strip chart.
        show_boxplot: 'Include' to draw the boxplot with loans overlaid.
        print_list: 'Include' to print the loans used in the test.
    """
    def show(markup):
        """Render an HTML fragment in the current output area."""
        display(HTML(markup))

    protected_races = [
        'Black or African American',
        'American Alaska or Indian',
        'Native Hawaiian or Other Pacific Islander',
        'Asian',
    ]

    for loan_type in tqdm(loan_types):
        show("<br>")
        show("<br>")
        show(UNDERLINE + BOLD + f'Loan Type: {loan_type}' + END)
        show("<br>")

        # Only originated loans of this loan type
        originated = all_my_data[
            (all_my_data['LoanType'] == loan_type)
            & (all_my_data['HmdaActionTaken'] == 'Loan Originated')
        ]
        # Exclude HECM programs; na=True also drops rows with a missing
        # Program, which is what the previous `== False` comparison did.
        non_hecm = originated[~originated['Program'].str.contains("HECM", na=True)]

        # Keep only the fields used by the analysis and the loan listing
        race_all_data = non_hecm[["AIP", "Rate_Spread", "Race", "branchname", "Occupancy"]]

        res = stat()

        for race in protected_races:
            show("<br>")
            show(ITALIC + BOLD + f'{race} Analysis' + END)

            filtered_data = race_all_data[race_all_data['Race'].isin(['White', race])]

            # A two-sample test needs both groups to be present
            if filtered_data['Race'].nunique() != 2:
                continue

            # Each group needs enough loans
            if not (filtered_data['Race'].value_counts().min() >= min_observations):
                continue

            res.ttest(df=filtered_data, xfac='Race', res='AIP', evar=False, test_type=2)

            # The mean difference and p-value are read from fixed rows of the
            # printed summary table (last token of line 4 and of line 8).
            # NOTE(review): the sign of the mean difference presumably follows
            # the order in which the two race levels are encountered - confirm.
            lines = res.summary.split('\n')
            mean_diff = float(lines[4].split()[-1])
            p_value = float(lines[8].split()[-1])

            # Written as a negated conjunction so NaN values are skipped
            if not (p_value <= stat_sign and mean_diff >= mean_diff_range):
                continue

            # Headline numbers
            show(f'Loan Type: {loan_type}')
            show(f'BPS Diff: {BOLD}{mean_diff}{END}')
            show(f'p-value: {p_value}')
            show("<br>")

            if show_stats == 'Include':
                show(BOLD + UNDERLINE + 'Statistical Results' + END)
                show(f'<pre>{res.summary}</pre>')
                show("<br>")

            # Seaborn strip chart of AIP grouped by Race
            if show_stripplot == 'Include':
                show(BOLD + UNDERLINE + 'Loan Pricing Plots' + END)
                plt.figure(figsize=(10, 6))
                sns.set_style('darkgrid')
                sns.stripplot(x='Race', y='AIP', hue='Race', data=filtered_data, jitter=0.4, dodge=True, linewidth=1, palette="Dark2", legend=False, size=7)
                plt.title(f'Race: {race} ({loan_type} Loans)')
                plt.show()
                show("<br>")

            # Seaborn boxplot of AIP grouped by Race with the loans drawn on top
            if show_boxplot == 'Include':
                show(BOLD + UNDERLINE + 'Loan Pricing Plot' + END)
                show("<br>")
                plt.figure(figsize=(10, 6))
                sns.set_style('darkgrid')
                sns.boxplot(x='Race', y='AIP', data=filtered_data, color='skyblue', showmeans=True, fliersize=0, meanprops={"marker":"o","markerfacecolor":"white", "markeredgecolor":"black", "markersize":"10"})
                sns.stripplot(x='Race', y='AIP', hue='Race', data=filtered_data, jitter=0.4, dodge=False, linewidth=1, palette="Dark2", legend=True, size=5)
                plt.show()
                show("<br>")

            # Table of the loans included in the test, sorted by Race
            if print_list == 'Include':
                loans = filtered_data.sort_values('Race', ascending=True)
                show(BOLD + UNDERLINE + 'List of Loans' + END)
                # Inline CSS shrinks the table font; the index is omitted
                html_table = '<style> table {font-size: 0.9em;} </style>' + loans.to_html(index=False)
                show(html_table)
                show("<br>")

# Keyword arguments shared by every toggle-button group below
# (button_style may be 'success', 'info', 'warning', 'danger' or '').
_toggle_defaults = dict(disabled=False, button_style='')

# p-value threshold
stat_sign_widget = widgets.ToggleButtons(
    description='Statistical Significance:',
    options=[0.05, 0.01, 0.2, 1.0],
    tooltips=['95% (Default)', '99%', '80%', '100%'],
    **_toggle_defaults,
)

# Minimum mean difference
mean_diff_widget = widgets.ToggleButtons(
    description='Mean Diff (BPS):',
    options=[0.25, 0.20, 0.15, 0.10],
    tooltips=['25 BPS (Default)', '20 BPS', '15 BPS', '10 BPS'],
    **_toggle_defaults,
)

# Minimum number of loans per group
min_observations_widget = widgets.ToggleButtons(
    description='Min Observations (Loan Count):',
    options=[10, 20, 30],
    tooltips=['10 Loans (Default)', '20 Loans', '30 Loans'],
    **_toggle_defaults,
)

# Whether to print the full t-test summary
show_stats_widget = widgets.ToggleButtons(
    description='Show Statistical Results:',
    options=["Exclude", "Include"],
    tooltips=['Exclude the stats (Default)', 'Include the stats'],
    **_toggle_defaults,
)

# Whether to draw the strip chart
show_stripplot_widget = widgets.ToggleButtons(
    description='Show Plots:',
    options=["Exclude", "Include"],
    tooltips=['Exclude the plots (Default)', 'Include the plots'],
    **_toggle_defaults,
)

# Whether to draw the boxplot
show_boxplot_widget = widgets.ToggleButtons(
    description='Show Boxplots:',
    options=["Exclude", "Include"],
    tooltips=['Exclude the plots (Default)', 'Include the plots'],
    **_toggle_defaults,
)

# Whether to print the loans behind each reported comparison
print_list_widget = widgets.ToggleButtons(
    description='Show List of Loans:',
    options=["Exclude", "Include"],
    tooltips=['Exclude the list of loans (Default)', 'Include the list of loans'],
    **_toggle_defaults,
)

# Wire the widgets to apply_filters; it re-runs whenever a selection changes.
# The trailing semicolon is kept from the original cell.
interact(
    apply_filters,
    stat_sign=stat_sign_widget,
    mean_diff_range=mean_diff_widget,
    min_observations=min_observations_widget,
    show_stats=show_stats_widget,
    show_stripplot=show_stripplot_widget,
    show_boxplot=show_boxplot_widget,
    print_list=print_list_widget,
);


---

### *Ethnicity, Age, Sex*

In [None]:
# File Selection for analysis
from ipyfilechooser import FileChooser
from IPython.display import clear_output, display
from ipywidgets import Output

# File picker for the CSV used by the Ethnicity, Age, Sex analysis
fc = FileChooser()

# Output widget that on_file_selected clears at the start of every run
output = Output()

# Define a function that will be called when a file is selected
def on_file_selected(filechooser):
    # Clear the output widget
    output.clear_output(wait=True)
    
    file_name = filechooser.selected
    
    # Analysis
    import pandas as pd
    import numpy as np
    import seaborn as sns
    import warnings
    warnings.filterwarnings('ignore', category=FutureWarning)
    import plotly.express as px
    import matplotlib.pyplot as plt
    from scipy import stats as st
    from bioinfokit.analys import stat
    from ipywidgets import interact, interactive, fixed, interact_manual, Output, IntProgress
    import ipywidgets as widgets
    from IPython.display import display, HTML
    from plotly.offline import iplot, init_notebook_mode
    init_notebook_mode(connected=True)
    from tqdm.notebook import tqdm

    # formatting of the results section
    BOLD = '<b>'
    UNDERLINE = '<u>'
    ITALIC = '<i>'
    END = '</b></u></i>'

    #reading in data
    all_my_data = pd.read_csv(file_name)

    # Get the unique loan types from the data
    loan_types = all_my_data['LoanType'].unique()

    def apply_filters(stat_sign, mean_diff_range, min_observations, show_stats, show_stripplot, show_boxplot, print_list):
        # Loop through each loan type and filter the data accordingly
        for loan_type in tqdm(loan_types):
            display(HTML("<br>"))
            display(HTML(UNDERLINE + BOLD + f'Loan Type: {loan_type}' + END))
            display(HTML("<br>"))

            #filtering to only originated loans and non-HECM programs (Loan Programs: 'Conventional')
            all_da_data = all_my_data[all_my_data['LoanType'] == loan_type]
            all_data = all_da_data[all_da_data['HmdaActionTaken']=='Loan Originated']
            big_all_data = all_data[all_data['Program'].str.contains("HECM")==False]

            #narrowing down the fields for more accurate analysis
            gender_all_data = big_all_data[["LoanId","AIP","Sex","branchname","Rate_Spread"]]
            age_all_data = big_all_data[["LoanId","AIP","Age","branchname","Rate_Spread"]]
            ethn_all_data = big_all_data[["LoanId","AIP","Class","Ethnicity","branchname","Rate_Spread"]]

            res = stat()
            #creating filters
            #Gender Filter
            gender_filter = gender_all_data['Sex'].isin(['Male','Female'])

            # Filter out rows with 'Exclude' in the 'Class' column
            ethn_all_data = ethn_all_data[ethn_all_data['Class'] != 'EXCLUDED']

            # Create a boolean mask to exclude rows with 'Excluded' in the 'Sex' column
            gender_mask = gender_all_data['Sex'] != 'Excluded'

            # Apply the mask to the data frame
            gender_all_data = gender_all_data[gender_mask]

            #applying filters
            gender = gender_all_data[gender_filter]

            # Group the ETHNICITY data by 'branchname'
            #display(HTML("<br>"))
            display(HTML(BOLD + ITALIC + f'Ethnicity Analysis' + END))
            #display(HTML("<br>"))

            # Check if there are exactly two levels in the 'Class' column
            if len(ethn_all_data['Class'].unique()) == 2:
                
                # Check if there are enough observations for each level
                if ethn_all_data['Class'].value_counts().min() >= min_observations:
                    res.ttest(df=ethn_all_data, xfac='Class', res='AIP', evar=False, test_type=2)
                    
                    # Extract the Mean Diff from the summary
                    summary = res.summary
                    lines = summary.split('\n')
                    mean_diff_line = lines[4]
                    mean_diff = float(mean_diff_line.split()[-1])
                
                    # Extract the p-value from the summary
                    p_value_line = lines[8]
                    p_value = float(p_value_line.split()[-1])
                    
                     # Round the mean_diff and p-value to 2 decimal places
                    mean_diff = round(mean_diff, 2)
                    p_value = round(p_value, 4)

                    if p_value <= stat_sign and mean_diff >= mean_diff_range:
                        # Print the Branchname, Mean and P Value, and the full Results
                        display(HTML(f'Loan Type: {loan_type}'))
                        display(HTML(f'BPS Diff: {BOLD}{mean_diff}{END}'))
                        display(HTML(f'p-value: {p_value}'))
                        display(HTML("<br>"))
                        
                        if show_stats == 'Include':
                            display(HTML(BOLD + UNDERLINE + f'Statistical Results' + END))
                            display(HTML(f'<pre>{res.summary}</pre>'))
                            display(HTML("<br>"))
                        
                        # Create a strip chart of the AIP values grouped by Class using plotly.express
                        if show_stripplot == 'Include':
                            display(HTML(BOLD + UNDERLINE + f'Loan Pricing Plots' + END))
                            plt.figure(figsize=(10, 6))
                            sns.set_style('darkgrid')
                            stripplot = sns.stripplot(x='Class', y='AIP', hue='Class', data=ethn_all_data, jitter=0.4, dodge=True, linewidth=1, palette="Dark2", legend=False, size=7)
                            plt.title(f'Companywide Ethnicity for {loan_type} Loans')
                            plt.show()
                            display(HTML("<br>"))
                            
                        # Create a box chart of the AIP values grouped by Sex using plotly.express
                        if show_boxplot == 'Include':
                            display(HTML(BOLD + UNDERLINE + f'Loan Pricing Plot' + END))
                            display(HTML("<br>"))
                            plt.figure(figsize=(10, 6))
                            sns.set_style('darkgrid')
                            sns.boxplot(x='Class', y='AIP', data=ethn_all_data, color='skyblue', showmeans=True, fliersize=0, meanprops={"marker":"o","markerfacecolor":"white", "markeredgecolor":"black", "markersize":"10"})
                            sns.stripplot(x='Class', y='AIP', hue='Class', data=ethn_all_data, jitter=0.4, dodge=False, linewidth=1, palette="Dark2", legend=True, size=7)
                            plt.show()
                            display(HTML("<br>"))  
                        
                        # Print the list of loans included in the analysis
                        if print_list == 'Include':
                            display(HTML(BOLD + UNDERLINE + f'List of Loans' + END))
                            # Use to_html() to format the DataFrame as an HTML table and exclude the index
                            html_table = ethn_all_data.to_html(index=False)
                            
                            # Add inline CSS to adjust the font size
                            html_table = '<style> table {font-size: 0.9em;} </style>' + html_table
                            display(HTML(html_table))
                            display(HTML("<br>"))
                            display(HTML(f'<pre>{ethn_all_data.to_string()}</pre>'))
                            display(HTML("<br>"))

            # Group the AGE data by 'branchname'
            display(HTML("<br>"))
            display(HTML(ITALIC + BOLD + f'Age Analysis' + END))
            #display(HTML("<br>"))

            # Check if there are exactly two levels in the 'Age' column
            if len(age_all_data['Age'].unique()) != 2:
                #print('Not exactly two levels in the Age column')
                continue
            
            # Check if there are enough observations for each level
            if age_all_data['Age'].value_counts().min() >= min_observations:
                res.ttest(df=age_all_data, xfac='Age', res='AIP', evar=False, test_type=2)
                
                # Extract the Mean Diff from the summary
                summary = res.summary
                lines = summary.split('\n')
                mean_diff_line = lines[4]
                mean_diff = float(mean_diff_line.split()[-1])
                
                # Extract the p-value from the summary
                p_value_line = lines[8]
                p_value = float(p_value_line.split()[-1])
                
                 # Round the mean_diff and p-value to 2 decimal places
                mean_diff = round(mean_diff, 2)
                p_value = round(p_value, 4)

                if p_value <= stat_sign and mean_diff >= mean_diff_range:
                    # Print the Branchname, Mean and P Value, and the full Results
                    display(HTML(f'Loan Type: {loan_type}'))
                    display(HTML(f'BPS Diff: {BOLD}{mean_diff}{END}'))
                    display(HTML(f'p-value: {p_value}'))
                    display(HTML("<br>"))
                    
                    if show_stats == 'Include':
                        display(HTML(BOLD + UNDERLINE + f'Statistical Results' + END))
                        display(HTML(f'<pre>{res.summary}</pre>'))
                        display(HTML("<br>"))
                    
                    # Create a strip chart of the AIP values grouped by Age using plotly.express
                    if show_stripplot == 'Include':
                        display(HTML(BOLD + UNDERLINE + f'Loan Pricing Plots' + END))
                        plt.figure(figsize=(10, 6))
                        sns.set_style('darkgrid')
                        stripplot = sns.stripplot(x='Age', y='AIP', hue='Age', data=age_all_data, jitter=0.4, dodge=True, linewidth=1, palette="Dark2", legend=False, size=7)
                        plt.title(f'Companywide Age {loan_type} Loans')
                        plt.show()
                        display(HTML("<br>"))
                        
                    # Create a box chart of the AIP values grouped by Sex using plotly.express
                    if show_boxplot == 'Include':
                        display(HTML(BOLD + UNDERLINE + f'Loan Pricing Plot' + END))
                        display(HTML("<br>"))
                        plt.figure(figsize=(10, 6))
                        sns.set_style('darkgrid')
                        sns.boxplot(x='Age', y='AIP', data=age_all_data, color='skyblue', showmeans=True, fliersize=0, meanprops={"marker":"o","markerfacecolor":"white", "markeredgecolor":"black", "markersize":"10"})
                        sns.stripplot(x='Age', y='AIP', hue='Age', data=age_all_data, jitter=0.4, dodge=False, linewidth=1, palette="Dark2", legend=True, size=7)
                        plt.show()
                        display(HTML("<br>"))
                
                    # Print the list of loans included in the analysis
                    if print_list == 'Include':
                        age_all_data = age_all_data.sort_values('Age', ascending=False)
                        display(HTML(BOLD + UNDERLINE + f'List of Loans' + END))
                        # Use to_html() to format the DataFrame as an HTML table and exclude the index
                        html_table = age_all_data.to_html(index=False)
                        
                        # Add inline CSS to adjust the font size
                        html_table = '<style> table {font-size: 0.9em;} </style>' + html_table
                        display(HTML(html_table))
                        display(HTML("<br>"))
                                
            # Group the SEX data by 'branchname'
            display(HTML("<br>"))
            display(HTML(ITALIC + BOLD + f'Sex Analysis' + END))
            display(HTML("<br>"))

            # Check if there are exactly two levels in the 'Class' column
            if len(gender['Sex'].unique()) == 2:
                
                # Check if there are enough observations for each level
                if gender['Sex'].value_counts().min() >= min_observations:
                    res.ttest(df=gender, xfac='Sex', res='AIP', evar=False, test_type=2)
                
                    # Extract the Mean Diff from the summary
                    summary = res.summary
                    lines = summary.split('\n')
                    mean_diff_line = lines[4]
                    mean_diff = float(mean_diff_line.split()[-1])
                
                    # Extract the p-value from the summary
                    p_value_line = lines[8]
                    p_value = float(p_value_line.split()[-1])
                    
                     # Round the mean_diff and p-value to 2 decimal places
                    mean_diff = round(mean_diff, 2)
                    p_value = round(p_value, 4)

                    if p_value <= stat_sign and mean_diff >= mean_diff_range:
                        # Print the Branchname, Mean and P Value, and the full Results
                        display(HTML(f'Loan Type: {loan_type}'))
                        display(HTML(f'BPS Diff: {BOLD}{mean_diff}{END}'))
                        display(HTML(f'p-value: {p_value}'))
                        display(HTML("<br>"))
                        
                        if show_stats == 'Include':
                            display(HTML(BOLD + UNDERLINE + f'Statistical Results' + END))
                            display(HTML(f'<pre>{res.summary}</pre>'))
                            display(HTML("<br>"))
                        
                        # Create a strip chart of the AIP values grouped by Sex using plotly.express
                        if show_stripplot == 'Include':
                            display(HTML(BOLD + UNDERLINE + f'Loan Pricing Plots' + END))
                            plt.figure(figsize=(10, 6))
                            sns.set_style('darkgrid')
                            stripplot = sns.stripplot(x='Sex', y='AIP', hue='Sex', data=gender, jitter=0.4, dodge=True, linewidth=1, palette="Dark2", legend=False, size=7)
                            plt.title(f'Companywide Gender for {loan_type} Loans)')
                            plt.show()
                            display(HTML("<br>")) 
                            
                        # Create a box chart of the AIP values grouped by Sex using plotly.express
                        if show_boxplot == 'Include':
                            display(HTML(BOLD + UNDERLINE + f'Loan Pricing Plot' + END))
                            plt.figure(figsize=(10, 6))
                            sns.set_style('darkgrid')
                            sns.boxplot(x='Sex', y='AIP', data=gender, color='skyblue', showmeans=True, fliersize=0, meanprops={"marker":"o","markerfacecolor":"white", "markeredgecolor":"black", "markersize":"10"})
                            sns.stripplot(x='Sex', y='AIP', hue='Sex', data=gender, jitter=0.4, dodge=False, linewidth=1, palette="Dark2", legend=True, size=7)
                            plt.show()
                            display(HTML("<br>"))
                    
                        # Print the list of loans included in the analysis
                        if print_list == 'Include':
                            gender = gender.sort_values('Sex', ascending=False)
                            display(HTML(BOLD + UNDERLINE + f'List of Loans' + END))
                            # Use to_html() to format the DataFrame as an HTML table and exclude the index
                            html_table = gender.to_html(index=False)
                            
                            # Add inline CSS to adjust the font size
                            html_table = '<style> table {font-size: 0.9em;} </style>' + html_table
                            display(HTML(html_table))
                            display(HTML("<br>"))

    # Create a radio button widget with options for statistically significant results
    stat_sign_widget = widgets.ToggleButtons(
        options=[0.05, 0.01, 0.2, 1.0],
        description='Statistical Significance:',
        disabled=False,
        button_style='', # 'success', 'info', 'warning', 'danger' or ''
        tooltips=['95% (Default)', '99%', '80%', '100%'],
    )
                                    
    # Create a radio button widget with options for mean diff ranges
    mean_diff_widget = widgets.ToggleButtons(
        options=[0.25, 0.20, 0.15, 0.10],
        description='Mean Diff (BPS):',
        disabled=False,
        button_style='', # 'success', 'info', 'warning', 'danger' or ''
        tooltips=['25 BPS (Default)', '20 BPS', '15 BPS', '10 BPS'],
    )

    # Create a radio button widget with options for minimum number of observations
    min_observations_widget = widgets.ToggleButtons(
        options=[10, 20, 30],
        description='Min Observations (Loan Count):',
        disabled=False,
        button_style='', # 'success', 'info', 'warning', 'danger' or ''
        tooltips=['10 Loans (Default)', '20 Loans', '30 Loans'],
    )

    # Create a radio button widget with options for showing statistic results
    show_stats_widget = widgets.ToggleButtons(
        options=["Exclude", "Include"],
        description='Show Statistical Results:',
        disabled=False,
        button_style='', # 'success', 'info', 'warning', 'danger' or ''
        tooltips=['Exclude the stats (Default)', 'Include the stats'],
    )

    # Create a radio button widget with options for showing plots
    show_stripplot_widget = widgets.ToggleButtons(
        options=["Exclude", "Include"],
        description='Show Plots:',
        disabled=False,
        button_style='', # 'success', 'info', 'warning', 'danger' or ''
        tooltips=['Exclude the plots (Default)', 'Include the plots'],
    )

    # Create a radio button widget with options for showing plots
    show_boxplot_widget = widgets.ToggleButtons(
        options=["Exclude", "Include"],
        description='Show Boxplots:',
        disabled=False,
        button_style='', # 'success', 'info', 'warning', 'danger' or ''
        tooltips=['Exclude the plots (Default)', 'Include the plots'],
    )

    # Create a radio button widget with options for print list of loans
    print_list_widget = widgets.ToggleButtons(
        options=["Exclude", "Include"],
        description='Show List of Loans:',
        disabled=False,
        button_style='', # 'success', 'info', 'warning', 'danger' or ''
        tooltips=['Exclude the list of loans (Default)', 'Include the list of loans'],
    )

    # Use the interact function to apply the selected filters to the code
    interact(apply_filters, stat_sign=stat_sign_widget, mean_diff_range=mean_diff_widget, min_observations=min_observations_widget, show_stats=show_stats_widget, show_boxplot=show_boxplot_widget, show_stripplot=show_stripplot_widget, print_list=print_list_widget);

# Hook the analysis up to the file picker: on_file_selected is invoked with
# the FileChooser once the user confirms a selection.
fc.register_callback(on_file_selected)

# Render the picker, then set the title shown on it (HTML markup is allowed
# in the title string).
display(fc)
fc.title = '<b>Select File (.csv) for Ethnicity, Age, Sex, National Analysis</b>'

# Render the Output widget created above; on_file_selected clears it each
# time a new file is chosen.
display(output)

In [None]:
# File Selection for analysis
from ipyfilechooser import FileChooser

# Create the FileChooser widget and render it in the notebook; the analysis
# cell that follows reads the chosen path from fc.selected.
fc = FileChooser()
display(fc)


In [None]:
# Analysis
import pandas as pd
import numpy as np
import plotly.express as px
from scipy import stats as st
from bioinfokit.analys import stat
from ipywidgets import interact, interactive, fixed, interact_manual, Output, IntProgress
import ipywidgets as widgets
from IPython.display import display, HTML
from plotly.offline import iplot, init_notebook_mode
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore', category=FutureWarning)
init_notebook_mode(connected=True)
from tqdm.notebook import tqdm

# Resolve the CSV path picked in the file-chooser cell. Stop the cell right
# here when nothing was picked: carrying on would either fail with a NameError
# on `file_name` or silently re-analyse a path left over from an earlier run.
if fc.selected is None:
    raise ValueError("No file selected. Please select a file.")
file_name = fc.selected

# HTML fragments used to format the results section. END closes all three
# tags at once, whichever subset was opened.
BOLD = '<b>'
UNDERLINE = '<u>'
ITALIC = '<i>'
END = '</b></u></i>'

# Read the loan data
all_my_data = pd.read_csv(file_name)

# Drop only a trailing ".csv". Splitting on the first ".csv" occurrence would
# truncate any path that happens to contain ".csv" before the extension.
if file_name.endswith('.csv'):
    file_name_no_ext = file_name[:-len('.csv')]
else:
    file_name_no_ext = file_name

# Show which file is being analysed
display(HTML(BOLD + UNDERLINE + file_name_no_ext + END))

# Distinct loan types present in the data; apply_filters loops over these
loan_types = all_my_data['LoanType'].unique()

def _parse_ttest_summary(summary):
    """Return ``(mean_diff, p_value)`` read from a t-test summary string.

    The values are taken positionally: the last whitespace-separated token on
    line index 4 is parsed as the mean difference and the last token on line
    index 8 as the p-value.
    """
    lines = summary.split('\n')
    mean_diff = float(lines[4].split()[-1])
    p_value = float(lines[8].split()[-1])
    return mean_diff, p_value


def _show_loan_list(data, plain_text=False):
    """Display *data* as an index-free HTML table, optionally followed by a plain-text dump."""
    display(HTML(BOLD + UNDERLINE + 'List of Loans' + END))

    # Inline CSS shrinks the table font slightly
    html_table = '<style> table {font-size: 0.9em;} </style>' + data.to_html(index=False)
    display(HTML(html_table))
    display(HTML("<br>"))

    if plain_text:
        display(HTML(f'<pre>{data.to_string()}</pre>'))
        display(HTML("<br>"))


def _report_factor(res, data, factor, plot_title, *, loan_type, stat_sign,
                   mean_diff_range, min_observations, show_stats,
                   show_stripplot, show_boxplot, print_list,
                   sort_list=True, plain_text_list=False,
                   break_after_box_heading=True):
    """Run the two-group t-test of AIP by *factor* and display a qualifying result.

    Nothing is displayed unless *factor* has exactly two distinct values, the
    smaller group has at least *min_observations* rows, the p-value is at most
    *stat_sign* and the mean difference is at least *mean_diff_range*.

    Args:
        res: bioinfokit ``stat`` instance used to run the test.
        data: DataFrame holding the 'AIP' column and the *factor* column.
        factor: name of the grouping column ('Class', 'Age' or 'Sex').
        plot_title: title placed on the strip plot.
        loan_type: loan type label echoed in the result header.
        show_stats, show_stripplot, show_boxplot, print_list: the string
            'Include' switches the corresponding output on.
        sort_list: sort the loan list by *factor*, descending, before display.
        plain_text_list: also dump the loan list as preformatted text.
        break_after_box_heading: emit a line break after the box-plot heading.
    """
    if len(data[factor].unique()) != 2:
        return
    if not (data[factor].value_counts().min() >= min_observations):
        return

    # test_type=2 with evar=False: per the bioinfokit docs this should be the
    # two-sample test without the equal-variance assumption - TODO confirm.
    res.ttest(df=data, xfac=factor, res='AIP', evar=False, test_type=2)
    mean_diff, p_value = _parse_ttest_summary(res.summary)

    if not (p_value <= stat_sign and mean_diff >= mean_diff_range):
        return

    # Headline numbers for the qualifying result
    display(HTML(f'Loan Type: {loan_type}'))
    display(HTML(f'BPS Diff: {BOLD}{mean_diff}{END}'))
    display(HTML(f'p-value: {p_value}'))
    display(HTML("<br>"))

    if show_stats == 'Include':
        display(HTML(BOLD + UNDERLINE + 'Statistical Results' + END))
        display(HTML(f'<pre>{res.summary}</pre>'))
        display(HTML("<br>"))

    # Seaborn strip plot of AIP for the two groups
    if show_stripplot == 'Include':
        display(HTML(BOLD + UNDERLINE + 'Loan Pricing Plots' + END))
        plt.figure(figsize=(10, 6))
        sns.set_style('darkgrid')
        sns.stripplot(x=factor, y='AIP', hue=factor, data=data, jitter=0.4, dodge=True,
                      linewidth=1, palette="Dark2", legend=False, size=7)
        plt.title(plot_title)
        plt.show()
        display(HTML("<br>"))

    # Seaborn box plot (means marked with a white dot) overlaid with the individual loans
    if show_boxplot == 'Include':
        display(HTML(BOLD + UNDERLINE + 'Loan Pricing Plot' + END))
        if break_after_box_heading:
            display(HTML("<br>"))
        plt.figure(figsize=(10, 6))
        sns.set_style('darkgrid')
        sns.boxplot(x=factor, y='AIP', data=data, color='skyblue', showmeans=True, fliersize=0,
                    meanprops={"marker":"o","markerfacecolor":"white", "markeredgecolor":"black", "markersize":"10"})
        sns.stripplot(x=factor, y='AIP', hue=factor, data=data, jitter=0.4, dodge=False,
                      linewidth=1, palette="Dark2", legend=True, size=7)
        plt.show()
        display(HTML("<br>"))

    # Loans that went into the test
    if print_list == 'Include':
        listed = data.sort_values(factor, ascending=False) if sort_list else data
        _show_loan_list(listed, plain_text=plain_text_list)


def apply_filters(stat_sign, mean_diff_range, min_observations, show_stats, show_stripplot, show_boxplot, print_list):
    """Display Ethnicity, Age and Sex pricing comparisons for every loan type.

    Reads the notebook globals ``all_my_data`` and ``loan_types``. For each
    loan type the data is narrowed to originated, non-HECM loans and a
    two-group t-test of AIP is run per factor; see ``_report_factor`` for the
    conditions under which a result is shown.

    Args:
        stat_sign: largest p-value that still counts as a reportable result.
        mean_diff_range: smallest mean difference that is reported.
        min_observations: minimum number of loans required in the smaller group.
        show_stats: 'Include' to show the full t-test summary.
        show_stripplot: 'Include' to show the strip plot.
        show_boxplot: 'Include' to show the box plot.
        print_list: 'Include' to list the loans used in the test.
    """
    for loan_type in tqdm(loan_types):
        display(HTML("<br>"))
        display(HTML(UNDERLINE + BOLD + f'Loan Type: {loan_type}' + END))
        display(HTML("<br>"))

        # Keep only originated loans of this type whose Program does not contain "HECM".
        # NOTE(review): rows with a missing Program presumably yield NaN from
        # str.contains and are therefore dropped by the == False comparison too.
        all_da_data = all_my_data[all_my_data['LoanType'] == loan_type]
        all_data = all_da_data[all_da_data['HmdaActionTaken'] == 'Loan Originated']
        big_all_data = all_data[all_data['Program'].str.contains("HECM") == False]

        # Narrow each analysis to the columns it needs
        gender_all_data = big_all_data[["LoanId","AIP","Sex","branchname","Rate_Spread"]]
        age_all_data = big_all_data[["LoanId","AIP","Age","branchname","Rate_Spread"]]
        ethn_all_data = big_all_data[["LoanId","AIP","Class","Ethnicity","branchname","Rate_Spread"]]

        # Drop rows marked 'EXCLUDED' in the 'Class' column
        ethn_all_data = ethn_all_data[ethn_all_data['Class'] != 'EXCLUDED']

        # Keep only Male/Female rows. The mask is built on the very frame it
        # indexes, so pandas never has to realign it; 'Excluded' rows fall out
        # because they are not in the allowed list.
        gender = gender_all_data[gender_all_data['Sex'].isin(['Male', 'Female'])]

        res = stat()
        shared = dict(
            loan_type=loan_type,
            stat_sign=stat_sign,
            mean_diff_range=mean_diff_range,
            min_observations=min_observations,
            show_stats=show_stats,
            show_stripplot=show_stripplot,
            show_boxplot=show_boxplot,
            print_list=print_list,
        )

        # Each factor is checked on its own: one that lacks two groups or
        # enough loans is skipped without cancelling the factors after it.
        display(HTML(BOLD + ITALIC + 'Ethnicity Analysis' + END))
        _report_factor(res, ethn_all_data, 'Class',
                       f'Companywide Ethnicity for {loan_type} Loans',
                       sort_list=False, plain_text_list=True, **shared)

        display(HTML("<br>"))
        display(HTML(ITALIC + BOLD + 'Age Analysis' + END))
        _report_factor(res, age_all_data, 'Age',
                       f'Companywide Age {loan_type} Loans', **shared)

        display(HTML("<br>"))
        display(HTML(ITALIC + BOLD + 'Sex Analysis' + END))
        display(HTML("<br>"))
        _report_factor(res, gender, 'Sex',
                       f'Companywide Gender for {loan_type} Loans',
                       break_after_box_heading=False, **shared)

def _toggle(options, description, tooltips):
    """Build one enabled, unstyled ToggleButtons selector.

    *tooltips* are matched to *options* by position; each tooltip list below
    labels its first entry as the default.
    """
    return widgets.ToggleButtons(
        options=options,
        description=description,
        disabled=False,
        button_style='',  # 'success', 'info', 'warning', 'danger' or ''
        tooltips=tooltips,
    )


# p-value cut-off for a result to be reported
stat_sign_widget = _toggle(
    [0.05, 0.01, 0.2, 1.0],
    'Statistical Significance:',
    ['95% (Default)', '99%', '80%', '100%'],
)

# Smallest mean difference that is reported
mean_diff_widget = _toggle(
    [0.25, 0.20, 0.15, 0.10],
    'Mean Diff (BPS):',
    ['25 BPS (Default)', '20 BPS', '15 BPS', '10 BPS'],
)

# Minimum loan count required in the smaller group
min_observations_widget = _toggle(
    [10, 20, 30],
    'Min Observations (Loan Count):',
    ['10 Loans (Default)', '20 Loans', '30 Loans'],
)

# Whether to show the full t-test summary
show_stats_widget = _toggle(
    ["Exclude", "Include"],
    'Show Statistical Results:',
    ['Exclude the stats (Default)', 'Include the stats'],
)

# Whether to show the strip plots
show_stripplot_widget = _toggle(
    ["Exclude", "Include"],
    'Show Plots:',
    ['Exclude the plots (Default)', 'Include the plots'],
)

# Whether to show the box plots
show_boxplot_widget = _toggle(
    ["Exclude", "Include"],
    'Show Boxplots:',
    ['Exclude the plots (Default)', 'Include the plots'],
)

# Whether to list the loans behind each result
print_list_widget = _toggle(
    ["Exclude", "Include"],
    'Show List of Loans:',
    ['Exclude the list of loans (Default)', 'Include the list of loans'],
)

# Wire the selectors to apply_filters; it re-runs whenever a selection changes.
# The trailing semicolon keeps the notebook from echoing interact's return value.
interact(
    apply_filters,
    stat_sign=stat_sign_widget,
    mean_diff_range=mean_diff_widget,
    min_observations=min_observations_widget,
    show_stats=show_stats_widget,
    show_boxplot=show_boxplot_widget,
    show_stripplot=show_stripplot_widget,
    print_list=print_list_widget,
);

---

### *Race All Compared by Loan Type*

In [None]:
# File Selection for analysis
from ipyfilechooser import FileChooser

# Create the FileChooser widget and render it in the notebook; the analysis
# cell that follows reads the chosen path from fc.selected.
fc = FileChooser()
display(fc)


In [None]:
# Analysis
import pandas as pd
import numpy as np
import seaborn as sns
import plotly.express as px
import matplotlib.pyplot as plt
import textwrap
from scipy import stats as st
from bioinfokit.analys import stat
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
from IPython.display import display, HTML
#from IPython.display import clear_output
from plotly.offline import iplot, init_notebook_mode
init_notebook_mode(connected=True)
import warnings
warnings.filterwarnings('ignore', category=FutureWarning)
from tqdm.notebook import tqdm

# HTML fragments used to format the results section. END closes all three
# tags at once, whichever subset was opened.
BOLD = '<b>'
UNDERLINE = '<u>'
ITALIC = '<i>'
END = '</b></u></i>'

# Resolve the CSV path picked in the file-chooser cell. Stop the cell right
# here when nothing was picked: carrying on would either fail with a NameError
# on `file_name` or silently re-analyse a path left over from an earlier run.
if fc.selected is None:
    raise ValueError("No file selected. Please select a file.")
file_name = fc.selected

# Read the loan data
all_my_data = pd.read_csv(file_name)

# Drop only a trailing ".csv". Splitting on the first ".csv" occurrence would
# truncate any path that happens to contain ".csv" before the extension.
if file_name.endswith('.csv'):
    file_name_no_ext = file_name[:-len('.csv')]
else:
    file_name_no_ext = file_name

# Show which file is being analysed
display(HTML(BOLD + UNDERLINE + file_name_no_ext + END))

# Distinct loan types present in the data; apply_filters loops over these
loan_types = all_my_data['LoanType'].unique()

def apply_filters(show_stripplot, show_boxplot, print_list):
    """Show AIP-by-Race plots and, optionally, the underlying loans per loan type.

    Reads the notebook globals ``all_my_data`` and ``loan_types``. Each
    argument is compared against the string 'Include'; any other value
    switches the corresponding output off.

    Args:
        show_stripplot: 'Include' to draw the strip plot.
        show_boxplot: 'Include' to draw the box plot with the loans overlaid.
        print_list: 'Include' to list the loans, sorted by Race.
    """
    def line_break():
        display(HTML("<br>"))

    wanted_columns = ["AIP","Rate_Spread","Race","branchname","Occupancy"]
    mean_marker = {"marker":"o","markerfacecolor":"white", "markeredgecolor":"black", "markersize":"10"}

    for loan_type in tqdm(loan_types):
        line_break()
        line_break()
        display(HTML(f'{UNDERLINE}{BOLD}Loan Type: {loan_type}{END}'))
        line_break()

        # Originated loans of this type whose Program does not contain "HECM"
        of_type = all_my_data.loc[all_my_data['LoanType'] == loan_type]
        originated = of_type.loc[of_type['HmdaActionTaken'] == 'Loan Originated']
        has_hecm = originated['Program'].str.contains("HECM")
        race_frame = originated.loc[has_hecm == False][wanted_columns]

        # Number of loans left after filtering
        display(HTML(f'{BOLD} Total Loan Count: {len(race_frame)}'))

        # Seaborn strip plot of AIP per Race
        if show_stripplot == 'Include':
            display(HTML(f'{BOLD}{UNDERLINE}Loan Pricing Plots{END}'))
            plt.figure(figsize=(10, 6))
            sns.set_style('darkgrid')
            sns.stripplot(
                x='Race', y='AIP', hue='Race', data=race_frame,
                jitter=0.4, dodge=True, linewidth=1, palette="Dark2",
                legend=False, size=7,
            )
            plt.show()
            line_break()

        # Seaborn box plot of AIP per Race with the individual loans on top
        if show_boxplot == 'Include':
            line_break()
            plt.figure(figsize=(10, 6))
            sns.set_style('darkgrid')
            axes = sns.boxplot(
                x='Race', y='AIP', data=race_frame, color='skyblue',
                showmeans=True, fliersize=0, meanprops=mean_marker,
            )
            sns.stripplot(
                x='Race', y='AIP', hue='Race', data=race_frame,
                jitter=0.4, dodge=False, linewidth=1, palette="Dark2",
                legend=False, size=4,
            )

            # Wrap long Race labels at 10 characters so they do not overlap
            wrapped = []
            for tick in axes.get_xticklabels():
                wrapped.append(textwrap.fill(tick.get_text(), 10))
            axes.set_xticklabels(wrapped)

            plt.show()
            line_break()

        # Loans behind the plots, ordered by Race
        if print_list == 'Include':
            ordered = race_frame.sort_values('Race', ascending=True)
            display(HTML(f'{BOLD}{UNDERLINE}List of Loans{END}'))

            # Index-free HTML table with a slightly smaller font
            table_markup = ordered.to_html(index=False)
            display(HTML('<style> table {font-size: 0.9em;} </style>' + table_markup))
            line_break()

# Toggle-button selector: whether to show the strip plots
show_stripplot_widget = widgets.ToggleButtons(
    options=["Exclude", "Include"],
    description='Show Plots:',
    disabled=False,
    button_style='', # 'success', 'info', 'warning', 'danger' or ''
    tooltips=['Exclude the plots (Default)', 'Include the plots'],
)

# Toggle-button selector: whether to show the box plots. "Include" is listed
# first here, so the tooltips are listed in that same order: they are matched
# to the options by position.
show_boxplot_widget = widgets.ToggleButtons(
    options=["Include", "Exclude"],
    description='Show Boxplots:',
    disabled=False,
    button_style='', # 'success', 'info', 'warning', 'danger' or ''
    tooltips=['Include the plots (Default)', 'Exclude the plots'],
)

# Toggle-button selector: whether to list the loans behind the plots
print_list_widget = widgets.ToggleButtons(
    options=["Exclude", "Include"],
    description='Show List of Loans:',
    disabled=False,
    button_style='', # 'success', 'info', 'warning', 'danger' or ''
    tooltips=['Exclude the list of loans (Default)', 'Include the list of loans'],
)

# Wire the selectors to apply_filters; it re-runs whenever a selection changes.
# The trailing semicolon keeps the notebook from echoing interact's return value.
interact(apply_filters, show_stripplot=show_stripplot_widget, show_boxplot=show_boxplot_widget, print_list=print_list_widget);


---

### *All Ethnicity, Age, Sex Compared by Loan Type* 

In [None]:
# File Selection for analysis
from ipyfilechooser import FileChooser

# Create the FileChooser widget and render it so the user can pick the CSV
# to analyse. The analysis cell below reads the choice from `fc.selected`.
# NOTE(review): `display` is not imported in this cell; it relies on the
# name already being available in the notebook namespace.
fc = FileChooser()
display(fc)


In [None]:
# Analysis
import pandas as pd
import numpy as np
import plotly.express as px
from scipy import stats as st
from bioinfokit.analys import stat
from ipywidgets import interact, interactive, fixed, interact_manual, Output, IntProgress
import ipywidgets as widgets
from IPython.display import display, HTML
from plotly.offline import iplot, init_notebook_mode
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore', category=FutureWarning)
init_notebook_mode(connected=True)
from tqdm.notebook import tqdm

# Stop with a clear error when nothing has been picked in the FileChooser.
# Merely printing a message and continuing would either fail on an undefined
# `file_name` or silently analyse a stale `file_name` left over from an
# earlier run of the notebook.
if fc.selected is None:
    raise RuntimeError("No file selected. Please select a file.")
file_name = fc.selected

# HTML fragments used to format the results section. END closes all three
# tags at once, whichever of them were opened.
BOLD = '<b>'
UNDERLINE = '<u>'
ITALIC = '<i>'
END = '</b></u></i>'

# Load the selected CSV into a DataFrame.
all_my_data = pd.read_csv(file_name)

# Strip only a trailing ".csv". Splitting on '.csv' would truncate the name
# at the first occurrence anywhere in the path (e.g. inside a folder name).
if file_name.endswith('.csv'):
    file_name_no_ext = file_name[:-len('.csv')]
else:
    file_name_no_ext = file_name

# Show the file name (without extension) as a heading for the results.
display(HTML(BOLD + UNDERLINE + file_name_no_ext + END))

# Distinct loan types present in the data; apply_filters iterates over these.
loan_types = all_my_data['LoanType'].unique()

def _show_loan_count(frame):
    """Display the number of rows in ``frame`` as a bold 'Total Loan Count' line."""
    # END closes the bold tag so the markup does not rely on the browser
    # auto-closing it.
    display(HTML(BOLD + f' Total Loan Count: {len(frame)}' + END))


def _show_stripplot(frame, column, title, size=4):
    """Draw a seaborn strip plot of AIP per ``column`` category under a heading."""
    display(HTML(BOLD + UNDERLINE + 'Loan Pricing Plots' + END))
    plt.figure(figsize=(10, 6))
    sns.set_style('darkgrid')
    sns.stripplot(x=column, y='AIP', hue=column, data=frame, jitter=0.4, dodge=True, linewidth=1, palette="Dark2", legend=False, size=size)
    plt.title(title)
    plt.show()
    display(HTML("<br>"))


def _show_boxplot(frame, column):
    """Draw a seaborn box plot of AIP per ``column`` category with the points overlaid."""
    plt.figure(figsize=(10, 6))
    sns.set_style('darkgrid')
    # fliersize=0 hides the box plot's own outlier markers; the individual
    # loans are drawn by the strip plot layered on top instead.
    sns.boxplot(x=column, y='AIP', data=frame, color='skyblue', showmeans=True, fliersize=0, meanprops={"marker":"o","markerfacecolor":"white", "markeredgecolor":"black", "markersize":"10"})
    sns.stripplot(x=column, y='AIP', hue=column, data=frame, jitter=0.4, dodge=False, linewidth=1, palette="Dark2", legend=True, size=4)
    plt.show()
    display(HTML("<br>"))


def _show_loan_table(frame):
    """Display ``frame`` as an HTML table (index hidden) under a 'List of Loans' heading."""
    display(HTML(BOLD + UNDERLINE + 'List of Loans' + END))
    # Inline CSS shrinks the table font slightly.
    html_table = '<style> table {font-size: 0.9em;} </style>' + frame.to_html(index=False)
    display(HTML(html_table))
    display(HTML("<br>"))


def apply_filters(show_stripplot, show_boxplot, print_list):
    """Render the Ethnicity, Age and Sex pricing sections for every loan type.

    Reads the module-level ``all_my_data`` and ``loan_types``. For each loan
    type it keeps rows whose ``HmdaActionTaken`` is 'Loan Originated' and
    whose ``Program`` does not contain "HECM", then displays one section per
    attribute: a loan count, optional plots of ``AIP`` and an optional table
    of the loans.

    Parameters
    ----------
    show_stripplot : str
        'Include' to draw the strip plots; any other value skips them.
    show_boxplot : str
        'Include' to draw the box plots; any other value skips them.
    print_list : str
        'Include' to display the table of loans; any other value skips it.

    Returns
    -------
    None
        All output is produced through ``display`` / ``plt.show``.
    """
    for loan_type in tqdm(loan_types):
        display(HTML("<br>"))
        display(HTML(UNDERLINE + BOLD + f'Loan Type: {loan_type}' + END))
        display(HTML("<br>"))

        # Restrict to originated loans of this type in non-HECM programs.
        of_type = all_my_data[all_my_data['LoanType'] == loan_type]
        originated = of_type[of_type['HmdaActionTaken'] == 'Loan Originated']
        # NOTE(review): comparing with `== False` (rather than negating) means
        # rows whose Program is missing are dropped too, since a missing
        # value does not compare equal to False. Confirm that this is intended.
        big_all_data = originated[originated['Program'].str.contains("HECM") == False]

        # Narrow each analysis down to the columns it needs.
        ethn_all_data = big_all_data[["LoanId","AIP","Class","Ethnicity","branchname","Rate_Spread"]]
        age_all_data = big_all_data[["LoanId","AIP","Age","branchname","Rate_Spread"]]
        gender_all_data = big_all_data[["LoanId","AIP","Sex","branchname","Rate_Spread"]]

        # Drop loans whose Class is flagged 'EXCLUDED'.
        ethn_all_data = ethn_all_data[ethn_all_data['Class'] != 'EXCLUDED']

        # Keep only Male/Female rows. This also removes 'Excluded', and the
        # mask is built from the same frame it indexes, so it is always aligned.
        gender = gender_all_data[gender_all_data['Sex'].isin(['Male', 'Female'])]

        # ---- Ethnicity section (categories come from the 'Class' column) ----
        display(HTML(BOLD + ITALIC + 'Ethnicity Analysis' + END))
        _show_loan_count(ethn_all_data)

        if show_stripplot == 'Include':
            _show_stripplot(ethn_all_data, 'Class', f'Companywide Ethnicity for {loan_type} Loans', size=7)

        if show_boxplot == 'Include':
            display(HTML("<br>"))
            _show_boxplot(ethn_all_data, 'Class')

        if print_list == 'Include':
            _show_loan_table(ethn_all_data)

        # ---- Age section ----
        display(HTML("<br>"))
        display(HTML(ITALIC + BOLD + 'Age Analysis' + END))
        _show_loan_count(age_all_data)

        if show_stripplot == 'Include':
            _show_stripplot(age_all_data, 'Age', f'Companywide Age for {loan_type} Loans')

        if show_boxplot == 'Include':
            display(HTML("<br>"))
            _show_boxplot(age_all_data, 'Age')

        if print_list == 'Include':
            _show_loan_table(age_all_data.sort_values('Age', ascending=False))

        # ---- Sex section ----
        display(HTML("<br>"))
        display(HTML(ITALIC + BOLD + 'Sex Analysis' + END))
        display(HTML("<br>"))
        _show_loan_count(gender)

        if show_stripplot == 'Include':
            _show_stripplot(gender, 'Sex', f'Companywide Gender for {loan_type} Loans')

        if show_boxplot == 'Include':
            _show_boxplot(gender, 'Sex')

        if print_list == 'Include':
            _show_loan_table(gender.sort_values('Sex', ascending=False))

# Toggle for the strip plots. No explicit value is set; the tooltip marks
# the first option ("Exclude") as the default.
show_stripplot_widget = widgets.ToggleButtons(
    options=["Exclude", "Include"],
    description='Show Plots:',
    disabled=False,
    button_style='', # 'success', 'info', 'warning', 'danger' or ''
    tooltips=['Exclude the plots (Default)', 'Include the plots'],
)

# Toggle for the box plots. "Include" is listed first here, so the tooltips
# are given in that same order: ToggleButtons pairs tooltips with options by
# position, and the previous order labelled each button with the opposite action.
show_boxplot_widget = widgets.ToggleButtons(
    options=["Include", "Exclude"],
    description='Show Boxplots:',
    disabled=False,
    button_style='', # 'success', 'info', 'warning', 'danger' or ''
    tooltips=['Include the boxplots (Default)', 'Exclude the boxplots'],
)

# Toggle for printing the table of loans that went into the analysis.
print_list_widget = widgets.ToggleButtons(
    options=["Exclude", "Include"],
    description='Show List of Loans:',
    disabled=False,
    button_style='', # 'success', 'info', 'warning', 'danger' or ''
    tooltips=['Exclude the list of loans (Default)', 'Include the list of loans'],
)

# Bind the three toggles to apply_filters' keyword arguments; interact
# re-runs the function whenever a toggle changes.
interact(apply_filters, show_boxplot=show_boxplot_widget, show_stripplot=show_stripplot_widget, print_list=print_list_widget);

---