# **Fair Lending Analysis**

### **Pricing Analysis**

In [4]:
# Fair Lending Pricing Analysis (Combined Dataset)
from ipyfilechooser import FileChooser
from IPython.display import clear_output, display
from ipywidgets import Output
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import textwrap
from scipy import stats as st
import bioinfokit
from bioinfokit.analys import stat
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
from IPython.display import display, HTML
# from IPython.display import clear_output
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)
#init_notebook_mode(connected=True)
from tqdm.notebook import tqdm

# Create the FileChooser widget. on_file_selected reads the chosen path from
# its argument's `.selected` attribute.
# NOTE(review): presumably this widget is registered with on_file_selected
# further down the file -- not visible in this chunk, confirm.
fc = FileChooser()

# Create the Output widget. on_file_selected calls
# output.clear_output(wait=True) as its first step each time it runs.
output = Output()

# Define a function that will be called when a file is selected
def on_file_selected(filechooser):
    # Clear the output widget
    output.clear_output(wait=True)

    file_name = filechooser.selected

    # formatting of the results section
    BOLD = "<b>"
    UNDERLINE = "<u>"
    ITALIC = "<i>"
    END = "</b></u></i>"

    # Read in the CSV files
    all_my_data = pd.read_csv(file_name)

    # Get the unique loan types from the data
    loan_types = all_my_data["LoanType"].unique()

    def apply_filters(anlys_lvl, analysis, stat_sign, mean_diff_range, min_observations, show_stats, show_plot, checkbox):
        display(HTML("<br>"))
        
        display(HTML("<p style='font-size:16px;'>" + BOLD + f"Level of Analysis: {anlys_lvl}" + END + "</p>"))
        #display(HTML("<br>"))
        
        display(HTML("<p style='font-size:16px;'>" + BOLD + f"Demographic Analysis: {analysis}" + END + "</p>"))
        display(HTML("<br>"))
        
        # Loop through each loan type and filter the data accordingly
        for loan_type in tqdm(loan_types):
            #display(HTML("<br>"))
            
            # filtering to only originated loans and non-HECM programs (Loan Type: 'Conventional')
            all_da_data = all_my_data[all_my_data["LoanType"] == loan_type]
            all_data = all_da_data[all_da_data["HmdaActionTaken"] == "Loan Originated"]
            big_all_data = all_data[all_data["Program"].str.contains("HECM") == False]

            # narrowing down the fields for more accurate analysis
            race_all_data = big_all_data[["LoanId", "AIP","Rate_Spread", "Race", "branchname"]]

            if anlys_lvl == "MSA/MD ":
                # MSA/MD Names Data for Merge
                msa_md_data = pd.read_csv(r"C:\Users\colby\OneDrive\Documents\Data Analysis\Python_Project_Fair_Lending_Analysis\datasets\!MSA MD Listing 2023.csv")

                # Merge the dataframes on the 'HmdaMsa' and 'MSA_MD_Code' columns
                merged_data = pd.merge(
                    big_all_data, msa_md_data, left_on="HmdaMsa", right_on="MSA_MD_Code"
                )
                race_all_data = race_all_data.copy()
                race_all_data.loc[:, "MSA_MD_Code"] = merged_data["MSA_MD_Code"]
                race_all_data.loc[:, "MSA_MD_Name"] = merged_data["MSA_MD_Name"]

            res = stat()
            # creating race filters for white vs protected race
            race_filters = {
                "Black or African American": race_all_data["Race"].isin(["White", "Black or African American"]),
                "American Alaska or Indian": race_all_data["Race"].isin(["White", "American Alaska or Indian"]),
                "Native Hawaiian or Other Pacific Islander": race_all_data["Race"].isin(["White", "Native Hawaiian or Other Pacific Islander"]),
                "Asian": race_all_data["Race"].isin(["White", "Asian"]),
            }

# Loop through each RACE filter and group the filtered data by the selected level of analysis  #####
            if analysis == "All" or analysis == "Race":
                for race, race_filter in race_filters.items():
                    
#####  RACE ANALYSIS SECTION  #####                    
                    filtered_data = race_all_data[race_filter]
                    
                    if anlys_lvl == "Branch ":
                        grouped_data = filtered_data.groupby("branchname")
                    elif anlys_lvl == "MSA/MD ":
                        grouped_data = filtered_data.groupby("MSA_MD_Name")
                    elif anlys_lvl == "Aggregate ":
                        grouped_data = filtered_data.groupby(lambda _: "")
                    else:
                        raise ValueError(f"Invalid analysis level: {anlys_lvl}")
                    
                    # Loop through each group and perform a t-test
                    for group_name, group in grouped_data:
                        
                        # Check if there are exactly two levels in the 'Race' column
                        if len(group["Race"].unique()) == 2:
                            
                            # Check if there are enough observations for each level
                            if group["Race"].value_counts().min() >= min_observations:
                                
                                # Check if the total sample size of the group is at least 30, if the checkbox is checked
                                if not checkbox or len(group) >= 30:
                                
                                    # T-Test for AIP
                                    res.ttest(df=group, xfac="Race", res="AIP", evar=False, test_type=2)

                                    # Calculate loan count for each race
                                    loan_counts = group["Race"].value_counts()

                                    # Extract the Mean Diff from the summary
                                    summary = res.summary
                                    lines = summary.split("\n")
                                    mean_diff_line = lines[4]
                                    mean_diff = float(mean_diff_line.split()[-1])

                                    # Extract the p-value from the summary
                                    p_value_line = lines[8]
                                    p_value = float(p_value_line.split()[-1])

                                    # Round the mean diff to 2 decimal places and the p-value to 4
                                    mean_diff = round(mean_diff, 2)
                                    p_value = round(p_value, 4)

                                    # Check if the p-value is less than or equal to the significance level
                                    if p_value <= stat_sign and mean_diff >= mean_diff_range:
                                        # Print the group_name, Mean and P Value, and the full Results
                                        display(HTML("<br>"))
                                        display(HTML("&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;" + BOLD + group_name + END))
                                        display(HTML("&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;" + f"Group: {BOLD}{race}{END}"))
                                        display(HTML("&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;" + f"Loan Type: {loan_type}"))
                                        display(HTML("&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;" + f"AIP BPS Diff: {BOLD}{mean_diff}{END}"))
                                        display(HTML("&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;" + f"AIP p-value: {p_value}"))
                                        for race_name, count in loan_counts.items():
                                            display(HTML("&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;" + f"Loan Count ({race_name}): {count}"))
                                        

                                        if show_stats == "Include ":
                                            display(HTML("<br>"))
                                            display(HTML(BOLD + UNDERLINE + f"Statistical Results (AIP)" + END))
                                            display(HTML(f"<pre>{res.summary}</pre>"))
                                            
                                            
                                        # T-Test for 'Rate_Spread'
                                        res.ttest(df=group, xfac="Race", res="Rate_Spread", evar=False, test_type=2)
                                        
                                        # Extract the Mean Diff and p-value for 'Rate Spread'
                                        summary = res.summary
                                        lines = summary.split('\n')

                                        # For Mean Diff
                                        mean_diff_line = lines[4]
                                        mean_diff_rate_spread = float(mean_diff_line.split()[-1])

                                        # For p-value
                                        p_value_line = lines[8]
                                        p_value_rate_spread = float(p_value_line.split()[-1])

                                        # Round the mean diff to 2 decimal places and the p-value to 4
                                        mean_diff_rate_spread = round(mean_diff_rate_spread, 2)
                                        p_value_rate_spread = round(p_value_rate_spread, 4)
                                        
                                        if p_value_rate_spread <= 0.05 and mean_diff_rate_spread >= mean_diff_range:
                                        # Print Rate Spread, Mean and P Value, if the p-value is less than or equal to the significance level
                                            #display(HTML('&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;' + BOLD + f'Rate Spread' + END))
                                            display(HTML('&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;' + f'Rate Spread BPS Diff: {BOLD}{mean_diff_rate_spread}{END}'))
                                            display(HTML('&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;' + f'Rate Spread p-value: {p_value_rate_spread}'))
                                            
                                        
                                            if show_stats == "Include ":
                                                display(HTML("<br>"))
                                                display(HTML('&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;' + BOLD + UNDERLINE + f'Statistical Results (Rate Spread)' + END))
                                                display(HTML(f'<pre>{res.summary}</pre>'))
                                                

                                        # Apply the fivethirtyeight style to all plots
                                        plt.style.use('fivethirtyeight')
                                                                                                            
                                        # Create a boxplot of the AIP values grouped by Race using seaborn
                                        if show_plot == "Boxplot ":
                                            display(HTML("<br>"))
                                            display(HTML("&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;"+ BOLD + UNDERLINE + f"AIP Boxplot" + END))
                                            display(HTML("<br>"))
                                            plt.figure(figsize=(10, 6))
                                            #sns.set_style("darkgrid")
                                            # Your original color
                                            original_color = (240, 240, 240)
                                            # Convert to RGB color for seaborn
                                            seaborn_color = tuple([val / 255 for val in original_color]) + (0,)  # Add alpha for transparency
                                            ax = sns.boxplot(x="Race", y="AIP", data=group, color=seaborn_color, showmeans=True, linewidth=0.6, fliersize=0, meanprops={"marker": "X", "markerfacecolor": "yellow", "markeredgecolor": "black", "markersize": "8",},)
                                            for patch in ax.artists:
                                                patch.set_zorder(1)
                                            strip = sns.stripplot( x="Race", y="AIP", hue="Race", data=group, jitter=0.4, dodge=False, linewidth=1, legend=False, size=6,)
                                            for collection in strip.collections:
                                                collection.set_zorder(2)
                                            plt.show()
                                            
                                            
                                            # Create a boxplot of the Rate Spread values grouped by Race using seaborn
                                            if show_plot == "Boxplot " and p_value_rate_spread <= 0.05 and mean_diff_rate_spread >= 0.15:
                                                display(HTML("<br>"))
                                                display(HTML("&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;"+ BOLD + UNDERLINE + f"Rate Spread Boxplot" + END))
                                                display(HTML("<br>"))
                                                plt.figure(figsize=(10, 6))
                                                #sns.set_style("darkgrid")
                                                # Your original color
                                                original_color = (240, 240, 240)
                                                # Convert to RGB color for seaborn
                                                seaborn_color = tuple([val / 255 for val in original_color]) + (0,)  # Add alpha for transparency
                                                ax = sns.boxplot(x="Race", y="Rate_Spread", data=group, color=seaborn_color, showmeans=True, linewidth=0.6, fliersize=0, meanprops={"marker": "X", "markerfacecolor": "yellow", "markeredgecolor": "black", "markersize": "8",},)
                                                for patch in ax.artists:
                                                    patch.set_zorder(1)
                                                strip = sns.stripplot( x="Race", y="Rate_Spread", hue="Race", data=group, jitter=0.4, dodge=False, linewidth=1, legend=False, size=6,)
                                                for collection in strip.collections:
                                                    collection.set_zorder(2)
                                                plt.show()
                                                

                                        # Print the list of loans included in the analysis
                                        if show_plot == "List of Loans ":
                                            group = group.sort_values("Race", ascending=True)
                                            display(HTML("<br>"))
                                            display(HTML("&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;" + BOLD + UNDERLINE + f"List of Loans for {anlys_lvl}: {group_name}" + END))
                                            display(HTML("<br>"))
                                            # Use to_html() to format the DataFrame as an HTML table and exclude the index
                                            html_table = group.to_html(index=False)

                                            # Add inline CSS to adjust the font size
                                            html_table = ("<style> table {font-size: 0.9em;} </style>" + html_table)
                                            display(HTML(html_table))
        
            elif analysis == "Age" or analysis == "Sex" or analysis == "Ethnicity":
                    pass

##### ETHNICITY, AGE, SEX ANALYSIS SECTION ######

# If the analysis level is "MSA/MD", merge the data with the MSA/MD names data
            if anlys_lvl == "MSA/MD ":
                # MSA/MD Names Data for Merge
                msa_md_data = pd.read_csv(r"C:\Users\colby\OneDrive\Documents\Data Analysis\Python_Project_Fair_Lending_Analysis\datasets\!MSA MD Listing 2023.csv")

                # Merge the dataframes on the 'HmdaMsa' and 'MSA_MD_Code' columns
                big_all_data = pd.merge(
                    big_all_data, msa_md_data, left_on="HmdaMsa", right_on="MSA_MD_Code"
                )
    
            # narrowing down the fields for more accurate analysis
            gender_all_data = big_all_data[["LoanId", "AIP", "Rate_Spread", "Sex", "branchname", "HmdaActionDate"]]
            age_all_data = big_all_data[[ "LoanId", "AIP", "Rate_Spread", "Age", "branchname", "HmdaActionDate"]]
            ethn_all_data = big_all_data[[ "LoanId", "AIP", "Rate_Spread", "Class", "Ethnicity", "branchname", "HmdaActionDate"]]

            if anlys_lvl == "MSA/MD ":
                gender_all_data = gender_all_data.copy()
                gender_all_data["MSA_MD_Code"] = big_all_data["MSA_MD_Code"]
                gender_all_data["MSA_MD_Name"] = big_all_data["MSA_MD_Name"]

                age_all_data = age_all_data.copy()
                age_all_data["MSA_MD_Code"] = big_all_data["MSA_MD_Code"]
                age_all_data["MSA_MD_Name"] = big_all_data["MSA_MD_Name"]

                ethn_all_data = ethn_all_data.copy()
                ethn_all_data["MSA_MD_Code"] = big_all_data["MSA_MD_Code"]
                ethn_all_data["MSA_MD_Name"] = big_all_data["MSA_MD_Name"]
                
            res = stat()
            # creating filters
            # Gender Filter
            gender_filter = gender_all_data["Sex"].isin(["Male", "Female"])

            # Filter out rows with 'EXCLUDED' in the 'Class' column
            ethn_all_data = ethn_all_data[ethn_all_data["Class"] != "EXCLUDED"]

            # Create a boolean mask to exclude rows with 'Excluded' in the 'Sex' column
            gender_mask = gender_all_data["Sex"] != "Excluded"

            # Apply the mask to the data frame
            gender_all_data = gender_all_data[gender_mask]

            # applying filters
            gender = gender_all_data[gender_filter]

##### Group the ETHNICITY data by Levels  #######
            if analysis == "All" or analysis == "Ethnicity":
                if anlys_lvl == "Branch ":
                    grouped_ethn_data = ethn_all_data.groupby("branchname")
                elif anlys_lvl == "MSA/MD ":
                    grouped_ethn_data = ethn_all_data.groupby("MSA_MD_Name")
                elif anlys_lvl == "Aggregate ":
                    grouped_ethn_data = ethn_all_data.groupby(lambda _: "")

                # Loop through each group and perform a t-test
                for group_name, group in grouped_ethn_data:
                    
                    # Check if there are exactly two levels in the 'Class' column
                    if len(group["Class"].unique()) == 2:
                        
                        # Check if there are enough observations for each level
                        if group["Class"].value_counts().min() >= min_observations:
                            
                            # Check if the total sample size of the group is at least 30, if the checkbox is checked
                            if not checkbox or len(group) >= 30:
                            
                                # T-Test for 'AIP'
                                res.ttest(df=group, xfac="Class", res="AIP", evar=False, test_type=2)

                                # Extract the Mean Diff from the summary
                                summary = res.summary
                                lines = summary.split("\n")
                                mean_diff_line = lines[4]
                                mean_diff = float(mean_diff_line.split()[-1])

                                # Extract the p-value from the summary
                                p_value_line = lines[8]
                                p_value = float(p_value_line.split()[-1])

                                # Round the mean_diff to 2 decimal places and the p-value to 4
                                mean_diff = round(mean_diff, 2)
                                p_value = round(p_value, 4)
                                
                                # Calculate loan count by group
                                loan_count_by_group = group["Class"].value_counts()

                                if p_value <= stat_sign and mean_diff >= mean_diff_range:
                                    # Print the group_name, Mean and P Value, and the full Results
                                    display(HTML("<br>"))
                                    display(HTML("      " + BOLD + group_name + END))
                                    display(HTML("      " + f"Group: Hispanic or Latino"))
                                    display(HTML("      " + f"Loan Type: {loan_type}"))
                                    display(HTML("      " + f"BPS Diff: {BOLD}{mean_diff}{END}"))
                                    display(HTML("      " + f"p-value: {p_value}"))
                                    for group_name, count in loan_count_by_group.items():
                                        display(HTML("      " + f"Loan Count ({group_name}): {count}"))
                                    

                                    if show_stats == "Include ":
                                        display(HTML("<br>"))
                                        display(HTML("      " + BOLD + UNDERLINE + f"Statistical Results (AIP)" + END))
                                        display(HTML(f"<pre>{res.summary}</pre>"))
                                        
                                        
                                    # T-Test for 'Rate_Spread'
                                    res.ttest(df=group, xfac="Class", res="Rate_Spread", evar=False, test_type=2)
                                    
                                    # Extract the Mean Diff and p-value for 'Rate Spread'
                                    summary = res.summary
                                    lines = summary.split('\n')

                                    # For Mean Diff
                                    mean_diff_line = lines[4]
                                    mean_diff_rate_spread = float(mean_diff_line.split()[-1])

                                    # For p-value
                                    p_value_line = lines[8]
                                    p_value_rate_spread = float(p_value_line.split()[-1])

                                    # Round the mean_diff to 2 decimal places and the p-value to 4
                                    mean_diff_rate_spread = round(mean_diff_rate_spread, 2)
                                    p_value_rate_spread = round(p_value_rate_spread, 4)

                                    if p_value_rate_spread <= stat_sign and mean_diff_rate_spread >= mean_diff_range:
                                    # Print Rate Spread, Mean and P Value, if the p-value is less than or equal to the significance level
                                        #display(HTML('&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;' + BOLD + f'Rate Spread ({group_name})' + END))
                                        display(HTML('&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;' + f'Rate Spread BPS Diff: {BOLD}{mean_diff_rate_spread}{END}'))
                                        display(HTML('&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;' + f'Rate Spread p-value: {p_value_rate_spread}'))
                                        
                                    
                                        if show_stats == "Include ":
                                            display(HTML("<br>"))
                                            display(HTML('&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;' + BOLD + UNDERLINE + f'Statistical Results (Rate Spread)' + END))
                                            display(HTML(f'<pre>{res.summary}</pre>'))   

                                    # Set plot style to fivethirtyeight
                                    plt.style.use('fivethirtyeight')
                                    
                                    # Create a boxplot of the AIP values grouped by Class using seaborn
                                    if show_plot == "Boxplot ":
                                        display(HTML("<br>"))
                                        display(HTML("&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;" + BOLD + UNDERLINE + f"AIP Boxplot" + END))
                                        display(HTML("<br>"))
                                        plt.figure(figsize=(10, 6))
                                        # Your original color
                                        original_color = (240, 240, 240)
                                        # Convert to RGB color for seaborn
                                        seaborn_color = tuple([val / 255 for val in original_color]) + (0.,)  # Add alpha for transparency
                                        ax = sns.boxplot(x="Class", y="AIP", data=group, color=seaborn_color, showmeans=True, linewidth=0.4, fliersize=0, meanprops={"marker": "X","markerfacecolor": "yellow", "markeredgecolor": "black", "markersize": "8",},)
                                        for patch in ax.artists:
                                            patch.set_zorder(1)
                                        strip = sns.stripplot(x="Class", y="AIP", hue="Class", data=group, jitter=0.4, dodge=False, linewidth=1, legend=False,
                                            size=6,)
                                        for collection in strip.collections:
                                            collection.set_zorder(2)
                                        plt.show()
                                        
                                        
                                        # Create a boxplot of the Rate Spread values grouped by Class using seaborn
                                        if show_plot == "Boxplot " and p_value_rate_spread <= 0.05 and mean_diff_rate_spread >= 0.15:
                                            display(HTML("<br>"))
                                            display(HTML("&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;" + BOLD + UNDERLINE + f"Rate Spread Boxplot" + END))
                                            display(HTML("<br>"))
                                            plt.figure(figsize=(10, 6))
                                            # Your original color
                                            original_color = (240, 240, 240)
                                            # Convert to RGB color for seaborn
                                            seaborn_color = tuple([val / 255 for val in original_color]) + (0.,)  # Add alpha for transparency
                                            ax = sns.boxplot(x="Class", y="Rate_Spread", data=group, color=seaborn_color, showmeans=True, linewidth=0.6, fliersize=0, meanprops={"marker": "X","markerfacecolor": "yellow", "markeredgecolor": "black", "markersize": "8",},)
                                            for patch in ax.artists:
                                                patch.set_zorder(1)
                                            strip = sns.stripplot(x="Class", y="Rate_Spread", hue="Class", data=group, jitter=0.4, dodge=False, linewidth=1, legend=False,
                                                size=6,)
                                            for collection in strip.collections:
                                                collection.set_zorder(2)
                                            plt.show()
                                            
                                    
                                    # Print the list of loans included in the analysis
                                    if show_plot == "List of Loans ":
                                        group = group.sort_values("Class", ascending=False)
                                        display(HTML("<br>"))
                                        display(HTML("      " + BOLD + UNDERLINE + f"List of Loans for {anlys_lvl}: {group_name}" + END))
                                        display(HTML("<br>"))
                                        # Use to_html() to format the DataFrame as an HTML table and exclude the index
                                        html_table = group.to_html(index=False)

                                        # Add inline CSS to adjust the font size
                                        html_table = ("<style> table {font-size: 0.9em;} </style>" + html_table)
                                        display(HTML(html_table))

            elif analysis == "Age" or analysis == "Sex" or analysis == "Race":
                pass

##### Group the AGE data by Levels  ######
            if analysis == "All" or analysis == "Age":
                if anlys_lvl == "Branch ":
                    grouped_age_data = age_all_data.groupby("branchname")
                elif anlys_lvl == "MSA/MD ":
                    grouped_age_data = age_all_data.groupby("MSA_MD_Name")
                elif anlys_lvl == "Aggregate ":
                    grouped_age_data = age_all_data.groupby(lambda _: "")

                #display(HTML("   " + ITALIC + BOLD + f"Age Analysis" + END))
                #display(HTML("<br>"))

                # Loop through each group and perform a t-test
                for group_name, group in grouped_age_data:
                    
                    # Check if there are exactly two levels in the 'Age' column
                    if len(group["Age"].unique()) != 2:
                        # print('Not exactly two levels in the Age column')
                        continue

                    # Check if there are enough observations for each level
                    if group["Age"].value_counts().min() >= min_observations:
                        
                        # Check if the total sample size of the group is at least 30, if the checkbox is checked
                        if not checkbox or len(group) >= 30:
                        
                            # T-Test for 'AIP'
                            res.ttest(df=group, xfac="Age", res="AIP", evar=False, test_type=2)
                            
                            # Extract the Mean Diff from the summary
                            summary = res.summary
                            lines = summary.split("\n")
                            mean_diff_line = lines[4]
                            mean_diff = float(mean_diff_line.split()[-1])

                            # Extract the p-value from the summary
                            p_value_line = lines[8]
                            p_value = float(p_value_line.split()[-1])

                            # Round the mean_diff to 2 decimal places and the p-value to 4
                            mean_diff = round(mean_diff, 2)
                            p_value = round(p_value, 4)
                            
                            # Calculate loan count by group
                            loan_count_by_group = group["Age"].value_counts()

                            if p_value <= stat_sign and mean_diff >= mean_diff_range:
                                # Print the group_name, Mean and P Value, and the full Results
                                display(HTML("<br>"))
                                display(HTML("      " + BOLD + group_name + END))
                                display(HTML("      " + f"Group: Seniors"))
                                display(HTML("      " + f"Loan Type: {loan_type}"))
                                display(HTML("      " + f"BPS Diff: {BOLD}{mean_diff}{END}"))
                                display(HTML("      " + f"p-value: {p_value}"))
                                for group_name, count in loan_count_by_group.items():
                                    display(HTML("      " + f"Loan Count ({group_name}): {count}"))
                                

                                if show_stats == "Include ":
                                    display(HTML("<br>"))
                                    display(HTML("      " + BOLD + UNDERLINE + f"Statistical Results (AIP)" + END))
                                    display(HTML(f"<pre>{res.summary}</pre>"))
                                    
                                    
                                # T-Test for 'Rate_Spread'
                                res.ttest(df=group, xfac="Age", res="Rate_Spread", evar=False, test_type=2)

                                # Extract the Mean Diff and p-value for 'Rate Spread'
                                summary = res.summary
                                lines = summary.split('\n')

                                # For Mean Diff
                                mean_diff_line = lines[4]
                                mean_diff_rate_spread = float(mean_diff_line.split()[-1])

                                # For p-value
                                p_value_line = lines[8]
                                p_value_rate_spread = float(p_value_line.split()[-1])

                                # Round the mean diff to 2 decimal places and the p-value to 4
                                mean_diff_rate_spread = round(mean_diff_rate_spread, 2)
                                p_value_rate_spread = round(p_value_rate_spread, 4)
                                
                                if p_value_rate_spread <= 0.05 and mean_diff_rate_spread >= 0.15:
                                    # Print the Rate Spread mean diff and p-value when they pass the fixed 0.05 / 0.15 thresholds checked above
                                    #display(HTML('         ' + BOLD + f'Rate Spread ({group_name})' + END))
                                    display(HTML('         ' + f'Rate Spread BPS Diff: {BOLD}{mean_diff_rate_spread}{END}'))
                                    display(HTML('         ' + f'Rate Spread p-value: {p_value_rate_spread}'))
                                    

                                    if show_stats == "Include ":
                                        display(HTML("<br>"))
                                        display(HTML('         ' + BOLD + UNDERLINE + f'Statistical Results (Rate Spread)' + END))
                                        display(HTML(f'<pre>{res.summary}</pre>'))
                                        

                                # Create a box plot of the AIP values grouped by Age using seaborn, with the individual loans overlaid as a strip plot
                                if show_plot == "Boxplot ":
                                    display(HTML("<br>"))
                                    display(HTML("      " + BOLD + UNDERLINE + f"AIP Boxplot" + END))
                                    display(HTML("<br>"))
                                    plt.figure(figsize=(10, 6))
                                    # Your original color
                                    original_color = (240, 240, 240)
                                    # Convert to RGB color for seaborn
                                    seaborn_color = tuple([val / 255 for val in original_color]) + (0.,)  # Add alpha for transparency
                                    ax = sns.boxplot(x="Age", y="AIP", data=group, color=seaborn_color, showmeans=True, linewidth=0.6, fliersize=0, meanprops={ "marker": "X", "markerfacecolor": "yellow","markeredgecolor": "black", "markersize": "8",},)
                                    for patch in ax.artists:
                                        patch.set_zorder(1)
                                    strip = sns.stripplot(x="Age", y="AIP", hue="Age", data=group, jitter=0.4, dodge=False, linewidth=1, legend=False, size=6,)
                                    for collection in strip.collections:
                                        collection.set_zorder(2)
                                    #plt.title(f"{anlys_lvl}: {group_name} ({loan_type} Loans)")
                                    plt.show()
                                    
                                    
                                    # Create a box plot of the Rate Spread values grouped by Age using seaborn, with the individual loans overlaid as a strip plot
                                    if show_plot == "Boxplot " and p_value_rate_spread <= 0.05 and mean_diff_rate_spread >= 0.15:
                                        display(HTML("<br>"))
                                        display(HTML("      " + BOLD + UNDERLINE + f"Rate Spread Boxplot" + END))
                                        display(HTML("<br>"))
                                        plt.figure(figsize=(10, 6))
                                        # Your original color
                                        original_color = (240, 240, 240)
                                        # Convert to RGB color for seaborn
                                        seaborn_color = tuple([val / 255 for val in original_color]) + (0.,)  # Add alpha for transparency
                                        ax = sns.boxplot(x="Age", y="Rate_Spread", data=group, color=seaborn_color, showmeans=True, linewidth=0.6, fliersize=0, meanprops={ "marker": "X", "markerfacecolor": "yellow","markeredgecolor": "black", "markersize": "8",},)
                                        for patch in ax.artists:
                                            patch.set_zorder(1)
                                        strip = sns.stripplot(x="Age", y="Rate_Spread", hue="Age", data=group, jitter=0.4, dodge=False, linewidth=1, legend=False, size=6,)
                                        for collection in strip.collections:
                                            collection.set_zorder(2)
                                        #plt.title(f"{anlys_lvl}: {group_name} ({loan_type} Loans)")
                                        plt.show()
                                        

                                # Print the list of loans included in the analysis
                                if show_plot == "List of Loans ":
                                    group = group.sort_values("Age", ascending=False)
                                    display(HTML("<br>"))
                                    display(HTML("      " + BOLD + UNDERLINE + f"List of Loans" + END))
                                    display(HTML("<br>"))

                                    # Use to_html() to format the DataFrame as an HTML table and exclude the index
                                    html_table = group.to_html(index=False)

                                    # Add inline CSS to adjust the font size
                                    html_table = ("<style> table {font-size: 0.9em;} </style>" + html_table)
                                    display(HTML(html_table))
                                    

            elif analysis == "Sex" or analysis == "Ethnicity" or analysis == "Race":
                pass
       
##### Group the SEX data by Levels  #####
            if analysis == "All" or analysis == "Sex":
                if anlys_lvl == "Branch ":
                    grouped_gender_data = gender.groupby("branchname")
                elif anlys_lvl == "MSA/MD ":
                    grouped_gender_data = gender.groupby("MSA_MD_Name")
                elif anlys_lvl == "Aggregate ":
                    grouped_gender_data = gender.groupby(lambda _: "")

                #display(HTML("   " + ITALIC + BOLD + f"Sex Analysis" + END))
                #display(HTML("<br>"))

                # Loop through each group and perform a t-test
                for group_name, group in grouped_gender_data:
                    
                    # Check if there are exactly two levels in the 'Sex' column
                    if len(group['Sex'].unique()) == 2:
                    
                        # Check if there are enough observations for each level
                        if group["Sex"].value_counts().min() >= min_observations:

                            # Check if the total sample size of the group is at least 30, if the checkbox is checked
                            if not checkbox or len(group) >= 30:
                            
                                # T-Test for 'AIP'
                                res.ttest(df=group, xfac="Sex", res="AIP", evar=False, test_type=2)

                                # Extract the Mean Diff from the summary
                                summary = res.summary
                                lines = summary.split("\n")
                                mean_diff_line = lines[4]
                                mean_diff = float(mean_diff_line.split()[-1])

                                # Extract the p-value from the summary
                                p_value_line = lines[8]
                                p_value = float(p_value_line.split()[-1])

                                # Round the mean_diff to 2 decimal places and the p-value to 4
                                mean_diff = round(mean_diff, 2)
                                p_value = round(p_value, 4)
                                
                                # Calculate loan count by group
                                loan_count_by_group = group["Sex"].value_counts()

                                if p_value <= stat_sign and mean_diff >= mean_diff_range:
                                    # Print the group_name, Mean and P Value, and the full Results
                                    display(HTML("<br>"))
                                    display(HTML("      " + BOLD + group_name + END))
                                    display(HTML("      " + f"Group: Female"))
                                    display(HTML("      " + f"Loan Type: {loan_type}"))
                                    display(HTML("      " + f"BPS Diff: {BOLD}{mean_diff}{END}"))
                                    display(HTML("      " + f"p-value: {p_value}"))
                                    for group_name, count in loan_count_by_group.items():
                                        display(HTML("      " + f"Loan Count ({group_name}): {count}"))
                                    

                                    if show_stats == "Include ":
                                        display(HTML("<br>"))
                                        display(HTML("      " + BOLD + UNDERLINE + f"Statistical Results (AIP)" + END))
                                        display(HTML(f"<pre>{res.summary}</pre>"))
                                        
                                        
                                    # T-Test for 'Rate_Spread'
                                    res.ttest(df=group, xfac="Sex", res="Rate_Spread", evar=False, test_type=2)
                                    
                                    # Extract the Mean Diff and p-value for 'Rate Spread'
                                    summary = res.summary
                                    lines = summary.split('\n')

                                    # For Mean Diff
                                    mean_diff_line = lines[4]
                                    mean_diff_rate_spread = float(mean_diff_line.split()[-1])

                                    # For p-value
                                    p_value_line = lines[8]
                                    p_value_rate_spread = float(p_value_line.split()[-1])

                                    # Round the mean diff to 2 decimal places and the p-value to 4
                                    mean_diff_rate_spread = round(mean_diff_rate_spread, 2)
                                    p_value_rate_spread = round(p_value_rate_spread, 4)
                                
                                    if p_value_rate_spread <= stat_sign and mean_diff_rate_spread >= mean_diff_range:
                                        # Print Rate Spread, Mean and P Value, if the p-value is less than or equal to the significance level
                                        #display(HTML('&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;' + BOLD + f'Rate Spread ({group_name})' + END))
                                        display(HTML('&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;' + f'Rate Spread BPS Diff: {BOLD}{mean_diff_rate_spread}{END}'))
                                        significance = "Statistically significant" if p_value_rate_spread <= 0.05 else "Not Statistically Significant"
                                        display(HTML('&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;' + f'Rate Spread p-value: {p_value_rate_spread}, {significance}'))
                                        
                                    
                                        if show_stats == "Include ":
                                            display(HTML("<br>"))
                                            display(HTML('&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;' + BOLD + UNDERLINE + f'Statistical Results (Rate Spread)' + END))
                                            display(HTML(f'<pre>{res.summary}</pre>'))
                                            

                                    # Create a box plot of the AIP values grouped by Sex using seaborn, with the individual loans overlaid as a strip plot
                                    if show_plot == "Boxplot ":
                                        display(HTML("<br>"))
                                        display(HTML("&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;" + BOLD + UNDERLINE + f"AIP Boxplot" + END))
                                        display(HTML("<br>"))
                                        plt.figure(figsize=(10, 6))
                                        # Your original color
                                        original_color = (240, 240, 240)
                                        # Convert to RGB color for seaborn
                                        seaborn_color = tuple([val / 255 for val in original_color]) + (0.,)  # Add alpha for transparency
                                        ax = sns.boxplot( x="Sex", y="AIP", data=group, color=seaborn_color, showmeans=True, linewidth=0.6, fliersize=0, meanprops={"marker": "X", "markerfacecolor": "yellow", "markeredgecolor": "black", "markersize": "8",},)
                                        for patch in ax.artists:
                                            patch.set_zorder(1)
                                        strip = sns.stripplot( x="Sex", y="AIP", hue="Sex", data=group, jitter=0.4, dodge=False, linewidth=1, legend=False, size=6,)
                                        for collection in strip.collections:
                                            collection.set_zorder(2)
                                        plt.show()
                                        
                                        
                                        # Create a box plot of the Rate Spread values grouped by Sex using seaborn, with the individual loans overlaid as a strip plot
                                        if show_plot == "Boxplot " and p_value_rate_spread <= 0.05 and mean_diff_rate_spread >= 0.15:
                                            display(HTML("<br>"))
                                            display(HTML("&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;" + BOLD + UNDERLINE + f"Rate Spread Boxplot" + END))
                                            display(HTML("<br>"))
                                            plt.figure(figsize=(10, 6))
                                            # Your original color
                                            original_color = (240, 240, 240)
                                            # Convert to RGB color for seaborn
                                            seaborn_color = tuple([val / 255 for val in original_color]) + (0.,)  # Add alpha for transparency
                                            ax = sns.boxplot( x="Sex", y="Rate_Spread", data=group, color=seaborn_color, showmeans=True, linewidth=0.6, fliersize=0, meanprops={"marker": "X", "markerfacecolor": "yellow", "markeredgecolor": "black", "markersize": "8",},)
                                            for patch in ax.artists:
                                                patch.set_zorder(1)
                                            strip = sns.stripplot( x="Sex", y="Rate_Spread", hue="Sex", data=group, jitter=0.4, dodge=False, linewidth=1, legend=False, size=6,)
                                            for collection in strip.collections:
                                                collection.set_zorder(2)
                                            plt.show()
                                            

                                    # Print the list of loans included in the analysis
                                    if show_plot == "List of Loans ":
                                        group = group.sort_values("Sex", ascending=False)
                                        display(HTML("<br>"))
                                        display(HTML("&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;" + BOLD + UNDERLINE + f"List of Loans" + END))
                                        display(HTML("<br>"))
                                        # Use to_html() to format the DataFrame as an HTML table and exclude the index
                                        html_table = group.to_html(index=False)

                                        # Add inline CSS to adjust the font size
                                        html_table = ("<style> table {font-size: 0.9em;} </style>" + html_table)
                                        display(HTML(html_table))
                                        

            elif analysis == "Age" or analysis == "Ethnicity" or analysis == "Race":
                pass
                                                
   
    # Create a radio button widget with options for Level of Analysis
    anlys_lvl_widget = widgets.ToggleButtons(
        options=["Branch ", "MSA/MD ", "Aggregate "],
        description='Level of Analysis:',
        disabled=False,
        button_style='', # 'success', 'info', 'warning', 'danger' or ''
        tooltips=['Branch (Default)', 'MSA/MD', 'Aggregate'],
        icons=["building", "map-marker", "globe"]
    )  
    
    # Create a toggle-button widget with options for which demographic analysis to run
    analysis_widget = widgets.ToggleButtons(
        options=["All", "Race", "Ethnicity", "Age", "Sex"],
        description="Demographic Analysis:",
        disabled=False,
        button_style="",  # 'success', 'info', 'warning', 'danger' or ''
        tooltips=["All (Default)", "Race", "Ethnicity", "Age", "Sex"],
    )
    
    # Create a radio button widget with options for statistically significant results
    stat_sign_widget = widgets.ToggleButtons(
        options=[0.05, 0.01, 0.2, 1.0],
        description="Statistical Significance (p-value):",
        disabled=False,
        button_style="",  # 'success', 'info', 'warning', 'danger' or ''
        tooltips=["95% (Default)", "99%", "80%", "100%"],
    )

    # Create a radio button widget with options for mean diff ranges
    mean_diff_widget = widgets.ToggleButtons(
        options=[0.25, 0.20, 0.15, 0.10, 0.03],
        description="Mean Diff (BPS):",
        disabled=False,
        button_style="",  # 'success', 'info', 'warning', 'danger' or ''
        tooltips=[],
    )
    
    # Create a radio button widget with options for minimum number of observations
    min_observations_widget = widgets.ToggleButtons(
        options=[10, 20, 30],
        description='Min Observations per Group (Loan Count):',
        disabled=False,
        button_style='', # 'success', 'info', 'warning', 'danger' or ''
        tooltips=['10 Loans (Default)', '20 Loans', '30 Loans'],
    )

    # Create a radio button widget with options for showing statistic results
    show_stats_widget = widgets.ToggleButtons(
        options=["Exclude ", "Include "],
        description="Statistical Results:",
        disabled=False,
        button_style="",  # 'success', 'info', 'warning', 'danger' or ''
        tooltips=["Exclude the stats (Default)", "Include the stats"],
        icons=["minus-square", "plus-square"],
    )

    # Create a radio button widget with options for showing plots
    show_plot_widget = widgets.ToggleButtons(
        options=["None ",  "Boxplot ", "List of Loans "],
        description="Charts and List of Loans:",
        disabled=False,
        button_style="",  # 'success', 'info', 'warning', 'danger' or ''
        tooltips=["Exclude the plots (Default)", "Include the plots"],
        icons=["ban", "area-chart", "list"],
    )
    
    # Create a checkbox widget
    checkbox_widget = widgets.Checkbox(
        value=True,
        description='Enforce Minimum Population of 30 (n=30)',
        disabled=False,
        indent=False
    )

    # Use the interact function to apply the selected filters to the code
    interact(apply_filters, anlys_lvl=anlys_lvl_widget, analysis=analysis_widget, stat_sign=stat_sign_widget, mean_diff_range=mean_diff_widget, min_observations=min_observations_widget,  show_stats=show_stats_widget, show_plot=show_plot_widget, checkbox=checkbox_widget);

# Wire the selection handler into the chooser so that picking a file runs
# on_file_selected.
fc.register_callback(on_file_selected)

# Bold heading shown above the chooser, telling the user what to select.
title_widget = HTML('<b>Select Dataset for Race Pricing Analysis</b>')

# Render the heading first and the chooser directly beneath it.
display(title_widget, fc)

# The Output widget (`output`) is currently not rendered; the call below is
# left disabled.
#display(output)

FileChooser(path='C:\Users\colby\OneDrive\Documents\Data Analysis\Python_Project_Fair_Lending_Analysis\ipynb_f…

interactive(children=(ToggleButtons(description='Level of Analysis:', icons=('building', 'map-marker', 'globe'…