# In [13]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.utils.class_weight import compute_class_weight


# In [16]:
import pandas as pd
import numpy as np
import dash
from dash import dcc, html
from dash.dependencies import Input, Output
import plotly.express as px
import plotly.graph_objs as go

# Data Preparation
def prepare_data(n_samples: int = 15000, seed=42) -> pd.DataFrame:
    """Generate a simulated loan-applicant dataset.

    Numeric columns are drawn from normal distributions and clipped to a
    fixed range; categorical columns are drawn with ``np.random.choice``
    (uniformly, except ``Gender`` and ``Risk Rating`` which use explicit
    probabilities). The columns are sampled independently of each other.

    Args:
        n_samples: Number of rows to generate. Defaults to 15000.
        seed: Value passed to ``np.random.seed`` before sampling, so that
            repeated calls with the same arguments return identical data.
            Defaults to 42.

    Returns:
        A DataFrame with ``n_samples`` rows and 13 columns.

    Note:
        This reseeds NumPy's *global* random state as a side effect.
    """
    # Simulated data generation based on statistical summary
    np.random.seed(seed)

    # The draw order below determines the generated values for a given seed;
    # keep it stable so existing results remain reproducible.
    data = {
        'Age': np.random.normal(43.45, 14.91, n_samples).clip(18, 80),
        'Gender': np.random.choice(
            ['Male', 'Female', 'Non-binary'], n_samples, p=[0.6, 0.3, 0.1]
        ),
        'Education': np.random.choice(
            ["High School", "Bachelor's", "Master's", "PhD"], n_samples
        ),
        'Marital Status': np.random.choice(
            ['Single', 'Married', 'Divorced', 'Widowed'], n_samples
        ),
        'Income': np.random.normal(69933, 29163, n_samples).clip(20000, 250000),
        'Credit Score': np.random.normal(699, 57, n_samples).clip(500, 850),
        'Loan Amount': np.random.normal(27450, 12949, n_samples).clip(5000, 150000),
        'Loan Purpose': np.random.choice(
            ['Personal', 'Home', 'Auto', 'Business', 'Education'], n_samples
        ),
        'Employment Status': np.random.choice(
            ['Employed', 'Self-Employed', 'Unemployed', 'Student'], n_samples
        ),
        'Years at Current Job': np.random.normal(9.48, 5.77, n_samples).clip(0, 30),
        'Payment History': np.random.choice(
            ['Excellent', 'Good', 'Fair', 'Poor'], n_samples
        ),
        'Debt-to-Income Ratio': np.random.normal(0.35, 0.14, n_samples).clip(0, 1),
        'Risk Rating': np.random.choice(
            ['Low', 'Medium', 'High'], n_samples, p=[0.6, 0.3, 0.1]
        ),
    }

    return pd.DataFrame(data)

# Build the dataset that backs every filter and chart
df = prepare_data()

# Create the Dash application instance
app = dash.Dash(__name__)

# Page layout: title, a row of three multi-select filters, then two rows of
# two graphs each. The graphs start empty and are filled in by the callback.
app.layout = html.Div(children=[
    html.H1(
        children="Comprehensive Loan Analysis Dashboard",
        style={'textAlign': 'center', 'color': '#1E90FF'},
    ),

    # Filters row: one container per (label, dropdown id, DataFrame column).
    # Each dropdown lists the column's distinct values and starts with all
    # of them selected.
    html.Div(
        children=[
            html.Div(
                children=[
                    html.Label(caption),
                    dcc.Dropdown(
                        id=dropdown_id,
                        options=[
                            {'label': choice, 'value': choice}
                            for choice in df[column].unique()
                        ],
                        multi=True,
                        value=df[column].unique(),
                    ),
                ],
                style={'width': '30%', 'display': 'inline-block', 'padding': '10px'},
            )
            for caption, dropdown_id, column in (
                ("Select Gender:", 'gender-filter', 'Gender'),
                ("Select Risk Rating:", 'risk-filter', 'Risk Rating'),
                ("Select Loan Purpose:", 'purpose-filter', 'Loan Purpose'),
            )
        ],
        style={'backgroundColor': '#F0F8FF'},
    ),

    # Visualizations
    html.Div(children=[
        # First row of graphs
        html.Div(
            children=[
                dcc.Graph(id='age-distribution'),
                dcc.Graph(id='income-distribution'),
            ],
            style={'display': 'flex'},
        ),

        # Second row of graphs
        html.Div(
            children=[
                dcc.Graph(id='loan-amount-boxplot'),
                dcc.Graph(id='credit-score-scatter'),
            ],
            style={'display': 'flex'},
        ),
    ]),
])

# Callbacks to update graphs
@app.callback(
    [Output('age-distribution', 'figure'),
     Output('income-distribution', 'figure'),
     Output('loan-amount-boxplot', 'figure'),
     Output('credit-score-scatter', 'figure')],
    [Input('gender-filter', 'value'),
     Input('risk-filter', 'value'),
     Input('purpose-filter', 'value')]
)
def update_graphs(selected_genders, selected_risks, selected_purposes):
    """Rebuild the four dashboard figures from the current filter selections.

    Args:
        selected_genders: Values chosen in the 'gender-filter' dropdown.
        selected_risks: Values chosen in the 'risk-filter' dropdown.
        selected_purposes: Values chosen in the 'purpose-filter' dropdown.

        Any of the three may be ``None``; that is treated as "nothing
        selected" and yields an empty filtered DataFrame.

    Returns:
        A tuple ``(age_fig, income_fig, loan_boxplot, credit_scatter)`` in
        the same order as the callback's ``Output`` list.
    """
    # Series.isin() only accepts list-likes and raises TypeError on None.
    # NOTE(review): a cleared dropdown may deliver None instead of [] on some
    # Dash versions -- normalise so the callback never crashes on it.
    selected_genders = [] if selected_genders is None else selected_genders
    selected_risks = [] if selected_risks is None else selected_risks
    selected_purposes = [] if selected_purposes is None else selected_purposes

    # Keep only the rows that match all three filters
    row_mask = (
        df['Gender'].isin(selected_genders)
        & df['Risk Rating'].isin(selected_risks)
        & df['Loan Purpose'].isin(selected_purposes)
    )
    filtered_df = df[row_mask]

    # Age Distribution: histogram per gender with a box plot in the margin
    age_fig = px.histogram(
        filtered_df,
        x='Age',
        color='Gender',
        marginal='box',
        title='Age Distribution by Gender'
    )

    # Income Distribution: box plot per risk rating, split by gender
    income_fig = px.box(
        filtered_df,
        x='Risk Rating',
        y='Income',
        color='Gender',
        title='Income Distribution by Risk Rating and Gender'
    )

    # Loan Amount Boxplot: box plot per loan purpose, split by risk rating
    loan_boxplot = px.box(
        filtered_df,
        x='Loan Purpose',
        y='Loan Amount',
        color='Risk Rating',
        title='Loan Amount by Purpose and Risk Rating'
    )

    # Credit Score Scatter: marker size encodes the loan amount
    credit_scatter = px.scatter(
        filtered_df,
        x='Credit Score',
        y='Debt-to-Income Ratio',
        color='Risk Rating',
        size='Loan Amount',
        hover_data=['Age', 'Gender', 'Income'],
        title='Credit Score vs Debt-to-Income Ratio'
    )

    return age_fig, income_fig, loan_boxplot, credit_scatter

# Run the server
if __name__ == '__main__':
    # Newer Dash releases start the dev server with app.run(); run_server is
    # the legacy spelling (NOTE(review): reported as removed in Dash 3.x --
    # confirm against the installed version). Prefer app.run when present and
    # only fall back to run_server on older installs that lack it.
    start_server = getattr(app, 'run', None) or app.run_server
    start_server(debug=True)