In [1]:
# Test Cell 1
print("--- Running Test Cell 1 ---")

my_variable_1 = "Hello from Cell 1"
my_variable_2 = 12345

print(f"Variable 1 is: '{my_variable_1}'")
print(f"Variable 2 is: {my_variable_2}")
print("✅ Test Cell 1 Finished. Variables have been created.")

--- Running Test Cell 1 ---
Variable 1 is: 'Hello from Cell 1'
Variable 2 is: 12345
✅ Test Cell 1 Finished. Variables have been created.


In [2]:
# Test Cell 2
print("--- Running Test Cell 2 ---")
print("Attempting to access variables from Cell 1...")

try:
    # Check if the variables from the previous cell still exist
    if 'my_variable_1' in locals() or 'my_variable_1' in globals():
        print(f"SUCCESS! Found variable 1: '{my_variable_1}'")
    else:
        print("FAILURE! Could not find variable 1.")

    if 'my_variable_2' in locals() or 'my_variable_2' in globals():
         print(f"SUCCESS! Found variable 2: {my_variable_2}")
    else:
         print("FAILURE! Could not find variable 2.")

    print("\n✅ --- Diagnosis Complete ---")

except NameError:
    print("\n❌❌❌ CRITICAL ERROR: A NameError occurred. The notebook is definitely losing its memory between cells.")

--- Running Test Cell 2 ---
Attempting to access variables from Cell 1...
SUCCESS! Found variable 1: 'Hello from Cell 1'
SUCCESS! Found variable 2: 12345

✅ --- Diagnosis Complete ---


In [3]:
# --- All necessary imports for this step ---
import pandas as pd
import numpy as np
import torch
import gc

print("--- Cell Execution Started ---")

# --- Step 1: Define the Configuration Class ---
class Configuration:
    """A centralized class to hold all project configurations."""
    def __init__(self):
        self.N_SAMPLES: int = 5000
        self.RANDOM_STATE: int = 42
        self.DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# --- Step 2: Instantiate the Configuration ---
try:
    CONFIG = Configuration()
    print("✅ Configuration object created successfully.")
except Exception as e:
    print(f"❌ Error creating Configuration object: {e}")

# --- Step 3: Define the Data Generator Class ---
class SyntheticSMEDataGenerator:
    """Generates a high-fidelity synthetic dataset for SME credit risk assessment."""
    def __init__(self, config: Configuration):
        self.config = config
        self.rng = np.random.default_rng(self.config.RANDOM_STATE)
        self.n_samples = self.config.N_SAMPLES
        print(f"✅ SyntheticSMEDataGenerator initialized for {self.n_samples} samples.")

    def _generate_base_profile(self):
        """Generates the base probability of default."""
        self.base_business_quality = self.rng.normal(0.5, 0.2, self.n_samples)
        self.base_business_quality = np.clip(self.base_business_quality, 0, 1)
        self.location_zone = self.rng.choice(['Zone A (Urban)', 'Zone B (Suburban)', 'Zone C (Rural)'], self.n_samples, p=[0.5, 0.3, 0.2])
        location_bias = {'Zone A (Urban)': 0.0, 'Zone B (Suburban)': -0.05, 'Zone C (Rural)': 0.1}
        self.location_adjustment = np.array([location_bias[zone] for zone in self.location_zone])
        prob_default = 1 / (1 + np.exp(-((0.5 - self.base_business_quality) * 5 + self.location_adjustment * 2)))
        self.credit_default = self.rng.binomial(1, np.clip(prob_default, 0.01, 0.99))

    def _generate_features(self):
        """Generates all feature categories."""
        self.avg_monthly_inflow = (self.base_business_quality + self.rng.normal(0, 0.1, self.n_samples)) * 50000 + 10000
        self.balance_volatility = (1 - self.base_business_quality + self.rng.normal(0, 0.1, self.n_samples)) * 5000 + 500
        self.num_late_utility_payments_last_year = self.rng.poisson((1 - self.base_business_quality) * 5)
        self.website_quality_score = np.clip(self.base_business_quality + self.rng.normal(0, 0.15, self.n_samples), 0, 1) * 10
        self.online_review_score = np.clip(self.base_business_quality * 5 + self.rng.normal(0, 0.5, self.n_samples), 1, 5)
        positive_keywords = ["experienced", "proven track record", "innovative", "strategic", "successful exit", "industry veteran"]
        negative_keywords = ["first-time founder", "learning curve", "market challenges", "pivoting", "operational hurdles", "bootstrap"]
        self.founder_bios = []
        for quality in self.base_business_quality:
            if quality > 0.6:
                text = f"An {self.rng.choice(positive_keywords)} leader with a {self.rng.choice(positive_keywords)}. Focused on scalable solutions and sustainable growth."
            elif quality < 0.4:
                text = f"A passionate {self.rng.choice(negative_keywords)}. Currently navigating {self.rng.choice(negative_keywords)} and adapting the business model."
            else:
                text = "A driven entrepreneur with a solid plan, building on previous experience to secure a strong product-market fit."
            self.founder_bios.append(text)

    def generate_dataset(self) -> pd.DataFrame:
        """Generates and returns the complete dataset."""
        print("⚙️ Generating synthetic data...")
        self._generate_base_profile()
        self._generate_features()
        df = pd.DataFrame({
            'sme_id': [f"SME_{1000+i}" for i in range(self.n_samples)],
            'credit_default': self.credit_default,
            'business_location_zone': self.location_zone,
            'avg_monthly_inflow': self.avg_monthly_inflow,
            'balance_volatility': self.balance_volatility,
            'num_late_utility_payments_last_year': self.num_late_utility_payments_last_year,
            'website_quality_score': self.website_quality_score,
            'online_review_score': self.online_review_score,
            'founder_bio': self.founder_bios
        })
        print(f"✅ Synthetic dataset generated with {len(df)} records.")
        return df

# --- Step 4: Execute the Data Generation ---
try:
    print("\n--- Attempting to Generate Data ---")
    data_generator = SyntheticSMEDataGenerator(CONFIG)
    raw_df = data_generator.generate_dataset()
    print("\n--- Data Generation Cell Complete. `raw_df` variable is created. ---")
    # Display the head of the created dataframe to confirm success
    display(raw_df.head())
except Exception as e:
    print(f"❌❌❌ An error occurred during execution: {e}")

# --- Step 5: Clean Memory ---
gc.collect()
if torch.cuda.is_available():
    torch.cuda.empty_cache()

--- Cell Execution Started ---
✅ Configuration object created successfully.

--- Attempting to Generate Data ---
✅ SyntheticSMEDataGenerator initialized for 5000 samples.
⚙️ Generating synthetic data...
✅ Synthetic dataset generated with 5000 records.

--- Data Generation Cell Complete. `raw_df` variable is created. ---


Unnamed: 0,sme_id,credit_default,business_location_zone,avg_monthly_inflow,balance_volatility,num_late_utility_payments_last_year,website_quality_score,online_review_score,founder_bio
0,SME_1000,0,Zone A (Urban),43865.642581,2382.378205,4,5.198247,2.508531,"A driven entrepreneur with a solid plan, build..."
1,SME_1001,1,Zone A (Urban),26076.910921,3842.27586,5,0.117698,1.044701,A passionate operational hurdles. Currently na...
2,SME_1002,0,Zone B (Suburban),48337.212965,2217.426395,1,6.3156,4.134901,An successful exit leader with a innovative. F...
3,SME_1003,0,Zone B (Suburban),37014.762804,2743.876049,0,5.57063,3.441893,An innovative leader with a strategic. Focused...
4,SME_1004,1,Zone A (Urban),13324.801085,4523.096012,5,3.186333,1.0,A passionate learning curve. Currently navigat...


In [None]:
# --- All necessary imports for this step ---
import pandas as pd
import numpy as np
import torch
import gc
from sentence_transformers import SentenceTransformer
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from nltk.tokenize import word_tokenize
from numba import jit
import time

print("--- Cell 2: Feature Engineering ---")

# --- Step 1: Define the Configuration and Helper functions ---
# We redefine these here to make the cell completely self-contained.
class Configuration:
    """A centralized class to hold all project configurations."""
    def __init__(self):
        self.ST_MODEL_NAME: str = 'all-MiniLM-L6-v2'
        self.DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def download_nltk_data():
    """Downloads required NLTK data if not already present."""
    required = [('sentiment', 'vader_lexicon'), ('corpora', 'stopwords'), ('tokenizers', 'punkt')]
    for package_id, resource in required:
        try:
            nltk.data.find(f"{package_id}/{resource}.zip")
        except LookupError:
            print(f"⬇️ Downloading NLTK data: {resource}...")
            nltk.download(resource.split('.')[0], quiet=True)

CONFIG = Configuration()
download_nltk_data()

# --- Step 2: Define the Feature Engineer Class ---
class FeatureEngineer:
    """Handles all feature engineering tasks in a structured pipeline."""
    def __init__(self, config: Configuration):
        self.config = config
        print("FeatureEngineer initialized.")
        try:
            print(f"Loading SentenceTransformer model ('{self.config.ST_MODEL_NAME}')... This may take a minute.")
            self.st_model = SentenceTransformer(self.config.ST_MODEL_NAME, device=self.config.DEVICE)
            self.sentiment_analyzer = SentimentIntensityAnalyzer()
            print("✅ NLP models loaded successfully.")
        except Exception as e:
            raise RuntimeError(f"Failed to load NLP models: {e}")

    @staticmethod
    @jit(nopython=True)
    def _calculate_stability_score(inflows, volatilities):
        """A Numba-accelerated function for a custom stability score."""
        scores = np.zeros(len(inflows))
        for i in range(len(inflows)):
            ratio = inflows[i] / (volatilities[i] + 1e-6)
            scores[i] = 1 - np.exp(-ratio / 100) # Score between 0 and 1
        return scores

    def transform(self, df: pd.DataFrame) -> pd.DataFrame:
        """Applies all feature engineering steps."""
        print("\n--- Starting Feature Engineering Pipeline ---")
        df_featured = df.copy()

        # NLP Features
        print("⚙️ Engineering NLP features from 'founder_bio'...")
        df_featured['founder_sentiment'] = df_featured['founder_bio'].apply(lambda x: self.sentiment_analyzer.polarity_scores(x)['compound'])
        
        print("🧠 Generating semantic embeddings... (This is the longest step in this cell)")
        with torch.no_grad():
            embeddings = self.st_model.encode(df_featured['founder_bio'].tolist(), show_progress_bar=True, convert_to_tensor=True, device=self.config.DEVICE)
        embedding_df = pd.DataFrame(embeddings.cpu().numpy(), index=df_featured.index)
        embedding_df.columns = [f'bio_emb_{i}' for i in range(embedding_df.shape[1])]
        df_featured = pd.concat([df_featured, embedding_df], axis=1)
        print(f"✅ Generated {embedding_df.shape[1]}-dimensional embeddings.")

        # Numba-accelerated Feature
        print("🚀 Applying Numba-accelerated function...")
        df_featured['financial_stability_score'] = self._calculate_stability_score(
            df_featured['avg_monthly_inflow'].to_numpy(),
            df_featured['balance_volatility'].to_numpy()
        )
        
        # Final cleanup
        df_featured = df_featured.drop(columns=['founder_bio'])
        print("✅ Feature Engineering Complete.")
        return df_featured

# --- Step 3: Execute the Feature Engineering ---
try:
    # This assumes 'raw_df' was created successfully in the previous cell.
    if 'raw_df' in locals():
        feature_engineer = FeatureEngineer(CONFIG)
        featured_df = feature_engineer.transform(raw_df)
        print("\n--- Feature Engineering Cell Complete. `featured_df` variable is created. ---")
        display(featured_df.head())
    else:
        print("❌ ERROR: The `raw_df` DataFrame was not found. Please run the data generation cell first.")
except Exception as e:
    print(f"❌❌❌ An error occurred during feature engineering: {e}")

# --- Step 4: Clean Memory ---
gc.collect()
if torch.cuda.is_available():
    torch.cuda.empty_cache()

--- Cell 2: Feature Engineering ---
FeatureEngineer initialized.
Loading SentenceTransformer model ('all-MiniLM-L6-v2')... This may take a minute.
✅ NLP models loaded successfully.

--- Starting Feature Engineering Pipeline ---
⚙️ Engineering NLP features from 'founder_bio'...
🧠 Generating semantic embeddings... (This is the longest step in this cell)


Batches:   0%|          | 0/157 [00:00<?, ?it/s]

In [None]:
# --- All necessary imports for this step ---
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import torch.nn.functional as F
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
import gc

print("--- Cell 3: Model Training ---")

# --- Step 1: Define the Configuration ---
class Configuration:
    """A centralized class to hold all project configurations."""
    def __init__(self):
        self.RANDOM_STATE: int = 42
        self.TEST_SIZE: float = 0.25
        self.BATCH_SIZE: int = 128
        self.EPOCHS: int = 20
        self.LEARNING_RATE: float = 0.001
        self.DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

CONFIG = Configuration()

# --- Step 2: Define the Model Trainer Class ---
class ModelTrainer:
    """Manages the training and evaluation of all specified models."""
    def __init__(self, config: Configuration):
        self.config = config
        self.preprocessor = None
        self.models = {}
        self.metrics = {}
        self.X_train, self.X_test, self.y_train, self.y_test = [None] * 4
        print("ModelTrainer initialized.")

    class _SME_DNN(nn.Module):
        """Internal DNN model definition."""
        def __init__(self, input_size):
            super().__init__()
            self.network = nn.Sequential(
                nn.Linear(input_size, 256), nn.BatchNorm1d(256), nn.ReLU(), nn.Dropout(0.4),
                nn.Linear(256, 128), nn.BatchNorm1d(128), nn.ReLU(), nn.Dropout(0.3),
                nn.Linear(128, 1), nn.Sigmoid()
            )
        def forward(self, x):
            return self.network(x)

    def _prepare_data(self, df: pd.DataFrame):
        """Splits and preprocesses the data."""
        print("\n--- Preparing Data for Modeling ---")
        TARGET = 'credit_default'
        features = [col for col in df.columns if col not in [TARGET, 'sme_id']]
        X = df[features]
        y = df[TARGET]

        numerical_features = X.select_dtypes(include=np.number).columns.tolist()
        categorical_features = X.select_dtypes(include=['object', 'category']).columns.tolist()
        
        self.preprocessor = ColumnTransformer(
            [('num', StandardScaler(), numerical_features),
             ('cat', OneHotEncoder(handle_unknown='ignore', sparse_output=False), categorical_features)],
            remainder='passthrough'
        )
        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(
            X, y, test_size=self.config.TEST_SIZE, random_state=self.config.RANDOM_STATE, stratify=y
        )
        self.X_test_orig = self.X_test.copy()
        
        self.X_train = self.preprocessor.fit_transform(self.X_train)
        self.X_test = self.preprocessor.transform(self.X_test)
        print("✅ Data prepared and preprocessed.")

    def _train_xgb(self):
        """Trains the XGBoost model."""
        print("\n--- Training XGBoost Model ---")
        model = xgb.XGBClassifier(
            objective='binary:logistic', eval_metric='logloss', use_label_encoder=False,
            random_state=self.config.RANDOM_STATE, tree_method='gpu_hist'
        )
        model.fit(self.X_train, self.y_train)
        self.models['XGBoost'] = model
        print("✅ XGBoost model trained.")

    def _train_dnn(self):
        """Trains the PyTorch DNN model."""
        print("\n--- Training PyTorch DNN Model ---")
        X_train_t = torch.tensor(self.X_train, dtype=torch.float32)
        y_train_t = torch.tensor(self.y_train.values, dtype=torch.float32).unsqueeze(1)
        train_loader = DataLoader(TensorDataset(X_train_t, y_train_t), batch_size=self.config.BATCH_SIZE, shuffle=True)
        model = self._SME_DNN(self.X_train.shape[1]).to(self.config.DEVICE)
        criterion = nn.BCELoss()
        optimizer = optim.Adam(model.parameters(), lr=self.config.LEARNING_RATE)

        for epoch in range(self.config.EPOCHS):
            model.train()
            for batch_X, batch_y in train_loader:
                batch_X, batch_y = batch_X.to(self.config.DEVICE), batch_y.to(self.config.DEVICE)
                optimizer.zero_grad()
                outputs = model(batch_X)
                loss = criterion(outputs, batch_y)
                loss.backward()
                optimizer.step()
        
        self.models['PyTorch DNN'] = model
        print(f"✅ PyTorch DNN model trained after {self.config.EPOCHS} epochs.")

    def evaluate(self):
        """Evaluates all trained models."""
        print("\n--- Evaluating Models ---")
        for name, model in self.models.items():
            if name == 'XGBoost':
                y_pred_proba = model.predict_proba(self.X_test)[:, 1]
            elif name == 'PyTorch DNN':
                model.eval()
                with torch.no_grad():
                    X_test_t = torch.tensor(self.X_test, dtype=torch.float32).to(self.config.DEVICE)
                    y_pred_proba = model(X_test_t).cpu().numpy().flatten()
            y_pred = (y_pred_proba > 0.5).astype(int)
            self.metrics[name] = {
                'Accuracy': accuracy_score(self.y_test, y_pred),
                'Precision': precision_score(self.y_test, y_pred),
                'Recall': recall_score(self.y_test, y_pred),
                'F1-Score': f1_score(self.y_test, y_pred),
                'AUC-ROC': roc_auc_score(self.y_test, y_pred_proba),
                'predictions_proba': y_pred_proba
            }
        print("✅ Models evaluated.")
        
    def run(self, df: pd.DataFrame):
        """Runs the entire training and evaluation pipeline."""
        self._prepare_data(df)
        self._train_xgb()
        self._train_dnn()
        self.evaluate()
        return self

# --- Step 3: Execute the Model Training ---
try:
    if 'featured_df' in locals():
        trainer = ModelTrainer(CONFIG).run(featured_df)
        print("\n--- Model Training Cell Complete. `trainer` object is created. ---")
    else:
        print("❌ ERROR: The `featured_df` DataFrame was not found. Please run the feature engineering cell first.")
except Exception as e:
    print(f"❌❌❌ An error occurred during model training: {e}")

# --- Step 4: Clean Memory ---
gc.collect()
if torch.cuda.is_available():
    torch.cuda.empty_cache()

In [None]:
# --- All necessary imports for this step ---
import pandas as pd
import numpy as np
import torch
import gc
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
from ipywidgets import widgets, Layout
from IPython.display import display, clear_output
from sklearn.metrics import confusion_matrix, roc_curve, recall_score
import shap

print("--- Cell 4: Interactive Dashboard (Workaround Version) ---")

# --- Define the Dashboard Class (with FigureWidget replaced) ---
class Dashboard:
    """Creates an interactive dashboard in Jupyter Notebook to explore the data, model performance, and interpretability results."""
    def __init__(self, raw_df: pd.DataFrame, trainer):
        self.raw_df = raw_df
        self.trainer = trainer
        self.tab = widgets.Tab()
        self._precompute_shap()
        self.create_tabs()
        print("✅ Dashboard Initialized. Call the .show() method to display.")

    def _precompute_shap(self):
        """Calculates SHAP values once to make the dashboard faster."""
        print("🔬 Pre-calculating SHAP values for dashboard... (This may take a moment)")
        model = self.trainer.models['XGBoost']
        X_test_proc = self.trainer.preprocessor.transform(self.trainer.X_test_orig)
        explainer = shap.TreeExplainer(model)
        self.shap_values = explainer(X_test_proc)
        print("✅ SHAP values calculated and stored.")

    def create_tabs(self):
        """Creates all the tabs for the dashboard."""
        children = [self._create_eda_tab(), self._create_performance_tab(), self._create_shap_tab(), self._create_fairness_tab()]
        self.tab.children = children
        titles = ['📊 EDA Explorer', '📈 Model Performance', '🔬 Local Interpretability', '⚖️ Fairness Audit']
        for i, title in enumerate(titles):
            self.tab.set_title(i, title)

    def _create_eda_tab(self):
        feature_dropdown = widgets.Dropdown(options=self.raw_df.columns.drop(['sme_id', 'founder_bio']), value='avg_monthly_inflow', description='Feature:', style={'description_width': 'initial'})
        output = widgets.Output()
        def on_feature_change(change):
            with output:
                clear_output(wait=True)
                feature = change['new']
                fig = make_subplots(rows=1, cols=2, subplot_titles=(f'Distribution of {feature}', f'{feature} by Credit Default'))
                if self.raw_df[feature].dtype == 'object':
                    counts = self.raw_df.groupby([feature, 'credit_default']).size().reset_index(name='count')
                    fig_cat = px.bar(counts, x=feature, y='count', color='credit_default', barmode='group')
                    for trace in fig_cat.data: fig.add_trace(trace, row=1, col=1)
                    for trace in fig_cat.data: fig.add_trace(trace, row=1, col=2)
                else:
                    fig_hist = px.histogram(self.raw_df, x=feature, color='credit_default', marginal='box', barmode='overlay', opacity=0.7)
                    fig_box = px.box(self.raw_df, x='credit_default', y=feature, color='credit_default')
                    for trace in fig_hist.data: fig.add_trace(trace, row=1, col=1)
                    for trace in fig_box.data: fig.add_trace(trace, row=1, col=2)
                fig.update_layout(height=400, title_text=f"Analysis of '{feature}'", legend_title_text='Default')
                fig.show()
        feature_dropdown.observe(on_feature_change, names='value')
        on_feature_change({'new': feature_dropdown.value})
        return widgets.VBox([feature_dropdown, output])

    def _create_performance_tab(self):
        metrics_df = pd.DataFrame(self.trainer.metrics).T.drop(columns=['predictions_proba']).round(4)
        html = widgets.HTML(value=f"<h3>Model Performance Metrics</h3>{metrics_df.to_html(classes='table table-striped')}")
        
        # --- WORKAROUND --- Changed go.FigureWidget to the standard go.Figure
        fig_roc = go.Figure()
        
        fig_roc.add_shape(type='line', line=dict(dash='dash'), x0=0, x1=1, y0=0, y1=1)
        for name, metrics in self.trainer.metrics.items():
            fpr, tpr, _ = roc_curve(self.trainer.y_test, metrics['predictions_proba'])
            fig_roc.add_trace(go.Scatter(x=fpr, y=tpr, name=f"{name} (AUC={metrics['AUC-ROC']:.3f})", mode='lines'))
        fig_roc.update_layout(title='ROC Curves', xaxis_title='False Positive Rate', yaxis_title='True Positive Rate', height=400)
        
        # We need to wrap the standard Figure in a FigureWidget to place it in the VBox
        # but the error originates from creating the FigureWidget in the first place.
        # Let's build the VBox with a placeholder and then display the figure separately if needed.
        # A simpler approach is to just use an output widget for everything.
        
        performance_output = widgets.Output()
        with performance_output:
            display(html)
            display(fig_roc) # Display the static figure

            model_dropdown = widgets.Dropdown(options=list(self.trainer.models.keys()), description='Model:')
            cm_output = widgets.Output()
            def on_cm_model_change(change):
                with cm_output:
                    clear_output(wait=True)
                    model_name = change['new']
                    y_pred = (self.trainer.metrics[model_name]['predictions_proba'] > 0.5).astype(int)
                    cm = confusion_matrix(self.trainer.y_test, y_pred)
                    fig_cm = px.imshow(cm, text_auto=True, labels=dict(x="Predicted", y="Actual", color="Count"), x=['No Default', 'Default'], y=['No Default', 'Default'], color_continuous_scale='Blues')
                    fig_cm.update_layout(title=f'Confusion Matrix for {model_name}', height=400)
                    fig_cm.show()
            model_dropdown.observe(on_cm_model_change, names='value')
            display(widgets.HBox([model_dropdown, cm_output]))
            on_cm_model_change({'new': model_dropdown.value})

        return performance_output


    def _create_shap_tab(self):
        sme_dropdown = widgets.Dropdown(options=self.trainer.X_test_orig.index, description='Select SME ID:', layout=Layout(width='50%'))
        output = widgets.Output()
        def on_sme_select(change):
            with output:
                clear_output(wait=True)
                sme_index = change['new']
                loc_index = self.trainer.X_test_orig.index.get_loc(sme_index)
                sme_info = self.raw_df.loc[sme_index]
                info_html = f"<b>SME ID:</b> {sme_info.name}<br><b>Founder Bio:</b> \"{sme_info.founder_bio}\"<br><b>Actual Outcome:</b> {'Default' if self.trainer.y_test.loc[sme_index] == 1 else 'No Default'}<br><b>Predicted Risk Score:</b> {self.trainer.metrics['XGBoost']['predictions_proba'][loc_index]:.3f}"
                display(widgets.HTML(value=info_html))
                feature_names = self.trainer.preprocessor.get_feature_names_out().tolist()
                clean_names = [name.split('__')[-1] for name in feature_names]
                base_value = self.shap_values.base_values[loc_index]
                shap_plot_values = self.shap_values.values[loc_index]
                top_features_indices = np.argsort(np.abs(shap_plot_values))[-10:]
                fig = go.Figure(go.Waterfall(
                    orientation="h", x=np.append(shap_plot_values[top_features_indices], shap_plot_values.sum()),
                    y = np.append([clean_names[i] for i in top_features_indices], ['Final Prediction']),
                    measure = np.append(["relative"] * len(top_features_indices), ["total"]),
                    base = base_value
                ))
                fig.update_layout(title=f"Local Interpretation for {sme_info.name}", height=500, yaxis=dict(autorange="reversed"))
                fig.show()
        sme_dropdown.observe(on_sme_select, names='value')
        on_sme_select({'new': sme_dropdown.value})
        return widgets.VBox([sme_dropdown, output])

    def _create_fairness_tab(self):
        attribute_dropdown = widgets.Dropdown(options=['business_location_zone'], description='Protected Attribute:')
        output = widgets.Output()
        def on_attribute_change(change):
            with output:
                clear_output(wait=True)
                attribute = change['new']
                audit_df = self.trainer.X_test_orig.copy()
                audit_df['true_default'] = self.trainer.y_test
                audit_df['predicted_default'] = (self.trainer.metrics['XGBoost']['predictions_proba'] > 0.5).astype(int)
                results = {group: recall_score(subgroup['true_default'], subgroup['predicted_default']) for group, subgroup in audit_df.groupby(attribute) if len(subgroup['true_default'].unique()) > 1}
                results_df = pd.DataFrame.from_dict(results, orient='index', columns=['Recall (Equal Opportunity)'])
                fig = px.bar(results_df, x=results_df.index, y='Recall (Equal Opportunity)', title=f'Fairness: Recall by {attribute}', text_auto='.3f', height=400)
                fig.update_yaxes(range=[0, 1])
                fig.show()
        attribute_dropdown.observe(on_attribute_change, names='value')
        on_attribute_change({'new': attribute_dropdown.value})
        return widgets.VBox([attribute_dropdown, output])

    def show(self):
        """Displays the entire dashboard."""
        display(self.tab)

# --- Execute the Dashboard (Workaround Version) ---
try:
    dashboard = Dashboard(raw_df, trainer)
    dashboard.show()
except NameError as e:
    print(f"❌❌❌ A NameError occurred: {e}. This confirms a variable was not created in a previous step.")
    print("Please use 'Kernel -> Restart & Run All' to ensure all steps run in order.")
except Exception as e:
    print(f"❌❌❌ An unexpected error occurred during dashboard creation: {e}")

In [None]:
!pip install anywidget