In [None]:
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# German Credit Risk Dataset - Exploratory Data Analysis\n",
    "\n",
    "This notebook provides a comprehensive exploration of the German Credit dataset, preparing it for multi-class credit risk assessment.\n",
    "\n",
    "## Objectives:\n",
    "1. **Data Loading & Overview**: Load and examine the dataset structure\n",
    "2. **Feature Analysis**: Detailed analysis of each feature\n",
    "3. **Target Distribution**: Understand the original binary classification\n",
    "4. **Multi-Class Preparation**: Explore strategies for creating risk categories\n",
    "5. **Visualization**: Create comprehensive visualizations\n",
    "6. **Data Quality**: Assess missing values, outliers, and data consistency"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Import necessary libraries\n",
    "import sys\n",
    "import os\n",
    "# Make the project's src/ directory importable; the path is relative to the\n",
    "# directory the notebook is run from.\n",
    "sys.path.append('../src')\n",
    "\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "import plotly.express as px\n",
    "import plotly.graph_objects as go\n",
    "from plotly.subplots import make_subplots\n",
    "import warnings\n",
    "warnings.filterwarnings('ignore')\n",
    "\n",
    "# Import custom modules\n",
    "from data.data_loader import GermanCreditDataLoader\n",
    "from data.risk_categorizer import RiskCategorizer\n",
    "\n",
    "# Set plotting style.\n",
    "# Matplotlib 3.6 renamed the bundled seaborn style sheets to 'seaborn-v0_8';\n",
    "# older releases only know the legacy name 'seaborn'. Pick whichever one is\n",
    "# installed instead of raising OSError on older environments.\n",
    "PLOT_STYLE = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'\n",
    "plt.style.use(PLOT_STYLE)\n",
    "sns.set_palette(\"husl\")\n",
    "\n",
    "# Configure pandas display: show every column, cap printed rows at 100\n",
    "pd.set_option('display.max_columns', None)\n",
    "pd.set_option('display.max_rows', 100)\n",
    "\n",
    "print(\"✅ Libraries imported successfully!\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1. Data Loading & Initial Overview"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build the project loader and unpack the feature table and the target labels\n",
    "loader = GermanCreditDataLoader()\n",
    "features, targets = loader.load_data()\n",
    "\n",
    "# Report dimensions, column names and the distinct label values in one go\n",
    "overview_lines = [\n",
    "    f\"Dataset Shape: {features.shape}\",\n",
    "    f\"Target Shape: {targets.shape}\",\n",
    "    f\"\\nFeature Names: {list(features.columns)}\",\n",
    "    f\"\\nTarget Values: {sorted(targets.unique())}\",\n",
    "]\n",
    "print(\"\\n\".join(overview_lines))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Ask the loader for its summary of the dataset\n",
    "data_info = loader.get_data_info()\n",
    "\n",
    "banner = \"=\" * 60\n",
    "print(banner)\n",
    "print(\"DATASET INFORMATION\")\n",
    "print(banner)\n",
    "\n",
    "# One upper-cased heading per entry; dict-valued entries are expanded one\n",
    "# key per line, anything else is printed as a single indented value.\n",
    "for key, value in data_info.items():\n",
    "    print(f\"\\n{key.upper()}:\")\n",
    "    if not isinstance(value, dict):\n",
    "        print(f\"  {value}\")\n",
    "        continue\n",
    "    for k, v in value.items():\n",
    "        print(f\"  {k}: {v}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Peek at the top of the feature table\n",
    "print(\"First 5 rows of the dataset:\")\n",
    "display(features.head(n=5))\n",
    "\n",
    "# Row counts per target label, ordered by label value rather than by frequency\n",
    "label_counts = targets.value_counts().sort_index()\n",
    "print(\"\\nTarget distribution:\")\n",
    "display(label_counts)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keep only the numeric-dtype columns, then show their describe() table\n",
    "numerical_features = features.select_dtypes(include=np.number)\n",
    "\n",
    "print(\"Statistical Summary for Numerical Features:\")\n",
    "display(numerical_features.describe())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2. Target Variable Analysis"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Sample counts per target label, in label order.\n",
    "# The hard-coded labels below assume exactly the two labels 1 and 2.\n",
    "target_counts = targets.value_counts().sort_index()\n",
    "target_labels = ['Good Credit (1)', 'Bad Credit (2)']\n",
    "colors = ['#2E8B57', '#DC143C']\n",
    "\n",
    "fig, axes = plt.subplots(1, 2, figsize=(15, 6))\n",
    "count_ax, share_ax = axes\n",
    "\n",
    "# Left panel: absolute counts, each bar annotated with its value just above the top\n",
    "count_ax.bar(target_labels, target_counts.values, color=colors, alpha=0.8, edgecolor='black')\n",
    "count_ax.set_title('Target Distribution (Count)', fontsize=14, fontweight='bold')\n",
    "count_ax.set_ylabel('Count')\n",
    "for bar_pos, bar_height in enumerate(target_counts.values):\n",
    "    count_ax.text(bar_pos, bar_height + 10, str(bar_height), ha='center', fontweight='bold')\n",
    "\n",
    "# Right panel: the same counts as percentage shares\n",
    "share_ax.pie(\n",
    "    target_counts.values,\n",
    "    labels=target_labels,\n",
    "    colors=colors,\n",
    "    autopct='%1.1f%%',\n",
    "    startangle=90,\n",
    "    explode=(0.05, 0.05),\n",
    ")\n",
    "share_ax.set_title('Target Distribution (Percentage)', fontsize=14, fontweight='bold')\n",
    "\n",
    "plt.tight_layout()\n",
    "plt.show()\n",
    "\n",
    "# Text summary: totals, per-class share and the good-to-bad ratio\n",
    "print(f\"Total samples: {len(targets)}\")\n",
    "print(f\"Good Credit (1): {target_counts[1]} ({target_counts[1]/len(targets)*100:.1f}%)\")\n",
    "print(f\"Bad Credit (2): {target_counts[2]} ({target_counts[2]/len(targets)*100:.1f}%)\")\n",
    "print(f\"Class imbalance ratio: {target_counts[1]/target_counts[2]:.2f}:1\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3. Numerical Features Analysis"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Identify numerical features\n",
    "numerical_cols = features.select_dtypes(include=[np.number]).columns.tolist()\n",
    "print(f\"Numerical features ({len(numerical_cols)}): {numerical_cols}\")\n",
    "\n",
    "# One histogram panel per numerical feature on a 2x4 grid (at most 8 are drawn)\n",
    "fig, axes = plt.subplots(2, 4, figsize=(20, 12))\n",
    "axes = axes.ravel()\n",
    "plotted_cols = numerical_cols[:len(axes)]\n",
    "\n",
    "for i, col in enumerate(plotted_cols):\n",
    "    # Split the column by credit outcome (target 1 = good, 2 = bad)\n",
    "    good_credit = features[targets == 1][col]\n",
    "    bad_credit = features[targets == 2][col]\n",
    "\n",
    "    # Use one set of bin edges for both classes. Passing bins=20 to each hist()\n",
    "    # call separately gives every class its own edges, so the overlaid bars\n",
    "    # would not cover the same value ranges.\n",
    "    bin_edges = np.histogram_bin_edges(features[col].dropna(), bins=20)\n",
    "\n",
    "    axes[i].hist(good_credit, alpha=0.7, label='Good Credit', color='#2E8B57', bins=bin_edges)\n",
    "    axes[i].hist(bad_credit, alpha=0.7, label='Bad Credit', color='#DC143C', bins=bin_edges)\n",
    "    axes[i].set_title(f'{col}', fontweight='bold')\n",
    "    axes[i].set_xlabel(col)\n",
    "    axes[i].set_ylabel('Frequency')\n",
    "    axes[i].legend()\n",
    "    axes[i].grid(True, alpha=0.3)\n",
    "\n",
    "# Hide grid slots that received no feature so no empty axes are rendered\n",
    "for unused_ax in axes[len(plotted_cols):]:\n",
    "    unused_ax.set_visible(False)\n",
    "\n",
    "plt.tight_layout()\n",
    "plt.suptitle('Distribution of Numerical Features by Credit Status', fontsize=16, fontweight='bold', y=1.02)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Box plots for numerical features\n",
    "fig, axes = plt.subplots(2, 4, figsize=(20, 12))\n",
    "axes = axes.ravel()\n",
    "\n",
    "# Combine features and a readable target label for easy plotting\n",
    "df_combined = features.copy()\n",
    "df_combined['target'] = targets.map({1: 'Good Credit', 2: 'Bad Credit'})\n",
    "\n",
    "# Fix the left-to-right box order; otherwise it follows whichever label\n",
    "# happens to appear first in the data.\n",
    "status_order = ['Good Credit', 'Bad Credit']\n",
    "\n",
    "# The grid has 8 slots, so at most the first 8 numerical columns are drawn\n",
    "plotted_cols = numerical_cols[:len(axes)]\n",
    "\n",
    "for i, col in enumerate(plotted_cols):\n",
    "    sns.boxplot(data=df_combined, x='target', y=col, order=status_order, ax=axes[i])\n",
    "    axes[i].set_title(f'{col}', fontweight='bold')\n",
    "    axes[i].set_xlabel('Credit Status')\n",
    "    axes[i].grid(True, alpha=0.3)\n",
    "\n",
    "# Hide grid slots that received no feature so no empty axes are rendered\n",
    "for unused_ax in axes[len(plotted_cols):]:\n",
    "    unused_ax.set_visible(False)\n",
    "\n",
    "plt.tight_layout()\n",
    "plt.suptitle('Box Plots of Numerical Features by Credit Status', fontsize=16, fontweight='bold', y=1.02)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Numerical columns plus the raw target label as an extra column\n",
    "numerical_data = features[numerical_cols].assign(target=targets)\n",
    "\n",
    "# Pairwise correlations between all of those columns\n",
    "correlation_matrix = numerical_data.corr()\n",
    "\n",
    "# Mask the upper triangle (diagonal included) so each pair is shown once\n",
    "mask = np.triu(np.ones(correlation_matrix.shape, dtype=bool))\n",
    "\n",
    "heatmap_options = dict(\n",
    "    mask=mask,\n",
    "    annot=True,\n",
    "    cmap='RdBu_r',\n",
    "    center=0,\n",
    "    square=True,\n",
    "    linewidths=0.5,\n",
    "    cbar_kws={\"shrink\": .8},\n",
    ")\n",
    "\n",
    "plt.figure(figsize=(12, 10))\n",
    "sns.heatmap(correlation_matrix, **heatmap_options)\n",
    "plt.title('Correlation Matrix of Numerical Features', fontsize=16, fontweight='bold')\n",
    "plt.tight_layout()\n",
    "plt.show()\n",
    "\n",
    "# Correlation of every feature with the target, strongest magnitude first\n",
    "target_correlations = (\n",
    "    correlation_matrix['target']\n",
    "    .drop('target')\n",
    "    .sort_values(key=lambda column: column.abs(), ascending=False)\n",
    ")\n",
    "print(\"\\nFeature correlations with target (sorted by absolute value):\")\n",
    "for feature, corr in target_correlations.items():\n",
    "    print(f\"{feature}: {corr:.3f}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 4. Categorical Features Analysis"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Columns stored with object dtype are treated as the categorical features\n",
    "categorical_cols = features.select_dtypes(include=['object']).columns.tolist()\n",
    "print(f\"Categorical features ({len(categorical_cols)}): {categorical_cols}\")\n",
    "\n",
    "n_rows = len(features)\n",
    "\n",
    "# Per feature: a banner, the level frequencies, then a cross-tab against the target\n",
    "for col in categorical_cols:\n",
    "    column_values = features[col]\n",
    "\n",
    "    print(f\"\\n{'='*50}\")\n",
    "    print(f\"Feature: {col.upper()}\")\n",
    "    print(f\"{'='*50}\")\n",
    "\n",
    "    # Frequency of every level, with its share of all rows\n",
    "    value_counts = column_values.value_counts()\n",
    "    print(f\"Unique values: {len(value_counts)}\")\n",
    "    print(f\"Value distribution:\")\n",
    "    for value, count in value_counts.items():\n",
    "        percentage = count / n_rows * 100\n",
    "        print(f\"  {value}: {count} ({percentage:.1f}%)\")\n",
    "\n",
    "    # Level-by-target counts, including the row/column totals\n",
    "    crosstab = pd.crosstab(column_values, targets, margins=True)\n",
    "    print(f\"\\nCross-tabulation with target:\")\n",
    "    display(crosstab)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Visualize categorical features (first 6, one per slot of the 2x3 grid)\n",
    "categorical_subset = categorical_cols[:6]\n",
    "fig, axes = plt.subplots(2, 3, figsize=(18, 12))\n",
    "axes = axes.ravel()\n",
    "\n",
    "for i, col in enumerate(categorical_subset):\n",
    "    # Counts per category level (rows) and target label (columns)\n",
    "    crosstab = pd.crosstab(features[col], targets)\n",
    "\n",
    "    # Grouped (side-by-side) bars: one bar per target label for every level.\n",
    "    # The legend labels assume the crosstab columns are ordered 1 (good), 2 (bad).\n",
    "    crosstab.plot(kind='bar', ax=axes[i], color=['#2E8B57', '#DC143C'], alpha=0.8)\n",
    "    axes[i].set_title(f'{col}', fontweight='bold')\n",
    "    axes[i].set_xlabel('')\n",
    "    axes[i].set_ylabel('Count')\n",
    "    axes[i].legend(['Good Credit', 'Bad Credit'])\n",
    "    axes[i].tick_params(axis='x', rotation=45)\n",
    "    axes[i].grid(True, alpha=0.3)\n",
    "\n",
    "# Hide grid slots left empty when fewer than 6 categorical features exist\n",
    "for unused_ax in axes[len(categorical_subset):]:\n",
    "    unused_ax.set_visible(False)\n",
    "\n",
    "plt.tight_layout()\n",
    "plt.suptitle('Distribution of Categorical Features by Credit Status', fontsize=16, fontweight='bold', y=1.02)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 5. Feature Relationships and Interactions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Four-panel overview of selected feature relationships\n",
    "fig, axes = plt.subplots(2, 2, figsize=(16, 12))\n",
    "\n",
    "# Boolean row selectors for the two credit outcomes\n",
    "good_credit_mask = targets == 1\n",
    "bad_credit_mask = targets == 2\n",
    "\n",
    "# (row selector, legend label, colour); drawn in this order on the scatter panels\n",
    "class_styles = [\n",
    "    (good_credit_mask, 'Good Credit', '#2E8B57'),\n",
    "    (bad_credit_mask, 'Bad Credit', '#DC143C'),\n",
    "]\n",
    "\n",
    "# Top row: credit amount against duration (left) and against age (right)\n",
    "scatter_panels = [\n",
    "    (axes[0, 0], 'duration_months', 'Duration (months)', 'Credit Amount vs Duration'),\n",
    "    (axes[0, 1], 'age_years', 'Age (years)', 'Credit Amount vs Age'),\n",
    "]\n",
    "for panel_ax, x_col, x_label, panel_title in scatter_panels:\n",
    "    for row_mask, legend_label, colour in class_styles:\n",
    "        panel_ax.scatter(\n",
    "            features.loc[row_mask, x_col],\n",
    "            features.loc[row_mask, 'credit_amount'],\n",
    "            alpha=0.6, label=legend_label, color=colour,\n",
    "        )\n",
    "    panel_ax.set_xlabel(x_label)\n",
    "    panel_ax.set_ylabel('Credit Amount')\n",
    "    panel_ax.set_title(panel_title, fontweight='bold')\n",
    "    panel_ax.legend()\n",
    "    panel_ax.grid(True, alpha=0.3)\n",
    "\n",
    "# Bottom-left: installment rate histogram, one series per credit outcome\n",
    "good_installment = features.loc[good_credit_mask, 'installment_rate']\n",
    "bad_installment = features.loc[bad_credit_mask, 'installment_rate']\n",
    "\n",
    "installment_ax = axes[1, 0]\n",
    "installment_ax.hist(good_installment, alpha=0.7, label='Good Credit', color='#2E8B57', bins=10)\n",
    "installment_ax.hist(bad_installment, alpha=0.7, label='Bad Credit', color='#DC143C', bins=10)\n",
    "installment_ax.set_xlabel('Installment Rate (%)')\n",
    "installment_ax.set_ylabel('Frequency')\n",
    "installment_ax.set_title('Installment Rate Distribution', fontweight='bold')\n",
    "installment_ax.legend()\n",
    "installment_ax.grid(True, alpha=0.3)\n",
    "\n",
    "# Bottom-right: number of existing credits, counted per credit outcome\n",
    "existing_credits_crosstab = pd.crosstab(features['existing_credits'], targets)\n",
    "\n",
    "credits_ax = axes[1, 1]\n",
    "existing_credits_crosstab.plot(kind='bar', ax=credits_ax, color=['#2E8B57', '#DC143C'], alpha=0.8)\n",
    "credits_ax.set_title('Existing Credits Distribution', fontweight='bold')\n",
    "credits_ax.set_xlabel('Number of Existing Credits')\n",
    "credits_ax.set_ylabel('Count')\n",
    "credits_ax.legend(['Good Credit', 'Bad Credit'])\n",
    "credits_ax.grid(True, alpha=0.3)\n",
    "\n",
    "plt.tight_layout()\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 6. Multi-Class Risk Categorization Analysis"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Derive the multi-class risk labels from the features and the binary target,\n",
    "# then ask the categorizer for its summary of the resulting distribution\n",
    "risk_categorizer = RiskCategorizer()\n",
    "risk_categories = risk_categorizer.create_risk_categories(features, targets)\n",
    "risk_analysis = risk_categorizer.analyze_risk_distribution(risk_categories)\n",
    "\n",
    "print(\"MULTI-CLASS RISK CATEGORIZATION ANALYSIS\")\n",
    "print(\"=\" * 60)\n",
    "\n",
    "# One upper-cased heading per entry; dict-valued entries are expanded one\n",
    "# key per line, anything else is printed as a single indented value.\n",
    "for key, value in risk_analysis.items():\n",
    "    print(f\"\\n{key.upper()}:\")\n",
    "    if not isinstance(value, dict):\n",
    "        print(f\"  {value}\")\n",
    "        continue\n",
    "    for k, v in value.items():\n",
    "        print(f\"  {k}: {v}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Visualize risk categories\n",
    "fig, axes = plt.subplots(1, 3, figsize=(18, 6))\n",
    "\n",
    "# Counts per risk category, ordered by category key; class_names holds the\n",
    "# matching display names in that same order\n",
    "risk_counts = risk_categories.value_counts().sort_index()\n",
    "class_names = [risk_analysis['class_names'][i] for i in risk_counts.index]\n",
    "colors = ['#228B22', '#90EE90', '#FFD700', '#FF8C00', '#DC143C']\n",
    "\n",
    "# Bar plot\n",
    "bars = axes[0].bar(class_names, risk_counts.values, color=colors[:len(risk_counts)], alpha=0.8, edgecolor='black')\n",
    "axes[0].set_title('Risk Category Distribution', fontweight='bold', fontsize=14)\n",
    "axes[0].set_ylabel('Count')\n",
    "axes[0].tick_params(axis='x', rotation=45)\n",
    "for i, v in enumerate(risk_counts.values):\n",
    "    axes[0].text(i, v + 5, str(v), ha='center', fontweight='bold')\n",
    "\n",
    "# Pie chart\n",
    "axes[1].pie(risk_counts.values, labels=class_names, colors=colors[:len(risk_counts)], \n",
    "           autopct='%1.1f%%', startangle=90)\n",
    "axes[1].set_title('Risk Category Percentages', fontweight='bold', fontsize=14)\n",
    "\n",
    "# Original vs Multi-class comparison\n",
    "binary_order = ['Good Credit', 'Bad Credit']\n",
    "comparison_data = pd.DataFrame({\n",
    "    'Original_Binary': targets.map({1: 'Good Credit', 2: 'Bad Credit'}),\n",
    "    'Multi_Class_Risk': risk_categories.map(risk_analysis['class_names'])\n",
    "})\n",
    "\n",
    "crosstab_comparison = pd.crosstab(comparison_data['Original_Binary'], \n",
    "                                 comparison_data['Multi_Class_Risk'])\n",
    "\n",
    "# pd.crosstab sorts its row and column labels, which for these string labels\n",
    "# means alphabetical order. Reorder them so the heatmap uses the same category\n",
    "# order as the bar and pie panels; fill_value=0 keeps the table integer-typed\n",
    "# for fmt='d' if a label is absent.\n",
    "crosstab_comparison = crosstab_comparison.reindex(\n",
    "    index=binary_order, columns=class_names, fill_value=0\n",
    ")\n",
    "\n",
    "sns.heatmap(crosstab_comparison, annot=True, fmt='d', cmap='YlOrRd', ax=axes[2])\n",
    "axes[2].set_title('Original vs Multi-Class Mapping', fontweight='bold', fontsize=14)\n",
    "axes[2].set_ylabel('Original Binary Classes')\n",
    "axes[2].set_xlabel('Multi-Class Risk Categories')\n",
    "\n",
    "plt.tight_layout()\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Analyze feature distributions across risk categories\n",
    "df_risk = features.copy()\n",
    "df_risk['risk_category'] = risk_categories.map(risk_analysis['class_names'])\n",
    "\n",
    "# Display names ordered by category key. Without an explicit order the boxes\n",
    "# follow whichever category appears first in the data, which differs from the\n",
    "# ordering used in the risk distribution charts.\n",
    "risk_order = [\n",
    "    risk_analysis['class_names'][category]\n",
    "    for category in risk_categories.value_counts().sort_index().index\n",
    "]\n",
    "\n",
    "# Key numerical features across risk categories (one per slot of the 2x2 grid)\n",
    "key_numerical = ['credit_amount', 'duration_months', 'age_years', 'installment_rate']\n",
    "\n",
    "fig, axes = plt.subplots(2, 2, figsize=(16, 12))\n",
    "axes = axes.ravel()\n",
    "\n",
    "for i, feature in enumerate(key_numerical):\n",
    "    sns.boxplot(data=df_risk, x='risk_category', y=feature, order=risk_order, ax=axes[i])\n",
    "    axes[i].set_title(f'{feature} by Risk Category', fontweight='bold')\n",
    "    axes[i].set_xlabel('Risk Category')\n",
    "    axes[i].tick_params(axis='x', rotation=45)\n",
    "    axes[i].grid(True, alpha=0.3)\n",
    "\n",
    "plt.tight_layout()\n",
    "plt.suptitle('Feature Distributions Across Risk Categories', fontsize=16, fontweight='bold', y=1.02)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 7. Data Quality Assessment"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Null count per column and the share of rows it represents\n",
    "missing_values = features.isnull().sum()\n",
    "missing_percentages = (missing_values / len(features)) * 100\n",
    "\n",
    "missing_df = pd.DataFrame({\n",
    "    'Feature': missing_values.index,\n",
    "    'Missing_Count': missing_values.values,\n",
    "    'Missing_Percentage': missing_percentages.values\n",
    "})\n",
    "\n",
    "print(\"MISSING VALUES ANALYSIS\")\n",
    "print(\"=\" * 40)\n",
    "\n",
    "# Show only the columns that actually contain nulls; otherwise report a clean dataset\n",
    "has_missing = missing_df['Missing_Count'] > 0\n",
    "if has_missing.any():\n",
    "    display(missing_df[has_missing])\n",
    "else:\n",
    "    print(\"✅ No missing values found in the dataset!\")\n",
    "\n",
    "# Count feature rows that exactly repeat an earlier row\n",
    "duplicate_rows = features.duplicated().sum()\n",
    "print(f\"\\nDUPLICATE ROWS: {duplicate_rows}\")\n",
    "if duplicate_rows != 0:\n",
    "    print(f\"⚠️  Found {duplicate_rows} duplicate rows\")\n",
    "else:\n",
    "    print(\"✅ No duplicate rows found!\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Outlier detection for numerical features\n",
    "def detect_outliers_iqr(df, feature, factor=1.5):\n",
    "    \"\"\"Return the rows of ``df`` whose ``feature`` value lies outside the IQR fences.\n",
    "\n",
    "    The fences are ``Q1 - factor * IQR`` and ``Q3 + factor * IQR``, where Q1 and Q3\n",
    "    are the 25th and 75th percentiles of the column and ``IQR = Q3 - Q1``.\n",
    "\n",
    "    Args:\n",
    "        df: DataFrame containing the column to inspect.\n",
    "        feature: Name of the numerical column.\n",
    "        factor: Multiplier applied to the IQR when building the fences\n",
    "            (defaults to 1.5).\n",
    "\n",
    "    Returns:\n",
    "        The subset of ``df`` with values strictly below the lower fence or\n",
    "        strictly above the upper fence.\n",
    "    \"\"\"\n",
    "    q1 = df[feature].quantile(0.25)\n",
    "    q3 = df[feature].quantile(0.75)\n",
    "    iqr = q3 - q1\n",
    "    lower_bound = q1 - factor * iqr\n",
    "    upper_bound = q3 + factor * iqr\n",
    "    return df[(df[feature] < lower_bound) | (df[feature] > upper_bound)]\n",
    "\n",
    "print(\"OUTLIER ANALYSIS (IQR Method)\")\n",
    "print(\"=\" * 40)\n",
    "\n",
    "# Collect one summary record per numerical feature while printing a line for each\n",
    "outlier_summary = []\n",
    "for feature in numerical_cols:\n",
    "    outliers = detect_outliers_iqr(features, feature)\n",
    "    outlier_count = len(outliers)\n",
    "    outlier_percentage = (outlier_count / len(features)) * 100\n",
    "\n",
    "    outlier_summary.append({\n",
    "        'Feature': feature,\n",
    "        'Outlier_Count': outlier_count,\n",
    "        'Outlier_Percentage': outlier_percentage\n",
    "    })\n",
    "\n",
    "    print(f\"{feature}: {outlier_count} outliers ({outlier_percentage:.1f}%)\")\n",
    "\n",
    "# Naming the columns explicitly keeps the frame well-formed (and the filter\n",
    "# below valid) even when there are no numerical features to summarise.\n",
    "outlier_df = pd.DataFrame(\n",
    "    outlier_summary,\n",
    "    columns=['Feature', 'Outlier_Count', 'Outlier_Percentage']\n",
    ")\n",
    "\n",
    "# Visualize outliers: only features with at least one flagged row are charted\n",
    "features_with_outliers = outlier_df[outlier_df['Outlier_Count'] > 0]\n",
    "if not features_with_outliers.empty:\n",
    "    plt.figure(figsize=(12, 6))\n",
    "    plt.bar(features_with_outliers['Feature'], features_with_outliers['Outlier_Percentage'], \n",
    "            color='#FF6B6B', alpha=0.7, edgecolor='black')\n",
    "    plt.title('Outlier Percentage by Feature', fontweight='bold', fontsize=14)\n",
    "    plt.xlabel('Features')\n",
    "    plt.ylabel('Outlier Percentage (%)')\n",
    "    plt.xticks(rotation=45)\n",
    "    plt.grid(True, alpha=0.3)\n",
    "    plt.tight_layout()\n",
    "    plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 8. Feature Importance and Risk Contribution Analysis"
   ]
   }
 ]
}
#   {
#    "cell_type": "code",{
#  "cells": [
#   {
#    "cell_type": "markdown",
#    "metadata": {},
#    "source": [
#     "# German Credit Risk Dataset - Exploratory Data Analysis\n",
#     "\n",
#     "This notebook provides a comprehensive exploration of the German Credit dataset, preparing