In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

In [None]:
# Load the loan dataset and strip leading/trailing whitespace from the column
# names so that columns can be looked up by their plain names below.
data = pd.read_csv("loan_approval_dataset.csv")
data.columns = data.columns.str.strip()

target_column = 'loan_status'
numerical_features = data.select_dtypes(include=[np.number])
unique_targets = data[target_column].unique()

# Plot every numeric feature against the target: one figure per feature, with
# one semi-transparent histogram per target class overlaid on the same axes.
for column in numerical_features.columns:
    # Compute a single set of bin edges from the whole column. Letting each
    # hist() call pick its own 20 bins would give every class a different bin
    # width and range, making the overlaid bar heights incomparable.
    # NaNs are dropped first because the edge computation needs a finite range.
    bin_edges = np.histogram_bin_edges(data[column].dropna().to_numpy(), bins=20)

    fig, ax = plt.subplots(figsize=(8, 6))
    for target in unique_targets:
        subset = data[data[target_column] == target]
        ax.hist(subset[column].dropna(), bins=bin_edges, alpha=0.7, label=f'{target}')
    ax.set_title(f"{column} vs {target_column} (Supervised)")
    ax.set_xlabel(column)
    ax.set_ylabel("Frequency")
    ax.legend(title=target_column)
    ax.grid(axis='y', linestyle='--', alpha=0.7)
    plt.show()

### Create a histogram for each numerical feature

In [None]:
# Restrict the frame to its numeric columns; each of them gets its own plot.
numerical_features = data.select_dtypes(include=[np.number])

# Draw one standalone figure per numeric column showing its distribution.
for feature_name in numerical_features.columns:
    feature_values = numerical_features[feature_name]

    fig, ax = plt.subplots(figsize=(6, 4))
    ax.hist(feature_values, bins=20, edgecolor='k', alpha=0.7)
    ax.set_title(f"Histogram of {feature_name}")
    ax.set_xlabel(feature_name)
    ax.set_ylabel("Frequency")
    # Horizontal guide lines only, to make bar heights easier to read.
    ax.grid(axis='y', linestyle='--', alpha=0.7)
    plt.show()


### Create a correlation matrix of the numerical features

In [None]:
# Pairwise correlations between the numeric columns, using DataFrame.corr()
# with its default settings.
correlation_matrix = numerical_features.corr()

# Show the matrix as an annotated heatmap so each coefficient can be read
# directly from its cell (two decimals, diverging colour map).
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(
    correlation_matrix,
    annot=True,
    fmt=".2f",
    cmap="coolwarm",
    cbar=True,
    ax=ax,
)
ax.set_title("Correlation Matrix of Numerical Features")
plt.show()