# Exploratory Data Analysis (EDA)
## Loan Approval Prediction
This notebook explores the loan prediction dataset (`../data/loan_prediction.csv`) to understand feature distributions, missing values, and how the features relate to the target variable, `Loan_Status`.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Use seaborn's 'whitegrid' style for the figures drawn after this cell.
sns.set_style('whitegrid')
# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline

### 1. Load Data

In [None]:
# Read the loan data from its CSV file (path is relative to this notebook's
# working directory), then preview the first rows.
csv_path = '../data/loan_prediction.csv'
df = pd.read_csv(csv_path)
df.head()

In [None]:
# Print a concise summary of the frame: column names, non-null counts and dtypes.
df.info()

In [None]:
# Descriptive statistics (count, mean, std, min, quartiles, max).
# With default arguments pandas summarises only the numeric columns
# when the frame contains any.
df.describe()

### 2. Missing Values Analysis

In [None]:
# Visualise where values are missing: the boolean null-mask of the frame is
# drawn as a heatmap (rows = records, columns = features), without a colour bar.
missing_mask = df.isnull()
plt.figure(figsize=(10, 6))
heatmap_ax = sns.heatmap(missing_mask, cmap='viridis', cbar=False)
heatmap_ax.set_title('Missing Values Heatmap')
plt.show()

In [None]:
# Number of missing entries in each column.
df.isna().sum()

### 3. Univariate Analysis

In [None]:
# Count plots for four categorical columns, one panel per column.
# A loop over the column names replaces four copy-pasted calls, and every
# panel gets its own title so the figure can be read without the code.
categorical_cols = ['Gender', 'Married', 'Education', 'Self_Employed']
fig, axes = plt.subplots(1, len(categorical_cols), figsize=(20, 5))
for col, ax in zip(categorical_cols, axes):
    sns.countplot(x=col, data=df, ax=ax)
    ax.set_title(f'{col} Distribution')
# Keep titles and tick labels of neighbouring panels from overlapping.
fig.tight_layout()
plt.show()

In [None]:
# Bar chart of how many records fall into each Loan_Status class.
plt.figure(figsize=(6, 4))
target_ax = sns.countplot(data=df, x='Loan_Status')
target_ax.set_title('Target Distribution')
plt.show()

In [None]:
# Histograms with a KDE overlay for ApplicantIncome, CoapplicantIncome and
# LoanAmount, side by side. LoanAmount has its missing values dropped first.
fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(18, 5))
numeric_series = (
    df['ApplicantIncome'],
    df['CoapplicantIncome'],
    df['LoanAmount'].dropna(),
)
for series, ax in zip(numeric_series, axes):
    sns.histplot(series, kde=True, ax=ax)
plt.show()

### 4. Correlation Analysis

In [None]:
# Pairwise correlation between the numeric columns, drawn as an annotated heatmap.
numeric_corr = df.select_dtypes(include=np.number).corr()
plt.figure(figsize=(10, 8))
# Pin the colour scale to the full correlation range [-1, 1] and centre it on 0.
# Otherwise the diverging 'coolwarm' map is fitted to the data range, and its
# neutral midpoint no longer marks "no correlation".
sns.heatmap(numeric_corr, annot=True, cmap='coolwarm', vmin=-1, vmax=1, center=0)
plt.title('Feature Correlation')
plt.show()

### 5. Categorical vs Target

In [None]:
# Share of each Loan_Status value within every Credit_History group
# (each crosstab row is normalised to sum to 1), shown as stacked bars.
status_share = pd.crosstab(
    index=df['Credit_History'],
    columns=df['Loan_Status'],
    normalize='index',
)
share_ax = status_share.plot.bar(stacked=True)
share_ax.set_title('Loan Status by Credit History')
share_ax.set_xlabel('Credit History')
share_ax.set_ylabel('Proportion')
plt.show()