In [6]:
""" This notebook contains visualizations for EDA.
Each cell takes a DataFrame (df) as input and generates plots accordingly.
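Plots are intended for the reports/images/ directory.
NOTE(review): the plotting cells only call plt.show(); no savefig() call is
visible here, so confirm where (or whether) the figures are actually saved. """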

import seaborn as sns
from matplotlib import pyplot as plt
import pandas as pd

# Apply seaborn's "whitegrid" theme to the figures drawn afterwards.
# set_theme() is the preferred interface; sns.set() is only an alias for it
# that the seaborn docs say may be removed in the future.
sns.set_theme(style="whitegrid")
# Load the dataset that the plotting cells read through the global `df`.
df = pd.read_csv("../../data/german_cleaned.csv")


In [None]:
# Pie chart of how often each value of the Target column occurs.
fig, ax = plt.subplots(figsize=(6, 6))
df['Target'].value_counts().plot(
    kind='pie',
    ax=ax,
    autopct='%1.1f%%',
    colors=['lightblue', 'salmon'],
    startangle=90,
)
ax.set_title('Target Variable Distribution')
ax.set_ylabel('')  # clear the y-axis label
fig.tight_layout()
plt.show()

In [None]:
# Row counts per Purpose category, with one bar colour per Target value.
fig, ax = plt.subplots(figsize=(12, 6))
sns.countplot(data=df, x='Purpose', hue='Target', ax=ax)
ax.set_title('Loan Purpose vs. Target')
# Tilt the category labels on the x axis.
for tick_label in ax.get_xticklabels():
    tick_label.set_rotation(30)
fig.tight_layout()
plt.show()

In [None]:
# Box plot of CreditAmount for each value of Target.
fig, ax = plt.subplots(figsize=(8, 6))
sns.boxplot(data=df, x='Target', y='CreditAmount', ax=ax)
ax.set_title('Credit Amount vs. Target')
fig.tight_layout()
plt.show()

In [None]:
# Annotated heatmap of the pairwise correlations between df's numeric columns.
plt.figure(figsize=(10,8))
corr = df.corr(numeric_only=True)
# Correlation coefficients lie in [-1, 1]. Pinning the colour range to that
# interval keeps the neutral midpoint of the diverging 'coolwarm' map at 0, so
# colours mean the same thing regardless of the values present in the data.
sns.heatmap(corr, annot=True, cmap='coolwarm', fmt='.2f', vmin=-1, vmax=1)
plt.title('Correlation Matrix')
plt.xticks(rotation=30)
plt.tight_layout()
plt.show()

In [None]:
# Histogram (30 bins) of the Duration column with a KDE curve overlaid.
fig, ax = plt.subplots(figsize=(8, 6))
sns.histplot(
    df['Duration'],
    bins=30,
    kde=True,
    color='skyblue',
    ax=ax,
)
ax.set_title('Duration Distribution')
fig.tight_layout()
plt.show()

In [None]:
# Histogram (30 bins) of the CreditAmount column with a KDE curve overlaid.
fig, ax = plt.subplots(figsize=(8, 6))
sns.histplot(
    df['CreditAmount'],
    bins=30,
    kde=True,
    color='green',
    ax=ax,
)
ax.set_title('Credit Amount Distribution')
fig.tight_layout()
plt.show()

In [None]:
# Grid of pairwise plots for Age, CreditAmount and Duration, coloured by Target.
pair_columns = ['Age', 'CreditAmount', 'Duration', 'Target']
sns.pairplot(data=df[pair_columns], hue='Target')
plt.show()

In [None]:
# Violin plot of Duration for each value of Target.
fig, ax = plt.subplots(figsize=(8, 6))
sns.violinplot(data=df, x='Target', y='Duration', ax=ax)
ax.set_title('Duration Distribution by Target')
fig.tight_layout()
plt.show()

In [None]:
# Row counts per Employment category, with one bar colour per Target value.
fig, ax = plt.subplots(figsize=(10, 6))
sns.countplot(data=df, x='Employment', hue='Target', ax=ax)
ax.set_title('Employment Duration vs. Target')
# Tilt the category labels on the x axis.
for tick_label in ax.get_xticklabels():
    tick_label.set_rotation(30)
fig.tight_layout()
plt.show()

In [None]:
# Row counts per Purpose category, with one bar colour per Employment value.
fig, ax = plt.subplots(figsize=(12, 6))
sns.countplot(data=df, x='Purpose', hue='Employment', ax=ax)
ax.set_title('Loan Purpose vs. Employment Duration')
# Tilt the category labels on the x axis.
for tick_label in ax.get_xticklabels():
    tick_label.set_rotation(30)
fig.tight_layout()
plt.show()

In [None]:
# Box plot of CreditAmount for each Savings category.
fig, ax = plt.subplots(figsize=(10, 6))
sns.boxplot(data=df, x='Savings', y='CreditAmount', ax=ax)
ax.set_title('Savings Account vs. Credit Amount')
fig.tight_layout()
plt.show()

In [None]:
# Row counts per PersonalStatusSex category, with one bar colour per Target value.
fig, ax = plt.subplots(figsize=(10, 6))
sns.countplot(data=df, x='PersonalStatusSex', hue='Target', ax=ax)
ax.set_title('Personal Status vs. Target')
# Tilt the category labels on the x axis.
for tick_label in ax.get_xticklabels():
    tick_label.set_rotation(30)
fig.tight_layout()
plt.show()