## 1. Import Libraries & Load Dataset

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

# For model building
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Load the dataset.
# Only the read itself sits inside `try`, so nothing else can be mistaken for
# a missing-file problem.
try:
    data = pd.read_csv('loan_prediction.csv')
except FileNotFoundError:
    # Every later cell uses `data`. Re-raise after the friendly message so the
    # run stops here instead of failing further down with a NameError.
    print("Error: 'loan_prediction.csv' not found.")
    raise
else:
    print("Data loaded successfully.")

## 2. Initial Data Exploration & Cleaning

In [None]:
# Display basic info.
# DataFrame.info() writes its report itself and returns None, so it is called
# directly; wrapping it in print() would add a stray "None" line.
data.info()
print(data.describe())
print(data.head())

# Drop ID column: it is an identifier, not a predictor.
data.drop('Loan_ID', axis=1, inplace=True)

# Handle missing values: rows lacking any of these fields are removed outright.
essential_cols = ['Gender', 'Dependents', 'LoanAmount', 'Loan_Amount_Term']
data.dropna(subset=essential_cols, inplace=True)

# Fill the remaining gaps with each column's most frequent value.
# The result is assigned back to the column instead of calling
# fillna(inplace=True) on `data[col]`: the in-place form acts on an
# intermediate Series and is not guaranteed to update `data` itself.
for column in ('Self_Employed', 'Credit_History'):
    data[column] = data[column].fillna(data[column].mode()[0])

# NOTE(review): 'Married' is integer-encoded later in this file but gets no
# missing-value handling here — confirm it has no NaNs left at this point.

# Replace '3+' with '3' and convert to int
data['Dependents'] = data['Dependents'].replace('3+', '3').astype(int)

## 3. Encoding Categorical Features

In [None]:
# Binary mapping: every two-valued text column is rewritten as a 0/1 integer
# column, using the label-to-code table below.
binary_codes = {
    'Gender': {'Male': 1, 'Female': 0},
    'Married': {'Yes': 1, 'No': 0},
    'Education': {'Graduate': 1, 'Not Graduate': 0},
    'Self_Employed': {'Yes': 1, 'No': 0},
    'Loan_Status': {'Y': 1, 'N': 0},
}
for column, codes in binary_codes.items():
    encoded = data[column].map(codes)
    data[column] = encoded.astype(int)

# One-hot encoding for the multi-category feature; the first level is dropped
# so the remaining indicator columns are not redundant with each other.
data = pd.get_dummies(data, columns=['Property_Area'], drop_first=True)

# Final data check
print(data.head())

## 4. Exploratory Data Analysis (Optional Visuals)

In [None]:
# Visualize class distribution.
# The column is named through `x=` and the frame is passed as `data=`.
# seaborn's plotting functions take the dataset as their first positional
# parameter, so a bare Series passed positionally is not read as the x variable.
sns.countplot(x='Loan_Status', data=data)
plt.title("Loan Approval Status")
plt.show()

# Correlation heatmap: pairwise correlations of all columns, with each cell
# annotated with its coefficient.
plt.figure(figsize=(10, 6))
sns.heatmap(data.corr(), annot=True, cmap='coolwarm')
plt.title("Feature Correlation")
plt.show()

## 5. Model Training

In [None]:
# Separate the label column from the predictor columns.
target_column = 'Loan_Status'
y = data[target_column]
X = data.drop(columns=[target_column])

# Hold out 20% of the rows for testing; the fixed seed makes the split
# repeatable from run to run.
split = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split

# Fit a logistic-regression classifier on the training portion, with the
# solver's iteration cap set to 1000.
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

## 6. Evaluation

In [None]:
# Score the held-out rows with the fitted model.
y_pred = model.predict(X_test)

# Each metric is computed and printed in turn under its own heading.
metrics = (
    ("Accuracy:", accuracy_score),
    ("\nConfusion Matrix:\n", confusion_matrix),
    ("\nClassification Report:\n", classification_report),
)
for label, metric in metrics:
    print(label, metric(y_test, y_pred))

## 7. Predict on New Data (Optional)

In [None]:
# Example input (you can customize this as needed).
# Selecting with a list (`[[0]]`) gives a one-row DataFrame that keeps the
# column names and per-column dtypes of X_test. Flattening the row to a bare
# array would drop the feature names and merge the dtypes into one.
sample = X_test.iloc[[0]]
prediction = model.predict(sample)
print("Loan Status Prediction (1=Approved, 0=Rejected):", prediction[0])