## 🏦 Loan Approval Prediction Using ML

### 🔹 Step 1: Import Libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

### 🔹 Step 2: Load & Explore Dataset

In [None]:
# Location of the raw loan-approval CSV.
# NOTE(review): this is an absolute path on one specific Windows machine;
# anyone else running the notebook has to edit it first.
file_path = "C:/Users/mdpra/OneDrive/Desktop/loan bank approval/loan_approval.csv"

# Read the whole file into the notebook-wide frame used by every later cell.
df = pd.read_csv(filepath_or_buffer=file_path)

# Preview the first five rows (the default for head) as the cell output.
df.head(n=5)

In [None]:
# Print pandas' built-in summary of the frame (columns, dtypes, non-null
# counts) to see which columns need cleaning or encoding.
df.info()

### 🔹 Step 3: Data Cleaning

In [None]:
# Strip stray whitespace from the header names so that later lookups such as
# df['loan_status'] match regardless of padding in the CSV header row.
df.columns = df.columns.str.strip()

# Normalise the target labels the same way before comparing them.
df['loan_status'] = df['loan_status'].astype(str).str.strip()

# Keep only rows carrying one of the two recognised labels.
# The explicit .copy() makes the filtered result an independent frame, so the
# column assignments that follow (here and in later cells) do not write into a
# slice of the original frame and trigger pandas' SettingWithCopyWarning.
status_codes = {'Approved': 1, 'Rejected': 0}
df = df[df['loan_status'].isin(['Approved', 'Rejected'])].copy()

# Encode the target numerically: 1 = approved, 0 = rejected.
df['loan_status'] = df['loan_status'].map(status_codes)

### 🔹 Step 4: EDA (Graphs & Stats)

In [None]:
# Bar chart with one bar per loan_status value, showing how many rows fall
# into each class.
status_ax = sns.countplot(data=df, x='loan_status')
status_ax.set_title("Loan Status Distribution")
plt.show()

In [None]:
# Box plot of income_annum grouped by loan_status, to compare the income
# distribution of the two classes.
income_ax = sns.boxplot(data=df, x='loan_status', y='income_annum')
income_ax.set_title("Income vs Loan Status")
plt.show()

### 🔹 Step 5: Preprocessing (Encoding, Nulls)

In [None]:
from sklearn.preprocessing import LabelEncoder

# Encode each text column with its OWN fitted encoder and keep it in
# `encoders`. Refitting a single shared encoder on the second column would
# discard the mapping learned for the first, making it impossible to decode
# the integers or to encode new inputs consistently later on.
# Usage: encoders['education'].classes_ / .transform(...) / .inverse_transform(...)
encoders = {}
for column in ('education', 'self_employed'):
    le = LabelEncoder()
    # Cast to str and strip padding so that e.g. ' Yes' and 'Yes' share a code.
    df[column] = le.fit_transform(df[column].astype(str).str.strip())
    encoders[column] = le
# After the loop `le` is the encoder fitted on 'self_employed', exactly as it
# was when one encoder was reused for both columns.

# Natural log of annual income as an additional feature.
# NOTE(review): np.log gives -inf for 0 and NaN for negative values — confirm
# income_annum is strictly positive in this dataset.
df['log_income'] = np.log(df['income_annum'])

### 🔹 Step 6: Train-Test Split

In [None]:
# Target: the numerically encoded loan_status column.
y = df['loan_status']

# Features: every remaining column except the loan_id column and the target.
X = df.drop(columns=['loan_id', 'loan_status'])

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

### 🔹 Step 7: Model Building (Random Forest)

In [None]:
# Random forest with library-default hyperparameters; only the seed is pinned
# so that repeated runs build the same forest.
forest_params = {"random_state": 42}
model = RandomForestClassifier(**forest_params)

# Fit on the training portion only. The call stays the last expression so the
# notebook still shows the fitted estimator as the cell output.
model.fit(X=X_train, y=y_train)

### 🔹 Step 8: Evaluation

In [None]:
# Predict labels for the held-out rows.
y_pred = model.predict(X_test)

# Compute every metric first, then report them together.
test_accuracy = accuracy_score(y_test, y_pred)
test_confusion = confusion_matrix(y_test, y_pred)
test_report = classification_report(y_test, y_pred)

print("Accuracy:", test_accuracy)
print("\nConfusion Matrix:\n", test_confusion)
print("\nClassification Report:\n", test_report)

### 🔹 Step 9: Conclusion

- The model was trained using Random Forest, a powerful ensemble algorithm.
- Based on evaluation metrics such as accuracy and the confusion matrix, the model performs well on the held-out 20% test split.
- A model like this could be used to help automate loan-approval decisions in banks or NBFCs (non-banking financial companies).

### 🔹 Step 10: Real-time Prediction Using Manual Input

In [None]:
# One hand-entered applicant: one value per model feature. The last value is
# the log of the fourth, matching how the log_income feature is derived from
# income_annum during preprocessing.
# NOTE(review): the values are presumably listed in the same order as the
# training columns — verify against X_train.columns before trusting the result.
manual_values = [[0, 1, 0, 1000000, 200000, 15, 800,
                  300000, 200000, 100000, 250000, np.log(1000000)]]

# Label the row with the training column names. The model was fitted on a
# DataFrame, so predicting on a bare ndarray makes scikit-learn warn that
# "X does not have valid feature names". Building the frame this way also
# raises immediately if the number of values does not match the number of
# training features, instead of failing later or silently misaligning.
manual_input = pd.DataFrame(manual_values, columns=X_train.columns)

result = model.predict(manual_input)
print("✅ Loan Approved" if result[0] == 1 else "❌ Loan Rejected")