# Loan Approval Prediction using Machine Learning

## Objective
Build a classification model to predict loan approval status.

## Algorithm Used
Logistic Regression

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

In [3]:
# Read the loan-application records. The path is relative, so "train.csv"
# must be present in the notebook's working directory.
df = pd.read_csv("train.csv")
# Display the first rows as a quick check of the parsed columns.
df.head()

Unnamed: 0,Loan_ID,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
0,LP001002,Male,No,0,Graduate,No,5849,0.0,,360.0,1.0,Urban,Y
1,LP001003,Male,Yes,1,Graduate,No,4583,1508.0,128.0,360.0,1.0,Rural,N
2,LP001005,Male,Yes,0,Graduate,Yes,3000,0.0,66.0,360.0,1.0,Urban,Y
3,LP001006,Male,Yes,0,Not Graduate,No,2583,2358.0,120.0,360.0,1.0,Urban,Y
4,LP001008,Male,No,0,Graduate,No,6000,0.0,141.0,360.0,1.0,Urban,Y


## Data Preprocessing

- Handling missing values
- Converting categorical variables
- Separating the features from the target (`Loan_Status`) and dropping the `Loan_ID` identifier

In [5]:
# Columns whose gaps are filled with the column's most frequent value.
for mode_col in ['Gender', 'Married', 'Self_Employed', 'Credit_History']:
    most_common = df[mode_col].mode()[0]
    df[mode_col] = df[mode_col].fillna(most_common)

# Columns whose gaps are filled with the column median.
for median_col in ['LoanAmount', 'Loan_Amount_Term']:
    df[median_col] = df[median_col].fillna(df[median_col].median())

# Dependents: map the open-ended '3+' bucket to 3, treat missing as 0,
# then cast the whole column to float.
dependents = df['Dependents'].replace('3+', 3)
df['Dependents'] = dependents.fillna(0).astype(float)

# One-hot encode the categorical columns; drop_first=True omits the first
# level of each so the remaining indicators are not redundant.
categorical_cols = ['Gender', 'Married', 'Self_Employed', 'Education', 'Property_Area']
df = pd.get_dummies(df, columns=categorical_cols, drop_first=True)

df.head()

Unnamed: 0,Loan_ID,Dependents,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Loan_Status,Gender_Male,Married_Yes,Self_Employed_Yes,Education_Not Graduate,Property_Area_Semiurban,Property_Area_Urban
0,LP001002,0.0,5849,0.0,128.0,360.0,1.0,Y,True,False,False,False,False,True
1,LP001003,1.0,4583,1508.0,128.0,360.0,1.0,N,True,True,False,False,False,False
2,LP001005,0.0,3000,0.0,66.0,360.0,1.0,Y,True,True,True,False,False,True
3,LP001006,0.0,2583,2358.0,120.0,360.0,1.0,Y,True,True,False,True,False,True
4,LP001008,0.0,6000,0.0,141.0,360.0,1.0,Y,True,False,False,False,False,True


In [6]:
# Target: encode the approval flag numerically ('Y' -> 1, 'N' -> 0).
label_encoding = {'Y': 1, 'N': 0}
y = df['Loan_Status'].map(label_encoding)

# Features: every remaining column except the row identifier and the target.
non_feature_cols = ['Loan_ID', 'Loan_Status']
X = df.drop(columns=non_feature_cols)

print(X.shape, y.shape)

(614, 12) (614,)


In [7]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Zero-fill any feature values that are still missing. The names are rebound
# instead of calling fillna(..., inplace=True): mutating the frames returned
# by train_test_split in place can trigger pandas' SettingWithCopyWarning,
# and rebinding keeps the cell safe to re-run.
X_train = X_train.fillna(0)
X_test = X_test.fillna(0)

print(X_train.shape, X_test.shape)

(491, 12) (123, 12)


In [8]:
# Learn the scaling statistics from the training split only, then apply the
# same fitted transform to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

## Model Training

Train a Logistic Regression model on the standardized training features.

In [9]:
# Logistic regression classifier with default hyperparameters.
model = LogisticRegression()
# Fit on the scaled training features and their 0/1 labels.
model.fit(X_train_scaled, y_train)

In [10]:
# Predict labels for the held-out split and compare them with the true labels.
y_pred = model.predict(X_test_scaled)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

print(f"Model Accuracy: {accuracy:.4f}")

Model Accuracy: 0.7886


In [11]:
# Take the first held-out row as a one-row DataFrame and scale it with the
# already-fitted scaler before predicting.
sample_input = X_test.head(1)
sample_input_scaled = scaler.transform(sample_input)

prediction = model.predict(sample_input_scaled)

# Class 1 corresponds to the 'Y' loan status.
if prediction[0] == 1:
    verdict = 'Approved'
else:
    verdict = 'Not Approved'

print(f"Loan Approval Prediction: {verdict}")

Loan Approval Prediction: Approved
