# Loan Data Prediction using Logistic Regression

In [None]:

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, confusion_matrix

# Read the loan records from the CSV file into a DataFrame.
file_path = "loan_data_set_lyst1728376986423.csv"
loan_df = pd.read_csv(file_path)

# Remove the Loan_ID column in place (it is not needed for prediction).
loan_df.drop(columns=['Loan_ID'], inplace=True)

# Handle missing values.
# Every column below is imputed with one statistic computed from its own
# non-missing entries: the most frequent value (mode) for the listed columns
# and the median for LoanAmount.
#
# The filled column is assigned back to the DataFrame instead of calling
# `loan_df[col].fillna(..., inplace=True)`. That chained form mutates an
# intermediate Series: recent pandas 2.x releases flag it with a FutureWarning,
# and with Copy-on-Write enabled (the pandas 3.0 default) it leaves `loan_df`
# itself unfilled.
mode_imputed_columns = [
    'Gender',
    'Married',
    'Dependents',
    'Self_Employed',
    'Loan_Amount_Term',
    'Credit_History',
]
for col in mode_imputed_columns:
    # mode() returns a Series of the most frequent value(s); [0] picks the first.
    loan_df[col] = loan_df[col].fillna(loan_df[col].mode()[0])

loan_df['LoanAmount'] = loan_df['LoanAmount'].fillna(loan_df['LoanAmount'].median())

# Replace every object-dtype column with integer codes.
# One LabelEncoder is created per column and kept in `label_encoders`
# (keyed by column name) so the fitted mapping stays available afterwards.
# NOTE: this runs on all object-dtype columns of loan_df, which includes the
# target column if it is stored as object dtype.
categorical_columns = loan_df.select_dtypes(include='object').columns
label_encoders = {column_name: LabelEncoder() for column_name in categorical_columns}
for col, le in label_encoders.items():
    # fit_transform learns the distinct values and returns their integer codes.
    loan_df[col] = le.fit_transform(loan_df[col])

# Separate the target (Loan_Status) from the remaining columns, which serve
# as the model inputs.
y = loan_df['Loan_Status']
X = loan_df.drop(columns=['Loan_Status'])

# Hold out 20% of the rows for testing; random_state fixes the shuffle so the
# split is reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit a logistic regression classifier on the training split.
# max_iter=200 caps the number of solver iterations; fit() returns the fitted
# estimator itself, so `model` is the trained classifier.
model = LogisticRegression(max_iter=200).fit(X_train, y_train)

# Predict labels for the held-out rows and score them against the true labels.
y_pred = model.predict(X_test)
# Confusion matrix: rows are true labels, columns are predicted labels.
conf_matrix = confusion_matrix(y_test, y_pred)
# Fraction of test rows whose predicted label matches the true label.
accuracy = accuracy_score(y_test, y_pred)

# Report accuracy as a percentage rounded to two decimals, then the matrix.
accuracy_percent = round(accuracy * 100, 2)
print("Model Accuracy:", accuracy_percent, "%")
print("Confusion Matrix:\n", conf_matrix)
    