In [40]:
# Import libraries
import pickle

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [41]:
# Step 1: Read data
# The path is relative to the notebook's working directory. A forward slash is
# used as the separator: it resolves on Windows, Linux and macOS alike, and it
# avoids the invalid "\l" escape sequence that a backslash would introduce
# inside a normal (non-raw) string literal.
loan_train_df = pd.read_csv("Dataset/loan_sanction_train.csv")

In [42]:
# Step 2: Clean dataset and convert categorical to numeric
# Single pipeline: drop the Loan_ID column, one-hot encode the listed
# categorical columns, then drop every row that still holds a missing value.
# NOTE(review): encoding runs before dropna(), so dropna() only sees the
# already-encoded frame; with get_dummies' default dummy_na=False a missing
# value in an encoded column produces no indicator and the row is kept.
# Confirm that this is the intended treatment of missing categoricals.
loan_train_clean_df = (
    loan_train_df
    .drop(columns=["Loan_ID"])
    .pipe(
        pd.get_dummies,
        columns=[
            "Gender",
            "Married",
            "Dependents",
            "Education",
            "Self_Employed",
            "Property_Area",
        ],
    )
    .dropna()
)


In [43]:
# Step 3: Split into features (X) and target (y) labels
# y is the Loan_Status column; X is every remaining column. Both are taken
# through .values, so column names are not carried forward.
y = loan_train_clean_df.loc[:, "Loan_Status"].values
X = loan_train_clean_df.drop("Loan_Status", axis="columns").values

In [44]:
# Encode the target variable (Loan_Status) to numeric values
# The encoder is fitted on y first and kept as label_encoder, then used to
# replace y with its integer-encoded form.
label_encoder = LabelEncoder().fit(y)
y = label_encoder.transform(y)

In [45]:
# Step 4: Split data into training and testing sets
# Stratified on y with a fixed seed. test_size is spelled out at
# scikit-learn's default (0.25) so the split ratio is visible in the notebook.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=1,
    stratify=y,
)

In [46]:
# Step 5: Scale the features
# The scaler is fitted on the training split only; that same fitted scaler is
# then applied to both the training and the test features.
X_scaler = StandardScaler().fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [47]:
# Step 6: Initialize and train the Logistic Regression model
log_reg = LogisticRegression(random_state=42)
# fit() stays the final expression of the cell so its return value is shown
# as the cell output.
log_reg.fit(X=X_train_scaled, y=y_train)

In [48]:
# Step 7: Make predictions on the test data
# Predictions are made on the scaled test features (X_test_scaled), matching
# the scaled inputs the model was fitted on.
y_pred = log_reg.predict(X_test_scaled)

In [57]:
# Step 8: Evaluate the model
# Prints the overall accuracy on the test split, followed by scikit-learn's
# classification report for the same predictions.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Logistic Regression Model Accuracy: {accuracy}")
print(
    "Classification Report:\n",
    classification_report(y_true=y_test, y_pred=y_pred),
)


Logistic Regression Model Accuracy: 0.8345864661654135
Classification Report:
               precision    recall  f1-score   support

           0       0.95      0.49      0.65        41
           1       0.81      0.99      0.89        92

    accuracy                           0.83       133
   macro avg       0.88      0.74      0.77       133
weighted avg       0.86      0.83      0.82       133



In [50]:
# Step 9: Make predictions and display in a DataFrame with accuracy
# Pairs each test-set prediction with its true label, both in encoded form.
predictions_df = pd.DataFrame(data={"Prediction": y_pred, "Actual": y_test})
print("\nSample Predictions vs. Actual Values:")
# Final expression of the cell: shows up to the first 20 rows.
predictions_df.head(n=20)


Sample Predictions vs. Actual Values:


Unnamed: 0,Prediction,Actual
0,0,0
1,1,0
2,1,1
3,1,1
4,1,0
5,0,0
6,1,1
7,1,1
8,1,1
9,1,1


In [51]:
# Display accuracy score alongside predictions
# Re-prints the accuracy value computed in Step 8, formatted to two decimals.
print(f"\nOverall Model Accuracy: {accuracy:.2f}")


Overall Model Accuracy: 0.83


In [52]:
# Save ML model
# Serializes the fitted log_reg estimator to logistic_regression_model.pkl in
# the current working directory. Requires `pickle` to be imported in the
# notebook's imports cell.
# The model was fitted on scaled features, so anything loading it must apply
# the saved scaler to new inputs before calling predict().
# NOTE: unpickling can execute arbitrary code; only load this file from a
# trusted location.
with open('logistic_regression_model.pkl', 'wb') as model_file:
    pickle.dump(log_reg, model_file)

In [53]:
# Save the scaler
# Serializes the fitted X_scaler to x_scaler.pkl in the current working
# directory so the same scaling can be re-applied to new inputs. Requires
# `pickle` to be imported in the notebook's imports cell.
# NOTE: unpickling can execute arbitrary code; only load this file from a
# trusted location.
with open('x_scaler.pkl', 'wb') as scaler_file:
    pickle.dump(X_scaler, scaler_file)