In [1]:
import pandas as pd

# Load the pre_processed dataset

In [4]:
# Read the pre-processed dataset from a path relative to the working directory,
# then preview its first rows as this cell's output.
csv_path = 'pre_processed.csv'
feature = pd.read_csv(csv_path)
feature.head()

Unnamed: 0,Gender,Married,Dependents,Education,Self_Employed,Credit_History,Property_Area,Total_Income,LoanAmount,Loan_Amount_Term,Loan_Status
0,1,0,0.0,0,0,1.0,2,5849.0,146.412162,360.0,1
1,1,1,1.0,0,0,1.0,0,6091.0,128.0,360.0,0
2,1,1,0.0,0,1,1.0,2,3000.0,66.0,360.0,1
3,1,1,0.0,1,0,1.0,2,4941.0,120.0,360.0,1
4,1,0,0.0,0,0,1.0,2,6000.0,141.0,360.0,1


# Train Test split

In [5]:
from sklearn.model_selection import train_test_split

# The target is the 'Loan_Status' column; every remaining column is a predictor.
y = feature['Loan_Status']
X = feature.drop(columns='Loan_Status')

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
split_options = {'test_size': 0.2, 'random_state': 107}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)


## Fit a LogisticRegression model and check its accuracy with a confusion matrix

In [6]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Create a default-configured logistic regression and train it in a single
# expression (fit() returns the estimator itself), then predict the test rows.
# NOTE(review): the predictors are passed in as loaded (no scaling step in this
# notebook) and the iteration limit is left at its default -- check whether
# fitting emits a ConvergenceWarning. TODO confirm.
model = LogisticRegression().fit(X_train, y_train)
y_pred = model.predict(X_test)

In [7]:
# Score whatever predictions are currently bound to y_pred against the test labels.
classification_rep = classification_report(y_test, y_pred)
confusion = confusion_matrix(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)

# Accuracy is shown as a percentage; each heading is followed by its value on
# the next line.
print(f'Accuracy: {accuracy * 100:.2f}%')
print('Confusion Matrix:', confusion, sep='\n')
print('Classification Report:', classification_rep, sep='\n')

Accuracy: 83.33%
Confusion Matrix:
[[22 18]
 [ 0 68]]
Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.55      0.71        40
           1       0.79      1.00      0.88        68

    accuracy                           0.83       108
   macro avg       0.90      0.78      0.80       108
weighted avg       0.87      0.83      0.82       108



## Fit a RandomForestClassifier model and check its accuracy with a confusion matrix

In [8]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Build a 100-tree forest with a fixed seed and train it on the training split
# (fit() returns the estimator itself).
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=107).fit(X_train, y_train)

# Predict the held-out rows; this rebinds the notebook-wide y_pred.
y_pred = rf_classifier.predict(X_test)

# Compute the evaluation metrics for these predictions.
classification_rep = classification_report(y_test, y_pred)
confusion = confusion_matrix(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)

# Report: accuracy as a percentage, then each heading followed by its value.
print(f'Accuracy: {accuracy * 100:.2f}%')
print('Confusion Matrix:', confusion, sep='\n')
print('Classification Report:', classification_rep, sep='\n')

Accuracy: 80.56%
Confusion Matrix:
[[23 17]
 [ 4 64]]
Classification Report:
              precision    recall  f1-score   support

           0       0.85      0.57      0.69        40
           1       0.79      0.94      0.86        68

    accuracy                           0.81       108
   macro avg       0.82      0.76      0.77       108
weighted avg       0.81      0.81      0.80       108



## Fit a K-NN model and check its accuracy with a confusion matrix

In [9]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Build a 5-neighbour classifier and train it on the training split
# (fit() returns the estimator itself).
# NOTE(review): k-NN compares rows by distance and the predictors are used as
# loaded, so large-valued columns (e.g. Total_Income) may dominate the metric.
# Consider scaling before comparing this model with the others -- confirm.
knn_classifier = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)

# Predict the held-out rows; this rebinds the notebook-wide y_pred.
y_pred = knn_classifier.predict(X_test)

# Compute the evaluation metrics for these predictions.
classification_rep = classification_report(y_test, y_pred)
confusion = confusion_matrix(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)

# Report: accuracy as a percentage, then each heading followed by its value.
print(f'Accuracy: {accuracy * 100:.2f}%')
print('Confusion Matrix:', confusion, sep='\n')
print('Classification Report:', classification_rep, sep='\n')

Accuracy: 64.81%
Confusion Matrix:
[[ 6 34]
 [ 4 64]]
Classification Report:
              precision    recall  f1-score   support

           0       0.60      0.15      0.24        40
           1       0.65      0.94      0.77        68

    accuracy                           0.65       108
   macro avg       0.63      0.55      0.51       108
weighted avg       0.63      0.65      0.57       108



### Since LogisticRegression achieves the highest accuracy of the three models, we select it as the final model.

# Saving Model file

In [12]:
import pickle

# Serialise the estimator currently bound to `model` into a pickle file in the
# working directory so it can be reloaded later.
file_name = 'model_file.pkl'
with open(file_name, mode='wb') as file:
    pickle.dump(model, file)


In [15]:
import pickle

# Restore the estimator from disk, rebinding `model` to the loaded object.
# NOTE: pickle.load can execute arbitrary code while deserialising -- only
# load pickle files that you created yourself or otherwise trust.
file_name = 'model_file.pkl'
with open(file_name, mode='rb') as file:
    model = pickle.load(file)

# Print the estimator's repr to confirm what was loaded.
print(model)

LogisticRegression()


In [17]:
# Print the learned weights only when the model exposes both attributes;
# otherwise print a notice instead.
has_linear_params = all(hasattr(model, attr) for attr in ('coef_', 'intercept_'))
if not has_linear_params:
    print("This model doesn't have coefficients and intercept.")
else:
    print("Coefficients:", model.coef_)
    print("Intercept:", model.intercept_)

Coefficients: [[ 1.71361380e-01  7.44964488e-01 -1.02803094e-01 -4.40447601e-01
  -8.51165034e-02  2.98371931e+00  2.39427583e-02  1.15116820e-05
  -4.10979359e-03 -4.72261199e-03]]
Intercept: [0.10490887]


In [20]:
import numpy as np

# Example applicant to score: a single row holding one value per feature.
# NOTE(review): the values must follow the column order used for training --
# confirm against X.columns.
new_data = np.array([[1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 6091.0, 128.0, 360.0]])

# An estimator fitted on a DataFrame records its column names in
# `feature_names_in_` (scikit-learn >= 1.0) and warns when it is later given a
# bare array. Label the sample with those names when they are available; the
# DataFrame constructor also raises if the number of values does not match the
# number of training features. Fall back to the raw array otherwise.
feature_names = getattr(model, 'feature_names_in_', None)
if feature_names is None:
    new_sample = new_data
else:
    new_sample = pd.DataFrame(new_data, columns=feature_names)

# Predicted class label for the sample (0 or 1 for this binary target).
prediction = model.predict(new_sample)
print("Predicted class:", prediction)

# Per-class probabilities, one column per class in the order of model.classes_.
probability_scores = model.predict_proba(new_sample)
print("Probability scores:", probability_scores)


Predicted class: [1]
Probability scores: [[0.14854298 0.85145702]]


