In [1]:
# Import all dependencies
from pathlib import Path

import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split

In [3]:
# Load the loan data from loans.csv (path is relative to the working directory)
loans_csv_path = Path('loans.csv')
loan_df = pd.read_csv(loans_csv_path)

In [4]:
# Preview the first five rows of the loan data
loan_df.head(n=5)

Unnamed: 0,assets,liabilities,income,credit_score,mortgage,status
0,0.210859,0.452865,0.281367,0.628039,0.302682,deny
1,0.395018,0.661153,0.330622,0.638439,0.502831,approve
2,0.291186,0.593432,0.438436,0.434863,0.315574,approve
3,0.45864,0.576156,0.744167,0.291324,0.394891,approve
4,0.46347,0.292414,0.489887,0.811384,0.566605,approve


In [6]:
# Number of rows and columns in the loan data
n_rows, n_columns = loan_df.shape
(n_rows, n_columns)

(100, 6)

In [8]:
# Count the missing values in each column
missing_per_column = loan_df.isnull().sum()
missing_per_column

assets          0
liabilities     0
income          0
credit_score    0
mortgage        0
status          0
dtype: int64

In [9]:
# Class balance of the target column (status): how many rows fall in each class
status_counts = loan_df['status'].value_counts()
status_counts

deny       53
approve    47
Name: status, dtype: int64

In [13]:
# The target classes are roughly balanced (53 deny vs. 47 approve). That is a favorable starting point for a prediction model, since accuracy will not be dominated by a majority class.

In [14]:
# Average value of every feature within each status class
feature_means_by_status = loan_df.groupby('status').mean()
feature_means_by_status

Unnamed: 0_level_0,assets,liabilities,income,credit_score,mortgage
status,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
approve,0.443889,0.496999,0.600641,0.502527,0.456975
deny,0.375839,0.406047,0.572761,0.520653,0.389728


In [15]:
# The per-class feature means differ between approve and deny (e.g. assets 0.44 vs. 0.38), which suggests the features may carry some signal for a prediction model.

In [17]:
# StandardScaler is not applied here because the features already appear to be on a similar scale (the class means above all fall between roughly 0.38 and 0.60).

In [18]:
# Separate the feature columns (X) from the target column (Y)
target_column = 'status'
X = loan_df.drop(columns=target_column)
Y = loan_df[target_column]

In [27]:
# Hold out 10% of the rows for testing (10 of the 100 rows); stratify on Y so the
# approve/deny proportions are kept in both splits. random_state fixes the shuffle.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.1,
    random_state=1,
    stratify=Y,
)

In [28]:
# Define model
# The classifier is created with scikit-learn's default settings (no arguments are passed).
model = LogisticRegression()

In [29]:
# Train the model on the training split (features X_train, labels Y_train)
model.fit(X=X_train, y=Y_train)

LogisticRegression()

In [31]:
# Determine the accuracy of the model on the data it was trained on.
# This is a sanity check of the fit, not a measure of how well the model generalises.
training_data_prediction = model.predict(X_train)
# accuracy_score is documented as (y_true, y_pred); accuracy itself is symmetric,
# but keeping the documented order avoids mistakes if another metric is swapped in.
training_data_accuracy_score = accuracy_score(Y_train, training_data_prediction)

# Display Training Accuracy Score
# Scale to a percentage before rounding: rounding first and multiplying afterwards can
# print float noise (for example 0.57 * 100 evaluates to 56.99999999999999).
print(f'The training accuracy score is : {round(training_data_accuracy_score * 100, 2)}%')

The training accuracy score is : 53.33%


In [34]:
# Determine the accuracy of the model on the held-out testing data.
testing_data_prediction = model.predict(X_test)
# accuracy_score is documented as (y_true, y_pred); accuracy itself is symmetric,
# but keeping the documented order avoids mistakes if another metric is swapped in.
testing_data_accuracy_score = accuracy_score(Y_test, testing_data_prediction)

# Display Testing Accuracy Score
# Scale to a percentage before rounding: rounding first and multiplying afterwards can
# print float noise (for example 0.57 * 100 evaluates to 56.99999999999999).
print(f'The testing accuracy score is : {round(testing_data_accuracy_score * 100, 2)}%')

The testing accuracy score is : 60.0%


In [40]:
# Create a prediction system
# Feature values for a single applicant, listed in the same order as the columns of X
# (assets, liabilities, income, credit_score, mortgage).
applicant_features = (0.21085933994821096,0.4528654325371546,0.28136746009933805,0.6280387916106287,0.302681967156299)

# Wrap the values in a one-row DataFrame that carries the training column names.
# The model was fitted on a DataFrame, so predicting from a bare NumPy array is what
# triggers scikit-learn's feature-name warning; passing named columns avoids the
# warning without silencing every warning in the kernel.
applicant_df = pd.DataFrame([applicant_features], columns=X.columns)

prediction_1 = model.predict(applicant_df)

# predict returns an array with one label per input row; compare the single label
# rather than the whole array.
if prediction_1[0] == 'deny':
    print('Loan has been denied.')
else:
    print('Loan has been approved.')

Loan has been denied.


In [42]:
# Put the model's predictions next to the true labels for the held-out test rows
prediction_vs_actual = {
    'Prediction': testing_data_prediction,
    'Actual': Y_test,
}
results = pd.DataFrame(prediction_vs_actual)
# Y_test keeps its original row labels from loan_df; replace them with 0..n-1
results = results.reset_index(drop=True)

results.head()

Unnamed: 0,Prediction,Actual
0,approve,approve
1,approve,deny
2,deny,approve
3,deny,approve
4,deny,deny


In [44]:
# Confusion Matrix
# Rows are the actual classes and columns are the predicted classes.
# Passing the model's own class list pins the row/column order (approve, deny here)
# and keeps the matrix the same size even if a class were missing from the test split.
confusion_matrix(Y_test, testing_data_prediction, labels=model.classes_)

array([[2, 3],
       [1, 4]], dtype=int64)

In [46]:
# Classification Report
# Per-class precision, recall, f1-score and support for the held-out test rows.
# classification_report returns a formatted string, so it is printed rather than displayed.
print(classification_report(Y_test, testing_data_prediction))

              precision    recall  f1-score   support

     approve       0.67      0.40      0.50         5
        deny       0.57      0.80      0.67         5

    accuracy                           0.60        10
   macro avg       0.62      0.60      0.58        10
weighted avg       0.62      0.60      0.58        10

