# Credit Score Prediction for New Loan Applications

This Jupyter notebook uses a pre-trained Credit Scoring model to predict the credit scores of new loan applications. The model was previously developed and saved in the file `f1_Classifier_CreditScoring`, while the normalization coefficients used for scaling the data were saved in `f2_Normalisation_CreditScoring.joblib`. The predictions are written to `f4_NewApplications_CreditScore_Predictions.csv`.

In [1]:
import pandas as pd
import numpy as np
import joblib
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.linear_model import LogisticRegression

### Importing Dataset

In [2]:
# Read the new loan applications that still need a credit score
applications_file = "e_NewApplications_CreditScore_Needed.xlsx"
df = pd.read_excel(applications_file)

# Preview the first five rows
df.head()

Unnamed: 0,ID,DerogCnt,CollectCnt,BanruptcyInd,InqCnt06,InqTimeLast,InqFinanceCnt24,TLTimeFirst,TLTimeLast,TLCnt03,...,TL50UtilCnt,TLBalHCPct,TLSatPct,TLDel3060Cnt24,TLDel90Cnt24,TLDel60CntAll,TLOpenPct,TLBadDerogCnt,TLDel60Cnt24,TLOpen24Pct
0,7140,3,2,0,0,11.0,0,90,9,0,...,2.0,0.8645,0.1667,2,2,4,0.25,3,3,1.0
1,7163,4,2,1,3,3.0,2,221,3,1,...,2.0,0.6774,0.3333,0,2,5,0.2,3,2,0.3333
2,122900,0,0,0,11,1.0,13,152,2,2,...,5.0,0.8371,0.5758,1,0,0,0.2121,0,0,1.8571
3,7226,1,1,0,2,1.0,4,206,21,0,...,,0.0,0.25,0,0,3,0.125,1,0,1.0
4,7251,1,1,0,4,1.0,5,152,11,0,...,2.0,0.8892,0.25,2,1,2,0.75,1,2,0.3333


In [3]:
# Dimensions of the loaded data as (rows, columns)
df.shape

(30, 29)

### Data Preparation

In [4]:
# Drop the 'ID' column: it identifies the application and is not one of the
# features passed on to the scaler and classifier.
# errors='ignore' keeps this cell idempotent: re-running it after 'ID' has
# already been removed is a no-op instead of raising KeyError.
df = df.drop(columns='ID', errors='ignore')

# Confirm the new dimensions (one column fewer than before)
df.shape

(30, 28)

In [5]:
# Count the missing (NaN) entries in every column
missing_per_column = df.isna().sum()
missing_per_column

DerogCnt           0
CollectCnt         0
BanruptcyInd       0
InqCnt06           0
InqTimeLast        3
InqFinanceCnt24    0
TLTimeFirst        0
TLTimeLast         0
TLCnt03            0
TLCnt12            0
TLCnt24            0
TLCnt              0
TLSum              0
TLMaxSum           0
TLSatCnt           0
TLDel60Cnt         0
TLBadCnt24         0
TL75UtilCnt        1
TL50UtilCnt        1
TLBalHCPct         0
TLSatPct           0
TLDel3060Cnt24     0
TLDel90Cnt24       0
TLDel60CntAll      0
TLOpenPct          0
TLBadDerogCnt      0
TLDel60Cnt24       0
TLOpen24Pct        0
dtype: int64

In [6]:
# Impute each missing value with the mean of its column. The means are computed
# from this batch of applications.
# NOTE(review): mean imputation keeps each column's mean unchanged but reduces its
# variance; consider reusing the imputation values from model training so a score
# does not depend on which other applications are in the batch.
column_means = df.mean()
df = df.fillna(column_means)

In [7]:
# Re-count the missing values; every column should now report 0
remaining_missing = df.isna().sum()
remaining_missing

DerogCnt           0
CollectCnt         0
BanruptcyInd       0
InqCnt06           0
InqTimeLast        0
InqFinanceCnt24    0
TLTimeFirst        0
TLTimeLast         0
TLCnt03            0
TLCnt12            0
TLCnt24            0
TLCnt              0
TLSum              0
TLMaxSum           0
TLSatCnt           0
TLDel60Cnt         0
TLBadCnt24         0
TL75UtilCnt        0
TL50UtilCnt        0
TLBalHCPct         0
TLSatPct           0
TLDel3060Cnt24     0
TLDel90Cnt24       0
TLDel60CntAll      0
TLOpenPct          0
TLBadDerogCnt      0
TLDel60Cnt24       0
TLOpen24Pct        0
dtype: int64

### Feature Scaling

In [8]:
# Load the previously saved normalization object.
# NOTE: joblib.load unpickles the file, so only load artefacts from a trusted source.
scaler_file = 'f2_Normalisation_CreditScoring.joblib'
sc = joblib.load(scaler_file)

# Scale the new applications with the loaded normalization; the raw NumPy values are passed
feature_values = df.values
X_fresh = sc.transform(feature_values)

### Loading the Risk Model and Predicting

In [9]:
# Restore the previously trained classifier from disk.
# NOTE: joblib.load unpickles the file, so only load artefacts from a trusted source.
classifier_file = 'f1_Classifier_CreditScoring'
classifier = joblib.load(classifier_file)

In [10]:
# Predict the class label for each new loan application from the scaled features
y_fresh = classifier.predict(X_fresh)

### Writing output file

In [11]:
# Per-class probability estimates for the new loan applications: one row per
# application, one column per class (0 and 1).
# NOTE(review): column order presumably follows classifier.classes_ — confirm.
predictions = classifier.predict_proba(X_fresh)
# Display the full probability array
predictions

array([[0.08, 0.92],
       [0.13, 0.87],
       [0.89, 0.11],
       [0.16, 0.84],
       [0.06, 0.94],
       [0.26, 0.74],
       [0.15, 0.85],
       [0.13, 0.87],
       [0.12, 0.88],
       [0.28, 0.72],
       [0.98, 0.02],
       [0.51, 0.49],
       [0.87, 0.13],
       [0.84, 0.16],
       [1.  , 0.  ],
       [0.41, 0.59],
       [0.91, 0.09],
       [0.51, 0.49],
       [0.96, 0.04],
       [0.78, 0.22],
       [1.  , 0.  ],
       [0.97, 0.03],
       [0.96, 0.04],
       [0.83, 0.17],
       [1.  , 0.  ],
       [0.37, 0.63],
       [0.86, 0.14],
       [0.89, 0.11],
       [0.87, 0.13],
       [0.93, 0.07]])

In [12]:
# Wrap the model outputs and the features in DataFrames so they can be joined column-wise.

# Predicted class label per application
df_prediction_target = pd.DataFrame(data=y_fresh, columns=['Predicted Outcome'])

# Probability of each class per application
df_prediction_prob = pd.DataFrame(data=predictions, columns=['prob_0', 'prob_1'])

# NOTE: these are the scaled feature values (X_fresh), labelled with the original
# column names, not the raw input values.
df_test_dataset = pd.DataFrame(data=X_fresh, columns=df.columns)

In [13]:
# Join predicted label, class probabilities and scaled features side by side
output_frames = [df_prediction_target, df_prediction_prob, df_test_dataset]
dfx = pd.concat(output_frames, axis=1)

# Persist the combined table as a comma-separated file without the row index
dfx.to_csv(
    "f4_NewApplications_CreditScore_Predictions.csv",
    sep=',',
    encoding='UTF-8',
    index=False,
)

In [14]:
# Preview the first 10 rows of the combined output table
dfx.head(10)

Unnamed: 0,Predicted Outcome,prob_0,prob_1,DerogCnt,CollectCnt,BanruptcyInd,InqCnt06,InqTimeLast,InqFinanceCnt24,TLTimeFirst,...,TL50UtilCnt,TLBalHCPct,TLSatPct,TLDel3060Cnt24,TLDel90Cnt24,TLDel60CntAll,TLOpenPct,TLBadDerogCnt,TLDel60Cnt24,TLOpen24Pct
0,1,0.08,0.92,0.590166,0.5627,-0.425561,-0.878069,1.713059,-0.786738,-0.861443,...,-0.666667,0.811837,-1.473889,1.0922,0.7402,0.411803,-1.178579,0.689685,1.062752,0.875076
1,1,0.13,0.87,0.97,0.5627,2.349838,-0.033095,-0.037628,-0.343037,0.546435,...,-0.666667,0.110051,-0.773378,-0.624421,0.7402,0.698691,-1.418722,0.689685,0.510793,-0.475915
2,0,0.89,0.11,-0.549334,-0.432495,-0.425561,2.220171,-0.4753,2.097322,-0.195119,...,0.306783,0.709064,0.246273,0.23389,-0.507328,-0.735747,-1.360607,-0.613656,-0.593127,2.61189
3,1,0.16,0.84,-0.169501,0.065102,-0.425561,-0.314753,-0.4753,0.100665,0.385228,...,-0.017701,-2.430781,-1.123633,-0.624421,-0.507328,0.124916,-1.778937,-0.179209,-0.593127,0.875076
4,1,0.06,0.94,-0.169501,0.065102,-0.425561,0.248563,-0.4753,0.322516,-0.195119,...,-0.666667,0.904483,-1.123633,1.0922,0.116436,-0.161972,1.222852,-0.179209,0.510793,-0.475915
5,1,0.26,0.74,-0.169501,0.065102,2.349838,-0.314753,-0.256464,-0.786738,-0.49604,...,-0.017701,1.424352,-0.072447,-0.624421,-0.507328,-0.161972,0.622495,-0.613656,-0.593127,0.875076
6,1,0.15,0.85,-0.549334,-0.432495,-0.425561,-0.596411,0.618879,0.100665,-0.732477,...,2.253682,1.009883,-0.346596,1.950511,1.987728,0.411803,0.544208,1.124132,1.614712,-1.15131
7,1,0.13,0.87,5.148169,4.045884,-0.425561,-0.033095,-0.4753,-0.121186,-1.678228,...,-0.99115,0.569907,-2.17482,-0.624421,-0.507328,-0.735747,2.423568,4.599707,-0.593127,0.875076
8,1,0.12,0.88,-0.549334,-0.432495,-0.425561,-0.878069,-0.118679,-0.786738,-0.6465,...,-0.99115,0.369236,-0.372666,-0.624421,0.116436,-0.161972,-0.320788,-0.179209,-0.041167,-1.15131
9,1,0.28,0.72,-0.549334,-0.432495,-0.425561,-0.596411,0.400043,-0.564888,1.975808,...,0.306783,-1.098476,0.574243,-0.624421,0.116436,-0.44886,1.130638,-0.179209,-0.041167,-0.084823
