In [1]:
import pandas as pd
import joblib

from sklearn.preprocessing import LabelEncoder

In [2]:
# Load the persisted model and the raw rows that will be prepared and scored.
# NOTE(review): joblib.load unpickles the file, and unpickling can execute
# arbitrary code, so only point this at a model file from a trusted source.
model_path = 'trained_score_model_rfc.pkl'
data_path = 'test.csv'

model_rfc = joblib.load(model_path)
bank_df = pd.read_csv(data_path)

## Data Preparation

In [3]:
# Columns of the raw frame that are removed before the frame is given to the model.
columns_to_drop = [
    'ID', 'Customer_ID', 'Month', 'Name',
    'Age', 'SSN', 'Occupation',
    'Type_of_Loan', 'Credit_Mix', 'Credit_History_Age',
    'Payment_of_Min_Amount', 'Total_EMI_per_month',
    'Payment_Behaviour', 'Changed_Credit_Limit',
    'Num_Credit_Inquiries', 'Outstanding_Debt', 'Amount_invested_monthly',
]
features = bank_df.drop(columns=columns_to_drop)

# These columns are handled as text here; strip underscores so the values
# can be cast to numbers below.
for column in ('Annual_Income', 'Num_of_Loan', 'Num_of_Delayed_Payment'):
    features[column] = features[column].str.replace('_', '')

# Missing delayed-payment counts are treated as 0 so the integer cast succeeds.
features['Num_of_Delayed_Payment'] = features['Num_of_Delayed_Payment'].fillna(0)

# This exact placeholder string is replaced by 0 before the float cast.
features['Monthly_Balance'] = features['Monthly_Balance'].replace(
    {'__-333333333333333333333333333__': 0}
)

# Cast the cleaned columns to numeric dtypes in one step.
features = features.astype({
    'Annual_Income': float,
    'Num_of_Loan': int,
    'Num_of_Delayed_Payment': int,
    'Monthly_Balance': float,
})

In [4]:
# Display the dtype of each remaining column after the conversions above.
features.dtypes

Annual_Income               float64
Monthly_Inhand_Salary       float64
Num_Bank_Accounts             int64
Num_Credit_Card               int64
Interest_Rate                 int64
Num_of_Loan                   int32
Delay_from_due_date           int64
Num_of_Delayed_Payment        int32
Credit_Utilization_Ratio    float64
Monthly_Balance             float64
dtype: object

## End of Data Preparation

In [5]:
# Replace every remaining missing value with 0 so the model input has no NaNs.
# `features` is rebound to the filled frame, so later cells that read
# `features` still see the filled values.
features = features.fillna(0)

# `score_pred` must be an independent copy, not an alias of `features`:
# columns added to `features` later in the notebook would otherwise also
# appear in the model input and break a re-run of the prediction cell.
score_pred = features.copy()

In [6]:
# Make predictions: run the loaded model on the prepared feature frame.
predictions_rfc = model_rfc.predict(score_pred)

In [7]:
# Translation from the model's class ids to a readable label;
# ids 1 and 2 both collapse to 'Safe', id 0 becomes 'Unsafe'.
pred_mapping = {
    0: 'Unsafe',
    1: 'Safe',
    2: 'Safe',
}

# Store the raw class ids next to the features, then derive the label column
# from them. Ids missing from pred_mapping would come out as NaN.
features['Predictions_RFC'] = predictions_rfc
features['Prediction_RFC_label'] = features['Predictions_RFC'].map(pred_mapping)

features

Unnamed: 0,Annual_Income,Monthly_Inhand_Salary,Num_Bank_Accounts,Num_Credit_Card,Interest_Rate,Num_of_Loan,Delay_from_due_date,Num_of_Delayed_Payment,Credit_Utilization_Ratio,Monthly_Balance,Predictions_RFC,Prediction_RFC_label
0,19114.12,1824.843333,3,4,3,4,3,7,35.030402,186.266702,2,Safe
1,19114.12,1824.843333,3,4,3,4,3,9,33.053114,361.444004,2,Safe
2,19114.12,1824.843333,3,4,3,4,-1,4,33.811894,264.675446,2,Safe
3,19114.12,0.000000,3,4,3,4,4,5,32.430559,343.826873,2,Safe
4,34847.84,3037.986667,2,4,6,1,3,1,25.926822,485.298434,1,Safe
...,...,...,...,...,...,...,...,...,...,...,...,...
49995,20002.88,1929.906667,10,8,29,5,33,25,34.780553,275.539570,1,Safe
49996,39628.99,0.000000,4,6,7,2,20,0,27.758522,409.394562,1,Safe
49997,39628.99,3359.415833,4,6,7,2,23,5,36.858542,349.726332,0,Unsafe
49998,39628.99,0.000000,4,6,7,2,21,6,39.139840,463.238981,0,Unsafe


In [8]:
# Write the scored rows to disk without the row index, then preview the first ten.
output_csv = 'Credit_Score_predictions.csv'
features.to_csv(output_csv, index=False)
features.head(10)

Unnamed: 0,Annual_Income,Monthly_Inhand_Salary,Num_Bank_Accounts,Num_Credit_Card,Interest_Rate,Num_of_Loan,Delay_from_due_date,Num_of_Delayed_Payment,Credit_Utilization_Ratio,Monthly_Balance,Predictions_RFC,Prediction_RFC_label
0,19114.12,1824.843333,3,4,3,4,3,7,35.030402,186.266702,2,Safe
1,19114.12,1824.843333,3,4,3,4,3,9,33.053114,361.444004,2,Safe
2,19114.12,1824.843333,3,4,3,4,-1,4,33.811894,264.675446,2,Safe
3,19114.12,0.0,3,4,3,4,4,5,32.430559,343.826873,2,Safe
4,34847.84,3037.986667,2,4,6,1,3,1,25.926822,485.298434,1,Safe
5,34847.84,3037.986667,2,4,6,1,3,3,30.1166,303.355083,1,Safe
6,34847.84,3037.986667,2,4,6,1,3,0,30.996424,452.302307,2,Safe
7,34847.84,3037.986667,2,4,6,1,3,2,33.875167,421.447964,2,Safe
8,143162.64,0.0,1,5,8,3,8,1942,35.229707,854.226027,1,Safe
9,143162.64,12187.22,1,5,8,3,6,3,35.685836,788.11455,2,Safe


In [9]:
# Show the (rows, columns) dimensions of the scored frame.
features.shape

(50000, 12)

In [10]:
# Count how many rows received each predicted label.
features['Prediction_RFC_label'].value_counts()

Prediction_RFC_label
Safe      34727
Unsafe    15273
Name: count, dtype: int64

In [11]:
# NOTE(review): this cell repeats the previous cell's expression verbatim and
# yields the same counts; it looks like a leftover that could be removed.
features['Prediction_RFC_label'].value_counts()

Prediction_RFC_label
Safe      34727
Unsafe    15273
Name: count, dtype: int64