<a href="https://colab.research.google.com/github/cckmwong/portfolio/blob/main/CreditScore.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#FINAL
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
import numpy as np
import datetime
import pandas as pd

#Helper: keep prompting until one value parses and passes validation
def _prompt_number(prompt, cast, is_valid, range_msg, format_msg):
    """Prompt on stdin until a value converts with *cast* and satisfies *is_valid*.

    Args:
        prompt: Text shown to the user by ``input``.
        cast: Callable (``float`` or ``int``) converting the raw text.
        is_valid: Predicate that returns True when the converted value is acceptable.
        range_msg: Message printed when the value parses but fails *is_valid*.
        format_msg: Message printed when *cast* raises ``ValueError``.

    Returns:
        The first converted value accepted by *is_valid*.

    Raises:
        EOFError: If stdin is exhausted. Only ``ValueError`` is handled here,
            so a closed input stream ends the prompt instead of looping forever.
    """
    while True:
        try:
            value = cast(input(prompt))
        except ValueError:
            print(format_msg)
            continue
        if is_valid(value):
            return value
        print(range_msg)


#Function for handling user's input data
def reading_input():
    """Interactively collect one applicant's details and return them as a DataFrame.

    Each field is re-prompted until it is well-formed and within range.

    Returns:
        A single-row ``pandas.DataFrame`` with the columns ``Income``,
        ``Credit_History``, ``Outstanding_Balance``, ``Employed``,
        ``Self-Employed`` and ``Unemployed`` (the last three are 0/1 dummies).

    Raises:
        EOFError: If stdin is closed before all fields have been entered.
    """
    infinity = float("inf")

    # "inf" parses as a float but is not a usable amount, so it is rejected
    # together with negative values ("nan" already fails the comparison).
    income = _prompt_number(
        "Monthly Income including salary, investment income, etc. (£) : ",
        float,
        lambda v: 0 <= v < infinity,
        "Please enter a positive number.",
        "Invalid format. Please enter a positive number.",
    )
    credit_history = _prompt_number(
        "Credit score (between 0 and 10, higher the score, better the credibility): ",
        float,
        lambda v: 0 <= v <= 10,
        "Please enter a number between 0 and 10.",
        "Invalid format. Please enter a number between 0 and 10.",
    )
    balance_ = _prompt_number(
        "Outstanding loan balance (£): ",
        float,
        lambda v: 0 <= v < infinity,
        "Please enter a positive number.",
        "Invalid format. Please enter a positive number.",
    )
    employment_ = _prompt_number(
        "Employment status (0 for Employed/ 1 for Self-employed/ 2 for Unemployed): ",
        int,
        lambda v: v in (0, 1, 2),
        "Please enter 0/ 1/ 2.",
        "Invalid format. Please enter 0/ 1/ 2.",
    )

    #change the categorical variable (employment) into multiple dummy variables
    employed, self_employed, unemployed = (
        int(employment_ == code) for code in (0, 1, 2)
    )

    #Prepare the input data as a DataFrame with the correct columns
    input_data = pd.DataFrame(
        [[income, credit_history, balance_, employed, self_employed, unemployed]],
        columns=['Income', 'Credit_History', 'Outstanding_Balance',
                 'Employed', 'Self-Employed', 'Unemployed'],
    )

    return input_data

#Function for cleaning data of the dataset
def cleaning_data(df):
    """One-hot encode employment, split into train/test sets and standardize.

    Args:
        df: DataFrame containing the columns ``Income``, ``Credit_History``,
            ``Outstanding_Balance``, ``Employment`` and ``Repay_Loan``.

    Returns:
        A tuple ``(x_train, y_train, x_test, y_test, scaler)`` where the
        feature sets are standardized arrays and ``scaler`` is the
        ``StandardScaler`` fitted on the training features, returned so new
        inputs can be transformed the same way.
    """
    employment_columns = ['Employed', 'Self-Employed', 'Unemployed']

    # Transform the categorical variable into 0/1 dummy columns. Each column is
    # taken from its own dummy, and reindex guarantees that all three columns
    # exist even if a category does not occur in this dataset.
    dummy = (
        pd.get_dummies(df['Employment'])
        .reindex(columns=employment_columns, fill_value=0)
        .astype(int)
    )

    # Replace the original Employment column with the dummy columns
    data = pd.concat([df.drop(columns='Employment'), dummy], axis=1)

    x = data[['Income', 'Credit_History', 'Outstanding_Balance'] + employment_columns] #independent variables
    y = data['Repay_Loan'] #target variable

    # Split the data into training and test set (70% - 30%)
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state = 42)

    # Fit the scaler on the training set only, then apply it to the test set,
    # so that test data does not influence the scaling parameters.
    scaler = StandardScaler()
    x_train = scaler.fit_transform(x_train)
    x_test = scaler.transform(x_test)

    return x_train, y_train, x_test, y_test, scaler  # Return the scaler for reuse

#Function for Logistic Regression Modeling
def modeling(x_train, y_train):
    """Fit a logistic regression classifier on the training data and return it.

    Args:
        x_train: Training features.
        y_train: Training labels.

    Returns:
        The fitted ``LogisticRegression`` estimator (``max_iter=1000``).
    """
    classifier = LogisticRegression(max_iter=1000)
    # fit() returns the estimator itself, so the fitted model can be returned directly.
    return classifier.fit(x_train, y_train)

#Function for finding the accuracy of the model
def model_evaluation(model, x_test, y_test):
    """Print the accuracy of *model* on the held-out test set.

    Args:
        model: Fitted estimator exposing ``predict``.
        x_test: Test features.
        y_test: True labels for ``x_test``.
    """
    predicted_labels = model.predict(x_test)
    accuracy = accuracy_score(y_test, predicted_labels)
    print('Accuracy of the model: %.2f' % accuracy)

#Predicting the results of the input data
def prediction(input_data_scaled, model=None):
    """Print a YES/NO loan-repayment prediction for the first input row.

    Args:
        input_data_scaled: Feature rows already transformed by the scaler
            that was fitted on the training data.
        model: Fitted estimator exposing ``predict``. When omitted, the
            module-level ``Logistic_Reg`` is used, which keeps existing
            single-argument calls working.
    """
    if model is None:
        # Backward-compatible fallback to the classifier trained at module level.
        model = Logistic_Reg

    # Make prediction
    pay = model.predict(input_data_scaled)

    #convert the numerical results into the corresponding text
    results = "NO" if pay[0] == 0 else "YES"

    print("\n*******************************************************************************************************")
    print("Prediction for loan repayment: " + results)

# Driver script: load the dataset, collect one applicant's details, train and
# evaluate the classifier, then print the prediction for that applicant.

# Read the credit scoring dataset (fetched over the network from GitHub)
csv_path = 'https://raw.githubusercontent.com/cckmwong/portfolio/refs/heads/main/dataset/credit_scoring_dataset.csv'
df = pd.read_csv(csv_path)

# Prompt the user for the applicant's details (single-row DataFrame)
input_data = reading_input()

# Encode, split and standardize the dataset; keep the fitted scaler
x_train, y_train, x_test, y_test, scaler = cleaning_data(df)

# Scale the user's input with the scaler fitted on the training set, so it is
# standardized the same way as the data the model is trained on
input_data_scaled = scaler.transform(input_data)

# Train the logistic regression model. The name Logistic_Reg must stay at
# module level because prediction() reads it as a global.
Logistic_Reg = modeling(x_train, y_train)

# Print the model's accuracy on the held-out test set
model_evaluation(Logistic_Reg, x_test, y_test)

# Print the predicted loan repayment (YES/NO) for the user's inputs
prediction(input_data_scaled)


Monthly Income including salary, investment income, etc. (£) : 2000
Credit score (between 0 and 10, higher the score, better the credibility): 1
Outstanding loan balance (£): 2
Employment status (0 for Employed/ 1 for Self-employed/ 2 for Unemployed): 1
Accuracy of the model: 0.91

*******************************************************************************************************
Prediction for loan repayment: YES
