In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Recursively walk the Kaggle input directory and print the full path of
# every file found, so the available datasets are visible in the output.
for root, _, names in os.walk('/kaggle/input'):
    for name in names:
        full_path = os.path.join(root, name)
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# Getting the dataset through pandas.

In [None]:
# Load the training CSV into the DataFrame used by every later cell.
credit = pd.read_csv(
    filepath_or_buffer="/kaggle/input/credit-score-data/train.csv",
)

# A peek at a small part of the complete dataset.

In [None]:
# Preview the first rows. Leaving the frame as the cell's last expression lets
# Jupyter render it as a formatted table rather than wrapped plain text.
credit.head()

# Info of the columns that are present in our dataset

In [None]:
# DataFrame.info() writes its report (columns, non-null counts, dtypes) to
# stdout itself and returns None, so it is called directly; wrapping it in
# print() would append a stray "None" line to the output.
credit.info()

# Checking whether the dataset has any null values

In [None]:
# Count the missing values in each column (isna is the pandas alias of isnull).
print(credit.isna().sum())

In [None]:
# Number of rows per Credit_Score class, to see how balanced the target is.
credit.loc[:, "Credit_Score"].value_counts()

# **Data Exploration**

# Exploring the dataset, as it has features that can be used to train a Machine Learning model for credit score classification.

# Using the "plotly" library to make interactive charts for better understanding.

In [None]:
import plotly.express as px

# Exploring each feature to see if it affects a person's credit score

# 1. Occupation

In [None]:
# Box plot over the Occupation column, one colour per credit score class.
# NOTE(review): only x is supplied (no y) and Occupation looks categorical,
# so a count-based chart may be more informative -- confirm the intent.
fig = px.box(
    data_frame=credit,
    x="Occupation",
    color="Credit_Score",
    title="Credit Scores Based on Occupation",
    color_discrete_map=dict(Poor="red", Standard="yellow", Good="green"),
)
fig.show()

# 1. Result: not much difference

# 2. Annual Income:

In [None]:
# Distribution of Annual_Income for each credit score class.
fig = px.box(
    data_frame=credit,
    x="Credit_Score",
    y="Annual_Income",
    color="Credit_Score",
    title="Credit Scores Based on Annual Income",
    color_discrete_map=dict(Poor="red", Standard="yellow", Good="green"),
)
# Use Plotly's "exclusive" method when computing the box quartiles.
fig.update_traces(quartilemethod="exclusive")
fig.show()

# 2. Result: More Annual Income -> Better credit score

# 3. Monthly in-hand salary:

In [None]:
# Distribution of Monthly_Inhand_Salary for each credit score class.
fig = px.box(
    data_frame=credit,
    x="Credit_Score",
    y="Monthly_Inhand_Salary",
    color="Credit_Score",
    title="Credit Scores Based on Monthly Inhand Salary",
    color_discrete_map=dict(Poor="red", Standard="yellow", Good="green"),
)
# Use Plotly's "exclusive" method when computing the box quartiles.
fig.update_traces(quartilemethod="exclusive")
fig.show()

# 3. Result: More monthly in-hand salary -> better credit score

# 4. Number of Bank accounts:

In [None]:
# Distribution of Num_Bank_Accounts for each credit score class.
fig = px.box(
    data_frame=credit,
    x="Credit_Score",
    y="Num_Bank_Accounts",
    color="Credit_Score",
    title="Credit Scores Based on Number of Bank Accounts",
    color_discrete_map=dict(Poor="red", Standard="yellow", Good="green"),
)
# Use Plotly's "exclusive" method when computing the box quartiles.
fig.update_traces(quartilemethod="exclusive")
fig.show()

# 4 Conclusion: having more bank accounts -> does not improve the credit score

# 5. Number of credit cards

In [None]:
# Distribution of Num_Credit_Card for each credit score class.
fig = px.box(
    data_frame=credit,
    x="Credit_Score",
    y="Num_Credit_Card",
    color="Credit_Score",
    title="Credit Scores Based on Number of Credit cards",
    color_discrete_map=dict(Poor="red", Standard="yellow", Good="green"),
)
# Use Plotly's "exclusive" method when computing the box quartiles.
fig.update_traces(quartilemethod="exclusive")
fig.show()

# 5 Conclusion: having more credit cards -> does not improve the credit score

# 6 average interest on loans and EMIs

In [None]:
# Distribution of Interest_Rate for each credit score class.
fig = px.box(
    data_frame=credit,
    x="Credit_Score",
    y="Interest_Rate",
    color="Credit_Score",
    title="Credit Scores Based on the Average Interest rates",
    color_discrete_map=dict(Poor="red", Standard="yellow", Good="green"),
)
# Use Plotly's "exclusive" method when computing the box quartiles.
fig.update_traces(quartilemethod="exclusive")
fig.show()

# 6 Conclusion: 
# Average interest rate is 4 – 11%    -> Good credit score 
# Average interest rate more than 15% -> Bad credit scores

# 7. Number of loans

In [None]:
# Distribution of Num_of_Loan for each credit score class.
fig = px.box(
    data_frame=credit,
    x="Credit_Score",
    y="Num_of_Loan",
    color="Credit_Score",
    title="Credit Scores Based on Number of Loans Taken by the Person",
    color_discrete_map=dict(Poor="red", Standard="yellow", Good="green"),
)
# Use Plotly's "exclusive" method when computing the box quartiles.
fig.update_traces(quartilemethod="exclusive")
fig.show()

# 7 Conclusion: more than three loans at a time -> Bad credit scores

# 8a. Delaying payments

In [None]:
# Distribution of Delay_from_due_date for each credit score class.
fig = px.box(
    data_frame=credit,
    x="Credit_Score",
    y="Delay_from_due_date",
    color="Credit_Score",
    title="Credit Scores Based on Average Number of Days Delayed for Credit card Payments",
    color_discrete_map=dict(Poor="red", Standard="yellow", Good="green"),
)
# Use Plotly's "exclusive" method when computing the box quartiles.
fig.update_traces(quartilemethod="exclusive")
fig.show()

# 8a Conclusion: delaying payments by more than 12 days past the due date -> Bad credit scores

# 8b. frequently delaying payments

In [None]:
# Distribution of Num_of_Delayed_Payment for each credit score class.
fig = px.box(
    data_frame=credit,
    x="Credit_Score",
    y="Num_of_Delayed_Payment",
    color="Credit_Score",
    title="Credit Scores Based on Number of Delayed Payments",
    color_discrete_map=dict(Poor="red", Standard="yellow", Good="green"),
)
# Use Plotly's "exclusive" method when computing the box quartiles.
fig.update_traces(quartilemethod="exclusive")
fig.show()

# 8b. Conclusion:
# delaying 4 – 12 payments -> No effect on credit scores
# delaying more than 12 payments -> Bad effect on credit scores

# 9. More Debt

In [None]:
# Distribution of Outstanding_Debt for each credit score class.
fig = px.box(
    data_frame=credit,
    x="Credit_Score",
    y="Outstanding_Debt",
    color="Credit_Score",
    title="Credit Scores Based on Outstanding Debt",
    color_discrete_map=dict(Poor="red", Standard="yellow", Good="green"),
)
# Use Plotly's "exclusive" method when computing the box quartiles.
fig.update_traces(quartilemethod="exclusive")
fig.show()

# 9 Conclusion: Debt of more than $1338 -> Bad credit scores

# 10. High Credit Utilization
***Credit utilization ratio = total debt / total available credit***

In [None]:
# Distribution of Credit_Utilization_Ratio for each credit score class.
fig = px.box(
    data_frame=credit,
    x="Credit_Score",
    y="Credit_Utilization_Ratio",
    color="Credit_Score",
    title="Credit Scores Based on Credit Utilization Ratio",
    color_discrete_map=dict(Poor="red", Standard="yellow", Good="green"),
)
# Use Plotly's "exclusive" method when computing the box quartiles.
fig.update_traces(quartilemethod="exclusive")
fig.show()

# 10 Conclusion: credit utilization -> No effect on credit scores

# 11. Credit History Age

In [None]:
# Distribution of Credit_History_Age for each credit score class.
fig = px.box(
    data_frame=credit,
    x="Credit_Score",
    y="Credit_History_Age",
    color="Credit_Score",
    title="Credit Scores Based on Credit History Age",
    color_discrete_map=dict(Poor="red", Standard="yellow", Good="green"),
)
# Use Plotly's "exclusive" method when computing the box quartiles.
fig.update_traces(quartilemethod="exclusive")
fig.show()

# 11 Conclusion: long credit history -> Good Credit Score

# 12. Number of EMIs

In [None]:
# Distribution of Total_EMI_per_month for each credit score class.
fig = px.box(
    data_frame=credit,
    x="Credit_Score",
    y="Total_EMI_per_month",
    color="Credit_Score",
    title="Credit Scores Based on Total Number of EMIs per Month",
    color_discrete_map=dict(Poor="red", Standard="yellow", Good="green"),
)
# Use Plotly's "exclusive" method when computing the box quartiles.
fig.update_traces(quartilemethod="exclusive")
fig.show()

# 12 Conclusion: Number of EMIs -> No effect on credit score

# 13. Monthly Investments

In [None]:
# Distribution of Amount_invested_monthly for each credit score class.
fig = px.box(
    data_frame=credit,
    x="Credit_Score",
    y="Amount_invested_monthly",
    color="Credit_Score",
    title="Credit Scores Based on Amount Invested Monthly",
    color_discrete_map=dict(Poor="red", Standard="yellow", Good="green"),
)
# Use Plotly's "exclusive" method when computing the box quartiles.
fig.update_traces(quartilemethod="exclusive")
fig.show()

# 13 Conclusion: Monthly Investment -> No effect on credit score

# 14 Low Monthly Balance

In [None]:
# Distribution of Monthly_Balance for each credit score class.
fig = px.box(
    data_frame=credit,
    x="Credit_Score",
    y="Monthly_Balance",
    color="Credit_Score",
    title="Credit Scores Based on Monthly Balance Left",
    color_discrete_map=dict(Poor="red", Standard="yellow", Good="green"),
)
# Use Plotly's "exclusive" method when computing the box quartiles.
fig.update_traces(quartilemethod="exclusive")
fig.show()

#  14 Conclusion: High monthly balance -> good for credit scores
#  monthly balance less than $250 -> bad for credit scores

# **Credit Score Classification Model**

# One more important feature (Credit Mix) in the dataset is valuable for determining credit scores. 
# The credit mix feature tells about the types of credits and loans you have taken.
# As the Credit_Mix column is categorical, transform it into a numerical feature so that we can use it to train a Machine Learning model for the task of credit score classification:

In [None]:
# Encode the categorical Credit_Mix labels as integers so the column can be
# used as a numeric model feature.
credit_mix_codes = {"Bad": 0, "Standard": 1, "Good": 2}

# Only encode while the column still contains text labels. Series.map turns
# every value that is not a key of the mapping into NaN, so re-running this
# cell on the already-encoded column would otherwise wipe it out.
if credit["Credit_Mix"].isin(list(credit_mix_codes)).any():
    credit["Credit_Mix"] = credit["Credit_Mix"].map(credit_mix_codes)

# split the data into features and labels by selecting the features important for the model

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Columns used as model inputs. The order matters: the interactive prediction
# cell at the end of the notebook supplies its values in this same order.
feature_columns = [
    "Annual_Income", "Monthly_Inhand_Salary",
    "Num_Bank_Accounts", "Num_Credit_Card",
    "Interest_Rate", "Num_of_Loan",
    "Delay_from_due_date", "Num_of_Delayed_Payment",
    "Credit_Mix", "Outstanding_Debt",
    "Credit_History_Age", "Monthly_Balance",
]

# Feature matrix with one row per record and one column per selected feature.
x = np.array(credit[feature_columns])

# Target as a 1-D array of labels. Selecting with a single column name (not a
# list) avoids an (n, 1) column vector, which scikit-learn estimators warn
# about when it is passed as y.
y = np.array(credit["Credit_Score"])

# 1. split the data into training and test sets 
# 2. train a credit score classification model

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Hold out 33% of the rows for testing; the fixed seed makes the split
# reproducible across runs.
xtrain, xtest, ytrain, ytest = train_test_split(
    x, y, test_size=0.33, random_state=42
)

# Seed the forest as well, otherwise every run trains a different model and
# the predictions are not reproducible.
model = RandomForestClassifier(random_state=42)

# Flatten the target so the classifier always receives a 1-D label array,
# whether y was built as (n,) or as an (n, 1) column vector.
model.fit(xtrain, np.ravel(ytrain))

# make predictions from the model by giving inputs to it according to the features used to train the model

In [None]:
# Enter the values yourself to check the predicted credit score.
print("Credit Score Prediction : ")

# Prompts are listed in the same order as the feature columns the model was
# trained on; the answers are fed to the model in exactly this order.
# The Credit Mix codes follow the encoding applied to the training data
# (Bad: 0, Standard: 1, Good: 2).
prompts = [
    "Annual Income: ",
    "Monthly Inhand Salary: ",
    "Number of Bank Accounts: ",
    "Number of Credit cards: ",
    "Interest rate: ",
    "Number of Loans: ",
    "Average number of days delayed by the person: ",
    "Number of delayed payments: ",
    "Credit Mix (Bad: 0, Standard: 1, Good: 2) : ",
    "Outstanding Debt: ",
    "Credit History Age: ",
    "Monthly Balance: ",
]

# Convert every answer to float, including Credit Mix, so the feature array is
# numeric rather than an array of strings. A non-numeric answer raises
# ValueError from float().
answers = [float(input(prompt)) for prompt in prompts]

# A single-row 2-D array: one sample with one value per feature.
features = np.array([answers])
print("Predicted Credit Score = ", model.predict(features))