In [1]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import plotly.express as px
import plotly.io as pio

In [2]:
data=pd.read_csv("credit_scoring.csv")

In [3]:
data.head()

Unnamed: 0,Age,Gender,Marital Status,Education Level,Employment Status,Credit Utilization Ratio,Payment History,Number of Credit Accounts,Loan Amount,Interest Rate,Loan Term,Type of Loan
0,60,Male,Married,Master,Employed,0.22,2685.0,2,4675000,2.65,48,Personal Loan
1,25,Male,Married,High School,Unemployed,0.2,2371.0,9,3619000,5.19,60,Auto Loan
2,30,Female,Single,Master,Employed,0.22,2771.0,6,957000,2.76,12,Auto Loan
3,58,Female,Married,PhD,Unemployed,0.12,1371.0,2,4731000,6.57,60,Auto Loan
4,32,Male,Married,Bachelor,Self-Employed,0.99,828.0,2,3289000,6.28,36,Personal Loan


In [4]:
data.describe()

Unnamed: 0,Age,Credit Utilization Ratio,Payment History,Number of Credit Accounts,Loan Amount,Interest Rate,Loan Term
count,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0
mean,42.702,0.50995,1452.814,5.58,2471401.0,10.6866,37.128
std,13.266771,0.291057,827.934146,2.933634,1387047.0,5.479058,17.436274
min,20.0,0.0,0.0,1.0,108000.0,1.01,12.0
25%,31.0,0.25,763.75,3.0,1298000.0,6.0225,24.0
50%,42.0,0.53,1428.0,6.0,2437500.0,10.705,36.0
75%,54.0,0.75,2142.0,8.0,3653250.0,15.44,48.0
max,65.0,1.0,2857.0,10.0,4996000.0,19.99,60.0


In [5]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 12 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   Age                        1000 non-null   int64  
 1   Gender                     1000 non-null   object 
 2   Marital Status             1000 non-null   object 
 3   Education Level            1000 non-null   object 
 4   Employment Status          1000 non-null   object 
 5   Credit Utilization Ratio   1000 non-null   float64
 6   Payment History            1000 non-null   float64
 7   Number of Credit Accounts  1000 non-null   int64  
 8   Loan Amount                1000 non-null   int64  
 9   Interest Rate              1000 non-null   float64
 10  Loan Term                  1000 non-null   int64  
 11  Type of Loan               1000 non-null   object 
dtypes: float64(3), int64(4), object(5)
memory usage: 93.9+ KB


In [6]:
data.isnull().sum()

Unnamed: 0,0
Age,0
Gender,0
Marital Status,0
Education Level,0
Employment Status,0
Credit Utilization Ratio,0
Payment History,0
Number of Credit Accounts,0
Loan Amount,0
Interest Rate,0


In [7]:
data['Education Level'].unique()

array(['Master', 'High School', 'PhD', 'Bachelor'], dtype=object)

In [8]:
data['Employment Status'].unique()

array(['Employed', 'Unemployed', 'Self-Employed'], dtype=object)

In [9]:
## lets see credit utility ratio

In [10]:
credit_utility_figure=px.box(data,y='Credit Utilization Ratio',title="CREDIT UTILITY DISTRIBUTION")
credit_utility_figure

In [11]:
loan_amount_figure=px.histogram(data,x="Loan Amount")
loan_amount_figure

In [12]:
data['Education Level'].unique()
data['Employment Status'].unique()

array(['Employed', 'Unemployed', 'Self-Employed'], dtype=object)

In [13]:
Education_level_mapping={'Master':3, 'High School':1, 'PhD':4, 'Bachelor':2}
Employment_status_mapping={'Employed':2, 'Unemployed':1,'Self-Employed':3}

In [14]:
data['Education Level']=data['Education Level'].map(Education_level_mapping)
data['Employment Status']=data['Employment Status'].map(Employment_status_mapping)

In [15]:
data["Education Level"].unique()

array([3, 1, 4, 2])

In [16]:
#Calculate credit scores using the complete FICO formula

In [17]:
credit_scores = []

for index, row in data.iterrows():
    payment_history = row['Payment History']
    credit_utilization_ratio = row['Credit Utilization Ratio']
    number_of_credit_accounts = row['Number of Credit Accounts']
    education_level = row['Education Level']
    employment_status = row['Employment Status']
    # Apply the FICO formula to calculate the credit score
    credit_score = (payment_history * 0.35) + (credit_utilization_ratio * 0.30) + (number_of_credit_accounts * 0.15) + (education_level * 0.10) + (employment_status * 0.10)
    credit_scores.append(credit_score)


In [18]:
# Add the credit scores as a new column to the DataFrame
data['Credit Score'] = credit_scores

In [19]:
print(data.head())

   Age  Gender Marital Status  Education Level  Employment Status  \
0   60    Male        Married                3                  2   
1   25    Male        Married                1                  1   
2   30  Female         Single                3                  2   
3   58  Female        Married                4                  1   
4   32    Male        Married                2                  3   

   Credit Utilization Ratio  Payment History  Number of Credit Accounts  \
0                      0.22           2685.0                          2   
1                      0.20           2371.0                          9   
2                      0.22           2771.0                          6   
3                      0.12           1371.0                          2   
4                      0.99            828.0                          2   

   Loan Amount  Interest Rate  Loan Term   Type of Loan  Credit Score  
0      4675000           2.65         48  Personal Loan       

In [20]:
from sklearn.cluster import KMeans

X = data[['Credit Score']]
kmeans = KMeans(n_clusters=4, n_init=10, random_state=42)
kmeans.fit(X)
data['Segment'] = kmeans.labels_

In [21]:
# Convert the 'Segment' column to category data type
data['Segment'] = data['Segment'].astype('category')

# Visualize the segments using Plotly
fig = px.scatter(data, x=data.index, y='Credit Score', color='Segment',
                 color_discrete_sequence=['green', 'blue', 'yellow', 'red'])
fig.update_layout(
    xaxis_title='Customer Index',
    yaxis_title='Credit Score',
    title='Customer Segmentation based on Credit Scores'
)
fig.show()

In [22]:
data['Segment'] = data['Segment'].map({2: 'Very Low',
                                       0: 'Low',
                                       1: 'Good',
                                       3: "Excellent"})

# Convert the 'Segment' column to category data type
data['Segment'] = data['Segment'].astype('category')

# Visualize the segments using Plotly
fig = px.scatter(data, x=data.index, y='Credit Score', color='Segment',
                 color_discrete_sequence=['green', 'blue', 'yellow', 'red'])
fig.update_layout(
    xaxis_title='Customer Index',
    yaxis_title='Credit Score',
    title='Customer Segmentation based on Credit Scores'
)
fig.show()