# Analysis & Visualisation

#### Percentage of Loans Recovered

In [14]:
import pandas as pd
import plotly.graph_objs as go

df = pd.read_csv('final_df.csv')

In [15]:
df['total_amount_to_be_paid'] = df['loan_amount'] * (1 + df['int_rate'])

# Calculate percentage recovered
df['percentage_recovered'] = (df['total_payment'] / df['total_amount_to_be_paid']) * 100


In [16]:
total_recovered = df['total_payment'].sum()
total_amount_to_be_paid = df['total_amount_to_be_paid'].sum()
total_remaining = total_amount_to_be_paid - total_recovered
overall_percentage_recovered = (total_recovered / total_amount_to_be_paid) * 100

print(f"Total Recovered: £{total_recovered:.2f}")
print(f"Total Remaining: £{total_remaining:.2f}")
print(f"\nOverall Percentage of Loans Recovered: {overall_percentage_recovered:.4f}%")

Total Recovered: £299330.20
Total Remaining: £6301030690.05

Overall Percentage of Loans Recovered: 0.0048%


In [17]:
df['amount_in_6_months'] = df['loan_amount'] * (1 + df['int_rate'] * 0.5)
total_amount_in_6_months = df['amount_in_6_months'].sum()

print(f"Total Amount to be paid back in 6 Months: £{total_amount_in_6_months:.2f}")

Total Amount to be paid back in 6 Months: £3366519285.12


In [18]:
trace1 = go.Bar(
    name='Initial Loan Amount',
    x=df.index,
    y=df['loan_amount'],
    yaxis='y1'
)
trace2 = go.Bar(
    name='Amount in 6 Months',
    x=df.index,
    y=df['amount_in_6_months'],
    yaxis='y2'
)

# Layout of the chart with secondary y-axis
layout = go.Layout(
    title='Loan Amounts Now and in 6 Months with Interest',
    xaxis=dict(title='Loan'),
    yaxis=dict(title='Initial Loan Amount'),
    yaxis2=dict(title='Amount in 6 Months', overlaying='y', side='right'),
    barmode='group'
)

# Create the figure
fig = go.Figure(data=[trace1, trace2], layout=layout)

# Show the figure
fig.show()

In [19]:
df['total_amount_to_be_paid'] = df['loan_amount'] * (1 + df['int_rate'])

# Calculate amount remaining
df['amount_remaining'] = df['total_amount_to_be_paid'] - df['total_payment']

# Summarize the total recovered and remaining amounts by loan term
grouped = df.groupby('term').agg(
    total_recovered=pd.NamedAgg(column='total_payment', aggfunc='sum'),
    total_remaining=pd.NamedAgg(column='amount_remaining', aggfunc='sum')
).reset_index()

# Create a stacked bar chart
fig = go.Figure(data=[
    go.Bar(name='Recovered', x=grouped['term'], y=grouped['total_recovered'], marker_color='green'),
    go.Bar(name='Remaining', x=grouped['term'], y=grouped['total_remaining'], marker_color='red')
])
fig.update_layout(barmode='stack', 
                  title='Total Recovered vs Remaining Amount by Loan Term',
                  xaxis_title='Loan Term (Months)',
                  yaxis_title='Amount',
                  xaxis=dict(type='category'))

fig.show()

In [20]:


# Define the amounts
total_recovered = 177629.45
total_remaining = 2766016838.55

# Data for the bar chart
data = [
    go.Bar(name='Total Recovered', x=['Loan Amount'], y=[total_recovered]),
    go.Bar(name='Total Remaining', x=['Loan Amount'], y=[total_remaining])
]

# Layout of the chart
layout = go.Layout(title='Loan Recovery vs Remaining Amount',
                   barmode='group',
                   xaxis=dict(title='Loan Amount'),
                   yaxis=dict(title='Amount', type = 'log'))

# Create the figure
fig = go.Figure(data=data, layout=layout)

# Show the figure
fig.show()


#### Calculating Loss

In [21]:
charged_off_loans = df[df['loan_status'] == 'Charged Off']

# Calculate the total number of charged-off loans
total_charged_off_loans = charged_off_loans.shape[0]

# Calculate the total amount paid towards charged-off loans
total_paid_before_charged_off = charged_off_loans['total_payment'].sum()


# Calculate the percentage of charged-off loans
total_loans = df.shape[0]
percentage_charged_off_loans = (total_charged_off_loans / total_loans) * 100

print(f"Total Number of Charged-Off Loans: {total_charged_off_loans}")
print(f"Total Amount Paid Before Charged Off: £{total_paid_before_charged_off:.2f}")
print(f"Percentage of Charged-Off Loans: {percentage_charged_off_loans:.2f}%")

Total Number of Charged-Off Loans: 1156
Total Amount Paid Before Charged Off: £9801.26
Percentage of Charged-Off Loans: 3.56%


#### Projected Loss

In [22]:
charged_off_loans.loc[:, 'expected_total_payment'] = charged_off_loans['loan_amount'] * (1 + charged_off_loans['int_rate'] * (charged_off_loans['term'] / 12))

charged_off_loans.loc[:, 'expected_loss'] = charged_off_loans['expected_total_payment'] - charged_off_loans['total_payment']

# Calculate the total expected loss
total_expected_loss = charged_off_loans['expected_loss'].sum()
print(f"Total Expected Loss: £{total_expected_loss:.2f}")

# Visualize the projected loss over the remaining term of these loans
trace = go.Bar(
    x=charged_off_loans.index,
    y=charged_off_loans['expected_loss'],
    name='Expected Loss',
    marker = dict(color = 'orange')
)

layout = go.Layout(
    title='Expected Loss of Charged Off Loans',
    xaxis=dict(title='Loan'),
    yaxis=dict(title='Expected Loss')
)

fig = go.Figure(data=[trace], layout=layout)

fig.show()

Total Expected Loss: £1017710778.74




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



#### Possible Loss

In [23]:
risk_status = ['Late (31-120 days)', 'Late (16-30 days)', 'In Grace Period']
risk_customers = df[df['loan_status'].isin(risk_status)]

total_risk_customers = risk_customers.shape[0]

total_customers = df.shape[0]
percentage_risk_customers = (total_risk_customers / total_customers) * 100

print(f"Percentage of customers behind on payments: {percentage_risk_customers:.2f}%")

Percentage of customers behind on payments: 1.53%


In [27]:
risk_customers.loc[:,'expected_total_payment'] = risk_customers['loan_amount'] * (1 + risk_customers['int_rate'] * (risk_customers['term']/12))

risk_customers.loc[:,'expected_loss'] = risk_customers['expected_total_payment'] - risk_customers['total_payment']

total_expected_loss_if_charged_off = risk_customers['expected_loss'].sum()

print(f"Total Number of Customers Behind on Payments: {total_risk_customers}")
print(f"Percentage of Customers Behind on Payments: {percentage_risk_customers:.2f}%")
print(f"Total Expected Loss if Status Changed to Charged Off: £{total_expected_loss_if_charged_off:.2f}")

Total Number of Customers Behind on Payments: 498
Percentage of Customers Behind on Payments: 1.53%
Total Expected Loss if Status Changed to Charged Off: £458054685.81




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [25]:
import plotly.graph_objs as go

trace = go.Bar(
    x=risk_customers.index,
    y=risk_customers['expected_loss'],
    name='Expected Loss if Charged Off',
    marker=dict(color='red')  # Change the color to red for visibility
)

layout = go.Layout(
    title='Expected Loss of Customers Behind on Payments if Charged Off',
    xaxis=dict(title='Customer'),
    yaxis=dict(title='Expected Loss')
)

fig = go.Figure(data=[trace], layout=layout)

fig.show()

**If customers that are late on payments converted to Charged Off, what percentage of total expected revenue do these customers and the customers who have already defaulted on their loan represent?**

In [28]:
# Calculate the projected loss for each risk customer if they were to finish the full loan term
risk_customers.loc[:,'projected_loss'] = risk_customers['expected_total_payment'] - risk_customers['total_payment']

# Calculate the total projected loss if these customers were to finish the full loan term
total_projected_loss = risk_customers['projected_loss'].sum()

# Filter customers who have already defaulted (Charged Off)
charged_off_customers = df[df['loan_status'] == 'Charged Off']

# Calculate the expected total amount to be paid for each charged-off customer
charged_off_customers.loc[:,'expected_total_payment'] = charged_off_customers['loan_amount'] * (1 + charged_off_customers['int_rate'] * (charged_off_customers['term'] / 12))

# Calculate the expected loss for each charged-off customer
charged_off_customers.loc[:,'expected_loss'] = charged_off_customers['expected_total_payment'] - charged_off_customers['total_payment']

# Calculate the total expected loss for customers who have already defaulted
total_expected_loss_charged_off = charged_off_customers['expected_loss'].sum()

# Calculate the total expected revenue from all loans
df.loc[:, 'expected_total_payment'] = df['loan_amount'] * (1 + df['int_rate'] * (df['term'] / 12))
total_expected_revenue = df['expected_total_payment'].sum()

# Calculate the percentage of total expected revenue represented by late and charged-off customers
total_expected_loss = total_projected_loss + total_expected_loss_charged_off
percentage_total_expected_revenue = (total_expected_loss / total_expected_revenue.sum()) * 100

print(f"Total Projected Loss if Late Customers Finish Full Term: £{total_projected_loss:.2f}")
print(f"Total Expected Loss for Charged-Off Customers: £{total_expected_loss_charged_off:.2f}")
print(f"Percentage of Total Expected Revenue Represented by Late and Charged-Off Customers: {percentage_total_expected_revenue:.2f}%")

Total Projected Loss if Late Customers Finish Full Term: £458054685.81
Total Expected Loss for Charged-Off Customers: £1017710778.74
Percentage of Total Expected Revenue Represented by Late and Charged-Off Customers: 6.86%




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



#### Indicators of loss

In [29]:
from data_visualiser import DataVisualiser

analysis = DataVisualiser(df)
analysis.loan_indicators()





A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Analysis Results:
Grade distribution for Charged Off and At Risk loans:
grade  risk_category
A      At Risk           18
       Charged Off      109
B      At Risk           99
       Charged Off      298
C      At Risk           93
       Charged Off      349
D      At Risk           77
       Charged Off      220
E      At Risk           42
       Charged Off      110
F      At Risk           16
       Charged Off       56
G      At Risk            5
       Charged Off       14
dtype: int64

Purpose distribution for Charged Off and At Risk loans:
purpose             risk_category
car                 At Risk            3
                    Charged Off       12
credit_card         At Risk           89
                    Charged Off      215
debt_consolidation  At Risk          208
                    Charged Off      692
educational         Charged Off        4
home_improvement    At Risk           14
                    Charged Off       48
house               Charged Off       10
m