# Lending Club Case Study

Imports and basic set-up

In [None]:
import warnings
from os import getcwd
from os.path import join

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

%matplotlib inline

In [None]:
# Notebook-wide display configuration.
# NOTE: this silences every warning category for the rest of the session.
warnings.filterwarnings('ignore')
# Lift pandas' display limits so columns, rows and cell text are not truncated.
for _display_opt in ('display.max_columns', 'display.max_rows', 'display.max_colwidth'):
    pd.set_option(_display_opt, None)
sns.set_style('darkgrid')
# plt.style.use('seaborn-v0_8-pastel')

In [None]:
# Project root: the current working directory of the running process.
PRJ_DIR = getcwd()
# Input files are read from the `data` sub-directory of the project root.
DATA_DIR = join(PRJ_DIR, 'data')

## Step 0: Reading data

In [None]:
# Load the raw loan records from the data directory.
# `low_memory=False` makes pandas infer every column's dtype from the whole
# file instead of chunk by chunk, which avoids mixed-type object columns
# (and the DtypeWarning that the global warning filter would otherwise hide).
df = pd.read_csv(join(DATA_DIR, 'loan.csv'), low_memory=False)
df.head(10)

In [None]:
# Read the data dictionary and discard every row that has a missing cell.
dictionary_path = join(DATA_DIR, 'Data_Dictionary.xlsx')
data_dict = pd.read_excel(dictionary_path).dropna()
# Show five randomly chosen dictionary entries.
data_dict.sample(5)

## Step 1: Cleaning

### Choosing columns
There are columns with `Nan` values.

In [None]:
df.isna().sum()

As seen above, some columns do not contain any values at all, while others contain only a few.

In [None]:
# Count the columns in which strictly more than half of the rows are missing.
# The strict comparison makes this count the exact complement of the
# "missing <= 50% of rows" rule used to select the columns that are kept.
(df.isna().sum() > 0.5 * df.shape[0]).sum()

57 columns have more than 50% of their values missing. I am choosing not to use them and am dropping them from the analysis.

In [None]:
# Keep only the columns in which at most half of the rows are missing.
null_counts = df.isna().sum()
half_of_rows = 0.50 * df.shape[0]
column_names = df.columns[null_counts <= half_of_rows].tolist()
data_df = df[column_names]
# The raw frame is not used again, so release the name.
del df
data_df.sample(3)

Another thing to check in the remaining columns is the number of unique values found in each column.

In [None]:
data_df.nunique()

There are columns which contain only 1 value for all the rows. We are choosing not to use them either, as they do not add any information with respect to our target variable.

In [None]:
(data_df.nunique() <= 1).sum()

9 columns will be additionally removed from our analysis.

In [None]:
# Keep only the columns that take more than one distinct value.
column_names = data_df.columns[data_df.nunique() > 1]
# Take an explicit copy: the cleaning cells assign new columns to `data_df`,
# and doing that on a selection of another frame triggers pandas'
# SettingWithCopyWarning (currently hidden by the global warning filter).
data_df = data_df[column_names].copy()
data_df.sample(3)

In [None]:
# Build a name -> description lookup from the data dictionary once.
# `setdefault` keeps the FIRST description seen for a name (what `.values[0]`
# selected before); stripping the names tolerates stray whitespace in the sheet.
_desc_lookup = {}
for _name, _desc in zip(data_dict.LoanStatNew, data_dict.Description):
    _desc_lookup.setdefault(str(_name).strip(), _desc)

# One summary row per remaining column: missing-value count, number of
# distinct values and the dictionary description. A column that has no
# dictionary entry gets a placeholder instead of raising IndexError.
column_desc_df = pd.DataFrame(
    [
        (
            col,
            data_df[col].isna().sum(),
            data_df[col].nunique(),
            _desc_lookup.get(col, 'Description not available'),
        )
        for col in data_df.columns
    ],
    columns=['Column_Name', 'Num_NAs', 'Num_unique_vals', 'Description'],
)
column_desc_df

In [None]:
data_df.info()

In [None]:
def get_earliest_yr(x, pivot=11):
    """Return the four-digit year of a ``'<month>-<yy>'`` date string.

    The text after the first ``-`` is parsed as a two-digit year. Years in
    the range ``0..pivot`` are placed in the 2000s; all other years are
    placed in the 1900s.

    Args:
        x: Date string such as ``'Jan-85'``. Any non-string value (for
            example a missing value) is returned unchanged, mirroring how
            the other date columns are parsed in this notebook.
        pivot: Largest two-digit year that is still treated as 20xx.
            Defaults to 11, the cut-off this notebook has always used.

    Returns:
        The year as a string (e.g. ``'1985'``), or ``x`` itself when ``x``
        is not a string.
    """
    if not isinstance(x, str):
        return x
    two_digit_year = int(x.split("-")[1])
    century = 2000 if 0 <= two_digit_year <= pivot else 1900
    return str(century + two_digit_year)

In [None]:
# Loan term: keep the first space-separated token and store it as an integer.
data_df['term'] = data_df['term'].map(lambda raw_term: int(raw_term.strip().split(' ')[0]))
# Interest rate: strip surrounding whitespace and percent signs, then parse as a float.
data_df['int_rate'] = data_df['int_rate'].map(lambda raw_rate: float(raw_rate.strip().strip('%')))

In [None]:
# Split the issue date on '-' once, then derive the two new columns:
# the first piece is the month, the second piece prefixed with "20" is the year.
issue_d_parts = data_df['issue_d'].map(lambda stamp: stamp.split('-'))
data_df['issue_d_month'] = issue_d_parts.map(lambda parts: parts[0])
data_df['issue_d_year'] = issue_d_parts.map(lambda parts: "20" + parts[1])

In [None]:
# Month is the text before the first '-'; the year is resolved by get_earliest_yr.
data_df['earliest_cr_line_month'] = data_df['earliest_cr_line'].map(lambda stamp: stamp.split('-')[0])
data_df['earliest_cr_line_year'] = data_df['earliest_cr_line'].map(get_earliest_yr)

In [None]:
# Non-string entries (e.g. missing values) are passed through untouched;
# strings are split on '-' into a month and a "20"-prefixed year.
data_df['last_pymnt_d_month'] = data_df['last_pymnt_d'].map(
    lambda stamp: stamp if not isinstance(stamp, str) else stamp.split('-')[0])
data_df['last_pymnt_d_year'] = data_df['last_pymnt_d'].map(
    lambda stamp: stamp if not isinstance(stamp, str) else "20" + stamp.split('-')[1])

In [None]:
# Non-string entries (e.g. missing values) are passed through untouched;
# strings are split on '-' into a month and a "20"-prefixed year.
data_df['last_credit_pull_d_month'] = data_df['last_credit_pull_d'].map(
    lambda stamp: stamp if not isinstance(stamp, str) else stamp.split('-')[0])
data_df['last_credit_pull_d_year'] = data_df['last_credit_pull_d'].map(
    lambda stamp: stamp if not isinstance(stamp, str) else "20" + stamp.split('-')[1])

In [None]:
# Trim and upper-case every employer title; float entries (missing values) stay as they are.
data_df['emp_title'] = data_df['emp_title'].map(
    lambda title: title if isinstance(title, float) else title.strip().upper())

In [None]:
# Any value whose text contains '<' is collapsed to the single label '<1 year'.
data_df['emp_length'] = data_df['emp_length'].map(
    lambda tenure: '<1 year' if '<' in str(tenure) else tenure)

In [None]:
# Remove the raw date columns (already split into month/year parts) and the url column.
data_df = data_df.drop(
    columns=['issue_d', 'earliest_cr_line', 'url', 'last_pymnt_d', 'last_credit_pull_d'])

## Step 2: Uni-variate Analysis

### Annual Income

In [None]:
data_df.annual_inc.describe()

In [None]:
plt.figure(figsize=(3, 8))
sns.boxplot(data_df.annual_inc)
plt.title('Annual Income')
plt.show()

Here we encounter some outliers. It is evident from the 5 point summary that 75% of the annual incomes are below USD 100K, but the highest income is USD 6M. Analyzing this column while considering all the values would not yield correct results, and therefore we choose to drop rows where the annual income is greater than USD 150K.

In [None]:
# Keep only the rows whose annual income does not exceed USD 150,000.
income_within_cap = data_df['annual_inc'] <= 150_000
data_df = data_df[income_within_cap]
data_df['annual_inc'].describe()

In [None]:
plt.figure(figsize=(3, 8))
sns.boxplot(data_df.annual_inc)
plt.title('Annual Income')
plt.xticks(ticks=[])
plt.show()

In [None]:
plt.figure(figsize=(10, 5))
sns.histplot(data_df.annual_inc, kde=True)
plt.title('Annual Income')
plt.xlabel("")
plt.ylabel("")
plt.show()

We observe that:
- The median annual income is USD 57K
- Income of most of the applicants lie below USD 80K

### Last Payment Amount

In [None]:
data_df.last_pymnt_amnt.describe()

In [None]:
plt.figure(figsize=(3, 8))
sns.boxplot(data_df.last_pymnt_amnt)
plt.title('Last Payment')
plt.xticks(ticks=[])
plt.show()

Here we encounter some outliers. It is evident from the 5 point summary that 75% of the last payment amounts are below roughly USD 3200, but the highest payment is around USD 36K. Analyzing this column while considering all the values would not yield correct results, and therefore we choose to drop rows where the last payment amount is greater than USD 8000.

In [None]:
# Keep only the rows whose last payment does not exceed USD 8000.
# The explicit copy makes `data_df` an independent frame, so the column
# assignments made later in the notebook (e.g. `title`, `revol_util`) do not
# operate on a filtered selection and do not trigger SettingWithCopyWarning.
data_df = data_df[data_df.last_pymnt_amnt <= 8000].copy()
data_df.last_pymnt_amnt.describe()

In [None]:
plt.figure(figsize=(3, 8))
sns.boxplot(data_df.last_pymnt_amnt)
plt.title('Last Payment done in $')
plt.xticks(ticks=[])
plt.show()

In [None]:
plt.figure(figsize=(10, 5))
sns.histplot(data_df.last_pymnt_amnt, kde=True)
plt.title('Last Payment done in $')
plt.show()

We observe that:
- More than 50% of the amount paid as the latest payment is less than $500

### Loan amount

In [None]:
data_df.loan_amnt.describe()

In [None]:
plt.figure(figsize=(3, 8))
sns.boxplot(data_df.loan_amnt)
plt.title('Loan Amount')
plt.show()

We observe that:
- The median amount that is applied for is USD 8200
- Only 25% of the applicants have applied for amounts greater than USD 13000

In [None]:
plt.figure(figsize=(10, 5))
sns.histplot(data_df.loan_amnt, bins=20, kde=True)
plt.title('Loan Amount')
plt.show()

### Funded Amount

In [None]:
data_df.funded_amnt.describe()

In [None]:
plt.figure(figsize=(3, 8))
sns.boxplot(data_df.funded_amnt)
plt.title('Funded Amount')
plt.show()

In [None]:
plt.figure(figsize=(10, 5))
sns.histplot(data_df.funded_amnt, kde=True)
plt.title('Funded Amount')
plt.show()

We observe that:
- The median amount that is funded by the club is approximately USD 8000

### Amount Funded by Investor

In [None]:
data_df.funded_amnt_inv.describe()

In [None]:
# Box plot of the amount funded by investors (title spelling corrected: "Investors").
plt.figure(figsize=(3, 8))
sns.boxplot(data_df.funded_amnt_inv)
plt.title('Amount Funded by Investors')
plt.xticks(ticks=[])  # hide the x-axis ticks
plt.show()

In [None]:
# Histogram (20 bins, with KDE) of the amount funded by investors
# (title spelling corrected: "Investors").
plt.figure(figsize=(10, 5))
sns.histplot(data_df.funded_amnt_inv, bins=20, kde=True)
plt.title('Amount Funded by Investors')
plt.show()

We observe that:
- The median amount that is funded by the investors is approximately USD 8000
- Only 25% of the loans have been funded for more than USD 12000

### Term of the loan

In [None]:
# Number of loans per term. The bars are ordered by term and the tick labels
# are derived from the data, so a label can never be attached to the wrong
# bar (the previous hard-coded labels relied on the 36-month term being the
# most frequent value).
term_counts = data_df.term.value_counts().sort_index()
plt.figure(figsize=(4, 5))
term_counts.plot.bar()
plt.title("Term of the loan")
plt.xticks(ticks=range(len(term_counts)),
           labels=[f'{months} months' for months in term_counts.index],
           rotation=0)
plt.xlabel("")
plt.ylabel("Number of loans")
plt.show()

We have loans of 2 type of terms:
- 36 months or 3 years
- 60 months or 5 years

We observe that the number of loans issued for the 36 month period is far greater than the number issued for the 60 month period.

### Interest Rates

In [None]:
data_df.int_rate.describe()

In [None]:
plt.figure(figsize=(3, 8))
sns.boxplot(data_df.int_rate)
plt.title('Interest Rate')
plt.xticks(ticks=[])
plt.show()

In [None]:
plt.figure(figsize=(10, 5))
sns.histplot(data_df.int_rate, bins=50, kde=True)
plt.title('Interest Rate')
plt.ylabel(None)
plt.show()

We observe that:
- **More than 75%** of the loans have an interest rate less than **15%**
- **50%** of the loans have an interest rate between ~**9%** and ~**14.25%** 

### Installments

In [None]:
data_df.installment.describe()

In [None]:
plt.figure(figsize=(3, 8))
sns.boxplot(data_df.installment)
plt.title('Installments')
plt.xticks(ticks=[])
plt.show()

In [None]:
plt.figure(figsize=(10, 5))
sns.histplot(data_df.installment, kde=True, bins=50)
plt.title('Installments')
plt.ylabel(None)
plt.show()

We observe that:
- Median installment paid is **~USD 250**
- **More than 75%** of the monthly installments are **below USD 390**

### Grade and Sub-Grade

In [None]:
plt.figure(figsize=(5, 4))
data_df.grade.value_counts().plot.bar()
plt.title("Grade of loans")
plt.xticks(rotation=0)
plt.xlabel("")
plt.ylabel("Number of loans")
plt.show()

In [None]:
plt.figure(figsize=(10, 5))
data_df.sub_grade.value_counts().plot.bar()
plt.title("Sub-Grade of loans")
plt.xticks(rotation=0)
plt.xlabel("")
plt.ylabel("Number of loans")
plt.show()

We observe that:
- Most of the loans are **Grade B**, followed by **A**, and **C**
- Most of the loans are of sub-grade **A4** followed by **B3**, and **A5**

### Employee Title

> **NOTE**: The values of this column are text. We observe that values indicating the same employer name are repeated with different letter cases and spellings. We tackled the letter-case differences earlier by converting every value to upper case. 

In [None]:
data_df.emp_title.value_counts()[:10]

We observed that:
- Most of the loans are issued to employees of **US Army**

### Employee Length

In [None]:
data_df.emp_length.value_counts()

In [None]:
plt.figure(figsize=(10, 5))
data_df.emp_length.value_counts().plot.bar()
plt.title("Years of Continuous Employement")
plt.xticks(rotation=0)
plt.xlabel("")
plt.ylabel("Number of loans")
plt.show()

We observe that:
- More than **~7200** applicants have **more than 10 years** of continuous employment

### Home Ownership

In [None]:
data_df.home_ownership.value_counts()

In [None]:
plt.figure(figsize=(10, 5))
data_df.home_ownership.value_counts().plot.bar()
plt.title("Type of home ownership")
plt.xticks(rotation=0)
plt.xlabel("")
plt.ylabel("Number of loans")
plt.show()

We observe that:
- More than **~17000** applicants are renters, closely followed by applicants with a mortgage

### Verification Status of Income

In [None]:
data_df.verification_status.value_counts()

In [None]:
plt.figure(figsize=(10, 5))
data_df.verification_status.value_counts().plot.bar()
plt.title("Status of Income/Income source Verification")
plt.xticks(rotation=0)
plt.xlabel("")
plt.ylabel("Number of loans")
plt.show()

We observe that:
- ~**44%** applicants' income is not verified
- Only ~**25%** of the applicants' source of income is verified

### Loan Status

In [None]:
data_df.loan_status.value_counts()

In [None]:
plt.figure(figsize=(10, 5))
data_df.loan_status.value_counts().plot.bar()
plt.title("Status of loan")
plt.xticks(rotation=0)
plt.xlabel("")
plt.ylabel("Number of loans")
plt.show()

We observe that:
- More than **27K** loans have been fully paid
- **~5.5K** loans are actually bad loans i.e. the applicants defaulted

### Purpose of Loan

In [None]:
data_df.purpose.str.upper().value_counts() / (~data_df.purpose.isna()).sum()

In [None]:
plt.figure(figsize=(10, 5))
data_df.purpose.str.upper().value_counts().plot.barh()
plt.title("Category of loan purpose")
plt.ylabel("")
plt.xlabel("Number of loans")
plt.show()

We observe that:
- The most popular reason for applying for a loan is **debt consolidation (~45%)**
- Least number of loan application are for **Renewable Energy**, **Education**, and **Housing** 

### Title

In [None]:
data_df['title'] = data_df.title.apply(lambda x: x.strip().lower() if not isinstance(x, float) else x)

In [None]:
data_df.title.value_counts()[:10]

We observe that:
- Most loans are applied for **Debt Consolidation**

### State of residence 

In [None]:
data_df.addr_state.value_counts()

In [None]:
plt.figure(figsize=(10, 5))
data_df.addr_state.value_counts()[:5].plot.bar()
plt.title("Top-5 States with most applicants")
plt.xticks(rotation=0)
plt.xlabel("")
plt.ylabel("Number of loans")
plt.show()

In [None]:
plt.figure(figsize=(10, 5))
data_df.addr_state.value_counts()[-5:].plot.bar()
plt.title("Top-5 States with least applicants")
plt.xticks(rotation=0)
plt.xlabel("")
plt.ylabel("Number of loans")
plt.show()

We observe that:
- Most applicants are from the California, New York and Florida
- Idaho, Nebraska, and Maine are the states with least number of loan applications

### Debt-To-Income Ratio

In [None]:
data_df.dti.describe()

In [None]:
plt.figure(figsize=(3, 8))
sns.boxplot(data_df.dti)
plt.title('Debt to Income Ratio')
plt.xticks(ticks=[])
plt.show()

In [None]:
plt.figure(figsize=(10, 5))
sns.histplot(data_df.dti, kde=True)
plt.title('Debt to Income Ratio')
plt.show()

We observe that:
- The median ratio is 13.5, which is quite good
- 25% of the applicants have DTI > 18.7

### Delinquency in the past 2 years

In [None]:
data_df.delinq_2yrs.value_counts()

In [None]:
# Bar chart of the delinquency counts, skipping the first (most frequent)
# entry of value_counts(). `.iloc` makes the slice explicitly positional,
# which matters because this Series is labelled by the integer counts.
plt.figure(figsize=(10, 5))
data_df.delinq_2yrs.value_counts().iloc[1:].plot.bar()
plt.title("Number of delinquency cases")  # spelling corrected
plt.xticks(rotation=0)
plt.xlabel("")
plt.ylabel("Number of loans")
plt.show()

We observe that:
- Most of the applicants have not shown any delinquent behaviour in the past 2 years
- Fewer than 100 applicants have a record of 4 or more delinquency incidents in the past 2 years

### Inquiries made in the past 6 months

In [None]:
data_df.inq_last_6mths.value_counts()

In [None]:
# Bar chart of how many loans fall under each inquiry count.
# The title previously read "Number of delinqueny cases", copied from the
# delinquency plot; it now describes the column that is actually plotted.
plt.figure(figsize=(10, 5))
data_df.inq_last_6mths.value_counts().plot.bar()
plt.title("Number of inquiries in the last 6 months")
plt.xticks(rotation=0)
plt.xlabel("")
plt.ylabel("Number of loans")
plt.show()

### Earliest Credit Line Year

The year the borrower's earliest reported credit line was opened

In [None]:
data_df.earliest_cr_line_year.value_counts()

In [None]:
plt.figure(figsize=(10, 5))
data_df.earliest_cr_line_year.value_counts()[:10].plot.bar()
plt.title('Year of first applications by the applicants')
plt.xticks(rotation=0)
plt.xlabel("")
plt.ylabel("Number of loans")
plt.show()

We observe that:
- Although the earliest line of credit for the applicants can be traced back to 1946, most of the applicants have their first line of credit in the 1990s and 2000s

### Earliest Credit Line Month 

The month the borrower's earliest reported credit line was opened


In [None]:
data_df.earliest_cr_line_month.value_counts()

In [None]:
plt.figure(figsize=(10, 5))
data_df.earliest_cr_line_month.value_counts().plot.bar()
plt.title('First Credit Month-wise split')
plt.xticks(rotation=0)
plt.xlabel("")
plt.ylabel("Number of loans")
plt.show()

We observe that:
- Most applicants took their first loans in the month of October, followed by December and November

### Issued Year and month 

The month and the year in which loan was funded

In [None]:
data_df.issue_d_year.value_counts()

In [None]:
plt.figure(figsize=(10, 5))
data_df.issue_d_year.value_counts().sort_index().plot.bar()
plt.title('Year-wise Loan issued')
plt.xticks(rotation=0)
plt.xlabel("")
plt.ylabel("Number of loans")
plt.show()

We observe that:
- Most loans were issued in 2011
- There is a steady increase in the number of loans issued YoY

In [None]:
data_df.issue_d_month.value_counts()

In [None]:
plt.figure(figsize=(10, 5))
data_df.issue_d_month.value_counts().plot.bar()
plt.title('Month-wise Loan issued over the years')
plt.xticks(rotation=0)
plt.xlabel("")
plt.ylabel("Number of loans")
plt.show()

We observe that:
- Top-2 months are December and November with more than 4000 application every month
- January and February have the lowest number of applications

### Public Record Bankruptcies

Number of public record bankruptcies

In [None]:
data_df.pub_rec_bankruptcies.value_counts()

We observe that:
- There are 1513 borrowers who have declared bankruptcy, among which only 6 have declared bankruptcy twice

### Last Credit Pulled date

The most recent month LC pulled the credit for this loan

In [None]:
data_df.last_credit_pull_d_year.value_counts()

In [None]:
data_df.last_credit_pull_d_month.value_counts()

### Last Payment Year and Month

In [None]:
data_df.last_pymnt_d_year.value_counts()

In [None]:
data_df.last_pymnt_d_month.value_counts()

### Collection Recovery Fee

In [None]:
data_df.collection_recovery_fee.describe()

In [None]:
plt.figure(figsize=(10, 5))
sns.histplot(data_df.collection_recovery_fee)
plt.title('Collection Recovery Fee')
plt.show()

> **NOTE**: Since the collection recovery fee is only applicable in case of bad loan, we are choosing not to remove outliers from this column.  

We observe that:
- Collection recovery fee is 0 for most of the application
- For the rest of them it is less than USD 5000

### Recoveries

In [None]:
data_df.recoveries.describe()

In [None]:
# Histogram of the `recoveries` column.
# The title previously read 'Collection Recovery Fee', copied from the
# preceding section; it now matches the column that is plotted.
plt.figure(figsize=(10, 5))
sns.histplot(data_df.recoveries)
plt.title('Recoveries')
plt.show()

> **NOTE**: Since the recoveries is only applicable in case of bad loan, we are choosing not to remove outliers from this column.  

We observe that:
- Recoveries is 0 for most of the application

### Total Recovered Late Fee

In [None]:
data_df.total_rec_late_fee.describe()

Here also we can see that the column doesn't have any significant data

### Total Recovery Interest

In [None]:
data_df.total_rec_int.describe()

In [None]:
plt.figure(figsize=(3, 8))
sns.boxplot(data_df.total_rec_int)
plt.title('Total Interest received till date')
plt.xticks(ticks=[])
plt.show()

In [None]:
plt.figure(figsize=(10, 5))
sns.histplot(data_df.total_rec_int, kde=True)
plt.title('Total Interest received till date')
plt.show()

### Total Recovery Principle 

In [None]:
data_df.total_rec_prncp.describe()

In [None]:
plt.figure(figsize=(3, 8))
sns.boxplot(data_df.total_rec_prncp)
plt.title('Total Principle received till date')
plt.xticks(ticks=[])
plt.show()

In [None]:
plt.figure(figsize=(10, 5))
sns.histplot(data_df.total_rec_prncp, kde=True)
plt.title('Total Principle received till date')
plt.show()

### Total Payment Funded by Investors

Total payment received for the amount invested by the investors

In [None]:
data_df.total_pymnt_inv.describe()

In [None]:
plt.figure(figsize=(3, 8))
sns.boxplot(data_df.total_pymnt_inv)
plt.title('Total Payment received till date for the investor')
plt.xticks(ticks=[])
plt.show()

In [None]:
plt.figure(figsize=(10, 5))
sns.histplot(data_df.total_pymnt_inv, kde=True)
plt.title('Total Payment received till date for the investor')
plt.show()

### Total Payment 

In [None]:
data_df.total_pymnt.describe()

In [None]:
# Box plot of the total payment received.
# Title spelling corrected ("Payment") so it matches the histogram of the same column.
plt.figure(figsize=(3, 8))
sns.boxplot(data_df.total_pymnt)
plt.title('Total Payment received till date')
plt.xticks(ticks=[])  # hide the x-axis ticks
plt.show()

In [None]:
plt.figure(figsize=(10, 5))
sns.histplot(data_df.total_pymnt, kde=True)
plt.title('Total Payment received till date')
plt.show()

From this we can understand that the median amount of the total payment received is 9899 USD

### Out Principal Invested by Investors

Remaining outstanding principal for total amount funded by investors

In [None]:
data_df.out_prncp_inv.describe()

From the above data we can understand that most of the investors money has been paid

### Out Principal

Remaining outstanding principal for total amount funded


In [None]:
data_df.out_prncp.describe()

From these two insights we can understand that most of the people have 0 outstanding principal to be paid

### Total Accounts

The total number of credit lines currently in the borrower's credit file


In [None]:
data_df.total_acc.describe()

In [None]:
plt.figure(figsize=(3, 8))
sns.boxplot(data_df.total_acc)
plt.title('Total Number for each Applicant')
plt.xticks(ticks=[])
plt.show()

### Revolving Credit Utilization 

Revolving line utilization rate, or the amount of credit the borrower is using relative to all available revolving credit.

In [None]:
data_df['revol_util'] = data_df.revol_util.apply(lambda x: float(x.strip("%")) if isinstance(x, str) else float(x))

In [None]:
data_df.revol_util.describe()

In [None]:
plt.figure(figsize=(3, 8))
sns.boxplot(data_df.revol_util)
plt.title('Revolving Credit utilization')
plt.xticks(ticks=[])
plt.show()

### Revolving balance

In [None]:
data_df.revol_bal.describe()

In [None]:
plt.figure(figsize=(3, 8))
sns.boxplot(data_df.revol_bal)
plt.title('Revolving Balance')
plt.xticks(ticks=[])
plt.show()

We observe that:
- The median of the total revolving balance is **~USD 8200**

### Public Records

The number of derogatory public records

In [None]:
data_df.pub_rec.value_counts()

In [None]:
plt.figure(figsize=(10, 5))
data_df.pub_rec.value_counts().plot.bar()
plt.title('Derogatory Public Records')
plt.xticks(rotation=0)
plt.xlabel("")
plt.ylabel("Number of loans")
plt.show()

These numbers show that most of the borrowers don't have derogatory public records

### Open Credit lines

The number of open credit lines in the borrower's credit file.

In [None]:
data_df.open_acc.value_counts()

In [None]:
plt.figure(figsize=(10, 5))
data_df.open_acc.value_counts()[:10].plot.bar()
plt.title('Open Credit Lines')
plt.xticks(rotation=0)
plt.xlabel("")
plt.ylabel("Number of loans")
plt.show()

We observe that:
- **~7000** applicants have 6 or 7 lines of credit
- **~600** applicants have more than 20 credit lines

## Step 3: Segmented Uni-Variate Analysis

### Segmenting based on Loan Status

#### Loan Amount, Funded Amount, Investor Amounts

**Mean**

In [None]:
# Mean requested, funded and investor-funded amount for every loan status.
mean_amounts = data_df.groupby('loan_status')[['loan_amnt', 'funded_amnt', 'funded_amnt_inv']].mean()
temp_df = mean_amounts.reset_index()
temp_df.columns = ['loan_status', 'loan_amount', 'funded_amount', 'funded_amount_inv']
temp_df

In [None]:
# Reshape to long form: one row per (loan_status, amount column) pair, with the
# amount column's name in `agg_fn` and its value in `agg_value`.
temp_df = pd.melt(temp_df, id_vars='loan_status', var_name='agg_fn', value_name='agg_value')
temp_df

In [None]:
# Grouped bar chart: one group per loan status, one bar per amount column.
plt.figure(figsize=(10, 5))
mean_ax = sns.barplot(x='loan_status', y='agg_value', hue='agg_fn', data=temp_df)
mean_ax.set_title("Mean Amount")
mean_ax.set_xlabel("Loan Status")
mean_ax.set_ylabel("Amount in US$")
mean_ax.legend()
plt.show()

**Median**

In [None]:
# Median requested, funded and investor-funded amount for every loan status.
median_amounts = data_df.groupby('loan_status')[['loan_amnt', 'funded_amnt', 'funded_amnt_inv']].median()
temp_df = median_amounts.reset_index()
temp_df.columns = ['loan_status', 'loan_amount', 'funded_amount', 'funded_amount_inv']
temp_df

In [None]:
# Reshape to long form: one row per (loan_status, amount column) pair, with the
# amount column's name in `agg_fn` and its value in `agg_value`.
temp_df = pd.melt(temp_df, id_vars='loan_status', var_name='agg_fn', value_name='agg_value')
temp_df

In [None]:
# Grouped bar chart: one group per loan status, one bar per amount column.
plt.figure(figsize=(10, 5))
ax = sns.barplot(x='loan_status', y='agg_value', hue='agg_fn', data=temp_df)
ax.set(title="Median Amount of Loan", xlabel="Loan Status", ylabel="Loan Amount in US$")
ax.legend()
plt.show()
# The temporary long-form frame is not needed beyond this plot.
del temp_df

#### Interest Rate

In [None]:
# Mean interest rate per loan status, with each bar annotated by its value.
# The title names the statistic (the mean and median plots previously shared
# the title "Interest Rates") and the bar labels are rounded to two decimals.
plt.figure()
ax = sns.barplot(data=data_df, x='loan_status', y='int_rate', estimator='mean', errorbar=None)
ax.bar_label(ax.containers[0], fmt='%.2f')
plt.title("Mean Interest Rate by Loan Status")
plt.xlabel("Loan Status")
plt.ylabel("Interest rate (%)")
plt.show()

In [None]:
# Median interest rate per loan status, with each bar annotated by its value.
# The title names the statistic (the mean and median plots previously shared
# the title "Interest Rates") and the bar labels are rounded to two decimals.
plt.figure()
ax = sns.barplot(data=data_df, x='loan_status', y='int_rate', estimator='median', errorbar=None)
ax.bar_label(ax.containers[0], fmt='%.2f')
plt.title("Median Interest Rate by Loan Status")
plt.xlabel("Loan Status")
plt.ylabel("Interest rate (%)")
plt.show()