In [None]:
# Importing pandas and matplotlib
import pandas as pd
import matplotlib.pyplot as plt

# Load the cleaned bank-loan dataset into a pandas DataFrame
csv_path = "../data/cleaned_data/cleaned_bank_loan_modeling.csv"
bank_loans = pd.read_csv(csv_path)

# Preview the first five rows to confirm the file was read
bank_loans.head(n=5)

In [None]:
# Split the clients by their "Personal Loan" value (one group per loan status)
grouped = bank_loans.groupby(by="Personal Loan")

# Keep only the "Income" column within each loan-status group
income = grouped["Income"]

# Average income within each loan-status group
income_mean = income.mean()

In [None]:
# Bar chart of the mean income for each "Personal Loan" group
plot = income_mean.plot.bar(
    title="Average Income vs Personal Loan Holding Status",
    ylabel="Income ($)",
    rot=0,
)

# Write the current figure to the plots directory as a PNG
plt.savefig("plots/income_loans_bar.png")

In [None]:
# Summary statistics of income for each "Personal Loan" group (income is the
# grouped "Income" column); the min and max help determine the bin edges
income.describe()

In [None]:
# Creates bin edges and names for the different income ranges. The names describe
# left-closed ranges: "<$25,000" stops short of 25,000 and "$25,000-74,999"
# starts at it.
b = [0, 25000, 75000, 150000, 225000]
groupnames = ["<$25,000", "$25,000-74,999", "$75,000-149,999", "$150,000-225,000"]

# Adds a new column of income ranges based on the income of the current row.
# right=False makes every bin [lower, upper), so an income sitting exactly on an
# edge lands in the range whose name includes it (the default right-closed bins
# would put exactly 25,000 into "<$25,000").
bank_loans["Income Ranges"] = pd.cut(bank_loans["Income"], bins=b, labels=groupnames,
                                     right=False)

# The last range's name includes its upper edge, which a half-open bin leaves
# unassigned (NaN), so incomes equal to the top edge are placed in it explicitly.
bank_loans.loc[bank_loans["Income"] == b[-1], "Income Ranges"] = groupnames[-1]

# Checks that the new column was added
bank_loans.head()

In [None]:
# Groups the data frame by income range and then by personal-loan status.
# "Income Ranges" is a categorical column (it comes from pd.cut with labels);
# observed=False is passed explicitly so every range/status pair appears in the
# result (with a count of 0 when no client matches) regardless of the installed
# pandas version's default, keeping the same number of rows per income range.
income_ranges = bank_loans.groupby(["Income Ranges", "Personal Loan"], observed=False)

# Counts the non-null values of every column within each group
count = income_ranges.count()

# Pulls out the count of just the income column and displays it
count_income = count["Income"]
count_income

In [None]:
# Plots the share of clients with and without a personal loan among those with
# an income under $25,000 yearly. The rows are selected by income-range label
# rather than by position, so the chart cannot pick up another range's counts
# when a range/loan-status pair is absent; drop_level=False keeps the
# (income range, loan status) index used for the wedge labels.
bin_counts = count_income.xs("<$25,000", level="Income Ranges", drop_level=False)
plot = bin_counts.plot(kind="pie", autopct="%1.1f%%", startangle=140,
                       title="Personal Loan Percentage - Under $25,000 Income")

# Saves plot in appropriate file as png
plt.savefig("plots/income_bin1_loans_pie.png")

In [None]:
# Plots the share of clients with and without a personal loan among those with
# an income between $25,000-74,999 yearly. The rows are selected by income-range
# label rather than by position, so the chart cannot pick up another range's
# counts when a range/loan-status pair is absent; drop_level=False keeps the
# (income range, loan status) index used for the wedge labels.
bin_counts = count_income.xs("$25,000-74,999", level="Income Ranges", drop_level=False)
plot = bin_counts.plot(kind="pie", autopct="%1.1f%%", startangle=140,
                       title="Personal Loan Percentage - $25,000-74,999 Income")

# Saves plot in appropriate file as png
plt.savefig("plots/income_bin2_loans_pie.png")

In [None]:
# Plots the share of clients with and without a personal loan among those with
# an income between $75,000-149,999 yearly. The rows are selected by income-range
# label rather than by position, so the chart cannot pick up another range's
# counts when a range/loan-status pair is absent; drop_level=False keeps the
# (income range, loan status) index used for the wedge labels.
bin_counts = count_income.xs("$75,000-149,999", level="Income Ranges", drop_level=False)
plot = bin_counts.plot(kind="pie", autopct="%1.1f%%", startangle=140,
                       title="Personal Loan Percentage - $75,000-149,999 Income")

# Saves plot in appropriate file as png
plt.savefig("plots/income_bin3_loans_pie.png")

In [None]:
# Plots the share of clients with and without a personal loan among those with
# an income between $150,000-225,000 yearly. The rows are selected by income-range
# label rather than by position, so the chart cannot pick up another range's
# counts when a range/loan-status pair is absent; drop_level=False keeps the
# (income range, loan status) index used for the wedge labels.
bin_counts = count_income.xs("$150,000-225,000", level="Income Ranges", drop_level=False)
plot = bin_counts.plot(kind="pie", autopct="%1.1f%%", startangle=140,
                       title="Personal Loan Percentage - $150,000-225,000 Income")

# Saves plot in appropriate file as png
plt.savefig("plots/income_bin4_loans_pie.png")