In [2]:
# Importing pandas package
import pandas as pd

# Read csv file into Pandas data frame
bank_loans = pd.read_csv("../data/cleaned_data/cleaned_bank_loan_modeling.csv")

# Displays the csv file
bank_loans.head()

Unnamed: 0,Age,Years Work Experience,Income,ZIP Code,Family Size,Monthly Credit Card Spending,Education Level,Value of Mortgage,Personal Loan
0,25,1,49000,91107,4,1600,Undergrad,0,No
1,45,19,34000,90089,3,1500,Undergrad,0,No
2,39,15,11000,94720,1,1000,Undergrad,0,No
3,35,9,100000,94112,1,2700,Graduate,0,No
4,35,8,45000,91330,4,1000,Graduate,0,No


In [7]:
# Groups the data frame into people who do and do not have personal loans
grouped = bank_loans.groupby("Personal Loan")

# Pulls out the income column of the grouped data frame
income = grouped["Income"]

# Determines the mean income of people who do and do not have personal loans
income.mean()

Personal Loan
No      66237.389381
Yes    144745.833333
Name: Income, dtype: float64

In [8]:
# Showing the min and max
income.describe()

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
Personal Loan,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
No,4520.0,66237.389381,40578.534417,8000.0,35000.0,59000.0,84000.0,224000.0
Yes,480.0,144745.833333,31584.42944,60000.0,122000.0,142500.0,172000.0,203000.0


In [11]:
# Creates bins and names for different income ranges
b = [0, 25000, 75000, 150000, 225000]
groupnames = ["<$25,000", "$25,000-74,999", "$75,000-149,999", "$150,000-225,000"]

# Adds a new column income ranges based on the income of the current row
bank_loans["Income Ranges"] = pd.cut(bank_loans["Income"], bins=b, labels=groupnames, 
                                    include_lowest=True)

# Checks that the new column was added
bank_loans.head()

Unnamed: 0,Age,Years Work Experience,Income,ZIP Code,Family Size,Monthly Credit Card Spending,Education Level,Value of Mortgage,Personal Loan,Income Ranges
0,25,1,49000,91107,4,1600,Undergrad,0,No,"$25,000-74,999"
1,45,19,34000,90089,3,1500,Undergrad,0,No,"$25,000-74,999"
2,39,15,11000,94720,1,1000,Undergrad,0,No,"<$25,000"
3,35,9,100000,94112,1,2700,Graduate,0,No,"$75,000-149,999"
4,35,8,45000,91330,4,1000,Graduate,0,No,"$25,000-74,999"


In [21]:
# Groups the data frame into the income ranges and then by if they have a personal loan
income_ranges = bank_loans.groupby(["Income Ranges", "Personal Loan"])

# Determines how many people are in each section of the grouped dataframe
count = income_ranges.count()
count


Unnamed: 0_level_0,Unnamed: 1_level_0,Age,Years Work Experience,Income,ZIP Code,Family Size,Monthly Credit Card Spending,Education Level,Value of Mortgage
Income Ranges,Personal Loan,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
"<$25,000",No,672,672,672,672,672,672,672,672
"<$25,000",Yes,0,0,0,0,0,0,0,0
"$25,000-74,999",No,2279,2279,2279,2279,2279,2279,2279,2279
"$25,000-74,999",Yes,7,7,7,7,7,7,7,7
"$75,000-149,999",No,1345,1345,1345,1345,1345,1345,1345,1345
"$75,000-149,999",Yes,255,255,255,255,255,255,255,255
"$150,000-225,000",No,224,224,224,224,224,224,224,224
"$150,000-225,000",Yes,218,218,218,218,218,218,218,218
