In [None]:
# Importing pandas
import pandas as pd

# Load the raw personal-loan modelling data from disk into a DataFrame
bank_loans = pd.read_csv(filepath_or_buffer="raw_data/Bank_Personal_Loan_Modelling.csv")

# Preview the top five rows to confirm the load and to see the column names
bank_loans.head(n=5)

In [None]:
# Keep only the columns used downstream (extend the list if more are needed)
bank_loans = bank_loans.loc[
    :,
    [
        "Age",
        "Experience",
        "Income",
        "ZIP Code",
        "Family",
        "CCAvg",
        "Education",
        "Mortgage",
        "Personal Loan",
    ],
]

# Preview the narrowed frame
bank_loans.head()

In [None]:
# Inspect the dtype pandas assigned to each column of bank_loans; as the last
# expression in the cell, the resulting Series is what the notebook displays
bank_loans.dtypes

In [None]:
# Recode the education codes as descriptive labels. The column is cast to str
# first, so the mapping keys are strings and any code that is not listed in
# the mapping is kept as its string form.
bank_loans = bank_loans.assign(
    Education=bank_loans["Education"].astype("str").replace(
        {
            "1": "Undergrad",
            "2": "Graduate",
            "3": "Advanced/Professional",
        }
    )
)



In [None]:
# Recode the personal-loan flag as "No"/"Yes". The column is cast to str
# first, so the mapping keys are strings; values other than "0"/"1" are kept
# as their string form. The column name contains a space, hence the
# dict-unpacking form of assign().
bank_loans = bank_loans.assign(
    **{
        "Personal Loan": bank_loans["Personal Loan"].astype("str").replace(
            {"0": "No", "1": "Yes"}
        )
    }
)

In [None]:
# Scale Income by 1,000 with vectorised arithmetic. For integer values this is
# exactly what appending three zeros to the string form and parsing it back
# would produce, but it avoids the per-row Python loops and does not fail on
# float input (e.g. "49.0" + "000" cannot be parsed by int()).
# NOTE(review): presumably the raw column is recorded in thousands - confirm
# against the dataset's data dictionary.
income = bank_loans["Income"] * 1000

# Overwrite the Income column with the scaled values
bank_loans["Income"] = income

# NOTE: this cell rescales the column in place, so running it a second time
# without reloading the data multiplies by 1,000 again.
# Check the dataframe
bank_loans.head()


In [None]:
# Scale Mortgage by 1,000 with vectorised arithmetic. For integer values this
# is exactly what appending three zeros to the string form and parsing it back
# would produce (a mortgage of 0 stays 0), but it avoids the per-row Python
# loops and does not fail on float input.
# NOTE(review): presumably the raw column is recorded in thousands - confirm
# against the dataset's data dictionary.
mortgage = bank_loans["Mortgage"] * 1000

# Overwrite the Mortgage column with the scaled values
bank_loans["Mortgage"] = mortgage

# NOTE: this cell rescales the column in place, so running it a second time
# without reloading the data multiplies by 1,000 again.
# Check the dataframe
bank_loans.head()

In [None]:
# Convert CCAvg to whole units: render each value with exactly three decimal
# places, drop the decimal point and parse the digits as an integer,
# e.g. 1.6 -> "1.600" -> "1600" -> 1600.
# The format spec deliberately has no "," grouping option: with grouping, a
# value of 1000 or more renders as "1,000.000", which cannot be parsed as an
# integer once the decimal point is removed.
ccavg = (
    bank_loans["CCAvg"]
    .map("{:.3f}".format)
    .str.replace(".", "", regex=False)  # literal dot, not a regex wildcard
    .astype("int64")
)

# Overwrite the CCAvg column with the integer values in a single step, so the
# column is never left holding the intermediate strings
bank_loans["CCAvg"] = ccavg

# NOTE: this cell rescales the column in place, so running it a second time
# without reloading the data scales the values by 1,000 again.
# Check the dataframe
bank_loans.head()

In [None]:
# Give the abbreviated columns more descriptive headers; columns that are not
# listed in the mapping keep their current names
bank_loans = bank_loans.rename(
    {
        "Experience": "Years Work Experience",
        "Family": "Family Size",
        "CCAvg": "Monthly Credit Card Spending",
        "Education": "Education Level",
        "Mortgage": "Value of Mortgage",
    },
    axis="columns",
)

# Preview the renamed frame
bank_loans.head()

# Optional left-aligned rendering of the frame, kept disabled:
# bank_loans.style.set_properties(**{'text-align': 'left'})

In [None]:
# Persist the cleaned frame as CSV so other notebooks can load it; the row
# index is left out of the file
bank_loans.to_csv(
    path_or_buf="cleaned_data/cleaned_bank_loan_modeling.csv",
    index=False,
)