# Data Inspection (Basic Exploration)

## a.Loading the dataset and displaying the first five rows

In [38]:
import pandas as pd
import numpy as np
df = pd.read_excel("bank_customers_group2.xlsx", sheet_name="Sheet1")
df.head(5)

Unnamed: 0,Customer_ID,Age,Gender,Education_Level,Number_of_Children,Account_Balance,Loan_Status,Credit_Score,Account_Type,Monthly_Income,Number_of_Transactions,Products_Purchased,Customer_Satisfaction
0,100000,56,Male,Master,0,103387.03,Rejected,376,Savings,300702.84,39,Investment Account,2
1,100001,69,Male,High School,5,971.24,Pending,691,Current,124363.77,49,Investment Account,5
2,100002,46,Male,PhD,3,20112.89,Approved,711,Fixed Deposit,471216.94,26,,4
3,100003,32,Female,High School,0,488653.27,Approved,472,Current,298509.32,26,,5
4,100004,60,Female,High School,0,121555.67,Approved,500,Savings,162484.3,36,,1


## b.Checking for missing values and handling them appropriately.

In [40]:

missing_values = df.isnull().sum()
missing_values

Customer_ID                 0
Age                         0
Gender                      0
Education_Level             0
Number_of_Children          0
Account_Balance             0
Loan_Status                 0
Credit_Score                0
Account_Type                0
Monthly_Income              0
Number_of_Transactions      0
Products_Purchased        366
Customer_Satisfaction       0
dtype: int64

In [43]:
df['Products_Purchased'].fillna('Other', inplace=True)
df.head(5)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Products_Purchased'].fillna('Other', inplace=True)


Unnamed: 0,Customer_ID,Age,Gender,Education_Level,Number_of_Children,Account_Balance,Loan_Status,Credit_Score,Account_Type,Monthly_Income,Number_of_Transactions,Products_Purchased,Customer_Satisfaction
0,100000,56,Male,Master,0,103387.03,Rejected,376,Savings,300702.84,39,Investment Account,2
1,100001,69,Male,High School,5,971.24,Pending,691,Current,124363.77,49,Investment Account,5
2,100002,46,Male,PhD,3,20112.89,Approved,711,Fixed Deposit,471216.94,26,Other,4
3,100003,32,Female,High School,0,488653.27,Approved,472,Current,298509.32,26,Other,5
4,100004,60,Female,High School,0,121555.67,Approved,500,Savings,162484.3,36,Other,1


## c.Get summary statistics for numerical columns

In [46]:
summary_statistics = df.describe()
summary_statistics

Unnamed: 0,Customer_ID,Age,Number_of_Children,Account_Balance,Credit_Score,Monthly_Income,Number_of_Transactions,Customer_Satisfaction
count,2000.0,2000.0,2000.0,2000.0,2000.0,2000.0,2000.0,2000.0
mean,100999.5,49.114,2.4985,248177.617345,572.9115,252012.00712,24.7205,3.03
std,577.494589,17.926564,1.714779,144547.276662,160.776843,142796.195854,14.20124,1.419192
min,100000.0,18.0,0.0,971.24,300.0,10589.77,1.0,1.0
25%,100499.75,34.0,1.0,122622.8125,432.0,125425.935,12.0,2.0
50%,100999.5,49.0,2.0,244074.935,575.0,248148.37,24.0,3.0
75%,101499.25,65.0,4.0,374770.0925,712.0,377756.655,37.0,4.0
max,101999.0,79.0,5.0,499730.61,849.0,499433.97,49.0,5.0


## d.Display the data types of all columns and convert any incorrect types to the appropriate format

In [10]:
print (data.dtypes)

Customer_ID                 int64
Age                         int64
Gender                     object
Education_Level            object
Number_of_Children          int64
Account_Balance           float64
Loan_Status                object
Credit_Score                int64
Account_Type               object
Monthly_Income            float64
Number_of_Transactions      int64
Products_Purchased         object
Customer_Satisfaction       int64
dtype: object


In [49]:
print (df.dtypes)

Customer_ID                 int64
Age                         int64
Gender                     object
Education_Level            object
Number_of_Children          int64
Account_Balance           float64
Loan_Status                object
Credit_Score                int64
Account_Type               object
Monthly_Income            float64
Number_of_Transactions      int64
Products_Purchased         object
Customer_Satisfaction       int64
dtype: object


# Data Cleaning (Use Loops and Conditional Statements)

## a.Converting categorical columns (Gender, Education Level, Loan_Status, Account_Type, Products_Purchased) to title case using a for loop

In [54]:

categorical_columns = ["Gender", "Education_Level", "Loan_Status", "Account_Type", "Products_Purchased"]
for col in categorical_columns:
    df[col] = df[col].str.title()
    print(df)

      Customer_ID  Age  Gender Education_Level  Number_of_Children  \
0          100000   56    Male          Master                   0   
1          100001   69    Male     High School                   5   
2          100002   46    Male             PhD                   3   
3          100003   32  Female     High School                   0   
4          100004   60  Female     High School                   0   
...           ...  ...     ...             ...                 ...   
1995       101995   52    Male         Diploma                   5   
1996       101996   61    Male     High School                   1   
1997       101997   77    Male          Master                   3   
1998       101998   62    Male          Master                   1   
1999       101999   51  Female             PhD                   4   

      Account_Balance Loan_Status  Credit_Score   Account_Type  \
0           103387.03    Rejected           376        Savings   
1              971.24     P

# b1.Identifying if there are any duplicates

In [59]:
duplicate_count = df.duplicated().sum()  
duplicate_count

0

# b2.Removing any duplicate entries

In [63]:
df.drop_duplicates(subset=["Customer_ID"], keep="first", inplace=True)

# c.Ensuring Credit_Score values fall within the valid range (300-850). If any are outside this range, replaced with the median credit score using an if statement

In [93]:
median_credit_score = df['Credit_Score'].median()
df['Credit_Score'] = df['Credit_Score'].apply(lambda x: median_credit_score if x < 300 or x > 850 else x)
df.head(3)

Unnamed: 0,Customer_ID,Age,Gender,Education_Level,Number_of_Children,Account_Balance,Loan_Status,Credit_Score,Account_Type,Monthly_Income,Number_of_Transactions,Products_Purchased,Customer_Satisfaction,Annual_Income,Age_Category,Categorize_Transactions,Loan_Eligibility
0,100000,56,Male,Master,0,103387.03,Rejected,376,Savings,300702.84,39,Investment Account,2,3608434.08,Senior Citizen,Frequent,Not Eligible
1,100001,69,Male,High School,5,971.24,Pending,691,Current,124363.77,49,Investment Account,5,1492365.24,Senior Citizen,Frequent,Not Eligible
2,100002,46,Male,Phd,3,20112.89,Approved,711,Fixed Deposit,471216.94,26,Other,4,5654603.28,Middle Age,Low,Not Eligible


## d.Identifying and replacing any negative or zero values in Monthly_Income with the median income.

In [68]:
df.loc[df['Monthly_Income'] < 0, 'Monthly_Income'] = df['Monthly_Income'].median()
print(df)

      Customer_ID  Age  Gender Education_Level  Number_of_Children  \
0          100000   56    Male          Master                   0   
1          100001   69    Male     High School                   5   
2          100002   46    Male             Phd                   3   
3          100003   32  Female     High School                   0   
4          100004   60  Female     High School                   0   
...           ...  ...     ...             ...                 ...   
1995       101995   52    Male         Diploma                   5   
1996       101996   61    Male     High School                   1   
1997       101997   77    Male          Master                   3   
1998       101998   62    Male          Master                   1   
1999       101999   51  Female             Phd                   4   

      Account_Balance Loan_Status  Credit_Score   Account_Type  \
0           103387.03    Rejected           376        Savings   
1              971.24     P

# Data Manipulation (Use Functions, Loops, and Conditional Statements)

## a.Writing a function calculate_annual_income() that takes Monthly_Income as input and returns Annual_Income and applying it to the dataset.

In [91]:
def calculate_annual_income(Monthly_Income):
    return Monthly_Income * 12
print(df)
df['Annual_Income'] = df['Monthly_Income'].apply(calculate_annual_income)
df.head(3)

      Customer_ID  Age  Gender Education_Level  Number_of_Children  \
0          100000   56    Male          Master                   0   
1          100001   69    Male     High School                   5   
2          100002   46    Male             Phd                   3   
3          100003   32  Female     High School                   0   
4          100004   60  Female     High School                   0   
...           ...  ...     ...             ...                 ...   
1995       101995   52    Male         Diploma                   5   
1996       101996   61    Male     High School                   1   
1997       101997   77    Male          Master                   3   
1998       101998   62    Male          Master                   1   
1999       101999   51  Female             Phd                   4   

      Account_Balance Loan_Status  Credit_Score   Account_Type  \
0           103387.03    Rejected           376        Savings   
1              971.24     P

Unnamed: 0,Customer_ID,Age,Gender,Education_Level,Number_of_Children,Account_Balance,Loan_Status,Credit_Score,Account_Type,Monthly_Income,Number_of_Transactions,Products_Purchased,Customer_Satisfaction,Annual_Income,Age_Category,Categorize_Transactions,Loan_Eligibility
0,100000,56,Male,Master,0,103387.03,Rejected,376,Savings,300702.84,39,Investment Account,2,3608434.08,Senior Citizen,Frequent,Not Eligible
1,100001,69,Male,High School,5,971.24,Pending,691,Current,124363.77,49,Investment Account,5,1492365.24,Senior Citizen,Frequent,Not Eligible
2,100002,46,Male,Phd,3,20112.89,Approved,711,Fixed Deposit,471216.94,26,Other,4,5654603.28,Middle Age,Low,Not Eligible


## b.Categorize Age into groups using an if-else statement and add a new column Age_Group:18-30: 'Young Adult',31-50: 'Middle Age',51-80: 'Senior Citizen'


In [89]:
def category_age(Age):
    if 18 <= Age < 30:
        return 'Young Adult'
    elif 30 <= Age < 50:
        return 'Middle Age'
    else:
        return 'Senior Citizen'

df['Age_Category'] = df['Age'].apply(category_age)
df.head(3)

Unnamed: 0,Customer_ID,Age,Gender,Education_Level,Number_of_Children,Account_Balance,Loan_Status,Credit_Score,Account_Type,Monthly_Income,Number_of_Transactions,Products_Purchased,Customer_Satisfaction,Annual_Income,Age_Category,Categorize_Transactions,Loan_Eligibility
0,100000,56,Male,Master,0,103387.03,Rejected,376,Savings,300702.84,39,Investment Account,2,3608434.08,Senior Citizen,Frequent,Not Eligible
1,100001,69,Male,High School,5,971.24,Pending,691,Current,124363.77,49,Investment Account,5,1492365.24,Senior Citizen,Frequent,Not Eligible
2,100002,46,Male,Phd,3,20112.89,Approved,711,Fixed Deposit,471216.94,26,Other,4,5654603.28,Middle Age,Low,Not Eligible


## c.Create a function categorize_transactions(num_transactions) that returns:'Frequent' if Number_of_Transactions > 30 'Moderate', if 10 ≤ Number_of_Transactions ≤ 30 and'Low' if Number_of_Transactions < 10


In [87]:
def Categorize_Transactions(Number_of_Transactions):
    if Number_of_Transactions > 30:
        return 'Frequent'
    elif Number_of_Transactions <= 10:
        return 'Moderate'
    else:
        return 'Low'

df['Categorize_Transactions'] = df['Number_of_Transactions'].apply(Categorize_Transactions)
df.head(3)

Unnamed: 0,Customer_ID,Age,Gender,Education_Level,Number_of_Children,Account_Balance,Loan_Status,Credit_Score,Account_Type,Monthly_Income,Number_of_Transactions,Products_Purchased,Customer_Satisfaction,Annual_Income,Age_Category,Categorize_Transactions,Loan_Eligibility
0,100000,56,Male,Master,0,103387.03,Rejected,376,Savings,300702.84,39,Investment Account,2,3608434.08,Senior Citizen,Frequent,Not Eligible
1,100001,69,Male,High School,5,971.24,Pending,691,Current,124363.77,49,Investment Account,5,1492365.24,Senior Citizen,Frequent,Not Eligible
2,100002,46,Male,Phd,3,20112.89,Approved,711,Fixed Deposit,471216.94,26,Other,4,5654603.28,Middle Age,Low,Not Eligible


## d.Use a for loop to apply this function to all customers and create a new column Transaction_Frequency.

In [110]:
def calculate_transaction_frequency(transactions):customers['Transaction_Frequency'] = 0r
for index, ro
    w in customers.iterrows():
    customers.at[index, 'Transaction_Frequency'] = calculate_transaction_frequency(row['Transad DataFrame
print(customers)

SyntaxError: invalid decimal literal (114245129.py, line 2)

## e.Create a new column Loan_Eligibility, where:If Credit_Score >= 700 and Account_Balance > 50000 → "Eligible" Otherwise → "Not Eligible"


In [85]:
def check_loan_eligibility(row):
    if row['Credit_Score'] >= 700 and row['Account_Balance'] > 50000:
        return 'Eligible'
    else:
        return 'Not Eligible'
df['Loan_Eligibility'] = df.apply(check_loan_eligibility, axis=1)
df.head(3)

Unnamed: 0,Customer_ID,Age,Gender,Education_Level,Number_of_Children,Account_Balance,Loan_Status,Credit_Score,Account_Type,Monthly_Income,Number_of_Transactions,Products_Purchased,Customer_Satisfaction,Annual_Income,Age_Category,Categorize_Transactions,Loan_Eligibility
0,100000,56,Male,Master,0,103387.03,Rejected,376,Savings,300702.84,39,Investment Account,2,3608434.08,Senior Citizen,Frequent,Not Eligible
1,100001,69,Male,High School,5,971.24,Pending,691,Current,124363.77,49,Investment Account,5,1492365.24,Senior Citizen,Frequent,Not Eligible
2,100002,46,Male,Phd,3,20112.89,Approved,711,Fixed Deposit,471216.94,26,Other,4,5654603.28,Middle Age,Low,Not Eligible


# Filtering and analysing (Use Loops, Conditional Statements, and Functions)

## a.Writing a function average_balance(loan_status) that takes Loan_Status as input and returns the average account balance for customers with that loan status. Use it to find the average balance of customers with an Approved loan.

In [116]:
def average_balance(loan_status):
    filtered_df = df[df['Loan_Status'] == loan_status]  
    return filtered_df['Account_Balance'].mean()

approved_avg_balance = average_balance("Approved")

print(f"Average Account Balance for Approved Loans: {approved_avg_balance}")

Average Account Balance for Approved Loans: 248285.2210873016


## b.Identifying the most common Education_Level among customers with a Rejected loan.

In [36]:
most_common_education = df[df['Loan_Status'] == 'Rejected']['Education_Level'].mode()[0]
print(f"Most Common Education Level Among Rejected Loans: {most_common_education}")

Most Common Education Level Among Rejected Loans: High School


## c.Using a for loop to count how many customers have a Credit Score above 700.

In [123]:
count = 0
for i in range(len(df)):
    if df.loc[i, 'Credit_Score'] > 700: 
        count += 1 
print(f"Number of customers with Credit Score above 700: {count}")



Number of customers with Credit Score above 700: 541


## d.Finding the percentage of female customers in the dataset.

In [126]:
female_count = df[df['Gender'] == 'Female'].shape[0]
total_count = df.shape[0]
female_percentage = (female_count / total_count) * 100
print(f"Percentage of Female Customers: {female_percentage:.2f}%")

Percentage of Female Customers: 51.30%


## e.Identifying the customer ID with the highest account balance.

In [80]:
def high_income_customers():
    count = len(df[df['Monthly_Income'] > 100000]) 
    return count
print(f"Total customers with Monthly Income > 100,000: {high_income_customers()}")

Total customers with Monthly Income > 100,000: 1628


## f.Finding the total number of customers who have a monthly income greater than 100,000

In [132]:
Monthly_Income = df[df["Monthly_Income"] > 100000].sum()
Monthly_Income.head()

Customer_ID                                                   164428236
Age                                                               79774
Gender                MaleMaleMaleFemaleFemaleMaleMaleMaleMaleFemale...
Education_Level       MasterHigh SchoolPhdHigh SchoolHigh SchoolDipl...
Number_of_Children                                                 4090
dtype: object

# Advanced challenge (While Loop, Aggregations, and Functions)

## a.Using a while loop to count the number of customers who have purchased at least one financial product (Products Purchased! = 'None').

In [134]:
customers = [
    {"Customer_ID": 1, "Products_Purchased": "Investment_Account"},
    {"Customer_ID": 2, "Products_Purchased": "Investment_Account"},
    {"Customer_ID": 3, "Products_Purchased": "Other"},
    {"Customer_ID": 4, "Products_Purchased": "Other"},
    {"Customer_ID": 5, "Products_Purchased": "Other"}
]
# Initialize variables
count = 0
index = 0
while index < len(customers):
    if customers[index]["Products_Purchased"] != "None":
        count += 1
    index += 1
print(f"Number of customers who purchased at least one financial product: {count}")

Number of customers who purchased at least one financial product: 5


## b.Identifying the relationship between Customer Satisfaction and Account Type by calculating the average satisfaction score per account type.

In [142]:
def avg_satisfaction_by_account_type():
    avg_satisfaction = df.groupby('Account_Type')['Customer_Satisfaction'].mean()  
    return avg_satisfaction


print(avg_satisfaction_by_account_type())

Account_Type
Current          3.038690
Fixed Deposit    3.005806
Savings          3.046948
Name: Customer_Satisfaction, dtype: float64


## c.Finding the oldest customer who has an Approved Loan.

In [144]:
def oldest_approved_customer():
    approved_customers = df[df['Loan_Status'] == 'Approved']  
    max_age = approved_customers['Age'].max() 
    oldest_customer = approved_customers[approved_customers['Age'] == max_age]  
    return oldest_customer[['Customer_ID', 'Age', 'Loan_Status']] 
print(oldest_approved_customer())

      Customer_ID  Age Loan_Status
48         100048   79    Approved
89         100089   79    Approved
101        100101   79    Approved
112        100112   79    Approved
219        100219   79    Approved
229        100229   79    Approved
236        100236   79    Approved
252        100252   79    Approved
268        100268   79    Approved
368        100368   79    Approved
399        100399   79    Approved
549        100549   79    Approved
612        100612   79    Approved
655        100655   79    Approved
881        100881   79    Approved
1043       101043   79    Approved
1240       101240   79    Approved
1290       101290   79    Approved
1346       101346   79    Approved
1381       101381   79    Approved
1414       101414   79    Approved
1514       101514   79    Approved
1529       101529   79    Approved
1887       101887   79    Approved


## c.Write a function calculate_debt_to_income_ratio (account balance, monthly income) that returns the debt-to-income ratio as (account balance / annual income) * 100.

In [151]:
def calculate_debt_to_income_ratio(account_balance, monthly_income):
    annual_income = monthly_income * 12
    return (account_balance / annual_income) * 100 if annual_income > 0 else 0
df['Debt_to_Income_Ratio'] = (df['Account_Balance'] / (df['Monthly_Income'] * 12)) * 100
print(df.head(3))

   Customer_ID  Age Gender Education_Level  Number_of_Children  \
0       100000   56   Male          Master                   0   
1       100001   69   Male     High School                   5   
2       100002   46   Male             Phd                   3   

   Account_Balance Loan_Status  Credit_Score   Account_Type  Monthly_Income  \
0        103387.03    Rejected           376        Savings       300702.84   
1           971.24     Pending           691        Current       124363.77   
2         20112.89    Approved           711  Fixed Deposit       471216.94   

   Number_of_Transactions  Products_Purchased  Customer_Satisfaction  \
0                      39  Investment Account                      2   
1                      49  Investment Account                      5   
2                      26               Other                      4   

   Annual_Income    Age_Category Categorize_Transactions Loan_Eligibility  \
0     3608434.08  Senior Citizen                Freq

## d.Filter customers with Debt-to-Income Ratio > 50% and display their details.

In [154]:
def filter_high_dti(df):
    df['Debt_to_Income_Ratio'] = (df['Account_Balance'] / (df['Monthly_Income'] * 12)) * 100
    return df[df['Debt_to_Income_Ratio'] > 50]
high_dti_customers = filter_high_dti(df)
print(high_dti_customers)

      Customer_ID  Age  Gender Education_Level  Number_of_Children  \
14         100014   41  Female         Diploma                   2   
15         100015   70    Male          Master                   4   
30         100030   50    Male         Diploma                   5   
42         100042   77    Male          Master                   1   
51         100051   72  Female     High School                   5   
...           ...  ...     ...             ...                 ...   
1940       101940   34  Female             Phd                   3   
1947       101947   42  Female        Bachelor                   3   
1971       101971   70  Female        Bachelor                   1   
1973       101973   26    Male         Diploma                   0   
1981       101981   35  Female     High School                   4   

      Account_Balance Loan_Status  Credit_Score   Account_Type  \
14          286994.86    Approved           618        Savings   
15          171149.74    Ap

# Additional Questions

## a.Identify the most common product purchased.

In [160]:
def most_common_product(df):
    return df['Products_Purchased'].mode()[0]
common_product = most_common_product(df)
print(f"Most common product purchased: {common_product}")

Most common product purchased: Mortgage


## b.Find the gender distribution of customers who have a Rejected Loan

In [163]:
def rejected_loan_gender_distribution(df):
    rejected_loans = df[df['Loan_Status'] == 'Rejected']
    return rejected_loans['Gender'].value_counts()


gender_distribution = rejected_loan_gender_distribution(df)
print("Gender distribution of customers with Rejected Loans:")
print(gender_distribution)

Gender distribution of customers with Rejected Loans:
Gender
Female    186
Male      183
Name: count, dtype: int64


## c.Identify the average age of customers who have an account balance greater than 200,000.

In [168]:
def average_age_high_balance(df):
    high_balance_customers = df[df['Account_Balance'] > 200000]
    return high_balance_customers['Age'].mean()


avg_age = average_age_high_balance(df)
print(f"Average age of customers with account balance > 200,000: {avg_age:.2f}")

Average age of customers with account balance > 200,000: 49.47


## d.Find the youngest customer who has purchased a financial product (Products Purchased != 'None').

In [173]:
def youngest_financial_product_customer(df):
    filtered_df = df[df['Products_Purchased'] != 'None']  
    return filtered_df.loc[filtered_df['Age'].idxmin()] 
youngest_customer = youngest_financial_product_customer(df)
print("Youngest customer who purchased a financial product:")
youngest_customer

Youngest customer who purchased a financial product:


Customer_ID                       100131
Age                                   18
Gender                              Male
Education_Level              High School
Number_of_Children                     3
Account_Balance                383414.67
Loan_Status                     Rejected
Credit_Score                         625
Account_Type               Fixed Deposit
Monthly_Income                 484981.31
Number_of_Transactions                 4
Products_Purchased           Credit Card
Customer_Satisfaction                  5
Annual_Income                 5819775.72
Age_Category                 Young Adult
Categorize_Transactions         Moderate
Loan_Eligibility            Not Eligible
Debt_to_Income_Ratio            6.588135
Name: 131, dtype: object

## e.Write a function to classify customers based on satisfaction:o	1-2 stars → "Dissatisfied",o	3 stars → "Neutral",o	4-5 stars → "Satisfied"

In [193]:
def classify_satisfaction(rating):
    if rating in [1, 2]:
        return "Dissatisfied"
    elif rating == 3:
        return "Neutral"
    elif rating in [4, 5]:
        return "Satisfied"
    else:
        return "Unknown" 
df.head(3)

Unnamed: 0,Customer_ID,Age,Gender,Education_Level,Number_of_Children,Account_Balance,Loan_Status,Credit_Score,Account_Type,Monthly_Income,Number_of_Transactions,Products_Purchased,Customer_Satisfaction,Annual_Income,Age_Category,Categorize_Transactions,Loan_Eligibility,Debt_to_Income_Ratio
0,100000,56,Male,Master,0,103387.03,Rejected,376,Savings,300702.84,39,Investment Account,2,3608434.08,Senior Citizen,Frequent,Not Eligible,2.865149
1,100001,69,Male,High School,5,971.24,Pending,691,Current,124363.77,49,Investment Account,5,1492365.24,Senior Citizen,Frequent,Not Eligible,0.065081
2,100002,46,Male,Phd,3,20112.89,Approved,711,Fixed Deposit,471216.94,26,Other,4,5654603.28,Middle Age,Low,Not Eligible,0.355691


## f.Create a new column Satisfaction Category using this function

In [195]:
df['Satisfaction_Category'] = df['Customer_Satisfaction'].apply(classify_satisfaction)
df.head(3)

Unnamed: 0,Customer_ID,Age,Gender,Education_Level,Number_of_Children,Account_Balance,Loan_Status,Credit_Score,Account_Type,Monthly_Income,Number_of_Transactions,Products_Purchased,Customer_Satisfaction,Annual_Income,Age_Category,Categorize_Transactions,Loan_Eligibility,Debt_to_Income_Ratio,Satisfaction_Category
0,100000,56,Male,Master,0,103387.03,Rejected,376,Savings,300702.84,39,Investment Account,2,3608434.08,Senior Citizen,Frequent,Not Eligible,2.865149,Dissatisfied
1,100001,69,Male,High School,5,971.24,Pending,691,Current,124363.77,49,Investment Account,5,1492365.24,Senior Citizen,Frequent,Not Eligible,0.065081,Satisfied
2,100002,46,Male,Phd,3,20112.89,Approved,711,Fixed Deposit,471216.94,26,Other,4,5654603.28,Middle Age,Low,Not Eligible,0.355691,Satisfied


# Summary

##Summary
#The average account balance of approved loans was 248,285.
#The education level with most rejected loans is high school education.
#Female customers were the majority at 51.30%
#The most common purchase product is mortgage while the least is personal loan
#Gender distribution of customers with Rejected Loans:
Fema-  1 and 6
M-   .183


# Saved the cleaned dataset as bank_customers_group2_cleaned.xlsx.

In [200]:
df.to_excel("bank_customers_group2_cleaned.xlsx", index=False)
print("Dataset saved successfully!")

Dataset saved successfully!
