-
Notifications
You must be signed in to change notification settings - Fork 0
/
generate_statistics.py
31 lines (26 loc) · 1.15 KB
/
generate_statistics.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
import pandas as pd
def generate_descriptive_statistics(loan_data, filename):
    """Write a short descriptive-statistics report to ``<filename>.txt``.

    The report contains four lines:

    1. the total number of missing cells in ``loan_data``;
    2. the share of rows with ``Gender == 2`` whose ``Loan_Status`` is ``0``,
       reported as the percentage of female applicants that were approved;
    3. the mean of ``ApplicantIncome`` over all rows;
    4. the mean of ``ApplicantIncome`` over rows with ``Self_Employed == 1``.

    NOTE(review): the codes ``Gender == 2`` (female), ``Loan_Status == 0``
    (approved) and ``Self_Employed == 1`` are taken as-is from the existing
    logic -- confirm them against the dataset's encoding.

    Args:
        loan_data: pandas DataFrame with the columns ``Gender``,
            ``Loan_Status``, ``ApplicantIncome`` and ``Self_Employed``.
        filename: Output path without extension; ``.txt`` is appended.

    Returns:
        None. The report is written to disk.

    Raises:
        KeyError: If a required column is missing from ``loan_data``.
    """
    # Compute every statistic before touching the file system so that a
    # failure (e.g. a missing column) does not leave a truncated report.
    missing_values = loan_data.isnull().sum().sum()

    female_status_share = loan_data.loc[
        loan_data["Gender"] == 2, "Loan_Status"
    ].value_counts(normalize=True)
    # ``.get`` instead of ``[0]``: when no female applicant has status 0 (or
    # there are no female applicants at all) the label is absent and plain
    # indexing would raise KeyError; the share is reported as 0 instead.
    female_approved = female_status_share.get(0, 0.0) * 100

    avg_income_all = loan_data["ApplicantIncome"].mean()

    # An empty selection yields NaN, which is rendered as "nan".
    avg_income_self_employed = loan_data.loc[
        loan_data["Self_Employed"] == 1, "ApplicantIncome"
    ].mean()

    report_lines = [
        f"Number of missing values: {missing_values}\n",
        f"Percentage of female applicants that had their loan approved: {female_approved:.2f}%\n",
        f"Average income of all applicants: {avg_income_all:.2f}\n",
        f"Average income of all self-employed applicants: {avg_income_self_employed:.2f}\n",
    ]

    with open(f"{filename}.txt", "w", encoding="utf-8") as f:
        f.writelines(report_lines)