In [20]:
# Import required libraries for data analysis
import pandas as pd  # For data manipulation and analysis
import numpy as np   # For numerical operations and array handling
import statistics as st  # For statistical functions

In [21]:
# Load loan.csv (resolved relative to the current working directory)
# into the DataFrame `df` that every later cell operates on.
df = pd.read_csv(filepath_or_buffer="loan.csv")

In [None]:
# Report the size of the dataset as a (row_count, column_count) tuple.
print((len(df.index), len(df.columns)))

In [None]:
# Summarise the DataFrame: index range, each column's dtype and non-null
# count, and total memory usage.
# DataFrame.info() writes this report to stdout itself and returns None, so
# it must not be wrapped in print() -- that would emit an extra "None" line.
df.info()

In [None]:
# Force every column to a numeric dtype; any value that cannot be parsed as
# a number is replaced by NaN (errors='coerce').
# NOTE: `df` itself is overwritten here, so text-valued entries (e.g. in
# categorical columns) are NaN in every cell that runs after this one.
df = df.apply(lambda column: pd.to_numeric(column, errors='coerce'))
# Column-wise arithmetic mean (NaN values are skipped by default).
df.mean(axis=0)

In [None]:
# Largest value found in each column, returned as a Series indexed by
# column name (axis=0 reduces down the rows).
df.max(axis=0)

In [None]:
# Smallest value found in each column, returned as a Series indexed by
# column name (axis=0 reduces down the rows).
df.min(axis=0)

In [None]:
# Median (middle value of the sorted data) of each column, as a Series.
# Unlike the mean, the median is not pulled around by a few extreme values.
df.median(axis=0)

In [None]:
# Standard deviation of each column: how far values typically lie from the
# column mean. ddof=1 is the pandas default, i.e. the sample standard
# deviation (normalised by N-1).
df.std(ddof=1)

In [None]:
# Average of the 'LoanAmount' column (NaN entries are ignored).
# Plain [] column selection yields the same Series as df.loc[:, 'LoanAmount'].
print(df['LoanAmount'].mean())

In [None]:
# Per-record mean: axis=1 averages across the columns of each row.
# Only the first five rows are shown to keep the output short.
df.mean(axis=1).head(5)

In [None]:
# Median of the 'LoanAmount' column: the middle value once the amounts are
# sorted, which makes it less sensitive to outliers than the mean.
# Plain [] column selection yields the same Series as df.loc[:, 'LoanAmount'].
print(df['LoanAmount'].median())

In [None]:
# Per-record median: axis=1 takes the middle value across the columns of
# each row. Only the first five rows are shown to keep the output short.
df.median(axis=1).head(5)

In [None]:
# Variance of each column: the average squared distance from the column
# mean, i.e. the square of the standard deviation. ddof=1 is the pandas
# default (sample variance, normalised by N-1).
df.var(ddof=1)

In [None]:
# Mean 'LoanAmount' for each 'Gender' category.
# `df` cannot be used as the source here: an earlier cell replaced every
# column with pd.to_numeric(..., errors='coerce'), which turns text labels
# into NaN, and groupby() drops NaN keys by default -- the result would be
# empty. Re-read just the two needed columns so the labels are intact.
# (Assumes 'Gender' holds text labels in loan.csv -- confirm against the data.)
gender_loans = pd.read_csv("loan.csv", usecols=["Gender", "LoanAmount"])
# Keep the amount numeric exactly as the rest of the analysis does.
gender_loans["LoanAmount"] = pd.to_numeric(gender_loans["LoanAmount"], errors="coerce")
gender_loans.groupby("Gender").mean()

In [None]:
# Median 'LoanAmount' for each 'Gender' category; the median is less
# affected by a few very large loans than the mean.
# `df` cannot be used as the source here: an earlier cell replaced every
# column with pd.to_numeric(..., errors='coerce'), which turns text labels
# into NaN, and groupby() drops NaN keys by default -- the result would be
# empty. Re-read just the two needed columns so the labels are intact.
# (Assumes 'Gender' holds text labels in loan.csv -- confirm against the data.)
gender_loans = pd.read_csv("loan.csv", usecols=["Gender", "LoanAmount"])
# Keep the amount numeric exactly as the rest of the analysis does.
gender_loans["LoanAmount"] = pd.to_numeric(gender_loans["LoanAmount"], errors="coerce")
gender_loans.groupby("Gender").median()

In [None]:
# Mean 'LoanAmount' for each 'Self_Employed' category, to compare loan sizes
# between self-employed and non-self-employed applicants.
# `df` cannot be used as the source here: an earlier cell replaced every
# column with pd.to_numeric(..., errors='coerce'), which turns text labels
# into NaN, and groupby() drops NaN keys by default -- the result would be
# empty. Re-read just the two needed columns so the labels are intact.
# (Assumes 'Self_Employed' holds text labels in loan.csv -- confirm against the data.)
employment_loans = pd.read_csv("loan.csv", usecols=["Self_Employed", "LoanAmount"])
# Keep the amount numeric exactly as the rest of the analysis does.
employment_loans["LoanAmount"] = pd.to_numeric(employment_loans["LoanAmount"], errors="coerce")
employment_loans.groupby("Self_Employed").mean()