In [4]:
import pandas as pd

# Risk score lookup keyed by STATUS code: 'C' and 'X' both score 0, and each
# numeric code '0'..'5' scores one more than its integer value (1..6).
risk_mapping = {
    'C': 0,
    'X': 0,
    **{str(level): level + 1 for level in range(6)},
}

# Build per-client approval labels from the raw credit records:
#   1. load the records,
#   2. score every record through risk_mapping,
#   3. aggregate the scores per client ID,
#   4. threshold the aggregates into IS_APPROVED,
#   5. write the (ID, IS_APPROVED) table to disk.

# Force STATUS to be read as text. risk_mapping is keyed by strings, so if
# pandas inferred an integer column (a file holding only numeric codes),
# every lookup would miss and produce NaN.
df = pd.read_csv('credit_record.csv', dtype={'STATUS': str})

# Tolerate stray whitespace around the code before the lookup.
status_codes = df['STATUS'].str.strip()

# Map the STATUS values to risk scores
df['risk_score'] = status_codes.map(risk_mapping)

# Series.map yields NaN for any code absent from risk_mapping, and the
# aggregations below skip NaN without complaint, which would silently
# distort the labels. Fail loudly instead of writing a corrupted file.
unmapped = df['risk_score'].isna()
if unmapped.any():
    raise ValueError(
        f"{int(unmapped.sum())} row(s) in 'credit_record.csv' have a STATUS "
        f"with no risk score: {status_codes[unmapped].unique().tolist()!r}"
    )

# Calculate aggregate metrics (one row per client ID)
agg_df = df.groupby('ID').agg(
    total_risk_score=('risk_score', 'sum'),
    average_risk_score=('risk_score', 'mean'),
    max_risk_score=('risk_score', 'max')
).reset_index()

# A client is labeled "Not Approved" (0) if either:
#   - max risk score is 3 or higher, or
#   - average risk score is 1.0 or higher.
# Otherwise, label as "Approved" (1).
agg_df['IS_APPROVED'] = (
    (agg_df['max_risk_score'] < 3) & (agg_df['average_risk_score'] < 1.0)
).astype(int)

# Keep only the identifier and the label in the exported file.
result_df = agg_df[['ID', 'IS_APPROVED']]
result_df.to_csv('credit_approval_labels.csv', index=False)


The new CSV file 'credit_approval_labels.csv' has been created.
