In [1]:
import pandas as pd
import numpy as np

# Seed NumPy's legacy global generator so every draw below is repeatable.
np.random.seed(42)

# Number of synthetic loan applicants to simulate.
n_samples = 200

# Feature columns are drawn one at a time, in a fixed order: the global
# random stream is shared, so reordering these draws would change the data.
data = {}
data['age'] = np.random.randint(20, 70, size=n_samples)
data['annual_income'] = np.random.uniform(30000, 120000, size=n_samples).round(2)
data['loan_amount'] = np.random.uniform(1000, 50000, size=n_samples).round(2)
data['loan_term_months'] = np.random.choice(
    [12, 36, 60], size=n_samples, p=[0.3, 0.4, 0.3]
)
data['credit_score'] = np.random.randint(300, 851, size=n_samples)
data['credit_history_years'] = np.random.randint(0, 31, size=n_samples)
data['num_dependents'] = np.random.randint(0, 6, size=n_samples)
data['employment_status'] = np.random.choice(
    ['Employed', 'Unemployed', 'Self-Employed', 'Retired'],
    size=n_samples,
    p=[0.6, 0.1, 0.2, 0.1],
)

df = pd.DataFrame(data)

# Per-feature weights of the logistic default model. Positive weights raise
# the default probability, negative weights lower it. 'employment_status'
# has no weight, so it does not influence the simulated outcome.
coeffs = dict(
    age=-0.02,
    annual_income=-1e-5,
    loan_amount=5e-5,
    loan_term_months=0.005,
    credit_score=-0.003,
    credit_history_years=-0.04,
    num_dependents=0.1,
)

# Linear score = weighted features (added in the order of `coeffs`) plus
# Gaussian noise with standard deviation 0.5.
noise = np.random.normal(scale=0.5, size=n_samples)
weighted_terms = [weight * df[feature] for feature, weight in coeffs.items()]
linear_combination = sum(weighted_terms) + noise

# Squash the score through the sigmoid to obtain a probability in (0, 1),
# then draw one Bernoulli outcome per row as the 'default' label.
probabilities = 1 / (1 + np.exp(-linear_combination))
df['default'] = np.random.binomial(n=1, p=probabilities)

# The dataset itself is displayed in the next cell.



In [2]:
# Print the generated frame as plain text; pandas elides the middle rows
# and wraps the wide column set across several blocks in the output.
print(df)

     age  annual_income  loan_amount  loan_term_months  credit_score  \
0     58      114861.87      3254.13                60           431   
1     48       59088.26      2995.71                60           341   
2     34       76691.16     42917.57                60           388   
3     62       93271.71     35479.24                12           701   
4     27       62726.66     24234.52                36           595   
..   ...            ...          ...               ...           ...   
195   56       52295.79     43371.40                12           457   
196   52       62037.54     40832.15                12           358   
197   61       98206.15     49986.17                60           427   
198   63       31295.41     49835.21                36           414   
199   43       40446.54     28216.15                36           508   

     credit_history_years  num_dependents employment_status  default  
0                      13               1     Self-Employed     

In [None]:
# Count how many rows share each exact age value, most frequent ages first
# (sort=True / ascending=False are the value_counts defaults, spelled out).
# NOTE(review): the name suggests age *groups*, but no binning happens here;
# confirm whether bucketed ranges were intended.
count_age_group = df['age'].value_counts(sort=True, ascending=False)
print(count_age_group)


age
63    10
58     9
43     8
34     7
27     7
47     7
33     6
61     6
28     6
54     6
66     5
46     5
52     5
21     5
45     5
56     5
26     5
64     5
20     5
44     5
30     4
40     4
48     4
59     4
22     4
55     4
42     4
23     4
41     4
60     3
32     3
49     3
37     3
25     3
51     3
24     2
62     2
69     2
35     2
68     2
31     2
67     2
53     2
36     2
39     2
38     1
57     1
29     1
50     1
Name: count, dtype: int64
