In [1]:
import numpy as np
import pandas as pd

# --- Reproducibility and sample size ---------------------------------------
np.random.seed(42)  # seeds the legacy global NumPy RNG used by every draw below
n = 300             # number of synthetic respondents


def _noisy_scale(signal, noise_sd, upper):
    """Return ``signal`` plus zero-mean Gaussian noise, clipped to ``[1, upper]``.

    Each call draws one noise vector shaped like ``signal`` from the global
    NumPy RNG, so the order of calls fixes the values produced under a seed.
    """
    jitter = np.random.normal(0, noise_sd, signal.shape)
    return np.clip(signal + jitter, 1, upper)


# --- Core variable ----------------------------------------------------------
# Financial stability: normal draw (mean 2.5, std 0.8) restricted to the 1-4 scale.
financial_stability = np.clip(np.random.normal(loc=2.5, scale=0.8, size=n), 1, 4)

# --- Derived, correlated variables ------------------------------------------
# Built on (5 - stability), so they move opposite to stability; small noise
# (std 0.5), 1-4 scale.
borrowing_frequency = _noisy_scale(5 - financial_stability, 0.5, 4)
afford_essentials = _noisy_scale(5 - financial_stability, 0.5, 4)

# Built on stability directly, with larger noise and a 1-5 scale (per the
# original note, the wider range reflects the survey's answer options).
emotional_wellbeing = _noisy_scale(financial_stability, 0.8, 5)
income_range = _noisy_scale(financial_stability, 0.7, 5)

# Built on stability directly, with the smallest noise (std 0.4), 1-4 scale.
financial_awareness = _noisy_scale(financial_stability, 0.4, 4)

# --- BNPL usage -------------------------------------------------------------
# Decreasing logistic curve in stability (steepness k = 1.2, midpoint x0 = 2.5):
# the lower the stability, the higher the probability of being a BNPL user.
_steepness, _midpoint = 1.2, 2.5
prob_bnpl = 1 / (1 + np.exp(_steepness * (financial_stability - _midpoint)))
# One Bernoulli draw per respondent turns each probability into a 0/1 flag.
bnpl_user = np.random.binomial(n=1, p=prob_bnpl)

# --- Assemble the DataFrame -------------------------------------------------
# The scale items are rounded to whole scores (still stored as floats); the
# BNPL flag is added unchanged as the final column.
_scale_items = {
    "financial_stability": financial_stability,
    "borrowing_frequency": borrowing_frequency,
    "afford_essentials": afford_essentials,
    "emotional_wellbeing": emotional_wellbeing,
    "income_range": income_range,
    "financial_awareness": financial_awareness,
}
data = pd.DataFrame({
    **{label: scores.round() for label, scores in _scale_items.items()},
    "bnpl_user": bnpl_user,
})


In [3]:
# Preview the first 25 rows of `data` as a quick visual sanity check.
data.head(25)

Unnamed: 0,financial_stability,borrowing_frequency,afford_essentials,emotional_wellbeing,income_range,financial_awareness,bnpl_user
0,3.0,2.0,2.0,3.0,3.0,3.0,0
1,2.0,2.0,2.0,2.0,2.0,2.0,0
2,3.0,2.0,2.0,3.0,3.0,3.0,1
3,4.0,2.0,2.0,5.0,4.0,4.0,0
4,2.0,3.0,3.0,2.0,2.0,2.0,1
5,2.0,3.0,4.0,2.0,2.0,2.0,1
6,4.0,2.0,1.0,3.0,3.0,4.0,0
7,3.0,2.0,1.0,4.0,3.0,3.0,0
8,2.0,3.0,2.0,3.0,1.0,2.0,0
9,3.0,2.0,3.0,5.0,4.0,4.0,1
