# Age, Income, and the Discounting of Delayed Monetary Losses

This notebook replicates the analyses from the publication:

> Wan, H., Myerson, J., Green, L., Strube, M. J., & Hale, S. (2025). Age, income, and the discounting of delayed monetary losses. *The Journals of Gerontology, Series B: Psychological Sciences and Social Sciences, 80*(11), gbaf162. https://doi.org/10.1093/geronb/gbaf162

The original R analysis is translated into a Python workflow using `pandas`, `statsmodels`, `patsy`, `pymc`/`arviz`, `pingouin`, `scipy`, `scikit-learn`, and `matplotlib`/`seaborn`. The analyses examine the effects of age and income on the discounting of delayed monetary losses by fitting a series of Bayesian multilevel beta regressions.

The data for this study are available in the Supplementary Material at the publisher's website: <https://doi.org/10.1093/geronb/gbaf162>.

In [None]:
# Install necessary packages
# `sys.executable` is the interpreter running this kernel, so pip installs into
# the same environment the notebook imports from.
# NOTE: no versions are pinned, so the installed releases depend on when this
# cell is run.
import sys
!{sys.executable} -m pip install pandas numpy scipy statsmodels pymc arviz matplotlib seaborn scikit-learn openpyxl pingouin

In [29]:
# --- Setup: Imports and Custom Functions ---
import pandas as pd
import numpy as np
import warnings
from sklearn.metrics import auc as calculate_auc, r2_score
from scipy.optimize import curve_fit
import statsmodels.formula.api as smf
import pymc as pm
import arviz as az
import matplotlib.pyplot as plt
import seaborn as sns
import pingouin as pg
import patsy # Import patsy for formula handling

# Suppress warnings for cleaner output
# NOTE: the 'ignore' filter is global -- every Python warning raised after this
# line is hidden for the rest of the session, not only cosmetic ones.
warnings.filterwarnings('ignore')
# Show floats with three decimals whenever pandas objects are printed.
pd.options.display.float_format = '{:.3f}'.format

def set_mattheme(ax, title=""):
    """Style an axes object in place: bold title and labels, black frame, white background.

    Parameters
    ----------
    ax : axes-like object
        Restyled in place; its current x/y label text is kept and only re-set
        with the bold 12-pt styling.
    title : str, optional
        Title text, drawn bold at 14 pt. Defaults to an empty title.
    """
    bold_label = {'fontsize': 12, 'fontweight': 'bold'}

    ax.set_title(title, fontsize=14, fontweight='bold')
    # Re-apply the existing label text so that only the font styling changes.
    current_xlabel = ax.get_xlabel()
    ax.set_xlabel(current_xlabel, **bold_label)
    current_ylabel = ax.get_ylabel()
    ax.set_ylabel(current_ylabel, **bold_label)

    # Thin black border on every side of the plot.
    for border in ax.spines.values():
        border.set_edgecolor('black')
        border.set_linewidth(1)

    ax.set_facecolor('white')
    ax.grid(False)

# Design matrices are built with patsy and handed to PyMC as plain NumPy arrays,
# which keeps the model picklable for multi-process sampling.
def betaMLM_pymc(formula, data):
    """Fit a Bayesian beta regression, optionally with a random intercept per ID.

    Parameters
    ----------
    formula : str
        Patsy-style formula for the fixed effects, e.g.
        ``"AuC ~ Age_std + I(Age_std**2) + (1|ID)"``. A random intercept is
        requested with the literal token ``(1|ID)`` (no spaces). It must be the
        last term: everything from the token onwards is discarded before the
        fixed-effects part is passed to patsy.
    data : pandas.DataFrame
        Must contain every variable used in the formula and, when ``(1|ID)``
        is present, an ``ID`` column. The frame is not modified.

    Returns
    -------
    arviz.InferenceData
        Result of ``pm.sample`` (4 chains, 2000 draws, 2000 tuning steps,
        seed 42). Every slope is exposed as its own posterior variable named
        after its patsy design-matrix column (e.g. ``Age_std``,
        ``I(Age_std ** 2)``), next to ``Intercept`` and ``nu`` and, for
        multilevel fits, ``sigma_id``, ``z_id`` and ``intercept_id``.

    Raises
    ------
    ValueError
        If the design matrix has no ``Intercept`` column, or if a row used in
        the model has a missing ``ID``.
    """
    # Work on a positionally indexed copy so the rows patsy keeps can be mapped
    # back to their IDs unambiguously.
    rows = data.reset_index(drop=True)

    # 1. Separate the fixed-effects formula from the random-intercept token
    has_re = '(1|ID)' in formula
    if has_re:
        fixed_formula = formula.split('(1|ID)')[0].strip().rstrip('+').strip()
        if fixed_formula.endswith('~'):
            # Only a random intercept was requested: keep a valid intercept-only formula.
            fixed_formula += ' 1'
    else:
        fixed_formula = formula

    # 2. Build the design matrices OUTSIDE the model context. Patsy drops rows
    #    with missing values by default; the returned index records which rows
    #    survived.
    y_df, X_df = patsy.dmatrices(fixed_formula, data=rows, return_type='dataframe')
    outcome_name = y_df.columns[0]
    if 'Intercept' not in X_df.columns:
        raise ValueError("betaMLM_pymc requires a formula with an intercept term.")
    slope_names = [name for name in X_df.columns if name != 'Intercept']

    # 3. Plain NumPy arrays (picklable) for the sampler
    y_data = y_df.to_numpy().ravel()
    X_slopes = X_df[slope_names].to_numpy()

    coords = {}
    if slope_names:
        coords['predictor'] = slope_names
    if has_re:
        # Factorize only the rows that patsy kept so that id_idx lines up with
        # the rows of the design matrix.
        id_idx, id_cats = pd.factorize(rows.loc[X_df.index, 'ID'])
        if (id_idx < 0).any():
            raise ValueError("Column 'ID' contains missing values in rows used by the model.")
        coords['ID_dim'] = id_cats

    # 4. Model definition and sampling
    with pm.Model(coords=coords):
        # --- Priors ---
        intercept = pm.Normal('Intercept', mu=0, sigma=100)
        eta = intercept
        if slope_names:
            slopes = pm.Cauchy('slopes', alpha=0, beta=2.5, dims='predictor')
            eta = eta + pm.math.dot(X_slopes, slopes)

        if has_re:
            # Non-centred parameterisation: standard-normal offsets scaled by sigma_id.
            sigma_id = pm.HalfCauchy('sigma_id', beta=2.5)
            z_id = pm.Normal('z_id', mu=0, sigma=1, dims='ID_dim')
            intercept_id = pm.Deterministic('intercept_id', z_id * sigma_id, dims='ID_dim')
            eta = eta + intercept_id[id_idx]

        nu = pm.HalfNormal('nu', sigma=100)

        # --- Likelihood (logit link on the mean of the beta distribution) ---
        mu = pm.math.invlogit(eta)
        pm.Beta(outcome_name, mu=mu, nu=nu, observed=y_data)

        # --- Sampling ---
        idata = pm.sample(draws=2000, tune=2000, chains=4, cores=4,
                          progressbar=False, random_seed=42)

    # 5. Expose each slope under its design-matrix column name so summaries can
    #    request e.g. 'Age_std' directly. (A single variable cannot be renamed to
    #    a list of names, so the vector is split instead.)
    if slope_names:
        posterior = idata.posterior
        for name in slope_names:
            posterior[name] = posterior['slopes'].sel(predictor=name, drop=True)
        idata.posterior = posterior.drop_vars(['slopes', 'predictor'])
    return idata


def pymc_summary(idata, var_names=None):
    """Tabulate posterior estimates with the probability of direction (pd).

    Parameters
    ----------
    idata : arviz.InferenceData
        Fitted model whose ``posterior`` group is summarised.
    var_names : list of str, optional
        Posterior variables to report. When omitted, every posterior variable
        is reported except the random-intercept machinery (names containing
        ``z_id``, ``sigma_id`` or ``intercept_id``).

    Returns
    -------
    pandas.DataFrame
        One row per scalar parameter (vector-valued variables contribute one
        row per element), with columns:

        * ``Est.(SE)`` -- posterior mean and posterior SD as reported by
          ``az.summary``, formatted to three decimals;
        * ``pd`` -- larger of the posterior shares above and not above zero;
        * ``signif`` -- ``'*'`` when ``pd >= 0.975``, otherwise ``' '``.
    """
    if var_names is None:
        # Auto-detect variable names, excluding random effects infrastructure
        random_effect_tags = ('z_id', 'sigma_id', 'intercept_id')
        var_names = [v for v in idata.posterior.data_vars
                     if not any(tag in v for tag in random_effect_tags)]

    def _prob_direction(samples):
        # Share of draws on the dominant side of zero.
        share_positive = np.mean(np.asarray(samples) > 0)
        return max(share_positive, 1 - share_positive)

    stats = az.summary(idata, var_names=var_names, kind='stats', hdi_prob=0.95)
    # Let ArviZ evaluate pd per summary row so vector-valued variables line up
    # with their 'name[i]' labels; round_to='none' keeps pd unrounded for the
    # threshold comparison below.
    direction = az.summary(idata, var_names=var_names, kind='stats',
                           stat_funcs={'pd': _prob_direction}, extend=False,
                           round_to='none')

    table = stats[['mean', 'sd']].copy()
    table['pd'] = direction['pd'].reindex(table.index).astype(float)
    table['signif'] = np.where(table['pd'] >= 0.975, '*', ' ')
    table['Est.(SE)'] = [f"{est:.3f} ({spread:.3f})"
                         for est, spread in zip(table['mean'], table['sd'])]

    return table[['Est.(SE)', 'pd', 'signif']]

In [13]:
# --- Data Loading and Processing ---
# Load raw data from the main source file
# (path is relative to the notebook's working directory; the first CSV column
# is used as the row index)
Samp = pd.read_csv("R code/dat/Lifespan.csv", index_col=0)

# Perform initial filtering and cleaning, matching the R script
# Rows are kept only when ALL of the following hold:
#   - type == 'delay' and task == 'loss'
#   - age is present and lies in [20, 80]
#   - check == 6  (NOTE(review): presumably a data-quality / attention-check
#     score -- confirm against the R script)
#   - sex is present
#   - age is consistent with the coded age group: 0 <= age - 10 * age_grp <= 10
Disc_Raw = Samp[
    (Samp['type'] == 'delay') & (Samp['task'] == 'loss') &
    (Samp['age'].notna()) & (Samp['age'].between(20, 80)) &
    (Samp['check'] == 6) & (Samp['sex'].notna()) &
    ((Samp['age'] - 10 * Samp['age_grp']).between(0, 10))
].copy()

# Create Age_Group factor
# pd.cut uses right-closed intervals here: (19, 34], (34, 50], (50, 64], (64, 80].
age_bins = [19, 34, 50, 64, 80]
age_labels = ["20-34", "35-50", "51-64", "65-80"]
Disc_Raw['age_grp_factor'] = pd.cut(Disc_Raw['age'], bins=age_bins, labels=age_labels)

# Rename columns to match the analysis script's expectations
rename_dict = {
    'type': 'Exp', 'id': 'ID', 'amt': 'Amount', 'iv': 'Delay', 'value': 'SV', 'age': 'Age',
    'income': 'Income', 'sex': 'Gender', 'depress': 'Depression', 'anxious': 'Anxiety',
    'hads': 'Distress', 'edu': 'Education', 'health': 'Health', 'age_grp_factor': 'Age_Group',
    'eth': 'Ethnicity', 'race': 'Race'
}
Disc_Raw = Disc_Raw.rename(columns=rename_dict)
Disc_Raw = Disc_Raw.filter(items=rename_dict.values()) # Keep only renamed columns

# Group-Level Data Frame for Plotting
# Amount codes 1/2/3 are relabelled as dollar amounts, then SV is averaged
# within each Age_Group x Amount x Delay cell.
Disc_Grp = Disc_Raw.copy()
Disc_Grp['Amount'] = pd.Categorical(
    Disc_Grp['Amount'].replace({1: "$150", 2: "$2,500", 3: "$30,000"}),
    categories=["$150", "$2,500", "$30,000"], ordered=True
)
Disc_Grp = Disc_Grp.groupby(['Age_Group', 'Amount', 'Delay'], observed=True).agg(Mean_SV=('SV', 'mean')).reset_index()

# Calculate Area under the Curve (AuC) for each individual
# Rows are sorted by Delay first so that each ID x Amount group has increasing
# x-values; delays are rescaled by the group's maximum delay.
# NOTE(review): the curve starts at the smallest observed delay -- no point at
# delay 0 is added. Confirm this matches the R computation.
AuC_ID = Disc_Raw.sort_values('Delay').groupby(['ID', 'Amount']).apply(
    lambda g: calculate_auc(g['Delay'] / g['Delay'].max(), g['SV'])
).reset_index(name='AuC')

# Join back demographic info
# One row per ID (the first occurrence) with the task-level columns removed.
demographics = Disc_Raw.drop_duplicates(subset='ID').drop(columns=['Amount', 'Delay', 'SV', 'Exp'])
AuC_ID = pd.merge(AuC_ID, demographics, on='ID', how='left')

# Remap Education codes
# NOTE(review): the mapped values look like years of schooling -- confirm.
# Codes that are not keys of edu_map become NaN.
edu_map = {1: 5, 2: 12, 3: 14, 4: 16, 5: 18}
AuC_ID['Education'] = AuC_ID['Education'].map(edu_map)


# Create Model-Specific Dataframes with Scaled Predictors
def scale_var(series):
    """Centre a series on its mean and divide by two of its standard deviations."""
    centered = series - series.mean()
    two_sd = 2 * series.std()
    return centered / two_sd

# Row filters shared by the model-specific samples below.
aged_35_plus = AuC_ID['Age'] >= 35
income_known = AuC_ID['Income'].notna()
distress_known = AuC_ID['Distress'].notna()
covariates_known = (
    AuC_ID['Gender'].notna() & AuC_ID['Education'].notna() & AuC_ID['Health'].notna()
)

# Each sample is standardised on its own rows, so a given *_std column is not
# on the same scale across DL_dat0 ... DL_dat4.

# Full sample
DL_dat0 = AuC_ID.assign(Age_std=scale_var(AuC_ID['Age']))

# Age 35+
DL_dat1 = AuC_ID[aged_35_plus].assign(
    Age_std=lambda d: scale_var(d['Age']),
)

# Age 35+ with income reported
DL_dat2 = AuC_ID[aged_35_plus & income_known].assign(
    Age_std=lambda d: scale_var(d['Age']),
    Income_std=lambda d: scale_var(d['Income']),
)

# Age 35+ with income and distress reported
DL_dat3 = AuC_ID[aged_35_plus & income_known & distress_known].assign(
    Age_std=lambda d: scale_var(d['Age']),
    Income_std=lambda d: scale_var(d['Income']),
    Anxiety_std=lambda d: scale_var(d['Anxiety']),
)

# Age 35+ with every covariate of the full model reported
DL_dat4 = AuC_ID[aged_35_plus & income_known & distress_known & covariates_known].copy()
for col in ['Age', 'Amount', 'Income', 'Education', 'Anxiety', 'Health']:
    DL_dat4[col + '_std'] = scale_var(DL_dat4[col])
# Gender is only mean-centred, not rescaled.
DL_dat4['Gender_c'] = DL_dat4['Gender'] - DL_dat4['Gender'].mean()

# Clip AuC values for Beta regression (must be in the open interval (0, 1))
for df in [DL_dat0, DL_dat1, DL_dat2, DL_dat3, DL_dat4]:
    df['AuC'] = df['AuC'].clip(lower=1e-5, upper=1 - 1e-5)

print("Data processing complete.")

Data processing complete.


---
## Group-Level Descriptive Analyses

This section visualizes the group-level discounting functions, reports the goodness-of-fit (R-squared) for the hyperboloid model, and tests for the amount effect.

In [15]:
# --- Group-Level Analysis: Plot and R2 ---
def hyperboloid(delay, k, s):
    """Hyperboloid discounting function: 1 / (1 + k * delay) ** s.

    ``delay`` may be a scalar or an array-like supporting arithmetic; ``k`` and
    ``s`` are the two parameters of the function.
    """
    base = 1 + k * delay
    return 1 / base**s

# R-squared for group fits
def _hyperboloid_r2(cell):
    """Fit the hyperboloid to one Age_Group x Amount cell and return the fit's R-squared."""
    fitted_params, _ = curve_fit(hyperboloid, cell['Delay'], cell['Mean_SV'], p0=[0.1, 1])
    predicted = hyperboloid(cell['Delay'], *fitted_params)
    return r2_score(cell['Mean_SV'], predicted)

print("--- R-squared for Group-Level Hyperboloid Fits ---")
r2_by_cell = Disc_Grp.groupby(['Age_Group', 'Amount'], observed=True).apply(_hyperboloid_r2)
# Age groups as rows, amounts as columns.
r2_results = r2_by_cell.unstack()
print(r2_results)

--- R-squared for Group-Level Hyperboloid Fits ---
Amount     $150  $2,500  $30,000
Age_Group                       
20-34     0.956   0.983    0.997
35-50     0.973   0.976    0.965
51-64     0.898   0.878    0.955
65-80     0.925   0.926    0.890


---
## Reliability and Correlational Analyses

This section examines the internal consistency of the AuC measure across different amounts and presents the correlation matrix for all key variables for participants aged 35 and older.

In [16]:
# --- Reliability and Correlations ---
# Internal consistency: one row per participant, one AuC column per amount.
print("--- Reliability of AuC Measure (Cronbach's Alpha) ---")
reliability_df = AuC_ID.pivot(index='ID', columns='Amount', values='AuC')
print(pg.cronbach_alpha(data=reliability_df))

# Correlations: average each variable within participant (AuC is averaged
# across amounts) before correlating.
print("\n--- Correlation Matrix for Participants Aged 35-80 ---")
corr_vars = ['Age', 'Income', 'Education', 'Gender', 'Distress', 'Anxiety',
             'Depression', 'Health', 'AuC']
older_adults = AuC_ID[AuC_ID['Age'] >= 35]
Cor_df = older_adults.groupby('ID')[corr_vars].mean()
print(Cor_df.corr(method='pearson').round(3))

--- Reliability of AuC Measure (Cronbach's Alpha) ---
(np.float64(0.8601565041438437), array([0.839, 0.879]))

--- Correlation Matrix for Participants Aged 35-80 ---
              Age  Income  Education  Gender  Distress  Anxiety  Depression  \
Age         1.000  -0.031      0.011  -0.029    -0.281   -0.325      -0.174   
Income     -0.031   1.000      0.388  -0.132    -0.207   -0.193      -0.177   
Education   0.011   0.388      1.000  -0.100    -0.189   -0.171      -0.173   
Gender     -0.029  -0.132     -0.100   1.000     0.153    0.174       0.095   
Distress   -0.281  -0.207     -0.189   0.153     1.000    0.932       0.893   
Anxiety    -0.325  -0.193     -0.171   0.174     0.932    1.000       0.670   
Depression -0.174  -0.177     -0.173   0.095     0.893    0.670       1.000   
Health      0.026   0.286      0.224  -0.036    -0.515   -0.411      -0.540   
AuC         0.206   0.114      0.017  -0.045    -0.149   -0.169      -0.084   

            Health    AuC  
Age          0.

---
## Hypothesis Testing: Bayesian Multilevel Models

This section presents the results from a series of Bayesian multilevel beta regressions testing the effects of age, income, and other covariates on the discounting of delayed losses.

In [None]:
# --- Hypothesis Testing: Main Models ---
# Each model is a multilevel beta regression with a random intercept per
# participant ("(1|ID)"). Every model is fitted on its own sample (DL_dat1 ...
# DL_dat4), so the number of rows differs between models.

# Model 1: AuC ~ Age + Age^2
print("--- Model 1: AuC ~ Age + Age^2 ---")
idata_mod1b = betaMLM_pymc("AuC ~ Age_std + I(Age_std**2) + (1|ID)", DL_dat1)
# patsy normalises the term I(Age_std**2) to the column name 'I(Age_std ** 2)'.
print(pymc_summary(idata_mod1b, ['Intercept', 'Age_std', 'I(Age_std ** 2)']))

# Model 2: AuC ~ Age + Age^2 + Income
print("\n--- Model 2: AuC ~ Age + Age^2 + Income ---")
idata_mod2 = betaMLM_pymc("AuC ~ Age_std + I(Age_std**2) + Income_std + (1|ID)", DL_dat2)
print(pymc_summary(idata_mod2, ['Intercept', 'Age_std', 'I(Age_std ** 2)', 'Income_std']))

# Model 3: AuC ~ Age + Age^2 + Income + Anxiety
print("\n--- Model 3: AuC ~ Age + Age^2 + Income + Anxiety ---")
idata_mod3 = betaMLM_pymc("AuC ~ Age_std + I(Age_std**2) + Income_std + Anxiety_std + (1|ID)", DL_dat3)
print(pymc_summary(idata_mod3, ['Intercept', 'Age_std', 'I(Age_std ** 2)', 'Income_std', 'Anxiety_std']))

# Model 4: Full Model with Interactions
print("\n--- Model 4: Full Model with Covariates and Age Interactions ---")
formula_mod4 = ("AuC ~ Age_std * Income_std + Age_std * Anxiety_std + Age_std * Amount_std + "
                "Age_std * Education_std + Age_std * Gender_c + Age_std * Health_std + "
                "I(Age_std**2) + (1|ID)")
idata_mod4 = betaMLM_pymc(formula_mod4, DL_dat4)
# Report everything except the random-intercept variables, which betaMLM_pymc
# names with a lower-case '_id' suffix (sigma_id, z_id, intercept_id). A
# case-sensitive test for 'ID' would not exclude any of them.
fixed_vars_mod4 = [v for v in idata_mod4.posterior.data_vars
                   if not str(v).lower().endswith('_id')]
print(pymc_summary(idata_mod4, fixed_vars_mod4))

Initializing NUTS using jitter+adapt_diag...


--- Model 1: AuC ~ Age + Age^2 ---


Multiprocess sampling (4 chains in 4 jobs)
NUTS: [Intercept, slopes, sigma_id, z_id, nu]
