In [1]:
import pandas as pd

In [2]:
import os

In [3]:
# https://www.federalreserve.gov/econres/scfindex.htm
# 'https://www.federalreserve.gov/econres/files/scf2022s.zip'
df = pd.read_stata('./Downloads/p22i6.dta')

In [4]:
df.columns

Index(['j7398', 'j7578', 'j7579', 'j7019', 'j7020', 'j7001', 'j7050', 'j8020',
       'j8021', 'j5908',
       ...
       'j6768', 'x42001', 'x42000', 'j306', 'j11272', 'j11572', 'j33001',
       'x306', 'x11272', 'x11572'],
      dtype='object', length=5473)

In [5]:
len(df)

22975

In [6]:
df['x3506'].max()

45530000

In [7]:
# https://www.federalreserve.gov/econres/files/codebk2022.txt
variable_org = {
    'checking': [
        # Amount
        'x3506',
        'x3510',
        'x3514',
        'x3518',
        'x3522',
        'x3526',
    ],
    'ira': [
        # Roth
        'x6551',
        'x6559',  # Spouse
        'x6567',  # Family
        # Roll-over
        'x6552',
        'x6560',  # Spouse
        'x6568',  # Family
        # Regular
        'x6553',
        'x6561',  # Spouse
        'x6569',  # Family
        # Keogh
        'x6554',
        'x6562',  # Spouse
        'x6570',  # Family
    ],
    'cds': [
        'x3721'
    ],
    'money_market': [
        'x3730',
        'x3736',
        'x3742',
        'x3748',
        'x3754',
        'x3760',
    ],
    'mutual_funds': [
#         'x3822',
#         'x3824',
#         'x3826',
#         'x3828',
#         'x3830',
#         'x7787',
        'x6704',  # Total
    ],
    'savings_bonds': [
        'x3902',
    ],
    'other_bonds': [
#         'x7635',
#         'x7637',
#         'x7636',
#         'x7638',
#         'x7639',
        'x6706',  # Total
    ],
    'stock': [
        'x3915'
    ],
    'brokerage': [
        'x3930'
    ],
    'managed': [
        # I don't understand this exactly and may be missing some
#         '',
#         '',
        'x3960',
    ],
    'life_insurance': [
        'x4003',
        'x4005',
    ],
    'misc': [
        'x4018',
        'x4022',
        'x4026',
        'x4030',
#         'x4032',  # Debts, ignoring for now
    ]
}
all_cols = sum(variable_org.values(), start=[])

In [8]:
total_net_worth = df[all_cols].sum(axis=1)
total_net_worth

0        258100.0
1        184300.0
2        175200.0
3         93600.0
4         34600.0
           ...   
22970         0.0
22971         0.0
22972         0.0
22973         0.0
22974         0.0
Length: 22975, dtype: float64

In [9]:
import plotly.express as px
import plotly.offline as pyo
pyo.init_notebook_mode()

In [10]:
import matplotlib.pyplot as plt

# d/dx Lorenz

In [11]:
import numpy as np

In [12]:
percentiles = np.arange(len(df)) / len(df)

In [13]:
px.line(
    x=percentiles,
    y=total_net_worth.sort_values().values / total_net_worth.max(),
)

In [14]:
import sklearn.metrics

In [15]:
sklearn.metrics.auc(
    x=percentiles,
    y=total_net_worth.sort_values().values / total_net_worth.max(),
)

0.0029292012811404926

# Mean Net Worth - "Ideal Communism"

In [16]:
total_net_worth.mean()

6523643.14398259

# Lorenz

In [17]:
px.line(
    x=percentiles,
    y=total_net_worth.sort_values().cumsum().values / total_net_worth.sum()
)

In [18]:
sklearn.metrics.auc(
    x=percentiles,
    y=total_net_worth.sort_values().cumsum().values / total_net_worth.sum(),
)

0.03205641490145538

# Net Worth by Age

In [19]:
DOBs = df['x5908']
DOBs

0        1952
1        1952
2        1952
3        1952
4        1952
         ... 
22970    1981
22971    1981
22972    1981
22973    1981
22974    1981
Name: x5908, Length: 22975, dtype: int16

In [20]:
age = 2024 - df['x5908'].values

In [21]:
px.scatter(x=age, y=total_net_worth)

In [22]:
px.scatter(x=age, y=total_net_worth, log_y=True)

In [23]:
fig = px.density_contour(df, x=age, y=np.log(total_net_worth))
fig.update_traces(contours_coloring="fill", contours_showlabels = True)
fig.show()


divide by zero encountered in log


invalid value encountered in log



# Wealth per Year of Life Distribution

In [24]:
px.line((total_net_worth / age).sort_values().values)

In [25]:
(total_net_worth / age).mean()

92054.95705777119