In [1]:
import pandas as pd
import numpy as np

# Load the CSV file into a DataFrame; the path is relative to the current
# working directory.
df = pd.read_csv(filepath_or_buffer="CIS2022_PUMF.csv")

# ----------------------------------------------
# FUNCTION: Weighted Median
# ----------------------------------------------
def weighted_median(values, weights):
    """Return the weighted median of ``values``.

    The result is the smallest observed value whose cumulative weight
    (accumulated in ascending order of value) reaches at least half of the
    total weight. No interpolation is performed, so the result is always
    one of the input values.

    Parameters
    ----------
    values : array-like
        One-dimensional observations (pandas Series, NumPy array, list, ...).
    weights : array-like
        Weights paired positionally with ``values``; must have the same
        length. Pairing is by position, not by index label.

    Returns
    -------
    scalar
        The weighted median, or NaN when no usable observation remains
        (empty input, or every pair contains a missing value).

    Raises
    ------
    ValueError
        If the inputs are not one-dimensional with equal length, or if any
        weight is negative.
    """
    vals = np.asarray(values)
    wts = np.asarray(weights, dtype=float)

    if vals.ndim != 1 or vals.shape != wts.shape:
        raise ValueError(
            "values and weights must be one-dimensional and of equal length"
        )

    # Drop pairs with a missing value or weight: NaN has no defined sort
    # position and would otherwise poison the cumulative sum.
    usable = ~(pd.isna(vals) | np.isnan(wts))
    vals = vals[usable]
    wts = wts[usable]

    if vals.size == 0:
        return np.nan

    if (wts < 0).any():
        raise ValueError("weights must be non-negative")

    order = np.argsort(vals, kind="stable")
    cum_weights = np.cumsum(wts[order])
    cutoff = cum_weights[-1] / 2

    # cum_weights is non-decreasing (weights are non-negative), so a binary
    # search finds the first position whose cumulative weight >= cutoff.
    position = np.searchsorted(cum_weights, cutoff, side="left")
    return vals[order][position]

# ----------------------------------------------
# GROUP 1: Income < 30,000
# ----------------------------------------------
# Row filter for group 1: FWORKED equals 1 and EARNG is present, strictly
# positive and below 30,000.
is_low_earner = (
    (df["FWORKED"] == 1)
    & df["EARNG"].notna()
    & (df["EARNG"] > 0)
    & (df["EARNG"] < 30000)
)
df_low = df[is_low_earner]

# FWEIGHT provides the per-row weights for the weighted statistics.
w_low = df_low["FWEIGHT"]
earn_low = df_low["EARNG"]

# Unweighted statistics treat every row equally.
mean_low_unw = earn_low.mean()
median_low_unw = earn_low.median()

# Weighted statistics scale each row's contribution by its FWEIGHT.
mean_low_w = np.average(earn_low, weights=w_low)
median_low_w = weighted_median(earn_low, w_low)

# ----------------------------------------------
# GROUP 2: Income > 90,000
# ----------------------------------------------
# Row filter for group 2: FWORKED equals 1 and EARNG is present and
# strictly above 90,000.
is_high_earner = (
    (df["FWORKED"] == 1)
    & df["EARNG"].notna()
    & (df["EARNG"] > 90000)
)
df_high = df[is_high_earner]

# FWEIGHT provides the per-row weights for the weighted statistics.
w_high = df_high["FWEIGHT"]
earn_high = df_high["EARNG"]

# Unweighted statistics treat every row equally.
mean_high_unw = earn_high.mean()
median_high_unw = earn_high.median()

# Weighted statistics scale each row's contribution by its FWEIGHT.
mean_high_w = np.average(earn_high, weights=w_high)
median_high_w = weighted_median(earn_high, w_high)

# ----------------------------------------------
# PRINT RESULTS
# ----------------------------------------------
# Report layout: one (heading, rows) entry per income group, where each row
# is a (label, value) pair printed on its own line.
_report = (
    (
        "===== Income < 30K =====",
        (
            ("Unweighted mean:", mean_low_unw),
            ("Unweighted median:", median_low_unw),
            ("Weighted mean:", mean_low_w),
            ("Weighted median:", median_low_w),
        ),
    ),
    (
        # The leading newline leaves a blank line between the two sections.
        "\n===== Income > 90K =====",
        (
            ("Unweighted mean:", mean_high_unw),
            ("Unweighted median:", median_high_unw),
            ("Weighted mean:", mean_high_w),
            ("Weighted median:", median_high_w),
        ),
    ),
)

for _heading, _rows in _report:
    print(_heading)
    for _label, _value in _rows:
        print(_label, _value)


===== Income < 30K =====
Unweighted mean: 14028.017977988926
Unweighted median: 13500.0
Weighted mean: 13800.945129737156
Weighted median: 13000.0

===== Income > 90K =====
Unweighted mean: 145192.56944444444
Unweighted median: 120000.0
Weighted mean: 146879.8995673129
Weighted median: 120000.0
