In [1]:
# Load Dependencies
import pandas as pd
import numpy as np

In [2]:
# Read in the CSV file
# U.S. Census Bureau, Real Median Family Income in the United States [MEFAINUSA672N], 
# retrieved from FRED, Federal Reserve Bank of St. Louis; 
# https://fred.stlouisfed.org/series/MEFAINUSA672N, June 11, 2025.

median_income = pd.read_csv("../Resources/MEFAINUSA672N.csv")

In [3]:
# Create the DataFrame
median_income_df = pd.DataFrame(median_income)

In [4]:
# Check data types
median_income_df.dtypes

observation_date    object
MEFAINUSA672N        int64
dtype: object

In [5]:
# Rename the columns
median_income_df.rename(columns={'observation_date': 'Date', 'MEFAINUSA672N': 'Median_Household_Income'}, inplace=True)

In [6]:
# Update data types
median_income_df['Date'] = pd.to_datetime(median_income_df['Date'])

In [7]:
# Display DataFrame
# Units:  2023 CPI-U-RS Adjusted Dollars
median_income_df.head()

Unnamed: 0,Date,Median_Household_Income
0,1975-01-01,66480
1,1976-01-01,68580
2,1977-01-01,68940
3,1978-01-01,71100
4,1979-01-01,72120


In [8]:
# Get the range of years in your dataset
years = median_income_df["Date"].dt.year.unique()

# Generate only the missing quarterly start dates (4/1, 7/1, and 10/1) for each year
quarterly_dates = [pd.Timestamp(year, month, 1) for year in years for month in [4, 7, 10]]

# Create a DataFrame for the missing quarterly dates
missing_quarters_df = pd.DataFrame({"Date": quarterly_dates})

# Merge with your existing income data, forward-filling to fill the quarterly gaps
full_income_df = pd.concat([median_income_df, missing_quarters_df], ignore_index=True).sort_values("Date")
full_income_df["Median_Household_Income"] = full_income_df["Median_Household_Income"].ffill()

# Display the updated DataFrame
full_income_df.head()

Unnamed: 0,Date,Median_Household_Income
0,1975-01-01,66480.0
49,1975-04-01,66480.0
50,1975-07-01,66480.0
51,1975-10-01,66480.0
1,1976-01-01,68580.0


In [9]:
# Check end of updated DataFrame
full_income_df.tail()

Unnamed: 0,Date,Median_Household_Income
192,2022-10-01,96430.0
48,2023-01-01,100800.0
193,2023-04-01,100800.0
194,2023-07-01,100800.0
195,2023-10-01,100800.0


In [10]:
# Save DataFrame to CSV inside the Output folder
full_income_df.to_csv("../Output/median_income.csv", index=False)

In [11]:
import os

# Check to make sure file saved
output_files = ["median_income.csv"]
for file in output_files:
    print(file, "exists:", os.path.exists(f"../Output/{file}"))

median_income.csv exists: True
