In [1]:
# EDA Workbook

In [None]:
# FRED API Key

In [ ]:
import os
from dotenv import load_dotenv

# Read variables from the .env file into the process environment
load_dotenv()

# Look up the FRED credential; os.getenv yields None when the variable is unset
FRED_API_KEY = os.getenv('FRED_API_KEY')

# Optional sanity check: report whether a non-empty key was picked up
print(
    "FRED API key loaded successfully"
    if FRED_API_KEY
    else "FRED API key not found in .env file"
)

In [2]:
# Data Sources


In [3]:
# UMich Consumer Data

### Home Purchase Sentiment Index

https://fredblog.stlouisfed.org/2025/05/the-home-purchase-sentiment-index/

In [ ]:
# Install fredapi if not already installed
!pip install fredapi -q

In [ ]:
import pandas as pd
from fredapi import Fred
import matplotlib.pyplot as plt

# Open a FRED client using the key loaded earlier in the notebook
fred = Fred(api_key=FRED_API_KEY)

# HPSI might not be published on FRED directly, so pull related housing and
# consumer-sentiment indicators of the kind mentioned in the blog post instead.

# FRED series ID -> human-readable description
housing_series = dict(
    UMCSENT='University of Michigan: Consumer Sentiment',
    HOUST='Housing Starts: Total',
    MORTGAGE30US='30-Year Fixed Rate Mortgage Average',
    CSUSHPISA='S&P/Case-Shiller U.S. National Home Price Index',
    RRVRUSQ156N='Rental Vacancy Rate',
    NHSUSSPT='New One Family Houses Sold',
)

# Download every series; a failed download is reported and skipped so the
# remaining series are still collected.
print("Fetching housing-related data from FRED...")
data_dict = {}

for series_id, description in housing_series.items():
    try:
        data = fred.get_series(series_id, observation_start='2011-03-01')
        data_dict[series_id] = data
    except Exception as e:
        print(f"✗ Error fetching {series_id}: {e}")
    else:
        print(f"✓ Fetched {series_id}: {description}")

# Combine the fetched series into one frame (one column per series ID) and
# label the index
df_housing = pd.DataFrame(data_dict).rename_axis('date')

# Summarize the combined frame, then show its first rows as the cell output
print(
    f"\nDataframe created with shape: {df_housing.shape}",
    f"Date range: {df_housing.index.min()} to {df_housing.index.max()}",
    f"\nColumns: {list(df_housing.columns)}",
    "\nFirst few rows:",
    sep="\n",
)
df_housing.head()

### Note on Home Purchase Sentiment Index (HPSI)

The Fannie Mae Home Purchase Sentiment Index (HPSI) is not directly available through the FRED API. The HPSI is proprietary data from Fannie Mae's National Housing Survey.

Instead, I've created a dataframe with related housing market indicators that are available through FRED:
- **UMCSENT**: University of Michigan Consumer Sentiment (similar sentiment measure)
- **HOUST**: Housing Starts (housing market activity)
- **MORTGAGE30US**: 30-Year Mortgage Rates (key factor in home purchasing decisions)
- **CSUSHPISA**: Case-Shiller Home Price Index (home price trends)
- **RRVRUSQ156N**: Rental Vacancy Rate (housing market conditions)
- **NHSUSSPT**: New Home Sales (housing demand indicator)

import pandas as pd
from fredapi import Fred
import matplotlib.pyplot as plt

# Initialize FRED API connection
fred = Fred(api_key=FRED_API_KEY)

# Search for Home Purchase Sentiment Index series
print("Searching for Home Purchase Sentiment Index series...")
search_results = fred.search('Home Purchase Sentiment Index', limit=10)

# fredapi's search may hand back None (rather than an empty DataFrame) when
# nothing matches; guard before subscripting so a search with no hits reports
# that fact instead of raising a TypeError.
if search_results is None or search_results.empty:
    print("No matching series found on FRED.")
else:
    print(search_results[['title', 'units', 'frequency']])

### Note on Home Purchase Sentiment Index (HPSI)

The Fannie Mae Home Purchase Sentiment Index (HPSI) is not directly available through the FRED API. The HPSI is proprietary data from Fannie Mae's National Housing Survey.

Instead, I've created a dataframe with related housing market indicators that are available through FRED:
- **UMCSENT**: University of Michigan Consumer Sentiment (similar sentiment measure)
- **HOUST**: Housing Starts (housing market activity)
- **MORTGAGE30US**: 30-Year Mortgage Rates (key factor in home purchasing decisions)
- **CSUSHPISA**: Case-Shiller Home Price Index (home price trends)
- **RRVRUSQ156N**: Rental Vacancy Rate (housing market conditions)
- **NHSUSSPT**: New Home Sales (housing demand indicator)

These indicators together provide insights into housing market sentiment and conditions similar to what HPSI tracks.

In [ ]:
# The fetched series do not share a frequency, so bring everything onto a
# common monthly grid by averaging the observations that fall in each month.
df_monthly = df_housing.resample('M').mean()

# Quarterly data: carry the latest reading forward into the months without one
df_monthly = df_monthly.assign(
    RRVRUSQ156N=lambda monthly: monthly['RRVRUSQ156N'].ffill()
)

# Report the resampled shape, then show the most recent rows as the cell output
print(
    f"Monthly resampled dataframe shape: {df_monthly.shape}",
    "\nLast 5 rows of the dataframe:",
    sep="\n",
)
df_monthly.tail()

In [ ]:
# Check data availability and missing values
print("\nData availability summary:")
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() appended a stray "None" line.
df_housing.info()
print("\nMissing values by column:")
# Count of NaN entries per column
print(df_housing.isnull().sum())

In [ ]:
# NOTE(review): this cell repeats the FRED download performed earlier in the
# notebook and overwrites df_housing with a fresh copy — consider keeping
# only one of the two cells.

# HPSI might not be on FRED directly, so fall back to related indicators from
# the blog post: consumer sentiment plus housing-market measures.

# Mapping of FRED series ID to its description
housing_series = dict([
    ('UMCSENT', 'University of Michigan: Consumer Sentiment'),
    ('HOUST', 'Housing Starts: Total'),
    ('MORTGAGE30US', '30-Year Fixed Rate Mortgage Average'),
    ('CSUSHPISA', 'S&P/Case-Shiller U.S. National Home Price Index'),
    ('RRVRUSQ156N', 'Rental Vacancy Rate'),
    ('NHSUSSPT', 'New One Family Houses Sold'),
])

# Pull each series in turn; errors are printed and that series is skipped
print("Fetching housing-related data from FRED...")
data_dict = {}

for series_id, description in housing_series.items():
    try:
        data = fred.get_series(series_id, observation_start='2011-03-01')
    except Exception as e:
        print(f"✗ Error fetching {series_id}: {e}")
        continue
    data_dict[series_id] = data
    print(f"✓ Fetched {series_id}: {description}")

# Assemble one frame with a column per fetched series and name its index
df_housing = pd.DataFrame(data_dict)
df_housing = df_housing.rename_axis('date')

# Print a short summary; the head of the frame is the cell's displayed output
date_min, date_max = df_housing.index.min(), df_housing.index.max()
print(f"\nDataframe created with shape: {df_housing.shape}")
print(f"Date range: {date_min} to {date_max}")
print(f"\nColumns: {list(df_housing.columns)}")
print("\nFirst few rows:")
df_housing.head()

In [ ]:
# Let's also search for Fannie Mae specific series
print("\nSearching for Fannie Mae series...")
fannie_search = fred.search('Fannie Mae', limit=20)

# fredapi's search may hand back None (rather than an empty DataFrame) when
# nothing matches; guard before subscripting so a search with no hits reports
# that fact instead of raising a TypeError.
if fannie_search is None or fannie_search.empty:
    print("No matching series found on FRED.")
else:
    # Show only the first 10 of the (up to 20) hits
    print(fannie_search[['id', 'title', 'units', 'frequency']].head(10))

In [ ]:
import pandas as pd
from fredapi import Fred
import matplotlib.pyplot as plt

# Initialize FRED API connection
fred = Fred(api_key=FRED_API_KEY)

# The Home Purchase Sentiment Index from Fannie Mae doesn't have a direct FRED series
# Let's search for related housing sentiment indicators
print("Searching for housing sentiment related series...")
search_results = fred.search('housing sentiment', limit=20)

# fredapi's search may hand back None (rather than an empty DataFrame) when
# nothing matches; guard before subscripting so a search with no hits reports
# that fact instead of raising a TypeError.
if search_results is None or search_results.empty:
    print("\nNo matching series found on FRED.")
else:
    # Show only the first 10 of the (up to 20) hits
    print("\nTop results:")
    print(search_results[['id', 'title', 'units', 'frequency']].head(10))

In [ ]:
# Import the Fannie Mae HPI data from the Excel file
import pandas as pd

# Load the Fannie Mae HPI spreadsheet (path is relative to the notebook's
# working directory) into a DataFrame
fannie_mae_hpi = pd.read_excel('data_sources/fannie-mae-hpi-sa.xlsx')

# Summarize what was loaded: dimensions first, then the column labels
print(
    "Fannie Mae HPI Data:",
    f"Shape: {fannie_mae_hpi.shape}",
    "\nColumn names:",
    sep="\n",
)
print(fannie_mae_hpi.columns.tolist())

# Preview the first rows as the cell's displayed output
print("\nFirst 5 rows:")
fannie_mae_hpi.head()