# Relationship Between Home Purchase Sentiment Index on Housing Prices?

## Rate Hike Rangers (Group 10)

In [1]:
# Import the necessary libraries
import os
from dotenv import load_dotenv
import pandas as pd
import requests
from datetime import datetime


# Load environment variables from .env file
load_dotenv()

True

## FRED Data



In [2]:
# FRED API key
FRED_API_KEY = os.getenv('FRED_API_KEY')


In [3]:
# Helper function to fetch data from FRED API
def fetch_fred_data(series_id, start_date=None, end_date=None):
    """
    Fetch data from FRED API
    
    Parameters:
    series_id (str): FRED series identifier
    start_date (str): Start date in 'YYYY-MM-DD' format (optional)
    end_date (str): End date in 'YYYY-MM-DD' format (optional)
    
    Returns:
    pandas.DataFrame: DataFrame with date and value columns
    """
    
    base_url = "https://api.stlouisfed.org/fred/series/observations"
    
    params = {
        'series_id': series_id,
        'api_key': FRED_API_KEY,
        'file_type': 'json'
    }
    
    # Add date filters if provided
    if start_date:
        params['observation_start'] = start_date
    if end_date:
        params['observation_end'] = end_date
    
    # try:
    response = requests.get(base_url, params=params)
    response.raise_for_status()  # Raises an HTTPError for bad responses
    
    data = response.json()
    observations = data['observations']
    
    # Convert to DataFrame
    df = pd.DataFrame(observations)
    
    # Clean up the data
    df['date'] = pd.to_datetime(df['date'])
    df['value'] = pd.to_numeric(df['value'], errors='coerce')
    
    # Remove any rows where value is '.' (FRED's missing data indicator)
    df = df[df['value'].notna()]
    
    # Keep only date and value columns
    df = df[['date', 'value']].reset_index(drop=True)
    
    print(f"Successfully fetched {len(df)} observations for {series_id}")
    return df

In [4]:
# =============================================================================
# UMCSENT DATA 
# =============================================================================
umcsent_data = fetch_fred_data('UMCSENT')

print(f"\nUMCSENT Data Range: {umcsent_data['date'].min()} to {umcsent_data['date'].max()}")
print(f"Latest UMCSENT Value: {umcsent_data['value'].iloc[-1]}")
print(f"Total UMCSENT Observations: {len(umcsent_data)}")
print(umcsent_data.head())
print(umcsent_data.tail())


Successfully fetched 661 observations for UMCSENT

UMCSENT Data Range: 1952-11-01 00:00:00 to 2025-05-01 00:00:00
Latest UMCSENT Value: 52.2
Total UMCSENT Observations: 661
        date  value
0 1952-11-01   86.2
1 1953-02-01   90.7
2 1953-08-01   80.8
3 1953-11-01   80.7
4 1954-02-01   82.0
          date  value
656 2025-01-01   71.7
657 2025-02-01   64.7
658 2025-03-01   57.0
659 2025-04-01   52.2
660 2025-05-01   52.2


In [5]:
# =============================================================================
# HOME PURCHASE SENTIMENT INDEX (HPSI)
# =============================================================================
hpsi_data = fetch_fred_data('FMNHSHPSIUS')

print(f"\nHPSI Data Range: {hpsi_data['date'].min()} to {hpsi_data['date'].max()}")
print(f"Latest HPSI Value: {hpsi_data['value'].iloc[-1]}")
print(f"Total HPSI Observations: {len(hpsi_data)}")
print(hpsi_data.head())
print(hpsi_data.tail())


Successfully fetched 172 observations for FMNHSHPSIUS

HPSI Data Range: 2011-03-01 00:00:00 to 2025-06-01 00:00:00
Latest HPSI Value: 69.8
Total HPSI Observations: 172
        date  value
0 2011-03-01   60.0
1 2011-04-01   62.5
2 2011-05-01   60.3
3 2011-06-01   61.0
4 2011-07-01   60.0
          date  value
167 2025-02-01   71.6
168 2025-03-01   68.1
169 2025-04-01   69.2
170 2025-05-01   73.5
171 2025-06-01   69.8

HPSI Data Range: 2011-03-01 00:00:00 to 2025-06-01 00:00:00
Latest HPSI Value: 69.8
Total HPSI Observations: 172
        date  value
0 2011-03-01   60.0
1 2011-04-01   62.5
2 2011-05-01   60.3
3 2011-06-01   61.0
4 2011-07-01   60.0
          date  value
167 2025-02-01   71.6
168 2025-03-01   68.1
169 2025-04-01   69.2
170 2025-05-01   73.5
171 2025-06-01   69.8


In [6]:
# =============================================================================
# FHFA HOUSE PRICE INDEX (Seasonally Adjusted)
# =============================================================================
hpi_data = fetch_fred_data('HPIPONM226S')

print(f"\nFHFA House Price Index Data Range: {hpi_data['date'].min()} to {hpi_data['date'].max()}")
print(f"Latest FHFA House Price Index Value: {hpi_data['value'].iloc[-1]}")
print(f"Total FHFA House Price Index Observations: {len(hpi_data)}")
print(hpi_data.head())
print(hpi_data.tail())

Successfully fetched 412 observations for HPIPONM226S

FHFA House Price Index Data Range: 1991-01-01 00:00:00 to 2025-04-01 00:00:00
Latest FHFA House Price Index Value: 434.93
Total FHFA House Price Index Observations: 412
        date   value
0 1991-01-01  100.00
1 1991-02-01  100.39
2 1991-03-01  100.46
3 1991-04-01  100.29
4 1991-05-01  100.34
          date   value
407 2024-12-01  435.16
408 2025-01-01  436.53
409 2025-02-01  436.56
410 2025-03-01  436.69
411 2025-04-01  434.93


In [7]:
# =============================================================================
# FEDERAL FUNDS RATE
# =============================================================================
fed_rate_data = fetch_fred_data('FEDFUNDS')

print(f"\nFederal Funds Rate Data Range: {fed_rate_data['date'].min()} to {fed_rate_data['date'].max()}")
print(f"Latest Federal Funds Rate Value: {fed_rate_data['value'].iloc[-1]}")
print(f"Total Federal Funds Rate Observations: {len(fed_rate_data)}")
print(fed_rate_data.head())
print(fed_rate_data.tail())

Successfully fetched 852 observations for FEDFUNDS

Federal Funds Rate Data Range: 1954-07-01 00:00:00 to 2025-06-01 00:00:00
Latest Federal Funds Rate Value: 4.33
Total Federal Funds Rate Observations: 852
        date  value
0 1954-07-01   0.80
1 1954-08-01   1.22
2 1954-09-01   1.07
3 1954-10-01   0.85
4 1954-11-01   0.83
          date  value
847 2025-02-01   4.33
848 2025-03-01   4.33
849 2025-04-01   4.33
850 2025-05-01   4.33
851 2025-06-01   4.33


In [8]:
# =============================================================================
# HOUSING STARTS
# =============================================================================
housing_starts_data = fetch_fred_data('HOUST')

print(f"\nHousing Starts Data Range: {housing_starts_data['date'].min()} to {housing_starts_data['date'].max()}")
print(f"Latest Housing Starts Value: {housing_starts_data['value'].iloc[-1]}")
print(f"Total Housing Starts Observations: {len(housing_starts_data)}")
print(housing_starts_data.head())
print(housing_starts_data.tail())

Successfully fetched 797 observations for HOUST

Housing Starts Data Range: 1959-01-01 00:00:00 to 2025-05-01 00:00:00
Latest Housing Starts Value: 1256.0
Total Housing Starts Observations: 797
        date   value
0 1959-01-01  1657.0
1 1959-02-01  1667.0
2 1959-03-01  1620.0
3 1959-04-01  1590.0
4 1959-05-01  1498.0
          date   value
792 2025-01-01  1358.0
793 2025-02-01  1490.0
794 2025-03-01  1355.0
795 2025-04-01  1392.0
796 2025-05-01  1256.0


In [9]:
# =============================================================================
# BUILDING PERMITS
# =============================================================================
building_permits_data = fetch_fred_data('PERMIT')

print(f"\nBuilding Permits Data Range: {building_permits_data['date'].min()} to {building_permits_data['date'].max()}")
print(f"Latest Building Permits Value: {building_permits_data['value'].iloc[-1]}")
print(f"Total Building Permits Observations: {len(building_permits_data)}")
print(building_permits_data.head())
print(building_permits_data.tail())

Successfully fetched 785 observations for PERMIT

Building Permits Data Range: 1960-01-01 00:00:00 to 2025-05-01 00:00:00
Latest Building Permits Value: 1394.0
Total Building Permits Observations: 785
        date   value
0 1960-01-01  1092.0
1 1960-02-01  1088.0
2 1960-03-01   955.0
3 1960-04-01  1016.0
4 1960-05-01  1052.0
          date   value
780 2025-01-01  1460.0
781 2025-02-01  1454.0
782 2025-03-01  1481.0
783 2025-04-01  1422.0
784 2025-05-01  1394.0


In [10]:
# =============================================================================
# EXISTING HOME SALES
# =============================================================================
existing_home_sales_data = fetch_fred_data('EXHOSLUSM495S')

print(f"\nExisting Home Sales Data Range: {existing_home_sales_data['date'].min()} to {existing_home_sales_data['date'].max()}")
print(f"Latest Existing Home Sales Value: {existing_home_sales_data['value'].iloc[-1]}")
print(f"Total Existing Home Sales Observations: {len(existing_home_sales_data)}")
print(existing_home_sales_data.head())
print(existing_home_sales_data.tail())

Successfully fetched 13 observations for EXHOSLUSM495S

Existing Home Sales Data Range: 2024-05-01 00:00:00 to 2025-05-01 00:00:00
Latest Existing Home Sales Value: 4030000.0
Total Existing Home Sales Observations: 13
        date      value
0 2024-05-01  4060000.0
1 2024-06-01  3930000.0
2 2024-07-01  3980000.0
3 2024-08-01  3930000.0
4 2024-09-01  3900000.0
         date      value
8  2025-01-01  4090000.0
9  2025-02-01  4270000.0
10 2025-03-01  4020000.0
11 2025-04-01  4000000.0
12 2025-05-01  4030000.0


In [11]:
# =============================================================================
# NEW HOME SALES
# =============================================================================
new_home_sales_data = fetch_fred_data('NHSUSSPT')

print(f"\nNew Home Sales Data Range: {new_home_sales_data['date'].min()} to {new_home_sales_data['date'].max()}")
print(f"Latest New Home Sales Value: {new_home_sales_data['value'].iloc[-1]}")
print(f"Total New Home Sales Observations: {len(new_home_sales_data)}")
print(new_home_sales_data.head())
print(new_home_sales_data.tail())

Successfully fetched 65 observations for NHSUSSPT

New Home Sales Data Range: 2020-01-01 00:00:00 to 2025-05-01 00:00:00
Latest New Home Sales Value: 56.0
Total New Home Sales Observations: 65
        date  value
0 2020-01-01   59.0
1 2020-02-01   63.0
2 2020-03-01   59.0
3 2020-04-01   52.0
4 2020-05-01   64.0
         date  value
60 2025-01-01   56.0
61 2025-02-01   56.0
62 2025-03-01   63.0
63 2025-04-01   67.0
64 2025-05-01   56.0


In [12]:
# =============================================================================
# 30-YEAR MORTGAGE RATE
# =============================================================================
mortgage_30yr_data = fetch_fred_data('MORTGAGE30US')

print(f"\n30-Year Mortgage Rate Data Range: {mortgage_30yr_data['date'].min()} to {mortgage_30yr_data['date'].max()}")
print(f"Latest 30-Year Mortgage Rate Value: {mortgage_30yr_data['value'].iloc[-1]}")
print(f"Total 30-Year Mortgage Rate Observations: {len(mortgage_30yr_data)}")
print(mortgage_30yr_data.head())
print(mortgage_30yr_data.tail())

Successfully fetched 2832 observations for MORTGAGE30US

30-Year Mortgage Rate Data Range: 1971-04-02 00:00:00 to 2025-07-03 00:00:00
Latest 30-Year Mortgage Rate Value: 6.67
Total 30-Year Mortgage Rate Observations: 2832
        date  value
0 1971-04-02   7.33
1 1971-04-09   7.31
2 1971-04-16   7.31
3 1971-04-23   7.31
4 1971-04-30   7.29
           date  value
2827 2025-06-05   6.85
2828 2025-06-12   6.84
2829 2025-06-18   6.81
2830 2025-06-26   6.77
2831 2025-07-03   6.67


In [17]:
# =============================================================================
# COMBINE DATASETS INTO SINGLE DATAFRAME
# =============================================================================

# Rename value columns to be descriptive
umcsent_renamed = umcsent_data.rename(columns={'value': 'umcsent'})
hpsi_renamed = hpsi_data.rename(columns={'value': 'hpsi'})
hpi_renamed = hpi_data.rename(columns={'value': 'hpi'})
fed_rate_renamed = fed_rate_data.rename(columns={'value': 'fed_funds_rate'})
housing_starts_renamed = housing_starts_data.rename(columns={'value': 'housing_starts'})
building_permits_renamed = building_permits_data.rename(columns={'value': 'building_permits'})
existing_home_sales_renamed = existing_home_sales_data.rename(columns={'value': 'existing_home_sales'})
new_home_sales_renamed = new_home_sales_data.rename(columns={'value': 'new_home_sales'})
mortgage_30yr_renamed = mortgage_30yr_data.rename(columns={'value': 'mortgage_30yr'})

# Combine all datasets using outer join (keeps all dates)
merged_FRED_df = umcsent_renamed.merge(hpsi_renamed, on='date', how='outer') \
                               .merge(hpi_renamed, on='date', how='outer') \
                               .merge(fed_rate_renamed, on='date', how='outer') \
                               .merge(housing_starts_renamed, on='date', how='outer') \
                               .merge(building_permits_renamed, on='date', how='outer') \
                               .merge(existing_home_sales_renamed, on='date', how='outer') \
                               .merge(new_home_sales_renamed, on='date', how='outer') \
                               .merge(mortgage_30yr_renamed, on='date', how='outer')

# Sort by date
merged_FRED_df = merged_FRED_df.sort_values('date').reset_index(drop=True)

print("\n" + "="*50)
print("COMBINED DATASET")
print("="*50)

print(f"\nCombined Data Range: {merged_FRED_df['date'].min()} to {merged_FRED_df['date'].max()}")
print(f"Total Combined Observations: {len(merged_FRED_df)}")
print(f"Columns: {list(merged_FRED_df.columns)}")
print(merged_FRED_df.head())

print(f"\nData availability by column:")
print(merged_FRED_df.isnull().sum())

# Save the combined dataset to a CSV file
merged_FRED_df.to_csv("data_outputs/combined_FRED_data.csv", index=False)

print(f"\nCombined data saved to data_outputs/combined_FRED_data.csv")


COMBINED DATASET

Combined Data Range: 1952-11-01 00:00:00 to 2025-07-03 00:00:00
Total Combined Observations: 3603
Columns: ['date', 'umcsent', 'hpsi', 'hpi', 'fed_funds_rate', 'housing_starts', 'building_permits', 'existing_home_sales', 'new_home_sales', 'mortgage_30yr']
        date  umcsent  hpsi  hpi  fed_funds_rate  housing_starts  \
0 1952-11-01     86.2   NaN  NaN             NaN             NaN   
1 1953-02-01     90.7   NaN  NaN             NaN             NaN   
2 1953-08-01     80.8   NaN  NaN             NaN             NaN   
3 1953-11-01     80.7   NaN  NaN             NaN             NaN   
4 1954-02-01     82.0   NaN  NaN             NaN             NaN   

   building_permits  existing_home_sales  new_home_sales  mortgage_30yr  
0               NaN                  NaN             NaN            NaN  
1               NaN                  NaN             NaN            NaN  
2               NaN                  NaN             NaN            NaN  
3               NaN 

## Open Question (Data Cleaning):

* HPSI doesn't provide data before Mar-2011
* USMCSENT doesn't have June 2025
* FHFA doesn't have May or June in 2025

### Remove all incomplete cases (rows with any NaN values)

In [14]:
# Create a version with only complete cases (no missing values)
merged_FRED_no_NaN_df = merged_FRED_df.copy()
merged_FRED_no_NaN_df = merged_FRED_no_NaN_df.dropna()
print(f"\nComplete cases (no missing values): {len(merged_FRED_no_NaN_df)} rows")
if len(merged_FRED_no_NaN_df) > 0:
    print(f"Complete cases date range: {merged_FRED_no_NaN_df['date'].min()} to {merged_FRED_no_NaN_df['date'].max()}")
    print(f"\nSample of complete cases:")
    print(merged_FRED_no_NaN_df.head())


Complete cases (no missing values): 1 rows
Complete cases date range: 2024-08-01 00:00:00 to 2024-08-01 00:00:00

Sample of complete cases:
           date  umcsent  hpsi     hpi  fed_funds_rate  housing_starts  \
3545 2024-08-01     67.9  72.1  426.36            5.33          1391.0   

      building_permits  existing_home_sales  new_home_sales  mortgage_30yr  
3545            1476.0            3930000.0            56.0           6.73  


### Remove Home Price Sentiment Index Column and May/June of UMCSENT and FHFA

In [15]:
# Create a version without HPSI column, then remove any remaining missing values
merged_FRED_no_HPSI_df = merged_FRED_df.copy()
merged_FRED_no_HPSI_df = merged_FRED_no_HPSI_df.drop(columns=['hpsi'])
merged_FRED_no_HPSI_df = merged_FRED_no_HPSI_df.dropna()

print(f"\nNo HPSI, no missing values: {len(merged_FRED_no_HPSI_df)} rows")
if len(merged_FRED_no_HPSI_df) > 0:
    print(f"Date range: {merged_FRED_no_HPSI_df['date'].min()} to {merged_FRED_no_HPSI_df['date'].max()}")
    print(f"Columns: {list(merged_FRED_no_HPSI_df.columns)}")
    print(f"\nSample data:")
    print(merged_FRED_no_HPSI_df.head())


No HPSI, no missing values: 1 rows
Date range: 2024-08-01 00:00:00 to 2024-08-01 00:00:00
Columns: ['date', 'umcsent', 'hpi', 'fed_funds_rate', 'housing_starts', 'building_permits', 'existing_home_sales', 'new_home_sales', 'mortgage_30yr']

Sample data:
           date  umcsent     hpi  fed_funds_rate  housing_starts  \
3545 2024-08-01     67.9  426.36            5.33          1391.0   

      building_permits  existing_home_sales  new_home_sales  mortgage_30yr  
3545            1476.0            3930000.0            56.0           6.73  


In [16]:
# =============================================================================
# DATASET ENHANCEMENT STRATEGY
# =============================================================================

print("🚀 DATASET ENHANCEMENT RECOMMENDATIONS")
print("="*60)

# =============================================================================
# 1. FEATURE ENGINEERING FROM EXISTING DATA
# =============================================================================

print("\n1️⃣ FEATURE ENGINEERING (Free - Use Existing Data)")
print("-" * 50)

# Create enhanced dataset with engineered features
enhanced_df = merged_FRED_no_HPSI_df.copy()

# Sort by date to ensure proper ordering
enhanced_df = enhanced_df.sort_values('date').reset_index(drop=True)

# Calculate rolling averages
enhanced_df['umcsent_3m_avg'] = enhanced_df['umcsent'].rolling(window=3).mean()
enhanced_df['umcsent_12m_avg'] = enhanced_df['umcsent'].rolling(window=12).mean()
enhanced_df['hpi_3m_avg'] = enhanced_df['hpi'].rolling(window=3).mean()
enhanced_df['fed_rate_3m_avg'] = enhanced_df['fed_funds_rate'].rolling(window=3).mean()

# Calculate percentage changes
enhanced_df['umcsent_pct_change'] = enhanced_df['umcsent'].pct_change()
enhanced_df['hpi_pct_change'] = enhanced_df['hpi'].pct_change()
enhanced_df['fed_rate_change'] = enhanced_df['fed_funds_rate'].diff()

# Calculate year-over-year changes
enhanced_df['umcsent_yoy'] = enhanced_df['umcsent'].pct_change(periods=12)
enhanced_df['hpi_yoy'] = enhanced_df['hpi'].pct_change(periods=12)

# Create volatility measures (rolling standard deviation)
enhanced_df['umcsent_volatility'] = enhanced_df['umcsent'].rolling(window=12).std()
enhanced_df['fed_rate_volatility'] = enhanced_df['fed_funds_rate'].rolling(window=12).std()

# Create time-based features
enhanced_df['year'] = enhanced_df['date'].dt.year
enhanced_df['month'] = enhanced_df['date'].dt.month
enhanced_df['quarter'] = enhanced_df['date'].dt.quarter

# Create regime indicators
enhanced_df['fed_rate_regime'] = pd.cut(enhanced_df['fed_funds_rate'], 
                                       bins=[0, 2, 5, 10, 20], 
                                       labels=['Very_Low', 'Low', 'Normal', 'High'])

enhanced_df['sentiment_regime'] = pd.cut(enhanced_df['umcsent'], 
                                        bins=[0, 70, 85, 100, 120], 
                                        labels=['Pessimistic', 'Cautious', 'Optimistic', 'Very_Optimistic'])

print(f"✅ Added {len(enhanced_df.columns) - 4} engineered features")
print(f"   New columns: {[col for col in enhanced_df.columns if col not in ['date', 'umcsent', 'hpi', 'fed_funds_rate']]}")

# =============================================================================
# 2. ADDITIONAL FRED DATA SERIES
# =============================================================================

print(f"\n2️⃣ ADDITIONAL FRED DATA SERIES (Recommended)")
print("-" * 50)

additional_fred_series = {
    'Economic Indicators': [
        'UNRATE (Unemployment Rate)',
        'GDPC1 (Real GDP)', 
        'CPIAUCSL (Consumer Price Index)',
        'DGS10 (10-Year Treasury Rate)',
        'DGS2 (2-Year Treasury Rate)'
    ],
    'Housing Market': [
        'HOUST (Housing Starts)',
        'PERMIT (Building Permits)',
        'EXHOSLUSM495S (Existing Home Sales)',
        'NHSUSSPT (New Home Sales)',
        'MORTGAGE30US (30-Year Mortgage Rate)'
    ],
    'Financial Markets': [
        'SP500 (S&P 500 Index)',
        'DEXUSEU (USD/EUR Exchange Rate)',
        'DCOILWTICO (Oil Prices)',
        'GOLDAMGBD228NLBM (Gold Prices)'
    ],
    'Labor Market': [
        'PAYEMS (Nonfarm Payrolls)',
        'AWHMAN (Average Weekly Hours)',
        'CES0500000003 (Average Hourly Earnings)'
    ]
}

for category, series_list in additional_fred_series.items():
    print(f"\n{category}:")
    for series in series_list:
        print(f"   • {series}")

# =============================================================================
# 3. EXTERNAL DATA SOURCES
# =============================================================================

print(f"\n3️⃣ EXTERNAL DATA SOURCES")
print("-" * 50)

external_sources = {
    'Google Trends': [
        'Search interest for "buy house", "mortgage rates", "real estate"',
        'Regional housing search trends'
    ],
    'Demographic Data': [
        'Census Bureau population data',
        'Age distribution (millennials entering housing market)',
        'Income distribution data'
    ],
    'Policy/Events': [
        'Presidential election cycles',
        'Federal Reserve meeting dates',
        'Major policy announcements'
    ],
    'Regional Data': [
        'State-level house prices',
        'Metropolitan area economic indicators',
        'Regional employment data'
    ]
}

for source, data_types in external_sources.items():
    print(f"\n{source}:")
    for data_type in data_types:
        print(f"   • {data_type}")

# =============================================================================
# 4. SAMPLE ENHANCED DATASET
# =============================================================================

print(f"\n4️⃣ SAMPLE ENHANCED DATASET")
print("-" * 50)
print("Current dataset with engineered features:")
print(f"Observations: {len(enhanced_df)}")
print(f"Features: {len(enhanced_df.columns)}")
print("\nSample with new features:")
sample_cols = ['date', 'umcsent', 'hpi', 'fed_funds_rate', 'umcsent_yoy', 'hpi_yoy', 'sentiment_regime']
print(enhanced_df[sample_cols].tail().to_string(index=False))

# =============================================================================
# 5. PROJECT IDEAS BASED ON ENHANCED DATA
# =============================================================================

print(f"\n5️⃣ DATA SCIENCE PROJECT IDEAS")
print("-" * 50)

project_ideas = [
    "🏠 House Price Prediction Model",
    "📊 Consumer Sentiment Impact Analysis", 
    "💰 Fed Rate Policy Effect Modeling",
    "📈 Economic Regime Classification",
    "🔮 Recession Prediction Using Sentiment",
    "🎯 Optimal Home Buying Time Predictor",
    "📉 Sentiment-Price Relationship Analysis",
    "🌊 Economic Cycle Phase Detection"
]

for i, idea in enumerate(project_ideas, 1):
    print(f"{i}. {idea}")

print(f"\n🎯 NEXT STEPS:")
print("1. Start with feature engineering (free and immediate)")
print("2. Add 3-5 key FRED series based on your focus area") 
print("3. Choose a specific project goal")
print("4. Build and validate your model")

# Save enhanced dataset
print(f"\n💾 Saving enhanced dataset...")
enhanced_df.to_csv('enhanced_dataset.csv', index=False)
print("✅ Enhanced dataset saved as 'enhanced_dataset.csv'")

🚀 DATASET ENHANCEMENT RECOMMENDATIONS

1️⃣ FEATURE ENGINEERING (Free - Use Existing Data)
--------------------------------------------------
✅ Added 21 engineered features
   New columns: ['housing_starts', 'building_permits', 'existing_home_sales', 'new_home_sales', 'mortgage_30yr', 'umcsent_3m_avg', 'umcsent_12m_avg', 'hpi_3m_avg', 'fed_rate_3m_avg', 'umcsent_pct_change', 'hpi_pct_change', 'fed_rate_change', 'umcsent_yoy', 'hpi_yoy', 'umcsent_volatility', 'fed_rate_volatility', 'year', 'month', 'quarter', 'fed_rate_regime', 'sentiment_regime']

2️⃣ ADDITIONAL FRED DATA SERIES (Recommended)
--------------------------------------------------

Economic Indicators:
   • UNRATE (Unemployment Rate)
   • GDPC1 (Real GDP)
   • CPIAUCSL (Consumer Price Index)
   • DGS10 (10-Year Treasury Rate)
   • DGS2 (2-Year Treasury Rate)

Housing Market:
   • HOUST (Housing Starts)
   • PERMIT (Building Permits)
   • EXHOSLUSM495S (Existing Home Sales)
   • NHSUSSPT (New Home Sales)
   • MORTGAGE30US (30