<a href="https://colab.research.google.com/github/jacobmillerforever/ECON_506/blob/main/506_Final_Project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Introduction & Setup

In [1]:
!pip install fredapi
!pip install investpy



In [2]:
import pandas as pd
import yfinance as yf
import datetime as dt
from fredapi import Fred
import investpy

# Data Collection & Preparation

In [3]:
def get_ticker_data(ticker_dict, start_date, end_date, auto_adjust=True):
    """
    Downloads price history for several tickers, one DataFrame per ticker, with
    flat column names of the form Symbol_Field (e.g., SPY_Close).

    Parameters:
    -----------
    ticker_dict : dict
        Dictionary with display names as keys and ticker symbols as values
    start_date : str
        Start date in format 'YYYY-MM-DD'
    end_date : str
        End date in format 'YYYY-MM-DD'
    auto_adjust : bool, optional
        Forwarded to yf.download. Passed explicitly (default True) so the result
        does not depend on the library's own default, which has changed between
        yfinance releases.

    Returns:
    --------
    dict
        Dictionary with display names as keys and their respective DataFrames as
        values. A ticker for which nothing was downloaded is still present, mapped
        to an empty DataFrame, and a warning is printed for it.
    """
    ticker_dataframes = {}

    for display_name, ticker_symbol in ticker_dict.items():
        # One request per symbol so every ticker ends up in its own DataFrame
        data = yf.download(
            ticker_symbol,
            start=start_date,
            end=end_date,
            progress=False,
            auto_adjust=auto_adjust,
        )

        # Surface failed/empty downloads instead of storing them silently
        if data.empty:
            print(f"Warning: no data returned for {display_name} ({ticker_symbol})")

        # With multi-index columns the first level holds the field name
        # (Close, High, ...); otherwise the column label itself is the field name.
        has_multi_index = isinstance(data.columns, pd.MultiIndex)
        data.columns = [
            f"{ticker_symbol}_{col[0] if has_multi_index else col}"
            for col in data.columns
        ]

        # Store the DataFrame in the dictionary with display name as key
        ticker_dataframes[display_name] = data

    return ticker_dataframes

# Display name -> Yahoo Finance symbol for every series downloaded below.
# get_ticker_data uses the display name as the key of its result dictionary and
# the symbol as the prefix of each column name.
tickers = {
    # Global Indices
    'Nikkei 225 (Japan)': '^N225',
    'Hang Seng (Hong Kong)': '^HSI',
    'SSE Composite (China)': '000001.SS',
    'ASX 200 (Australia)': '^AXJO',
    'DAX (Germany)': '^GDAXI',
    'FTSE 100 (UK)': '^FTSE',
    'CAC 40 (France)': '^FCHI',
    'Euro Stoxx 50 (EU)': '^STOXX50E',
    'SPY (US)': 'SPY',


    # Volatility Indices
    'VIX (US)': '^VIX',
    'VIX Brazil': '^VXEWZ',
    'DAX Volatility': '^VDAX',

    # Currency Pairs
    # NOTE(review): Yahoo's 'JPY=X' and 'CNY=X' symbols appear to quote USD/JPY and
    # USD/CNY (units of foreign currency per US dollar), i.e. the inverse of the
    # 'JPY/USD' and 'CNY/USD' labels below -- confirm before interpreting the sign
    # of any currency move. Labels are left unchanged because they are dict keys.
    'US Dollar Index': 'DX-Y.NYB',
    'EUR/USD': 'EURUSD=X',
    'JPY/USD': 'JPY=X',
    'CNY/USD': 'CNY=X',

    # Commodities
    'Gold': 'GC=F',
    'Crude Oil': 'CL=F',
    'Silver': 'SI=F',
    'Corn': 'ZC=F',
    'Copper': 'HG=F'
}

# Sample window: from the start of 2000 up to the date on which the cell is run,
# so the downloaded range grows each time the notebook is re-executed.
start_date = '2000-01-01'
end_date = dt.datetime.now().strftime('%Y-%m-%d')

# Get individual DataFrames for each ticker
ticker_data = get_ticker_data(tickers, start_date, end_date)

# Display the first few rows and column names for each DataFrame
for display_name, df in ticker_data.items():
    print(f"\n{display_name} DataFrame:")
    print(f"Column names: {df.columns.tolist()}")
    print(df.head())

YF.download() has changed argument auto_adjust default to True

Nikkei 225 (Japan) DataFrame:
Column names: ['^N225_Close', '^N225_High', '^N225_Low', '^N225_Open', '^N225_Volume']
             ^N225_Close    ^N225_High     ^N225_Low    ^N225_Open  \
Date                                                                 
2000-01-04  19002.859375  19187.609375  18937.449219  18937.449219   
2000-01-05  18542.550781  19003.509766  18221.820312  19003.509766   
2000-01-06  18168.269531  18582.740234  18168.269531  18574.009766   
2000-01-07  18193.410156  18285.730469  18068.099609  18194.050781   
2000-01-11  18850.919922  18887.560547  18246.099609  18246.099609   

            ^N225_Volume  
Date                      
2000-01-04             0  
2000-01-05             0  
2000-01-06             0  
2000-01-07             0  
2000-01-11             0  

Hang Seng (Hong Kong) DataFrame:
Column names: ['^HSI_Close', '^HSI_High', '^HSI_Low', '^HSI_Open', '^HSI_Volume']
              ^HSI_Clos

In [4]:
def get_fred_data(api_key, series_list, start_date='2000-01-01', end_date=None):
    """
    Downloads several FRED series, each at the finest frequency the API will serve.

    For every series ID the frequency codes 'd', 'w', 'bw', 'm', 'q', 'sa', 'a' are
    requested in that order and the first non-empty result is kept. Progress and
    failures are reported with print(); request errors are not raised.

    Parameters:
    -----------
    api_key : str
        Your FRED API key
    series_list : list
        List of FRED series IDs as strings
    start_date : str, optional
        Start date in format 'YYYY-MM-DD', defaults to '2000-01-01'
    end_date : str, optional
        End date in format 'YYYY-MM-DD', defaults to current date

    Returns:
    --------
    dict
        Series IDs mapped to a one-column DataFrame whose column is named
        '<series_id>_value'
    dict
        Series IDs mapped to the frequency code that produced the data

    Series that could not be fetched at any frequency appear in neither dictionary.
    """
    client = Fred(api_key=api_key)

    # Fall back to today's date when no end date was supplied
    if end_date is None:
        end_date = dt.datetime.now().strftime('%Y-%m-%d')

    # The series request is made with datetime objects rather than strings
    date_format = '%Y-%m-%d'
    window_start = dt.datetime.strptime(start_date, date_format)
    window_end = dt.datetime.strptime(end_date, date_format)

    # Finest to coarsest; a given series only accepts some of these codes
    candidate_frequencies = ('d', 'w', 'bw', 'm', 'q', 'sa', 'a')

    fred_dataframes = {}
    fred_frequencies = {}

    for series_id in series_list:
        for freq in candidate_frequencies:
            try:
                observations = client.get_series(
                    series_id, window_start, window_end, frequency=freq
                )

                # An empty answer is treated like a miss: move on to the next code
                if observations.empty:
                    print(f"No data found for {series_id} with frequency '{freq}'")
                    continue

                frame = pd.DataFrame(observations)
                frame.columns = [f"{series_id}_value"]

                fred_dataframes[series_id] = frame
                fred_frequencies[series_id] = freq

                print(f"Successfully fetched data for {series_id} with frequency '{freq}'")
                # First working frequency wins; coarser ones are not tried
                break
            except Exception as e:
                # Rejected request (or any other error): try the next frequency
                print(f"Could not fetch {series_id} with frequency '{freq}': {e}")

        # Nothing stored means every frequency was rejected or came back empty
        if series_id not in fred_dataframes:
            print(f"Failed to fetch data for {series_id} with any available frequency")

    return fred_dataframes, fred_frequencies

from google.colab import userdata

# Read the FRED API key from Colab's secret store instead of hard-coding it in the
# notebook, where it is published together with the source.
# Setup: add a secret named FRED_API_KEY via the key icon in the Colab sidebar and
# grant this notebook access to it.
# SECURITY: the key that was previously committed in this cell should be revoked
# and replaced, since it remains visible in the repository history.
fred_api = userdata.get('FRED_API_KEY')

fred_series = [
    'DFF',           # Federal Funds Rate
    'T10Y2Y',        # 10-Year minus 2-Year Treasury Spread
    'CPIAUCSL',      # Consumer Price Index
    'UNRATE',        # Unemployment Rate
    'STLFSI',        # St. Louis Fed Financial Stress Index
    'M2SL',          # M2 Money Supply
    'USSLIND',       # US Leading Index
    'BAMLH0A0HYM2',  # High Yield Spread
    'GS5',           # 5-Year Treasury Rate
    'GS30',          # 30-Year Treasury Rate
    'BAMLC0A0CM'     # Corporate Bond Spread
]

# get_fred_data returns a (dataframes, frequencies) pair. `fred_data` keeps the
# whole tuple as before; the two dictionaries are also unpacked into their own
# names so later cells do not have to index the tuple by position.
fred_data = get_fred_data(fred_api, fred_series)
fred_dataframes, fred_frequencies = fred_data

Successfully fetched data for DFF with frequency 'd'
Successfully fetched data for T10Y2Y with frequency 'd'
Could not fetch CPIAUCSL with frequency 'd': Bad Request.  Value of frequency is not one of: 'm', 'q', 'sa', 'a'.
Could not fetch CPIAUCSL with frequency 'w': Bad Request.  Value of frequency is not one of: 'm', 'q', 'sa', 'a'.
Could not fetch CPIAUCSL with frequency 'bw': Bad Request.  Value of frequency is not one of: 'm', 'q', 'sa', 'a'.
Successfully fetched data for CPIAUCSL with frequency 'm'
Could not fetch UNRATE with frequency 'd': Bad Request.  Value of frequency is not one of: 'm', 'q', 'sa', 'a'.
Could not fetch UNRATE with frequency 'w': Bad Request.  Value of frequency is not one of: 'm', 'q', 'sa', 'a'.
Could not fetch UNRATE with frequency 'bw': Bad Request.  Value of frequency is not one of: 'm', 'q', 'sa', 'a'.
Successfully fetched data for UNRATE with frequency 'm'
Could not fetch STLFSI with frequency 'd': Bad Request.  Value of frequency is not one of: 'wef',

In [5]:
# High-importance US releases on monetary policy, inflation and employment.
# Dates are passed as day/month/year strings.
calendar_df = investpy.economic_calendar(
    countries=['united states'],
    categories=['monetary policy', 'inflation', 'employment'],
    importances=['high'],
    from_date='01/01/2000',
    to_date='31/12/2025',
)

# Keep only rows that carry an importance value, then renumber the rows from 0
has_importance = ~calendar_df['importance'].isna()
calendar_df = calendar_df.loc[has_importance].reset_index(drop=True)
calendar_df.tail()


Unnamed: 0,id,date,time,zone,currency,importance,event,actual,forecast,previous
4749,521809,05/05/2025,15:00,united states,USD,high,ISM Non-Manufacturing Prices (Apr),,,60.9
4750,522277,13/05/2025,13:30,united states,USD,high,Core CPI (MoM) (Apr),,,0.1%
4751,522278,13/05/2025,13:30,united states,USD,high,CPI (YoY) (Apr),,,2.4%
4752,522275,13/05/2025,13:30,united states,USD,high,CPI (MoM) (Apr),,,-0.1%
4753,522560,15/05/2025,13:30,united states,USD,high,PPI (MoM) (Apr),,,-0.4%


# Exploratory Data Analysis

# Feature Engineering

# Model Development

# Model Evaluation