In [None]:
# Import necessary libraries
import os
from datetime import datetime, timedelta

import holidays
import numpy as np
import pandas as pd
import pandas_ta as ta
import yfinance as yf
from fredapi import Fred

# Initialize the FRED client.
# The API key is read from the FRED_API_KEY environment variable so the
# credential never lives in the notebook (a key that was committed here
# previously should be treated as leaked and rotated). When the variable is
# unset, api_key is None and fredapi is left to do its own key lookup.
fred = Fred(api_key=os.environ.get('FRED_API_KEY'))

# Define today's date (midnight, so it can be compared with date-only indices)
today = pd.Timestamp.today().normalize()

# Define economic indicators and their series IDs.
# Keys become column names downstream; values are the FRED series identifiers.
indicators = {
    'Effective Federal Funds Rate': 'FEDFUNDS',
    '10-Year Treasury Rate': 'DGS10',
    'Consumer Price Index': 'CPIAUCSL',
    'Producer Price Index': 'PPIACO',
    'Unemployment Rate': 'UNRATE',
    'Nonfarm Payroll Employment': 'PAYEMS',
    'Real GDP': 'GDPC1',
    'Housing Starts': 'HOUST',
    'Industrial Production Index': 'INDPRO',
    'M2 Money Stock': 'M2SL',
    'Crude Oil Prices': 'DCOILWTICO',
    'Retail Sales': 'RSXFS',
    'Total Business Inventories': 'BUSINV'
}

In [None]:
def fetch_economic_data_for_today(indicators):
    """Collect the latest available observation of each FRED series.

    Parameters
    ----------
    indicators : dict
        Maps a display name (used as the output column name) to a FRED
        series ID.

    Returns
    -------
    pandas.DataFrame
        One row, indexed by the module-level ``today``, with one column per
        indicator that was fetched successfully. The frame is empty (with a
        DatetimeIndex) when nothing could be fetched.

    Notes
    -----
    Fetching is best-effort: any exception raised for a series is printed and
    that series is skipped. Reads the module-level ``fred`` client.
    """
    latest_values = {}
    for name, series_id in indicators.items():
        try:
            data = fred.get_series_latest_release(series_id)
            if data is not None:
                # A series can end in missing observations; taking iloc[-1]
                # blindly would then inject NaN into the feature row.
                data = data.dropna()
            if data is None or data.empty:
                print(f"No data fetched for {name}")
                continue
            latest_value = data.iloc[-1]
            latest_date = data.index[-1]
            # Use the latest available value for today
            latest_values[name] = latest_value
            print(f"Successfully fetched data for {name}: {latest_value} on {latest_date.date()}")
        except Exception as e:
            print(f"Error fetching {name}: {e}")

    if not latest_values:
        return pd.DataFrame(index=pd.DatetimeIndex([]))

    # Build the single-row frame in one step instead of growing it cell by cell.
    return pd.DataFrame(latest_values, index=pd.DatetimeIndex([today]))

In [4]:
def fetch_stock_data(ticker):
    """Download about one year of price history for ``ticker`` up to today.

    The window starts 365 days before the module-level ``today`` so that the
    technical indicators computed downstream have enough history.

    Returns the downloaded DataFrame with a DatetimeIndex and single-level
    columns, or ``None`` (after printing a message) when nothing came back.
    """
    date_format = '%Y-%m-%d'
    window_start = today - timedelta(days=365)
    # One extra day on the end bound so that today's data is included.
    window_end = today + timedelta(days=1)

    prices = yf.download(
        ticker,
        start=window_start.strftime(date_format),
        end=window_end.strftime(date_format),
        progress=False,
        group_by='ticker',
    )

    if prices.empty:
        print(f"No stock data fetched for {ticker} between {window_start.date()} and {today.date()}")
        return None

    prices.index = pd.to_datetime(prices.index)

    # When the columns come back as a MultiIndex, keep only the second level.
    columns = prices.columns
    if isinstance(columns, pd.MultiIndex):
        prices.columns = columns.get_level_values(1)

    return prices

In [None]:
# %% [markdown]
# ### Calculate Technical Indicators

# %%
def calculate_technical_indicators(data, ticker):
    """Append ticker-prefixed technical indicator columns derived from 'Close'.

    Adds, in this order: a 20-period SMA, a 14-period RSI, the three MACD
    columns (fast=12, slow=26), the five Bollinger Band columns (length=20)
    and a 10-period momentum column.

    The SMA and RSI columns are written directly onto the frame passed in;
    once MACD or Bollinger columns are concatenated a new frame is produced,
    so callers must use the return value. If 'Close' is absent, a message is
    printed and ``data`` is returned unchanged.
    """
    price_col = 'Close'
    if price_col not in data.columns:
        print(f"Missing 'Close' column in stock data for {ticker}")
        return data

    prefix = f'{ticker}_'

    data[prefix + 'SMA_20'] = ta.sma(data[price_col], length=20)
    data[prefix + 'RSI_14'] = ta.rsi(data[price_col], length=14)

    # MACD: the result's columns are renamed positionally.
    macd_frame = ta.macd(data[price_col], fast=12, slow=26)
    if macd_frame is None or macd_frame.empty:
        print(f"Failed to calculate MACD for {ticker}")
    else:
        macd_frame.columns = [
            prefix + suffix for suffix in ('MACD', 'MACD_Hist', 'MACD_Signal')
        ]
        data = pd.concat([data, macd_frame], axis=1)

    # Bollinger Bands: same positional renaming.
    band_frame = ta.bbands(data[price_col], length=20)
    if band_frame is None or band_frame.empty:
        print(f"Failed to calculate Bollinger Bands for {ticker}")
    else:
        band_suffixes = (
            'BB_Lower', 'BB_Middle', 'BB_Upper', 'BB_Bandwidth', 'BB_Percentage'
        )
        band_frame.columns = [prefix + suffix for suffix in band_suffixes]
        data = pd.concat([data, band_frame], axis=1)

    data[prefix + 'MOM_10'] = ta.mom(data[price_col], length=10)
    return data


In [None]:
# %% [markdown]
# ### Prepare Input Features

# %%
def _with_date_index(frame):
    """Return a copy of ``frame`` with a single-level DatetimeIndex named 'Date'."""
    frame = frame.copy()
    if frame.index.nlevels > 1:
        # Flatten a MultiIndex and keep only its 'Date' level as the index.
        frame = frame.reset_index().set_index('Date')
    frame.index = pd.to_datetime(frame.index)
    frame.index.name = 'Date'
    return frame


def _broadcast_economic_snapshot(economic_data, target_index):
    """Spread the economic observations over every date in ``target_index``.

    Values are carried forward from their own date and, for dates that come
    before the first observation, carried backward from it.
    """
    if economic_data.empty:
        return economic_data.reindex(target_index)
    snapshot = economic_data.sort_index()
    snapshot = snapshot[~snapshot.index.duplicated(keep='last')]
    timeline = snapshot.index.union(target_index)
    return snapshot.reindex(timeline).ffill().bfill().reindex(target_index)


def prepare_input_features(stock_data, economic_data, ticker, n_lags=5,
                           feature_names_path='combined_data_columns.txt'):
    """Build the model's feature frame from price history and economic data.

    Steps: align both inputs on a 'Date' DatetimeIndex, attach the economic
    values to every stock row, add technical indicators, add lagged copies of
    'Close' and of each indicator listed in the module-level ``indicators``,
    add calendar features, then keep exactly the columns listed in the
    feature-names file (in that order).

    Parameters
    ----------
    stock_data : pandas.DataFrame
        Price history indexed by date; expected to contain a 'Close' column.
    economic_data : pandas.DataFrame
        Economic indicator values indexed by date (typically a single row
        stamped with today's date).
    ticker : str
        Prefix used for the generated technical and lag column names.
    n_lags : int, optional
        Number of lag columns per lagged feature (default 5).
    feature_names_path : str, optional
        Text file with one trained feature name per line.

    Returns
    -------
    pandas.DataFrame or None
        The filtered feature frame, or ``None`` (after printing the reason)
        when the feature-names file is missing or a listed feature cannot be
        produced.

    Notes
    -----
    The input frames are not modified; the function works on copies.
    """
    stock_data = _with_date_index(stock_data)
    economic_data = _with_date_index(economic_data)

    # The economic frame usually holds one row stamped "today". A plain left
    # merge followed by a forward fill would leave every earlier stock row
    # (and every row when today is not a trading day) as NaN, so the values
    # are broadcast over the stock dates first.
    economic_aligned = _broadcast_economic_snapshot(economic_data, stock_data.index)

    # Merge stock data and economic data
    combined_data = pd.merge(
        stock_data, economic_aligned, left_index=True, right_index=True, how='left'
    )

    # Fill remaining gaps forward (fillna(method=...) is deprecated in pandas).
    combined_data = combined_data.ffill()

    # Calculate technical indicators
    combined_data = calculate_technical_indicators(combined_data, ticker)

    # Create lag features
    lagged_features = {}
    close_col = 'Close'
    if close_col in combined_data.columns:
        for lag in range(1, n_lags + 1):
            lagged_features[f'{ticker}_Close_Lag_{lag}'] = combined_data[close_col].shift(lag)
    else:
        print(f"Column {close_col} not found in stock data.")

    # Create lag features for economic indicators
    for indicator in indicators.keys():
        if indicator in combined_data.columns:
            for lag in range(1, n_lags + 1):
                lagged_features[f'{indicator}_Lag_{lag}'] = combined_data[indicator].shift(lag)
        else:
            print(f"Indicator {indicator} not found in combined_data.")

    # Attach all lag columns in one concat rather than one insert per column.
    lagged_features_df = pd.DataFrame(lagged_features, index=combined_data.index)
    combined_data = pd.concat([combined_data, lagged_features_df], axis=1)

    # Extract date-based features
    combined_data['Day_of_Week'] = combined_data.index.dayofweek  # Monday=0, Sunday=6
    combined_data['Month'] = combined_data.index.month
    combined_data['Quarter'] = combined_data.index.quarter
    # NOTE(review): holidays.US() appears to populate its dates lazily per
    # year, so isin() on a fresh calendar may never match; left unchanged to
    # stay consistent with training — confirm against the training pipeline.
    us_holidays = holidays.US()
    combined_data['Is_Holiday'] = combined_data.index.isin(us_holidays).astype(int)
    combined_data['Is_Month_Start'] = combined_data.index.is_month_start.astype(int)
    combined_data['Is_Month_End'] = combined_data.index.is_month_end.astype(int)

    # Load the feature names used during training (blank lines are ignored so
    # a trailing newline does not count as a missing feature).
    try:
        with open(feature_names_path, 'r') as f:
            trained_features = [line.strip() for line in f if line.strip()]
    except FileNotFoundError:
        print(f"The file '{feature_names_path}' was not found. Ensure it exists and contains the feature names used during training.")
        return None

    # Ensure these features are present in combined_data
    missing_features = [feat for feat in trained_features if feat not in combined_data.columns]
    if missing_features:
        print(f"The following trained features are missing in the inference data: {missing_features}")
        return None

    # Keep only the trained features, in the trained order
    return combined_data[trained_features]


In [None]:
# %% [markdown]
# ### Positional Encoding Functions

# %%
def positional_encoding(sequence_length, d_model):
    """Build a sinusoidal positional-encoding table.

    Returns an array of shape ``(sequence_length, d_model)`` in which
    even-indexed columns hold the sine of the position angle and odd-indexed
    columns hold the cosine. Each column pair ``(2k, 2k + 1)`` shares the
    rate ``1 / 10000 ** (2k / d_model)``.
    """
    positions = np.arange(sequence_length).reshape(-1, 1)
    dims = np.arange(d_model).reshape(1, -1)

    # Both columns of a pair use the same exponent, hence the floor division.
    exponents = (2 * (dims // 2)) / np.float32(d_model)
    angles = positions * (1 / np.power(10000.0, exponents))

    encoding = np.zeros((sequence_length, d_model))
    encoding[:, 0::2] = np.sin(angles[:, 0::2])
    encoding[:, 1::2] = np.cos(angles[:, 1::2])
    return encoding

def add_positional_encoding(inputs):
    """Return ``inputs`` plus the positional encoding for its last two axes.

    The encoding table is sized from ``inputs.shape[1]`` and
    ``inputs.shape[2]`` and is given a leading axis of length one so that it
    broadcasts across the first axis of ``inputs``.
    """
    encoding = positional_encoding(inputs.shape[1], inputs.shape[2])
    return inputs + encoding[np.newaxis, ...]


In [None]:
# %% [markdown]
# ### Main Code to Prepare Input Sequence and Make Prediction

# %%
def main():
    """Run the end-to-end inference pipeline for AAPL and print the prediction.

    Fetches the economic and stock data, builds the feature frame, takes the
    most recent 60 rows, standardizes each feature over that window, adds the
    positional encoding, loads 'enhanced_stock_galformer_model.keras' and
    prints the model output.

    Every failure (no data, missing features, too little history, NaNs in the
    window, model load error) is reported with ``print`` and ends the run
    early. Always returns ``None``.
    """
    ticker = 'AAPL'
    sequence_length = 60  # Should match the sequence length used during training

    # Fetch economic data
    economic_data = fetch_economic_data_for_today(indicators)
    if economic_data.empty:
        print("Economic data is empty.")
        return

    # Fetch stock data
    stock_data = fetch_stock_data(ticker)
    if stock_data is None:
        print("Stock data is empty.")
        return

    # Prepare input features
    combined_data = prepare_input_features(stock_data, economic_data, ticker)
    if combined_data is None:
        print("Failed to prepare input features.")
        return

    # Ensure the DataFrame is sorted by date (reassign instead of sorting a
    # column-sliced frame in place).
    combined_data = combined_data.sort_index()

    # Prepare input sequence
    if len(combined_data) < sequence_length:
        print(f"Not enough data to form input sequence of length {sequence_length}")
        return

    # Shape: (sequence_length, num_features). An explicit float dtype keeps
    # np.isnan from failing on object-typed columns.
    input_sequence = combined_data.tail(sequence_length).to_numpy(dtype=np.float64)

    # Check for NaNs
    if np.isnan(input_sequence).any():
        print("Input sequence contains NaN values after feature calculation.")
        return

    # Normalize features (per sample as during training); the epsilon guards
    # against division by zero for constant columns.
    feature_mean = np.mean(input_sequence, axis=0, keepdims=True)
    feature_std = np.std(input_sequence, axis=0, keepdims=True) + 1e-6
    normalized_input_sequence = (input_sequence - feature_mean) / feature_std
    # Reshape to (1, sequence_length, num_features)
    normalized_input_sequence = normalized_input_sequence.reshape(1, sequence_length, -1).astype(np.float32)

    # Add positional encoding
    inputs_with_pe = add_positional_encoding(normalized_input_sequence)

    # Load the trained model. TensorFlow was never imported in this notebook,
    # so the previous code always ended up in the except branch with a
    # NameError; importing here also reports a missing installation cleanly.
    try:
        import tensorflow as tf
        model = tf.keras.models.load_model('enhanced_stock_galformer_model.keras')
        print("Model loaded successfully.")
    except Exception as e:
        print(f"Error loading model: {e}")
        return

    # Make prediction
    predictions = model.predict(inputs_with_pe)
    print(f"Predictions (standardized) shape: {predictions.shape}")

    # Since labels were normalized during training per sample, and during inference we don't have label mean and std,
    # these predictions are in standardized form and cannot be directly denormalized.
    # If you have global label_mean and label_std from training data, use them here to denormalize.
    # For now, we will print the standardized predictions.

    print("Predictions (standardized):")
    print(predictions.flatten())


Successfully fetched data for Effective Federal Funds Rate: 4.64 on 2024-11-01
Successfully fetched data for 10-Year Treasury Rate: 4.18 on 2024-11-29
Successfully fetched data for Consumer Price Index: 315.454 on 2024-10-01
Successfully fetched data for Producer Price Index: 253.452 on 2024-10-01
Successfully fetched data for Unemployment Rate: 4.1 on 2024-10-01
Successfully fetched data for Nonfarm Payroll Employment: 159005.0 on 2024-10-01
Successfully fetched data for Real GDP: 23386.733 on 2024-07-01
Successfully fetched data for Housing Starts: 1311.0 on 2024-10-01
Successfully fetched data for Industrial Production Index: 102.2805 on 2024-10-01
Successfully fetched data for M2 Money Stock: 21311.2 on 2024-10-01
Successfully fetched data for Crude Oil Prices: 69.41 on 2024-11-25
Successfully fetched data for Retail Sales: 621590.0 on 2024-10-01
Successfully fetched data for Total Business Inventories: 2587145.0 on 2024-09-01
Using data from 2024-12-02 instead of today.


  today_data_filled = today_data[required_columns].fillna(method='ffill')


In [10]:
# Wrap `feature_row` in a one-column DataFrame named 'data'.
# NOTE(review): `feature_row` is not defined in any cell visible in this
# notebook, so on a fresh kernel this cell presumably fails with NameError;
# confirm where it is supposed to come from or remove the cell.
df = pd.DataFrame({'data': feature_row})