In [19]:
# Import libraries
import yfinance as yf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import LSTM, Dense, Dropout
from fredapi import Fred  # FRED API for macroeconomic data

# ============== PARAMETERS ==============
stock_ticker = 'NVDA'
start_date = '2015-01-01'
end_date = '2024-12-15'
input_date = '2024-11-29'  # Example date from which we predict 5 days ahead
future_days = 5
sequence_length = 60
api_key = 'f00fd15c5439961e61801c7d97d715ef'
model_filename = 'nvda.keras'
# ========================================

# Step 1: Fetch stock data
data = yf.download(stock_ticker, start=start_date, end=end_date)

# Use Close, Open, High, Low in that order so Close is at index 0
data_monthly = data[['Close', 'Open', 'High', 'Low']]

# Step 2: Fetch macroeconomic data from FRED (Inflation, GDP Growth, Interest Rates)
fred = Fred(api_key=api_key)
inflation = fred.get_series('CPIAUCSL', start_date=start_date, end_date=end_date)
gdp_growth = fred.get_series('A191RL1Q225SBEA', start_date=start_date, end_date=end_date)
interest_rate = fred.get_series('FEDFUNDS', start_date=start_date, end_date=end_date)

# Resample macro data to daily frequency
inflation = inflation.resample('D').ffill()
gdp_growth = gdp_growth.resample('D').ffill()
interest_rate = interest_rate.resample('D').ffill()

# Step 3: Align lengths
min_length = min(len(inflation), len(gdp_growth), len(interest_rate), len(data_monthly))
data_monthly = data_monthly.iloc[:min_length]
inflation = inflation.iloc[:min_length]
gdp_growth = gdp_growth.iloc[:min_length]
interest_rate = interest_rate.iloc[:min_length]

# Step 4: Merge datasets
data_monthly['Inflation'] = inflation.values
data_monthly['GDP Growth'] = gdp_growth.values
data_monthly['Interest Rate'] = interest_rate.values

# Fill missing data
data_monthly = data_monthly.fillna(method='ffill')

# Features: Close(0), Open(1), High(2), Low(3), Inflation(4), GDP Growth(5), Interest Rate(6)
features = ['Close', 'Open', 'High', 'Low', 'Inflation', 'GDP Growth', 'Interest Rate']

# Scale data
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(data_monthly[features])

def create_sequences(data, sequence_length, target_size=4):
    X, y = [], []
    for i in range(len(data) - sequence_length):
        X.append(data[i:i + sequence_length])
        # Predicting the next day's Close, Open, High, Low
        y.append(data[i + sequence_length, [0,1,2,3]]) 
    return np.array(X), np.array(y)

X, y = create_sequences(scaled_data, sequence_length)

# Split into training and testing
train_size = int(len(X) * 0.8)
X_train, X_test = X[:train_size], X[train_size:]
y_train, y_test = y[:train_size], y[train_size:]

# Reshape for LSTM: [samples, time_steps, features]
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], len(features))
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], len(features))

# Build the LSTM Model
model = Sequential([
    LSTM(50, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])),
    Dropout(0.2),
    LSTM(50, return_sequences=False),
    Dropout(0.2),
    Dense(25),
    Dense(4)  # Predicting 4 features: Close, Open, High, Low
])

model.compile(optimizer='adam', loss='mean_squared_error')

# Train the Model
history = model.fit(X_train, y_train, batch_size=32, epochs=10, validation_data=(X_test, y_test))

# Save the model
model.save(model_filename)
print(f"Model saved as {model_filename}")

# Ensure input_date is in data range
if input_date not in data_monthly.index:
    raise ValueError(f"Input date {input_date} not found in dataset index.")

# Ensure input_date is not too close to start
idx = data_monthly.index.get_loc(input_date)
if idx < sequence_length:
    raise ValueError(f"Not enough historical data before {input_date} to create the input sequence.")

# Prepare input sequence for prediction
input_sequence = scaled_data[idx - sequence_length:idx]
current_input = input_sequence.reshape(1, sequence_length, len(features))

predictions_all = []

# Predict day by day for future_days
for day in range(future_days):
    pred = model.predict(current_input)
    predictions_all.append(pred[0])  # shape: (4,)

    # We will use the last known macro features from the last day in our sequence
    last_macro = input_sequence[-1, 4:]  # Inflation, GDP Growth, Interest Rate
    # Construct the next day's input row
    next_day_full = np.concatenate([pred[0], last_macro])  # (4 + 3 = 7 features)
    
    # Update input_sequence
    input_sequence = np.vstack([input_sequence[1:], next_day_full])
    current_input = input_sequence.reshape(1, sequence_length, len(features))

# Convert predictions to numpy array
predictions_all = np.array(predictions_all)  # shape (5,4)

# Inverse transform predictions
pred_padded = np.zeros((predictions_all.shape[0], len(features)))
# Fill in predicted price features
pred_padded[:, 0] = predictions_all[:, 0] # Close
pred_padded[:, 1] = predictions_all[:, 1] # Open
pred_padded[:, 2] = predictions_all[:, 2] # High
pred_padded[:, 3] = predictions_all[:, 3] # Low
# Macro features remain zero, won't affect price columns after inverse transform

predictions_inv = scaler.inverse_transform(pred_padded)
pred_close = predictions_inv[:, 0]
pred_open = predictions_inv[:, 1]
pred_high = predictions_inv[:, 2]
pred_low = predictions_inv[:, 3]

# Create a DataFrame with the requested order: low, high, close, open
pred_data = pd.DataFrame({
    'Low': pred_low,
    'High': pred_high,
    'Close': pred_close,
    'Open': pred_open
}, index=pd.bdate_range(start=pd.to_datetime(input_date) + pd.tseries.offsets.BDay(1), periods=future_days))

print("Predicted prices for the next 5 business days after", input_date)
print(pred_data)


[*********************100%***********************]  1 of 1 completed


Epoch 1/10


  data_monthly = data_monthly.fillna(method='ffill')
  super().__init__(**kwargs)


[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 45ms/step - loss: 0.0047 - val_loss: 0.0217
Epoch 2/10
[1m38/62[0m [32m━━━━━━━━━━━━[0m[37m━━━━━━━━[0m [1m1s[0m 42ms/step - loss: 3.6704e-04

In [18]:
from tensorflow.keras.models import load_model
import numpy as np
import pandas as pd
import datetime

def predict_next_5_days(input_date):
    # Constants used in the previous code
    future_days = 5
    
    # Load the pre-trained model
    model = load_model('nvda.keras')
    
    # Check that the input_date exists in data_monthly
    if input_date not in data_monthly.index:
        raise ValueError(f"Input date {input_date} not found in dataset index.")
        
    idx = data_monthly.index.get_loc(input_date)
    if idx < sequence_length:
        raise ValueError(f"Not enough historical data before {input_date} to create the input sequence.")
    
    # Prepare the input sequence from scaled_data
    input_sequence = scaled_data[idx - sequence_length:idx]
    current_input = input_sequence.reshape(1, sequence_length, len(features))
    
    predictions_all = []
    
    # Predict day by day for the next 5 business days
    for day in range(future_days):
        pred = model.predict(current_input)
        predictions_all.append(pred[0])  # shape: (4,)
        
        # Use the last known macro features from the last day in our sequence
        # Macro features: Inflation(4), GDP Growth(5), Interest Rate(6)
        last_macro = input_sequence[-1, 4:]
        next_day_full = np.concatenate([pred[0], last_macro])
        
        # Update the input sequence
        input_sequence = np.vstack([input_sequence[1:], next_day_full])
        current_input = input_sequence.reshape(1, sequence_length, len(features))
    
    # Convert to numpy array
    predictions_all = np.array(predictions_all)  # (5,4): [Close, Open, High, Low]
    
    # Prepare for inverse transform: pad predictions with zeros for macro features
    pred_padded = np.zeros((predictions_all.shape[0], len(features)))
    pred_padded[:, 0] = predictions_all[:, 0] # Close
    pred_padded[:, 1] = predictions_all[:, 1] # Open
    pred_padded[:, 2] = predictions_all[:, 2] # High
    pred_padded[:, 3] = predictions_all[:, 3] # Low
    
    predictions_inv = scaler.inverse_transform(pred_padded)
    pred_close = predictions_inv[:, 0]
    pred_open = predictions_inv[:, 1]
    pred_high = predictions_inv[:, 2]
    pred_low = predictions_inv[:, 3]

    # Create a DataFrame with requested order: low, high, close, open
    pred_dates = pd.bdate_range(start=pd.to_datetime(input_date) + pd.tseries.offsets.BDay(1), periods=future_days)
    pred_data = pd.DataFrame({
        'Low': pred_low,
        'High': pred_high,
        'Close': pred_close,
        'Open': pred_open
    }, index=pred_dates)

    return pred_data

def get_actual_next_5_days_data(input_date):
    # We will assume input_date is a valid business day within data_monthly's range.
    # We'll fetch the next 5 business days of actual data from Yahoo Finance.
    future_days = 5
    
    # Generate the next 5 business days dates after input_date
    start_pred = (pd.to_datetime(input_date) + pd.tseries.offsets.BDay(1)).date()
    pred_dates = pd.bdate_range(start=start_pred, periods=future_days)

    # We'll fetch data from start_pred to the last predicted day
    end_pred = pred_dates[-1].date()
    
    # Fetch actual data from Yahoo Finance
    actual_data = yf.download('NVDA', start=start_pred, end=end_pred + pd.tseries.offsets.BDay(1))
    
    if actual_data.empty:
        print("No actual data available for the specified period.")
        return None
    
    # Filter columns: Low, High, Close, Open
    actual_data_filtered = actual_data[['Low', 'High', 'Close', 'Open']]
    
    # Reindex to ensure only those 5 business days are shown (in case of missing data)
    actual_data_filtered = actual_data_filtered.reindex(pred_dates)
    
    print(f"Actual NVDA data for the 5 business days after {input_date}:")
    print(actual_data_filtered)
    return actual_data_filtered

# ===== Tester Code =====
# Example: Test the function with a given date
test_input_date = '2024-11-29'
results = predict_next_5_days(test_input_date)
#print(f"Predicted prices for the next 5 business days after {test_input_date}:")

actual_data_filtered = get_actual_next_5_days_data(test_input_date)

# print(f"Predicted prices for the next 5 business days after {actual_data_filtered}:")




print(results)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 222ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step


[*********************100%***********************]  1 of 1 completed

Actual NVDA data for the 5 business days after 2024-11-29:
Price              Low        High       Close        Open
Ticker            NVDA        NVDA        NVDA        NVDA
2024-12-02  137.820007  140.449997  138.630005  138.830002
2024-12-03  137.949997  140.539993  140.259995  138.259995
2024-12-04  140.289993  145.789993  145.139999  142.000000
2024-12-05  143.949997  146.539993  145.059998  145.110001
2024-12-06  141.309998  145.699997  142.440002  144.600006
                  Low       High      Close       Open
2024-12-02  72.111194  76.958650  67.363268  82.052727
2024-12-03  72.322215  77.251697  67.662403  82.292873
2024-12-04  72.411926  77.404143  67.825937  82.404277
2024-12-05  72.462578  77.489650  67.928942  82.466709
2024-12-06  72.487521  77.534815  67.987764  82.497500



