In [None]:
import yfinance as yf
import numpy as np
import os
import joblib
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
import tensorflow as tf
from keras.models import Model
from keras.layers import Input, LSTM, Dense, Dropout, Embedding, Flatten, Concatenate
from keras.mixed_precision import experimental as mixed_precision

In [None]:
# Configure the GPUs before TensorFlow initialises its runtime.
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        # TensorFlow requires memory growth to be the same on every GPU, so
        # enable it on all visible devices rather than only the first one.
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
            print("GPU enabled:", gpu)
    except RuntimeError as e:
        # Memory growth can only be changed before the GPUs are initialised;
        # report the problem and carry on with the default allocator.
        print(e)

# Enable mixed precision for speed on NVIDIA GPUs
# NOTE(review): `mixed_precision` is the `experimental` module imported at the
# top of the notebook; newer TensorFlow/Keras releases replace it with
# `keras.mixed_precision.set_global_policy` - confirm against the installed version.
policy = mixed_precision.Policy('mixed_float16')
mixed_precision.set_policy(policy)

In [None]:
# Universe of 50 symbols the shared model is trained on.
tickers = [
    "AAPL", "MSFT", "GOOGL", "AMZN", "TSLA", "META", "NVDA", "JPM", "V", "JNJ",
    "PG", "MA", "UNH", "HD", "PYPL", "DIS", "VZ", "INTC", "CSCO", "IBM",
    "KO", "PEP", "MRK", "PFE", "ABT", "T", "XOM", "CVX", "BA", "GE",
    "WMT", "MCD", "NKE", "ADBE", "NFLX", "ORCL", "CRM", "AMD", "QCOM", "TXN",
    "TMO", "MDT", "BMY", "GILD", "AMGN", "LLY", "MRNA", "REGN", "VRTX", "UPS",
]

# Number of past closing prices fed to the model for each prediction.
sequence_length = 60

# Directory that receives one fitted scaler per ticker; created if missing.
scaler_folder = "scalers"
os.makedirs(scaler_folder, exist_ok=True)

# Accumulators filled by the data-loading loop: price windows, encoded
# ticker ids and next-step targets, in matching order.
all_X_seq = []
all_X_ticker = []
all_y = []

In [None]:
# Fit a label encoder on the ticker symbols so each one gets an integer id,
# then persist it so the same mapping can be reloaded later.
ticker_encoder = LabelEncoder().fit(tickers)

# Number of distinct ids; used as the embedding vocabulary size.
num_tickers = len(ticker_encoder.classes_)

joblib.dump(ticker_encoder, "ticker_encoder.pkl")

In [None]:
# Load and process each stock's data
# Start from empty accumulators so that re-running this cell does not append
# a second copy of every sample to the lists.
all_X_seq, all_X_ticker, all_y = [], [], []

# Encode every ticker once up front instead of once per training sample.
ticker_ids = dict(zip(tickers, ticker_encoder.transform(tickers)))

for ticker in tickers:
    print(f"Processing {ticker}...")
    data = yf.download(ticker, start="2018-01-01", end="2025-01-01")
    # Drop missing closes: MinMaxScaler leaves NaN in place on transform,
    # which would otherwise end up inside the training windows and targets.
    close_prices = data["Close"].dropna().values.reshape(-1, 1)

    # sequence_length rows only fill one window with no target after it, so
    # at least sequence_length + 1 rows are needed to produce a sample.
    if len(close_prices) <= sequence_length:
        print(f"Not enough data for {ticker}, skipping...")
        continue

    # Scale prices per stock and keep the fitted scaler for this ticker
    scaler = MinMaxScaler()
    scaled_data = scaler.fit_transform(close_prices)
    joblib.dump(scaler, os.path.join(scaler_folder, f"scaler_{ticker}.pkl"))

    # Create sequences: each window of sequence_length scaled closes is paired
    # with the scaled close that immediately follows it.
    ticker_id = ticker_ids[ticker]
    for i in range(sequence_length, len(scaled_data)):
        all_X_seq.append(scaled_data[i - sequence_length:i, 0])
        all_X_ticker.append(ticker_id)
        all_y.append(scaled_data[i, 0])

In [None]:
# Stack the accumulated Python lists into arrays for training
seq_matrix = np.array(all_X_seq, dtype=np.float32)

# Append a trailing axis of size 1 so each window is (timesteps, 1)
n_samples, n_steps = seq_matrix.shape[0], seq_matrix.shape[1]
X_seq = seq_matrix.reshape((n_samples, n_steps, 1))

X_ticker = np.array(all_X_ticker)
y = np.array(all_y, dtype=np.float32)

print("Sequences shape:", X_seq.shape, "Tickers shape:", X_ticker.shape, "y shape:", y.shape)

In [None]:
# Build model: an LSTM branch over the price window plus an embedding branch
# for the ticker id, merged into a small dense regression head.

# --- Price-sequence branch: two LSTM(50) stages, each followed by dropout ---
seq_input = Input(shape=(sequence_length, 1))
lstm_out = seq_input
for keep_sequence in (True, False):
    # The first stage returns the full sequence for the next LSTM to consume;
    # the second returns only its final output.
    lstm_out = LSTM(50, return_sequences=keep_sequence)(lstm_out)
    lstm_out = Dropout(0.2)(lstm_out)

# --- Ticker branch: integer id -> 5-dimensional embedding, flattened ---
ticker_input = Input(shape=(1,))
embed = Flatten()(
    Embedding(input_dim=num_tickers, output_dim=5)(ticker_input)
)

# --- Merge both branches and regress a single value ---
combined = Concatenate()([lstm_out, embed])
dense = Dense(25, activation="relu")(combined)
# ensure float32 output for mixed precision
output = Dense(1, dtype='float32')(dense)

model = Model(inputs=[seq_input, ticker_input], outputs=output)
model.compile(optimizer="adam", loss="mean_squared_error")

In [None]:
# Train on the price windows together with their ticker ids
model.fit(
    x=[X_seq, X_ticker],
    y=y,
    epochs=10,
    batch_size=128,  # larger batch size for GPU
    shuffle=True,
)

In [None]:
# Save the trained model (HDF5 file) and report where the artifacts live
model.save("multi_stock_model_gpu.h5")
print(
    "Model saved as multi_stock_model_gpu.h5",
    f"Scalers saved in folder: {scaler_folder}",
    sep="\n",
)