In [None]:
import yfinance as yf
import numpy as np
import os
import joblib
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
import tensorflow as tf
from keras.models import Model
from keras.layers import Input, LSTM, Dense, Dropout, Embedding, Flatten, Concatenate
from keras.mixed_precision import experimental as mixed_precision

In [None]:
# Configure TensorFlow for GPU training: enable on-demand memory allocation
# and, only when a GPU is actually present, float16 mixed precision.
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        # Memory growth has to be configured the same way on every visible
        # GPU; setting it on gpus[0] alone fails on multi-GPU machines.
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        print("GPU enabled:", gpus[0])
    except RuntimeError as e:
        # Raised when the GPUs were already initialized before this cell ran.
        print(e)

    # Enable mixed precision for speed on NVIDIA GPUs. Kept inside the
    # `if gpus` branch so CPU-only runs stay on the default float32 policy.
    # NOTE(review): the `experimental` mixed-precision module imported at the
    # top of the notebook is deprecated in newer TensorFlow/Keras releases in
    # favour of `mixed_precision.set_global_policy` -- confirm against the
    # installed version.
    policy = mixed_precision.Policy('mixed_float16')
    mixed_precision.set_policy(policy)

In [None]:
# Ticker universe: 50 US large caps, listed in the order they are processed.
tickers = (
    "AAPL MSFT GOOGL AMZN TSLA "
    "META NVDA JPM V JNJ "
    "PG MA UNH HD PYPL "
    "DIS VZ INTC CSCO IBM "
    "KO PEP MRK PFE ABT "
    "T XOM CVX BA GE "
    "WMT MCD NKE ADBE NFLX "
    "ORCL CRM AMD QCOM TXN "
    "TMO MDT BMY GILD AMGN "
    "LLY MRNA REGN VRTX UPS"
).split()

# Number of past closing prices in each input window.
sequence_length = 60

# Directory that receives one fitted scaler per ticker.
scaler_folder = "scalers"
os.makedirs(scaler_folder, exist_ok=True)

# Accumulators for the training samples gathered across all tickers.
all_X_seq = []
all_X_ticker = []
all_y = []

In [None]:
# Map every ticker symbol to an integer id and persist the fitted encoder
# to "ticker_encoder.pkl" so the same mapping can be reloaded later.
ticker_encoder = LabelEncoder().fit(tickers)
num_tickers = ticker_encoder.classes_.size
joblib.dump(ticker_encoder, "ticker_encoder.pkl")

In [None]:
# Load and process each stock's data.
#
# For every ticker: download daily closes, fit and save a per-ticker
# MinMaxScaler, then slice the scaled series into (window, next value)
# training pairs tagged with the ticker's integer id.

# Start from empty accumulators so re-running this cell does not append a
# second copy of every sample.
all_X_seq, all_X_ticker, all_y = [], [], []

for ticker in tickers:
    print(f"Processing {ticker}...")
    # auto_adjust is passed explicitly so the price basis does not depend on
    # the installed yfinance default and matches the later AMZN download.
    data = yf.download(ticker, start="2018-01-01", end="2025-01-01", auto_adjust=True)
    # Drop missing closes: MinMaxScaler keeps NaNs in its output, and they
    # would otherwise end up inside the training windows and targets.
    close_prices = data["Close"].dropna().values.reshape(-1, 1)

    # A sample needs `sequence_length` inputs plus one target, so a series of
    # exactly `sequence_length` rows yields nothing and is skipped as well.
    if len(close_prices) <= sequence_length:
        print(f"Not enough data for {ticker}, skipping...")
        continue

    # Scale prices per stock
    scaler = MinMaxScaler()
    scaled_data = scaler.fit_transform(close_prices)
    joblib.dump(scaler, os.path.join(scaler_folder, f"scaler_{ticker}.pkl"))

    # The encoded id is the same for every window of this ticker; compute it
    # once instead of once per sample.
    ticker_id = ticker_encoder.transform([ticker])[0]

    # Create sequences: window [i - sequence_length, i) predicts the value at i.
    for i in range(sequence_length, len(scaled_data)):
        all_X_seq.append(scaled_data[i - sequence_length:i, 0])
        all_X_ticker.append(ticker_id)
        all_y.append(scaled_data[i, 0])

In [None]:
# Turn the accumulated Python lists into arrays for training.
# The windows get a trailing axis of size 1 (one feature per time step).
X_seq = np.array(all_X_seq, dtype=np.float32)
n_samples, n_steps = X_seq.shape[0], X_seq.shape[1]
X_seq = X_seq.reshape(n_samples, n_steps, 1)

X_ticker = np.array(all_X_ticker)
y = np.array(all_y, dtype=np.float32)

print(
    "Sequences shape:", X_seq.shape,
    "Tickers shape:", X_ticker.shape,
    "y shape:", y.shape,
)

In [None]:
# Build model: an LSTM branch over the price window and an embedding branch
# for the ticker id, concatenated and fed to a small dense regression head.

# Sequence branch: two LSTM(50) layers, each followed by Dropout(0.2).
# Only the first LSTM returns the full sequence; the second returns its
# final output.
seq_input = Input(shape=(sequence_length, 1))
lstm_out = seq_input
for keep_sequence in (True, False):
    lstm_out = LSTM(50, return_sequences=keep_sequence)(lstm_out)
    lstm_out = Dropout(0.2)(lstm_out)

# Ticker branch: one integer id -> 5-dimensional embedding, flattened.
ticker_input = Input(shape=(1,))
embed = Flatten()(Embedding(input_dim=num_tickers, output_dim=5)(ticker_input))

# Head: merge both branches and regress a single value.
combined = Concatenate()([lstm_out, embed])
dense = Dense(25, activation="relu")(combined)
# The output layer is pinned to float32 for use with mixed precision.
output = Dense(1, dtype='float32')(dense)

model = Model(inputs=[seq_input, ticker_input], outputs=output)
model.compile(loss="mean_squared_error", optimizer="adam")

In [None]:
# Train on all collected samples, shuffled each epoch.
fit_options = dict(
    batch_size=128,  # larger batch size for GPU
    epochs=10,
    shuffle=True,
)
model.fit(x=[X_seq, X_ticker], y=y, **fit_options)

In [None]:
# Save the trained model to an HDF5 file and report where the artifacts are.
model.save("multi_stock_model_gpu.h5")
print(
    "Model saved as multi_stock_model_gpu.h5",
    f"Scalers saved in folder: {scaler_folder}",
    sep="\n",
)

In [1]:
import yfinance as yf

In [17]:
import yfinance as yf
import pandas as pd

# Download the last 180 days of AMZN prices and print each date with its
# closing price.
ticker = "AMZN"
data = yf.download(ticker, period="180d", auto_adjust=True)

# Flatten columns if MultiIndex
if isinstance(data.columns, pd.MultiIndex):
    # Join the levels of each column label with "_" into a single string.
    # NOTE(review): with yfinance's default layout this presumably gives
    # labels such as "Close_AMZN" -- confirm against the installed version.
    data.columns = ["_".join(col).strip() for col in data.columns.values]

# Pick the close column by matching "Close" as a whole "_"-separated token.
# This works for flat and flattened labels, does not match "Adj Close", and
# fails with a clear message instead of a bare IndexError.
close_col = next(
    (col for col in data.columns if "Close" in col.split("_")),
    None,
)
if close_col is None:
    raise KeyError(f"No Close column found for {ticker}; columns: {list(data.columns)}")

# Keep only rows with a close price, move the date index into a column, and
# give the price column a stable name (chained, no in-place mutation).
close_series = (
    data[[close_col]]
    .dropna()
    .reset_index()
    .rename(columns={close_col: "Close"})
)

for d, c in zip(close_series["Date"], close_series["Close"]):
    print(d, c)

[*********************100%***********************]  1 of 1 completed

2025-01-16 00:00:00 220.66000366210938
2025-01-17 00:00:00 225.94000244140625
2025-01-21 00:00:00 230.7100067138672
2025-01-22 00:00:00 235.00999450683594
2025-01-23 00:00:00 235.4199981689453
2025-01-24 00:00:00 234.85000610351562
2025-01-27 00:00:00 235.4199981689453
2025-01-28 00:00:00 238.14999389648438
2025-01-29 00:00:00 237.07000732421875
2025-01-30 00:00:00 234.63999938964844
2025-01-31 00:00:00 237.67999267578125
2025-02-03 00:00:00 237.4199981689453
2025-02-04 00:00:00 242.05999755859375
2025-02-05 00:00:00 236.1699981689453
2025-02-06 00:00:00 238.8300018310547
2025-02-07 00:00:00 229.14999389648438
2025-02-10 00:00:00 233.13999938964844
2025-02-11 00:00:00 232.75999450683594
2025-02-12 00:00:00 228.92999267578125
2025-02-13 00:00:00 230.3699951171875
2025-02-14 00:00:00 228.67999267578125
2025-02-18 00:00:00 226.64999389648438
2025-02-19 00:00:00 226.6300048828125
2025-02-20 00:00:00 222.8800048828125
2025-02-21 00:00:00 216.5800018310547
2025-02-24 00:00:00 212.71000671386


