In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import LSTM, Dense, Dropout
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sqlalchemy import create_engine, inspect
from tqdm import tqdm

In [2]:
# Database connection
DATABASE_URL = "sqlite:////Users/aryanhazra/Downloads/VSCode Repos/trading_model/src/pre_processing/stock_data/stock_data.db"
engine = create_engine(DATABASE_URL)

In [3]:
# Get all table names (assuming each table is a stock ticker)
def get_stock_tables():
    inspector = inspect(engine)
    return inspector.get_table_names()

In [4]:
# Load stock data from table
def load_stock_data(ticker):
    query = f'SELECT * FROM "{ticker}" ORDER BY date'
    df = pd.read_sql(query, con=engine)
    df['date'] = pd.to_datetime(df['date'])
    df.set_index('date', inplace=True)

    df = df.drop(columns=['acos', 'asin'])
    df = df.dropna()

    # Select features (close + any technical indicators if available)
    features = [col for col in df.columns if col != 'date']
    return df[features]

In [5]:
# Prepare dataset for LSTM
def prepare_data(df, lookback=30):
    scaler = MinMaxScaler(feature_range=(0, 1))

    # Assuming 'ticker' is one of the columns in df
    label_encoder = LabelEncoder()
    df['ticker_encoded'] = label_encoder.fit_transform(df['ticker'])

    # Scale the numeric columns, excluding the 'ticker' column
    scaled_data = scaler.fit_transform(df.drop(columns=['ticker']))

    # Now we have both scaled numeric data and the encoded ticker column
    scaled_data = np.column_stack((scaled_data, df['ticker_encoded']))

    X, y = [], []
    for i in range(len(scaled_data) - lookback - 30):
        X.append(scaled_data[i:i+lookback])   # Last 30 days of data
        y.append(scaled_data[i+lookback:i+lookback+30, 0])  # Predict next 30 days of close price

    return np.array(X), np.array(y), scaler

In [6]:
# Define or load LSTM model
def build_or_load_model(input_shape, model_path="lstm_model.h5"):
    try:
        model = load_model(model_path)
        print("Loaded existing model.")
    except:
        print("Creating new model...")
        model = Sequential([
            LSTM(100, return_sequences=True, input_shape=input_shape),
            Dropout(0.2),
            LSTM(50, return_sequences=False),
            Dropout(0.2),
            Dense(30)  # Predict next 30 days of prices
        ])
        model.compile(optimizer='adam', loss='mse')

    return model

In [None]:
# Train model on multiple tickers
def train_model():
    tickers = get_stock_tables()
    model = None

    for ticker in tqdm(tickers, desc="Training models", unit="ticker"):
        print(f"Training on {ticker}...")
        df = load_stock_data(ticker)
        if df.empty:
            continue
        X, y, scaler = prepare_data(df)

        train_size = int(len(X) * 0.8)
        X_train, X_test = X[:train_size], X[train_size:]
        y_train, y_test = y[:train_size], y[train_size:]

        if model is None:
            model = build_or_load_model((X.shape[1], X.shape[2]))

        model.fit(X_train, y_train, epochs=30, batch_size=32, validation_data=(X_test, y_test), verbose=2)

    # Save model after training on all tickers
    model.save("lstm_model.h5")
    print("Model saved.")

In [8]:
# Predict next 30 days for each stock
def predict_next_30_days():
    tickers = get_stock_tables()
    model = load_model("lstm_model.h5")

    predictions = {}
    for ticker in tickers:
        print(f"Predicting for {ticker}...")
        df = load_stock_data(ticker)
        
        if not df.empty:
            print(df.shape)
            X, _, scaler = prepare_data(df)

            last_sequence = X[-1].reshape(1, X.shape[1], X.shape[2])
            predicted_scaled = model.predict(last_sequence)[0]
            predicted_prices = scaler.inverse_transform(predicted_scaled.reshape(-1, 1))

            predictions[ticker] = predicted_prices.flatten()

    return predictions

In [9]:
# Run training & prediction
train_model()
predictions = predict_next_30_days()

Training models:   0%|          | 0/18630 [00:00<?, ?ticker/s]2025-04-01 01:23:48.577997: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M3
2025-04-01 01:23:48.578134: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 8.00 GB
2025-04-01 01:23:48.578144: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 2.67 GB
I0000 00:00:1743485028.578570  430422 pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
I0000 00:00:1743485028.578606  430422 pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)
  super().__init__(**kwargs)


Training on A...
Creating new model...


2025-04-01 01:23:49.083665: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.
Training models:   0%|          | 1/18630 [00:14<75:18:13, 14.55s/ticker]

Training on AA...


Training models:   0%|          | 2/18630 [00:27<70:43:18, 13.67s/ticker]

Training on AAA...


Training models:   0%|          | 3/18630 [00:39<66:56:19, 12.94s/ticker]

Training on AAAU...


Training models:   0%|          | 4/18630 [00:52<67:15:43, 13.00s/ticker]

Training on AAC...


Training models:   0%|          | 5/18630 [00:59<55:40:06, 10.76s/ticker]

Training on AAC.U...


Training models:   0%|          | 6/18630 [01:05<47:40:26,  9.22s/ticker]

Training on AAC.WS...


Training models:   0%|          | 7/18630 [01:11<42:36:10,  8.24s/ticker]

Training on AACBU...
Training on AACG...


Training models:   0%|          | 9/18630 [01:25<38:27:11,  7.43s/ticker]

Training on AACI...


Training models:   0%|          | 10/18630 [01:29<34:29:52,  6.67s/ticker]

Training on AACIU...


Training models:   0%|          | 11/18630 [01:31<28:08:23,  5.44s/ticker]

Training on AACIW...


Training models:   0%|          | 12/18630 [01:36<27:15:46,  5.27s/ticker]

Training on AACQ...


Training models:   0%|          | 12/18630 [01:36<41:47:19,  8.08s/ticker]


KeyboardInterrupt: 

In [None]:
# Print predictions
for ticker, prices in predictions.items():
    print(f"{ticker} Next 30 Days Predictions:", prices)