In [62]:
import numpy as np
#import pandas as pd
import modin.pandas as pd
import ray
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, BatchNormalization, LayerNormalization
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.pipeline import make_pipeline, Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
%matplotlib inline


In [64]:
# Fix TensorFlow's global random seed so TF-driven randomness (e.g. weight
# initialisation) is repeatable between runs. Only TensorFlow is seeded here.
tf.random.set_seed(7)
def load_data(filename):
    """Read a CSV file into a date-indexed frame in chronological order.

    Args:
        filename: Path of a CSV file that contains a 'Date' column.

    Returns:
        The frame read from the file, with 'Date' parsed as datetimes, used
        as the index, and rows sorted by that index in ascending order.
    """
    frame = pd.read_csv(filename, index_col='Date', parse_dates=['Date'])
    return frame.sort_index()

In [72]:
def init_data_prep(df,features,Seq,target_idx=5):
    """Scale the selected columns and build sliding-window train/test sequences.

    Each sample is a window of ``Seq`` consecutive rows; its target is the
    value of column ``target_idx`` (position within ``features``) on the row
    immediately after the window. The first 80% of the samples form the
    training set and the remainder the test set (chronological split, no
    shuffling).

    The MinMaxScaler is fitted only on the rows that the training samples
    touch, so test-period minima/maxima do not leak into the training data.
    As a consequence, scaled values in the test split may fall outside [0, 1].

    Args:
        df: Frame holding the columns named in ``features``.
        features: Column names to use as model inputs, in order.
        Seq: Window length (number of past rows per sample); must be >= 1.
        target_idx: Index into ``features`` of the column to predict.

    Returns:
        (X_train, X_test, y_train, y_test, scaler) where the X arrays have
        shape (samples, Seq, len(features)), the y arrays have shape
        (samples,), and ``scaler`` is the fitted MinMaxScaler.

    Raises:
        ValueError: If ``Seq`` is smaller than 1 or the data has too few rows
            to form a single window plus its target.
    """
    data = df[features].values
    n_samples = len(data) - Seq
    if Seq < 1 or n_samples < 1:
        raise ValueError(
            f"Need Seq >= 1 and more than Seq rows of data; got Seq={Seq} and {len(data)} rows"
        )

    train_size = int(0.8*n_samples)

    # Training sample k uses rows k .. k+Seq-1 as input and row k+Seq as its
    # target, so the training split touches rows [0, Seq + train_size) only.
    scaler = MinMaxScaler(feature_range=(0,1))
    scaler.fit(data[:Seq + train_size])
    data_scaled = scaler.transform(data)

    X = np.array([data_scaled[i-Seq:i] for i in range(Seq, len(data_scaled))])
    # Target for the window ending at row i-1 is row i of the target column.
    y = data_scaled[Seq:, target_idx].copy()

    X_train, X_test = X[:train_size], X[train_size:]
    y_train, y_test = y[:train_size], y[train_size:]
    print(f"Training data shape: {X_train.shape}")
    print(f"Testing data shape: {X_test.shape}")
    return X_train, X_test, y_train, y_test, scaler

def lstm_model(units, Dpr, input_shape, learning_rate=0.001):
    """Build and compile a three-layer stacked LSTM regressor.

    Architecture: LSTM(units) -> BatchNormalization -> LSTM(units-20) ->
    LayerNormalization -> LSTM(units-40) -> Dropout(Dpr) -> Dense(1).
    The normalization layers are added to the model explicitly; writing them
    after a comma on the same line as ``model.add(...)`` only creates and
    discards them.

    Args:
        units: Width of the first LSTM layer. The next two layers use
            ``units-20`` and ``units-40``, so this must be greater than 40.
        Dpr: Dropout rate applied before the output layer.
        input_shape: Shape of one input sample, (timesteps, n_features).
        learning_rate: Initial Adam learning rate.

    Returns:
        (model, callbacks): the compiled Keras model and a list holding an
        EarlyStopping and a ReduceLROnPlateau callback, both monitoring
        ``val_loss``.

    Raises:
        ValueError: If ``units`` is not greater than 40.
    """
    if units <= 40:
        raise ValueError(
            f"units must be greater than 40 so that every LSTM layer has a positive size; got {units}"
        )

    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    loss = 'mean_squared_error'
    metrics = ['mae']

    model = Sequential()
    # Declare the input once; only the first layer needs to know the shape.
    model.add(tf.keras.Input(shape=input_shape))
    model.add(LSTM(units=units, return_sequences=True))
    model.add(BatchNormalization())
    model.add(LSTM(units=units-20, return_sequences=True))
    model.add(LayerNormalization())
    # Last recurrent layer returns only its final state for the Dense head.
    model.add(LSTM(units=units-40, return_sequences=False))
    model.add(Dropout(Dpr))
    model.add(Dense(units=1))
    model.compile(optimizer=optimizer, loss=loss, metrics=metrics)
    model.summary()
    callbacks = [
        EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True),
        ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=0.00001)
    ]
    return model, callbacks


In [6]:
# Start a Ray runtime for this session; ignore_reinit_error=True lets this
# cell be re-run without raising when Ray is already initialised.
# NOTE(review): presumably Ray is the execution engine behind the
# modin.pandas import — confirm, and run this cell before any modin call.
ray.init(ignore_reinit_error=True)

2026-01-30 08:06:51,240	INFO worker.py:2007 -- Started a local Ray instance.


0,1
Python version:,3.12.12
Ray version:,2.53.0


In [66]:
# Read the price history; load_data indexes the frame by 'Date' and sorts it.
stocks_df = load_data('yahoo_stock.csv')

In [74]:
# Build 60-step windows over the six price/volume columns.
# 'Adj Close' sits at position 5 of this list, which is the column that
# init_data_prep's default target_idx=5 selects as the prediction target.
X_train, X_test, y_train, y_test, scaler = init_data_prep(
    stocks_df,
    features=['Open', 'High', 'Low', 'Close', 'Volume', 'Adj Close'],
    Seq=60,
)

Training data shape: (1412, 60, 6)
Testing data shape: (353, 60, 6)


In [75]:
# Build the network for inputs shaped (timesteps, n_features), taken from the
# second and third axes of the training array.
model, callbacks = lstm_model(
    units=100,
    Dpr=0.2,
    input_shape=(X_train.shape[1], X_train.shape[2]),
)

