In [5]:
import pandas as pd
import numpy as np
import tensorflow as tf
from keras import layers, regularizers
from keras.callbacks import ReduceLROnPlateau
from sklearn.preprocessing import LabelEncoder, StandardScaler
import matplotlib.pyplot as plt

In [6]:
# Column layout of the CSV. The names are supplied explicitly and header=0
# makes pandas discard the file's own header row in favour of them.
column_names = ['Date','Open','High','Low','Close','Volume',
                'Dividends','Stock Splits','Brand_Name','Ticker',
                'Industry_Tag','Country','Capital Gains']

# Model inputs and regression target. Year/Month/Day are derived from 'Date' below.
feature_columns = ['Open', 'High', 'Low', 'Volume', 'Dividends', 'Stock Splits', 'Year', 'Month', 'Day']
target_column = 'Close'

# '?' is treated as a missing-value marker in addition to pandas' defaults.
dataset = pd.read_csv('World-Stock-Prices-Dataset.csv', names=column_names, header=0, na_values='?')

# Parse as timezone-aware UTC timestamps, then strip the timezone so the
# column holds naive datetimes (UTC wall-clock time is kept).
dataset['Date'] = pd.to_datetime(dataset['Date'], utc=True).dt.tz_localize(None)

# Calendar features; a missing Date yields NaN in all three columns.
dataset['Year'] = dataset['Date'].dt.year
dataset['Month'] = dataset['Date'].dt.month
dataset['Day'] = dataset['Date'].dt.day

# Drop every row that is missing the target, any model feature, or the ticker.
# Checking only 'Close' lets NaNs in the features through: StandardScaler keeps
# NaNs in its output, so they would reach the network and can turn the loss
# into NaN. A missing 'Ticker' would likewise mix floats with strings when the
# column is label-encoded in a later cell.
required_columns = feature_columns + [target_column, 'Ticker']
dataset = dataset.dropna(subset=required_columns)

In [None]:
# Seeded random 80/20 row split: sample the training rows, keep the rest for testing.
train_dataset = dataset.sample(frac=0.8, random_state=0)
test_dataset = dataset.loc[~dataset.index.isin(train_dataset.index)]

# Standardise the numeric features using statistics learned from the training rows only.
scaler = StandardScaler().fit(train_dataset[feature_columns])
X_train_num = scaler.transform(train_dataset[feature_columns])
X_test_num = scaler.transform(test_dataset[feature_columns])

# Map each ticker symbol to an integer id; the vocabulary comes from the training rows.
label_encoder = LabelEncoder().fit(train_dataset['Ticker'])
X_train_cat = label_encoder.transform(train_dataset['Ticker'])
X_test_cat = label_encoder.transform(test_dataset['Ticker'])
num_tickers = label_encoder.classes_.shape[0]

# Regression targets as plain NumPy arrays.
y_train, y_test = (
    split[target_column].to_numpy() for split in (train_dataset, test_dataset)
)

In [None]:
# Seed the Python, NumPy and TensorFlow RNGs before any layer is created so the
# weight initialisation is repeatable on Restart & Run All, in line with the
# seeded train/test split (random_state=0). Some GPU kernels may still be
# non-deterministic.
tf.keras.utils.set_random_seed(0)

# The LSTM expects (batch, time steps, features); every row becomes a
# sequence of exactly one time step.
X_train_num_seq = X_train_num.reshape((-1, 1, len(feature_columns)))
X_test_num_seq = X_test_num.reshape((-1, 1, len(feature_columns)))

# Embedding width heuristic: fourth root of half the ticker count,
# capped at 50 and never below 1.
embedding_dim = int(min(50, (num_tickers // 2)**0.25))
embedding_dim = max(1, embedding_dim)

numerical_input = layers.Input(shape=(1, len(feature_columns)), name='num_input')  
ticker_input = layers.Input(shape=(1,), name='ticker_input')

# Learned vector per ticker id. input_dim has one more row than the number of
# encoded classes; LabelEncoder ids run 0..num_tickers-1, so the extra row is
# not indexed by the data prepared above.
ticker_embedding = layers.Embedding(
    input_dim=num_tickers + 1,
    output_dim=embedding_dim,
    name='ticker_embedding'
)(ticker_input)
# (batch, 1, embedding_dim) -> (batch, embedding_dim)
ticker_flatten = layers.Flatten()(ticker_embedding)

# Numeric branch: LSTM returning only its final output, then batch normalisation.
x = layers.LSTM(64, return_sequences=False)(numerical_input)
x = layers.BatchNormalization()(x)

# Join the numeric representation with the ticker embedding.
merged = layers.Concatenate()([x, ticker_flatten])

# Dense head; only the first dense layer carries an L2 weight penalty.
x = layers.Dense(512, activation='relu', kernel_regularizer=regularizers.l2(0.01))(merged)
x = layers.BatchNormalization()(x)
x = layers.Dense(256, activation='relu')(x)

# Single linear unit for the regression output, forced to float32.
output = layers.Dense(1, dtype='float32')(x)
# NOTE(review): the layers come from `keras` while the Model class comes from
# `tf.keras`; these only interoperate when both resolve to the same Keras
# version -- confirm for the target environment.
model = tf.keras.Model(inputs=[numerical_input, ticker_input], outputs=output)


In [None]:
# Train directly on mean absolute error; 'mae' is additionally reported as a metric.
model.compile(
    loss='mean_absolute_error',
    metrics=['mae'],
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
)

# Halve the learning rate whenever val_loss has not improved for 10 epochs,
# never going below 1e-6.
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10, min_lr=1e-6)

# Inputs are keyed by the names given to the model's Input layers.
train_inputs = {'num_input': X_train_num_seq, 'ticker_input': X_train_cat}
holdout_inputs = {'num_input': X_test_num_seq, 'ticker_input': X_test_cat}

# Fit for 100 epochs; the held-out split is used as validation data and
# therefore drives the learning-rate schedule.
history = model.fit(
    train_inputs,
    y_train,
    validation_data=(holdout_inputs, y_test),
    epochs=100,
    batch_size=2048,
    callbacks=[reduce_lr],
    verbose=1,
)

# Persist the model as left by the final epoch.
model.save("model_keras_native_lstm.keras")