In [1]:
import os
import torch
import time
from transformer_common import TransformerConfig, TransformerRunner
import matplotlib.pyplot as plt
# Report whether PyTorch can see a CUDA device on this machine.
print(torch.cuda.is_available())

# Dataset source: https://www.kaggle.com/datasets/footballjoe789/us-stock-dataset/data


False


In [2]:
from transformer_common import TimeseriesDataloader

# Start the timer (perf_counter is monotonic, so it is the right clock for measuring intervals)
start_time = time.perf_counter()

input_tensor_file = 'stock_history_data.pt'

# Use the GPU when one is available, otherwise stay on the CPU.
# An unconditional .cuda() raises on CPU-only machines.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

if os.path.exists(input_tensor_file):
    # map_location lets a cache written on a GPU machine load on a CPU-only one (and vice versa);
    # weights_only=True restricts unpickling to tensor data instead of arbitrary Python objects.
    data = torch.load(input_tensor_file, map_location=device, weights_only=True)
    ln = len(data)
    print(f"Tensor loaded successfully: {input_tensor_file}, len={ln}")
else:
    directory_path = './us-stock-dataset/Data/StockHistory'

    # Every CSV in the directory is treated as one stock; the stock name is the file name without extension.
    all_files = [os.path.splitext(f)[0] for f in os.listdir(directory_path) if f.endswith('.csv')]

    stocks_to_load = set(all_files)
    dataloader = TimeseriesDataloader(directory_path, stocks_to_load, add_diff=False)

    # Swap the two axes of the loader output before moving it to the selected device.
    data = dataloader.get_data().transpose(0, 1).to(device)

    # Cache a CPU copy so the file does not carry a CUDA device tag.
    torch.save(data.cpu(), input_tensor_file)

    print(f"Tensor saved successfully: {input_tensor_file}")

end_time = time.perf_counter()
execution_time = end_time - start_time

print(f"Execution time: {execution_time:.4f} seconds")

Tensor loaded successfully: stock_history_data.pt, len=6262
Execution time: 0.0164 seconds


  data = torch.load(input_tensor_file)


In [3]:

def filter_invalid_stocks(tensor):
    """
    Drop rows (stocks) that carry no usable signal.

    NaNs are first replaced by zeros. A row is then discarded when it is
    entirely zero (which includes rows that were entirely NaN) or when its
    variance along the time axis is exactly zero (a constant series).

    Parameters:
    - tensor (torch.Tensor): Time-series data with shape [num_stocks, time_steps].

    Returns:
    - torch.Tensor: The surviving rows, taken from the NaN-replaced tensor
      (so any NaN inside a kept row comes back as 0.0).
    """
    cleaned = torch.nan_to_num(tensor, nan=0.0)

    # True for rows that contain at least one non-zero sample.
    has_signal = (cleaned != 0).any(dim=1)

    # True for rows whose values actually change over time.
    has_movement = cleaned.var(dim=1) != 0

    keep_mask = has_signal & has_movement
    return cleaned[keep_mask]
# Show the tensor shape before and after removing unusable stocks.
shape_before_filtering = data.shape
print(shape_before_filtering)

data = filter_invalid_stocks(data)
print(f"shape after filtering {data.shape}")

torch.Size([6262, 5283])
shape after filtering torch.Size([4735, 5283])


In [None]:

def plot_timeseries(tensor, num_charts=5, charts_per_row=5):
    """
    Plots time-series data for multiple stocks from the tensor in a grid layout.

    Parameters:
    - tensor (torch.Tensor): Time-series data with shape [num_stocks, time_steps] (must be 2-D).
    - num_charts (int): The number of charts to plot. Each chart corresponds to one stock
      (row); the value is capped at the number of rows available.
    - charts_per_row (int): Number of charts placed on each grid row (default 5).

    Returns:
    - None. When there is nothing to plot (empty tensor or num_charts <= 0) the function
      returns without creating a figure.

    Raises:
    - ValueError: if charts_per_row is smaller than 1.
    """
    if charts_per_row < 1:
        raise ValueError("charts_per_row must be a positive integer")

    # Unpacking also enforces that the input is 2-D.
    num_stocks, _ = tensor.shape

    # Ensure num_charts doesn't exceed the number of available stocks
    num_charts = min(num_charts, num_stocks)
    if num_charts <= 0:
        # plt.subplots() rejects a grid with zero rows, so bail out early.
        return

    # Ceiling division: a partially filled last row still needs a full grid row.
    rows = (num_charts + charts_per_row - 1) // charts_per_row

    # squeeze=False always yields a 2-D axes array, whatever the grid size.
    fig, axes = plt.subplots(
        rows,
        charts_per_row,
        figsize=(3 * charts_per_row, 3 * rows),
        squeeze=False,
    )

    # Flatten axes array to easily index through them
    axes = axes.flatten()

    # Plot each stock's time-series data
    for i in range(num_charts):
        ax = axes[i]
        # detach() allows tensors that still track gradients; cpu() handles GPU tensors.
        ax.plot(tensor[i].detach().cpu().numpy())
        ax.set_title(f"Stock {i+1} - Time Series")
        ax.set_xlabel("Time Steps")
        ax.set_ylabel("Stock Value")
        ax.grid(True)

    # Hide any unused subplots in the last row
    for ax in axes[num_charts:]:
        ax.axis('off')

    plt.tight_layout()  # Adjust the layout to avoid overlap
    plt.show()


# Plot the first 10 stocks' time-series data
plot_timeseries(data, num_charts=10)

In [None]:
# data = torch.diff(data, dim=1)


def scale_timeseries_data(data, dim=0):
    """
    Standardize a tensor to zero mean and unit standard deviation along `dim`.

    NaNs are replaced by 0.0 before the statistics are computed. Slices whose
    standard deviation is exactly zero are divided by 1 instead, so they come
    back as all zeros rather than NaN/inf.

    Parameters:
    - data (torch.Tensor): Values to standardize.
    - dim (int): Dimension along which mean and std are computed.

    Returns:
    - torch.Tensor: Standardized tensor with the same shape as `data`.
    """
    finite = torch.nan_to_num(data, nan=0.0)

    center = finite.mean(dim=dim, keepdim=True)
    spread = finite.std(dim=dim, keepdim=True)

    # A zero spread would cause a division by zero; use 1 for those slices.
    spread = spread.masked_fill(spread == 0, 1)

    return (finite - center) / spread
    
# Per-stock statistics: one mean/std per row of the [num_stocks, time_steps] tensor.
# They are kept at module level as `mean` / `std` because the prediction cell needs them
# to map model output back to the original price scale. They are computed the same way
# scale_timeseries_data does internally (NaNs -> 0, zero std -> 1).
# NOTE: run this cell once per kernel session; re-running it would recompute the
# statistics from data that has already been scaled.
data_unscaled = torch.nan_to_num(data, nan=0.0)
mean = data_unscaled.mean(dim=1, keepdim=True)
std = data_unscaled.std(dim=1, keepdim=True)
std[std == 0] = 1

# Scale each stock independently: dim=1 is the time axis, so every row is standardized
# with its own mean/std (dim=0 would instead normalize across stocks at each time step).
data = scale_timeseries_data(data_unscaled, dim=1)

In [None]:
# Swap the axes so time comes first: [num_stocks, time_steps] -> [time_steps, num_stocks].
training_data = torch.transpose(data, 0, 1)
print(training_data.shape)

In [None]:
# One channel per stock that survived filtering. Derived from the data so it cannot drift
# out of sync with the dataset (4735 for the current files); set a smaller integer here to
# train on only the first N stocks.
num_of_channels = data.shape[0]

In [None]:
# Keep the first `num_of_channels` stocks and put time on the leading axis.
selected_stocks = data[:num_of_channels]
training_data = torch.transpose(selected_stocks, 0, 1)
print(training_data.shape)

In [None]:
from transformers import ConvKarpathyTransformerModel

# Model / training hyper-parameters. Input and output widths both equal the number of
# stock channels, so every step of the sequence carries one value per stock.
config = TransformerConfig(
    # precision=torch.bfloat16,
    precision=torch.float32,
    batch_size=128,
    block_size=16,  # context length; the prediction cell feeds the last `block_size` rows
    causal=True,
    input_embed=num_of_channels,
    n_embed=128,
    output_embed=num_of_channels,
    n_head=8,
    n_layer=16,
    learning_rate=1e-4
)
# Set after construction and before the runner is built.
# Presumably the number of iterations between evaluations - confirm in transformer_common.
config.eval_interval=50

# NOTE(review): the same tensor is passed for both data arguments. If the second one is the
# validation split (confirm against TransformerRunner), evaluation is not done on held-out data.
trainer1 = TransformerRunner(config, ConvKarpathyTransformerModel(config), training_data, training_data)

# if not trainer1.load_model("abc"):
trainer1.train_iterate_n(20000)

In [None]:

# Reverse normalization function
def inverse_normalize(tensor, original_data, dim=0):
    """
    Undo the standardization performed by scale_timeseries_data.

    The statistics are recomputed from `original_data` exactly the way the forward
    scaling computes them: NaNs are replaced by 0.0 first, and a standard deviation
    of zero is treated as 1. Without this the result would not be a true inverse
    (NaNs would poison the mean/std, and constant series would use a different std).

    Parameters:
    - tensor (torch.Tensor): Standardized values to map back.
    - original_data (torch.Tensor): The unscaled data the standardization was fitted on.
    - dim (int): Dimension along which the statistics were computed.

    Returns:
    - torch.Tensor: `tensor` expressed in the scale of `original_data`.
    """
    original_data = torch.nan_to_num(original_data, nan=0.0)
    mean = original_data.mean(dim=dim, keepdim=True)
    std = original_data.std(dim=dim, keepdim=True)
    # Mirror the forward pass: zero-variance slices were divided by 1, not by 0.
    std = torch.where(std == 0, torch.ones_like(std), std)
    return tensor * std + mean

# Reverse differencing function
def inverse_difference(predictions, last_known_value):
    """
    Rebuild absolute values from a sequence of predicted differences.

    Parameters:
    - predictions: Iterable of step-to-step differences.
    - last_known_value: The value the first difference is added to.

    Returns:
    - list: Running totals, one per difference; `last_known_value` itself is not included.
    """
    running = last_known_value
    restored = []
    for step in predictions:
        # Build a new value instead of using += so the caller's object is never modified in place.
        running = running + step
        restored.append(running)
    return restored

# Generate predictions for `days_to_predict` future steps across all stock channels
num_stocks = 20  # Number of stocks to predict; not used by the generation call below
days_to_predict = 5  # Number of future steps to generate
predictions = []  # Only referenced by the commented-out plotting code further down

# Last `block_size` rows of the [time, channels] training data, with a leading axis of size 1 added
context = training_data[-config.block_size:, :].unsqueeze(0)  # Use last block_size days as context
print(f"context={context.shape}")


# transpose(0, 2) swaps the first and last axes of the generated tensor; detach() drops the autograd graph.
# NOTE(review): despite the name, the differencing step is commented out earlier, so these are
# presumably scaled values rather than differences - confirm.
prediction_diff = trainer1.generate(context, max_new_tokens=days_to_predict).transpose(0, 2).detach()
print(prediction_diff.shape)

# # Inverse normalization
# prediction_diff = inverse_normalize(prediction_diff, training_data)

# # Inverse differencing using last value from original data
# last_known_value = training_data[:, -1]
# prediction = inverse_difference(prediction_diff.squeeze(), last_known_value)
# predictions.append(prediction)

# # Plot the predictions for each stock
# fig, axes = plt.subplots(4, 5, figsize=(20, 15))  # 4 rows x 5 columns for 20 stocks

# for i, ax in enumerate(axes.flatten()):
#     ax.plot(predictions[i], label=f'Stock {i+1}')
#     ax.set_title(f'Stock {i+1} - 100-Day Prediction')
#     ax.set_xlabel('Days')
#     ax.set_ylabel('Price')
#     ax.grid(True)
#     ax.legend()

# plt.tight_layout()
# plt.show()

def inverse_scale_timeseries_data(scaled_data, mean, std):
    """
    Map standardized values back to their original scale.

    Parameters:
    - scaled_data: Standardized values.
    - mean: Mean that was subtracted during scaling (must broadcast against `scaled_data`).
    - std: Standard deviation that was divided by during scaling (must broadcast as well).

    Returns:
    - `scaled_data * std + mean`.
    """
    rescaled = scaled_data * std
    return rescaled + mean

# Map the generated values back to the original price scale.
# NOTE(review): `mean` and `std` must exist at module level before this cell runs and hold
# the statistics used for scaling; scale_timeseries_data only computes them as locals.
# unsqueeze(-1) appends a trailing axis so they can broadcast against the 3-D prediction tensor.
predictions_original_scale = inverse_scale_timeseries_data(prediction_diff, mean.unsqueeze(-1), std.unsqueeze(-1))


# Index 0 on the last axis yields the 2-D tensor that plot_timeseries expects.
plot_timeseries(predictions_original_scale[:,:,0])