# Stock Trend Classification
Here, we classify the stock movement $n$ weeks ahead as a binary up/down trend, based on historical data.

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch_geometric.data import Data
from torch_geometric.loader import DataLoader

from dataset.stock import StocksDataset
from models import TGCN, train, measure_accuracy

device = torch.device('mps' if torch.backends.mps.is_available() else 'cpu')

  from .autonotebook import tqdm as notebook_tqdm


The data from the custom PyG dataset for forecasting is loaded into a PyTorch dataloader.
A "transform" is applied to change the targets `y` of the dataset to a binary buy/sell class instead of the close price. We have a buy signal (1) if the stock return two weeks ahead was higher that the average market return, else sell (0).

In [2]:
def future_close_price_to_buy_sell_class(sample: Data):
	market_return = ((sample.close_price_y[:, -1] - sample.close_price[:, -1]) / sample.close_price[:, -1]).mean()
	sample.returns = ((sample.close_price_y[:, -1] - sample.close_price[:, -1]) / sample.close_price[:, -1]).unsqueeze(1)
	sample.market_return = market_return
	sample.y = (sample.returns >= 0).float()
	return sample

In [3]:
weeks_ahead = 1

dataset = StocksDataset(future_window=weeks_ahead * 5, force_reload=True, transform=future_close_price_to_buy_sell_class)
dataset, dataset[0]

Processing...
Done!


(StocksDataset(2443),
 Data(x=[98, 8, 25], edge_index=[2, 638], y=[98, 1], edge_weight=[638], close_price=[98, 25], close_price_y=[98, 5], returns=[98, 1], market_return=0.02591547742486))

In [4]:
for i in range(0, 10):
	print(f"Stock return: {dataset[i].returns[i].item() * 100:.2f}%, trend: {['Down', 'Up'][int(dataset[i].y[i].item())]}")

Stock return: 1.72%, trend: Up
Stock return: 2.86%, trend: Up
Stock return: 2.96%, trend: Up
Stock return: 5.91%, trend: Up
Stock return: 9.30%, trend: Up
Stock return: -1.92%, trend: Down
Stock return: 6.32%, trend: Up
Stock return: -1.71%, trend: Down
Stock return: 3.08%, trend: Up
Stock return: -1.25%, trend: Down


In [5]:
train_part = .9
batch_size = 32

train_dataset, test_dataset = dataset[:int(train_part * len(dataset))], dataset[int(train_part * len(dataset)):]
print(f"Train dataset: {len(train_dataset)}, Test dataset: {len(test_dataset)}")
train_dataloader, test_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True), DataLoader(test_dataset, batch_size=len(test_dataset), shuffle=True)

Train dataset: 2198, Test dataset: 245


In [None]:
in_channels, out_channels, hidden_size, layers_nb, dropout = dataset[0].x.shape[-2], 1, 16, 2, .3
model = TGCN(in_channels, out_channels, hidden_size, layers_nb, use_gat=False).to(device)

lr, weight_decay, num_epochs = 0.005, 1e-5, 16
	
criterion = nn.BCEWithLogitsLoss().to(device)
optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
model

TGCN(
  (cells): ModuleList(
    (0): TGCNCell(
      (gcn): GCN(
        (convs): ModuleList(
          (0): GCNConv(8, 16)
          (1): GCNConv(16, 16)
        )
      )
      (lin_u): Linear(in_features=40, out_features=16, bias=True)
      (lin_r): Linear(in_features=40, out_features=16, bias=True)
      (lin_c): Linear(in_features=40, out_features=16, bias=True)
    )
    (1): TGCNCell(
      (gcn): GCN(
        (convs): ModuleList(
          (0-1): 2 x GCNConv(16, 16)
        )
      )
      (lin_u): Linear(in_features=48, out_features=16, bias=True)
      (lin_r): Linear(in_features=48, out_features=16, bias=True)
      (lin_c): Linear(in_features=48, out_features=16, bias=True)
    )
  )
  (out): Sequential(
    (0): Linear(in_features=16, out_features=1, bias=True)
    (1): Identity()
  )
)

In [7]:
train(model, optimizer, criterion, train_dataloader, test_dataloader, num_epochs, device, "UpDownTrend", measure_acc=True)

Epochs: 100%|██████████| 16/16 [25:12<00:00, 94.51s/it, Batch=100.0%]


In [8]:
torch.save(model.state_dict(), f"models/saved_models/UpDownTrend_{model.__class__.__name__}.pt")

In [10]:
model = TGCN(in_channels, out_channels, hidden_size, layers_nb, use_gat=False).to(device)
model.load_state_dict(torch.load(f"models/saved_models/UpDownTrend_{model.__class__.__name__}.pt"))

<All keys matched successfully>

In [12]:
full_train_data = next(iter(DataLoader(train_dataset, batch_size=len(train_dataset), shuffle=True))).to(device)
acc = measure_accuracy(model, full_train_data)

print(f"Train accuracy: {acc * 100:.1f}%")

Train accuracy: 57.9%


In [15]:
full_test_data = next(iter(train_dataloader)).to(device)
acc = measure_accuracy(model, full_test_data)

print(f"Test accuracy: {acc * 100:.1f}%")

Test accuracy: 57.0%
