# Model Training Notebook

In [14]:
import numpy as np
import torch
from torch.utils.data import DataLoader, Dataset

from simulator.objects.policies.architectures.perceptron import MultiLayerPerceptron
from simulator.objects.policies.architectures import ModelTask
from simulator.objects.stock import Stock


torch.manual_seed(0)


<torch._C.Generator at 0x19d2dcb36f0>

In [15]:
def generate_example_stock_features(n_stocks: int) -> torch.Tensor:
    output = []
    noise_dist = torch.distributions.Normal(loc=0, scale=0.001)
    for _ in range(n_stocks):
        cash = torch.rand(size=(1,)) * 10000 - 2000
        earning_value_of_assets = torch.rand(size=(1,)) * 20000
        latest_quarterly_earnings = torch.rand(size=(1,)) * 20000
        start_price = torch.rand(size=(1,)) * 90 + 10
        price_slope = torch.rand(size=(1,)) * 0.01 - 0.005
        growth_component = start_price + price_slope * torch.arange(0, 1825)
        noise_component = noise_dist.sample(sample_shape=(1825,))
        price_history = growth_component + noise_component
        quality_of_leadership = torch.rand(size=(1,))
        stock = Stock(
            cash=cash.item(),
            earning_value_of_assets=earning_value_of_assets.item(),
            latest_quarterly_earnings=latest_quarterly_earnings.item(),
            price_history=price_history.numpy(),
            quality_of_leadership=quality_of_leadership.item(),
            stock_volatility=0.5
        )

        output.append(np.append(stock.get_stock_features(), 0))

    return torch.tensor(output)

class StockDataset(Dataset):
    def __init__(self, data, labels):
        self.data = torch.tensor(data, dtype=torch.float32)
        self.labels = torch.tensor(labels, dtype=torch.float32)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx], self.labels[idx]

In [16]:
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"device: {device}")

device: cuda


In [17]:
N_SAMPLES = 5000
VALID_RATIO = 0.15
TEST_RATIO = 0.1

stock_features = generate_example_stock_features(N_SAMPLES)
stock_labels = stock_features[:, 0]

stock_dataset = StockDataset(stock_features, stock_labels)
val_length = int(N_SAMPLES * VALID_RATIO)
test_length = int(N_SAMPLES * TEST_RATIO)
train_length = N_SAMPLES - (val_length + test_length)

train_dataset, val_dataset, test_dataset = torch.utils.data.random_split(stock_dataset, lengths=[train_length, val_length, test_length])

train_dataloader = DataLoader(train_dataset, batch_size=128, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=val_length, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=test_length, shuffle=True)

  self.data = torch.tensor(data, dtype=torch.float32)
  self.labels = torch.tensor(labels, dtype=torch.float32)


In [18]:
N_EPOCHS = 50

model = MultiLayerPerceptron(
    in_channels=14,
    hidden_channels=[16, 32],
    n_classes=1, 
    model_task=ModelTask.REGRESSOR
).to(device)

optimizer = torch.optim.Adam(params=model.parameters(), lr=0.001)
loss_fun = torch.nn.MSELoss()

for i in range(N_EPOCHS):
    train_features, train_labels = next(iter(train_dataloader))
    val_features, val_labels = next(iter(val_dataloader))

    train_features = train_features.to(device)
    train_labels = train_labels.to(device)
    val_features = val_features.to(device)
    val_labels = val_labels.to(device)

    preds = model(train_features)
    loss = loss_fun(preds, train_labels)

    val_preds = model(val_features)
    val_loss = loss_fun(val_preds, val_labels)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    print(f"Epoch: {i}; training_loss: {loss}; validation_loss: {val_loss}")



Epoch: 0; training_loss: 11076.826171875; validation_loss: 13681.4189453125
Epoch: 1; training_loss: 9682.2861328125; validation_loss: 10122.6611328125
Epoch: 2; training_loss: 7392.1875; validation_loss: 8367.1328125
Epoch: 3; training_loss: 7529.39794921875; validation_loss: 7611.2646484375
Epoch: 4; training_loss: 7787.353515625; validation_loss: 7084.88916015625
Epoch: 5; training_loss: 6195.8818359375; validation_loss: 6310.4658203125
Epoch: 6; training_loss: 5978.92529296875; validation_loss: 5427.51904296875
Epoch: 7; training_loss: 4515.642578125; validation_loss: 4496.30419921875
Epoch: 8; training_loss: 3599.224609375; validation_loss: 3719.7138671875
Epoch: 9; training_loss: 3035.80615234375; validation_loss: 3141.724609375
Epoch: 10; training_loss: 3286.528564453125; validation_loss: 2744.127685546875
Epoch: 11; training_loss: 2248.07177734375; validation_loss: 2505.010986328125
Epoch: 12; training_loss: 2094.518798828125; validation_loss: 2356.3623046875
Epoch: 13; trainin

In [19]:
test_features, test_labels = next(iter(test_dataloader))
print(loss_fun(model(test_features.to(device)), test_labels.to(device)))

tensor(969.8900, device='cuda:0', grad_fn=<MseLossBackward0>)


In [20]:
torch.save(model, "model.pt")