# Model Training Notebook

In [1]:
import numpy as np
import torch
from torch.utils.data import DataLoader, Dataset

from simulator.objects.policies.architectures import ModelTask
from simulator.objects.policies.architectures.perceptron import MultiLayerPerceptron
from simulator.objects.stock import Stock

torch.manual_seed(0)

<torch._C.Generator at 0x2177e5e36d0>

In [2]:
def generate_example_stock_features(n_stocks: int) -> torch.Tensor:
    output = []
    noise_dist = torch.distributions.Normal(loc=0, scale=0.001)
    for _ in range(n_stocks):
        cash = torch.rand(size=(1,)) * 110000 - 10000
        earning_value_of_assets = torch.rand(size=(1,)) * 20000 + 10000
        latest_quarterly_earnings = torch.rand(size=(1,)) * 20000 + 10000
        start_price = torch.rand(size=(1,)) * 200 + 10
        price_slope = torch.rand(size=(1,)) * 0.01 - 0.005
        growth_component = start_price + price_slope * torch.arange(0, 1825)
        noise_component = noise_dist.sample(sample_shape=(1825,))
        price_history = growth_component + noise_component
        quality_of_leadership = torch.rand(size=(1,))
        stock = Stock(
            cash=cash.item(),
            earning_value_of_assets=earning_value_of_assets.item(),
            latest_quarterly_earnings=latest_quarterly_earnings.item(),
            price_history=price_history.numpy(),
            quality_of_leadership=quality_of_leadership.item(),
            stock_volatility=0.5,
        )

        output.append(np.append(stock.get_stock_features(), 0))

    return torch.tensor(output)


class StockDataset(Dataset):
    def __init__(self, data, labels):
        self.data = torch.tensor(data, dtype=torch.float32)
        self.labels = torch.tensor(labels, dtype=torch.float32)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx], self.labels[idx]

In [3]:
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"device: {device}")

device: cuda


In [4]:
N_SAMPLES = 5000
VALID_RATIO = 0.15
TEST_RATIO = 0.1

# NOTE: SWITCH TO PERCENT ERROR LOSS OR SOME VARIANT

stock_features = generate_example_stock_features(N_SAMPLES)
stock_labels = stock_features[:, 0]

stock_dataset = StockDataset(stock_features, stock_labels)
val_length = int(N_SAMPLES * VALID_RATIO)
test_length = int(N_SAMPLES * TEST_RATIO)
train_length = N_SAMPLES - (val_length + test_length)

train_dataset, val_dataset, test_dataset = torch.utils.data.random_split(
    stock_dataset, lengths=[train_length, val_length, test_length]
)

train_dataloader = DataLoader(train_dataset, batch_size=128, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=val_length, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=test_length, shuffle=True)

  return torch.tensor(output)
  self.data = torch.tensor(data, dtype=torch.float32)
  self.labels = torch.tensor(labels, dtype=torch.float32)


In [None]:
N_EPOCHS = 100

model = MultiLayerPerceptron(
    in_channels=14,
    hidden_channels=[16, 32],
    n_classes=1,
    model_task=ModelTask.REGRESSOR,
).to(device)

optimizer = torch.optim.Adam(params=model.parameters(), lr=0.001)
loss_fun = torch.nn.MSELoss()

for i in range(N_EPOCHS):
    train_features, train_labels = next(iter(train_dataloader))
    val_features, val_labels = next(iter(val_dataloader))

    train_features = train_features.to(device)
    train_labels = train_labels.to(device)
    val_features = val_features.to(device)
    val_labels = val_labels.to(device)

    preds = model(train_features)
    loss = loss_fun(preds, train_labels)

    val_preds = model(val_features)
    val_loss = loss_fun(val_preds, val_labels)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    print(f"Epoch: {i}; training_loss: {loss}; validation_loss: {val_loss}")

tensor([ 481.4355,  592.3110,  481.0538,  432.3416, -123.6647,  535.6890,
         538.9157,  266.4671,  433.1505,  327.0678,  596.9601,  332.4056,
         -13.4751,  411.0263,  300.2988,  572.4095,  545.1811,  470.3801,
         542.8246,   24.1726,  257.5952,  335.1741,  252.7847,  484.6864,
        -218.6537,  210.4191,  313.3604, -199.5735,  506.9567,   63.5730,
         253.1318,  -62.9319,   69.4868,  437.5997,  289.0756,  361.8966,
         358.1302,  507.0768, -161.3343,  580.2241,  421.0995,  499.5115,
         613.0241, -129.8923,  521.0204,  174.8167,  538.4290,  691.6048,
         579.0468,  272.8017,  534.9308,   -2.5504,  403.7343,  556.1829,
         -18.5812,   16.6875,  394.5534,  472.5803,  536.5507,  526.0880,
         150.3609,  459.9489,  642.9243,   -5.8728,  449.8411,  251.9279,
         530.7426,  518.6959,  476.1462,  579.7624,  519.9696,  560.1100,
         453.5096,  441.5818,  491.0075,  491.1055,  200.1434,  384.2862,
         207.3998,  212.0990,  258.062

In [6]:
test_features, test_labels = next(iter(test_dataloader))
print(loss_fun(model(test_features.to(device)), test_labels.to(device)))

tensor([3713.9187, 3770.7571, 3857.1934, 4296.0913, 2795.8987, 4061.3416,
        4238.9326, 4112.0625, 3409.2991, 4906.7388, 4470.3628, 4702.2617,
        4044.3711, 3371.3704, 2989.3230, 3915.0779, 3154.3372, 1926.7844,
        4336.8232, 3704.4392, 4581.3618, 4388.1191, 3346.7253, 3798.8394,
        3028.4358, 3165.1689, 4788.9277, 3295.8901, 2937.5747, 4232.9556,
        2836.3503, 4222.9619, 3830.1963, 3055.9153, 2883.3528, 3739.2471,
        4026.7749, 3930.9778, 4353.5435, 3167.8796, 3914.4590, 3583.2046,
        3203.1431, 3919.0391, 3961.6726, 4495.9546, 2709.8059, 4061.9875,
        4606.5410, 2660.8401, 4179.4233, 3902.9204, 3258.3462, 3444.3936,
        3926.8831, 3073.5449, 4010.7957, 4769.5684, 3420.9727, 4455.8145,
        3806.2302, 3382.5972, 4675.5840, 3839.3906, 4186.0303, 2067.6162,
        2347.5693, 4161.6890, 4204.5161, 3625.8508, 2754.9788, 3072.4856,
        4786.0527, 2720.5552, 2744.9880, 3687.2119, 3624.0962, 3120.6294,
        4381.8267, 4285.7856, 4038.728

In [7]:
torch.save(model, "model2.pt")