In [1]:
!rm -fr r_trader out
!mkdir out input
!git clone https://github.com/abreham-atlaw/r_trader
!cd r_trader &&  git checkout deep-reinforcement.training-experiment-cnn
!pip install cattrs positional-encodings==6.0.1 dropbox pymongo==4.3.3 dependency-injector==4.41.0

Cloning into 'r_trader'...
remote: Enumerating objects: 13240, done.[K
remote: Counting objects: 100% (4154/4154), done.[K
remote: Compressing objects: 100% (1224/1224), done.[K
remote: Total 13240 (delta 3025), reused 4022 (delta 2898), pack-reused 9086 (from 1)[K
Receiving objects: 100% (13240/13240), 76.59 MiB | 30.67 MiB/s, done.
Resolving deltas: 100% (9357/9357), done.
Branch 'deep-reinforcement.training-experiment-cnn' set up to track remote branch 'deep-reinforcement.training-experiment-cnn' from 'origin'.
Switched to a new branch 'deep-reinforcement.training-experiment-cnn'
Collecting cattrs
  Downloading cattrs-24.1.2-py3-none-any.whl.metadata (8.4 kB)
Collecting positional-encodings==6.0.1
  Downloading positional_encodings-6.0.1-py3-none-any.whl.metadata (6.6 kB)
Collecting dropbox
  Downloading dropbox-12.0.2-py3-none-any.whl.metadata (4.3 kB)
Collecting pymongo==4.3.3
  Downloading pymongo-4.3.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (8.6 

In [2]:
import os
import sys

# The presence of /kaggle/working decides which filesystem layout is used below.
KAGGLE_ENV = os.path.exists("/kaggle/working")
if KAGGLE_ENV:
    REPO_PATH = "/kaggle/working/r_trader"
else:
    REPO_PATH = "/content/r_trader"

print(f"KAGGLE ENV: {KAGGLE_ENV}")

# Put the cloned repository on the import path so its packages can be imported.
sys.path.append(REPO_PATH)

KAGGLE ENV: True


In [3]:
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torch.optim import Adam, SGD, Adagrad
import matplotlib.pyplot as plt

import os
import signal 

from core.utils.research.data.load.dataset import BaseDataset
from core.utils.research.training.trainer import Trainer
from core.utils.research.model.model.cnn.model import CNN
from core.utils.research.model.model.linear.model import LinearModel
from lib.utils.torch_utils.model_handler import ModelHandler
from core.utils.research.training.callbacks.checkpoint_callback import CheckpointCallback, StoreCheckpointCallback
from core.utils.research.training.data.repositories.checkpoint_repository import CheckpointRepository
from lib.utils.file_storage import PCloudClient
from core.utils.research.training.data.state import TrainingState
from core import Config
from core.utils.research.training.callbacks.metric_callback import MetricCallback
from core.utils.research.training.data.repositories.metric_repository import MetricRepository, MongoDBMetricRepository
from core.utils.kaggle import FusedManager
from core.di import init_di, ApplicationContainer
from core.utils.research.training.data.metric import MetricsContainer
from core.utils.research.model.layers import Indicators
from core.di import ServiceProvider
from core.utils.kaggle.data_repository import KaggleDataRepository

In [4]:
def download_data(root, datasets, zip_filename, kernel_mode=True, checksums=None):
    """Download the given datasets under `root` and unpack a leftover archive, if any.

    Args:
        root: Directory handed to `KaggleDataRepository` as `output_path`; also the
            directory the archive is extracted into.
        datasets: Identifiers forwarded to `download_multiple`.
        zip_filename: Archive file name handed to `KaggleDataRepository`.
        kernel_mode: Forwarded to `download_multiple` as `kernel=`.
        checksums: Forwarded unchanged to `download_multiple`.
    """
    import zipfile

    repository = KaggleDataRepository(
        output_path=root,
        zip_filename=zip_filename
    )
    repository.download_multiple(datasets, kernel=kernel_mode, checksums=checksums)

    # The former `os.system(f"unzip -d root/")` never interpolated `root` and named no
    # archive, so it could not extract anything. Extract explicitly instead.
    # NOTE(review): assumes a leftover archive, if there is one, sits at
    # <root>/<zip_filename> -- confirm against KaggleDataRepository.
    archive_path = os.path.join(root, zip_filename)
    # is_zipfile() is False for a missing or non-zip file, making this a quiet no-op
    # when the repository has already taken care of extraction.
    if zipfile.is_zipfile(archive_path):
        with zipfile.ZipFile(archive_path) as archive:
            archive.extractall(root)

In [5]:
# Folder that holds one sub-directory ("container") per dataset.
DATA_ROOT = "/kaggle/input" if KAGGLE_ENV else "/content/input"

DATASETS = [
    f"abrehamatlaw0/spinoza-ds-datapreparer-simsim-cum-0-it-2-{i}"
    for i in range(4)
]
CHECKSUMS = None
KERNEL_MODE = False
ZIP_FILENAME = "out.zip"
# Outside Kaggle the datasets are downloaded here; on Kaggle they are presumably
# already attached under DATA_ROOT.
if not KAGGLE_ENV:
    download_data(DATA_ROOT, DATASETS, ZIP_FILENAME, kernel_mode=KERNEL_MODE, checksums=CHECKSUMS)


# os.listdir() yields entries in arbitrary order and also returns plain files (for
# example a leftover archive). Keep directories only and sort them so the container
# list, and the path lists derived from it, are the same on every run.
CONTAINERS = sorted(
    path
    for path in (os.path.join(DATA_ROOT, entry) for entry in os.listdir(DATA_ROOT))
    if os.path.isdir(path)
)
# Each container contributes <container>/out/train and <container>/out/test.
DATA_PATHES, TEST_DATA_PATHES = [
    [
        os.path.join(container, "out", type_)
        for container in CONTAINERS
    ]
    for type_ in ["train", "test"]
]

# --- Run identity ---
NOTEBOOK_ID = "abrehamalemu/rtrader-training-exp-0-cnn-153-cum-0-it-4-tot"
MODEL_ID = "-".join(NOTEBOOK_ID.split("/"))

# --- Data loading ---
NUM_FILES = None
DATA_CACHE_SIZE = 2
DATALOADER_WORKERS = 4

# --- Arguments for the CNN constructor ---
EXTRA_LEN = 124
VOCAB_SIZE = 431
CHANNELS = [256] * 4
KERNEL_SIZES = [3] * len(CHANNELS)
POOL_SIZES = [3] * len(CHANNELS)
DROPOUT_RATE = 0
ACTIVATION = nn.LeakyReLU()
BLOCK_SIZE = 1024 + EXTRA_LEN
PADDING = 0
LINEAR_COLLAPSE = True
AVG_POOL = True
NORM = [False] * len(CHANNELS)
LR = 1e-4

# --- Arguments for the Indicators layer ---
INDICATORS_DELTA = True
INDICATORS_SO = []
INDICATORS_RSI = []

# --- Arguments for the LinearModel used as the CNN's ff_block ---
USE_FF = True
FF_LINEAR_LAYERS = [256] * 4 + [VOCAB_SIZE + 1]
FF_LINEAR_ACTIVATION = nn.LeakyReLU()
FF_LINEAR_INIT = None
FF_LINEAR_NORM = [False] * len(FF_LINEAR_LAYERS)
FF_DROPOUT = 0

# --- Training schedule ---
BATCH_SIZE = 64
EPOCHS = 300
# Passed to signal.alarm(), so this is a number of seconds (10 hours).
TIMEOUT = 10 * 60 * 60

DTYPE = torch.float32
NP_DTYPE = np.float32

# --- Persistence ---
MODEL_URL = None
SAVE_PATH = os.path.abspath(os.path.join("./out", f"{MODEL_ID}.zip"))

# Metric store for this run, built from the MongoDB URL in Config and the run's MODEL_ID.
METRIC_REPOSITORY = MongoDBMetricRepository(Config.MONGODB_URL, MODEL_ID)

# Callbacks handed to the Trainer: one stores checkpoints at SAVE_PATH, the other
# is given the metric repository.
CALLBACKS = [
    StoreCheckpointCallback(path=SAVE_PATH),
    MetricCallback(METRIC_REPOSITORY),
]

In [6]:
# Checkpoint repository built on the file storage supplied by the ServiceProvider.
file_storage = ServiceProvider.provide_file_storage()
repository = CheckpointRepository(file_storage)

In [7]:
# Look up a stored (state, model) pair for this run; build everything from scratch
# when there is none. Assign `state_model = None` here to force a fresh model.
state_model = repository.get(MODEL_ID)
if state_model is None:
    print("[+]Creating a new model...")
    if USE_FF:
        ff = LinearModel(
            dropout_rate=FF_DROPOUT,
            layer_sizes=FF_LINEAR_LAYERS,
            hidden_activation=FF_LINEAR_ACTIVATION,
            init_fn=FF_LINEAR_INIT,
            norm=FF_LINEAR_NORM
        )
    else:
        ff = None

    indicators = Indicators(
        delta=INDICATORS_DELTA,
        so=INDICATORS_SO,
        rsi=INDICATORS_RSI
    )

    model = CNN(
        extra_len=EXTRA_LEN,
        conv_channels=CHANNELS,
        kernel_sizes=KERNEL_SIZES,
        hidden_activation=ACTIVATION,
        pool_sizes=POOL_SIZES,
        dropout_rate=DROPOUT_RATE,
        padding=PADDING,
        avg_pool=AVG_POOL,
        linear_collapse=LINEAR_COLLAPSE,
        norm=NORM,
        ff_block=ff,
        indicators=indicators,
        input_size=BLOCK_SIZE
    )

    # A brand-new model starts from a brand-new training state.
    state = TrainingState(
        epoch=0,
        batch=0,
        id=MODEL_ID
    )
else:
    print("[+]Using loaded model...")
    # Keep the restored state. It used to be overwritten with an epoch-0/batch-0
    # state right after being unpacked, so a resumed run always restarted its
    # progress counters.
    state, model = state_model

[+]Creating a new model...


In [8]:
# Training data: a BaseDataset over the train directories, served in batches.
dataset = BaseDataset(root_dirs=DATA_PATHES, out_dtypes=NP_DTYPE, num_files=NUM_FILES)
dataloader = DataLoader(
    dataset,
    batch_size=BATCH_SIZE,
    num_workers=DATALOADER_WORKERS,
    pin_memory=True,
)

In [9]:
# Held-out data: same dataset/loader settings as for training, over the test directories.
test_dataset = BaseDataset(root_dirs=TEST_DATA_PATHES, out_dtypes=NP_DTYPE, num_files=NUM_FILES)
test_dataloader = DataLoader(
    test_dataset,
    batch_size=BATCH_SIZE,
    num_workers=DATALOADER_WORKERS,
    pin_memory=True,
)

In [10]:
# Build the trainer around the model, wired with the checkpoint and metric callbacks.
trainer = Trainer(model, callbacks=CALLBACKS)

In [11]:
# Loss for the trainer's classification slot and for its regression slot.
trainer.cls_loss_function = nn.CrossEntropyLoss()
trainer.reg_loss_function = nn.MSELoss()

# Adam over the parameters of the model held by the trainer.
trainer.optimizer = Adam(params=trainer.model.parameters(), lr=LR)

In [12]:
class TimeoutException(Exception):
    """Signals that the time budget for the run has elapsed."""

def handle_timeout(*args, **kwargs):
    """Signal handler: ignore whatever arguments it is called with and raise TimeoutException."""
    raise TimeoutException

# Install the handler first, then arm the alarm: SIGALRM is delivered TIMEOUT seconds
# from now and handle_timeout turns it into a TimeoutException in whatever code is
# running at that moment.
signal.signal(signal.SIGALRM, handle_timeout)
# Left as the last expression so the cell shows signal.alarm()'s return value
# (seconds that were left on a previously scheduled alarm, 0 if there was none).
signal.alarm(TIMEOUT)

0

In [None]:
try:
    trainer.train(dataloader, val_dataloader=test_dataloader, epochs=EPOCHS, progress=True, progress_interval=100, state=state, cls_loss_only=False)
except TimeoutException:
    # Hitting the time budget is the expected way for a long run to stop; say so
    # instead of ending silently, then fall through to the save cells.
    print("[+]Training stopped: time budget reached.")
finally:
    # If training ended before the alarm fired, the alarm is still pending and would
    # raise TimeoutException inside a later cell (for example while saving). Cancel it.
    signal.alarm(0)

Model Summary
Layer Name							Number of Parameters
layers.0.weight			1536
layers.0.bias			256
layers.1.weight			196608
layers.1.bias			256
layers.2.weight			196608
layers.2.bias			256
layers.3.weight			196608
layers.3.bias			256
ff_block.layers.0.weight			4029440
ff_block.layers.0.bias			256
ff_block.layers.1.weight			65536
ff_block.layers.1.bias			256
ff_block.layers.2.weight			65536
ff_block.layers.2.bias			256
ff_block.layers.3.weight			65536
ff_block.layers.3.bias			256
ff_block.layers.4.weight			110592
ff_block.layers.4.bias			432
Total Params:4930480
[+]Shuffling dataset...


  self.pid = os.fork()
Epoch 1 loss: 6.072962284088135(cls: 6.070236682891846, reg: 0.002725532976910472):   0%|          | 42/32310 [00:55<11:23:12,  1.27s/it]

In [None]:
# Write the model to SAVE_PATH (inside the ./out directory created by the setup cell).
ModelHandler.save(model, SAVE_PATH)

In [None]:
# Push the trainer's current state and model to the checkpoint repository, which is
# where repository.get(MODEL_ID) looks for them at the start of a run.
repository.update(trainer.state, trainer.model)

In [None]:
# Collect every metric stored for this run into a container that can be filtered by source.
metrics = MetricsContainer()
for metric in METRIC_REPOSITORY.get_all():
    metrics.add_metric(metric)

# Filter once per source instead of once per plotted component; list() makes the
# result safe to iterate several times.
train_metrics = list(metrics.filter_metrics(source=0))
val_metrics = list(metrics.filter_metrics(source=1))

# NOTE(review): metric.value is indexed 0..2 here; presumably the total,
# classification and regression losses -- confirm the order before naming them.
for i in range(3):
    fig, ax = plt.subplots()
    ax.plot([metric.value[i] for metric in train_metrics], label="train")
    ax.plot([metric.value[i] for metric in val_metrics], label="validation")
    ax.set_title(f"Metric component {i}")
    ax.set_xlabel("recorded metric index")
    ax.legend()
    plt.show()

In [None]:
# Take the first batch of the test loader as the sample to visualise.
X, y = next(iter(test_dataloader))

# Run the forward pass in evaluation mode and without building an autograd graph,
# then put the model back into whatever mode it was in.
was_training = model.training
model.eval()
try:
    with torch.no_grad():
        y_hat = model(X.to(trainer.device)).cpu().numpy()
finally:
    model.train(was_training)

import matplotlib.pyplot as plt
def softmax(x):
    """Return exp(x) normalised by its sum over all elements of `x`.

    The global maximum is subtracted before exponentiating, which leaves the
    result unchanged while keeping the exponentials from overflowing.
    """
    shifted = x - np.max(x)
    weights = np.exp(shifted)
    return weights / np.sum(weights)

def scale(x):
    """Apply softmax to `x`, then divide by the largest value so the peak becomes 1."""
    probabilities = softmax(x)
    return probabilities / np.max(probabilities)

# One figure per row of the prediction batch: the target row is drawn first, the
# rescaled prediction second. The last column of both is left out of the plot.
for i, prediction in enumerate(y_hat):
    plt.figure()
    plt.plot(y[i, :-1])
    plt.plot(scale(prediction[:-1]))
    

In [None]:
# Remove the cloned repository from the working directory; ./out is left in place.
!rm -fr r_trader