In [None]:
!rm -fr r_trader out
!mkdir out
!mkdir input
!git clone https://github.com/abreham-atlaw/r_trader
!cd r_trader &&  git checkout deep-reinforcement.training-experiment-cnn
!pip install cattrs positional-encodings==6.0.1 dropbox pymongo==4.3.3 dependency-injector==4.41.0

Cloning into 'r_trader'...
remote: Enumerating objects: 12921, done.[K
remote: Counting objects: 100% (3835/3835), done.[K
remote: Compressing objects: 100% (1069/1069), done.[K
remote: Total 12921 (delta 2826), reused 3759 (delta 2750), pack-reused 9086 (from 1)[K
Receiving objects: 100% (12921/12921), 76.52 MiB | 18.45 MiB/s, done.
Resolving deltas: 100% (9158/9158), done.
Branch 'deep-reinforcement.training-experiment-cnn' set up to track remote branch 'deep-reinforcement.training-experiment-cnn' from 'origin'.
Switched to a new branch 'deep-reinforcement.training-experiment-cnn'
Collecting cattrs
  Downloading cattrs-24.1.2-py3-none-any.whl.metadata (8.4 kB)
Collecting positional-encodings==6.0.1
  Downloading positional_encodings-6.0.1-py3-none-any.whl.metadata (6.6 kB)
Collecting dropbox
  Downloading dropbox-12.0.2-py3-none-any.whl.metadata (4.3 kB)
Collecting pymongo==4.3.3
  Downloading pymongo-4.3.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (8.6 

In [None]:
import sys
# Make the cloned repository importable (the `core.*` / `lib.*` imports below).
# Guarded so that re-running this cell does not stack duplicate sys.path entries.
if "/content/r_trader" not in sys.path:
    sys.path.append("/content/r_trader")

In [None]:
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torch.optim import Adam, SGD, Adagrad
import matplotlib.pyplot as plt

import os
import signal

from core.utils.research.data.load.dataset import BaseDataset
from core.utils.research.training.trainer import Trainer
from core.utils.research.model.model.cnn.model import CNN
from core.utils.research.model.model.linear.model import LinearModel
from lib.utils.torch_utils.model_handler import ModelHandler
from core.utils.research.training.callbacks.checkpoint_callback import CheckpointCallback, StoreCheckpointCallback
from core.utils.research.training.data.repositories.checkpoint_repository import CheckpointRepository
from lib.utils.file_storage import PCloudClient
from core.utils.research.training.data.state import TrainingState
from core import Config
from core.utils.research.training.callbacks.metric_callback import MetricCallback
from core.utils.research.training.data.repositories.metric_repository import MetricRepository, MongoDBMetricRepository
from core.utils.kaggle import FusedManager
from core.di import init_di, ApplicationContainer
from core.utils.research.training.data.metric import MetricsContainer
from core.utils.research.model.layers import Indicators
from core.di import ServiceProvider
from core.utils.kaggle.data_repository import KaggleDataRepository

In [None]:
def download_data(root, datasets, zip_filename, kernel_mode=True, checksums=None):
    """Download the given Kaggle datasets below `root`.

    All work is delegated to `KaggleDataRepository.download_multiple`.
    No separate unzip command is issued here: the repository's own log output
    reports an "Unzipping Data..." step during download (NOTE(review): confirm
    that this covers every dataset).

    Args:
        root: directory handed to the repository as `output_path`.
        datasets: collection of Kaggle dataset identifiers to download.
        zip_filename: archive file name handed to the repository as `zip_filename`.
        kernel_mode: forwarded to `download_multiple` as `kernel=`.
        checksums: forwarded to `download_multiple` as `checksums=`; `None` by default.
    """
    repository = KaggleDataRepository(
        output_path=root,
        zip_filename=zip_filename
    )
    repository.download_multiple(datasets, kernel=kernel_mode, checksums=checksums)

In [None]:
# Directory the datasets are downloaded into.
DATA_ROOT = "/content/input"
# Dataset identifiers with numeric suffixes 0..3.
DATASETS = [
    f"abrehamatlaw0/spinoza-ds-datapreparer-simsim-cum-0-it-2-{i}"
    for i in range(0,4)
]
# CHECKSUMS = [
#     "7f7aac0561cd2e1608d50c2ef5c34b0c03ed4611b94208bd0a5cee53bb9ce1d",
#     "55759c646a723f963d1679fa3eafcc3924f1f231d81b2a12c2122335f099dde6",
#     "419c9e74e6aa71a0b0180841d31f6212e5e1f953df78e16559c85189b8ae981e",
#     "ee3211d88d89d41f5862a81508ac723136e3df5e0b73ea20316800b19d6344dd"
# ]
# Checksum verification is currently disabled; the list above is kept for reference.
CHECKSUMS = None
KERNEL_MODE = False
ZIP_FILENAME = "out.zip"
download_data(DATA_ROOT, DATASETS, ZIP_FILENAME, kernel_mode=KERNEL_MODE, checksums=CHECKSUMS)

# os.listdir() returns entries in arbitrary order, so sort for a reproducible
# container order. Plain files are skipped: only a directory can hold the
# "out/train" and "out/test" sub-folders joined below.
CONTAINERS = sorted(
    path
    for path in (os.path.join(DATA_ROOT, entry) for entry in os.listdir(DATA_ROOT))
    if os.path.isdir(path)
)
# One list of "<container>/out/train" paths and one of "<container>/out/test" paths.
DATA_PATHES, TEST_DATA_PATHES = [
    [
        os.path.join(container, "out", type_)
        for container in CONTAINERS
    ]
    for type_ in ["train", "test"]
]

# --- Experiment identity -----------------------------------------------------
NOTEBOOK_ID = "abrehamalemu/rtrader-training-exp-0-cnn-148-cum-0-it-4-tot"
# Same identifier with "/" replaced by "-"; used as checkpoint id and file name.
MODEL_ID = "-".join(NOTEBOOK_ID.split("/"))

# --- Data loading ------------------------------------------------------------
NUM_FILES = None
DATA_CACHE_SIZE = 2
DATALOADER_WORKERS = 4

# --- CNN architecture (one entry per convolution layer) ----------------------
CHANNELS = [128] * 4
EXTRA_LEN = 124
KERNEL_SIZES = [3] * len(CHANNELS)
VOCAB_SIZE = 431
POOL_SIZES = [3] * len(CHANNELS)
DROPOUT_RATE = 0
ACTIVATION = nn.LeakyReLU()
# Passed to the CNN as `input_size`.
BLOCK_SIZE = 1024 + EXTRA_LEN
PADDING = 0
LINEAR_COLLAPSE = True
AVG_POOL = True
# First layer's flag is spelled out separately so it can be toggled on its own.
NORM = [False] + [False] * len(CHANNELS[1:])
LR = 1e-4

# --- Indicators layer --------------------------------------------------------
INDICATORS_DELTA = True
INDICATORS_SO = []
INDICATORS_RSI = []

# --- Feed-forward block ------------------------------------------------------
USE_FF = True
# Four hidden layers of 256 units followed by a final layer of VOCAB_SIZE + 1 units.
FF_LINEAR_LAYERS = [256] * 4 + [VOCAB_SIZE + 1]
FF_LINEAR_ACTIVATION = nn.LeakyReLU()
FF_LINEAR_INIT = None
FF_LINEAR_NORM = [False] + [False] * len(FF_LINEAR_LAYERS[:-1])
FF_DROPOUT = 0

# --- Training schedule -------------------------------------------------------
BATCH_SIZE = 64
EPOCHS = 300
# Ten hours, expressed in seconds.
TIMEOUT = 10 * 60 * 60

DTYPE = torch.float32
NP_DTYPE = np.float32

# --- Persistence -------------------------------------------------------------
MODEL_URL = None
SAVE_PATH = os.path.abspath(os.path.join("./out", f"{MODEL_ID}.zip"))

METRIC_REPOSITORY = MongoDBMetricRepository(Config.MONGODB_URL, MODEL_ID)

# Callbacks handed to the Trainer: checkpoint storage and metric recording.
CALLBACKS = [
    StoreCheckpointCallback(path=SAVE_PATH),
    MetricCallback(METRIC_REPOSITORY),
]


[94m PID:1984 [2024-10-31 00:15:13.193745]  Downloading abrehamatlaw0/spinoza-ds-datapreparer-simsim-cum-0-it-2-0 [0m
[94m PID:1984 [2024-10-31 00:15:13.195536]  Downloading to /content/input/abrehamatlaw0-spinoza-ds-datapreparer-simsim-cum-0-it-2-0 [0m
[94m PID:1984 [2024-10-31 00:15:13.197123]  Checking pre-downloaded for /content/input/abrehamatlaw0-spinoza-ds-datapreparer-simsim-cum-0-it-2-0 [0m
[94m PID:1984 [2024-10-31 00:15:13.198809]  Cleaning /content/input/abrehamatlaw0-spinoza-ds-datapreparer-simsim-cum-0-it-2-0 [0m
[94m PID:1984 [2024-10-31 00:15:14.623409]  Using Account: bemnetatlaw [0m
Dataset URL: https://www.kaggle.com/datasets/abrehamatlaw0/spinoza-ds-datapreparer-simsim-cum-0-it-2-0
[94m PID:1984 [2024-10-31 00:15:21.902548]  Unzipping Data... [0m
[94m PID:1984 [2024-10-31 00:16:50.175619]  Downloaded False to /content/input/abrehamatlaw0-spinoza-ds-datapreparer-simsim-cum-0-it-2-0 [0m
[94m PID:1984 [2024-10-31 00:16:50.175822]  Generating checksum for

In [None]:
# Checkpoint store for this notebook: queried with `get(MODEL_ID)` before training
# and written with `update(state, model)` afterwards. It is built on the file
# storage handed out by the project's ServiceProvider.
repository = CheckpointRepository(
    ServiceProvider.provide_file_storage()
)

In [None]:
# Resume from the checkpoint stored under MODEL_ID when one exists; otherwise
# build a fresh model together with a fresh training state.
state_model = repository.get(MODEL_ID)
# Uncomment to ignore any stored checkpoint and start from scratch:
# state_model = None
if state_model is None:
    print("[+]Creating a new model...")
    if USE_FF:
        ff = LinearModel(
            dropout_rate=FF_DROPOUT,
            layer_sizes=FF_LINEAR_LAYERS,
            hidden_activation=FF_LINEAR_ACTIVATION,
            init_fn=FF_LINEAR_INIT,
            norm=FF_LINEAR_NORM
        )
    else:
        ff = None

    indicators = Indicators(
        delta=INDICATORS_DELTA,
        so=INDICATORS_SO,
        rsi=INDICATORS_RSI
    )

    model = CNN(
        extra_len=EXTRA_LEN,
        conv_channels=CHANNELS,
        kernel_sizes=KERNEL_SIZES,
        hidden_activation=ACTIVATION,
        pool_sizes=POOL_SIZES,
        dropout_rate=DROPOUT_RATE,
        padding=PADDING,
        avg_pool=AVG_POOL,
        linear_collapse=LINEAR_COLLAPSE,
        norm=NORM,
        ff_block=ff,
        indicators=indicators,
        input_size=BLOCK_SIZE
    )

    # Only a brand-new model starts from epoch 0 / batch 0.
    state = TrainingState(
        epoch=0,
        batch=0,
        id=MODEL_ID
    )

else:
    print("[+]Using loaded model...")
    # The checkpoint is unpacked as (state, model). The loaded state is kept so
    # training continues from the stored position instead of being reset.
    # NOTE(review): if restarting the epoch counter on every run is intended,
    # recreate `state` here explicitly rather than relying on a shared statement.
    state, model = state_model

  model.load_state_dict_lazy(torch.load(os.path.join(dirname, 'model_state.pth'), map_location=torch.device('cpu')))


[+]Using loaded model...


In [None]:
# Training split: dataset over every "<container>/out/train" directory, wrapped
# in a batched, multi-worker DataLoader with pinned host memory.
dataset = BaseDataset(root_dirs=DATA_PATHES, out_dtypes=NP_DTYPE, num_files=NUM_FILES)
dataloader = DataLoader(
    dataset,
    batch_size=BATCH_SIZE,
    num_workers=DATALOADER_WORKERS,
    pin_memory=True,
)



In [None]:
# Held-out split: same construction as the training loader, but over the
# "<container>/out/test" directories.
test_dataset = BaseDataset(root_dirs=TEST_DATA_PATHES, out_dtypes=NP_DTYPE, num_files=NUM_FILES)
test_dataloader = DataLoader(
    test_dataset,
    batch_size=BATCH_SIZE,
    num_workers=DATALOADER_WORKERS,
    pin_memory=True,
)

In [None]:
# Wrap the model in the project's Trainer and register the checkpoint/metric
# callbacks from CALLBACKS.
trainer = Trainer(model, callbacks=CALLBACKS)

In [None]:
# Two loss functions are attached: cross-entropy under `cls_loss_function` and
# mean squared error under `reg_loss_function`.
trainer.cls_loss_function = nn.CrossEntropyLoss()
trainer.reg_loss_function = nn.MSELoss()
# Adam over every parameter of the trainer's model, with the configured learning rate.
trainer.optimizer = Adam(
    params=trainer.model.parameters(),
    lr=LR,
)

In [None]:
class TimeoutException(Exception):
    """Raised by the SIGALRM handler when the alarm goes off."""


def handle_timeout(*args, **kwargs):
    """Signal handler: interrupt whatever is running by raising TimeoutException.

    Accepts and ignores any arguments it is called with.
    """
    raise TimeoutException()


# Route SIGALRM to the handler, then schedule the alarm TIMEOUT seconds from now.
# signal.alarm() returns the seconds left on any previously set alarm, which is
# what this cell displays as its result.
signal.signal(signal.SIGALRM, handle_timeout)
signal.alarm(TIMEOUT)

0

In [None]:
# Train until EPOCHS are done or the SIGALRM-based time budget interrupts the run.
try:
    trainer.train(
        dataloader,
        val_dataloader=test_dataloader,
        epochs=EPOCHS,
        progress=True,
        progress_interval=100,
        state=state,
        cls_loss_only=False,
    )
except TimeoutException:
    # Expected exit once the time budget is used up: say so instead of passing
    # silently, then let the following cells save and upload the model.
    print("[+]Training stopped: time budget exhausted.")
else:
    # Training finished before the alarm fired. Cancel the pending alarm so it
    # cannot raise TimeoutException inside a later cell (saving, uploading, plotting).
    signal.alarm(0)

Model Summary
Layer Name							Number of Parameters
layers.0.weight			768
layers.0.bias			128
layers.1.weight			49152
layers.1.bias			128
layers.2.weight			49152
layers.2.bias			128
layers.3.weight			49152
layers.3.bias			128
ff_block.layers.0.weight			2030592
ff_block.layers.0.bias			256
ff_block.layers.1.weight			65536
ff_block.layers.1.bias			256
ff_block.layers.2.weight			65536
ff_block.layers.2.bias			256
ff_block.layers.3.weight			65536
ff_block.layers.3.bias			256
ff_block.layers.4.weight			110592
ff_block.layers.4.bias			432
Total Params:2487984
[+]Shuffling dataset...


Epoch 1 loss: 3.8642566204071045(cls: 3.8642492294311523, reg: 8.170511136995628e-06): 100%|██████████| 32310/32310 [09:48<00:00, 54.94it/s]

Epoch 1 completed, loss: 3.864260196685791(cls: 3.864253044128418, reg: 8.170812179741915e-06)





Validation loss: loss: 3.8678460121154785(cls: 3.8678393363952637, reg: 5.713499831472291e-06)
[+]Uploading /content/out/abrehamalemu-rtrader-training-exp-0-cnn-148-cum-0-it-4-tot.zip => /Apps/RTrader
[+]Shuffling dataset...


Epoch 2 loss: 3.8599510192871094(cls: 3.859943151473999, reg: 8.058381354203448e-06): 100%|██████████| 32310/32310 [09:13<00:00, 58.36it/s]

Epoch 2 completed, loss: 3.8599698543548584(cls: 3.859961748123169, reg: 8.05838772066636e-06)





Validation loss: loss: 3.8684756755828857(cls: 3.8684701919555664, reg: 5.18287288286956e-06)
[+]Uploading /content/out/abrehamalemu-rtrader-training-exp-0-cnn-148-cum-0-it-4-tot.zip => /Apps/RTrader
[+]Shuffling dataset...


Epoch 3 loss: 3.8548223972320557(cls: 3.8548145294189453, reg: 7.799167178745847e-06): 100%|██████████| 32310/32310 [09:14<00:00, 58.29it/s]


Epoch 3 completed, loss: 3.8546910285949707(cls: 3.8546833992004395, reg: 7.79770107328659e-06)
Validation loss: loss: 3.8666279315948486(cls: 3.8666207790374756, reg: 7.586346328025684e-06)
[+]Uploading /content/out/abrehamalemu-rtrader-training-exp-0-cnn-148-cum-0-it-4-tot.zip => /Apps/RTrader
[+]Shuffling dataset...


Epoch 4 loss: 3.848640203475952(cls: 3.848632574081421, reg: 7.871200068620965e-06): 100%|██████████| 32310/32310 [09:28<00:00, 56.79it/s]

Epoch 4 completed, loss: 3.84867262840271(cls: 3.8486647605895996, reg: 7.869743058108725e-06)





Validation loss: loss: 3.861264705657959(cls: 3.861257791519165, reg: 6.769507763237925e-06)
[+]Uploading /content/out/abrehamalemu-rtrader-training-exp-0-cnn-148-cum-0-it-4-tot.zip => /Apps/RTrader
[+]Shuffling dataset...


Epoch 5 loss: 3.844572067260742(cls: 3.8445656299591064, reg: 7.946972800709773e-06):  94%|█████████▍| 30368/32310 [08:38<00:39, 49.58it/s]

In [None]:
# Save the model to SAVE_PATH (a .zip under ./out) via the project's ModelHandler.
ModelHandler.save(model, SAVE_PATH)

In [None]:
# Push the trainer's current state and model to the checkpoint repository, the
# same repository that is queried with `get(MODEL_ID)` at start-up.
repository.update(trainer.state, trainer.model)

In [None]:
# Collect every metric recorded for this model.
metrics = MetricsContainer()
for metric in METRIC_REPOSITORY.get_all():
    metrics.add_metric(metric)

# One figure per component of `metric.value`, comparing source=0 (train) with
# source=1 (validation).
# NOTE(review): the three components are presumably total / cls / reg loss, in
# the order the trainer logs them - TODO confirm and use named titles.
for i in range(3):
    train_losses = [metric.value[i] for metric in metrics.filter_metrics(source=0)]
    val_losses = [metric.value[i] for metric in metrics.filter_metrics(source=1)]
    plt.figure()
    plt.plot(train_losses, label="train")
    plt.plot(val_losses, label="validation")
    plt.title(f"Metric component {i}")
    plt.xlabel("Recorded metric index")
    plt.ylabel("Loss")
    plt.legend()
    plt.show()

In [None]:
# Take only the first batch of the test loader for a visual sanity check.
X, y = next(iter(test_dataloader))
# Inference only: no_grad() avoids building an autograd graph for the forward pass.
with torch.no_grad():
    y_hat = model(X.to(trainer.device)).detach().cpu().numpy()

import matplotlib.pyplot as plt
def softmax(x):
    """Return the softmax of `x`, normalised over all of its elements.

    The global maximum is subtracted before exponentiating so that large
    inputs do not overflow.
    """
    shifted = x - np.max(x)
    weights = np.exp(shifted)
    return weights / np.sum(weights)


def scale(x):
    """Apply softmax to `x`, then rescale so the largest value equals 1."""
    probabilities = softmax(x)
    return probabilities / np.max(probabilities)

# One figure per sample of the batch: target against the max-scaled softmax of
# the model output. The last column of both arrays is left out of the plot
# (NOTE(review): presumably a separate non-class output - confirm).
for i in range(y_hat.shape[0]):
    plt.figure()
    plt.plot(y[i, :-1], label="target")
    plt.plot(scale(y_hat[i, :-1]), label="prediction (softmax, max-scaled)")
    plt.title(f"Sample {i}")
    plt.legend()
    # Render each figure as it is built rather than keeping a whole batch of
    # figures open at once, which trips matplotlib's open-figure warning.
    plt.show()


In [None]:
# Clean up: delete the repository checkout.
# NOTE(review): the clone in the first cell uses a relative path, so this matches
# it only when the working directory is /content - confirm.
!rm -fr /content/r_trader