In [None]:
# Start from a clean workspace so this cell can be re-run safely.
!rm -fr r_trader out input
!mkdir out input
# Fetch the project and switch to the branch this experiment runs against.
!git clone https://github.com/abreham-atlaw/r_trader
!cd r_trader &&  git checkout deep-reinforcement.training-experiment-linear
# Every dependency is pinned for reproducibility; cattrs and dropbox use the versions
# that the recorded run resolved. %pip installs into the kernel's own environment.
%pip install cattrs==24.1.2 positional-encodings==6.0.1 dropbox==12.0.2 pymongo==4.3.3 dependency-injector==4.41.0

Cloning into 'r_trader'...
remote: Enumerating objects: 12921, done.[K
remote: Counting objects: 100% (3835/3835), done.[K
remote: Compressing objects: 100% (1069/1069), done.[K
remote: Total 12921 (delta 2826), reused 3759 (delta 2750), pack-reused 9086 (from 1)[K
Receiving objects: 100% (12921/12921), 76.52 MiB | 22.00 MiB/s, done.
Resolving deltas: 100% (9158/9158), done.
Branch 'deep-reinforcement.training-experiment-linear' set up to track remote branch 'deep-reinforcement.training-experiment-linear' from 'origin'.
Switched to a new branch 'deep-reinforcement.training-experiment-linear'
Collecting cattrs
  Downloading cattrs-24.1.2-py3-none-any.whl.metadata (8.4 kB)
Collecting positional-encodings==6.0.1
  Downloading positional_encodings-6.0.1-py3-none-any.whl.metadata (6.6 kB)
Collecting dropbox
  Downloading dropbox-12.0.2-py3-none-any.whl.metadata (4.3 kB)
Collecting pymongo==4.3.3
  Downloading pymongo-4.3.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metad

In [None]:
import sys

# Make the cloned repository importable. The membership guard keeps the cell
# idempotent: re-running it does not pile up duplicate sys.path entries.
if "/content/r_trader" not in sys.path:
    sys.path.append("/content/r_trader")

In [None]:
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torch.optim import Adam, SGD, Adagrad
import matplotlib.pyplot as plt

import os
import signal

from core.utils.research.data.load.dataset import BaseDataset
from core.utils.research.training.trainer import Trainer
from core.utils.research.model.model.cnn.model import CNN
from core.utils.research.model.model.linear.model import LinearModel
from lib.utils.torch_utils.model_handler import ModelHandler
from core.utils.research.training.callbacks.checkpoint_callback import CheckpointCallback, StoreCheckpointCallback
from core.utils.research.training.data.repositories.checkpoint_repository import CheckpointRepository
from lib.utils.file_storage import PCloudClient
from core.utils.research.training.data.state import TrainingState
from core import Config
from core.utils.research.training.callbacks.metric_callback import MetricCallback
from core.utils.research.training.data.repositories.metric_repository import MetricRepository, MongoDBMetricRepository
from core.utils.kaggle import FusedManager
from core.di import init_di, ApplicationContainer
from core.utils.research.training.data.metric import MetricsContainer
from core.utils.research.model.layers import Indicators
from core.di import ServiceProvider
from core.utils.kaggle.data_repository import KaggleDataRepository

In [None]:
def download_data(root, datasets, zip_filename, kernel_mode=True):
    """Download a list of Kaggle datasets through `KaggleDataRepository`.

    Args:
        root: Directory handed to the repository as its `output_path`.
        datasets: Dataset identifiers forwarded to `download_multiple`.
        zip_filename: Archive file name handed to the repository.
        kernel_mode: Forwarded to `download_multiple` as its `kernel` flag.

    Returns:
        None.
    """
    repository = KaggleDataRepository(
        output_path=root,
        zip_filename=zip_filename
    )
    repository.download_multiple(datasets, kernel=kernel_mode)
    # NOTE: the earlier version ended with a loop that only referenced `os.system`
    # (never calling it) and a malformed `unzip -d root/` shell call (no archive,
    # literal "root/"). Both were dropped; the recorded run log shows the
    # repository itself reporting "Unzipping Data...".

In [None]:
# ---- Data ------------------------------------------------------------------
DATA_ROOT = "/content/input"
DATASETS = [
    f"abrehamatlaw0/spinoza-ds-datapreparer-simsim-cum-0-it-2-{i}"
    for i in range(4)
]
KERNEL_MODE = False
ZIP_FILENAME = "out.zip"
download_data(DATA_ROOT, DATASETS, ZIP_FILENAME, kernel_mode=KERNEL_MODE)

# os.listdir returns entries in arbitrary order; sort them so the list of data
# directories (and everything derived from it) is the same on every run.
CONTAINERS = [os.path.join(DATA_ROOT, container) for container in sorted(os.listdir(DATA_ROOT))]
# Each downloaded container is expected to hold out/train and out/test.
DATA_PATHES = [os.path.join(container, "out", "train") for container in CONTAINERS]
TEST_DATA_PATHES = [os.path.join(container, "out", "test") for container in CONTAINERS]

# ---- Identity --------------------------------------------------------------
NOTEBOOK_ID = "abrehamalemu/rtrader-training-exp-0-linear-108-cum-0-it-4-tot"
MODEL_ID = NOTEBOOK_ID.replace("/", "-")

# ---- Data loading ----------------------------------------------------------
NUM_FILES = None
DATA_CACHE_SIZE = 2
DATALOADER_WORKERS = 4

# ---- Model hyper-parameters ------------------------------------------------
VOCAB_SIZE = 431
DROPOUT = 0.3
# Eight layers of width 4096 followed by an output layer of VOCAB_SIZE + 1 units.
LAYER_SIZES = [4096 for _ in range(8)] + [VOCAB_SIZE + 1]
HIDDEN_ACTIVATION = nn.LeakyReLU()
INIT_FUNCTION = None
# One flag per entry of LAYER_SIZES: only the first is enabled.
NORM = [True] + [False for _ in LAYER_SIZES[1:]]
BLOCK_SIZE = 1148
LR = 1e-5

# ---- Training schedule -----------------------------------------------------
BATCH_SIZE = 64
EPOCHS = 100
TIMEOUT = int(10*60*60)  # 10 hours, expressed in seconds (passed to signal.alarm)

DTYPE = torch.float32
NP_DTYPE = np.float32

# ---- Persistence -----------------------------------------------------------
MODEL_URL = None
SAVE_PATH = os.path.abspath(os.path.join("./out", f"{MODEL_ID}.zip"))

METRIC_REPOSITORY = MongoDBMetricRepository(
    Config.MONGODB_URL,
    MODEL_ID
)

# Callbacks handed to the Trainer: checkpoint storage and metric recording.
CALLBACKS = [
    StoreCheckpointCallback(path=SAVE_PATH),
    MetricCallback(
        METRIC_REPOSITORY
    )
]

[94m PID:272 [2024-10-30 17:06:00.874356]  Downloading abrehamatlaw0/spinoza-ds-datapreparer-simsim-cum-0-it-2-0 [0m
[94m PID:272 [2024-10-30 17:06:00.876946]  Downloading to /content/input/abrehamatlaw0-spinoza-ds-datapreparer-simsim-cum-0-it-2-0 [0m
[94m PID:272 [2024-10-30 17:06:00.878877]  Checking pre-downloaded for /content/input/abrehamatlaw0-spinoza-ds-datapreparer-simsim-cum-0-it-2-0 [0m
[94m PID:272 [2024-10-30 17:06:00.883383]  Cleaning /content/input/abrehamatlaw0-spinoza-ds-datapreparer-simsim-cum-0-it-2-0 [0m
[94m PID:272 [2024-10-30 17:06:01.275322]  Using Account: bemnetatlaw [0m
Dataset URL: https://www.kaggle.com/datasets/abrehamatlaw0/spinoza-ds-datapreparer-simsim-cum-0-it-2-0
[94m PID:272 [2024-10-30 17:06:03.460755]  Unzipping Data... [0m
[94m PID:272 [2024-10-30 17:07:21.809515]  Downloaded False to /content/input/abrehamatlaw0-spinoza-ds-datapreparer-simsim-cum-0-it-2-0 [0m
[94m PID:272 [2024-10-30 17:07:21.809739]  Generating checksum for '/conte

In [None]:
# Checkpoint repository built on the file storage supplied by the project's ServiceProvider.
repository = CheckpointRepository(ServiceProvider.provide_file_storage())

In [None]:
# Look for a stored checkpoint of this model; None is treated as "nothing stored yet".
state_model = repository.get(MODEL_ID)
# state_model = None
if state_model is None:
    print("[+]Creating a new model...")
    # Build the linear network from the hyper-parameters of the configuration cell.
    # The earlier version of this branch constructed a CNN from constants
    # (USE_FF, CHANNELS, KERNEL_SIZES, INDICATORS_*, ...) that this notebook never
    # defines, so starting without a checkpoint failed with NameError.
    model = LinearModel(
        dropout_rate=DROPOUT,
        layer_sizes=LAYER_SIZES,
        hidden_activation=HIDDEN_ACTIVATION,
        init_fn=INIT_FUNCTION,
        norm=NORM,
        # NOTE(review): keyword mirrors the CNN constructor used previously here;
        # confirm LinearModel accepts `input_size`.
        input_size=BLOCK_SIZE
    )
else:
    print("[+]Using loaded model...")
    state, model = state_model

# The training state is always recreated, so epoch/batch counters restart at zero
# even when a checkpoint was loaded; only the loaded model is reused.
state = TrainingState(
    epoch=0,
    batch=0,
    id=MODEL_ID
)

  model.load_state_dict_lazy(torch.load(os.path.join(dirname, 'model_state.pth'), map_location=torch.device('cpu')))


[+]Using loaded model...


In [None]:
# Training split: dataset over the train directories, wrapped in a batched loader.
dataset = BaseDataset(root_dirs=DATA_PATHES, out_dtypes=NP_DTYPE, num_files=NUM_FILES)
dataloader = DataLoader(
    dataset,
    batch_size=BATCH_SIZE,
    num_workers=DATALOADER_WORKERS,
    pin_memory=True,
)



In [None]:
# Held-out split: same construction as the training loader, over the test directories.
test_dataset = BaseDataset(root_dirs=TEST_DATA_PATHES, out_dtypes=NP_DTYPE, num_files=NUM_FILES)
test_dataloader = DataLoader(
    test_dataset,
    batch_size=BATCH_SIZE,
    num_workers=DATALOADER_WORKERS,
    pin_memory=True,
)

In [None]:
# Create the project's Trainer for the model; CALLBACKS comes from the configuration cell
# (checkpoint storage + metric recording).
trainer = Trainer(model, callbacks=CALLBACKS)

In [None]:
# Loss functions, as the attribute names indicate: cross-entropy for the "cls" part
# and mean squared error for the "reg" part.
trainer.cls_loss_function = nn.CrossEntropyLoss()
trainer.reg_loss_function = nn.MSELoss()
# Adam over the trainer's model parameters, using the learning rate from the configuration cell.
trainer.optimizer = Adam(trainer.model.parameters(), lr=LR)

In [None]:
class TimeoutException(Exception):
    """Raised from the SIGALRM handler to interrupt whatever is currently running."""


def handle_timeout(*args, **kwargs):
    """Signal handler: ignore the handler arguments and raise TimeoutException."""
    raise TimeoutException()


# Route SIGALRM to the handler above, then arm the alarm to fire after TIMEOUT seconds.
signal.signal(signal.SIGALRM, handle_timeout)
signal.alarm(TIMEOUT)

0

In [None]:
# Train until EPOCHS are done or the SIGALRM-based time budget interrupts the loop.
try:
    trainer.train(
        dataloader,
        val_dataloader=test_dataloader,
        epochs=EPOCHS,
        progress=True,
        progress_interval=100,
        state=state,
        cls_loss_only=False
    )
except TimeoutException:
    # Expected way out when the time budget is exhausted; report it instead of
    # swallowing it silently, then continue so the model can still be saved.
    print("[+]Training stopped: time budget reached.")
finally:
    # Cancel any alarm that is still pending. Otherwise, if training finishes
    # early, the signal would fire later and interrupt an unrelated cell.
    signal.alarm(0)

Model Summary
Layer Name							Number of Parameters
layers.0.weight			4702208
layers.0.bias			4096
layers.1.weight			16777216
layers.1.bias			4096
layers.2.weight			16777216
layers.2.bias			4096
layers.3.weight			16777216
layers.3.bias			4096
layers.4.weight			16777216
layers.4.bias			4096
layers.5.weight			16777216
layers.5.bias			4096
layers.6.weight			16777216
layers.6.bias			4096
layers.7.weight			16777216
layers.7.bias			4096
layers.8.weight			1769472
layers.8.bias			432
norms.0.weight			1148
norms.0.bias			1148
Total Params:123947688
[+]Shuffling dataset...


Epoch 1 loss: 3.8694212436676025(cls: 3.869420051574707, reg: 1.0757888730950071e-06): 100%|██████████| 32310/32310 [31:35<00:00, 17.04it/s]

Epoch 1 completed, loss: 3.869572639465332(cls: 3.8695716857910156, reg: 1.0756822348412243e-06)





Validation loss: loss: 3.8303513526916504(cls: 3.830350160598755, reg: 1.4399951169252745e-06)
[+]Uploading /content/out/abrehamalemu-rtrader-training-exp-0-linear-108-cum-0-it-4-tot.zip => /Apps/RTrader
[+]Shuffling dataset...


Epoch 2 loss: 280.632568359375(cls: 29.704322814941406, reg: 250.91664123535156): 100%|██████████| 32310/32310 [31:19<00:00, 17.19it/s]

Epoch 2 completed, loss: 280.5555419921875(cls: 29.697221755981445, reg: 250.8467559814453)





Validation loss: loss: 3.825141191482544(cls: 3.825132131576538, reg: 8.132395123539027e-06)
[+]Uploading /content/out/abrehamalemu-rtrader-training-exp-0-linear-108-cum-0-it-4-tot.zip => /Apps/RTrader
[+]Shuffling dataset...


Epoch 3 loss: 3.884852647781372(cls: 3.8848483562469482, reg: 4.069478563906159e-06): 100%|██████████| 32310/32310 [31:23<00:00, 17.16it/s]

Epoch 3 completed, loss: 3.884812355041504(cls: 3.88480806350708, reg: 4.0688969420443755e-06)





Validation loss: loss: 3.8166067600250244(cls: 3.816606044769287, reg: 1.3407229744188953e-06)
[+]Uploading /content/out/abrehamalemu-rtrader-training-exp-0-linear-108-cum-0-it-4-tot.zip => /Apps/RTrader
[+]Shuffling dataset...


Epoch 4 loss: 3.8590447902679443(cls: 3.859043598175049, reg: 1.0942170547423302e-06):  63%|██████▎   | 20335/32310 [19:47<11:41, 17.08it/s]

In [None]:
# Save the model to SAVE_PATH (a .zip path under ./out, see the configuration cell).
ModelHandler.save(model, SAVE_PATH)

In [None]:
# Push the trainer's current state and model to the checkpoint repository
# (presumably what repository.get(MODEL_ID) returns on a later run — confirm).
repository.update(trainer.state, trainer.model)

In [None]:
# Collect every metric recorded in the metric repository for this model.
metrics = MetricsContainer()
for metric in METRIC_REPOSITORY.get_all():
    metrics.add_metric(metric)

# One figure per component of `metric.value`; source=0 holds the training values and
# source=1 the validation values.
# NOTE(review): the three components look like (total, cls, reg) as in the training
# log — confirm the ordering before relabelling the titles.
for i in range(3):
    train_losses = [metric.value[i] for metric in metrics.filter_metrics(source=0)]
    val_losses = [metric.value[i] for metric in metrics.filter_metrics(source=1)]
    fig, ax = plt.subplots()
    ax.plot(train_losses, label="train")
    ax.plot(val_losses, label="validation")
    ax.set(title=f"Loss component {i}", xlabel="record #", ylabel="loss")
    ax.legend()
    plt.show()

In [None]:
# Take a single batch from the test loader to compare predictions with targets.
X, y = next(iter(test_dataloader))

# Inference only: switch off dropout and skip gradient tracking.
model.eval()
with torch.no_grad():
    y_hat = model(X.to(trainer.device)).cpu().numpy()

import matplotlib.pyplot as plt
def softmax(x):
    """Return the softmax of `x`, normalised over all of its elements.

    The global maximum is subtracted before exponentiating so that large
    inputs do not overflow.
    """
    shifted = x - np.max(x)
    weights = np.exp(shifted)
    return weights / np.sum(weights)

def scale(x):
    """Apply `softmax` to `x`, then divide by the peak so the largest entry is 1."""
    probabilities = softmax(x)
    return probabilities / np.max(probabilities)

# One figure per sample of the batch: target versus peak-normalised softmax of the
# prediction. The last column is left out of both curves
# (presumably the regression output — confirm).
for i in range(y_hat.shape[0]):
    fig, ax = plt.subplots()
    ax.plot(y[i, :-1], label="target")
    ax.plot(scale(y_hat[i, :-1]), label="prediction (scaled softmax)")
    ax.set_title(f"Sample {i}")
    ax.legend()
    # Show each figure as it is produced, as the metric plots do, instead of
    # leaving one open figure per sample until the cell ends.
    plt.show()


In [None]:
# Remove the cloned repository now that the run is finished.
!rm -fr /content/r_trader