In [1]:
# Start every run from a fresh checkout and an empty output directory.
!rm -fr r_trader out
# -p keeps this cell re-runnable: `input` is not removed above, so a plain
# `mkdir` would fail on it the second time the cell is executed.
!mkdir -p out input
!git clone https://github.com/abreham-atlaw/r_trader
!cd r_trader &&  git checkout main
# %pip (rather than !pip) installs into the environment of the running kernel.
# cattrs and dropbox are pinned to the versions resolved in this notebook's
# recorded install log so that re-runs install the same packages.
%pip install cattrs==24.1.2 positional-encodings==6.0.1 dropbox==12.0.2 pymongo==4.3.3 dependency-injector==4.41.0

Cloning into 'r_trader'...
remote: Enumerating objects: 16822, done.[K
remote: Counting objects: 100% (315/315), done.[K
remote: Compressing objects: 100% (193/193), done.[K
remote: Total 16822 (delta 208), reused 187 (delta 112), pack-reused 16507 (from 2)[K
Receiving objects: 100% (16822/16822), 77.87 MiB | 21.37 MiB/s, done.
Resolving deltas: 100% (12073/12073), done.
Branch 'deep-reinforcement.training-experiment-cnn' set up to track remote branch 'deep-reinforcement.training-experiment-cnn' from 'origin'.
Switched to a new branch 'deep-reinforcement.training-experiment-cnn'
Collecting cattrs
  Downloading cattrs-24.1.2-py3-none-any.whl.metadata (8.4 kB)
Collecting positional-encodings==6.0.1
  Downloading positional_encodings-6.0.1-py3-none-any.whl.metadata (6.6 kB)
Collecting dropbox
  Downloading dropbox-12.0.2-py3-none-any.whl.metadata (4.3 kB)
Collecting pymongo==4.3.3
  Downloading pymongo-4.3.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (8.6 kB)


In [2]:
import os
import sys

# The presence of /kaggle/working is the signal used to detect a Kaggle run;
# otherwise the repository is expected under /content.
KAGGLE_ENV = os.path.exists("/kaggle/working")
if KAGGLE_ENV:
    REPO_PATH = "/kaggle/working/r_trader"
else:
    REPO_PATH = "/content/r_trader"

print(f"KAGGLE ENV: {KAGGLE_ENV}")

# Make the cloned repository's packages (core, lib, ...) importable.
sys.path.append(REPO_PATH)

KAGGLE ENV: False


In [3]:
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torch.optim import Adam, SGD, Adagrad
import matplotlib.pyplot as plt

import os
import signal

from core.utils.research.data.load.dataset import BaseDataset
from core.utils.research.training.trainer import Trainer
from core.utils.research.model.model.cnn.model import CNN
from core.utils.research.model.model.linear.model import LinearModel
from lib.utils.torch_utils.model_handler import ModelHandler
from core.utils.research.training.callbacks.checkpoint_callback import CheckpointCallback, StoreCheckpointCallback
from core.utils.research.training.data.repositories.checkpoint_repository import CheckpointRepository
from lib.utils.file_storage import PCloudClient
from core.utils.research.training.data.state import TrainingState
from core import Config
from core.utils.research.training.callbacks.metric_callback import MetricCallback
from core.utils.research.training.data.repositories.metric_repository import MetricRepository, MongoDBMetricRepository
from core.utils.kaggle import FusedManager
from core.di import init_di, ApplicationContainer
from core.utils.research.training.data.metric import MetricsContainer
from core.utils.research.model.layers import Indicators
from core.di import ServiceProvider
from core.utils.kaggle.data_repository import KaggleDataRepository
from core.utils.research.losses import ProximalMaskedLoss, MeanSquaredErrorLoss

[0m PID:191 [2025-01-24 07:18:26.549257]  XLA is not installed. Training using TPU will not be possible. [93m [0m


In [4]:
def download_data(root, datasets, zip_filename, kernel_mode=True, checksums=None):
    """Download Kaggle datasets into ``root`` through ``KaggleDataRepository``.

    Args:
        root: Directory handed to the repository as ``output_path``.
        datasets: Dataset identifiers passed to ``download_multiple``.
        zip_filename: Archive name handed to the repository as ``zip_filename``.
        kernel_mode: Forwarded as the ``kernel`` flag of ``download_multiple``.
        checksums: Optional checksums forwarded to ``download_multiple``.
    """
    repository = KaggleDataRepository(
        output_path=root,
        zip_filename=zip_filename
    )
    repository.download_multiple(datasets, kernel=kernel_mode, checksums=checksums)
    # A trailing `os.system("unzip -d root/")` used to follow. It named no
    # archive and never interpolated `root`, so it could only fail, and it has
    # been removed.
    # NOTE(review): extraction appears to be done by the repository itself (the
    # recorded run log prints "Unzipping Data..." during the download) - confirm.

In [None]:
# --- Data location -----------------------------------------------------------
# On Kaggle the datasets are read from /kaggle/input; otherwise they are
# downloaded into /content/input by download_data below.
if KAGGLE_ENV:
    DATA_ROOT = "/kaggle/input"
else:
    DATA_ROOT = "/content/input"

DATASETS = ["abrehamatlaw0/spinoza-ds-datapreparer-simsim-cum-0-it-36-0"]
CHECKSUMS = ["52679b000c348fab8e9901a91f8e07f6e857c4fa90f6358e62269f37818a6b9d"]
KERNEL_MODE = False
ZIP_FILENAME = "out.zip"

if not KAGGLE_ENV:
    download_data(
        DATA_ROOT,
        DATASETS,
        ZIP_FILENAME,
        kernel_mode=KERNEL_MODE,
        checksums=CHECKSUMS,
    )

# Every entry of DATA_ROOT is treated as a dataset container holding
# out/train and out/test directories.
CONTAINERS = [os.path.join(DATA_ROOT, entry) for entry in os.listdir(DATA_ROOT)]
DATA_PATHES = [os.path.join(path, "out", "train") for path in CONTAINERS]
TEST_DATA_PATHES = [os.path.join(path, "out", "test") for path in CONTAINERS]

# --- Identity ----------------------------------------------------------------
NOTEBOOK_ID = "abrehamalemu/rtrader-training-exp-0-cnn-0-cum-0-it-36-tot"
MODEL_ID = NOTEBOOK_ID.replace("/", "-")

# --- Data loading ------------------------------------------------------------
NUM_FILES = None
DATA_CACHE_SIZE = 2
DATALOADER_WORKERS = 4

# --- Optimisation ------------------------------------------------------------
LR = 1e-4
LOSS_P = 1
BATCH_SIZE = 256
EPOCHS = 100
TIMEOUT = 10 * 60 * 60  # seconds (10 hours); passed to signal.alarm

DTYPE = torch.float32
NP_DTYPE = np.float32

# --- Persistence -------------------------------------------------------------
MODEL_URL = None
SAVE_PATH = os.path.abspath(f"./out/{MODEL_ID}.zip")
STATE_SAVE_PATH = os.path.abspath("./out/model.zip")
UPLOAD_PATH = "/Apps/RTrader/maploss/it-36/"

# Metrics are stored in MongoDB under this model's id.
METRIC_REPOSITORY = MongoDBMetricRepository(Config.MONGODB_URL, MODEL_ID)

CALLBACKS = [
    # Checkpoints go to the local output directory and to the file storage
    # rooted at UPLOAD_PATH.
    StoreCheckpointCallback(
        path=os.path.dirname(SAVE_PATH),
        active=True,
        interval=5,
        fs=ServiceProvider.provide_file_storage(UPLOAD_PATH),
    ),
    MetricCallback(METRIC_REPOSITORY),
]

[94m PID:191 [2025-01-24 07:18:26.626732]  Downloading abrehamatlaw0/spinoza-ds-datapreparer-simsim-cum-0-it-2-0 [0m
[94m PID:191 [2025-01-24 07:18:26.629175]  Downloading to /content/input/abrehamatlaw0-spinoza-ds-datapreparer-simsim-cum-0-it-2-0 [0m
[94m PID:191 [2025-01-24 07:18:26.632301]  Checking pre-downloaded for /content/input/abrehamatlaw0-spinoza-ds-datapreparer-simsim-cum-0-it-2-0 [0m
[94m PID:191 [2025-01-24 07:18:26.632455]  Cleaning /content/input/abrehamatlaw0-spinoza-ds-datapreparer-simsim-cum-0-it-2-0 [0m
[94m PID:191 [2025-01-24 07:18:29.389410]  Using Account: bemnetatlaw [0m
Dataset URL: https://www.kaggle.com/datasets/abrehamatlaw0/spinoza-ds-datapreparer-simsim-cum-0-it-2-0
[94m PID:191 [2025-01-24 07:18:31.971698]  Unzipping Data... [0m
[94m PID:191 [2025-01-24 07:19:54.075130]  Downloaded False to /content/input/abrehamatlaw0-spinoza-ds-datapreparer-simsim-cum-0-it-2-0 [0m
[94m PID:191 [2025-01-24 07:19:54.075393]  Generating checksum for '/conte

In [None]:
# Checkpoint repository on top of the default file storage; used below both to
# load the starting model and to store the result after training.
repository = CheckpointRepository(ServiceProvider.provide_file_storage())

In [None]:
# Fetch the stored (state, model) pair for this model id. This notebook only
# continues from an existing checkpoint, so a missing one is an error.
state_model = repository.get(MODEL_ID)
# Debug toggle: uncomment to exercise the not-found branch.
# state_model = None
if state_model is None:
    raise ValueError("Can't Find Model")

print("[+]Using loaded model...")
state, model = state_model

# NOTE(review): the state unpacked above is replaced immediately, so training
# always restarts its epoch/batch counters from zero while keeping the loaded
# weights - confirm this is intentional.
state = TrainingState(epoch=0, batch=0, id=MODEL_ID)

In [None]:
# Training data: built from the "train" directories collected in DATA_PATHES.
dataset = BaseDataset(
    root_dirs=DATA_PATHES,
    out_dtypes=NP_DTYPE,
    num_files=NUM_FILES,
    check_file_sizes=True,
    load_weights=True
)
dataloader = DataLoader(dataset, batch_size=BATCH_SIZE, num_workers=DATALOADER_WORKERS, pin_memory=True)

# Held-out data: built from the "test" directories in TEST_DATA_PATHES. The
# prediction-preview cell at the end of the notebook iterates over
# `test_dataloader`, which was previously never defined (NameError).
# NOTE(review): load_weights is disabled because that cell unpacks each batch
# as an (X, y) pair - confirm the dataset yields pairs in this mode.
test_dataset = BaseDataset(
    root_dirs=TEST_DATA_PATHES,
    out_dtypes=NP_DTYPE,
    num_files=NUM_FILES,
    check_file_sizes=True,
    load_weights=False
)
test_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE, num_workers=DATALOADER_WORKERS, pin_memory=True)

In [None]:
# Wrap the loaded model in a Trainer with the checkpoint and metric callbacks.
trainer = Trainer(
    model,
    callbacks=CALLBACKS,
)

In [None]:
# Classification loss: n is one more than the number of configured static
# bounds; p comes from LOSS_P.
trainer.cls_loss_function = ProximalMaskedLoss(
    n=1 + len(Config.AGENT_STATE_CHANGE_DELTA_STATIC_BOUND),
    p=LOSS_P,
    softmax=True,
    device=trainer.device,
)

# Regression loss: mean squared error.
trainer.reg_loss_function = MeanSquaredErrorLoss()

# Adam over all parameters of the trainer's model, at the configured rate.
trainer.optimizer = Adam(
    trainer.model.parameters(),
    lr=LR,
)

In [None]:
class TimeoutException(Exception):
    """Raised by the SIGALRM handler to interrupt whatever is running."""


def handle_timeout(*args, **kwargs):
    """Signal handler: ignore the handler arguments and raise TimeoutException."""
    raise TimeoutException()


# Arm the alarm: TIMEOUT seconds from now SIGALRM is delivered and
# handle_timeout raises inside the code that is executing at that moment.
signal.signal(signal.SIGALRM, handle_timeout)
signal.alarm(TIMEOUT)

In [None]:
# Train until EPOCHS are done or the alarm armed earlier raises TimeoutException.
try:
    trainer.train(dataloader, epochs=EPOCHS, progress=True, progress_interval=1000, state=state, cls_loss_only=False)
except TimeoutException:
    # Running out of the time budget is an expected way to stop; say so instead
    # of ending silently, then continue to the save/upload cells.
    print(f"[+]Training stopped: the {TIMEOUT}s time budget was reached.")
finally:
    # Cancel any still-pending alarm. Otherwise, when training finishes early,
    # SIGALRM would fire later and raise TimeoutException inside an unrelated
    # cell (for example while the model is being saved or uploaded).
    signal.alarm(0)

In [None]:
# Write the model to SAVE_PATH (a .zip file under ./out named after MODEL_ID).
ModelHandler.save(model, SAVE_PATH)

In [None]:
# Store the trainer's current state and model back into the checkpoint
# repository the model was loaded from.
repository.update(trainer.state, trainer.model)

In [None]:
# Collect every metric recorded in the metric repository for this model.
metrics = MetricsContainer()
for metric in METRIC_REPOSITORY.get_all():
    metrics.add_metric(metric)

# Each metric.value is indexed 0..2 below; one figure is drawn per index.
NUM_LOSS_COMPONENTS = 3

for i in range(NUM_LOSS_COMPONENTS):
    # source=0 holds the training values and source=1 the validation values.
    train_losses = [metric.value[i] for metric in metrics.filter_metrics(source=0)]
    val_losses = [metric.value[i] for metric in metrics.filter_metrics(source=1)]

    fig, ax = plt.subplots()
    ax.plot(train_losses, label="train")
    ax.plot(val_losses, label="validation")
    ax.set_title(f"Loss component {i}")
    ax.set_xlabel("recorded metric index")
    ax.set_ylabel("loss")
    ax.legend()
    plt.show()
    # Release the figure so repeated runs do not accumulate open figures.
    plt.close(fig)

In [None]:
# Qualitative check: overlay predicted and target curves for a few samples of
# one batch drawn from test_dataloader.

# Upper bound on the number of figures; without it one figure is opened per
# row of the batch (BATCH_SIZE rows).
PREVIEW_SAMPLES = 8

# Index the batch instead of tuple-unpacking it so that a batch carrying extra
# trailing items still works.
batch = next(iter(test_dataloader))
X, y = batch[0], batch[1]

# Inference only: switch to evaluation mode and skip gradient tracking.
model.eval()
with torch.no_grad():
    y_hat = model(X.to(trainer.device)).detach().cpu().numpy()


def softmax(x):
    """Numerically stable softmax over all elements of ``x``."""
    exp_x = np.exp(x - np.max(x))
    softmax_x = exp_x / np.sum(exp_x)
    return softmax_x


def scale(x):
    """Softmax ``x`` and rescale the result so that its maximum equals 1."""
    x = softmax(x)
    x = x / np.max(x)
    return x


for i in range(min(PREVIEW_SAMPLES, y_hat.shape[0])):
    fig, ax = plt.subplots()
    # The last column is left out of both curves.
    # NOTE(review): presumably it is the regression output - confirm.
    ax.plot(y[i, :-1], label="target")
    ax.plot(scale(y_hat[i, :-1]), label="prediction (softmax, max-scaled)")
    ax.set_title(f"Test sample {i}")
    ax.legend()
    plt.show()
    plt.close(fig)


In [None]:
# Remove the cloned repository once the run is finished.
!rm -fr r_trader