In [1]:
!rm -fr r_trader out
!mkdir out input
!git clone https://github.com/abreham-atlaw/r_trader
!cd r_trader &&  git checkout deep-reinforcement.training-experiment-linear
!pip install cattrs positional-encodings==6.0.1 dropbox pymongo==4.3.3 dependency-injector==4.41.0

Cloning into 'r_trader'...
remote: Enumerating objects: 12948, done.[K
remote: Counting objects: 100% (3862/3862), done.[K
remote: Compressing objects: 100% (1078/1078), done.[K
remote: Total 12948 (delta 2838), reused 3784 (delta 2760), pack-reused 9086 (from 1)[K
Receiving objects: 100% (12948/12948), 76.54 MiB | 23.25 MiB/s, done.
Resolving deltas: 100% (9170/9170), done.
Branch 'deep-reinforcement.training-experiment-linear' set up to track remote branch 'deep-reinforcement.training-experiment-linear' from 'origin'.
Switched to a new branch 'deep-reinforcement.training-experiment-linear'


In [2]:
import os
import sys

# The presence of /kaggle/working is used as the "running on Kaggle" switch;
# otherwise the /content layout is assumed (presumably Colab - confirm).
KAGGLE_ENV = os.path.exists("/kaggle/working")
if KAGGLE_ENV:
    REPO_PATH = "/kaggle/working/r_trader"
else:
    REPO_PATH = "/content/r_trader"

print(f"KAGGLE ENV: {KAGGLE_ENV}")

# Make the cloned repository importable (`core`, `lib` packages used by later cells).
sys.path.append(REPO_PATH)

KAGGLE ENV: False


In [4]:
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torch.optim import Adam, SGD, Adagrad
import matplotlib.pyplot as plt

import os
import signal

from core.utils.research.data.load.dataset import BaseDataset
from core.utils.research.training.trainer import Trainer
from core.utils.research.model.model.cnn.model import CNN
from core.utils.research.model.model.linear.model import LinearModel
from lib.utils.torch_utils.model_handler import ModelHandler
from core.utils.research.training.callbacks.checkpoint_callback import CheckpointCallback, StoreCheckpointCallback
from core.utils.research.training.data.repositories.checkpoint_repository import CheckpointRepository
from lib.utils.file_storage import PCloudClient
from core.utils.research.training.data.state import TrainingState
from core import Config
from core.utils.research.training.callbacks.metric_callback import MetricCallback
from core.utils.research.training.data.repositories.metric_repository import MetricRepository, MongoDBMetricRepository
from core.utils.kaggle import FusedManager
from core.di import init_di, ApplicationContainer
from core.utils.research.training.data.metric import MetricsContainer
from core.utils.research.model.layers import Indicators
from core.di import ServiceProvider
from core.utils.research.losses import ProximalMaskedLoss
from core.utils.kaggle.data_repository import KaggleDataRepository

In [5]:
def download_data(root, datasets, zip_filename, kernel_mode=True, checksums=None):
    """Download several Kaggle datasets into ``root`` via ``KaggleDataRepository``.

    Args:
        root: Directory handed to the repository as ``output_path``.
        datasets: Iterable of dataset identifiers forwarded to ``download_multiple``.
        zip_filename: Archive file name handed to the repository as ``zip_filename``.
        kernel_mode: Forwarded as the ``kernel`` flag of ``download_multiple``.
        checksums: Optional checksums forwarded to ``download_multiple``.

    Returns:
        None.
    """
    repository = KaggleDataRepository(
        output_path=root,
        zip_filename=zip_filename
    )
    repository.download_multiple(datasets, kernel=kernel_mode, checksums=checksums)
    # No extra unzip step here: the download itself already logs "Unzipping Data...",
    # and a shell call of the form `unzip -d root/` names no archive and a literal
    # `root/` directory, so it can only print unzip's usage error.

In [None]:
# Root directory that holds one sub-directory ("container") per downloaded dataset.
DATA_ROOT = "/kaggle/input" if KAGGLE_ENV else "/content/input"

# Kaggle dataset shards to fetch (suffixes 0..3).
DATASETS = [
    f"abrehamatlaw0/spinoza-ds-datapreparer-simsim-cum-0-it-2-{i}"
    for i in range(4)
]
CHECKSUMS = None
KERNEL_MODE = False
ZIP_FILENAME = "out.zip"

# Outside Kaggle nothing is mounted under DATA_ROOT, so the shards are downloaded explicitly.
if not KAGGLE_ENV:
    download_data(DATA_ROOT, DATASETS, ZIP_FILENAME, kernel_mode=KERNEL_MODE, checksums=CHECKSUMS)


# os.listdir returns entries in arbitrary order and may include stray files;
# keep directories only and sort them so the container order is reproducible across runs.
CONTAINERS = sorted(
    path
    for path in (os.path.join(DATA_ROOT, entry) for entry in os.listdir(DATA_ROOT))
    if os.path.isdir(path)
)

# Each container is expected to hold `out/train` and `out/test` sub-directories.
DATA_PATHES = [os.path.join(container, "out", "train") for container in CONTAINERS]
TEST_DATA_PATHES = [os.path.join(container, "out", "test") for container in CONTAINERS]

# --- Experiment identity -------------------------------------------------------
# MODEL_ID is NOTEBOOK_ID with every "/" turned into "-".
NOTEBOOK_ID = "abrehamalemu/rtrader-training-exp-0-linear-122-cum-0-it-4-tot"
MODEL_ID = "-".join(NOTEBOOK_ID.split("/"))

# --- Data loading --------------------------------------------------------------
NUM_FILES = None
DATA_CACHE_SIZE = 2
DATALOADER_WORKERS = 4

# --- Model architecture --------------------------------------------------------
VOCAB_SIZE = 431
DROPOUT = 0.3
# Eight hidden layers of width 4096 followed by an output layer of VOCAB_SIZE + 1 units.
LAYER_SIZES = [4096] * 8 + [VOCAB_SIZE + 1]
HIDDEN_ACTIVATION = nn.LeakyReLU()
INIT_FUNCTION = None
# One flag per entry of LAYER_SIZES: True for the first entry, False for all the rest.
NORM = [True] + [False] * (len(LAYER_SIZES) - 1)
BLOCK_SIZE = 1148
LR = 1e-5

# --- Loss ----------------------------------------------------------------------
LOSS_P = 1

# --- Training schedule ---------------------------------------------------------
BATCH_SIZE = 64
EPOCHS = 100
TIMEOUT = 10 * 60 * 60  # seconds (10 hours)

# --- Numeric types -------------------------------------------------------------
DTYPE = torch.float32
NP_DTYPE = np.float32

# --- Persistence ---------------------------------------------------------------
MODEL_URL = None
SAVE_PATH = os.path.abspath(os.path.join("./out", MODEL_ID + ".zip"))

# Metric store for this model, backed by the MongoDB instance at Config.MONGODB_URL.
METRIC_REPOSITORY = MongoDBMetricRepository(Config.MONGODB_URL, MODEL_ID)

# Callbacks handed to the Trainer: a checkpoint callback writing to SAVE_PATH and a
# metric callback that uses METRIC_REPOSITORY.
CALLBACKS = [
    StoreCheckpointCallback(path=SAVE_PATH),
    MetricCallback(METRIC_REPOSITORY),
]

[94m PID:4280 [2024-10-31 09:29:39.353103]  Downloading abrehamatlaw0/spinoza-ds-datapreparer-simsim-cum-0-it-2-0 [0m
[94m PID:4280 [2024-10-31 09:29:39.354664]  Downloading to /content/input/abrehamatlaw0-spinoza-ds-datapreparer-simsim-cum-0-it-2-0 [0m
[94m PID:4280 [2024-10-31 09:29:39.354787]  Checking pre-downloaded for /content/input/abrehamatlaw0-spinoza-ds-datapreparer-simsim-cum-0-it-2-0 [0m
[94m PID:4280 [2024-10-31 09:29:39.357623]  Cleaning /content/input/abrehamatlaw0-spinoza-ds-datapreparer-simsim-cum-0-it-2-0 [0m
[94m PID:4280 [2024-10-31 09:29:39.876672]  Using Account: bemnetatlaw [0m
Dataset URL: https://www.kaggle.com/datasets/abrehamatlaw0/spinoza-ds-datapreparer-simsim-cum-0-it-2-0
[94m PID:4280 [2024-10-31 09:29:42.197351]  Unzipping Data... [0m
[94m PID:4280 [2024-10-31 09:31:07.177755]  Downloaded False to /content/input/abrehamatlaw0-spinoza-ds-datapreparer-simsim-cum-0-it-2-0 [0m
[94m PID:4280 [2024-10-31 09:31:07.177994]  Generating checksum for

In [None]:
# Checkpoint repository built on the file storage supplied by ServiceProvider.
repository = CheckpointRepository(ServiceProvider.provide_file_storage())

In [None]:
# Look up an existing checkpoint for this model id. The result is either None or a
# (state, model) pair. Assign `state_model = None` here instead to force a fresh start.
state_model = repository.get(MODEL_ID)

if state_model is None:
    print("[+]Creating a new model...")

    model = LinearModel(
        dropout_rate=DROPOUT,
        layer_sizes=LAYER_SIZES,
        hidden_activation=HIDDEN_ACTIVATION,
        init_fn=INIT_FUNCTION,
        norm=NORM,
        input_size=BLOCK_SIZE
    )
    # A brand-new model starts from the very first epoch and batch.
    state = TrainingState(
        epoch=0,
        batch=0,
        id=MODEL_ID
    )

else:
    print("[+]Using loaded model...")
    # Keep the stored training state together with the stored model so training resumes
    # where the checkpoint left off, rather than replacing it with a fresh epoch-0 state.
    state, model = state_model

In [None]:
# Training split: dataset over every container's train directory, served in
# BATCH_SIZE batches by DATALOADER_WORKERS worker processes.
dataset = BaseDataset(root_dirs=DATA_PATHES, out_dtypes=NP_DTYPE, num_files=NUM_FILES)
dataloader = DataLoader(
    dataset,
    batch_size=BATCH_SIZE,
    num_workers=DATALOADER_WORKERS,
    pin_memory=True,
)

In [None]:
# Held-out split: same dataset/loader settings as training, pointed at the test directories.
test_dataset = BaseDataset(root_dirs=TEST_DATA_PATHES, out_dtypes=NP_DTYPE, num_files=NUM_FILES)
test_dataloader = DataLoader(
    test_dataset,
    batch_size=BATCH_SIZE,
    num_workers=DATALOADER_WORKERS,
    pin_memory=True,
)

In [None]:
# Trainer for the model chosen above, with the checkpoint and metric callbacks attached.
trainer = Trainer(
    model,
    callbacks=CALLBACKS,
)

In [None]:
# Classification loss: n is one more than the number of configured static bounds.
trainer.cls_loss_function = ProximalMaskedLoss(
    n=1 + len(Config.AGENT_STATE_CHANGE_DELTA_STATIC_BOUND),
    p=LOSS_P,
    softmax=True,
    device=trainer.device,
)
# Regression loss: plain mean squared error.
trainer.reg_loss_function = nn.MSELoss()
# Adam over every parameter of the trainer's model.
trainer.optimizer = Adam(params=trainer.model.parameters(), lr=LR)

In [None]:
class TimeoutException(Exception):
    """Raised to signal that the notebook's time budget has run out."""

def handle_timeout(*args, **kwargs):
    """Signal-handler callback: unconditionally raise ``TimeoutException``.

    Any positional or keyword arguments are accepted and ignored.

    Raises:
        TimeoutException: always.
    """
    raise TimeoutException

# Install the handler before arming the alarm: SIGALRM is delivered TIMEOUT seconds from
# now and handle_timeout converts it into a TimeoutException in the running cell.
signal.signal(signal.SIGALRM, handle_timeout)
signal.alarm(TIMEOUT)

In [None]:
# Train until EPOCHS are done or the SIGALRM-based time limit interrupts the loop.
try:
    trainer.train(
        dataloader,
        val_dataloader=test_dataloader,
        epochs=EPOCHS,
        progress=True,
        progress_interval=100,
        state=state,
        cls_loss_only=False,
    )
except TimeoutException:
    # Hitting the time limit is an expected way to stop; report it instead of hiding it.
    print(f"[+]Training stopped: time limit of {TIMEOUT} seconds reached.")
finally:
    # Cancel any alarm that is still pending; otherwise, when training finishes early,
    # the signal would fire later and raise TimeoutException inside an unrelated cell.
    signal.alarm(0)

In [None]:
# Write the model to SAVE_PATH (the zip file under ./out named after MODEL_ID).
ModelHandler.save(model, SAVE_PATH)

In [None]:
# Store the trainer's current state and model in the checkpoint repository.
repository.update(trainer.state, trainer.model)

In [None]:
# Collect every metric recorded for this model into a container that can be filtered.
metrics = MetricsContainer()
for metric in METRIC_REPOSITORY.get_all():
    metrics.add_metric(metric)

# The filters do not depend on the plotted component, so run them once.
# list(...) keeps the results re-iterable whatever filter_metrics returns.
train_metrics = list(metrics.filter_metrics(source=0))
val_metrics = list(metrics.filter_metrics(source=1))

# One figure per component of the recorded metric value (three components are plotted).
for i in range(3):
    train_losses = [metric.value[i] for metric in train_metrics]
    val_losses = [metric.value[i] for metric in val_metrics]
    fig, ax = plt.subplots()
    ax.plot(train_losses, label="train (source=0)")
    ax.plot(val_losses, label="validation (source=1)")
    ax.set_title(f"Metric component {i}")
    ax.set_xlabel("recorded metric index")
    ax.set_ylabel("value")
    ax.legend()
    plt.show()

In [None]:
# Take the first batch of the test loader as the sample to visualise.
X, y = next(iter(test_dataloader))

# Inference only: eval() switches layers such as dropout to their deterministic
# behaviour, and no_grad() avoids building an autograd graph for the forward pass.
model.eval()
with torch.no_grad():
    y_hat = model(X.to(trainer.device)).detach().cpu().numpy()

import matplotlib.pyplot as plt
def softmax(x):
    """Return the softmax of ``x`` computed over all of its elements.

    The global maximum is subtracted before exponentiating so large inputs do not overflow.
    """
    shifted = x - np.max(x)
    weights = np.exp(shifted)
    return weights / np.sum(weights)

def scale(x):
    """Apply ``softmax`` to ``x`` and rescale the result so its largest entry equals 1."""
    probabilities = softmax(x)
    return probabilities / np.max(probabilities)

# One figure per sample in the batch, target against prediction. The last column of both
# arrays is left out (presumably a separate non-class output - confirm).
for i in range(y_hat.shape[0]):
    fig, ax = plt.subplots()
    ax.plot(y[i, :-1], label="target")
    ax.plot(scale(y_hat[i, :-1]), label="prediction (softmax, scaled to max 1)")
    ax.set_title(f"Test sample {i}")
    ax.legend()
    # Render each figure as it is built rather than keeping one open per sample.
    plt.show()


In [None]:
# Remove the repository checkout that the first cell cloned.
!rm -fr r_trader