# Trading agent demo

## Imports

In [1]:
from modules.data_loader import DataLoader
from modules.single_asset_env import SingleAssetEnv
from modules.q_network import Q_network
from modules.memory import Memory
from modules.trading_agent import TradingAgent

import torch
import numpy as np

## Configuration and hyperparameters

In [2]:
# --- Reproducibility / hardware ---
SEED = 123      # forwarded to TradingAgent(..., seed=SEED)
DEVICE = 'cpu'

# --- State and action space ---
LEN_HISTORY = 7                 # consider the last week to predict the next value
STATES_DIM = 1 + LEN_HISTORY    # history window plus the predicted value
NUM_ACTIONS = 3                 # Buy, Sell, Hold

# --- Q network shape ---
INPUT_DIM = STATES_DIM
HIDDEN_DIM = 64
OUTPUT_DIM = NUM_ACTIONS

# --- Optimisation ---
# NOTE(review): apart from NUM_EPOCHS, the constants from here down are not
# referenced by any cell shown in this notebook; their meaning is inferred
# from their names only -- confirm against the code in modules/.
LEARNING_RATE = 1e-3
GAMMA = 0.97
NUM_EPOCHS = 20     # number of passes of the training loop over all assets
BATCH_SIZE = 64
MEMORY_SIZE = 200

# --- Exploration schedule ---
# NOTE(review): the training cell uses epsilon = 1/step_count rather than these.
EPSILON = 1.0
EPSILON_MIN = 0.1
EPSILON_DECREASE = 1e-3
START_REDUCE_EPSILON = 200

# --- Frequencies (presumably counted in steps -- TODO confirm) ---
TRAIN_FREQ = 10
UPDATE_Q_FREQ = 20
SHOW_LOG_FREQ = 5

## Loading data

In [3]:
dataloader = DataLoader('2020-01-01', '1d', '2024-01-01')

assets = ["BTC-USD", "ETH-USD", "BNB-USD"]

# load() yields a (train, test) pair per ticker; keep the pairs in ticker
# order so trains[i] / tests[i] line up with assets[i]
splits = [dataloader.load(ticker) for ticker in assets]

trains = [train_part for train_part, _ in splits]
tests = [test_part for _, test_part in splits]

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


## Initialisations

In [4]:
# one training environment per entry of `trains`, order preserved
train_envs = list(map(SingleAssetEnv, trains))

# a single agent handles every asset; it receives the full ticker list
agent = TradingAgent(STATES_DIM, NUM_ACTIONS, assets, seed=SEED)

# per-asset history of epoch scores (one value appended per training epoch)
scores = {ticker: [] for ticker in assets}

## Training the agent

In [5]:
for epoch in range(NUM_EPOCHS):

    # cumulative reward per asset for this epoch
    score = dict.fromkeys(assets, 0)

    # step counter driving epsilon = 1/t
    # NOTE(review): it restarts at 1 every epoch and keeps counting across
    # assets, so only the first asset of an epoch starts with epsilon = 1
    step_count = 1

    for asset, env in zip(assets, train_envs):

        # start a fresh episode for this asset and fetch its initial state
        state = env.reset()
        done = False

        while not done:

            # epsilon-greedy action for the current state
            epsilon = 1/step_count
            action = agent.act(asset, state, epsilon)

            # advance the environment by one step
            next_state, reward, done = env.step(action)

            # hand the full transition to the agent
            agent.step(asset, state, action, reward, next_state, done)

            score[asset] += reward

            # move on to the next step
            state = next_state
            step_count += 1

    # record this epoch's score in each asset's history
    for key, history in scores.items():
        history.append(score[key])

    print(f"Epoch {epoch:2} | Scores = {score}")

print("Training done!")

# persist the agent's models under "weights"
agent.save_models("weights")

Epoch  0 | Score = {'BTC-USD': -301459, 'ETH-USD': -107272.10096740723, 'BNB-USD': -203300.27783203125}
Epoch  1 | Score = {'BTC-USD': -230475.8994140625, 'ETH-USD': -1462, 'BNB-USD': -101461}
Epoch  2 | Score = {'BTC-USD': -103310.57861328125, 'ETH-USD': -201460, 'BNB-USD': -1462}
Epoch  3 | Score = {'BTC-USD': -26630.17431640625, 'ETH-USD': -1462, 'BNB-USD': -1462}
Epoch  4 | Score = {'BTC-USD': -18270.0556640625, 'ETH-USD': -1462, 'BNB-USD': -101461}
Epoch  5 | Score = {'BTC-USD': -696215.171875, 'ETH-USD': -1462, 'BNB-USD': -1462}
Epoch  6 | Score = {'BTC-USD': -362338.693359375, 'ETH-USD': -1462, 'BNB-USD': -1462}
Epoch  7 | Score = {'BTC-USD': -400643.81494140625, 'ETH-USD': -1462, 'BNB-USD': -1462}
Epoch  8 | Score = {'BTC-USD': -542325.5546875, 'ETH-USD': -101461, 'BNB-USD': -1462}
Epoch  9 | Score = {'BTC-USD': -115162.36279296875, 'ETH-USD': -101461, 'BNB-USD': -1462}
Epoch 10 | Score = {'BTC-USD': -199315.259765625, 'ETH-USD': -101461, 'BNB-USD': -7215.547897338867}
Epoch 11

## Testing the agent

In [7]:
test_envs = [SingleAssetEnv(test) for test in tests]

# Per-asset record of the rollout. `test_actions` / `test_rewards` are rebound
# for every asset, so without this only the last asset's trace would survive.
test_history = {}

for asset, env in zip(assets, test_envs):

    state = env.reset()

    test_actions = []
    test_rewards = []

    # network the agent keeps for this asset
    qnet = agent.qnets[agent.map_assets[asset]]

    # NOTE(review): runs a fixed number of steps and ignores `done`, unlike
    # the training loop which stops on `done` -- confirm this is intended.
    for _ in range(len(env.data)-1):

        # current state as a float32 batch with a single row
        state_batch = torch.from_numpy(np.array(state, dtype=np.float32).reshape(1, -1))

        # pure inference: skip building the autograd graph
        with torch.no_grad():
            q_values = qnet(state_batch)

        # greedy action = index of the largest network output (0-d tensor)
        action = torch.argmax(q_values)
        test_actions.append(action.item())

        next_state, reward, done = env.step(action.numpy())
        test_rewards.append(reward)

        state = next_state

    test_history[asset] = {"actions": test_actions, "rewards": test_rewards}

    print(f"Profit made with {asset}: {env.profit}")

print("-"*27)
print(f"Total profit made: {sum(env.profit for env in test_envs)}")

Profit made with BTC-USD: 0
Profit made with ETH-USD: 0
Profit made with BNB-USD: 0
---------------------------
Total profit made: 0
