# Trading agent demo

## Imports

In [27]:
from modules.data_loader import DataLoader
from modules.single_asset_env import SingleAssetEnv
from modules.q_network import Q_network
from modules.memory import Memory
from modules.trading_agent import TradingAgent

import torch
import numpy as np

## Environment variables

In [22]:
NUM_ACTIONS = 3 # Buy, Sell, Hold
LEN_HISTORY = 7 # consider last week to predict next value
STATES_DIM = LEN_HISTORY + 1 # history + predicted value

# Q network params
INPUT_DIM = STATES_DIM
HIDDEN_DIM = 64
OUTPUT_DIM = NUM_ACTIONS

LEARNING_RATE = 1e-3

NUM_EPOCHS = 10
BATCH_SIZE = 64

MEMORY_SIZE = 200

GAMMA = 0.97

EPSILON = 1.0
EPSILON_DECREASE = 1e-3
EPSILON_MIN = 0.1
START_REDUCE_EPSILON = 200

TRAIN_FREQ = 10
UPDATE_Q_FREQ = 20
SHOW_LOG_FREQ = 5

DEVICE = 'cpu'

## Loading data

In [23]:
dataloader = DataLoader('2020-01-01', '1d', '2024-01-01')

train_btc, test_btc = dataloader.load("BTC-USD")

[*********************100%%**********************]  1 of 1 completed




## Initialisations

In [24]:
train_env = SingleAssetEnv(train_btc)

MAX_STEPS = len(train_btc)-1

memory = Memory(256)

agent = TradingAgent(STATES_DIM, NUM_ACTIONS, memory)

scores = []

## Training the agent

In [25]:
for epoch in range(NUM_EPOCHS):

    # reset the environment before each epoch + get initial state
    state = train_env.reset()

    # intialise score for the epoch
    score = 0
    step_count = 1

    while True:

        # find epsilon greedy action from state
        action = agent.act(state, 1/step_count) # epsilon = 1/t

        # perform step in the environment and get completing info
        next_state, reward, done = train_env.step(action)

        agent.step(state, action, reward, next_state, done)

        # prepare for next iteration
        step_count += 1
        state = next_state

        score += reward

        if done:
            break

    # compute info about the epoch
    scores.append(score)

    print(f"Epoch {epoch:2} | Score = {score:6} | Profit = {round(train_env.profit, 2)}")

print("Training done!")

# save Q_network model weights
agent.save_models("weights")

Epoch  0 | Score =  -1100 | Profit = 95176.610000
Epoch  1 | Score =  -1102 | Profit = 142683.350000
Epoch  2 | Score =  -1192 | Profit = 28718.100000
Epoch  3 | Score =  -1178 | Profit = 36463.720000
Epoch  4 | Score =  -1182 | Profit = 46028.750000
Epoch  5 | Score =  -1182 | Profit = 146718.520000
Epoch  6 | Score =  -1228 | Profit = 332516.030000
Epoch  7 | Score =  -1234 | Profit = 430188.870000
Epoch  8 | Score =  -1266 | Profit = 428285.660000
Epoch  9 | Score =  -1328 | Profit = 238045.040000
Training done!


## Testing the agent

In [28]:
test_env = SingleAssetEnv(test_btc)

state = test_env.reset()

test_actions = []
test_rewards = []

for _ in range(len(test_env.data)-1):
    
    action = agent.qnet(torch.from_numpy(np.array(state, dtype=np.float32).reshape(1, -1)))
    action = np.argmax(action.data)
    test_actions.append(action.item())
            
    next_state, reward, done = test_env.step(action.numpy())
    test_rewards.append(reward)

    state = next_state

print(f"Profit made : {test_env.profit}")

Profit made : 20182.73046875
