# Trading agent demo

## Imports

In [1]:
import os
import json
import torch
import numpy as np

from modules.data_loader import DataLoader
from modules.single_asset_env import SingleAssetEnv
from modules.q_network import Q_network
from modules.memory import Memory
from modules.trading_agent import TradingAgent
from modules.functions import train_agent, test_agent

## Parameters and variables

In [2]:
num_actions = int(os.environ["NUM_ACTIONS"])
states_dim = int(os.environ["STATES_DIM"])

num_epochs = int(os.environ["NUM_EPOCHS"])
batch_size = int(os.environ["BATCH_SIZE"])
memory_size = int(os.environ["MEMORY_SIZE"])

learning_rate = float(os.environ["LEARNING_RATE"])
learning_freq = int(os.environ["LEARNING_FREQ"])

tau = float(os.environ["TAU"])
gamma = float(os.environ["GAMMA"])

device = os.environ["DEVICE"]
seed = int(os.environ["SEED"])

## Loading data

In [3]:
dataloader = DataLoader(os.environ["START_DATE"], 
                        os.environ["FREQ"],
                        os.environ["TRAIN_TEST_SPLIT_DATE"])

assets = json.loads(os.environ["ASSETS"])

trains = []
tests = []

for asset in assets:
    train, test = dataloader.load(asset)

    trains.append(train)
    tests.append(test)

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


## Initialisations

In [4]:
train_envs = [SingleAssetEnv(train) for train in trains]

agent = TradingAgent(states_dim, num_actions, assets, 
                     batch_size=batch_size,
                     memory_size=memory_size,
                     learning_rate=learning_rate,
                     tau=tau,
                     gamma=gamma,
                     learning_freq=learning_freq,
                     device=device,
                     seed=seed)

## Training the agent

In [5]:
train_agent(assets, agent, train_envs, num_epochs)

Epoch  0 | Scores = {'BTC-USD': 296572278.3125, 'ETH-USD': 9544927.971069336, 'BNB-USD': 25288523.3691473}
Epoch  1 | Scores = {'BTC-USD': 99544648.1171875, 'ETH-USD': 39672303.16938019, 'BNB-USD': 52754580.03080559}
Epoch  2 | Scores = {'BTC-USD': 20834248.239257812, 'ETH-USD': 509770981.7358246, 'BNB-USD': 52754580.03080559}
Epoch  3 | Scores = {'BTC-USD': 48416802.56982422, 'ETH-USD': 509770981.7358246, 'BNB-USD': 52754580.03080559}
Epoch  4 | Scores = {'BTC-USD': 51727365.248535156, 'ETH-USD': 509870881.7358246, 'BNB-USD': 52700064.767388344}
Epoch  5 | Scores = {'BTC-USD': 3305325.9467773438, 'ETH-USD': 310621897.19981384, 'BNB-USD': 52754580.03080559}
Epoch  6 | Scores = {'BTC-USD': -787057.7978515625, 'ETH-USD': 386887130.8197479, 'BNB-USD': 136349080.0138378}
Epoch  7 | Scores = {'BTC-USD': 3156708.560546875, 'ETH-USD': 438878158.89445496, 'BNB-USD': 52754580.03080559}
Epoch  8 | Scores = {'BTC-USD': 54870199.69580078, 'ETH-USD': 509870881.7358246, 'BNB-USD': 52754580.03080559}

## Testing the agent

In [9]:
test_envs = [SingleAssetEnv(test) for test in tests]

print('Initial capital : 100000$ for each asset\n')

test_agent(assets, test_envs, agent)

Initial capital : 100000$ for each asset

Balance : BTC-USD: 100000.0
Balance : ETH-USD: 228668.56311035156
Balance : BNB-USD: 102471.3832397461
---------------------------
Total profit made: 131139.94635009766


{'BTC-USD': 100000.0,
 'ETH-USD': 228668.56311035156,
 'BNB-USD': 102471.3832397461}

## To test app

In [7]:
import os
from modules.data_loader import DataLoader
def test_agent(assets, test_envs, agent):
    total_profit = 0
    final_running_balance_dict = {}
    for asset, env in zip(assets, test_envs):

        state = env.reset()

        test_actions = []
        test_rewards = []

        for _ in range(len(env.data)-1):
            
            action = agent.qnets[agent.map_assets[asset]](torch.from_numpy(np.array(state, dtype=np.float32).reshape(1, -1)))
            action = np.argmax(action.data)

            test_actions.append(action.item())
                    
            next_state, reward, done = env.step(action.numpy())
            test_rewards.append(reward)

            state = next_state
    
        final_running_balance = (env.data.iloc[env.t]['Close']*env.positions) + env.balance
        final_running_balance_dict[asset] = final_running_balance
        total_profit += (env.data.iloc[env.t]['Close']*env.positions) + env.balance - env.initial_balance
        print(f"Balance : {asset}: {final_running_balance}")

    print("-"*27)
    print(f"Total profit made: {total_profit}")

    return final_running_balance_dict

def train_agent(assets, train_envs, agent, NUM_EPOCHS):
    scores = {key: [] for key in assets}
    for epoch in range(NUM_EPOCHS):

        # intialise score for the epoch
        score = {key: 0 for key in assets}
        step_count = 1

        for asset, env in zip(assets, train_envs):

            # reset the environment before each epoch + get initial state
            state = env.reset()

            while True:

                # find epsilon greedy action from state
                action = agent.act(asset, state, 1/step_count) # epsilon = 1/t

                # perform step in the environment and get completing info
                next_state, reward, done = env.step(action)

                agent.step(asset, state, action, reward, next_state, done)

                # prepare for next iteration
                step_count += 1
                state = next_state

                score[asset] += reward

                if done:
                    break

        # compute info about the epoch
        for key in scores.keys():
            scores[key].append(score[key])

        print(f"Epoch {epoch:2} | Scores = {score}")

    print("Training done!")

    # save Q_network model weights
    agent.save_models("weights")


asset_name_test = "BTC-USD"
model_path = "weights/trained_agent_model_"+asset_name_test+".pth"
assets = ["BTC-USD", "ETH-USD", "BNB-USD"]

## Load Training and Testing Dataset
date_split = '2023-01-01'
start_date = '2020-01-01'
dataloader = DataLoader(start_date, '1d', date_split)


trains = []
tests = []

for asset in assets:
    train, test = dataloader.load(asset)

    trains.append(train)
    tests.append(test)

# Environment and Agent Initiation
train_envs = [SingleAssetEnv(train) for train in trains]
test_envs = [SingleAssetEnv(test) for test in tests]
agent = TradingAgent(states_dim, num_actions, assets, seed=seed)

if os.path.exists(model_path) :
    ## Agent
    model_path = "weights/trained_agent_model_"+asset_name_test+".pth"
    # Load the state dict
    state_dict = torch.load(model_path)

    # Set the loaded state dict to the Q network of the corresponding asset
    agent.qnets[agent.map_assets[asset]].load_state_dict(state_dict)

    final_running_balance_dict = test_agent(assets, test_envs, agent)

else:  # TRAINING MODEL IN CASE IT IS NOT TRAINED YET
    agent = TradingAgent(states_dim, num_actions, assets, seed=seed)    
    train_agent(assets, train_envs, agent, num_epochs)
    final_running_balance_dict = test_agent(assets, test_envs, agent)


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Balance : BTC-USD: 100000.0
Balance : ETH-USD: 228668.56311035156
Balance : BNB-USD: 102471.3832397461
---------------------------
Total profit made: 131139.94635009766
