# Trading agent demo

## Imports

In [1]:
from modules.data_loader import DataLoader
from modules.single_asset_env import SingleAssetEnv
from modules.q_network import Q_network
from modules.memory import Memory
from modules.trading_agent import TradingAgent

import torch
import numpy as np

## Environment variables

In [2]:
NUM_ACTIONS = 3 # Buy, Sell, Hold
LEN_HISTORY = 30 # consider last week to predict next value
STATES_DIM = LEN_HISTORY # history + predicted value

# Q network params
INPUT_DIM = STATES_DIM
HIDDEN_DIM = 64
OUTPUT_DIM = NUM_ACTIONS

LEARNING_RATE = 1e-3

NUM_EPOCHS = 20
BATCH_SIZE = 64

MEMORY_SIZE = 200

GAMMA = 0.97

EPSILON = 1.0
EPSILON_DECREASE = 1e-3
EPSILON_MIN = 0.1
START_REDUCE_EPSILON = 200

TRAIN_FREQ = 10
UPDATE_Q_FREQ = 20
SHOW_LOG_FREQ = 5

DEVICE = 'cpu'
SEED = 123

## Loading data

In [3]:
dataloader = DataLoader('2020-01-01', '1d', '2023-05-01')

assets = ["BTC-USD", "ETH-USD", "BNB-USD"]

trains = []
tests = []

for asset in assets:
    train, test = dataloader.load(asset)

    trains.append(train)
    tests.append(test)

TypeError: DataLoader.__init__() missing 1 required positional argument: 'end_date'

## Initialisations

In [None]:
train_envs = [SingleAssetEnv(train) for train in trains]

agent = TradingAgent(STATES_DIM, NUM_ACTIONS, assets, seed=SEED)

scores = {key: [] for key in assets}

## Training the agent

In [None]:
for epoch in range(NUM_EPOCHS):

    # intialise score for the epoch
    score = {key: 0 for key in assets}
    step_count = 1

    for asset, env in zip(assets, train_envs):

        # reset the environment before each epoch + get initial state
        state = env.reset()

        while True:

            # find epsilon greedy action from state
            action = agent.act(asset, state, 1/step_count) # epsilon = 1/t

            # perform step in the environment and get completing info
            next_state, reward, done = env.step(action)

            agent.step(asset, state, action, reward, next_state, done)

            # prepare for next iteration
            step_count += 1
            state = next_state

            score[asset] += reward

            if done:
                break

    # compute info about the epoch
    for key in scores.keys():
        scores[key].append(score[key])

    print(f"Epoch {epoch:2} | Scores = {score}")

print("Training done!")

# save Q_network model weights
agent.save_models("weights")

Epoch  0 | Scores = {'BTC-USD': 45170500.90136719, 'ETH-USD': 362313478.77067566, 'BNB-USD': 4412661.909983635}
Epoch  1 | Scores = {'BTC-USD': 28529279.37060547, 'ETH-USD': 509770981.7358246, 'BNB-USD': 52754580.03080559}
Epoch  2 | Scores = {'BTC-USD': 25198379.451660156, 'ETH-USD': 509770981.7358246, 'BNB-USD': 52854480.03080559}
Epoch  3 | Scores = {'BTC-USD': 370675.8310546875, 'ETH-USD': 509770981.7358246, 'BNB-USD': 52754580.03080559}
Epoch  4 | Scores = {'BTC-USD': 27912296.603027344, 'ETH-USD': 509692651.7122345, 'BNB-USD': 52754580.03080559}
Epoch  5 | Scores = {'BTC-USD': 2244411.0854492188, 'ETH-USD': 512289444.0662689, 'BNB-USD': 52754580.03080559}
Epoch  6 | Scores = {'BTC-USD': 59382050.635253906, 'ETH-USD': 509770981.7358246, 'BNB-USD': 52754580.03080559}
Epoch  7 | Scores = {'BTC-USD': 215876309.0234375, 'ETH-USD': 285439274.13645935, 'BNB-USD': 52754580.03080559}
Epoch  8 | Scores = {'BTC-USD': -421400.0, 'ETH-USD': 509861396.1873627, 'BNB-USD': 71933259.18829155}
Epo

## Testing the agent

In [None]:
test_envs = [SingleAssetEnv(test) for test in tests]

print('Initial capital : 100000$ for each asset')
total_profit = 0
for asset, env in zip(assets, test_envs):

    state = env.reset()

    test_actions = []
    test_rewards = []

    for _ in range(len(env.data)-1):
        
        action = agent.qnets[agent.map_assets[asset]](torch.from_numpy(np.array(state, dtype=np.float32).reshape(1, -1)))
        action = np.argmax(action.data)

        test_actions.append(action.item())
                
        next_state, reward, done = env.step(action.numpy())
        test_rewards.append(reward)

        state = next_state
    
    # 
    final_running_balance = (env.data.iloc[env.t]['Close']*env.positions) + env.balance
    total_profit += (env.data.iloc[env.t]['Close']*env.positions) + env.balance - env.initial_balance
    print(f"Balance : {asset}: {final_running_balance}")

print("-"*27)
print(f"Total profit made: {total_profit}")

Initial capital : 100000$ for each asset
Balance : BTC-USD: 222693.484375
Balance : ETH-USD: 190286.8624267578
Balance : BNB-USD: 208606.99908447266
---------------------------
Total profit made: 321587.34588623047


## POUR TESTER APPLI

In [7]:
import os
from modules.data_loader import DataLoader
def test_agent(assets, test_envs, agent):
    total_profit = 0
    final_running_balance_dict = {}
    for asset, env in zip(assets, test_envs):

        state = env.reset()

        test_actions = []
        test_rewards = []

        for _ in range(len(env.data)-1):
            
            action = agent.qnets[agent.map_assets[asset]](torch.from_numpy(np.array(state, dtype=np.float32).reshape(1, -1)))
            action = np.argmax(action.data)

            test_actions.append(action.item())
                    
            next_state, reward, done = env.step(action.numpy())
            test_rewards.append(reward)

            state = next_state
    
        final_running_balance = (env.data.iloc[env.t]['Close']*env.positions) + env.balance
        final_running_balance_dict[asset] = final_running_balance
        total_profit += (env.data.iloc[env.t]['Close']*env.positions) + env.balance - env.initial_balance
        print(f"Balance : {asset}: {final_running_balance}")

    print("-"*27)
    print(f"Total profit made: {total_profit}")

    return final_running_balance_dict

def train_agent(assets, train_envs, agent, NUM_EPOCHS):
    scores = {key: [] for key in assets}
    for epoch in range(NUM_EPOCHS):

        # intialise score for the epoch
        score = {key: 0 for key in assets}
        step_count = 1

        for asset, env in zip(assets, train_envs):

            # reset the environment before each epoch + get initial state
            state = env.reset()

            while True:

                # find epsilon greedy action from state
                action = agent.act(asset, state, 1/step_count) # epsilon = 1/t

                # perform step in the environment and get completing info
                next_state, reward, done = env.step(action)

                agent.step(asset, state, action, reward, next_state, done)

                # prepare for next iteration
                step_count += 1
                state = next_state

                score[asset] += reward

                if done:
                    break

        # compute info about the epoch
        for key in scores.keys():
            scores[key].append(score[key])

        print(f"Epoch {epoch:2} | Scores = {score}")

    print("Training done!")

    # save Q_network model weights
    agent.save_models("weights")


asset_name_test = "BTC-USD"
model_path = "weights/trained_agent_model_"+asset_name_test+".pth"
assets = ["BTC-USD", "ETH-USD", "BNB-USD"]

## Load Training and Testing Dataset
date_split = '2023-01-01'
start_date = '2020-01-01'
end_date =  '2024-01-01'
dataloader = DataLoader(start_date, '1d', date_split, end_date)


trains = []
tests = []

for asset in assets:
    train, test = dataloader.load(asset)

    trains.append(train)
    tests.append(test)

# Environment and Agent Initiation
train_envs = [SingleAssetEnv(train) for train in trains]
test_envs = [SingleAssetEnv(test) for test in tests]
agent = TradingAgent(STATES_DIM, NUM_ACTIONS, assets, seed=SEED)

if os.path.exists(model_path) :
    ## Agent
    model_path = "weights/trained_agent_model_"+asset_name_test+".pth"
    # Load the state dict
    state_dict = torch.load(model_path)

    # Set the loaded state dict to the Q network of the corresponding asset
    agent.qnets[agent.map_assets[asset]].load_state_dict(state_dict)

    final_running_balance_dict = test_agent(assets, test_envs, agent)

else:  # TRAINING MODEL IN CASE IT IS NOT TRAINED YET
    agent = TradingAgent(STATES_DIM, NUM_ACTIONS, assets, seed=SEED)    
    train_agent(assets, train_envs, agent, NUM_EPOCHS)
    final_running_balance_dict = test_agent(assets, test_envs, agent)


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


Balance : BTC-USD: 109533.7109375
Balance : ETH-USD: 102530.52099609375
Balance : BNB-USD: 100000.0
---------------------------
Total profit made: 12064.23193359375
