In [None]:
import argparse
import logging
import os
from datetime import datetime

import matplotlib.pyplot as plt
import torch
import torch.nn as nn

from models.q_training import train

# Route INFO-and-above records to log.txt. Each record is rendered as
# "<LEVEL> <timestamp>: <logger name> - <message> " using a 12-hour clock.
logging.basicConfig(
    filename='log.txt',
    format='%(levelname)s %(asctime)s: %(name)s - %(message)s ',
    datefmt='%m/%d/%Y %I:%M:%S %p',
    level=logging.INFO
)

# Program metadata for a command-line front end.
# NOTE(review): no arguments are registered on this parser and parse_args()
# is not called anywhere in the visible code, so the run is driven entirely
# by the constants assigned below -- confirm whether CLI support is planned.
parser = argparse.ArgumentParser(
    prog='DQN Trainer',
    description='DQN Training algorithm for Portfolio Management',
    epilog='Source code for Carlos Gustavo Salas Flores Signature Work at Duke University & Duke Kunshan University '
           'for the B.S. in Data Science undergraduate degree. '
)

# Logger used by this script for its own progress messages.
logger = logging.getLogger("main")

# Short-named run settings. The next cell copies each of them into the
# descriptive name given in the trailing comment.

# --- What to train ---------------------------------------------------------
model = "Single_DQN"        # -> model_name
reward = "roi"              # -> reward_metric
portfolio = 1               # passed to train() as portfolio_to_use

# --- Learning settings -----------------------------------------------------
episodes = 1000
e, g = 0.01, 0.8            # -> epsilon, gamma
lr = 1e-4
m = 1e-3                    # -> momentum

# --- Trading settings ------------------------------------------------------
tr, d = 4, 1000             # -> n_transactions, n_trading_days
ic = bl = sl = 100_000      # -> initial_cash, buy_limit, sell_limit
tp, gl = 2, 21_000          # -> priority_fee, gas_limit

# --- Batch and memory sizes --------------------------------------------------
batch, memory = 128, 10_000  # -> batch_size, memory_size

In [None]:
# Relative paths to the price data, the portfolio definitions and the
# directory handed to train() as save_path.
data_file = "data/raw//ClosePriceData_2022-10-01_to_2022-08-21.csv"
portfolios_json = "portfolios//portfolios.json"

save_path = "models/saved_models"

# Use the first CUDA device when one is available.
# NOTE(review): on CPU-only machines `device` is None rather than
# torch.device("cpu"); confirm that train() accepts None.
device = torch.device("cuda:0") if torch.cuda.is_available() else None
loss_function = nn.MSELoss()

# Expand the short names from the settings cell into the descriptive names
# used for the run summary and the train() call. `portfolio`, `episodes` and
# `lr` already carry their final names, so they are used as-is (the previous
# `x = x` self-assignments were no-ops).
model_name = model
reward_metric = reward

epsilon = e
gamma = g
momentum = m

n_transactions = tr
n_trading_days = d

initial_cash = ic
buy_limit = bl
sell_limit = sl

priority_fee = tp
gas_limit = gl

batch_size = batch
memory_size = memory

# Label for the compute device shown in the run summary: the CUDA device
# name when CUDA is available, otherwise the literal "CPU".
if torch.cuda.is_available():
    device_label = torch.cuda.get_device_name(device=device)
else:
    device_label = "CPU"

# Full run configuration, rendered once and then echoed to both stdout and
# the log so every run records the settings it was started with.
training_info = f"""
    Training {model_name} in portfolio {portfolio} with
        data_file = {data_file}
        portfolios_json = {portfolios_json}

        device = {device_label}
        loss_function = {loss_function}
        reward_metric = {reward_metric}

        episodes = {episodes}
        epsilon = {epsilon}
        gamma = {gamma}
        lr = {lr}
        momentum = {momentum}

        n_transactions = {n_transactions}
        n_trading_days = {n_trading_days}

        initial_cash = {initial_cash}
        buy_limit = {buy_limit}
        sell_limit = {sell_limit}

        priority_fee = {priority_fee}
        gas_limit = {gas_limit}

        batch_size = {batch_size}
        memory_size = {memory_size}
    """
print(training_info)
logger.info(training_info)

# Run training with the configuration assembled above. train() returns two
# values; the second is later indexed by "metric_history" and "avg_loss" to
# draw the result plots. Every argument is passed by keyword.
q, history_dqn = train(
    # What to train, and on which device.
    model_name=model_name,
    reward_metric=reward_metric,
    loss_function=loss_function,
    device=device,
    # Input data and output location.
    token_prices_address=data_file,
    portfolio_json=portfolios_json,
    portfolio_to_use=portfolio,
    save_path=save_path,
    # Trading settings.
    initial_cash=initial_cash,
    n_trading_days=n_trading_days,
    n_tokens=None,
    n_transactions=n_transactions,
    buy_limit=buy_limit,
    sell_limit=sell_limit,
    priority_fee=priority_fee,
    gas_limit=gas_limit,
    # Learning settings.
    episodes=episodes,
    batch_size=batch_size,
    memory_size=memory_size,
    lr=lr,
    momentum=momentum,
    epsilon=epsilon,
    gamma=gamma,
)

logger.info("Training Complete!")

# One timestamp shared by both output files so they can be paired up later.
# NOTE(review): the ':' characters are not valid in Windows file names; the
# format is kept unchanged so new files match previously saved ones.
current_time = datetime.now().strftime("%Y-%m-%d_%H:%M:%S")

# savefig() does not create missing directories, so make sure the target
# exists before the results of the training run are written.
os.makedirs("data/figures", exist_ok=True)

# Each curve gets its own figure. Without a fresh figure the loss curve
# would be drawn on top of the reward curve and the saved "loss" image
# would contain both lines.
plt.figure()
plt.title("Total reward history")
plt.plot(history_dqn["metric_history"], color="red")
plt.savefig(f"data/figures/{model_name}_reward_{current_time}.png")

plt.figure()
plt.title("Total average loss")
plt.plot(history_dqn["avg_loss"], color="blue")
plt.savefig(f"data/figures/{model_name}_loss_{current_time}.png")