In [1]:
# add parent directory to path: enable import from parent dir
import sys
sys.path.append('../')

from environment import SmartBrokerEnv
from agents.dqn import DQN
from networks.lstm_dueling import LSTMDueling

import matplotlib.pyplot as plt
import numpy as np
import torch.nn as nn
import torch

# --- Reproducibility ---------------------------------------------------------
# Seed the NumPy and PyTorch global generators before any network is built so
# that a fresh "Restart Kernel -> Run All" starts from the same random state.
# NOTE(review): if the agent or environment draw from Python's `random`
# module, that generator needs seeding as well — confirm in DQN / SmartBrokerEnv.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

# --- Environment -------------------------------------------------------------
# Columns requested from the price data; `norm_cols` is passed to the
# environment separately under the 'norm_cols' key.
norm_cols = ['Volume XRP']
cols = ['date', 'open', 'high', 'low', 'close', 'Volume XRP']
batch_dur = 20
env = SmartBrokerEnv(
    batch_dur=batch_dur,
    df_info={
        'start_date': '2021-01-01',
        'end_date': '2021-02-01',
        'norm_cols': norm_cols,
        'cols': cols,
    },
    # Empty dict: no portfolio overrides are supplied here.
    portfolio={},
)

# --- Networks ----------------------------------------------------------------
batch_size = 10
n_actions = 3
# Input width shared by both networks.
# NOTE(review): the `3 * batch_dur + 3` layout presumably mirrors the
# observation vector produced by SmartBrokerEnv — confirm against the env.
input_dim = batch_dur * 3 + 3

# Construction order (target first, then policy) is kept as in the original
# cell so both networks consume the seeded generator in a fixed order.
target_net = LSTMDueling(
    input_dim=input_dim,
    output_dim=n_actions,
)

policy_net = LSTMDueling(
    input_dim=input_dim,
    output_dim=n_actions,
)

# --- Agent -------------------------------------------------------------------
# NOTE(review): the two networks are initialised independently; whether DQN
# copies policy weights into the target network before training is not visible
# from this notebook — confirm in agents/dqn.
dqn = DQN(
    env=env,
    env_type='vector',
    n_actions=n_actions,
    log_freq=1,
    train_freq=3,
    batch_size=batch_size,
    w_sync_freq=1,
    memory_size=500,
    gamma=0.9995,
    step_size=0.01,
    episodes=500,
    target_net=target_net,
    policy_net=policy_net,
    loss_func=nn.SmoothL1Loss(),
    # Only the policy network's parameters are handed to the optimizer.
    optimizer=torch.optim.Adam(policy_net.parameters(), lr=0.00008),
    load_pretrained=False,
    save_pretrained=False,
    model_path='../models/dqn_d_lstm',
    network_type='lstm',
)

In [None]:
# Run the agent. The recorded output of this cell shows one log line per
# episode (loss, reward, profit, net worth).
# NOTE(review): 500 equals the `episodes` value given to the DQN constructor;
# presumably it is the episode count — confirm against DQN.run.
dqn.run(500)

collecting experience...

Ep: 0 | TS: 19970 | L: 251407.426 | R: -13.4 | P: 39.98 | R.Avg P: 39.98 | NW: 139.98 | R.Avg NW: 139.98 | R.U: 258
Ep: 1 | TS: 39940 | L: 159566.88 | R: -26.4 | P: 1.81 | R.Avg P: 20.9 | NW: 101.81 | R.Avg NW: 120.9 | R.U: 229
Ep: 2 | TS: 59910 | L: 156423.297 | R: -25.86 | P: 2.86 | R.Avg P: 14.88 | NW: 102.86 | R.Avg NW: 114.88 | R.U: 220
Ep: 3 | TS: 79880 | L: 190301.959 | R: -18.59 | P: 19.23 | R.Avg P: 15.97 | NW: 119.23 | R.Avg NW: 115.97 | R.U: 226
Ep: 4 | TS: 99850 | L: 153365.39 | R: -24.86 | P: 2.42 | R.Avg P: 13.26 | NW: 102.42 | R.Avg NW: 113.26 | R.U: 221
Ep: 5 | TS: 119820 | L: 201156.983 | R: -17.12 | P: 24.25 | R.Avg P: 15.09 | NW: 124.25 | R.Avg NW: 115.09 | R.U: 226
Ep: 6 | TS: 139790 | L: 213731.804 | R: -15.33 | P: 20.73 | R.Avg P: 15.9 | NW: 120.73 | R.Avg NW: 115.9 | R.U: 230
Ep: 7 | TS: 159760 | L: 158094.452 | R: -26.22 | P: -5.67 | R.Avg P: 13.2 | NW: 94.33 | R.Avg NW: 113.2 | R.U: 225
Ep: 8 | TS: 179730 | L: 168924.995 | R: -21.83 | 

In [None]:
# Evaluate the agent starting at the given timestamp, which lies inside the
# 2021-01-01 .. 2021-02-01 window the environment was configured with.
# NOTE(review): the unit of `duration` (steps vs. minutes) is not visible
# from this notebook — confirm in DQN.evaluate.
dqn.evaluate(start_dt='2021-01-10 07:00', duration=3000)

### Visualizations

### Training

In [None]:
# Plot the rolling-average training metrics stored in `dqn.logs`,
# one metric per panel on a 3x2 grid.
fig, ax = plt.subplots(3, 2, figsize=(16, 12))

# Materialise the log entries once; each entry is indexed by the 'r_avg_*'
# keys used below. Iteration order is the order of `dqn.logs`.
log_entries = list(dqn.logs.values())
count = len(log_entries)

r_avg_rewards = [entry['r_avg_reward'] for entry in log_entries]
r_avg_profits = [entry['r_avg_profit'] for entry in log_entries]
r_avg_bal = [entry['r_avg_bal'] for entry in log_entries]
r_avg_units_held = [entry['r_avg_units_held'] for entry in log_entries]
r_avg_loss = [entry['r_avg_loss'] for entry in log_entries]
r_avg_net_worth = [entry['r_avg_net_worth'] for entry in log_entries]

# (series, title) pairs listed in row-major panel order, matching `ax.flat`:
# row 0 -> loss, reward; row 1 -> profit, units held; row 2 -> net worth, balance.
panels = [
    (r_avg_loss, 'Rolling avg loss per episode'),
    (r_avg_rewards, 'Rolling avg reward per episode'),
    (r_avg_profits, 'Rolling avg profit per episode'),
    (r_avg_units_held, 'Rolling avg units held per episode'),
    (r_avg_net_worth, 'Rolling avg net worth per episode'),
    (r_avg_bal, 'Rolling avg balance per episode'),
]

x_values = range(count)
for axis, (series, title) in zip(ax.flat, panels):
    axis.plot(x_values, series)
    axis.set_title(title)
    # x is the position of the entry in `dqn.logs`; the titles describe the
    # values as per-episode.
    axis.set_xlabel('Episode')

# Keep titles and axis labels of neighbouring panels from overlapping, and end
# the cell with show() so no `Text(...)` repr is echoed as cell output.
fig.tight_layout()
plt.show()

In [None]:
# Optional manual checkpoint: uncomment to write both networks' state dicts
# under the same directory given as `model_path` to the DQN constructor
# (the agent above is created with save_pretrained=False).
# torch.save(dqn.target_net.state_dict(), '../models/dqn_d_lstm/target_net')
# torch.save(dqn.policy_net.state_dict(), '../models/dqn_d_lstm/policy_net')