In [1]:
# add parent directory to path: enable import from parent dir
import sys
sys.path.append('../')

from environment import SmartBrokerEnv
from agents.dqn import DQN
from networks.nn_dueling import DuelingNet

import matplotlib.pyplot as plt
import numpy as np
import torch.nn as nn
import torch

# Seed the global numpy and torch RNGs so that network initialisation and any
# numpy/torch-based sampling repeat across "Restart Kernel -> Run All".
# NOTE(review): if the environment or agent draws from Python's `random`
# module, that generator needs seeding as well -- confirm against their code.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

# Columns handed to the environment as `norm_cols`.
norm_cols = ['Volume XRP']
# Full column list: date + OHLC prices + the column(s) listed in norm_cols.
# Built from norm_cols so the volume column name is spelled in one place only.
cols = ['date', 'open', 'high', 'low', 'close'] + norm_cols
# Passed to the env as `batch_dur` and reused below to size the network input.
batch_dur = 20

env = SmartBrokerEnv(
    batch_dur=batch_dur,
    df_info={
        'start_date': '2021-01-01',
        'end_date': '2021-01-05',
        'norm_cols': norm_cols,
        'cols': cols,
    },
    # Empty portfolio spec; presumably the env falls back to its own
    # defaults -- TODO confirm in SmartBrokerEnv.
    portfolio={},
)

# Both networks share one architecture, so the constructor arguments are
# written once and reused.
# NOTE(review): input_dim = batch_dur*4 + 3 presumably means 4 features per
# time step plus 3 extra state values -- confirm against the env's observation.
net_kwargs = {
    'input_dim': batch_dur * 4 + 3,
    'output_dim': 3,
}

# Construction order (target first, then policy) is kept as in the original.
target_net = DuelingNet(**net_kwargs)
policy_net = DuelingNet(**net_kwargs)

# Loss and optimizer are built up front; the optimizer is bound to
# policy_net's parameters only.
mse_loss = nn.MSELoss()
adam_optimizer = torch.optim.Adam(policy_net.parameters(), lr=0.001)

# Assemble the agent. Keyword arguments are grouped by role; values are unchanged.
dqn = DQN(
    # --- environment ---
    env=env,
    env_type='vector',
    n_actions=3,
    # --- networks and optimisation ---
    target_net=target_net,
    policy_net=policy_net,
    loss_func=mse_loss,
    optimizer=adam_optimizer,
    # --- schedule / replay settings ---
    episodes=1000,
    batch_size=50,
    memory_size=500,
    train_freq=3,
    w_sync_freq=10,
    log_freq=1,
    # --- exploration and learning hyperparameters ---
    # NOTE(review): step_size (0.01) differs from the Adam lr (0.001);
    # what step_size controls is not visible here -- confirm in DQN.
    epsilon_start=1,
    epsilon_decay=0.995,
    gamma=0.9,
    step_size=0.01,
    # --- checkpointing (both disabled) ---
    load_pretrained=False,
    save_pretrained=False,
    model_path='../models/dqn_nn',
)

In [None]:
# Start training; progress lines are printed to the cell output.
# NOTE(review): the meaning of the positional 1000 is not visible from this
# notebook (the agent was also constructed with episodes=1000) -- confirm
# against DQN.run and consider passing it by keyword.
dqn.run(1000)

collecting experience...

Ep: 0 | TS: 46981 | L: 39379057.964 | R: -4.07 | P: 49.75 | R.Avg P: 49.75 | NW: 149.75 | R.Avg NW: 149.75 | R.U: 246
Ep: 1 | TS: 92392 | L: 12542800.472 | R: -21.94 | P: -3.64 | R.Avg P: 23.06 | NW: 96.36 | R.Avg NW: 123.06 | R.U: 205
Ep: 2 | TS: 136886 | L: 27710762.73 | R: -9.08 | P: 35.88 | R.Avg P: 27.33 | NW: 135.88 | R.Avg NW: 127.33 | R.U: 211
Ep: 3 | TS: 184045 | L: 15196673.465 | R: -20.02 | P: 1.55 | R.Avg P: 20.88 | NW: 101.55 | R.Avg NW: 120.88 | R.U: 200
Ep: 4 | TS: 233827 | L: 36868748.156 | R: -7.96 | P: 36.72 | R.Avg P: 24.05 | NW: 136.72 | R.Avg NW: 124.05 | R.U: 206
Ep: 5 | TS: 281849 | L: 19869339.955 | R: -15.87 | P: 13.4 | R.Avg P: 22.28 | NW: 113.4 | R.Avg NW: 122.28 | R.U: 204
Ep: 6 | TS: 330670 | L: 14144999.37 | R: -19.41 | P: 1.57 | R.Avg P: 19.32 | NW: 101.57 | R.Avg NW: 119.32 | R.U: 200
Ep: 7 | TS: 380453 | L: 15702026.707 | R: -17.44 | P: 6.4 | R.Avg P: 17.7 | NW: 106.4 | R.Avg NW: 117.7 | R.U: 199
Ep: 8 | TS: 424995 | L: 2433259

In [None]:
# Evaluate the agent from 2021-01-18 07:00. This start time is later than the
# env's configured end_date ('2021-01-05').
# NOTE(review): presumably evaluate() loads its own data window and `duration`
# is a number of time steps -- confirm against DQN.evaluate.
dqn.evaluate(start_dt='2021-01-18 07:00', duration=1000)

## Visualizations

### Training

In [None]:
# Plot the rolling-average training metrics stored in `dqn.logs`,
# one panel per metric on a 3x2 grid.

# Snapshot the log entries once; the keys of dqn.logs are not needed here.
episode_logs = list(dqn.logs.values())
count = len(episode_logs)

# One series per metric, in the iteration order of dqn.logs.
r_avg_rewards = [log['r_avg_reward'] for log in episode_logs]
r_avg_profits = [log['r_avg_profit'] for log in episode_logs]
r_avg_bal = [log['r_avg_bal'] for log in episode_logs]
r_avg_units_held = [log['r_avg_units_held'] for log in episode_logs]
r_avg_loss = [log['r_avg_loss'] for log in episode_logs]
r_avg_net_worth = [log['r_avg_net_worth'] for log in episode_logs]

# (series, title) pairs listed in row-major panel order:
# [0][0], [0][1], [1][0], [1][1], [2][0], [2][1].
panels = [
    (r_avg_loss, 'Rolling avg loss per episode'),
    (r_avg_rewards, 'Rolling avg reward per episode'),
    (r_avg_profits, 'Rolling avg profit per episode'),
    (r_avg_units_held, 'Rolling avg units held per episode'),
    (r_avg_net_worth, 'Rolling avg net worth per episode'),
    (r_avg_bal, 'Rolling avg balance per episode'),
]

fig, ax = plt.subplots(3, 2, figsize=(16, 12))
for panel_ax, (series, title) in zip(ax.flat, panels):
    panel_ax.plot(range(count), series)
    panel_ax.set_title(title)
    # The x value is the position of the entry in dqn.logs.
    panel_ax.set_xlabel('Logged episode index')

# Keep titles and tick labels of neighbouring panels from overlapping.
# tight_layout() returns None, so the cell shows only the figure.
fig.tight_layout()

In [None]:
# Disabled: persist the training logs (copied into a plain dict) as a pickle.
# NOTE(review): only unpickle this file from a trusted location -- loading a
# pickle can execute arbitrary code.
# import pickle
# temp = dict(dqn.logs)
# with open("../pickles/dqn_d_logs.pickle","wb") as f:
#     pickle.dump(temp, f, pickle.HIGHEST_PROTOCOL)

In [None]:
# Disabled: save the weights (state_dict) of both networks under ../models/dqn_d/.
# NOTE(review): this directory differs from the model_path given to DQN
# ('../models/dqn_nn') -- confirm which location is intended.
# torch.save(dqn.target_net.state_dict(), '../models/dqn_d/target_net')
# torch.save(dqn.policy_net.state_dict(), '../models/dqn_d/policy_net')