In [1]:
# add parent directory to path: enable import from parent dir
import sys
sys.path.append('../')

from environment import SmartBrokerEnv
from agents.a2c import A2C
from networks.a2c.actor import Actor
from networks.a2c.critic import Critic

import matplotlib.pyplot as plt
import torch

# --- Reproducibility ---------------------------------------------------------
# Seed torch before the networks are constructed so that torch-side randomness
# repeats between runs.
# NOTE(review): the environment/agent may also draw from `random` or numpy;
# seed those too if they do.
SEED = 42
torch.manual_seed(SEED)

# --- Data / environment configuration ----------------------------------------
norm_cols = ['Volume XRP']
cols = ['date', 'open', 'high', 'low', 'close', 'Volume XRP']
batch_dur = 15
# NOTE(review): equals len(cols) here; whether 'date' is meant to count as a
# feature is decided inside SmartBrokerEnv — confirm before changing `cols`.
n_features = 6
n_actions = 3
# Input width shared by the actor and the critic. The `+ 3` presumably covers
# extra state entries appended by the environment — TODO confirm.
state_dim = batch_dur * n_features + 3

env = SmartBrokerEnv(
    batch_dur=batch_dur,
    df_info={
        'start_date': '2021-01-01',
        'end_date': '2021-02-01',
        'norm_cols': norm_cols,
        'cols': cols,
    },
    portfolio={},  # empty dict, as in the original cell
)

# --- Networks and agent --------------------------------------------------------
actor_model = Actor(
    state_dim=state_dim,
    action_dim=n_actions,
)
critic_model = Critic(state_dim=state_dim)

a2c = A2C(
    env=env,
    actor=actor_model,
    critic=critic_model,
    n_actns=n_actions,
    actor_optmz=torch.optim.Adam(actor_model.parameters(), lr=0.0008),
    critic_optmz=torch.optim.Adam(critic_model.parameters(), lr=0.0005),
    hyprprms={
        'gamma': 0.9995,
    },
    log_freq=1,
    mdl_pth='../models/a2c_nn_3',
    load_models=False,  # start from freshly initialised networks
)

In [None]:
# Train the agent. The argument is presumably the number of episodes (the log
# lines printed below are numbered per "Ep") — confirm against A2C.run.
a2c.run(1000)


Ep: 0 | TS: 26735 | L: 1.747 | R: -7.69 | P: 4.56 | R.Avg P: 4.56 | NW: 113.4 | R.Avg NW: 113.4 | R.U: 328
Ep: 1 | TS: 51314 | L: 1.213 | R: -8.4 | P: 7.91 | R.Avg P: 6.24 | NW: 113.57 | R.Avg NW: 113.48 | R.U: 331
Ep: 2 | TS: 80374 | L: 0.95 | R: -9.23 | P: 1.55 | R.Avg P: 4.67 | NW: 106.17 | R.Avg NW: 111.05 | R.U: 329
Ep: 3 | TS: 98615 | L: 0.85 | R: -8.81 | P: 18.09 | R.Avg P: 8.03 | NW: 122.1 | R.Avg NW: 113.81 | R.U: 335
Ep: 4 | TS: 114981 | L: 0.882 | R: -9.08 | P: 10.52 | R.Avg P: 8.53 | NW: 114.27 | R.Avg NW: 113.9 | R.U: 334
Ep: 5 | TS: 161055 | L: 0.927 | R: -8.32 | P: 9.71 | R.Avg P: 8.72 | NW: 115.99 | R.Avg NW: 114.25 | R.U: 338
Ep: 6 | TS: 198871 | L: 0.953 | R: -9.78 | P: -8.31 | R.Avg P: 6.29 | NW: 97.36 | R.Avg NW: 111.84 | R.U: 332
Ep: 7 | TS: 224901 | L: 0.842 | R: -8.96 | P: 0.97 | R.Avg P: 5.62 | NW: 108.2 | R.Avg NW: 111.38 | R.U: 330
Ep: 8 | TS: 268803 | L: 0.856 | R: -7.44 | P: 5.93 | R.Avg P: 5.66 | NW: 116.03 | R.Avg NW: 111.9 | R.U: 331
Ep: 9 | TS: 292322 |

In [None]:
# Evaluate the agent from '2021-01-30 15:15' with duration=500 and show_pred=True.
# NOTE(review): this start date lies between the 'start_date'/'end_date' passed
# to SmartBrokerEnv in the setup cell — confirm whether evaluating inside that
# window is intended.
# The following evaluation cell rebinds `rewards`, `profits` and `actions`.
rewards, profits, actions = a2c.evaluate(start_dt='2021-01-30 15:15', duration=500, show_pred=True)

In [None]:
# Same evaluation, starting one day later ('2021-01-31 15:15').
# This rebinds `rewards`, `profits` and `actions`, so the values returned by the
# previous evaluation cell are no longer reachable through these names.
rewards, profits, actions = a2c.evaluate(start_dt='2021-01-31 15:15', duration=500, show_pred=True)

## Visualizations

### Training 

In [None]:
# Plot the rolling-average metrics stored in `a2c.logs`, one panel per metric.
# Only the log values are needed; the dict keys are ignored.
logs = list(a2c.logs.values())
count = len(logs)


def _log_series(key):
    """Return the values stored under `key`, one per entry of `a2c.logs`."""
    return [log[key] for log in logs]


r_avg_loss = _log_series('r_avg_loss')
r_avg_rewards = _log_series('r_avg_reward')
r_avg_profits = _log_series('r_avg_profit')
r_avg_units_held = _log_series('r_avg_units_held')
r_avg_net_worth = _log_series('r_avg_net_worth')
r_avg_bal = _log_series('r_avg_bal')

# Listed in row-major order, i.e. the order in which `ax.flat` walks the 3x2 grid.
panels = [
    (r_avg_loss, 'Rolling avg loss per episode'),
    (r_avg_rewards, 'Rolling avg reward per episode'),
    (r_avg_profits, 'Rolling avg profit per episode'),
    (r_avg_units_held, 'Rolling avg units held per episode'),
    (r_avg_net_worth, 'Rolling avg net worth per episode'),
    (r_avg_bal, 'Rolling avg balance per episode'),
]

fig, ax = plt.subplots(3, 2, figsize=(16, 12))
for panel_ax, (values, title) in zip(ax.flat, panels):
    panel_ax.plot(range(count), values)
    panel_ax.set_title(title)
    # The x value is the position of the entry within a2c.logs.
    panel_ax.set_xlabel('Log entry')

fig.tight_layout()  # keep titles and x-labels of neighbouring panels from overlapping
plt.show()          # render the figure without echoing the last expression's repr

In [None]:
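# Left commented out: manually save the actor and critic state dicts under
# ../models/a2c_nn_3/. Uncomment both lines to run.
# torch.save(a2c.actor.state_dict(), '../models/a2c_nn_3/actor')
# torch.save(a2c.critic.state_dict(), '../models/a2c_nn_3/critic')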

In [None]:
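# Left commented out: dump a2c.logs (converted to a plain dict) to
# ../pickles/a2c_logs_main.pickle. Uncomment the whole cell to run.
# import pickle
# with open("../pickles/a2c_logs_main.pickle","wb") as f:
#     temp = dict(a2c.logs)
#     pickle.dump(temp, f, pickle.HIGHEST_PROTOCOL)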