In [1]:
# add parent directory to path: enable import from parent dir
import sys
sys.path.append('../')

from environment import SmartBrokerEnv
from agents.a2c import A2C
from networks.a2c.actor import Actor
from networks.a2c.critic import Critic
from networks.a2c_lstm.actor import ActorLSTM
from networks.a2c_lstm.critic import CriticLSTM

import gym
import matplotlib.pyplot as plt
import torch

In [11]:
# --- Environment ------------------------------------------------------------
# Columns requested from the price data; none of them are normalised here.
norm_cols = []
cols = ['date', 'open', 'high', 'low', 'close', 'Volume XRP']
batch_dur = 30

df_info = {
    'start_date': '2021-02-01',
    'end_date': '2021-03-01',
    'norm_cols': norm_cols,
    'cols': cols,
}
env = SmartBrokerEnv(batch_dur=batch_dur, df_info=df_info, portfolio={})

# --- Networks ---------------------------------------------------------------
# Both networks take the same input width.
# NOTE(review): the `batch_dur * 3 + 3` formula presumably mirrors the
# observation layout produced by SmartBrokerEnv — confirm against the env.
state_dim = batch_dur * 3 + 3
n_actions = 3

actor_model = ActorLSTM(state_dim=state_dim, action_dim=n_actions)
critic_model = Critic(state_dim=state_dim)

# --- Agent ------------------------------------------------------------------
# LSTM policy network paired with a plain NN critic, each with its own Adam
# optimizer at the same learning rate.
learning_rate = 1e-5
actor_optmz = torch.optim.Adam(actor_model.parameters(), lr=learning_rate)
critic_optmz = torch.optim.Adam(critic_model.parameters(), lr=learning_rate)

a2c = A2C(
    env=env,
    actor=actor_model,
    critic=critic_model,
    n_actns=n_actions,
    actor_optmz=actor_optmz,
    critic_optmz=critic_optmz,
    hyprprms={'gamma': 0.995},
    log_freq=1,
    p_net_type='lstm',
    c_net_type='nn',
    load_models=False,
    mdl_pth='../models/a2c_al_cn',
)

In [None]:
# Train the agent. The argument is presumably the number of episodes (the log
# printed below is indexed by "Ep") — TODO confirm against A2C.run.
a2c.run(5000)


Ep: 0 | TS: 19970 | L: 2318398415.813 | R: -4.67 | P: -31.38 | R.Avg P: -31.38 | NW: 68.62 | R.Avg NW: 68.62 | R.U: 65
Ep: 1 | TS: 39940 | L: 2296998616.186 | R: -4.58 | P: -7.01 | R.Avg P: -19.2 | NW: 92.99 | R.Avg NW: 80.81 | R.U: 77
Ep: 2 | TS: 59910 | L: 2275732038.246 | R: -4.68 | P: -25.03 | R.Avg P: -21.14 | NW: 74.97 | R.Avg NW: 78.86 | R.U: 75
Ep: 3 | TS: 79880 | L: 2254598100.708 | R: -4.65 | P: -37.37 | R.Avg P: -25.2 | NW: 62.63 | R.Avg NW: 74.8 | R.U: 71
Ep: 4 | TS: 99850 | L: 2233600521.138 | R: -4.65 | P: -24.95 | R.Avg P: -25.15 | NW: 75.05 | R.Avg NW: 74.85 | R.U: 71
Ep: 5 | TS: 119820 | L: 2212743916.917 | R: -4.38 | P: 40.42 | R.Avg P: -14.22 | NW: 140.42 | R.Avg NW: 85.78 | R.U: 82
Ep: 6 | TS: 139790 | L: 2192030601.716 | R: -4.58 | P: -2.38 | R.Avg P: -12.53 | NW: 97.62 | R.Avg NW: 87.47 | R.U: 83
Ep: 7 | TS: 159760 | L: 2171459194.49 | R: -4.54 | P: 5.06 | R.Avg P: -10.33 | NW: 105.06 | R.Avg NW: 89.67 | R.U: 85
Ep: 8 | TS: 179730 | L: 2151034625.082 | R: -4.63 |

In [6]:
# Evaluate the trained agent. The argument is presumably the number of
# evaluation episodes — TODO confirm against A2C.evaluate.
a2c.evaluate(1)

tensor(1) tensor(0) tensor(1) tensor(2) tensor(2) tensor(2) tensor(2) tensor(0) tensor(1) tensor(0) tensor(0) tensor(2) tensor(0) tensor(2) tensor(0) tensor(1) tensor(2) tensor(2) tensor(0) tensor(0) tensor(1) tensor(1) tensor(1) tensor(2) tensor(2) tensor(2) tensor(0) tensor(0) tensor(2) tensor(2) tensor(2) tensor(0) tensor(0) tensor(0) tensor(2) tensor(0) tensor(2) tensor(0) tensor(1) tensor(1) tensor(2) tensor(0) tensor(1) tensor(1) tensor(0) tensor(2) tensor(0) tensor(0) tensor(2) tensor(0) tensor(1) tensor(2) tensor(1) tensor(1) tensor(0) tensor(0) tensor(0) tensor(0) tensor(2) tensor(0) tensor(0) tensor(0) tensor(2) tensor(2) tensor(1) tensor(1) tensor(2) tensor(0) tensor(0) tensor(0) tensor(2) tensor(0) tensor(0) tensor(2) tensor(0) tensor(0) tensor(1) tensor(2) tensor(2) tensor(2) tensor(2) tensor(1) tensor(0) tensor(0) tensor(1) tensor(2) tensor(0) tensor(0) tensor(0) tensor(2) tensor(0) tensor(1) tensor(2) tensor(1) tensor(2) tensor(2) tensor(1) tensor(0) tensor(2) tensor(2) 

## Visualizations

### Training 

In [None]:
# Plot the rolling-average training metrics stored in `a2c.logs`,
# one panel per metric on a 3x2 grid.
fig, ax = plt.subplots(3, 2, figsize=(16, 12))

# Collect the per-entry log dicts once, in the mapping's iteration order.
train_logs = [log for _, log in a2c.logs.items()]
count = len(train_logs)

r_avg_rewards = [log['r_avg_reward'] for log in train_logs]
r_avg_profits = [log['r_avg_profit'] for log in train_logs]
r_avg_bal = [log['r_avg_bal'] for log in train_logs]
r_avg_units_held = [log['r_avg_units_held'] for log in train_logs]
r_avg_loss = [log['r_avg_loss'] for log in train_logs]
r_avg_net_worth = [log['r_avg_net_worth'] for log in train_logs]

# (axes, series, title) for every panel, listed row by row.
panels = [
    (ax[0][0], r_avg_loss, 'Rolling avg loss per episode'),
    (ax[0][1], r_avg_rewards, 'Rolling avg reward per episode'),
    (ax[1][0], r_avg_profits, 'Rolling avg profit per episode'),
    (ax[1][1], r_avg_units_held, 'Rolling avg units held per episode'),
    (ax[2][0], r_avg_net_worth, 'Rolling avg net worth per episode'),
    (ax[2][1], r_avg_bal, 'Rolling avg balance per episode'),
]

episodes = range(count)
for axis, series, title in panels:
    axis.plot(episodes, series)
    axis.set_title(title)
    axis.set_xlabel('Episode')

# A figure-level title distinguishes this figure from the evaluation one,
# whose panel titles are identical.
fig.suptitle('Training')
# Reserve a strip at the top for the suptitle and keep x-labels from
# colliding with the titles of the row below.
fig.tight_layout(rect=(0, 0, 1, 0.97))

### Evaluation

In [None]:
# Plot the rolling-average evaluation metrics stored in `a2c.eval_logs`,
# one panel per metric on a 3x2 grid.
fig, ax = plt.subplots(3, 2, figsize=(16, 12))

# Collect the per-entry log dicts once, in the mapping's iteration order.
eval_log_entries = [log for _, log in a2c.eval_logs.items()]
count = len(eval_log_entries)

r_avg_rewards = [log['r_avg_reward'] for log in eval_log_entries]
r_avg_profits = [log['r_avg_profit'] for log in eval_log_entries]
r_avg_bal = [log['r_avg_bal'] for log in eval_log_entries]
r_avg_units_held = [log['r_avg_units_held'] for log in eval_log_entries]
r_avg_loss = [log['r_avg_loss'] for log in eval_log_entries]
r_avg_net_worth = [log['r_avg_net_worth'] for log in eval_log_entries]

# (axes, series, title) for every panel, listed row by row.
panels = [
    (ax[0][0], r_avg_loss, 'Rolling avg loss per episode'),
    (ax[0][1], r_avg_rewards, 'Rolling avg reward per episode'),
    (ax[1][0], r_avg_profits, 'Rolling avg profit per episode'),
    (ax[1][1], r_avg_units_held, 'Rolling avg units held per episode'),
    (ax[2][0], r_avg_net_worth, 'Rolling avg net worth per episode'),
    (ax[2][1], r_avg_bal, 'Rolling avg balance per episode'),
]

episodes = range(count)
for axis, series, title in panels:
    axis.plot(episodes, series)
    axis.set_title(title)
    axis.set_xlabel('Episode')

# The panel titles are the same as in the training figure, so label the
# figure itself to make clear these curves come from evaluation.
fig.suptitle('Evaluation')
# Reserve a strip at the top for the suptitle and keep x-labels from
# colliding with the titles of the row below.
fig.tight_layout(rect=(0, 0, 1, 0.97))

In [None]:
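# Optional: save the trained weights manually.
# NOTE(review): the paths below use `a2c_cl_an`, but this notebook configures
# mdl_pth='../models/a2c_al_cn' (LSTM actor, NN critic) — confirm the target
# directory before uncommenting.
# torch.save(a2c.actor.state_dict(), '../models/a2c_cl_an/actor')
# torch.save(a2c.critic.state_dict(), '../models/a2c_cl_an/critic')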

In [None]:
import pickle
from datetime import datetime
from pathlib import Path

# Persist the training logs to a timestamped pickle under ../pickles.
# The timestamp is formatted explicitly: str(datetime.now()) contains a space
# and colons, which are awkward in shells and invalid in Windows filenames.
pickle_dir = Path('../pickles')
# open() does not create missing directories, so make sure the target exists.
pickle_dir.mkdir(parents=True, exist_ok=True)
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S_%f')
logs_path = pickle_dir / f'a2c_logs_{timestamp}.pickle'

# Copy into a plain dict before pickling, as the original cell did.
# NOTE(review): presumably a2c.logs is a dict subclass or proxy that is not
# directly picklable — confirm.
logs_snapshot = dict(a2c.logs)
with open(logs_path, 'wb') as f:
    pickle.dump(logs_snapshot, f, pickle.HIGHEST_PROTOCOL)

### Observations

- A2C performs well when the actor uses an LSTM and the critic uses a neural network.
- If the actor uses a neural network instead, the agent doesn't seem to learn the task.