In [1]:
# Mount Google Drive into the Colab filesystem at /content/drive so the
# project directory used by the next cell is reachable.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
cd /content/drive/MyDrive/smart-broker/notebooks

/content/drive/MyDrive/smart-broker/notebooks


In [3]:
# Add the parent directory to the import path so the project modules imported
# below (`environment`, `agents`, `networks`) can be found. The entry is
# relative ('../'), so it depends on the current working directory.
import sys
sys.path.append('../')

from environment import SmartBrokerEnv
from agents.a2c import A2C
from networks.a2c.actor import Actor
from networks.a2c.critic import Critic

import gym
import matplotlib.pyplot as plt
import torch

In [7]:
# Data configuration shared with the model-construction cell below.
batch_dur = 15
norm_cols = ['Volume XRP']
cols = ['date', 'open', 'high', 'low', 'close', 'Volume XRP']

# Build the trading environment for the 2021-01-01 .. 2021-02-01 window,
# passing an empty portfolio dict.
env = SmartBrokerEnv(
    batch_dur=batch_dur,
    df_info={
        'start_date': '2021-01-01',
        'end_date': '2021-02-01',
        'norm_cols': norm_cols,
        'cols': cols,
    },
    portfolio={},
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[k1] = value[k2]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.df['rolling_price'] = self.df[self.price_typ].rolling(self.roll_period).sum()
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.df.sort_values('date', inplace=True)


In [8]:
# Sizes and learning rate shared by the actor, the critic and the agent.
# The state size is derived from the environment's batch duration.
state_dim = batch_dur * 3 + 3
n_actions = 3
learning_rate = 0.001

actor_model = Actor(state_dim=state_dim, action_dim=n_actions)
critic_model = Critic(state_dim=state_dim)

# One Adam optimizer per network, both with the same learning rate.
actor_optimizer = torch.optim.Adam(actor_model.parameters(), lr=learning_rate)
critic_optimizer = torch.optim.Adam(critic_model.parameters(), lr=learning_rate)

# Assemble the A2C agent around the environment and the two networks.
a2c = A2C(
    env=env,
    actor=actor_model,
    critic=critic_model,
    n_actns=n_actions,
    actor_optmz=actor_optimizer,
    critic_optmz=critic_optimizer,
    hyprprms={'gamma': 0.9995},
    log_freq=25,
)

In [9]:
# Train the agent. The argument is presumably the number of episodes (the
# progress log below counts `Ep`) — confirm against `A2C.run`.
a2c.run(1000)


Ep: 0 | L: 1.536 | R: -53.9 | R.Avg.R: -53.9 | P: 3.18 | R.Avg P: 3 | B: 59.75 | R.Avg B: 59 | R.N_Units: 188
Ep: 25 | L: 0.964 | R: -55.57 | R.Avg.R: -54.68 | P: 6.29 | R.Avg P: 4 | B: 7.31 | R.Avg B: 21 | R.N_Units: 363
Ep: 50 | L: 0.998 | R: -54.79 | R.Avg.R: -54.87 | P: 5.31 | R.Avg P: 5 | B: 2.95 | R.Avg B: 12 | R.N_Units: 402
Ep: 75 | L: 1.009 | R: -55.08 | R.Avg.R: -54.93 | P: 5.75 | R.Avg P: 5 | B: 1.22 | R.Avg B: 4 | R.N_Units: 439
Ep: 100 | L: 0.991 | R: -55.08 | R.Avg.R: -55.0 | P: 5.67 | R.Avg P: 5 | B: 0.59 | R.Avg B: 1 | R.N_Units: 449
Ep: 125 | L: 0.981 | R: -55.07 | R.Avg.R: -55.05 | P: 5.64 | R.Avg P: 5 | B: 0.34 | R.Avg B: 0 | R.N_Units: 454
Ep: 150 | L: 0.966 | R: -55.09 | R.Avg.R: -55.07 | P: 5.68 | R.Avg P: 5 | B: 0.17 | R.Avg B: 0 | R.N_Units: 455
Ep: 175 | L: 0.981 | R: -55.24 | R.Avg.R: -55.05 | P: 5.88 | R.Avg P: 5 | B: 0.61 | R.Avg B: 0 | R.N_Units: 456
Ep: 200 | L: 0.976 | R: -54.96 | R.Avg.R: -55.08 | P: 5.51 | R.Avg P: 5 | B: 0.58 | R.Avg B: 0 | R.N_Units:

## Visualizations

### Training

In [None]:
# Plot the rolling-average training metrics stored in `a2c.logs`, one panel
# per metric on a 3x2 grid.
fig, ax = plt.subplots(3, 2, figsize=(16, 12))

# Collect the logged entries in iteration order; the keys are not used.
log_entries = [entry for _, entry in a2c.logs.items()]
count = len(log_entries)

# One series per metric, read from the `r_avg_*` keys of every entry.
r_avg_rewards = [entry['r_avg_reward'] for entry in log_entries]
r_avg_profits = [entry['r_avg_profit'] for entry in log_entries]
r_avg_bal = [entry['r_avg_bal'] for entry in log_entries]
r_avg_units_held = [entry['r_avg_units_held'] for entry in log_entries]
r_avg_loss = [entry['r_avg_loss'] for entry in log_entries]
r_avg_net_worth = [entry['r_avg_net_worth'] for entry in log_entries]

# (series, title) pairs listed in row-major panel order, i.e. the order in
# which `ax.flat` walks the 3x2 grid of axes.
panels = [
    (r_avg_loss, 'Rolling avg loss per episode'),
    (r_avg_rewards, 'Rolling avg reward per episode'),
    (r_avg_profits, 'Rolling avg profit per episode'),
    (r_avg_units_held, 'Rolling avg units held per episode'),
    (r_avg_net_worth, 'Rolling avg net worth per episode'),
    (r_avg_bal, 'Rolling avg balance per episode'),
]

for axis, (series, title) in zip(ax.flat, panels):
    axis.plot(range(count), series)
    axis.set_title(title)

# Tidy the spacing between panels and end the cell with an explicit show so
# the last `set_title` call does not leak a `Text(...)` repr into the output.
fig.tight_layout()
plt.show()

In [None]:
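# Optional: persist the training logs for later analysis.
# NOTE: `temp` is not defined in the cells shown in this notebook; the object
# to dump is presumably `a2c.logs` — confirm before uncommenting.
# import pickle
# with open("../pickles/a2c_logs.pickle","wb") as f:
#     pickle.dump(temp, f, pickle.HIGHEST_PROTOCOL)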