In [1]:
from functions import *
import torch

T = torch.tensor([1])
dt = torch.tensor([0.01])
S0 = torch.tensor([1])
q = torch.tensor([0.01])
A = torch.tensor([2, 1.6, 1.2, 0.8, 0.4, 0.2])
B = torch.tensor([1, 0.8, 0.6, 0.4, 0.2, 0.1])
Q = torch.tensor([300]) 
z = torch.tensor([10, 20, 30, 40, 50, 60])
delta = torch.tensor([0.01])
gamma = torch.tensor([0.01])
sigma = torch.tensor([0.05])

# use cuda if available
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
T = T.to(device)
dt = dt.to(device)
S0 = S0.to(device)
q = q.to(device)
A = A.to(device)
B = B.to(device)
Q = Q.to(device)
z = z.to(device)
delta = delta.to(device)
gamma = gamma.to(device)
sigma = sigma.to(device)

In [5]:
# test the actor-critic method

policy_net = ResNet_Conv(3, 12, 1, 2, 3, nn.Sigmoid())
value_net = ResNet_Conv(3, 1, 1, 2, 3, nn.ReLU())
policy_net.to(device)
value_net.to(device)
optimizer_policy = torch.optim.Adam(policy_net.parameters(), lr=0.01)
optimizer_value = torch.optim.Adam(value_net.parameters(), lr=0.01)
losses_actor_critic = []
for epoch in range(50):
    S, buy_orders, sell_orders, q, t, bid_vectors, ask_vectors = Test_Data_Simulation_Stochastic(T, dt, sigma, S0, policy_net, A, B, gamma, z)
    v_loss = critic_loss(policy_net, value_net, S, q, t, buy_orders, sell_orders, T, dt, A, B, gamma, delta, z, Q)
    p_loss = policy_loss(policy_net, value_net, S, q, t, buy_orders, sell_orders, bid_vectors, ask_vectors, T, dt, A, B, gamma, delta, z, Q)
    # when we train the policy net, we need to detach the value net
    loss = v_loss + p_loss
    losses_actor_critic.append(loss.item())
    optimizer_policy.zero_grad()
    optimizer_value.zero_grad()
    loss.backward()
    optimizer_policy.step()
    optimizer_value.step()
    print('epoch: ', epoch, 'p_loss: ', p_loss.item())
    print('epoch: ', epoch, 'v_loss: ', v_loss.item())



epoch:  0 p_loss:  -328.1642150878906
epoch:  0 v_loss:  20751.04296875
epoch:  1 p_loss:  -1033.2398681640625
epoch:  1 v_loss:  111456.5625
epoch:  2 p_loss:  -1202.9716796875
epoch:  2 v_loss:  110305.625
epoch:  3 p_loss:  -2246.6806640625
epoch:  3 v_loss:  404437.125
epoch:  4 p_loss:  -303.4521484375
epoch:  4 v_loss:  13233.25390625
epoch:  5 p_loss:  -530.731201171875
epoch:  5 v_loss:  30771.921875
epoch:  6 p_loss:  -100.3923110961914
epoch:  6 v_loss:  3997.5126953125
epoch:  7 p_loss:  -450.77166748046875
epoch:  7 v_loss:  34670.25
epoch:  8 p_loss:  -435.9366455078125
epoch:  8 v_loss:  16955.6171875
epoch:  9 p_loss:  -232.10438537597656
epoch:  9 v_loss:  26390.7578125
epoch:  10 p_loss:  -301.6378173828125
epoch:  10 v_loss:  66247.9375


KeyboardInterrupt: 

In [None]:
trajectory_actor_critic = []
for _ in range(50):
    S, buy_orders, sell_orders, q, t, bid_vectors, ask_vectors = Test_Data_Simulation_Stochastic(T, dt, sigma, S0, policy_net, A, B, gamma, z)
    N = int(T / dt)
    r = torch.zeros(N, device = device)
    for i in range(N - 1):
        hold1 = z * buy_orders[i] * bid_vectors[i]
        hold2 = z * sell_orders[i] * ask_vectors[i]
        r[i] = torch.sum(hold1) + torch.sum(hold2)
        r[i] = r[i] + (q[i + 1] * S[i + 1] - q[i] * S[i]) 

    trajectory_actor_critic.append(torch.sum(r).item())