In [1]:
from functions import *
from simulation import *
from policy_simulation import *
import matplotlib.pyplot as plt
import torch 
import numpy as np
import torch.optim as optim

In [2]:
# Hyperparameters that stay fixed for the whole simulation.

################################################################################################
# Number of different call options under consideration.
n = 1

# Each option gets 10 candidate bid spreads and 10 candidate ask spreads,
# evenly spaced between 0.01 and 0.1 (both ends included).
bid_values = np.linspace(start=0.01, stop=0.1, num=10)
ask_values = np.linspace(start=0.01, stop=0.1, num=10)

# Enumerate every combination of per-option spreads: the result has
# len(bid_values) ** n rows and n columns, one column per option.
bid_ranges = np.transpose(np.array(np.meshgrid(*([bid_values] * n)))).reshape(-1, n)
ask_ranges = np.transpose(np.array(np.meshgrid(*([ask_values] * n)))).reshape(-1, n)

################################################################################################

# T is the entire trading period and dt is the length of one trading time step.
T, dt = 1, 0.01
r = 0
S0 = 1.0

# n x n identity matrix.
V = np.identity(n)

# Strike prices for the n call options.
# Note: with n == 1, linspace returns only its start value (0.8 here, 10 for `time`).
K = np.linspace(start=0.8, stop=1.2, num=n)
time = np.linspace(start=10, stop=20, num=n)
A, kappa = 10, 10

# phi: final value penalty.
phi = 1
# gamma: policy penalty.
gamma = 0.2

# epoch: number of trading runs simulated over one stock path.
epoch = 2

In [3]:
# With this many settings it is easier to carry them around in a single
# object than to pass each one separately.
class TradingParameters:
    """Plain container for the settings shared across the simulation.

    Every constructor argument is stored unchanged on the instance under the
    same name; no validation or conversion is performed.

    Args:
        sigma: presumably the volatility of the underlying -- TODO confirm.
        S0: presumably the initial price of the underlying -- TODO confirm.
        T: length of the entire trading period.
        dt: length of one trading time step.
        r: presumably the interest rate -- TODO confirm.
        V: matrix supplied by the caller (the notebook passes an identity).
        K: strike prices of the call options.
        time: per-option array supplied by the caller; its meaning is not
            visible here -- TODO confirm.
        m: extra setting supplied by the caller; its meaning is not visible
            here -- TODO confirm.
        A: model constant; meaning not visible here -- TODO confirm.
        kappa: model constant; meaning not visible here -- TODO confirm.
        bid_ranges: candidate bid spreads, one row per combination.
        ask_ranges: candidate ask spreads, one row per combination.
        phi: final value penalty.
        gamma: policy penalty.
        epoch: number of trading runs simulated over one stock path.
    """

    def __init__(self, sigma, S0, T, dt, r, V, K, time, m, A, kappa, bid_ranges, ask_ranges, phi, gamma, epoch):
        self.sigma = sigma
        self.S0 = S0
        self.T = T
        self.dt = dt
        self.r = r
        self.V = V
        self.K = K
        self.time = time
        # Previously `m` was accepted but silently discarded; keep it so the
        # value the caller passes is actually available on the instance.
        self.m = m
        self.A = A
        self.kappa = kappa
        self.bid_ranges = bid_ranges
        self.ask_ranges = ask_ranges
        self.phi = phi
        self.gamma = gamma
        self.epoch = epoch

In [4]:
# Gather the fixed settings into one object; sigma (0.2) and m (100) are
# given literally here, everything else comes from the configuration cell.
paras = TradingParameters(
    sigma=0.2, S0=S0, T=T, dt=dt, r=r, V=V, K=K, time=time, m=100,
    A=A, kappa=kappa, bid_ranges=bid_ranges, ask_ranges=ask_ranges,
    phi=phi, gamma=gamma, epoch=epoch,
)

# Initial value network and the policy built on top of it.
random_first_value = Net(n)
random_policy = TradingPolicy(
    random_first_value,
    paras.gamma, paras.A, paras.kappa,
    paras.bid_ranges, paras.ask_ranges,
)

# The network to be trained starts out with a copy of the initial network's weights.
value_net_to_train = Net(n)
value_net_to_train.load_state_dict(random_first_value.state_dict())

# Use the first CUDA device when one is available, otherwise fall back to the CPU,
# and move the network to be trained onto it.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
value_net_to_train.to(device)

# Show the selected device, then whether the network's parameters are on CUDA.
print(device)
print(next(value_net_to_train.parameters()).is_cuda)

cuda:0
True


In [6]:
# Build another Net(n) and wrap it in a policy. This rebinds `random_policy`,
# a name that an earlier cell also assigns.
random_value = Net(n)
random_policy = TradingPolicy(
    random_value,
    paras.gamma, paras.A, paras.kappa,
    paras.bid_ranges, paras.ask_ranges,
)

In [7]:
# Start policy iteration from random_policy. The call is the cell's last
# expression, so whatever it returns is shown as the cell output.
policy_iteration(
    initial_policy=random_policy,
    paras=paras,
    device=device,
    num_iter=3,
    num_epoch=1,
    lr=0.01,
)

iteration:  0
loss:  966062144.0
<policy_simulation.TradingPolicy object at 0x000002276F53FA30>
iteration:  1
loss:  9227070464.0
<policy_simulation.TradingPolicy object at 0x0000022708B56C40>
iteration:  2
loss:  48580444.0
<policy_simulation.TradingPolicy object at 0x0000022708B56B80>


<policy_simulation.TradingPolicy at 0x22708b56b80>