In [18]:
from functions import *
from simulation import *
from policy_simulation import *
import matplotlib.pyplot as plt
import torch 
import numpy as np

In [19]:
# Define the neural network handed to the trading policy (instantiated below as `value_net`)
class Net(torch.nn.Module):
    """One-hidden-layer MLP mapping a flat input vector to a non-negative scalar.

    The input layer expects ``2 + 2 * n`` features; the network produces a
    single output feature per input row, clamped at zero by a final ReLU.

    Args:
        n: Size parameter; fixes the input width at ``2 + 2 * n``.
        hidden_size: Width of the hidden layer. Defaults to 128, the value
            that was previously hard-coded.
    """

    def __init__(self, n, hidden_size=128):
        # Initialise the Module machinery first: parameters and submodules
        # cannot be registered on the instance before this call.
        super().__init__()
        self.n = n
        self.fc1 = torch.nn.Linear(2 + 2 * n, hidden_size)
        self.fc2 = torch.nn.Linear(hidden_size, 1)

    def forward(self, x):
        """Apply the network to ``x``, whose last dimension must be ``2 + 2 * n``.

        Returns:
            A tensor with a last dimension of size 1 and entries ``>= 0``.
        """
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        # Final ReLU keeps the output non-negative (it can be exactly zero).
        return torch.relu(x)
    

value_net = Net(2)

In [20]:
# Candidate bid and ask grids: each is a (10, 2) integer array whose row i is [i, i].
bid_range = np.repeat(np.arange(10).reshape(-1, 1), 2, axis=1)
ask_range = bid_range.copy()  # independent array holding the same values

# Example state (t, Q, P, S) that the cells below pass to the policy.
# NOTE(review): presumably time, inventory, prices and stock price — confirm.
t, Q = np.array([0]), np.array([0, 0])
P, S = np.array([12, 23]), np.array([1])

# Scalar parameters handed to TradingPolicy (A and kappa are reused by entire_trading).
penalty, A, kappa = 10, 30, 3
#print(policy_distribution(value_net, t, Q, P, S, penalty, A, kappa, bid_range, ask_range))


# Define the trading policy object. It is constructed from the value network,
# the penalty, the parameters A and kappa, and the candidate bid/ask grids.
# The cells below query it through policy_numerator, policy_distribution and
# policy_act.
policy = TradingPolicy(value_net, penalty, A, kappa, bid_range, ask_range)

In [21]:
# Evaluate the policy numerator for a single (bid, ask) pair -- row 2 of
# bid_range and row 3 of ask_range -- at the example state (t, Q, P, S).
# NOTE(review): presumably the unnormalised weight that policy_distribution
# normalises over all pairs -- confirm against TradingPolicy.
policy.policy_numerator(bid_range[2], ask_range[3], t, Q, P, S)

0.031966402593556535

In [22]:
# Distribution returned by the policy for the example state (t, Q, P, S).
x = policy.policy_distribution(t, Q, P, S)
# Sanity check: the printed total should equal 1 up to floating-point rounding.
print(np.sum(x))
# Full table of values (large output).
print(x)

0.9999999999999999
[[0.00927996 0.01251064 0.00956014 0.0093006  0.00928133 0.00928004
  0.00927996 0.00927996 0.00927996 0.00927996]
 [0.01251064 0.01686604 0.01288836 0.01253846 0.01251248 0.01251075
  0.01251065 0.01251064 0.01251064 0.01251064]
 [0.00956014 0.01288836 0.00984878 0.0095814  0.00956155 0.00956023
  0.00956014 0.00956014 0.00956014 0.00956014]
 [0.0093006  0.01253846 0.0095814  0.00932128 0.00930197 0.00930068
  0.0093006  0.0093006  0.0093006  0.0093006 ]
 [0.00928133 0.01251248 0.00956155 0.00930197 0.0092827  0.00928141
  0.00928133 0.00928133 0.00928133 0.00928133]
 [0.00928004 0.01251075 0.00956023 0.00930068 0.00928141 0.00928013
  0.00928005 0.00928004 0.00928004 0.00928004]
 [0.00927996 0.01251065 0.00956014 0.0093006  0.00928133 0.00928005
  0.00927997 0.00927996 0.00927996 0.00927996]
 [0.00927996 0.01251064 0.00956014 0.0093006  0.00928133 0.00928004
  0.00927996 0.00927996 0.00927996 0.00927996]
 [0.00927996 0.01251064 0.00956014 0.0093006  0.00928133 0.00

In [23]:
# Ask the policy for an action at the example state (t, Q, P, S); the recorded
# output is a pair of length-2 arrays.
# NOTE(review): presumably (bid, ask) drawn from policy_distribution -- if the
# draw is random, seed the RNG so the displayed result is reproducible; confirm.
policy.policy_act(t, Q, P, S)

(array([9, 9]), array([4, 4]))

In [27]:
# Generate the training data: build a stock path, derive option prices from it,
# then run the policy over the whole path with entire_trading.
# NOTE(review): the literal 0.01 is passed positionally to all three helpers and
# also equals daily_sigma; its meaning in each position cannot be told from this
# cell -- consider naming it once. Confirm against the helper signatures.
# Symmetric 2x2 matrix with unit diagonal and 0.5 off-diagonal.
# NOTE(review): presumably a correlation/covariance matrix for the two options -- confirm.
V = np.array([[1, 0.5], [0.5, 1]])
stock_path = stock_price_path(100, 0.05, 1, 0.01)
# One entry per option. NOTE(review): presumably strike prices -- confirm.
K = np.array([30, 50])
daily_sigma = 0.01
# delta and gamma are returned but not used anywhere in this cell.
option_price, delta, gamma = option_simulation(V, stock_path, 1, 0.01, K, 0.01, daily_sigma) 

# entire_trading returns three arrays; per the recorded output each has shape (100, 2).
inv, buy, sell = entire_trading(policy, option_price, stock_path, 0.01, A, kappa)
print(inv.shape)
print(buy.shape)
print(sell.shape)

# Prints the whole inv array (one row per step); inv[:5] or inv[-5:] would keep the output short.
print(inv)

(100, 2)
(100, 2)
(100, 2)
[[ 0.  0.]
 [ 0.  0.]
 [ 0.  0.]
 [ 0.  0.]
 [ 0.  0.]
 [ 0.  0.]
 [ 0.  0.]
 [ 0.  0.]
 [ 0.  0.]
 [ 0.  0.]
 [ 0.  0.]
 [ 0.  0.]
 [ 0.  0.]
 [ 0.  0.]
 [ 0.  0.]
 [ 0.  0.]
 [ 0.  0.]
 [ 0.  0.]
 [ 0.  0.]
 [ 0.  0.]
 [ 0.  0.]
 [ 0.  0.]
 [ 0.  0.]
 [ 0.  0.]
 [ 0.  0.]
 [ 0.  0.]
 [ 0.  0.]
 [ 0.  0.]
 [ 0.  0.]
 [ 0.  0.]
 [ 0.  0.]
 [ 0. -1.]
 [ 0. -1.]
 [ 0. -1.]
 [ 0. -1.]
 [ 0. -1.]
 [ 0. -1.]
 [ 0. -1.]
 [ 0. -1.]
 [ 0. -1.]
 [ 0. -1.]
 [ 0. -1.]
 [ 0. -1.]
 [ 0. -1.]
 [ 0. -1.]
 [ 0. -1.]
 [ 0. -1.]
 [ 0. -2.]
 [ 0. -2.]
 [ 0. -2.]
 [ 0. -2.]
 [-1. -2.]
 [-1. -2.]
 [-1. -2.]
 [-1. -2.]
 [-1. -2.]
 [-1. -2.]
 [-1. -2.]
 [-1. -2.]
 [-1. -2.]
 [-1. -2.]
 [-1. -2.]
 [-1. -2.]
 [-1. -2.]
 [-1. -2.]
 [-1. -2.]
 [-1. -2.]
 [-1. -2.]
 [-2. -3.]
 [-2. -3.]
 [-2. -3.]
 [-2. -3.]
 [-2. -3.]
 [-2. -3.]
 [-2. -3.]
 [-2. -3.]
 [-2. -3.]
 [-2. -3.]
 [-2. -3.]
 [-2. -3.]
 [-2. -3.]
 [-2. -3.]
 [-2. -3.]
 [-2. -3.]
 [-2. -3.]
 [-2. -3.]
 [-2. -3.]
 [-2. -3.]
 [-2.