In [162]:
import torch
import torch.nn as nn

# define a model with 3 inputs, with 2 hidden layers, and 1 output layer
class Net(nn.Module):
    def __init__(self, input_size):
        super(Net, self).__init__()
        
        # define the layers
        self.fc1 = nn.Linear(input_size, 100)
        # add 5 more layers
        self.fc2 = nn.Linear(100, 1024)
        self.fc3 = nn.Linear(1024, 512)
        self.fc4 = nn.Linear(512, 256)
        self.fc5 = nn.Linear(256, 128)
        self.fc6 = nn.Linear(128, 64)
        self.fc7 = nn.Linear(64, 32)
        self.fc8 = nn.Linear(32, 16)
        self.fc9 = nn.Linear(16, 1)
        
    

        # define the activation function
        self.relu = nn.ReLU()
    

    def forward(self, t, S, q):
        # define the forward pass
        x = torch.tensor([t, S, q], dtype=torch.float32)
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)
        out = self.relu(out)
        # add 3 more layers
        out = self.fc3(out)
        out = self.relu(out)
        out = self.fc4(out)
        out = self.relu(out)
        out = self.fc5(out)
        out = self.relu(out)
        out = self.fc6(out)
        out = self.relu(out)
        out = self.fc7(out)
        out = self.relu(out)
        out = self.fc8(out)
        out = self.relu(out)
        out = self.fc9(out)
        return out
    

def h(q, Q):
    if q < Q and q > - 1 * Q:
        return 0
    else:
        return 1

def Gaussian_Policy(t, S, q, net, A, B, Q, z, delta, gamma):
    N = len(A)
    bid_vector = torch.zeros(N)
    ask_vector = torch.zeros(N)
    x = torch.tensor([t, S, q], dtype=torch.float32)
    for i in range(N):
        bid_mean = (A[i] / (2 * B[i])) - (net.forward(t, S, q + z[i]) - net.forward(t, S, q) + z[i] * (S + delta * h(q, Q))) / (2 * z[i])
        ask_mean = (A[i] / (2 * B[i])) - (net.forward(t, S, q - z[i]) - net.forward(t, S, q) - z[i] * (S - delta * h(q, Q))) / (2 * z[i]) 
        variance = gamma / (2 * z[i] * B[i])
        std = torch.sqrt(variance)
        bid_vector[i] = torch.normal(bid_mean, std)
        ask_vector[i] = torch.normal(ask_mean, std)

    return bid_vector, ask_vector



In [163]:
import math 
def Stock_Prices_Simulation(T, dt, sigma, S0):
    N = int(T / dt)
    S = torch.zeros(N)
    S[0] = S0
    for i in range(1, N):
        S[i] = S[i - 1] + sigma * math.sqrt(dt) * torch.randn(1)
    return S

def Market_Order_Simulation(dt, A, B, Q, z, delta, gamma, net, S, q, t):
    N = len(A)
    bid_vector, ask_vector = Gaussian_Policy(t, S, q, net, A, B, Q, z, delta, gamma)
    buy_orders = torch.zeros(N)
    sell_orders = torch.zeros(N)

    for i in range(N):
        bid_intensity = A[i] - B[i] * bid_vector[i]
        ask_intensity = A[i] - B[i] * ask_vector[i]
        buy_orders[i] = torch.distributions.poisson.Poisson(torch.max(bid_intensity * dt, torch.tensor([0.01]))).sample()
        sell_orders[i] = torch.distributions.poisson.Poisson(torch.max(ask_intensity * dt, torch.tensor([0.01]))).sample()

    return buy_orders, sell_orders

def Train_Data_Simulation(T, dt, sigma, S0, A, B, Q, z, delta, gamma, net):
    N = int(T / dt)
    S = Stock_Prices_Simulation(T, dt, sigma, S0)
    buy_orders = torch.zeros(N, len(A))
    sell_orders = torch.zeros(N, len(A))
    q = torch.zeros(N)
    t = torch.zeros(N)
    for i in range(N - 1):
        buy_orders[i], sell_orders[i] = Market_Order_Simulation(dt, A, B, Q, z, delta, gamma, net, S[i], q[i], t[i])
        for j in range(len(A)):
            q[i + 1] += (buy_orders[i][j] - sell_orders[i][j]) * z[j]
        q[i + 1] += q[i]
        t[i + 1] = t[i] + dt
        
    return S, buy_orders, sell_orders, q, t

In [164]:
def value_function_loss(net, S, q, t, dt):
    N = len(q)
    loss = torch.zeros(N)
    for i in range(N - 1):
        loss[i] = (net.forward(t[i + 1], S[i + 1], q[i + 1]) - net.forward(t[i], S[i], q[i])) / dt
    return loss

def inventory_loss(net, S, q, t, dt, buy_orders, sell_orders, z, delta, Q, A, B):
    N = len(q)
    loss = torch.zeros(N)
    for i in range(N):
        for k in range(len(A)):
            loss[i] = loss[i] + (buy_orders[i][k] - sell_orders[i][k]) * (z[k] * S[i] - delta * h(q[i], Q))
            loss[i] = loss[i] +  (A[k] / (2 * B[k])- (net.forward(t[i], S[i], q[i] + z[k]) - net.forward(t[i], S[i], q[i]) + z[k] * (S[i] + delta * h(q[i], Q))) / (2 * z[k])) * buy_orders[i][k]
            loss[i] = loss[i] + (A[k] / (2 * B[k]) - (net.forward(t[i], S[i], q[i] - z[k]) - net.forward(t[i], S[i], q[i]) - z[k] * (S[i] - delta * h(q[i], Q))) / (2 * z[k])) * sell_orders[i][k]
    return loss

def total_loss(net, S, q, t, dt, buy_orders, sell_orders, z, delta, Q, A, B, gamma):
    N = len(S)
    K = len(A)
    loss = torch.zeros(N)
    loss1 = value_function_loss(net, S, q, t, dt)
    loss2 = inventory_loss(net, S, q, t, dt, buy_orders, sell_orders, z, delta, Q, A, B)
    loss = loss1 + loss2 - gamma * ((K * 1.7981798683) + torch.sum(gamma / (2 * z * B)))
    
    scalar_loss = 0.5 * torch.sum(loss[:-1] ** 2) * dt
    return scalar_loss

    

In [165]:
net = Net(3)
T = 1
dt = 0.01
S0 = 100
q = 0
A = torch.tensor([1, 1, 1])
B = torch.tensor([0.01, 0.01, 0.01])
Q = 100
z = torch.tensor([1, 2, 3])
delta = 0.01
gamma = 0.02
sigma = 0.05


In [166]:
# train the network
for epoch in range(200):
    S, buy_orders, sell_orders, q, t = Train_Data_Simulation(1, 0.01, 0.1, S0, A, B, Q, z, delta, gamma, net)
    loss = total_loss(net, S, q, t, 0.001, buy_orders, sell_orders, z, delta, Q, A, B, gamma)
    print(loss.item())
    loss.backward()
    with torch.no_grad():
        for param in net.parameters():
            param -= 0.001 * param.grad
    net.zero_grad()



70.14830017089844
90.71587371826172
82.81932067871094
221.34629821777344
83.93938446044922
200.03526306152344
115.32286834716797
89.4981689453125
197.84326171875
72.1910629272461


KeyboardInterrupt: 