In [None]:
import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt
import math
import scipy.stats as scistat


In [None]:
class ResidualBlock_Conv(nn.Module):
    """Two-layer 1-D convolutional block with an identity skip connection.

    Both convolutions use kernel size 3, stride 1 and padding 1, so the
    sequence length is unchanged. The block input is added in place to the
    output of the second convolution and a final ReLU is applied.

    Args:
        in_channels: Number of channels of the input sequence.
        out_channels: Number of channels produced by both convolutions.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()

        # Shared convolution geometry: length-preserving 3-tap kernels.
        conv_kwargs = dict(kernel_size=3, stride=1, padding=1)

        self.conv1 = nn.Conv1d(in_channels, out_channels, **conv_kwargs)
        self.relu = nn.ReLU()
        self.conv2 = nn.Conv1d(out_channels, out_channels, **conv_kwargs)

    def forward(self, x):
        """Return ``relu(conv2(relu(conv1(x))) + x)``."""
        hidden = self.relu(self.conv1(x))
        hidden = self.conv2(hidden)

        # Skip connection (in-place add onto the conv2 output).
        hidden += x

        return self.relu(hidden)

class ResNet_Conv(nn.Module):
    """Network mapping a (time, inventory) pair to a bounded output vector.

    The time ``t`` and the flattened inventory ``q`` are concatenated into a
    vector of length ``1 + m * n``, lifted to 1024 features by a linear layer,
    passed through a stack of 1-D residual convolution blocks, and reduced by
    four linear layers to ``output_size`` values. The result is
    ``0.1 * final_act(...)``.

    The network handles a single (unbatched) sample per call. The linear
    layer's output is reshaped to ``(1, 1, 1024)`` before ``conv1``, so
    ``conv1`` is fed exactly one channel.

    Args:
        m, n: Inventory grid dimensions; ``q`` must contain ``m * n`` entries.
        output_size: Length of the returned vector.
        input_channels: Input channels of ``conv1`` / output channels of ``conv2``.
        output_channels: Channel width used inside the residual blocks.
        num_blocks: Number of ``ResidualBlock_Conv`` blocks.
        final_act: Callable applied to the last linear layer's output.
    """

    def __init__(self, m, n, output_size, input_channels, output_channels, num_blocks, final_act):
        super(ResNet_Conv, self).__init__()

        self.fc = nn.Linear(1 + m * n, 1024)
        self.conv1 = nn.Conv1d(input_channels, output_channels, kernel_size=3, stride=1, padding=1)
        self.relu = nn.ReLU()
        self.final_act = final_act

        self.blocks = nn.ModuleList()
        for _ in range(num_blocks):
            self.blocks.append(ResidualBlock_Conv(output_channels, output_channels))

        self.conv2 = nn.Conv1d(output_channels, input_channels, kernel_size=3, stride=1, padding=1)
        self.fc2 = nn.Linear(1024, 512)
        self.fc3 = nn.Linear(512, 256)
        self.fc4 = nn.Linear(256, 256)
        self.fc5 = nn.Linear(256, output_size)

    def forward(self, t, q):
        """Evaluate the network at time ``t`` and inventory ``q``.

        Args:
            t: Current time. A one-element tensor (0-d or 1-d) or a Python
                number; Python numbers are converted to a tensor on ``q``'s
                device with the dtype of the first linear layer.
            q: Inventory tensor with ``m * n`` entries (any shape / layout).

        Returns:
            1-D tensor of length ``output_size`` equal to
            ``0.1 * final_act(...)``.
        """
        # reshape (not view) so non-contiguous inventories are accepted too.
        q = q.reshape(-1)
        if not torch.is_tensor(t):
            t = torch.tensor(t, dtype=self.fc.weight.dtype, device=q.device)
        # Make t 1-D so 0-d tensors and scalars can be concatenated with q.
        t = t.reshape(-1)

        x = torch.cat((t, q), dim=0)
        out = self.fc(x)
        # (1024,) -> (batch=1, channels=1, length=1024) for the Conv1d layers.
        out = out.unsqueeze(0)
        out = out.unsqueeze(0)
        out = self.conv1(out)
        out = self.relu(out)

        for block in self.blocks:
            out = block(out)

        out = self.conv2(out)
        out = self.relu(out)
        # Drop the singleton batch/channel dimensions again.
        out = out.squeeze()
        out = self.fc2(out)
        out = self.relu(out)
        out = self.fc3(out)
        out = self.relu(out)
        out = self.fc4(out)
        out = self.relu(out)
        out = self.fc5(out)
        out = 0.1 * self.final_act(out)

        return out

In [None]:
# this Gaussian policy implementation is for the actor-critic algorithm
# modeling the mean of bid-ask policy as a neural network directly
def Gaussian_Policy(net, t, q, A, B, gamma):
    """Sample bid and ask quote matrices from the Gaussian policy.

    The network output (length ``2 * m * n``) is split into the bid means
    (first half) and the ask means (second half). Each quote is drawn from a
    normal distribution with that mean and variance ``gamma / (2 * B)``.

    Sampling is done under ``torch.no_grad()``: the drawn quotes are data for
    the rollout, so no autograd graph is kept for them. The losses re-evaluate
    the network on the recorded states when gradients are needed.

    Args:
        net: Policy network, called as ``net(t, q)``.
        t: Current time passed to ``net``.
        q: Current inventory passed to ``net``.
        A: 2-D tensor; only its shape ``(m, n)`` is used here.
        B: Tensor with the same number of entries as ``A``.
        gamma: Temperature tensor used in the variance ``gamma / (2 * B)``.

    Returns:
        Tuple ``(bid_matrix, ask_matrix)``, each with the shape of ``A`` and
        detached from the autograd graph.
    """
    number = A.shape[0] * A.shape[1]
    with torch.no_grad():
        # Call the module itself (not .forward) so registered hooks run.
        mean = net(t, q)  # the output for the neural network is of dim 2*m*n
        bid_mean = mean[:number]
        ask_mean = mean[number:]
        variance = (gamma / (2 * B)).reshape(-1)
        std = torch.sqrt(variance)
        bid_matrix = torch.normal(bid_mean, std).view(A.shape)
        ask_matrix = torch.normal(ask_mean, std).view(A.shape)

    return bid_matrix, ask_matrix


def Stock_Prices_Simulation(T, dt, mu, sigma, S0):
    """Simulate one price path with the Euler scheme
    ``S[k+1] = S[k] * (1 + mu * dt + sigma * sqrt(dt) * Z_k)``, ``Z_k ~ N(0, 1)``.

    Args:
        T: Time horizon (one-element tensor or Python number).
        dt: Time step (one-element tensor or Python number).
        mu: Drift coefficient (one-element tensor or Python number).
        sigma: Volatility coefficient (one-element tensor or Python number).
        S0: Initial price (one-element tensor or Python number).

    Returns:
        1-D tensor of length ``N = int(T / dt)`` with ``S[0] == S0``. It lives
        on the device of the first tensor argument; if every argument is a
        Python number, CUDA is used when available, otherwise the CPU.
    """
    # Follow the inputs' device; fall back to the notebook-wide default.
    device = next(
        (x.device for x in (S0, dt, mu, sigma, T) if torch.is_tensor(x)),
        torch.device("cuda:0" if torch.cuda.is_available() else "cpu"),
    )
    dtype = torch.get_default_dtype()

    def _scalar(value):
        # Accept Python numbers and one-element tensors alike.
        return torch.as_tensor(value, dtype=dtype, device=device).reshape(())

    N = int(T / dt)
    dt_, mu_, sigma_, S0_ = _scalar(dt), _scalar(mu), _scalar(sigma), _scalar(S0)

    S = torch.zeros(N, dtype=dtype, device=device)
    S[0] = S0_
    if N > 1:
        # Draw all shocks at once, directly on the target device.
        noise = torch.randn(N - 1, dtype=dtype, device=device)
        growth = 1.0 + mu_ * dt_ + sigma_ * torch.sqrt(dt_) * noise
        # The recursion S[k+1] = S[k] * growth[k] is a cumulative product.
        S[1:] = S0_ * torch.cumprod(growth, dim=0)
    return S


def Market_Order_Simulation(dt, A, B, bid_matrix, ask_matrix):
    """Draw the number of buy and sell market orders over one time step.

    For every grid cell the arrival intensity is ``A - B * quote`` and the
    number of orders over ``dt`` is Poisson with rate ``intensity * dt``.
    Intensities that would be negative (very large quotes) are clamped to
    zero, since a Poisson rate cannot be negative.

    Args:
        dt: Time step (one-element tensor or Python number).
        A: Intercepts of the intensity, shape ``(m, n)``.
        B: Slopes of the intensity, shape ``(m, n)``.
        bid_matrix: Bid quotes, shape ``(m, n)``.
        ask_matrix: Ask quotes, shape ``(m, n)``.

    Returns:
        Tuple ``(orders, buy_orders, sell_orders)`` of ``(m, n)`` tensors on
        the device of the inputs, with ``orders = buy_orders - sell_orders``.
    """
    # Whole-grid intensities in one shot instead of an m x n Python loop.
    rate_bid = torch.clamp((A - B * bid_matrix) * dt, min=0)
    rate_ask = torch.clamp((A - B * ask_matrix) * dt, min=0)

    buy_orders = torch.poisson(rate_bid)
    sell_orders = torch.poisson(rate_ask)
    orders = buy_orders - sell_orders

    return orders, buy_orders, sell_orders
    

def Train_Data_Simulation(T, dt, A, B, gamma, net):
    """Roll out one trading episode under the current policy network.

    At each step the policy samples bid/ask quotes, market orders are drawn
    given those quotes, and the inventory is updated by the net order flow.

    The whole rollout runs under ``torch.no_grad()``: the recorded trajectory
    is training *data*. Without this, every recorded tensor carries the
    rollout's autograd graph, which is then shared by every loss computed
    from the trajectory.

    Args:
        T: Time horizon.
        dt: Time step; ``t = count * dt`` is passed to the policy.
        A, B: Intensity parameters of shape ``(m, n)``.
        gamma: Temperature of the Gaussian policy.
        net: Policy network.

    Returns:
        Tuple ``(q, buy, sell, bid, ask)`` of tensors with shape
        ``(N, m, n)``, ``N = int(T / dt)``. ``q[0]`` is zero; the last slice
        of ``buy``, ``sell``, ``bid`` and ``ask`` is never written and stays
        zero.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    N = int(T / dt)
    m = A.shape[0]
    n = A.shape[1]
    q = torch.zeros((N, m, n), device=device)
    buy = torch.zeros((N, m, n), device=device)
    sell = torch.zeros((N, m, n), device=device)
    bid = torch.zeros((N, m, n), device=device)
    ask = torch.zeros((N, m, n), device=device)

    with torch.no_grad():
        for count in range(N - 1):
            t = count * dt
            bid_matrix, ask_matrix = Gaussian_Policy(net, t, q[count], A, B, gamma)
            orders, buy_orders, sell_orders = Market_Order_Simulation(dt, A, B, bid_matrix, ask_matrix)
            buy[count] = buy_orders
            sell[count] = sell_orders
            q[count + 1] = q[count] + orders
            bid[count] = bid_matrix
            ask[count] = ask_matrix

    return q, buy, sell, bid, ask


In [None]:
def reward(q, buy, sell, S, r, T, dt, Maturities, Strikes, Vol_surface, Policy_Net, gamma, B, Value_Net):
    """Per-step reward plus value-function increment along one trajectory.

    For each step ``count`` in ``0 .. N-2`` the entry is the sum of

    * the quote income ``sum(buy * bid_mean + sell * ask_mean)`` using the
      policy means at ``(t, q[count])``,
    * the summed ``Opt_Theta[count] * q[count] * dt`` term,
    * the summed ``0.5 * Opt_Gamma[count] * q[count] * dt`` term,
    * minus ``gamma`` times a constant built from ``m * n * 1.79817986835``
      and ``sum(log(gamma / (2 * B)))`` (presumably the entropy of the
      Gaussian policy; the origin of the numeric constant is not visible
      here, TODO confirm),
    * plus ``Value_Net(t + dt, q[count + 1]) - Value_Net(t, q[count])``.

    ``Options_Theta`` and ``Options_Gamma`` are helpers defined outside this
    block. The last entry of the returned tensor is left at zero.

    Returns:
        1-D tensor of length ``N = int(T / dt)``.
    """
    N = int(T / dt)
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    grid_shape = Vol_surface.shape
    m = grid_shape[0]
    n = grid_shape[1]
    half = m * n

    rewards = torch.zeros(N, device=device)
    Opt_Theta = Options_Theta(Vol_surface, S, r, T, dt, Maturities, Strikes)
    Opt_Gamma = Options_Gamma(Vol_surface, S, r, T, dt, Maturities, Strikes)

    # This term does not depend on the step, so it is computed once.
    entropy_term = gamma * ((m * n * 1.79817986835) + torch.sum(torch.log(gamma / (2 * B))))

    for step in range(N - 1):
        t = step * dt
        inventory = q[step]

        policy_mean = Policy_Net(t, inventory)
        bid_mean = policy_mean[:half].view(grid_shape)
        ask_mean = policy_mean[half:].view(grid_shape)
        profit = torch.sum(buy[step] * bid_mean + sell[step] * ask_mean)

        theta_term = Opt_Theta[step] * inventory * dt
        gamma_term = 0.5 * Opt_Gamma[step] * inventory * dt

        rewards[step] = profit + torch.sum(theta_term) + torch.sum(gamma_term) - entropy_term

        # Increment of the value function over the step.
        td_error = Value_Net(t + dt, q[step + 1]) - Value_Net(t, inventory)
        rewards[step] += td_error.view(rewards[step].shape)

    return rewards


def critic_loss(q, buy, sell, S, r, T, dt, Maturities, Strikes, Vol_surface, Policy_Net, gamma, B, Value_Net):
    """Critic objective: half the sum of squared per-step ``reward`` values.

    All arguments are forwarded unchanged to ``reward``; see that function
    for their meaning.

    Returns:
        Scalar tensor ``0.5 * sum(reward(...) ** 2)``.
    """
    # Use a separate name so the parameter `r` is not overwritten.
    residual = reward(q, buy, sell, S, r, T, dt, Maturities, Strikes, Vol_surface, Policy_Net, gamma, B, Value_Net)
    return 0.5 * torch.sum(residual ** 2)


def probability(Policy_Net, q, T, dt, bid, ask, B, gamma):
    """Log-probability of the recorded quotes under the Gaussian policy.

    For each step ``count`` in ``0 .. N-2`` the policy mean at
    ``(t, q[count])`` is compared with the recorded quotes
    ``[bid[count], ask[count]]`` (flattened and concatenated). The policy has
    independent components with variance ``gamma / (2 * B)``, so the joint
    log-density is the sum of the per-component normal log-densities.

    Args:
        Policy_Net: Policy network, called as ``Policy_Net(t, q[count])``.
        q: Inventory trajectory, indexed by step.
        T, dt: Horizon and step; ``N = int(T / dt)``.
        bid, ask: Recorded quotes, indexed by step.
        B: Intensity slopes; flattened to length ``m * n``.
        gamma: Temperature of the Gaussian policy.

    Returns:
        1-D tensor of length ``N`` holding the log-probabilities; the last
        entry is left at zero.
    """
    N = int(T / dt)
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    prob = torch.zeros(N, device=device)

    # The policy's standard deviations do not depend on the step: build once.
    variance = gamma / (2 * B.reshape(-1))
    std = torch.sqrt(torch.cat((variance, variance), 0))

    for count in range(N - 1):
        t = count * dt
        mean = Policy_Net(t, q[count])
        quotes = torch.cat((bid[count].reshape(-1), ask[count].reshape(-1)), 0)
        # Diagonal covariance: the joint log-density is a sum of univariate
        # ones, so no covariance matrix / Cholesky factorisation is needed.
        prob[count] = torch.distributions.normal.Normal(mean, std).log_prob(quotes).sum()

    return prob


def actor_loss(Policy_Net, q, T, dt, bid, ask, B, gamma, buy, sell, S, r, Maturities, Strikes, Vol_surface, Value_Net):
    """Policy-gradient (score-function) loss for the actor.

    The loss is ``-sum(log_prob * weight)`` where ``log_prob`` comes from
    ``probability`` and ``weight`` from ``reward``. The weight is detached:
    in a score-function estimator it is a fixed coefficient, so gradients
    must reach the policy only through ``log_prob`` and must not flow into
    ``Value_Net`` or through the reward itself.

    Returns:
        Scalar loss tensor.
    """
    log_prob = probability(Policy_Net, q, T, dt, bid, ask, B, gamma)
    # Use a separate name so the parameter `r` is not overwritten.
    weight = reward(q, buy, sell, S, r, T, dt, Maturities, Strikes, Vol_surface, Policy_Net, gamma, B, Value_Net)
    loss = -torch.sum(log_prob * weight.detach())
    return loss

    

In [None]:
# Experiment configuration: market parameters and simulation settings.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Order-arrival intensity per grid cell is A - B * quote (see Market_Order_Simulation).
A = torch.tensor([[36, 34, 32, 30], [46, 44, 42, 40], [56, 54, 52, 50], [46, 44, 42, 40], [36, 34, 32, 30]]).to(device)
B = torch.tensor([[3, 3, 3, 3], [4, 4, 4, 4], [5, 5, 5, 5], [4, 4, 4, 4], [3, 3, 3, 3]]).to(device)

# Grid dimensions are taken from A so they cannot drift out of sync with it.
m, n = A.shape

gamma = torch.tensor([0.1]).to(device)  # temperature of the Gaussian policy
T = torch.tensor([1]).to(device)        # time horizon
dt = torch.tensor([0.01]).to(device)    # time step
mu = torch.tensor([0.01]).to(device)    # drift of the stock price
sigma = torch.tensor([0.05]).to(device) # volatility of the stock price
S0 = torch.tensor([100]).to(device)     # initial stock price
S = Stock_Prices_Simulation(T, dt, mu, sigma, S0)

q = torch.zeros(m, n).to(device)        # initial (flat) inventory
Vol_surface = torch.tensor([[0.2, 0.2, 0.18, 0.18], [0.14, 0.14, 0.12, 0.12], [0.1, 0.1, 0.08, 0.08], [0.14, 0.14, 0.12, 0.12], [0.2, 0.2, 0.18, 0.18]]).to(device)
Strikes = torch.tensor([90, 95, 100, 105, 110]).to(device) # means i in the code
Maturities = torch.tensor([2, 3, 4, 5]).to(device) # means j in the code
r = torch.tensor([0]).to(device)        # interest rate


In [None]:
# call option price function with S, K, T, r, sigma
def call_option_price(S, K, T, r, sigma):
    """Black-Scholes price of a European call option.

    Args:
        S: Spot price.
        K: Strike.
        T: Time to maturity.
        r: Risk-free rate.
        sigma: Volatility.

        Each argument may be a tensor (broadcastable against the others) or a
        Python number; integer inputs are converted to the default float dtype.

    Returns:
        Tensor with the call price. Where ``sigma * sqrt(T)`` is zero (no
        remaining time value) the discounted intrinsic value
        ``max(S - K * exp(-r * T), 0)`` is returned instead of NaN.
    """
    def _as_float_tensor(value):
        value = torch.as_tensor(value)
        return value if value.is_floating_point() else value.to(torch.get_default_dtype())

    S, K, T, r, sigma = (_as_float_tensor(v) for v in (S, K, T, r, sigma))

    total_vol = sigma * torch.sqrt(T)
    has_time_value = total_vol > 0
    # Placeholder divisor where total_vol == 0 keeps d1/d2 finite; those
    # entries are replaced by the intrinsic value below.
    safe_vol = torch.where(has_time_value, total_vol, torch.ones_like(total_vol))

    d1 = (torch.log(S / K) + (r + 0.5 * sigma ** 2) * T) / safe_vol
    d2 = d1 - safe_vol

    std_normal = torch.distributions.normal.Normal(0.0, 1.0)
    discounted_strike = K * torch.exp(-r * T)
    price = S * std_normal.cdf(d1) - discounted_strike * std_normal.cdf(d2)

    intrinsic = torch.clamp(S - discounted_strike, min=0).to(price.dtype)
    return torch.where(has_time_value, price, intrinsic)


def final_return(buy, sell, bid, ask, T, dt):
    """Total quote income over one trajectory.

    Sums ``buy[k] * bid[k] + sell[k] * ask[k]`` over all grid cells and over
    the steps ``k = 0 .. N-2`` with ``N = int(T / dt)``.

    Args:
        buy, sell: Executed buy / sell order counts, indexed by step.
        bid, ask: Quotes, indexed by step.
        T, dt: Horizon and time step.

    Returns:
        The total as a Python float; ``0.0`` when there are no steps to sum.
    """
    N = int(T / dt)
    steps = N - 1
    if steps <= 0:
        # Nothing traded; avoids calling .item() on a plain Python number.
        return 0.0

    income = buy[:steps] * bid[:steps] + sell[:steps] * ask[:steps]
    return torch.sum(income).item()

In [None]:
# Actor (policy) and critic (value) networks with one optimizer each.
policy_net = ResNet_Conv(m, n, 2 * m * n, 1, 1, 2, nn.Sigmoid()).to(device)
value_net = ResNet_Conv(m, n, 1, 1, 1, 2, nn.Sigmoid()).to(device)

optimizer = torch.optim.Adam(policy_net.parameters(), lr=0.001)
optimizer_value = torch.optim.Adam(value_net.parameters(), lr=0.001)

# Loss histories: V for the critic, P for the actor.
V = []
P = []

for epoch in range(100):
    S = Stock_Prices_Simulation(T, dt, mu, sigma, S0)
    # The rollout is training data: detach it so the two losses below do not
    # share (and back-propagate twice through) the simulation graph.
    q, buy, sell, bid, ask = (
        x.detach() for x in Train_Data_Simulation(T, dt, A, B, gamma, policy_net)
    )

    # Actor update: only the actor loss contributes to the policy step.
    optimizer.zero_grad()
    p_loss = actor_loss(policy_net, q, T, dt, bid, ask, B, gamma, buy, sell, S, r, Maturities, Strikes, Vol_surface, value_net)
    p_loss.backward()
    optimizer.step()

    # Critic update: gradients are zeroed first so nothing accumulated during
    # the actor's backward pass leaks into the value-network step. The loss is
    # built after the policy step so its graph sees the current parameters.
    optimizer_value.zero_grad()
    v_loss = critic_loss(q, buy, sell, S, r, T, dt, Maturities, Strikes, Vol_surface, policy_net, gamma, B, value_net)
    v_loss.backward()
    optimizer_value.step()

    print(p_loss.item())
    print(v_loss.item())
    V.append(v_loss.item())
    P.append(p_loss.item())

In [None]:
# Evaluate the trained policy on 100 fresh rollouts.
final_returns = []
# No parameters are updated here, so gradient tracking is switched off.
with torch.no_grad():
    for count in range(100):
        q, buy, sell, bid, ask = Train_Data_Simulation(T, dt, A, B, gamma, policy_net)
        final_returns.append(final_return(buy, sell, bid, ask, T, dt))
        # Show only the terminal inventory rather than the whole trajectory.
        print(q[-1])

print(final_returns)

In [None]:
import torch
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Three example 5 x 4 inventory grids (integer valued).
# q1: flat book.
q1 = torch.zeros((5, 4), dtype=torch.int64).to(device)
# q2: entry (i, j) equals 3 * |2 - i| + j.
q2 = (3 * (torch.arange(5) - 2).abs().unsqueeze(1) + torch.arange(4)).to(device)
# q3: hand-picked mixed long/short positions.
q3 = torch.tensor([
    [3, 4, -5, -6],
    [1, 2, -3, -4],
    [0, 1, -1, 1],
    [1, -2, 3, 4],
    [2, -3, 4, 5],
]).to(device)

# Stack the three grids into one float tensor of shape (3, 5, 4).
inventory = torch.stack((q1, q2, q3)).to(torch.get_default_dtype())

print(inventory)
