In [16]:
import torch
import numpy as np
import matplotlib.pyplot as plt

import torch.nn as nn


def loc(nb):
    """Count the source lines held in the code cells of a notebook file.

    Args:
        nb: Path to a ``.ipynb`` file, i.e. a JSON document with a top-level
            ``"cells"`` list.

    Returns:
        Total number of source lines over every cell whose ``cell_type`` is
        ``"code"``. Markdown and raw cells are ignored.
    """
    # Imported locally: the bare name ``load`` used before is not defined in
    # the visible part of this file.
    import json

    # The context manager closes the handle; it used to be left open.
    with open(nb, encoding="utf-8") as handle:
        cells = json.load(handle)["cells"]

    total = 0
    for cell in cells:
        if cell["cell_type"] != "code":
            continue
        source = cell["source"]
        # The notebook format allows "source" to be either a list of lines or
        # one multi-line string; count lines in both cases.
        if isinstance(source, str):
            source = source.splitlines()
        total += len(source)
    return total

def run(ipynb_files):
    """Return the combined code-cell line count of several notebooks.

    Args:
        ipynb_files: Iterable of notebook paths; each one is passed to ``loc``.

    Returns:
        Sum of ``loc(path)`` over all paths (``0`` for an empty iterable).
    """
    total = 0
    for path in ipynb_files:
        total += loc(path)
    return total

# the following neural network is to model the value function
class MyNet(nn.Module):
    """Small fully connected network used to model the value function.

    The input is the time ``t`` concatenated with the flattened ``(m, n)``
    state matrix (``1 + m * n`` features); the output is a single value.
    """

    def __init__(self, m, n):
        """Create the layers for an ``(m, n)`` state matrix.

        Args:
            m: Number of rows of the state matrix.
            n: Number of columns of the state matrix.
        """
        super().__init__()
        self.fc1 = nn.Linear(1 + m * n, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 1)
        self.relu = nn.ReLU()

    def forward(self, t, x):
        """Evaluate the network at time ``t`` and state ``x``.

        Args:
            t: Time, given as a one-element tensor, a 0-dim tensor or a plain
                Python number.
            x: Tensor holding ``m * n`` elements, normally of shape ``(m, n)``.

        Returns:
            Tensor of shape ``(1,)``.
        """
        if not torch.is_tensor(t):
            t = torch.tensor(t, dtype=x.dtype, device=x.device)
        # reshape (rather than view) also accepts non-contiguous inputs such
        # as a transposed matrix, and turns a 0-dim ``t`` into one element.
        features = torch.cat((t.reshape(-1), x.reshape(-1)), dim=0)
        hidden = self.relu(self.fc1(features))
        hidden = self.relu(self.fc2(hidden))
        return self.fc3(hidden)
    
# The Gaussian policy returns two torch tensors (bid quotes and ask quotes), each with the same shape as q
def Gaussian_Policy(net, t, q, A, B, gamma):
    """Sample bid and ask quotes from the Gaussian policy implied by ``net``.

    For every entry ``(i, j)`` the quote is drawn from a normal distribution
    with mean ``A/(2B) + 0.5 * (V(t, q) - V(t, q +/- e_ij))`` and variance
    ``gamma / (2B)``, where ``V`` is ``net.forward`` and ``e_ij`` is the matrix
    with a single one at ``(i, j)`` (``+`` for the bid, ``-`` for the ask).

    Args:
        net: Object whose ``forward(t, q)`` returns the value estimate.
        t: Time input passed through to ``net.forward``.
        q: Current state matrix; the outputs have its shape.
        A, B: Matrices indexed as ``A[i, j]`` / ``B[i, j]``; the loops run over
            ``A.shape``.
        gamma: Scalar that scales the sampling variance.

    Returns:
        ``(bid_matrix, ask_matrix)``: two tensors with the shape of ``q``.
    """
    bid_matrix = torch.zeros(q.shape)
    ask_matrix = torch.zeros(q.shape)

    # V(t, q) does not depend on (i, j): evaluate it once instead of twice
    # per entry.
    value_now = net.forward(t, q)

    for i in range(A.shape[0]):
        for j in range(A.shape[1]):
            unit = torch.zeros(q.shape)
            unit[i, j] = 1
            base_quote = A[i, j] / (2 * B[i, j])
            mean_bid = base_quote + 0.5 * (value_now - net.forward(t, q + unit))
            mean_ask = base_quote + 0.5 * (value_now - net.forward(t, q - unit))
            std = torch.sqrt(gamma / (2 * B[i, j]))
            bid_matrix[i, j] = torch.normal(mean_bid, std)
            ask_matrix[i, j] = torch.normal(mean_ask, std)

    return bid_matrix, ask_matrix
    

import math
import scipy.stats as scistat

def Market_Order_Simulation(dt, A, B, bid_matrix, ask_matrix):
    """Simulate the market orders that arrive during one time step.

    The arrival intensity for each entry is ``A - B * quote``; the number of
    orders over the step is Poisson distributed with rate ``intensity * dt``.

    Args:
        dt: Length of the time step.
        A, B: Intensity parameters, same shape as the quote matrices.
        bid_matrix: Bid quotes.
        ask_matrix: Ask quotes.

    Returns:
        A list ``[orders, buy_orders, sell_orders]`` where
        ``orders = buy_orders - sell_orders``; all three have the shape of the
        quote matrices.
    """
    # Gaussian quotes are unbounded, so ``A - B * quote`` can become negative.
    # torch.poisson rejects negative rates, hence the floor at zero.
    rate_bid = torch.clamp(A - B * bid_matrix, min=0)
    rate_ask = torch.clamp(A - B * ask_matrix, min=0)

    # One vectorised draw per side replaces the element-by-element loops.
    buy_orders = torch.poisson(rate_bid * dt)
    sell_orders = torch.poisson(rate_ask * dt)
    orders = buy_orders - sell_orders

    return [orders, buy_orders, sell_orders]

# this function returns the inventory path together with the buy-order and sell-order paths
def Train_Data_Simulation(T, dt, A, B, gamma, net):
    """Simulate one trajectory of inventory and executed orders.

    At every step quotes are sampled with ``Gaussian_Policy``, one set of
    market orders is drawn with ``Market_Order_Simulation``, and the inventory
    is advanced by the net order flow.

    Args:
        T: Time horizon.
        dt: Step size; the path has ``N = int(T / dt)`` rows.
        A, B: Intensity parameter matrices of shape ``(m, n)``.
        gamma: Scalar passed to the Gaussian policy.
        net: Value network used by the policy.

    Returns:
        ``(q, buy, sell)``, each of shape ``(N, m, n)``. ``q[0]`` is zero and
        ``q[k + 1] = q[k] + buy[k] - sell[k]``. The last rows of ``buy`` and
        ``sell`` stay zero because only ``N - 1`` steps are simulated.
    """
    N = int(T / dt)
    m, n = A.shape[0], A.shape[1]
    q = torch.zeros((N, m, n))
    buy = torch.zeros((N, m, n))
    sell = torch.zeros((N, m, n))

    # Pure sampling: no gradients are needed here, and tracking them would
    # chain every step's forward passes onto the returned tensors.
    with torch.no_grad():
        for count in range(N - 1):
            t = torch.tensor([float(count * dt)])
            bid_matrix, ask_matrix = Gaussian_Policy(net, t, q[count], A, B, gamma)
            # Draw the orders ONCE per step. Calling the simulator separately
            # for orders, buy and sell produced three independent samples, so
            # the inventory did not match the recorded buys and sells.
            orders, buy[count], sell[count] = Market_Order_Simulation(dt, A, B, bid_matrix, ask_matrix)
            q[count + 1] = q[count] + orders

    return q, buy, sell

# this function returns the stock prices path
def Stock_Prices_Simulation(T, dt, mu, sigma, S0):
    """Simulate one stock-price path with explicit Euler steps of size ``dt``.

    Each step applies
    ``S_next = S + mu * S * dt + sigma * S * sqrt(dt) * Z`` with a fresh
    standard normal draw ``Z``.

    Args:
        T: Time horizon.
        dt: Step size; the path has ``int(T / dt)`` points.
        mu: Drift coefficient.
        sigma: Volatility coefficient.
        S0: Initial price, stored at index 0.

    Returns:
        1-D tensor of length ``int(T / dt)``.
    """
    num_points = int(T / dt)
    path = torch.zeros(num_points)
    path[0] = S0
    root_dt = math.sqrt(dt)
    for k in range(1, num_points):
        previous = path[k - 1]
        shock = torch.normal(0.0, 1.0, size=(1,))
        path[k] = previous + mu * previous * dt + sigma * previous * root_dt * shock
    return path


#the following are option greeks' functions
def d1(S, K, r, sigma, T):
    """Return the Black-Scholes ``d1`` term.

    Computes ``(log(S / K) + (r + sigma^2 / 2) * T) / (sigma * sqrt(T))``.

    Args:
        S: Spot price.
        K: Strike.
        r: Interest rate.
        sigma: Volatility.
        T: Time to maturity.
    """
    log_moneyness = np.log(S/K)
    drift = (r+sigma*sigma/2)*T
    scaled_vol = sigma*np.sqrt(T)
    return (log_moneyness + drift)/scaled_vol

def d2(S, K, r, sigma, T):
    """Return the Black-Scholes ``d2`` term, ``d1 - sigma * sqrt(T)``.

    Takes the same arguments as ``d1``.
    """
    first_term = d1(S, K, r, sigma, T)
    return first_term - sigma*np.sqrt(T)

def Gamma(S, K, r, sigma, T):
    """Return the option gamma ``pdf(d1) / (S * sigma * sqrt(T))``.

    ``pdf`` is the standard normal density; the arguments are those of ``d1``.
    """
    density = scistat.norm.pdf(d1(S, K, r, sigma, T))
    return density/(S*sigma*np.sqrt(T))

def Theta(S, K, r, sigma, T):
    """Return the option theta as the sum of two terms.

    The first term is ``-S * pdf(d1) * sigma / (2 * sqrt(T))`` and the second
    is ``-r * K * exp(-r * T) * cdf(d2)``, with ``pdf`` / ``cdf`` the standard
    normal density and distribution function. This matches the Black-Scholes
    theta of a European call.
    """
    density = scistat.norm.pdf(d1(S, K, r, sigma, T))
    time_decay = -S*density*sigma/(2*np.sqrt(T))
    carry = -r*K*np.exp(-r*T)*scistat.norm.cdf(d2(S, K, r, sigma, T))
    return time_decay + carry


def Options_Theta(Vol_surface, S0, r, mu, sigma, T, dt, Maturities, Strikes, S=None):
    """Evaluate ``Theta`` for every option along a stock-price path.

    Args:
        Vol_surface: ``(m, n)`` matrix of volatilities; entry ``(i, j)`` is
            used with ``Strikes[i]`` and ``Maturities[j]``.
        S0, mu, sigma: Parameters of the simulated price path (only used when
            ``S`` is not given).
        r: Interest rate passed to ``Theta``.
        T: Time horizon.
        dt: Step size; ``N = int(T / dt)`` time points are evaluated.
        Maturities: Sequence indexed by column ``j``.
        Strikes: Sequence indexed by row ``i``.
        S: Optional pre-simulated price path with at least ``N`` points. Pass
            it to evaluate several Greeks on the same path; when omitted a
            fresh path is simulated, as before.

    Returns:
        Tensor of shape ``(N, m, n)``; entry ``[k, i, j]`` is the theta at
        time ``k * dt`` with remaining maturity ``Maturities[j] - k * dt``.
    """
    N = int(T / dt)
    m, n = Vol_surface.shape[0], Vol_surface.shape[1]
    theta = torch.zeros((N, m, n))
    if S is None:
        S = Stock_Prices_Simulation(T, dt, mu, sigma, S0)
    for count in range(N):
        t = count * dt
        spot = S[count]
        for i in range(m):
            for j in range(n):
                theta[count, i, j] = Theta(spot, Strikes[i], r, Vol_surface[i, j], Maturities[j] - t)
    return theta


def Options_Gamma(Vol_surface, S0, r, mu, sigma, T, dt, Maturities, Strikes, S=None):
    """Evaluate ``Gamma`` for every option along a stock-price path.

    Args:
        Vol_surface: ``(m, n)`` matrix of volatilities; entry ``(i, j)`` is
            used with ``Strikes[i]`` and ``Maturities[j]``.
        S0, mu, sigma: Parameters of the simulated price path (only used when
            ``S`` is not given).
        r: Interest rate passed to ``Gamma``.
        T: Time horizon.
        dt: Step size; ``N = int(T / dt)`` time points are evaluated.
        Maturities: Sequence indexed by column ``j``.
        Strikes: Sequence indexed by row ``i``.
        S: Optional pre-simulated price path with at least ``N`` points. Pass
            it to evaluate several Greeks on the same path; when omitted a
            fresh path is simulated, as before.

    Returns:
        Tensor of shape ``(N, m, n)``; entry ``[k, i, j]`` is the gamma at
        time ``k * dt`` with remaining maturity ``Maturities[j] - k * dt``.
    """
    N = int(T / dt)
    m, n = Vol_surface.shape[0], Vol_surface.shape[1]
    gamma = torch.zeros((N, m, n))
    if S is None:
        S = Stock_Prices_Simulation(T, dt, mu, sigma, S0)
    for count in range(N):
        t = count * dt
        spot = S[count]
        for i in range(m):
            for j in range(n):
                gamma[count, i, j] = Gamma(spot, Strikes[i], r, Vol_surface[i, j], Maturities[j] - t)
    return gamma


In [13]:
# define the reward function in the following
# buy, sell are of shape(N, m,n)
# q is of shape(N, m, n)
def _greeks_on_path(S, Vol_surface, r, T, dt, Maturities, Strikes):
    """Fill ``(N, m, n)`` theta and gamma tensors along ONE price path ``S``."""
    N = int(T / dt)
    m, n = Vol_surface.shape[0], Vol_surface.shape[1]
    theta = torch.zeros((N, m, n))
    gamma_greek = torch.zeros((N, m, n))
    for count in range(N):
        t = count * dt
        for i in range(m):
            for j in range(n):
                tau = Maturities[j] - t
                theta[count, i, j] = Theta(S[count], Strikes[i], r, Vol_surface[i, j], tau)
                gamma_greek[count, i, j] = Gamma(S[count], Strikes[i], r, Vol_surface[i, j], tau)
    return theta, gamma_greek


def reward(dt, A, B, gamma, net, T, S0, r, mu, sigma, Maturities, Strikes, q, buy, sell, Vol_surface, S=None):
    """Return the total reward accumulated along one simulated trajectory.

    For every time step the reward adds up
      * ``buy * bid_quote * dt`` and ``sell * ask_quote * dt`` over all
        options, where the quotes are the policy means
        ``A/(2B) + 0.5 * (V(t, q) - V(t, q +/- e_ij))``,
      * ``sum((theta + 0.5 * gamma_greek) * q) * dt``, and
      * minus ``gamma * (m * n * c + sum(log(gamma / (2B)))) * dt``.

    Args:
        dt: Step size; ``N = int(T / dt)`` steps are summed.
        A, B: ``(m, n)`` intensity parameter tensors.
        gamma: Scalar weight of the last term above (not the option Greek).
        net: Value network; ``net.forward(t, q)`` is differentiated through.
        T: Time horizon.
        S0, mu, sigma: Parameters of the price path (used when ``S`` is None).
        r: Interest rate used for the Greeks.
        Maturities, Strikes: Option maturities (per column) and strikes (per row).
        q, buy, sell: Tensors of shape ``(N, m, n)``.
        Vol_surface: ``(m, n)`` tensor of volatilities.
        S: Optional price path with at least ``N`` points. When omitted, one
            path is simulated here.

    Returns:
        0-dim tensor holding the summed reward.
    """
    N = int(T / dt)
    m, n = A.shape[0], A.shape[1]
    if S is None:
        S = Stock_Prices_Simulation(T, dt, mu, sigma, S0)

    # Theta and gamma are evaluated once, on the same path. Previously both
    # were recomputed inside the time loop, each call on a fresh random path,
    # while the path simulated in this function was never used.
    theta, gamma_greek = _greeks_on_path(S, Vol_surface, r, T, dt, Maturities, Strikes)

    # NOTE(review): the origin of this constant is not visible here - confirm.
    entropy_const = 1.79817986835
    # Identical at every step, so compute it once.
    entropy_term = gamma * ((m * n * entropy_const) + torch.sum(torch.log(gamma / (2 * B))))
    base_quote = A / (2 * B)

    total = torch.zeros(())
    for count in range(N):
        t = torch.tensor([count * dt])
        q_now = q[count]
        # V(t, q) is shared by all (i, j); evaluate it once per step.
        value_now = net.forward(t, q_now)
        step = torch.zeros(1)
        for i in range(m):
            for j in range(n):
                unit = torch.zeros(m, n)
                unit[i, j] = 1
                bid_quote = base_quote[i, j] + 0.5 * (value_now - net.forward(t, q_now + unit))
                ask_quote = base_quote[i, j] + 0.5 * (value_now - net.forward(t, q_now - unit))
                step = step + (buy[count, i, j] * bid_quote) * dt
                step = step + (sell[count, i, j] * ask_quote) * dt

        step = step + torch.sum((theta[count] + 0.5 * gamma_greek[count]) * q_now) * dt
        step = step - entropy_term * dt
        # Out-of-place accumulation keeps the autograd graph free of in-place
        # writes into a preallocated tensor.
        total = total + step.sum()

    return total

    

In [14]:
# Check the reward function on a small 2x2 example.

# Seed first so the network initialisation and the simulated paths are
# reproducible on a fresh kernel.
SEED = 0
torch.manual_seed(SEED)

# Market and option parameters
Vol_surface = torch.tensor([[0.2, 0.3], [0.4, 0.5]])
S0 = 100
r = 0.05
T = 1
dt = 0.01
mu = 0.1
sigma = 0.2
gamma = 0.05
Maturities = torch.tensor([3, 5])
Strikes = torch.tensor([90, 110])
A = torch.tensor([[1.0, 2.0], [3.0, 4.0]])
B = torch.tensor([[0.2, 0.3], [0.2, 0.5]])

# Network and training parameters (each assigned once; the duplicated
# learning_rate / num_epochs assignments were removed)
net = MyNet(2, 2)
learning_rate = 0.01
num_epochs = 30
D = 10

# One price path and one inventory/order trajectory for the check
S = Stock_Prices_Simulation(T, dt, mu, sigma, S0)
q, buy, sell = Train_Data_Simulation(T, dt, A, B, gamma, net)


In [15]:
def training_data_collections(D, T, dt, A, B, gamma, net):
    """Simulate ``D`` independent trajectories and stack them.

    Args:
        D: Number of trajectories.
        T: Time horizon.
        dt: Step size; each trajectory has ``N = int(T / dt)`` rows.
        A, B: ``(m, n)`` intensity parameter tensors.
        gamma: Scalar passed to the policy.
        net: Value network used to generate the data.

    Returns:
        ``(train_q, train_buy, train_sell)``, each of shape ``(D, N, m, n)``;
        slice ``d`` holds the output of the ``d``-th call to
        ``Train_Data_Simulation``.
    """
    steps = int(T / dt)
    batch_shape = (D, steps, A.shape[0], A.shape[1])
    train_q = torch.zeros(batch_shape)
    train_buy = torch.zeros(batch_shape)
    train_sell = torch.zeros(batch_shape)

    for episode in range(D):
        q_path, buy_path, sell_path = Train_Data_Simulation(T, dt, A, B, gamma, net)
        train_q[episode] = q_path
        train_buy[episode] = buy_path
        train_sell[episode] = sell_path

    return train_q, train_buy, train_sell

def train_stock(D, T, dt, mu, sigma, S0):
    """Simulate ``D`` independent stock-price paths.

    Args:
        D: Number of paths.
        T: Time horizon.
        dt: Step size; each path has ``int(T / dt)`` points.
        mu, sigma, S0: Parameters passed to ``Stock_Prices_Simulation``.

    Returns:
        Tensor of shape ``(D, int(T / dt))`` with one path per row.
    """
    points_per_path = int(T / dt)
    paths = torch.zeros((D, points_per_path))
    for episode in range(D):
        simulated = Stock_Prices_Simulation(T, dt, mu, sigma, S0)
        paths[episode] = simulated
    return paths



In [11]:
import torch.distributions as dist

def kl_divergence(mu1, cov1, mu2, cov2):
    """Return ``KL(N(mu1, cov1) || N(mu2, cov2))`` as a Python float.

    Args:
        mu1, cov1: Mean vector and covariance matrix of the first Gaussian.
        mu2, cov2: Mean vector and covariance matrix of the second Gaussian.

    Returns:
        The divergence converted with ``.item()``, so the result is a plain
        number and carries no gradient.
    """
    first = dist.MultivariateNormal(mu1, covariance_matrix=cov1)
    second = dist.MultivariateNormal(mu2, covariance_matrix=cov2)
    return dist.kl_divergence(first, second).item()


In [18]:
# Collect D inventory/order trajectories with the current network ...
train_q, train_buy, train_sell = training_data_collections(
    D=D, T=T, dt=dt, A=A, B=B, gamma=gamma, net=net
)
# ... and D stock-price paths.
train_S = train_stock(D=D, T=T, dt=dt, mu=mu, sigma=sigma, S0=S0)



FileNotFoundError: [Errno 2] No such file or directory: '--ip=127.0.0.1'

In [None]:
new_net = MyNet(2, 2)
old_net = MyNet(2, 2)

# Build the optimizer once. Re-creating Adam inside the epoch loop would
# discard its running moment estimates at every epoch.
optimizer = torch.optim.Adam(new_net.parameters(), lr=learning_rate)

for epoch in range(num_epochs):
    # Copy the current parameters into old_net and collect fresh data with it.
    old_net.load_state_dict(new_net.state_dict())
    train_q, train_buy, train_sell = training_data_collections(D, T, dt, A, B, gamma, old_net)
    train_S = train_stock(D, T, dt, mu, sigma, S0)
    for j in range(D):
        # Select the j-th trajectory.
        q = train_q[j]
        buy = train_buy[j]
        sell = train_sell[j]
        S = train_S[j]
        # TODO: the per-trajectory update of new_net is not implemented yet.
        




In [76]:
# Evaluate the reward on the trajectory simulated above.
# NOTE(review): the saved output below is a per-step vector, while `reward`
# as defined in this file returns a single summed value - the output looks
# stale and should be regenerated.
reward(
    dt=dt, A=A, B=B, gamma=gamma, net=net, T=T, S0=S0, r=r, mu=mu, sigma=sigma,
    Maturities=Maturities, Strikes=Strikes, q=q, buy=buy, sell=sell,
    Vol_surface=Vol_surface,
)

tensor([0.0012, 0.0012, 0.0012, 0.0012, 0.0609, 0.0620, 0.0999, 0.0647, 0.0642,
        0.0622, 0.0623, 0.1369, 0.0622, 0.0650, 0.0622, 0.0606, 0.0582, 0.0678,
        0.0130, 0.1368, 0.0694, 0.0628, 0.0621, 0.0600, 0.0630, 0.0540, 0.0744,
        0.0614, 0.1035, 0.0649, 0.1078, 0.0686, 0.0977, 0.0683, 0.0690, 0.0563,
        0.0727, 0.0740, 0.0716, 0.0701, 0.0619, 0.0678, 0.0644, 0.0748, 0.1498,
        0.1048, 0.0492, 0.0649, 0.0677, 0.0597, 0.1385, 0.0686, 0.1378, 0.1493,
        0.1269, 0.1510, 0.1287, 0.1476, 0.2207, 0.1324, 0.1462, 0.1575, 0.1467,
        0.1530, 0.1326, 0.1208, 0.1343, 0.1468, 0.1475, 0.1924, 0.1953, 0.1988,
        0.1930, 0.2037, 0.2054, 0.2048, 0.1025, 0.2054, 0.1727, 0.2047, 0.1714,
        0.1822, 0.2049, 0.1780, 0.1672, 0.2078, 0.1365, 0.1987, 0.1740, 0.1543,
        0.1819, 0.1909, 0.1915, 0.1832, 0.2110, 0.1970, 0.2097, 0.1993, 0.1999,
        0.2142], grad_fn=<CopySlices>)

In [None]:
# generate the PPO algorithm

# define the PPO algorithm
def PPO(dt, A, B, gamma, net, T, S0, r, mu, sigma, Maturities, Strikes, Vol_surface, learning_rate, num_epochs):
    """Train ``net`` by gradient ascent on the reward of one trajectory.

    One trajectory is simulated with the initial ``net`` and reused for all
    epochs; each epoch recomputes the reward with the current parameters and
    takes one Adam step.

    NOTE(review): despite the name, no clipped surrogate objective or KL term
    is used here - confirm whether that is intended.

    Args:
        dt, A, B, gamma, T, S0, r, mu, sigma, Maturities, Strikes, Vol_surface:
            Passed through to ``Train_Data_Simulation`` and ``reward``.
        net: Network to train (updated in place).
        learning_rate: Adam learning rate.
        num_epochs: Number of gradient steps.

    Returns:
        The same ``net`` object after training.
    """
    q, buy, sell = Train_Data_Simulation(T, dt, A, B, gamma, net)
    # Treat the trajectory as fixed data. Without detaching, each backward
    # pass would also run through the simulation graph, which cannot be
    # traversed a second time once its buffers are freed.
    q, buy, sell = q.detach(), buy.detach(), sell.detach()

    optimizer = torch.optim.Adam(net.parameters(), lr=learning_rate)
    for _ in range(num_epochs):
        # forward pass
        reward_sum = reward(dt, A, B, gamma, net, T, S0, r, mu, sigma, Maturities, Strikes, q, buy, sell, Vol_surface)
        # Optimizers minimise their objective, so descend on the NEGATED
        # reward; calling backward() on the reward itself would decrease it.
        loss = -reward_sum
        # backward pass
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    return net

