In [1]:
# generate one path for n assets
import numpy as np
import pandas as pd
import torch
import matplotlib.pyplot as plt 

from simulation import *

In [2]:
import torch
import torch.nn as nn

class SimpleLSTMModel(nn.Module):
    """Single-layer LSTM followed by a linear read-out of the last time step.

    The model consumes one univariate sequence per call and, with the default
    ``output_size=1``, collapses it to a scalar tensor.
    """

    def __init__(self, hidden_size, output_size=1):
        """Build the recurrent layer and the linear head.

        Args:
            hidden_size: number of LSTM units in the hidden layer.
            output_size: width of the linear head (1 by default).
        """
        super().__init__()
        self.hidden_size = hidden_size
        # One feature per time step; batch_first means (batch, seq_length, features).
        self.lstm = nn.LSTM(input_size=1, hidden_size=hidden_size, batch_first=True)
        # Maps the hidden state to the requested output width.
        self.fc = nn.Linear(in_features=hidden_size, out_features=output_size)

    def forward(self, x):
        """Evaluate the network on a sequence ``x`` of shape ``(n,)``.

        Returns the linear head applied to the LSTM output at the final time
        step, with all size-1 dimensions squeezed away.
        """
        # Prepend a batch axis and append a feature axis: (n,) -> (1, n, 1).
        batched = x[None, ..., None]
        hidden_seq, _ = self.lstm(batched)
        # Keep only the last time step of the sequence output.
        final_step = hidden_seq[:, -1, :]
        return torch.squeeze(self.fc(final_step))

# Initialize the model, a random test input, and the functional wrapper used by the next cell.
hidden_size = 10  # Number of LSTM units in the hidden layer

model = SimpleLSTMModel(hidden_size)
# Random 1-D input of length 50; SimpleLSTMModel.forward treats it as a single univariate sequence.
# NOTE(review): no random seed is set in the visible cells, so the weights, this input and the
# numbers printed below will differ from run to run.
x = torch.randn(50)
# `Functional` is not defined in this notebook -- presumably it comes from `from simulation import *`.
f = Functional(model)

In [3]:
# Call the three methods exposed by `f` on the random input `x` and print their results.
# NOTE(review): judging by the names these are presumably time / first-order / second-order
# derivatives, with the second argument a step size -- confirm against `simulation.Functional`.
print(f.partial_t(x, 0.01))
print(f.partial_x(x, 0.02))
print(f.partial_xx(x, 0.03))


0.59746057
-0.019420683
0.002566311


In [4]:
# Simulate a price path with T = 1 and dt = 0.01; mu has two entries, i.e. two assets.
# NOTE(review): generate_asset_path is not defined here (presumably from `simulation`); the layout
# of the returned `path` and whether `sigma` is a volatility or a covariance matrix must be checked there.
path = generate_asset_path(mu = np.array([0.1, 0.2]), sigma = np.array([[0.3, 0.4], [0.1, 0.2]]), T = 1, dt = 0.01)   

In [5]:
# Instantiate a trading policy from the functional `f` and a small hand-written array of states,
# then evaluate its entropy term on the first element of the simulated path.
# NOTE(review): Trading_Policy is not defined in this notebook (presumably from `simulation`);
# the meaning of `naive_states`, `h` and `gamma` should be confirmed against that module.

naive_states = np.array([[0.1, 0.2], [0.2, 0.3], [0.3, 0.4], [0.4, 0.5], [0.5, 0.6]])

policy = Trading_Policy(f, naive_states)

# Last expression of the cell, so its value is shown as the cell output.
policy.entropy(path[0], mu = np.array([0.1, 0.2]), sigma = np.array([[0.3, 0.4], [0.1, 0.2]]), dt = 0.01, h = 0.02, gamma = 0.3)

0.48283133709678583

In [6]:
# Run `policy` along the simulated `path`; the call returns two results, stored here as
# holding_history and wealth_history.
# NOTE(review): the signature of simulated_trading is not visible; the positional values
# (1, mu, sigma, 0.01, 0.1, 0.1) are presumably (T, mu, sigma, dt, h, gamma) -- confirm, and note
# that 0.1 / 0.1 differ from the h = 0.02, gamma = 0.3 used in the entropy call above.
holding_history, wealth_history = simulated_trading(policy, path, 1, np.array([0.1, 0.2]), np.array([[0.3, 0.4], [0.1, 0.2]]), 0.01, 0.1, 0.1) 

In [7]:
# Dump the full simulated wealth path (the recorded output starts at 1.0).
print(wealth_history)

[1.         1.00255504 1.00065949 1.00309649 0.98839989 0.97981859
 1.00126334 1.00387321 1.0908802  1.09243676 1.078931   1.06882925
 1.07361598 1.01339509 1.02229497 1.03199591 1.04085764 1.07262394
 1.06402497 1.00691036 1.02431476 1.00941354 1.01356804 1.03489285
 1.03074069 1.06572    1.15833184 1.13606404 1.12131189 1.10846168
 1.09899142 1.08988344 1.11094287 1.10851342 1.11657737 1.11457145
 1.12916296 1.12714306 1.14734393 1.13090829 1.1226665  1.16761094
 1.21071511 1.22798497 1.23577677 1.27011818 1.28298025 1.2642871
 1.26731789 1.27280582 1.28110765 1.28984079 1.27357197 1.28666954
 1.26778151 1.26251127 1.24865546 1.21684725 1.24015009 1.25679043
 1.22907078 1.22341448 1.25622898 1.24364611 1.26368405 1.31406809
 1.2708017  1.26389787 1.25631698 1.27768157 1.25272251 1.25424772
 1.30605481 1.30538904 1.26991672 1.22431906 1.24587578 1.23835156
 1.25213454 1.22280639 1.21005233 1.20900439 1.19518256 1.1988265
 1.20040338 1.19692251 1.18919077 1.18818415 1.24560712 1.256928

In [37]:
# implement the loss function

# Problem parameters: the same mu / sigma that were passed to generate_asset_path, and the same
# gamma that was passed to policy.entropy in the earlier cells.
# NOTE(review): the meaning of gamma is not visible in this notebook -- confirm against `simulation`.
gamma = 0.3
mu = np.array([0.1, 0.2])
sigma = np.array([[0.3, 0.4], [0.1, 0.2]])

def one_path_loss(new_value_net, policy, wealth_history, dt, h, gamma, mu, sigma, differentiable=False):
    """Continuous-time TD loss of a value network along a single wealth path.

    For every prefix length ``i`` in ``2 .. n-1`` (``n = len(wealth_history)``)
    the TD error is

        (V(w[:i]) - V(w[:i-1])) / dt + policy.entropy(w[:i], mu, sigma, dt, h, gamma)

    and the loss is ``0.5 * sum(TD_error ** 2) * dt``.

    Args:
        new_value_net: callable mapping a 1-D float32 tensor (a wealth prefix)
            to a scalar; the network that is to be trained.
        policy: trading policy object exposing
            ``entropy(x, mu, sigma, dt, h, gamma)``.
        wealth_history: 1-D path of wealth (array-like or tensor).
        dt: time step used for the finite difference and for the final scaling.
        h, gamma, mu, sigma: forwarded unchanged to ``policy.entropy``.
        differentiable: when False (default) the TD errors are converted to
            plain floats and a NumPy scalar is returned, exactly as before;
            that value carries no autograd graph and cannot be back-propagated.
            When True a 0-dim torch tensor is returned that stays attached to
            the graph, so ``loss.backward()`` reaches ``new_value_net``.

    Returns:
        The scalar loss (NumPy float by default, torch tensor when
        ``differentiable`` is True). Paths with fewer than three points give 0.
    """
    # as_tensor avoids the copy-construct warning torch.tensor() emits when the
    # caller already hands in a tensor.
    wealth_history = torch.as_tensor(wealth_history, dtype=torch.float32)
    n = len(wealth_history)

    td_terms = []
    for i in range(2, n):
        x = wealth_history[:i]
        x_short = wealth_history[:i - 1]
        # Finite-difference time derivative of the value along the path.
        value_derivative = (new_value_net(x) - new_value_net(x_short)) / dt
        entropy = policy.entropy(x, mu, sigma, dt, h, gamma)
        td = value_derivative + entropy
        # Detach to a float right away unless the caller asked for gradients,
        # so the graphs of earlier steps are not kept alive needlessly.
        td_terms.append(td if differentiable else float(td))

    if differentiable:
        if not td_terms:
            return torch.zeros((), dtype=torch.float32)
        stacked = torch.stack(
            [torch.as_tensor(term, dtype=torch.float32).reshape(()) for term in td_terms]
        )
        return 0.5 * torch.sum(stacked ** 2) * dt

    # Same buffer layout as before: entries 0 and 1 stay zero.
    TD_error = np.zeros(n)
    TD_error[2:] = td_terms

    # return the sum of squares of TD_error, scaled by dt
    return 0.5 * np.sum(TD_error**2) * dt


# Fresh value network (24 LSTM units) whose loss is evaluated on the simulated wealth path.
new_value_net = SimpleLSTMModel(24)

# one_path_loss converts the path to a float32 tensor itself, so the raw `wealth_history`
# is passed directly; the misspelt, never-used `wealth_hisotry` tensor has been removed.
# gamma / mu / sigma are the constants defined at the top of this cell (same values as the
# literals previously typed inline here).
one_path_loss(new_value_net, policy, wealth_history, dt=0.01, h=0.02, gamma=gamma, mu=mu, sigma=sigma)



0.10818638840399397