In [36]:
# Imports: numerics, plotting, and the project's simulation helpers (used below to generate asset paths)
import numpy as np
import pandas as pd
import torch
import matplotlib.pyplot as plt 

from simulation import *

In [37]:
import torch
import torch.nn as nn

class SimpleLSTMModel(nn.Module):
    """Single-layer LSTM followed by a linear read-out of the final time step.

    The network consumes one univariate sequence at a time (a 1-D tensor) and
    returns the linear projection of the LSTM output at the last position.
    """

    def __init__(self, hidden_size, output_size=1):
        """Create the recurrent layer and the linear head.

        Args:
            hidden_size: number of hidden units in the LSTM layer.
            output_size: width of the linear head's output (1 by default).
        """
        super().__init__()
        self.hidden_size = hidden_size
        # One feature per time step; batch_first means (batch, seq, feature).
        self.lstm = nn.LSTM(input_size=1, hidden_size=hidden_size, batch_first=True)
        # Linear map from the hidden state to the requested output width.
        self.fc = nn.Linear(in_features=hidden_size, out_features=output_size)

    def forward(self, x):
        """Evaluate the network on one sequence.

        Args:
            x: 1-D tensor of length ``seq_length``.

        Returns:
            The head's output with every size-1 dimension squeezed away
            (a 0-dim tensor when ``output_size`` is 1).
        """
        # (seq_length,) -> (1, seq_length, 1): singleton batch and feature axes.
        batched = x[None, ..., None]
        hidden_seq, _state = self.lstm(batched)
        # Only the LSTM output at the last time step feeds the head.
        final_step = hidden_seq[:, -1]
        return self.fc(final_step).squeeze()

# Instantiate the LSTM value network and wrap it in a Functional object for the trading policy.
hidden_size = 10  # Number of LSTM units in the hidden layer

model = SimpleLSTMModel(hidden_size)
# NOTE(review): x is a random length-50 sample that no visible cell uses afterwards — confirm it can be dropped.
x = torch.randn(50)
# Functional is not defined in this notebook; presumably it comes from `from simulation import *` — TODO confirm.
f = Functional(model)

In [38]:
# Simulate the asset path(s) consumed by the cells below, using a 2-entry mu,
# a 2x2 sigma, horizon T = 1 and step dt = 0.01.
# NOTE(review): generate_asset_path is presumably provided by `from simulation import *`;
# the layout of the returned `path` is not visible from this notebook.
path = generate_asset_path(mu = np.array([0.1, 0.2]), sigma = np.array([[0.3, 0.4], [0.1, 0.2]]), T = 1, dt = 0.01)   

In [39]:
# Instantiate the trading policy from the wrapped value network `f` and a
# hand-written array of five 2-entry states, then evaluate its entropy term once.

naive_states = np.array([[0.1, 0.2], [0.2, 0.3], [0.3, 0.4], [0.4, 0.5], [0.5, 0.6]])

# Trading_Policy is not defined in this notebook; presumably it comes from `from simulation import *`.
policy = Trading_Policy(f, naive_states)

# Spot check of policy.entropy on the first element of `path`, with the same
# mu / sigma / dt that were used to generate the path.
policy.entropy(path[0], mu = np.array([0.1, 0.2]), sigma = np.array([[0.3, 0.4], [0.1, 0.2]]), dt = 0.01, h = 0.02, gamma = 0.3)

0.4828312758041713

In [40]:
# Run the policy along the simulated path and keep the two returned histories
# (holdings and wealth); wealth_history feeds the loss computed further down.
# NOTE(review): all arguments are positional. The trailing 0.1, 0.1 differ from the
# h = 0.02, gamma = 0.3 passed to policy.entropy elsewhere — if they are meant to be
# h and gamma, confirm against simulated_trading's signature.
holding_history, wealth_history = simulated_trading(policy, path, 1, np.array([0.1, 0.2]), np.array([[0.3, 0.4], [0.1, 0.2]]), 0.01, 0.1, 0.1) 

In [44]:
# implement the loss function
# Anomaly detection is switched on globally for every later autograd call.
# NOTE(review): per the PyTorch docs this mode is a debugging aid that slows
# execution — consider turning it off once the loss runs cleanly.
torch.autograd.set_detect_anomaly(True)

# Model parameters; the values match the literals passed to generate_asset_path
# and policy.entropy in the earlier cells.
gamma = 0.3
mu = np.array([0.1, 0.2])
sigma = np.array([[0.3, 0.4], [0.1, 0.2]])

# the following function is to calculate the loss of one path
# using the continuous TD error
def one_path_loss(new_value_net, policy, wealth_history, dt, h, gamma, mu, sigma):
    """Accumulated squared continuous-time TD error along one wealth path.

    For every prefix length ``i`` in ``2 .. len(wealth_history) - 1`` the value
    network is evaluated on ``wealth_history[:i]``; the difference to the value
    of the previous prefix ``wealth_history[:i-1]``, divided by ``dt``, is the
    finite-difference value derivative.  The returned loss is

        0.5 * dt * sum_i (value_derivative_i + entropy_i) ** 2

    with ``entropy_i = policy.entropy(wealth_history[:i], mu, sigma, dt, h, gamma)``.

    Each prefix value is computed once and reused as the "previous" value of
    the next step, so the network is called ``n - 1`` times instead of
    ``2 * (n - 2)``.  This assumes ``new_value_net`` is deterministic for a
    given input (e.g. no active dropout).

    Args:
        new_value_net: callable mapping a 1-D float32 tensor (a wealth prefix)
            to a scalar tensor; the value network being trained.
        policy: trading policy object exposing
            ``entropy(x, mu, sigma, dt, h, gamma)``.
        wealth_history: path of wealth, as a sequence, array or tensor.  A
            tensor input is detached and cast to float32 (no copy-construct
            warning); anything else is converted with ``torch.tensor``.
        dt: time step used for the finite difference and the final scaling.
        h, gamma, mu, sigma: forwarded unchanged to ``policy.entropy``.

    Returns:
        Scalar tensor holding the loss; zero when the path has fewer than
        three points (the loop then has no iterations).
    """
    # Make the wealth history a float32 tensor that carries no autograd history.
    if torch.is_tensor(wealth_history):
        wealth = wealth_history.detach().to(dtype=torch.float32)
    else:
        wealth = torch.tensor(wealth_history, dtype=torch.float32)

    n = len(wealth)
    td_error = torch.tensor(0.0, dtype=torch.float32)
    if n < 3:
        return 0.5 * td_error * dt

    # NOTE(review): prefixes stop at length n - 1, so the last observation
    # wealth_history[-1] never enters the loss — confirm this is intended.
    prev_value = new_value_net(wealth[:1])
    for i in range(2, n):
        prefix = wealth[:i]
        value = new_value_net(prefix)
        value_derivative = (value - prev_value) / dt
        entropy = policy.entropy(prefix, mu, sigma, dt, h, gamma)

        # Out-of-place accumulation keeps the autograd graph free of in-place edits.
        td_error = td_error + (value_derivative + entropy) ** 2
        prev_value = value

    return 0.5 * td_error * dt

# Device handle, kept in case other cells use it.  The step below runs on the
# CPU: the model is never moved and one_path_loss creates its tensors without
# specifying a device.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# One Adam step on the single-path TD loss.
new_value_net = SimpleLSTMModel(24)
optimizer = torch.optim.Adam(new_value_net.parameters(), lr=0.01)
optimizer.zero_grad()
# one_path_loss converts wealth_history to a float32 tensor itself, so the raw
# history is passed straight through (the earlier pre-converted copy was bound
# to a misspelled name and never used).  gamma, mu and sigma are the constants
# defined at the top of this cell rather than repeated literals.
loss = one_path_loss(
    new_value_net,
    policy,
    wealth_history,
    dt=0.01,
    h=0.02,
    gamma=gamma,
    mu=mu,
    sigma=sigma,
)
# Backpropagate where the graph was built; copying the scalar loss to another
# device first adds nothing because the parameters stay where they are.
loss.backward()
optimizer.step()
print(loss)


KeyboardInterrupt: 