In [1]:
import os
import sys
sys.path.append(os.getcwd() + '/codes')
import copy
import torch
import torch.nn.functional as F
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
from pricing_model import get_gamma
from agent import MVPITD3Agent
from utils import Config, MeanStdNormalizer
from Envs import DeltaHedgingEnv, DeltaHedgingEnvTiming
from component import Task
from component.replay import *
from component.random_process import *
from network import GaussianActorCriticNet, FCBody, TwoLayerFCBodyWithAction, ThreeLayerFCBodyWithAction, TD3Net, RTD3Net, OneDenseLSTM

2022-02-11 04:03:08.792774: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudart.so.10.1


In [2]:
def create_config(trial_no, gpu_no=0, model_dir='./codes/model/'):
    # search existing model name
    file_list = [x for x in os.listdir(model_dir) if x.endswith('.model') and x.startswith(f't{trial_no}')]
    last_dash = file_list[0].rfind('-')
    model_name = file_list[0][:last_dash+1]
    parsers = model_name.split('-')
    
    # configs - varying
    config = Config()
    config.DEVICE = torch.device(f'cuda:{gpu_no}')
    config.tag = None
    config_dict = {'data_type':'simulation',
                   'hedging_task':parsers[2] + '-' + parsers[3],
                   'asset_model':parsers[4].split('_')[-1],
                   'burnin_len':int(parsers[5].split('_')[-1]),
                   'history_len':int(parsers[7].split('_')[-1]),
                   'lam':float(parsers[8].split('_')[-1]),
                   'lstm_hiddensize':int(parsers[9].split('_')[-1]),
                   'lstm_inputsize':int(parsers[10].split('_')[-1]),
                   'nn_model':parsers[11].split('_')[-1],
                   'option_type':parsers[12].split('_')[-1],
                   'strike_price':float(parsers[13].split('_')[-1])
                   }
    
    # configs - fixed
    config_dict.setdefault('log_level', 0)
    config_dict.setdefault('action_noise', 0)
    config.merge(config_dict)

    config.task = parsers[2]
    task = config.task
    config.task_fn = lambda: Task(config.hedging_task, action_noise=config.action_noise, config=config)
    config.eval_env = config.task_fn()
    config.eval_interval = int(5e4)
    config.eval_episodes = 1000
    config.actor_encoding_size = 3
    config.critic_encoding_size = 4

    config.network_fn = lambda: RTD3Net(
        config.state_dim,
        config.action_dim,
        config.actor_encoding_size,
        config.critic_encoding_size,
        actor_body_fn=lambda: OneDenseLSTM(config.state_dim+config.action_dim, config.lstm_inputsize,
                                       config.lstm_hiddensize, config=config, gate=F.relu),
        critic_body_fn=lambda: OneDenseLSTM(
            config.state_dim+config.action_dim, config.lstm_inputsize, config.lstm_hiddensize,
            config=config, gate=F.relu),
        actor_opt_fn=lambda params: torch.optim.Adam(params, lr=1e-3),
        critic_opt_fn=lambda params: torch.optim.Adam(params, lr=1e-3),
    config=config)


    config.discount = 0.99
    config.td3_delay = 2
    config.warm_up = int(1e4)
    config.target_network_mix = 5e-3
    config.replay_fn = lambda: Replay(memory_size=int(5e4), batch_size=100)
    config.random_process_fn = lambda: GaussianProcess(
        size=(config.action_dim,), std=LinearSchedule(0.1))
    config.td3_noise = 0.2
    config.td3_noise_clip = 0.5
    config.td3_delay = 2
    return model_name, file_list, config

In [7]:
reward_list = []
total_sum_array = np.zeros(0)
total_mean_array = np.zeros(0)
total_std_array = np.zeros(0)
total_delta_pnl_array = np.zeros(0)

# trial_num
trial_list = [0]
for t in trial_list:
    model_name, file_list, config = create_config(t)
    td3_agent = MVPITD3Agent(config)
    
    for file in file_list:
        filename = './codes/model/' + file

        state_dict = torch.load(filename)
        td3_agent.network.load_state_dict(state_dict)

        state_list = []
        action_array = np.array([])
        action_mean_array = np.array([])
        delta_array = np.array([])
        reward_array = np.array([])
        delta_pnl_array = np.array([])
        
        deltahedging_env = DeltaHedgingEnv(config, seed=0)
        
        for i in tqdm(range(100)):

            done = False
            state = deltahedging_env.reset()
            while not done:
                state = state.reshape(1,-1)
                action = td3_agent.eval_step(state, history=td3_agent.history)
                td3_agent.history.append(np.hstack([state, action]).flatten())
                delta = deltahedging_env.env_params.delta.copy()
                next_state, hedging_performance, done, _ = deltahedging_env.step(action, delta_check=True)
                if not done:
                    state_list.append(state)
                    action_array = np.append(action_array, action)
                    reward_array = np.append(reward_array, hedging_performance[0])
                    delta_pnl_array = np.append(delta_pnl_array, hedging_performance[1])
                    delta_array = np.append(delta_array, delta)
                    state = next_state
                    
            td3_agent.history_reset(config)         
            reward_list.append([reward_array.sum(), reward_array.mean(), reward_array.std()])
            
        print(str(model_name) + '\n' + str(model_index))
        
        total_delta_pnl_array = np.append(total_delta_pnl_array, delta_pnl_array)
        
        total_sum_array = np.append(total_sum_array, np.array(reward_list)[:,0].mean())
        total_mean_array = np.append(total_mean_array, np.array(reward_list)[:,1].mean())
        total_std_array = np.append(total_std_array, np.array(reward_list)[:,2].mean())

/nas1/yjun/research_deep_hedging/codes/model/t0-MVPITD3Agent-DeltaHedging-v0-asset_model_Heston-burnin_len_5-data_type_simulation-history_len_10-lam_0.5-lstm_hiddensize_24-lstm_inputsize_6-nn_model_lstm-option_type_C-strike_price_100.0-run-0-18000.model
<agent.MVPITD3_agent.MVPITD3Agent object at 0x7fe93033e310>


RuntimeError: Error(s) in loading state_dict for RTD3Net:
	size mismatch for fc_current_obs.weight: copying a param with shape torch.Size([6, 7]) from checkpoint, the shape in current model is torch.Size([3, 7]).
	size mismatch for fc_current_obs.bias: copying a param with shape torch.Size([6]) from checkpoint, the shape in current model is torch.Size([3]).
	size mismatch for fc_action.weight: copying a param with shape torch.Size([1, 30]) from checkpoint, the shape in current model is torch.Size([1, 27]).
	size mismatch for fc_current_ac_obs1.weight: copying a param with shape torch.Size([7, 8]) from checkpoint, the shape in current model is torch.Size([4, 8]).
	size mismatch for fc_current_ac_obs1.bias: copying a param with shape torch.Size([7]) from checkpoint, the shape in current model is torch.Size([4]).
	size mismatch for fc_current_ac_obs2.weight: copying a param with shape torch.Size([7, 8]) from checkpoint, the shape in current model is torch.Size([4, 8]).
	size mismatch for fc_current_ac_obs2.bias: copying a param with shape torch.Size([7]) from checkpoint, the shape in current model is torch.Size([4]).
	size mismatch for fc_critic_1.weight: copying a param with shape torch.Size([1, 31]) from checkpoint, the shape in current model is torch.Size([1, 28]).
	size mismatch for fc_critic_2.weight: copying a param with shape torch.Size([1, 31]) from checkpoint, the shape in current model is torch.Size([1, 28]).