In [None]:
import os

In [None]:
# Make the parent of the current working directory the new working directory.
# NOTE: this is not idempotent -- re-running the cell climbs one more level.
os.chdir(os.path.dirname(os.getcwd()))

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import gym
from agent import *
from optionpricing import *
import yaml
import torch
from collections import defaultdict
import matplotlib.style as style
import ipywidgets as widgets
from ipywidgets import interact, interact_manual

In [None]:
# matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*' and later
# removed the old aliases; use whichever name this installation provides. If
# neither is listed, fall back to the original name so the failure stays visible.
_POSTER_STYLE_NAMES = ('seaborn-v0_8-poster', 'seaborn-poster')
style.use(next((name for name in _POSTER_STYLE_NAMES if name in style.available), 'seaborn-poster'))

In [None]:
# Name of the run directory under 'experiments/' from which config.yaml and the
# model checkpoints are read. Placeholder: must be set to a string before the
# following cells can run.
experiment_folder = None

In [None]:
# Fail early with a clear message instead of the opaque TypeError that
# os.path.join raises when the placeholder value None is still in place.
if experiment_folder is None:
    raise ValueError("Set `experiment_folder` to a run directory under 'experiments/' before loading its config")

# Read the experiment's saved configuration; safe_load is the SafeLoader shortcut.
with open(os.path.join('experiments', experiment_folder, 'config.yaml'), 'r') as f:
    args_dict = yaml.safe_load(f)

In [None]:
class Args:
    """Simple attribute bag: every keyword argument becomes an instance attribute."""

    def __init__(self, **kwargs):
        # Write straight into the instance namespace so arbitrary keys are accepted.
        vars(self).update(kwargs)

In [None]:
# Expose the entries of the loaded config dict as attributes (e.g. args.nhidden).
args = Args(**args_dict)

### Provide config

In [None]:
# Settings passed to the environment; 'multiplier' and 'ticksize' are taken from
# the loaded experiment args, everything else is fixed here.
config = dict(
    S = 100,
    T = 10, # 10 days
    L = 1,
    m = 100, # L options for m stocks
    n = 0,
    K = list(range(95, 106)),
    D = 5,
    mu = 0,
    sigma = 0.01,
    r = 0,
    ss = 5,
    kappa = 0.1,
    multiplier = args.trc_multiplier,
    ticksize = args.trc_ticksize,
)

# Build the environment from these settings and run its configure step.
env = OptionPricingEnv(config)
env.configure()

In [None]:
# Use the first CUDA device when one is available, otherwise the CPU.
ngpu = int(torch.cuda.is_available())
device = torch.device('cuda:0' if ngpu else 'cpu')

In [None]:
def load_estimator(env, device, nhidden, nunits, experiment_folder, kind = 'best'):
    """Rebuild the estimator network and restore its weights from a saved checkpoint.

    Parameters
    ----------
    env : environment whose observation and action spaces size the network.
    device : torch.device the weights are mapped to and the model is moved to.
    nhidden, nunits : forwarded unchanged to the Estimator constructor.
    experiment_folder : run directory under 'experiments/' holding the checkpoint.
    kind : 'best' loads best.pt, 'checkpoint' loads checkpoint.pt.

    Returns
    -------
    The estimator in eval mode, placed on ``device``.

    Raises
    ------
    ValueError
        If ``kind`` is not one of the supported choices.
    """
    checkpoint_files = {'best': 'best.pt', 'checkpoint': 'checkpoint.pt'}
    if kind not in checkpoint_files:
        raise ValueError('Invalid choice for kind')

    # A flat observation space is described by its length, anything else by its full shape.
    state_shape = env.observation_space.shape
    state_space_dim = state_shape[0] if len(state_shape) == 1 else state_shape

    estimator = Estimator(nhidden, nunits, state_space_dim, env.action_space.n)

    # Map the stored tensors onto the requested device rather than always onto the CPU,
    # so the model sits where the callers place their input states.
    checkpoint_path = os.path.join('experiments', experiment_folder, checkpoint_files[kind])
    checkpoint = torch.load(checkpoint_path, map_location = device)

    estimator.load_state_dict(checkpoint['estimator'])
    # NOTE(review): assumes Estimator is a torch.nn.Module (it already exposes
    # load_state_dict/eval), so .to(device) is available.
    estimator.to(device)
    estimator.eval()

    return estimator

In [None]:
def simulate_episode(env, device, estimator, policy = 'agent', seed = 1):
    """Run one seeded episode and record per-step quantities.

    Parameters
    ----------
    env : environment exposing reset(), step(action) and the attributes
        delta, n, stock_value, option_value and cash.
    device : torch.device on which each state tensor is placed.
    estimator : callable mapping a state tensor to one score per action
        (only used when ``policy == 'agent'``).
    policy : 'agent' picks the arg-max action of the estimator,
        'delta' uses delta_neutral_policy(env).
    seed : seed applied to random, numpy and torch before the episode starts.

    Returns
    -------
    defaultdict(list) with keys 'delta', 'reward', 'n', 'stock_value',
    'option_value' and 'cash', one entry per step.

    Raises
    ------
    ValueError
        If ``policy`` is not 'agent' or 'delta'.
    """
    # Local import: no explicit `import random` is visible at the top of the notebook.
    import random

    # Reject unknown policies up front; otherwise `action` would be unbound in the loop.
    if policy not in ('agent', 'delta'):
        raise ValueError('Invalid choice for policy')

    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    state = torch.from_numpy(env.reset()).to(device)
    history = defaultdict(list)
    done = False

    while not done:
        # Delta is recorded before acting, the remaining quantities after the step.
        history['delta'].append(env.delta)
        if policy == 'agent':
            with torch.no_grad():
                # .cpu() is required before .numpy() when the state lives on a GPU.
                action = np.argmax(estimator(state).cpu().numpy())
        else:
            action = delta_neutral_policy(env)

        state, reward, done, info = env.step(action)

        history['reward'].append(reward)
        history['n'].append(env.n)
        history['stock_value'].append(env.stock_value)
        history['option_value'].append(env.option_value)
        history['cash'].append(env.cash)

        state = torch.from_numpy(state).to(device)

    return history

In [None]:
def delta_neutral_policy(env):
    """Return the action index that moves the stock position to the delta-offsetting holding.

    The target holding is minus the (truncated) product of env.delta and
    env.L * env.m; the trade is the difference to the current holding env.n,
    which is then looked up in env.inv_action_map.
    """
    exposure = env.delta * (env.L * env.m)
    target_holding = -1 * int(exposure)  # int() truncates toward zero
    trade = target_holding - env.n
    return env.inv_action_map[trade]

In [None]:
# Restore the best-performing weights saved for the selected experiment.
estimator = load_estimator(
    env,
    device,
    nhidden = args.nhidden,
    nunits = args.nunits,
    experiment_folder = experiment_folder,
    kind = 'best',
)

In [None]:
# Roll out a single seeded episode using the estimator's arg-max policy.
history = simulate_episode(env, device, estimator, 'agent', seed = 7)

# Step-to-step change of the stock value, padded with a leading 0.
stock_pnl = np.insert(np.diff(np.array(history['stock_value']), axis = 0), 0, 0)

# Step-to-step change of option value plus cash, padded the same way.
option_pnl = (
    np.array(history['option_value'][1:])
    + np.array(history['cash'][1:])
    - np.array(history['option_value'][:-1])
    - np.array(history['cash'][:-1])
)
option_pnl = np.insert(option_pnl, 0, 0)

total_pnl = stock_pnl + option_pnl

steps = np.arange(1, len(history['delta']) + 1, 1)
scaled_delta = np.array(history['delta']) * -env.L * env.m

fig, ax = plt.subplots(figsize = (12, 8), nrows = 2, ncols = 1, sharex = True)
position_ax, reward_ax = ax

# Top panel: scaled delta against the stock holding n.
position_ax.plot(steps, scaled_delta, color = 'green', label = 'delta', lw = 1.5)
position_ax.plot(steps, history['n'], color = 'blue', label = 'n', lw = 1.5)
position_ax.set_title(f'K: {env.K}')
position_ax.set_ylabel('n')
position_ax.legend()

# Bottom panel: raw reward and the reward clipped to +/- args.clip.
reward_ax.plot(steps, history['reward'], color = 'blue', label = 'reward', lw = 1.5)
reward_ax.plot(steps, np.clip(history['reward'], -args.clip, args.clip), color = 'red', label = 'clipped', lw = 1.5)
reward_ax.set_xlabel('step')
reward_ax.legend()
plt.show()

In [None]:
# Roll out the same seeded episode, this time following delta_neutral_policy.
history = simulate_episode(env, device, estimator, 'delta', seed = 7)

# Step-to-step change of the stock value, padded with a leading 0.
stock_pnl = np.insert(np.diff(np.array(history['stock_value']), axis = 0), 0, 0)

# Step-to-step change of option value plus cash, padded the same way.
option_pnl = (
    np.array(history['option_value'][1:])
    + np.array(history['cash'][1:])
    - np.array(history['option_value'][:-1])
    - np.array(history['cash'][:-1])
)
option_pnl = np.insert(option_pnl, 0, 0)

total_pnl = stock_pnl + option_pnl

steps = np.arange(1, len(history['delta']) + 1, 1)
scaled_delta = np.array(history['delta']) * -env.L * env.m

fig, ax = plt.subplots(figsize = (12, 8), nrows = 2, ncols = 1, sharex = True)
position_ax, reward_ax = ax

# Top panel: scaled delta against the stock holding n.
position_ax.plot(steps, scaled_delta, color = 'green', label = 'delta', lw = 1.5)
position_ax.plot(steps, history['n'], color = 'blue', label = 'n', lw = 1.5)
position_ax.set_title(f'K: {env.K}')
position_ax.set_ylabel('n')
position_ax.legend()

# Bottom panel: raw reward and the reward clipped to +/- args.clip.
reward_ax.plot(steps, history['reward'], color = 'blue', label = 'reward', lw = 1.5)
reward_ax.plot(steps, np.clip(history['reward'], -args.clip, args.clip), color = 'red', label = 'clipped', lw = 1.5)
reward_ax.set_xlabel('step')
reward_ax.legend()
plt.show()

## Evaluate State Space

In [None]:
def generate_state_space_data(env, device, estimator, data, variable = 'S'):
    """Evaluate the estimator's arg-max action along a sweep of one state variable.

    Parameters
    ----------
    env : environment providing reset(), S0, high and action_map.
    device : torch.device on which the batch of states is placed.
    estimator : callable mapping a (rows, 4) float32 state tensor to per-action scores.
    data : dict with keys 'S', 't', 'n' and 'K'. The entry named by ``variable``
        is an array-like of values to sweep; the other entries are scalars
        held constant for every row.
    variable : which of 'S', 'n' or 't' is swept.

    Returns
    -------
    list with one env.action_map entry per swept value.

    Raises
    ------
    ValueError
        If ``variable`` is not 'S', 'n' or 't'.
    """
    if variable not in ('S', 'n', 't'):
        raise ValueError(f"Invalid choice for variable: {variable!r} (expected 'S', 'n' or 't')")

    env.reset() # Only need S0 even though strike might be different
    S0 = env.S0

    # The swept variable becomes a column vector; np.asarray also accepts plain lists.
    swept = np.asarray(data[variable]).reshape(-1, 1)

    def column(key):
        # Swept variable: use its values directly. Others: broadcast the constant.
        return swept if key == variable else np.ones(swept.shape) * data[key]

    # Same scaling as the original branches: S and K by S0, n by env.high, t as given.
    S = column('S') / S0
    t = column('t')
    n = column('n') / env.high
    K = np.ones(swept.shape) * data['K'] / S0

    # Column order of the state is (S, t, n, K).
    state = np.hstack((S, t, n, K)).astype(np.float32)
    state = torch.from_numpy(state).to(device)
    with torch.no_grad():
        # .cpu() is required before .numpy() when the state lives on a GPU.
        scores = estimator(state).cpu().numpy()

    return [env.action_map[a] for a in np.argmax(scores, axis = 1)]

## Action vs State Variable

### Varying S (n, t and K held fixed)

In [None]:
@interact
def plot_S(n = (-100, 100, 1), t = (0, 1, 0.1), K = (90, 110, 1)):
    """Plot the selected action over a grid of S values for the chosen n, t and K."""
    price_grid = np.linspace(85, 115, 10000)
    fig, ax = plt.subplots(figsize = (12, 8))

    # Only S varies along the grid; the interactive arguments stay constant.
    sweep = {'S': price_grid, 't': t, 'n': n, 'K': K}
    chosen_actions = generate_state_space_data(env, device, estimator, sweep, variable = 'S')

    ax.plot(price_grid, chosen_actions, lw = 1.5, color = 'blue')
    ax.set(xlabel = 'S', ylabel = 'action')
    plt.show()

### Varying n (S, t and K held fixed)

In [None]:
@interact
def plot_n(S = (85, 115, 1), t = (0, 1, 0.1), K = (90, 110, 1)):
    """Plot the selected action over a grid of n values for the chosen S, t and K."""
    holding_grid = np.linspace(-100, 100, 10000)
    fig, ax = plt.subplots(figsize = (12, 8))

    # Only n varies along the grid; the interactive arguments stay constant.
    sweep = {'S': S, 't': t, 'n': holding_grid, 'K': K}
    chosen_actions = generate_state_space_data(env, device, estimator, sweep, variable = 'n')

    ax.plot(holding_grid, chosen_actions, lw = 1.5, color = 'blue')
    ax.set(xlabel = 'n', ylabel = 'action')
    plt.show()

### Varying t (S, n and K held fixed)

In [None]:
@interact
def plot_t(S = (85, 115, 1), n = (-100, 100, 1), K = (90, 110, 1)):
    """Plot the selected action over a grid of t values for the chosen S, n and K."""
    time_grid = np.linspace(0, 1, 10000)
    fig, ax = plt.subplots(figsize = (12, 8))

    # Only t varies along the grid; the interactive arguments stay constant.
    sweep = {'S': S, 't': time_grid, 'n': n, 'K': K}
    chosen_actions = generate_state_space_data(env, device, estimator, sweep, variable = 't')

    ax.plot(time_grid, chosen_actions, lw = 1.5, color = 'blue')
    ax.set(xlabel = 't', ylabel = 'action')
    plt.show()