In [None]:
import os

In [None]:
# Work from the parent directory so the relative 'experiments/...' paths used
# below resolve. The guard keeps this cell idempotent: without it every re-run
# would climb one directory higher and break those paths.
if not os.path.isdir('experiments'):
    os.chdir(os.path.dirname(os.getcwd()))

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import gym
from agent import *
from optionpricing import *
import yaml
import torch
from collections import defaultdict

In [None]:
import matplotlib.style as style

In [None]:
# matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*' and newer
# releases no longer accept the old names, so use whichever this install provides.
style.use('seaborn-v0_8-poster' if 'seaborn-v0_8-poster' in style.available else 'seaborn-poster')

In [None]:
# Sub-directory of 'experiments/' whose config.yaml and checkpoints are read below.
experiment_folder = 'Nov7_DefaultArgs_MultiStrike_HighTrCost'

In [None]:
# Parse the experiment's saved configuration file with the safe YAML loader.
with open(os.path.join('experiments', experiment_folder, 'config.yaml')) as config_file:
    args_dict = yaml.safe_load(config_file)

In [None]:
class Args:
    """Attribute-style view of a set of keyword arguments.

    Every keyword passed to the constructor becomes an instance attribute of
    the same name, e.g. ``Args(clip=1).clip == 1``.
    """

    def __init__(self, **kwargs):
        # Store the keywords directly in the instance namespace.
        vars(self).update(kwargs)

In [None]:
# Expose the loaded configuration entries as attributes (e.g. args.trc_multiplier).
args = Args(**args_dict)

In [None]:
# Settings for the evaluation environment. The transaction-cost multiplier and
# tick size come from the loaded experiment configuration; the rest is fixed here.
config = dict(
    S = 100,
    T = 10, # 10 days
    L = 1,
    m = 100, # L options for m stocks
    n = 0,
    K = list(range(95, 106)), # the integers 95, 96, ..., 105
    D = 5,
    mu = 0,
    sigma = 0.01,
    r = 0,
    ss = 5,
    kappa = 0.1,
    multiplier = args.trc_multiplier,
    ticksize = args.trc_ticksize,
    clip_low = 0,
    clip_high = 0,
)

env = OptionPricingEnv(config)
env.configure()

In [None]:
# Use the first CUDA device when one is available, otherwise stay on the CPU.
ngpu = int(torch.cuda.is_available())
device = torch.device('cuda:0' if ngpu else 'cpu')

In [None]:
def load_estimator(env, device, ngpu, experiment_folder, kind = 'best'):
    """Build an ``Estimator`` for ``env`` and restore saved weights into it.

    Parameters
    ----------
    env : environment whose ``observation_space.shape`` and ``action_space.n``
        size the network.
    device : torch.device the restored estimator is moved to.
    ngpu : forwarded unchanged as the first ``Estimator`` argument.
    experiment_folder : sub-directory of ``experiments/`` holding the weights.
    kind : 'best' loads ``best.pth``; 'checkpoint' loads ``checkpoint.pth``.

    Returns
    -------
    The estimator in evaluation mode, located on ``device``.

    Raises
    ------
    ValueError
        If ``kind`` is neither 'best' nor 'checkpoint'.
    """
    # Validate first so a typo does not cost a network construction.
    if kind not in ('best', 'checkpoint'):
        raise ValueError(f'Invalid choice for kind: {kind!r}')

    state_shape = env.observation_space.shape
    state_space_dim = state_shape[0] if len(state_shape) == 1 else state_shape
    estimator = Estimator(ngpu, state_space_dim, env.action_space.n)

    # Deserialize onto the CPU so the file loads even on a machine without a GPU.
    weights_path = os.path.join('experiments', experiment_folder, f'{kind}.pth')
    checkpoint = torch.load(weights_path, map_location = torch.device('cpu'))
    estimator.load_state_dict(checkpoint['estimator'])

    # Previously `device` was ignored, leaving the network on the CPU while
    # callers move their inputs to `device`.
    # NOTE(review): assumes Estimator is a torch.nn.Module (it already exposes
    # load_state_dict/eval) - confirm.
    estimator = estimator.to(device)
    estimator.eval()

    return estimator

In [None]:
def simulate_episode(env, device, estimator, policy = 'agent', seed = 1):
    """Run one episode in ``env`` and record per-step quantities.

    Parameters
    ----------
    env : environment providing ``reset()`` and ``step(action)`` and the
        attributes ``delta``, ``n``, ``stock_value``, ``option_value``, ``cash``.
    device : torch.device that observation tensors are moved to.
    estimator : callable mapping a state tensor to one score per action; only
        called when ``policy == 'agent'``.
    policy : 'agent' takes the arg-max of the estimator output; 'delta' takes
        the action returned by ``delta_neutral_policy(env)``.
    seed : seed applied to ``random``, ``numpy`` and ``torch`` before the reset.

    Returns
    -------
    collections.defaultdict(list)
        Key 'delta' holds ``env.delta`` read before each step; keys 'reward',
        'n', 'stock_value', 'option_value' and 'cash' are read after each step.

    Raises
    ------
    ValueError
        If ``policy`` is neither 'agent' nor 'delta'.
    """
    # The notebook never imports `random` itself, so import it here rather than
    # relying on a star import to provide the name.
    import random

    # Fail early: an unknown policy used to reach env.step() with no action bound.
    if policy not in ('agent', 'delta'):
        raise ValueError(f"policy must be 'agent' or 'delta', got {policy!r}")

    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)

    history = defaultdict(list)
    state = torch.from_numpy(env.reset()).to(device)
    done = False

    while not done:
        # Recorded before acting, so each entry pairs with the action chosen from it.
        history['delta'].append(env.delta)

        if policy == 'agent':
            with torch.no_grad():
                # .cpu() is needed before .numpy() whenever the output lives on a GPU.
                action = np.argmax(estimator(state).cpu().numpy())
        else:
            action = delta_neutral_policy(env)

        next_state, reward, done, _ = env.step(action)

        history['reward'].append(reward)
        history['n'].append(env.n)
        history['stock_value'].append(env.stock_value)
        history['option_value'].append(env.option_value)
        history['cash'].append(env.cash)

        state = torch.from_numpy(next_state).to(device)

    return history

In [None]:
def delta_neutral_policy(env):
    """Pick the action that offsets the environment's current delta.

    The target is ``-int(env.delta * env.L * env.m)`` (truncated toward zero);
    the difference between that target and ``env.n`` is looked up in
    ``env.inv_action_map`` to obtain the action.
    """
    hedge_target = -int(env.delta * (env.L * env.m))
    adjustment = hedge_target - env.n
    return env.inv_action_map[adjustment]

In [None]:
# Restore the weights stored in 'best.pth' for the selected experiment.
estimator = load_estimator(env, device, ngpu, experiment_folder, 'best')

In [None]:
# Roll out one episode with the trained agent on a fixed seed.
history = simulate_episode(env, device, estimator, 'agent', seed = 7)

# Step-to-step changes, padded with a leading zero so they line up with `steps`.
stock_pnl = np.insert(np.diff(history['stock_value'], axis = 0), 0, 0)
option_pnl = np.insert(
    np.array(history['option_value'][1:]) + np.array(history['cash'][1:])
    - np.array(history['option_value'][:-1]) - np.array(history['cash'][:-1]),
    0, 0,
)
total_pnl = stock_pnl + option_pnl

steps = np.arange(1, len(history['delta']) + 1)

fig, ax = plt.subplots(nrows = 2, ncols = 1, sharex = True, figsize = (12, 8))
position_ax, reward_ax = ax

# Top panel: the scaled, sign-flipped delta next to the recorded n.
position_ax.plot(steps, np.array(history['delta']) * -env.L * env.m, color = 'green', label = 'delta', lw = 1.5)
position_ax.plot(steps, history['n'], color = 'blue', label = 'n', lw = 1.5)
position_ax.set_title(f'K: {env.K}')
position_ax.set_ylabel('n')
position_ax.legend()

# Bottom panel: raw reward next to the reward clipped to [-args.clip, args.clip].
reward_ax.plot(steps, history['reward'], color = 'blue', label = 'reward', lw = 1.5)
reward_ax.plot(steps, np.clip(history['reward'], -args.clip, args.clip), color = 'red', label = 'clipped', lw = 1.5)
reward_ax.set_xlabel('step')
reward_ax.legend()
plt.show()

In [None]:
# Roll out the same seed with the delta-neutral policy for comparison.
history = simulate_episode(env, device, estimator, 'delta', seed = 7)

# Step-to-step changes, padded with a leading zero so they line up with `steps`.
stock_pnl = np.insert(np.diff(history['stock_value'], axis = 0), 0, 0)
option_pnl = np.insert(
    np.array(history['option_value'][1:]) + np.array(history['cash'][1:])
    - np.array(history['option_value'][:-1]) - np.array(history['cash'][:-1]),
    0, 0,
)
total_pnl = stock_pnl + option_pnl

steps = np.arange(1, len(history['delta']) + 1)

fig, ax = plt.subplots(nrows = 2, ncols = 1, sharex = True, figsize = (12, 8))
position_ax, reward_ax = ax

# Top panel: the scaled, sign-flipped delta next to the recorded n.
position_ax.plot(steps, np.array(history['delta']) * -env.L * env.m, color = 'green', label = 'delta', lw = 1.5)
position_ax.plot(steps, history['n'], color = 'blue', label = 'n', lw = 1.5)
position_ax.set_title(f'K: {env.K}')
position_ax.set_ylabel('n')
position_ax.legend()

# Bottom panel: raw reward next to the reward clipped to [-args.clip, args.clip].
reward_ax.plot(steps, history['reward'], color = 'blue', label = 'reward', lw = 1.5)
reward_ax.plot(steps, np.clip(history['reward'], -args.clip, args.clip), color = 'red', label = 'clipped', lw = 1.5)
reward_ax.set_xlabel('step')
reward_ax.legend()
plt.show()

## Evaluate State Space

In [None]:
def generate_state_space_data(env, device, estimator, data, variable = 'S'):
    """Return the estimator's arg-max action along a one-dimensional state sweep.

    Parameters
    ----------
    env : environment providing ``reset()``, ``S0``, ``high`` and ``action_map``.
    device : torch.device the input batch is moved to.
    estimator : callable mapping an ``(N, 4)`` float32 tensor to ``(N, A)`` scores.
    data : mapping with the keys 'S', 't', 'n' and 'K'. The entry named by
        ``variable`` is the swept quantity (any array-like); the other entries
        are scalars repeated for every sweep point.
    variable : which of 'S', 'n' or 't' is swept.

    Returns
    -------
    list
        ``env.action_map[argmax]`` for each sweep point, in sweep order.

    Raises
    ------
    ValueError
        If ``variable`` is not 'S', 'n' or 't'.
    """
    if variable not in ('S', 'n', 't'):
        raise ValueError(f"variable must be 'S', 'n' or 't', got {variable!r}")

    env.reset() # Only need S0 even though strike might be different
    S0 = env.S0

    # Column vector of the swept values; fixed quantities are broadcast to match.
    swept = np.asarray(data[variable]).reshape(-1, 1)
    ones = np.ones(swept.shape)

    def column(name, scale):
        values = swept if name == variable else ones * data[name]
        return values / scale

    # Each input row is (S / S0, t, n / env.high, K / S0).
    state = np.hstack((
        column('S', S0),
        column('t', 1),
        column('n', env.high),
        column('K', S0),
    )).astype(np.float32)
    state = torch.from_numpy(state).to(device)

    with torch.no_grad():
        # .cpu() is needed before .numpy() whenever the output lives on a GPU.
        scores = estimator(state).cpu().numpy()

    return [env.action_map[a] for a in np.argmax(scores, axis = 1)]

### List of Plots:
1. Fixed `n`, discrete values of `t` and vary `S`
2. Fixed `t`, discrete values of `n` and vary `S`
3. Fixed `t`, discrete values of `S` and vary `n`
4. Fixed `S`, discrete values of `t` and vary `n`
5. Fixed `n`, discrete values of `S` and vary `t`
6. Fixed `S`, discrete values of `n` and vary `t`

In [None]:
# Plot 1: n held fixed, S swept, one curve per value of t.
K = 100
n = -50
t = [0.9, 0.7, 0.5,]
S = np.linspace(85, 115, 10000)
colors = plt.cm.coolwarm(np.linspace(0, 1, len(t)))

fig, ax = plt.subplots(figsize = (12, 8))
for val, color in zip(t, colors):
    actions = generate_state_space_data(env, device, estimator, dict(S = S, t = val, n = n, K = K), variable = 'S')
    ax.plot(S, actions, label = f't: {val}', color = color, lw = 1.5)
ax.set(xlabel = 'S', ylabel = 'action', title = f'n: {n}')
#ax.set_ylim([-100, 100])
ax.legend()
#fig.savefig(f'fixed_n_discrete_t_variable_S.png', dpi = 300)
plt.show()

In [None]:
# Plot 2: t held fixed, S swept, one curve per value of n.
K = 100
t = 0.9
n = [-60, -55, -50, -45, -40]
S = np.linspace(85, 115, 10000)
colors = plt.cm.coolwarm(np.linspace(0, 1, len(n)))

fig, ax = plt.subplots(figsize = (12, 8))
for val, color in zip(n, colors):
    actions = generate_state_space_data(env, device, estimator, dict(S = S, t = t, n = val, K = K), variable = 'S')
    ax.plot(S, actions, label = f'n: {val}', color = color, lw = 1.5)
ax.set(xlabel = 'S', ylabel = 'action', title = f't: {t}')
#ax.set_ylim([-100, 100])
ax.legend()
#fig.savefig(f'fixed_t_discrete_n_variable_S.png', dpi = 300)
plt.show()

In [None]:
# Plot 3: t held fixed, n swept, one curve per value of S.
K = 100
t = 0.5
S = [80, 90, 100, 110, 120]
n = np.linspace(-100, 100, 10000)
colors = plt.cm.coolwarm(np.linspace(0, 1, len(S)))

fig, ax = plt.subplots(figsize = (12, 8))
for val, color in zip(S, colors):
    actions = generate_state_space_data(env, device, estimator, dict(S = val, t = t, n = n, K = K), variable = 'n')
    ax.plot(n, actions, label = f'S: {val}', color = color, lw = 1.5)
ax.set(xlabel = 'n', ylabel = 'action', title = f't: {t}')
ax.set_ylim([-100, 100])
ax.legend()
#fig.savefig(f'fixed_t_discrete_S_variable_n.png', dpi = 300)
plt.show()

In [None]:
# Plot 4: S held fixed, n swept, one curve per value of t.
K = 100
S = 100
t = [0.9, 0.7, 0.5, 0.3, 0.1]
n = np.linspace(-100, 100, 10000)
colors = plt.cm.coolwarm(np.linspace(0, 1, len(t)))

fig, ax = plt.subplots(figsize = (12, 8))
for val, color in zip(t, colors):
    actions = generate_state_space_data(env, device, estimator, dict(S = S, t = val, n = n, K = K), variable = 'n')
    ax.plot(n, actions, label = f't: {val}', color = color, lw = 1.5)
ax.set(xlabel = 'n', ylabel = 'action', title = f'S: {S}')
ax.set_ylim([-100, 100])
ax.legend()
#fig.savefig(f'fixed_S_discrete_t_variable_n.png', dpi = 300)
plt.show()

In [None]:
# Plot 5: n held fixed, t swept, one curve per value of S.
K = 100
n = 0
S = [80, 90, 100, 110, 120]
t = np.linspace(0.9, 0, 10000)
colors = plt.cm.coolwarm(np.linspace(0, 1, len(S)))

fig, ax = plt.subplots(figsize = (12, 8))
for val, color in zip(S, colors):
    actions = generate_state_space_data(env, device, estimator, dict(S = val, t = t, n = n, K = K), variable = 't')
    ax.plot(t, actions, label = f'S: {val}', color = color, lw = 1.5)
ax.set(xlabel = 't', ylabel = 'action', title = f'n: {n}')
ax.set_ylim([-100, 100])
ax.legend()
#fig.savefig(f'fixed_n_discrete_S_variable_t.png', dpi = 300)
plt.show()

In [None]:
# Plot 6: S held fixed, t swept, one curve per value of n.
K = 100
S = 100
n = [-100, -50, 0, 50, 100]
t = np.linspace(0.9, 0, 10000)
colors = plt.cm.coolwarm(np.linspace(0, 1, len(n)))

fig, ax = plt.subplots(figsize = (12, 8))
for val, color in zip(n, colors):
    actions = generate_state_space_data(env, device, estimator, dict(S = S, t = t, n = val, K = K), variable = 't')
    ax.plot(t, actions, label = f'n: {val}', color = color, lw = 1.5)
ax.set(xlabel = 't', ylabel = 'action', title = f'S: {S}')
ax.set_ylim([-100, 100])
ax.legend()
#fig.savefig(f'fixed_S_discrete_n_variable_t.png', dpi = 300)
plt.show()