In [None]:
import os
# Move the working directory up one level; the relative paths used further
# down (e.g. 'experiments/<name>/best.pth') are resolved against it.
# The guard makes the cell idempotent: re-running it in the same kernel must
# not climb one more directory each time.
if 'PROJECT_ROOT' not in globals():
    PROJECT_ROOT = os.path.dirname(os.getcwd())
    os.chdir(PROJECT_ROOT)

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import gym
from agent import *
from optionpricing import *
import yaml
import torch
from collections import defaultdict

In [None]:
import matplotlib.style as style
# Matplotlib 3.6 renamed the bundled seaborn style sheets to 'seaborn-v0_8-*'
# and later releases dropped the old names; use whichever this install has.
style.use('seaborn-v0_8-poster' if 'seaborn-v0_8-poster' in style.available else 'seaborn-poster')

In [None]:
def load_estimator(env, device, ngpu, experiment_folder, kind = 'best'):
    """
    Build an Estimator sized for `env` and load saved weights into it.

    env: option pricing environment
    device: torch device
    ngpu: number of gpu
    experiment_folder: name of experiment (as passed in --savedir)
    kind: model to load; options- best, checkpoint

    Returns the estimator, switched to eval mode.
    Raises ValueError if `kind` is neither 'best' nor 'checkpoint'.
    """
    # Fail fast on a bad `kind`, before touching the environment or building a model.
    if kind not in ('best', 'checkpoint'):
        raise ValueError('Invalid choice for kind')

    state_shape = env.observation_space.shape
    # 1-D observation spaces are passed as a plain int, anything else as the full shape.
    state_space_dim = state_shape[0] if len(state_shape) == 1 else state_shape

    estimator = Estimator(device, ngpu, state_space_dim, env.action_space.n)

    # The file is named after `kind`: experiments/<experiment_folder>/<kind>.pth
    checkpoint_path = os.path.join('experiments', experiment_folder, f'{kind}.pth')
    # map_location remaps stored tensors onto `device`, so a checkpoint written on a
    # GPU machine can still be loaded on a CPU-only one.
    # NOTE: torch.load unpickles the file; only load checkpoints you trust.
    checkpoint = torch.load(checkpoint_path, map_location=device)

    estimator.load_state_dict(checkpoint['estimator'])
    estimator.eval()

    return estimator

In [None]:
def simulate_episode(env, device, estimator):
    """
    Run one episode, always taking the estimator's arg-max action, and record
    per-step quantities.

    env: environment exposing reset(), step(action) -> (state, reward, done, info)
         and the attributes delta, stock_value, option_value, cash, n
    device: torch device the state tensors are moved to
    estimator: callable mapping a state tensor to a tensor of action scores

    Returns a dict of numpy arrays, one entry per step taken:
      delta, stock_value, option_value, cash - read from env BEFORE the step
      reward, n                              - read AFTER the step
      stock_pnl   - change in env.stock_value across the step
      option_pnl  - change in env.option_value + env.cash across the step
      total_pnl   - stock_pnl + option_pnl
    """
    state = torch.from_numpy(env.reset()).to(device)
    done = False

    history = defaultdict(list)

    while not done:
        # Snapshot the book before acting; these are also the PnL baselines.
        stock_before = env.stock_value
        option_before = env.option_value
        cash_before = env.cash

        history['delta'].append(env.delta)
        history['stock_value'].append(stock_before)
        history['option_value'].append(option_before)
        history['cash'].append(cash_before)

        with torch.no_grad():
            # .cpu() is required before .numpy() when the scores live on a GPU.
            action = np.argmax(estimator(state).cpu().numpy())
        state, reward, done, info = env.step(action)

        history['reward'].append(reward)
        history['n'].append(env.n)

        # PnL of this step = value after the step minus value before it. Reading the
        # post-step values directly gives every step (including the first and the
        # last) its own entry, with no exception swallowing and no duplicated tail.
        # assumes env keeps its terminal values readable after the final step
        # (i.e. it does not reset itself on done) - TODO confirm
        stock_pnl = env.stock_value - stock_before
        option_pnl = env.option_value + env.cash - option_before - cash_before
        history['stock_pnl'].append(stock_pnl)
        history['option_pnl'].append(option_pnl)
        history['total_pnl'].append(stock_pnl + option_pnl)

        state = torch.from_numpy(state).to(device)

    return {k: np.array(v) for k, v in history.items()}

In [None]:
def cost_volatility_kde(env, device, estimator):
    """
    Placeholder: nothing is implemented yet, the call always returns None.

    NOTE(review): the name suggests a kernel-density plot of cost against
    volatility for the given env/estimator - confirm the intent before
    filling this in.
    """
    return None

In [None]:
# Sub-folder of 'experiments/' whose saved model is loaded by load_estimator below.
experiment_folder = 'Oct1_lr-high_ne-low_clip-inc'

In [None]:
# Settings forwarded as keyword arguments to OptionPricingEnv.configure().
# The inline notes are the original author's; the remaining keys are not
# explained anywhere in this notebook.
config = dict(
    S=100,
    T=10,        # 10 days
    L=1,
    m=100,       # L options for m stocks
    n=0,
    K=100,
    D=5,
    mu=0,
    sigma=0.01,
    r=0,
    ss=5,
    kappa=0.1,
)

env = OptionPricingEnv()
env.configure(**config)

In [None]:
# Use the first CUDA device when one is available, otherwise the CPU;
# ngpu is 1 or 0 accordingly.
cuda_ok = torch.cuda.is_available()
device = torch.device('cuda:0') if cuda_ok else torch.device('cpu')
ngpu = int(cuda_ok)

In [None]:
# Reset the environment once and remember env.S; init_stock_price is the
# divisor used to scale the PnL curves in the plotting cell.
# NOTE(review): `state` is not read by any later cell visible here.
state = env.reset()
init_stock_price = env.S

In [None]:
# Load the 'best' saved model of the chosen experiment (the alternative is 'checkpoint').
estimator = load_estimator(env, device, ngpu, experiment_folder, kind='best')

In [None]:
# Run one episode here so that `history` is defined before the cells that use it
# (required for Restart Kernel -> Run All to succeed).
history = simulate_episode(env, device, estimator)

# Per-step total of stock value, option value and cash.
history['stock_value'] + history['option_value'] + history['cash']

In [None]:
# Uses `history` as returned by simulate_episode(env, device, estimator).
steps = np.arange(1, len(history['delta']) + 1, 1)

# (values, colour, legend label). delta is scaled by -L*m and the PnL series
# are divided by the initial stock price.
series = [
    (history['n'], 'blue', 'n'),
    (history['delta'] * -env.L * env.m, 'green', 'delta'),
    (history['option_pnl'] / init_stock_price, 'darkviolet', 'option.pnl'),
    (history['stock_pnl'] / init_stock_price, 'darkorange', 'stock.pnl'),
    (history['total_pnl'] / init_stock_price, 'red', 'total.pnl'),
]

fig, ax = plt.subplots()
for values, colour, label in series:
    ax.plot(steps, values, color = colour, lw = 1.5, label = label)
ax.set_xlabel('step')
ax.set_title('Simulated episode')
ax.legend()
plt.show()

In [None]:
# Sanity check: every recorded series should have the same number of entries.
for key in history:
    print(f'{key}: {len(history[key])}')