In [61]:
# reg
import math
import numpy as np
import pandas as pd
import os
import datetime

# learning
from sklearn.model_selection import train_test_split
import torch
import gpytorch
from gpytorch.priors import LogNormalPrior, NormalPrior, UniformPrior
import pyro
from pyro.infer.mcmc import NUTS, MCMC, HMC

# plotting
from matplotlib import pyplot as plt


#### Getting SPY option chain

In [62]:
daily_chains = {}

for file in os.listdir('../option_data/spy_data'):
    if file[-4:] == '.csv':
                
        df = pd.read_csv('../option_data/spy_data/' + file)        
        
        # moving to datetime and making features
        df['quote_datetime'] = pd.to_datetime(df['quote_datetime'])
        df['expiration'] = pd.to_datetime(df['expiration'])
        df['quote_date'] = df['quote_datetime'][0].date()
        df['quote_date'] = pd.to_datetime(df['quote_date'])
        
        # getting only 4:00 quotes
        eod = datetime.datetime.combine(df['quote_datetime'][0].date(), datetime.time(16,0, 0))
        df = df.loc[df['quote_datetime'] == eod]
        
        # getting time to expiration and moneyness
        df['T'] = df['expiration'] - df['quote_date']
        df['T'] = df['T'].dt.days
        df['moneyness'] = df['active_underlying_price'] / df['strike'] 
        
        # converting to ML features
        df['T'] = df['T'].astype(np.float32)
        df['mny'] = df['moneyness'].astype(np.float32)
        df['iv'] = df['implied_volatility'].astype(np.float32)
        
        # filtering for research paper criteria
        df = df.loc[(df['close']!=0) & (df['iv']!=0) & (df['T']>=20) & (df['T']<=365) & (df['mny']>0.7) & (df['mny']<1.3)]
                
        # splitting up into calls/puts
        calls = df.loc[df['option_type']=='C'][['T', 'mny', 'iv']]
        puts = df.loc[df['option_type']=='P'][['T', 'mny', 'iv']]
        opts = {'calls':calls, 'puts':puts}
    
        # assinging to date
        daily_chains[file[-14:-4]] = opts 

In [63]:
_ = [print(k) for k in sorted(daily_chains.keys())]

2023-10-02
2023-10-03
2023-10-04
2023-10-05
2023-10-06
2023-10-09
2023-10-10
2023-10-11
2023-10-12
2023-10-13
2023-10-16
2023-10-17
2023-10-18
2023-10-19
2023-10-20
2023-10-23
2023-10-24
2023-10-25
2023-10-26
2023-10-27
2023-10-30
2023-10-31


### Model Setup

In [64]:
# We will use the simplest form of GP model, exact inference
class ExactGPModel(gpytorch.models.ExactGP):
    def __init__(self, train_x, train_y, likelihood):
        super(ExactGPModel, self).__init__(train_x, train_y, likelihood)
        self.mean_module = gpytorch.means.ConstantMean()
        self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel())

    def forward(self, x):
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)

### Running Sampling

In [65]:
UniformPrior.__dict__

mappingproxy({'__module__': 'gpytorch.priors.torch_priors',
              '__doc__': '\n    Uniform prior.\n    ',
              '__init__': <function gpytorch.priors.torch_priors.UniformPrior.__init__(self, a, b, validate_args=None, transform=None)>,
              'expand': <function gpytorch.priors.torch_priors.UniformPrior.expand(self, batch_shape)>,
              '__abstractmethods__': frozenset(),
              '_abc_impl': <_abc._abc_data at 0x1559d0fc0>,
              '__signature__': <Signature (a, b, validate_args=None, transform=None)>})

In [66]:
gp_models = {}
#likelihood = gpytorch.likelihoods.GaussianLikelihood()

# #for day, options in daily_chains.items():
    # day, options = list(daily_chains.items())[0]


for day, options in daily_chains.items():
    
    info = {}

    # calls and puts
    c = options['calls']
    p = options['puts']

    # feature transformations
    c['mm_T'] = (c['T'] - 20)/(365-20)
    c['mm_mny'] = (c['mny'] - 0.7)/(1.3-0.7)
    c['ln_iv'] = np.log(c['iv'])

    p['mm_T'] = (p['T'] - 20)/(365-20)
    p['mm_mny'] = (p['mny'] - 0.7)/(1.3-0.7)
    p['ln_iv'] = np.log(p['iv'])

    # test/train split
    c_train, c_test = train_test_split(c, test_size=0.2)
    p_train, p_test = train_test_split(p, test_size=0.2)
    info['call_train'] = c_train
    info['call_test'] = c_test
    info['put_train'] = p_train
    info['put_test'] = p_test

    # into tensors
    cx_train = torch.tensor(c_train[['mm_T']].values) #, 'mm_mny']].values)
    cy_train = torch.tensor(c_train[['ln_iv']].values).reshape(len(c_train))
    cx_test = torch.tensor(c_test[['mm_T', 'mm_mny']].values)
    cy_test = torch.tensor(c_test[['ln_iv']].values).reshape(len(c_test))

    px_train = torch.tensor(p_train[['mm_T', 'mm_mny']].values)
    py_train = torch.tensor(p_train[['ln_iv']].values).reshape(len(p_train))
    px_test = torch.tensor(p_test[['mm_T', 'mm_mny']].values)
    py_test = torch.tensor(p_test[['ln_iv']].values).reshape(len(p_test))

    num_samples = 100
    warmup_steps = 100

    # Use a positive constraint instead of usual GreaterThan(1e-4) so that LogNormal has support over full range.
    c_likelihood = gpytorch.likelihoods.GaussianLikelihood()#noise_constraint=gpytorch.constraints.Positive())
    c_model = ExactGPModel(cx_train, cy_train, c_likelihood)
    
    p_likelihood = gpytorch.likelihoods.GaussianLikelihood()
    p_model = ExactGPModel(px_train, py_train, p_likelihood)

    #model.covar_module.base_kernel.register_prior("lengthscale_prior", UniformPrior(0.01, 0.5), "lengthscale")
    #model.covar_module.register_prior("outputscale_prior", UniformPrior(1, 2), "outputscale")

    #likelihood.register_prior("noise_prior", UniformPrior(0.01, 0.5), "noise")

    # prepare priors
    c_model.mean_module.register_prior("mean_prior", UniformPrior(-1, 1), "constant")
    c_model.covar_module.base_kernel.register_prior("lengthscale_prior", UniformPrior(0.01, 0.5), "lengthscale")
    c_model.covar_module.register_prior("outputscale_prior", UniformPrior(1, 2), "outputscale")
    c_likelihood.register_prior("noise_prior", UniformPrior(0.01, 0.5), "noise")

    p_model.mean_module.register_prior("mean_prior", UniformPrior(-1, 1), "constant")
    p_model.covar_module.base_kernel.register_prior("lengthscale_prior", UniformPrior(0.01, 0.5), "lengthscale")
    p_model.covar_module.register_prior("outputscale_prior", UniformPrior(1, 2), "outputscale")
    p_likelihood.register_prior("noise_prior", UniformPrior(0.01, 0.5), "noise")

    # define mlls
    c_mll = gpytorch.mlls.ExactMarginalLogLikelihood(c_likelihood, c_model)
    p_mll = gpytorch.mlls.ExactMarginalLogLikelihood(p_likelihood, p_model)

    # define pyro model
    def c_pyro_model(x, y):
        with gpytorch.settings.fast_computations(False, False, False):
            sampled_model = c_model.pyro_sample_from_prior()
            output = sampled_model.likelihood(sampled_model(x))
            pyro.sample("obs", output, obs=y)
        return y
    
    # define pyro for p too, maybe better way to do this but wasnt sure 
    # if i could pass the model and still have it work
    def p_pyro_model(x, y):
        with gpytorch.settings.fast_computations(False, False, False):
            sampled_model = p_model.pyro_sample_from_prior()
            output = sampled_model.likelihood(sampled_model(x))
            pyro.sample("obs", output, obs=y)
        return y

    print('STARTING CALL MCMC RUN FOR ', day)

    # set no u-turn sampler for HMC
    c_nuts_kernel = NUTS(c_pyro_model)
    # run mcmc to convergence
    c_mcmc_run = MCMC(c_nuts_kernel, num_samples=num_samples, warmup_steps=warmup_steps, disable_progbar=False)
    c_mcmc_run.run(cx_train, cy_train)
    
    print('STARTING PUT MCMC RUN FOR ', day)

    # set no u-turn sampler for HMC
    p_nuts_kernel = NUTS(p_pyro_model)
    p_mcmc_run = MCMC(p_nuts_kernel, num_samples=num_samples, warmup_steps=warmup_steps, disable_progbar=False)
    # run mcmc to convergence
    p_mcmc_run.run(px_train, py_train)

    # # set to eval mode
    # c_model.eval()
    # p_model.eval()
    


    # # get samples and predictions
    # with torch.no_grad():
    #     # get samples
    #     c_samples = c_mcmc_run.get_samples()
    #     p_samples = p_mcmc_run.get_samples()

    #     # get predictions
    #     c_pred = c_likelihood(c_model(cx_test))
    #     p_pred = p_likelihood(p_model(px_test))
    
    # # save above
    # info['call_samples'] = c_samples
    # info['put_samples'] = p_samples
    
    # info['call_pred'] = c_pred
    # info['put_pred'] = p_pred

    # # getting RMSE
    # c_rmse = torch.sqrt(torch.mean(torch.pow(math.e ** c_pred.mean - math.e ** cy_test, 2)))
    # p_rmse = torch.sqrt(torch.mean(torch.pow(math.e ** p_pred.mean - math.e ** py_test, 2)))
    # info['call_RMSE'] = c_rmse
    # info['put_RMSE'] = p_rmse

    # save samples
    info['call_samples'] = c_mcmc_run.get_samples()
    info['put_samples'] = p_mcmc_run.get_samples()

    torch.save(c_mcmc_run.get_samples(), 'samples/call_BGP_'+day+'.pt')
    torch.save(p_mcmc_run.get_samples(), 'samples/put_BGP_'+day+'.pt')

    # save likelihoods
    info['call_likelihood'] = c_likelihood
    info['put_likelihood'] = p_likelihood

    # save models
    torch.save(c_model.state_dict(), 'models/call_BGP_'+day+'.pt')
    torch.save(p_model.state_dict(), 'models/put_BGP_'+day+'.pt')

    gp_models[day] = info





STARTING CALL MCMC RUN FOR  2023-10-09


Sample: 100%|██████████| 200/200 [03:52,  1.16s/it, step size=6.52e-01, acc. prob=0.904]


STARTING PUT MCMC RUN FOR  2023-10-09


Sample: 100%|██████████| 200/200 [02:25,  1.37it/s, step size=4.27e-01, acc. prob=0.964]


STARTING CALL MCMC RUN FOR  2023-10-20


Sample: 100%|██████████| 200/200 [06:14,  1.87s/it, step size=4.81e-01, acc. prob=0.947]


STARTING PUT MCMC RUN FOR  2023-10-20


Sample: 100%|██████████| 200/200 [03:41,  1.11s/it, step size=5.11e-01, acc. prob=0.888]


STARTING CALL MCMC RUN FOR  2023-10-23


Sample: 100%|██████████| 200/200 [04:42,  1.41s/it, step size=5.45e-01, acc. prob=0.896]


STARTING PUT MCMC RUN FOR  2023-10-23


Sample: 100%|██████████| 200/200 [02:59,  1.11it/s, step size=6.74e-01, acc. prob=0.825]


STARTING CALL MCMC RUN FOR  2023-10-27


Sample: 100%|██████████| 200/200 [06:43,  2.02s/it, step size=2.74e-01, acc. prob=0.898]


STARTING PUT MCMC RUN FOR  2023-10-27


Sample: 100%|██████████| 200/200 [04:55,  1.48s/it, step size=4.41e-01, acc. prob=0.933]


STARTING CALL MCMC RUN FOR  2023-10-26


Sample: 100%|██████████| 200/200 [09:23,  2.82s/it, step size=1.71e-01, acc. prob=0.549]


STARTING PUT MCMC RUN FOR  2023-10-26


Sample: 100%|██████████| 200/200 [2:04:30, 37.35s/it, step size=4.58e-17, acc. prob=0.000]


STARTING CALL MCMC RUN FOR  2023-10-30


Sample: 100%|██████████| 200/200 [05:08,  1.54s/it, step size=2.70e-01, acc. prob=0.952]


STARTING PUT MCMC RUN FOR  2023-10-30


Sample: 100%|██████████| 200/200 [04:09,  1.25s/it, step size=3.52e-01, acc. prob=0.963]


STARTING CALL MCMC RUN FOR  2023-10-24


Sample: 100%|██████████| 200/200 [04:16,  1.28s/it, step size=1.99e-01, acc. prob=0.914]


STARTING PUT MCMC RUN FOR  2023-10-24


Sample: 100%|██████████| 200/200 [02:06,  1.58it/s, step size=4.21e-01, acc. prob=0.949]


STARTING CALL MCMC RUN FOR  2023-10-18


Sample: 100%|██████████| 200/200 [02:58,  1.12it/s, step size=4.72e-01, acc. prob=0.933]


STARTING PUT MCMC RUN FOR  2023-10-18


Sample: 100%|██████████| 200/200 [03:18,  1.01it/s, step size=5.34e-01, acc. prob=0.908]


STARTING CALL MCMC RUN FOR  2023-10-19


Sample: 100%|██████████| 200/200 [03:50,  1.15s/it, step size=9.94e-01, acc. prob=0.743]


STARTING PUT MCMC RUN FOR  2023-10-19


Sample: 100%|██████████| 200/200 [02:17,  1.45it/s, step size=1.28e+00, acc. prob=0.608]


STARTING CALL MCMC RUN FOR  2023-10-25


Sample: 100%|██████████| 200/200 [04:34,  1.37s/it, step size=3.58e-01, acc. prob=0.932]


STARTING PUT MCMC RUN FOR  2023-10-25


Sample: 100%|██████████| 200/200 [03:34,  1.07s/it, step size=5.93e-01, acc. prob=0.869]


STARTING CALL MCMC RUN FOR  2023-10-31


Sample: 100%|██████████| 200/200 [03:12,  1.04it/s, step size=9.73e-01, acc. prob=0.616]


STARTING PUT MCMC RUN FOR  2023-10-31


Sample: 100%|██████████| 200/200 [02:36,  1.28it/s, step size=5.13e-01, acc. prob=0.851]


STARTING CALL MCMC RUN FOR  2023-10-03


Sample: 100%|██████████| 200/200 [27:05,  8.13s/it, step size=2.55e-01, acc. prob=0.971]


STARTING PUT MCMC RUN FOR  2023-10-03


Sample: 100%|██████████| 200/200 [33:16,  9.98s/it, step size=5.06e-01, acc. prob=0.900] 


STARTING CALL MCMC RUN FOR  2023-10-17


Sample: 100%|██████████| 200/200 [10:09,  3.05s/it, step size=5.93e-01, acc. prob=0.914] 


STARTING PUT MCMC RUN FOR  2023-10-17


Sample: 100%|██████████| 200/200 [04:06,  1.23s/it, step size=6.44e-01, acc. prob=0.931]


STARTING CALL MCMC RUN FOR  2023-10-16


Sample: 100%|██████████| 200/200 [09:41,  2.91s/it, step size=8.98e-01, acc. prob=0.805]


STARTING PUT MCMC RUN FOR  2023-10-16


Sample: 100%|██████████| 200/200 [03:12,  1.04it/s, step size=4.78e-01, acc. prob=0.951]


STARTING CALL MCMC RUN FOR  2023-10-02


Sample: 100%|██████████| 200/200 [26:20,  7.90s/it, step size=3.03e-01, acc. prob=0.977] 


STARTING PUT MCMC RUN FOR  2023-10-02


Sample: 100%|██████████| 200/200 [18:13,  5.47s/it, step size=4.47e-01, acc. prob=0.974]


STARTING CALL MCMC RUN FOR  2023-10-06


Sample: 100%|██████████| 200/200 [07:37,  2.29s/it, step size=7.93e-01, acc. prob=0.888]


STARTING PUT MCMC RUN FOR  2023-10-06


Sample: 100%|██████████| 200/200 [06:25,  1.93s/it, step size=3.47e-01, acc. prob=0.928]


STARTING CALL MCMC RUN FOR  2023-10-12


Sample: 100%|██████████| 200/200 [20:00,  6.00s/it, step size=4.21e-01, acc. prob=0.929] 


STARTING PUT MCMC RUN FOR  2023-10-12


Sample: 100%|██████████| 200/200 [17:54,  5.37s/it, step size=4.53e-01, acc. prob=0.961]


STARTING CALL MCMC RUN FOR  2023-10-13


Sample: 100%|██████████| 200/200 [10:24,  3.12s/it, step size=5.10e-01, acc. prob=0.857]


STARTING PUT MCMC RUN FOR  2023-10-13


Sample: 100%|██████████| 200/200 [09:29,  2.85s/it, step size=5.67e-01, acc. prob=0.880]


STARTING CALL MCMC RUN FOR  2023-10-11


Sample: 100%|██████████| 200/200 [02:59,  1.11it/s, step size=7.14e-01, acc. prob=0.847]


STARTING PUT MCMC RUN FOR  2023-10-11


Sample: 100%|██████████| 200/200 [03:08,  1.06it/s, step size=3.73e-01, acc. prob=0.950]


STARTING CALL MCMC RUN FOR  2023-10-05


Sample: 100%|██████████| 200/200 [04:46,  1.43s/it, step size=6.48e-01, acc. prob=0.893]


STARTING PUT MCMC RUN FOR  2023-10-05


Warmup:  15%|█▌        | 30/200 [01:08,  4.05s/it, step size=4.37e-02, acc. prob=0.751]

In [None]:
likelihood = gpytorch.likelihoods.GaussianLikelihood()#noise_constraint=gpytorch.constraints.Positive())
model = ExactGPModel(cx_train, cy_train, likelihood)



model.mean_module.register_prior("mean_prior", UniformPrior(0, 1), "constant")

model.mean_module

ConstantMean(
  (mean_prior): UniformPrior(low: 0.0, high: 1.0)
)

In [None]:
prior, closure, setting_closure = model.mean_module._priors["mean_prior"]
#prior.sample()

a= setting_closure(model.mean_module, prior.sample())

a._parameters

OrderedDict([('raw_constant',
              Parameter containing:
              tensor(0.1004, requires_grad=True))])

In [None]:
model.pyro_sample_from_prior()

_RandomExactGPModel(
  (likelihood): _RandomGaussianLikelihood(
    (noise_covar): _RandomHomoskedasticNoise(
      (raw_noise_constraint): GreaterThan(1.000E-04)
    )
  )
  (mean_module): _RandomConstantMean(
    (mean_prior): UniformPrior(low: 0.0, high: 1.0)
  )
  (covar_module): _RandomScaleKernel(
    (base_kernel): _RandomRBFKernel(
      (raw_lengthscale_constraint): Positive()
    )
    (raw_outputscale_constraint): Positive()
  )
)

In [None]:
# exactly done as in docs:
import math
import torch
import gpytorch
import pyro
from pyro.infer.mcmc import NUTS, MCMC, HMC
from matplotlib import pyplot as plt

%matplotlib inline
%load_ext autoreload
%autoreload 2

In [None]:
# Training data is 11 points in [0,1] inclusive regularly spaced
train_x = torch.linspace(0, 1, 4)
# True function is sin(2*pi*x) with Gaussian noise
train_y = torch.sin(train_x * (2 * math.pi)) + torch.randn(train_x.size()) * 0.2

In [None]:
# We will use the simplest form of GP model, exact inference
class ExactGPModel(gpytorch.models.ExactGP):
    def __init__(self, train_x, train_y, likelihood):
        super(ExactGPModel, self).__init__(train_x, train_y, likelihood)
        self.mean_module = gpytorch.means.ConstantMean()
        self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel())

    def forward(self, x):
        mean_x = self.mean_module(x)
        covar_x = self.covar_module(x)
        return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)

In [None]:
import os
smoke_test = ('CI' in os.environ)
num_samples = 2 if smoke_test else 100
warmup_steps = 2 if smoke_test else 100


from gpytorch.priors import LogNormalPrior, NormalPrior, UniformPrior
# Use a positive constraint instead of usual GreaterThan(1e-4) so that LogNormal has support over full range.
likelihood = gpytorch.likelihoods.GaussianLikelihood(noise_constraint=gpytorch.constraints.Positive())
model = ExactGPModel(train_x, train_y, likelihood)

model.mean_module.register_prior("mean_prior", UniformPrior(-1, 1), "constant")
model.covar_module.base_kernel.register_prior("lengthscale_prior", UniformPrior(0.01, 0.5), "lengthscale")
model.covar_module.register_prior("outputscale_prior", UniformPrior(1, 2), "outputscale")
likelihood.register_prior("noise_prior", UniformPrior(0.01, 0.5), "noise")

mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)

def pyro_model(x, y):
    with gpytorch.settings.fast_computations(False, False, False):
        sampled_model = model.pyro_sample_from_prior()
        output = sampled_model.likelihood(sampled_model(x))
        pyro.sample("obs", output, obs=y)
    return y

nuts_kernel = NUTS(pyro_model)
mcmc_run = MCMC(nuts_kernel, num_samples=num_samples, warmup_steps=warmup_steps, disable_progbar=smoke_test)
mcmc_run.run(train_x, train_y)

Sample: 100%|██████████| 200/200 [00:14, 14.14it/s, step size=4.57e-01, acc. prob=0.959]
