In [49]:
# reg
import math
import numpy as np
import pandas as pd
import os
import datetime

# learning
from sklearn.model_selection import train_test_split
import torch
import gpytorch
from gpytorch.priors import LogNormalPrior, NormalPrior, UniformPrior
import pyro
from pyro.infer.mcmc import NUTS, MCMC, HMC

# plotting
from matplotlib import pyplot as plt


#### Getting SPY option chain

In [50]:
# Build one end-of-day option chain per CSV file.
# daily_chains maps the 10 characters that precede '.csv' in the file name
# (the quote date, per the original "assigning to date" note) to
# {'calls': DataFrame, 'puts': DataFrame}; each frame has the columns T, mny, iv.
daily_chains = {}

# os.listdir returns entries in arbitrary order; sorting makes the insertion
# order of daily_chains (and hence "the first day") deterministic.
for file in sorted(os.listdir('../option_data/spy_data')):
    if not file.endswith('.csv'):
        continue

    df = pd.read_csv(os.path.join('../option_data/spy_data', file))
    if df.empty:
        # nothing to derive a quote date from; skip instead of failing on row 0
        continue

    # moving to datetime and making features
    df['quote_datetime'] = pd.to_datetime(df['quote_datetime'])
    df['expiration'] = pd.to_datetime(df['expiration'])
    # the quote date is taken from the first row of the file (positional lookup)
    first_quote_day = df['quote_datetime'].iloc[0].date()
    df['quote_date'] = pd.Timestamp(first_quote_day)

    # getting only 4:00 quotes; .copy() so the column assignments below write to
    # an independent frame rather than a slice of the raw data
    eod = datetime.datetime.combine(first_quote_day, datetime.time(16, 0, 0))
    df = df.loc[df['quote_datetime'] == eod].copy()

    # getting time to expiration (whole days) and moneyness (underlying / strike)
    df['T'] = (df['expiration'] - df['quote_date']).dt.days
    df['moneyness'] = df['active_underlying_price'] / df['strike']

    # converting to ML features
    df['T'] = df['T'].astype(np.float32)
    df['mny'] = df['moneyness'].astype(np.float32)
    df['iv'] = df['implied_volatility'].astype(np.float32)

    # filtering for research paper criteria
    keep = (
        (df['close'] != 0)
        & (df['iv'] != 0)
        & (df['T'] >= 20)
        & (df['T'] <= 365)
        & (df['mny'] > 0.7)
        & (df['mny'] < 1.3)
    )
    df = df.loc[keep]

    # splitting up into calls/puts; copies so that later cells can add columns
    # to these frames without touching (or warning about) the parent frame
    calls = df.loc[df['option_type'] == 'C', ['T', 'mny', 'iv']].copy()
    puts = df.loc[df['option_type'] == 'P', ['T', 'mny', 'iv']].copy()
    opts = {'calls': calls, 'puts': puts}

    # assigning to date
    daily_chains[file[-14:-4]] = opts

### Model Setup

In [51]:
# We will use the simplest form of GP model, exact inference
class ExactGPModel(gpytorch.models.ExactGP):
    """Exact-inference GP with a constant mean and a scaled RBF kernel."""

    def __init__(self, train_x, train_y, likelihood):
        super().__init__(train_x, train_y, likelihood)
        self.mean_module = gpytorch.means.ConstantMean()
        rbf = gpytorch.kernels.RBFKernel()
        self.covar_module = gpytorch.kernels.ScaleKernel(rbf)

    def forward(self, x):
        """Evaluate mean and covariance at ``x`` and wrap them in a MultivariateNormal."""
        return gpytorch.distributions.MultivariateNormal(
            self.mean_module(x),
            self.covar_module(x),
        )

### Running Sampling

In [55]:
# Debugging aid: show what UniformPrior defines directly on the class.
vars(UniformPrior)

mappingproxy({'__module__': 'gpytorch.priors.torch_priors',
              '__doc__': '\n    Uniform prior.\n    ',
              '__init__': <function gpytorch.priors.torch_priors.UniformPrior.__init__(self, a, b, validate_args=None, transform=None)>,
              'expand': <function gpytorch.priors.torch_priors.UniformPrior.expand(self, batch_shape)>,
              '__abstractmethods__': frozenset(),
              '_abc_impl': <_abc._abc_data at 0x1654d0700>})

In [88]:
gp_models = {}

# Bounds used for min-max scaling; they match the filter applied when the
# chains were loaded (20 <= T <= 365 and 0.7 < mny < 1.3).
T_MIN, T_MAX = 20, 365
MNY_MIN, MNY_MAX = 0.7, 1.3
FEATURE_COLS = ['mm_T', 'mm_mny']
TARGET_COL = 'ln_iv'
SPLIT_SEED = 0  # fixed seed so the train/test split is reproducible


def _with_model_features(chain):
    """Return a new frame: ``chain`` plus scaled inputs and the log-IV target.

    Adds ``mm_T`` and ``mm_mny`` (min-max scaled ``T`` and ``mny``) and
    ``ln_iv`` (natural log of ``iv``). The input frame is not modified.
    """
    return chain.assign(
        mm_T=(chain['T'] - T_MIN) / (T_MAX - T_MIN),
        mm_mny=(chain['mny'] - MNY_MIN) / (MNY_MAX - MNY_MIN),
        ln_iv=np.log(chain['iv']),
    )


def _to_tensors(frame):
    """Return ``(x, y)``: an (n, 2) tensor of FEATURE_COLS and an (n,) tensor of TARGET_COL."""
    x = torch.tensor(frame[FEATURE_COLS].values)
    y = torch.tensor(frame[TARGET_COL].values)
    return x, y


# TODO: loop over every day in daily_chains; only the first entry is fitted for now.
day, options = next(iter(daily_chains.items()))

info = {}

# calls and puts, with feature transformations applied
c = _with_model_features(options['calls'])
p = _with_model_features(options['puts'])

# test/train split
c_train, c_test = train_test_split(c, test_size=0.2, random_state=SPLIT_SEED)
p_train, p_test = train_test_split(p, test_size=0.2, random_state=SPLIT_SEED)
info['call_train'] = c_train
info['call_test'] = c_test
info['put_train'] = p_train
info['put_test'] = p_test

# into tensors; the call training inputs now use the same two features as the
# call test inputs and the put tensors (previously only 'mm_T' was used here)
cx_train, cy_train = _to_tensors(c_train)
cx_test, cy_test = _to_tensors(c_test)

px_train, py_train = _to_tensors(p_train)
px_test, py_test = _to_tensors(p_test)

num_samples = 100
warmup_steps = 100

# Default GaussianLikelihood: no explicit noise constraint is passed here.
likelihood = gpytorch.likelihoods.GaussianLikelihood()
model = ExactGPModel(cx_train, cy_train, likelihood)

# Only the constant mean has a prior at this stage.
# NOTE(review): the target is log(iv), which is negative whenever iv < 1, so a
# Uniform(0, 1) prior on the mean may not cover the data - confirm.
# TODO: also register UniformPrior(0.01, 0.5) on the lengthscale, UniformPrior(1, 2)
# on the outputscale and UniformPrior(0.01, 0.5) on the likelihood noise.
model.mean_module.register_prior("mean_prior", UniformPrior(0, 1, validate_args=True), "constant")

mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)

def pyro_model(x, y):
    """Pyro model: sample the GP from its registered priors and condition on ``y``.

    Uses the module-level ``model``. Returns ``y`` unchanged.
    """
    # all three fast_computations flags are switched off while sampling
    no_fast_paths = gpytorch.settings.fast_computations(False, False, False)
    with no_fast_paths:
        gp_draw = model.pyro_sample_from_prior()
        predictive = gp_draw.likelihood(gp_draw(x))
        pyro.sample("obs", predictive, obs=y)
    return y

# Run NUTS on pyro_model over the call training tensors:
# warmup_steps warm-up iterations, then num_samples draws, progress bar shown.
nuts_kernel = NUTS(pyro_model)
mcmc_run = MCMC(
    nuts_kernel,
    num_samples=num_samples,
    warmup_steps=warmup_steps,
    disable_progbar=False,
)
mcmc_run.run(cx_train, cy_train)




Warmup:   0%|          | 0/200 [00:00, ?it/s]

NotImplementedError: Module [UniformPrior] is missing the required "forward" function
Trace Shapes:
 Param Sites:
Sample Sites:

In [170]:
# Rebuild the likelihood and model, attach only the mean prior, then display
# the mean module to check that the prior is registered on it.
likelihood = gpytorch.likelihoods.GaussianLikelihood()
model = ExactGPModel(train_x=cx_train, train_y=cy_train, likelihood=likelihood)

model.mean_module.register_prior(
    "mean_prior",
    UniformPrior(0, 1),
    "constant",
)

model.mean_module

ConstantMean(
  (mean_prior): UniformPrior(low: 0.0, high: 1.0)
)

In [171]:
# Unpack the registry entry stored under "mean_prior" on the mean module.
mean_module = model.mean_module
prior, closure, setting_closure = mean_module._priors["mean_prior"]

# Feed one draw from the prior to the setting closure, then inspect the
# parameters of the object it returns.
a = setting_closure(mean_module, prior.sample())

a._parameters

OrderedDict([('raw_constant',
              Parameter containing:
              tensor(0.0110, requires_grad=True))])

In [177]:
# Minimal reproduction: calling pyro_sample_from_prior() directly, without MCMC,
# raises the same NotImplementedError as the sampling run (see the output below).
# NOTE(review): the root cause is not visible in this notebook - possibly a
# gpytorch / pyro version mismatch; TODO confirm.
model.pyro_sample_from_prior()

NotImplementedError: Module [UniformPrior] is missing the required "forward" function

In [178]:
# exactly done as in docs:
import math
import torch
import gpytorch
import pyro
from pyro.infer.mcmc import NUTS, MCMC, HMC
from matplotlib import pyplot as plt

%matplotlib inline
%load_ext autoreload
%autoreload 2

In [179]:
# Training inputs: 4 evenly spaced points on [0, 1], endpoints included
train_x = torch.linspace(0, 1, 4)
# Targets: sin(2*pi*x) plus Gaussian noise scaled by 0.2
gaussian_noise = 0.2 * torch.randn(train_x.size())
train_y = torch.sin((2 * math.pi) * train_x) + gaussian_noise

In [180]:
# We will use the simplest form of GP model, exact inference
class ExactGPModel(gpytorch.models.ExactGP):
    """GP regression with exact inference, a constant mean and a scaled RBF covariance.

    NOTE(review): this re-declares the ExactGPModel defined earlier in the
    notebook with the same body; once this cell runs, this definition is the
    one bound to the name.
    """

    def __init__(self, train_x, train_y, likelihood):
        super().__init__(train_x, train_y, likelihood)
        self.mean_module = gpytorch.means.ConstantMean()
        self.covar_module = gpytorch.kernels.ScaleKernel(
            gpytorch.kernels.RBFKernel()
        )

    def forward(self, x):
        """Return MultivariateNormal(mean(x), covar(x))."""
        prior_mean = self.mean_module(x)
        prior_covar = self.covar_module(x)
        return gpytorch.distributions.MultivariateNormal(prior_mean, prior_covar)

In [185]:
import os
from gpytorch.priors import LogNormalPrior, NormalPrior, UniformPrior

# With 'CI' in the environment run a 2-sample / 2-warm-up smoke test;
# otherwise use 100 samples and 100 warm-up steps.
smoke_test = 'CI' in os.environ
num_samples, warmup_steps = (2, 2) if smoke_test else (100, 100)

# Positive noise constraint in place of the usual GreaterThan(1e-4); the original
# note motivates this by LogNormal support, although the priors below are Uniform.
likelihood = gpytorch.likelihoods.GaussianLikelihood(
    noise_constraint=gpytorch.constraints.Positive()
)
model = ExactGPModel(train_x, train_y, likelihood)

# (owning module, prior name, prior, parameter name), registered in this order
prior_specs = (
    (model.mean_module, "mean_prior", UniformPrior(-1, 1), "constant"),
    (model.covar_module.base_kernel, "lengthscale_prior", UniformPrior(0.01, 0.5), "lengthscale"),
    (model.covar_module, "outputscale_prior", UniformPrior(1, 2), "outputscale"),
    (likelihood, "noise_prior", UniformPrior(0.01, 0.5), "noise"),
)
for owner, prior_name, uniform_prior, param_name in prior_specs:
    owner.register_prior(prior_name, uniform_prior, param_name)

mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)

def pyro_model(x, y):
    """Sample hyperparameters for the module-level ``model`` and observe ``y`` at ``x``.

    NOTE(review): same body as the pyro_model defined earlier in the notebook;
    this definition rebinds the name. Returns ``y`` unchanged.
    """
    # fast_computations is entered with all three flags set to False
    with gpytorch.settings.fast_computations(False, False, False):
        drawn_gp = model.pyro_sample_from_prior()
        latent = drawn_gp(x)
        observed_dist = drawn_gp.likelihood(latent)
        pyro.sample("obs", observed_dist, obs=y)
    return y

# Run NUTS on the toy data; the progress bar is hidden when smoke_test is set.
nuts_kernel = NUTS(pyro_model)
mcmc_run = MCMC(
    nuts_kernel,
    num_samples=num_samples,
    warmup_steps=warmup_steps,
    disable_progbar=smoke_test,
)
mcmc_run.run(train_x, train_y)

Warmup:   0%|          | 0/200 [00:00, ?it/s]

NotImplementedError: Module [UniformPrior] is missing the required "forward" function
Trace Shapes:
 Param Sites:
Sample Sites: