In [2]:
import copy
import os
import sys
import time
import warnings

import gpytorch
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import torch
from gpytorch.means import Mean
from matplotlib.lines import Line2D

# Global plot styling: white seaborn theme, a custom 8-colour palette,
# doubled font scale and 2pt line width.
sns.set_style('white')
# style.use('whitegrid')
palette = ["#1b4079", "#C6DDF0", "#048A81", "#B9E28C", "#8C2155", "#AF7595", "#E6480F", "#FA9500"]
# NOTE(review): this call also passes style="white", so the sns.set_style
# call above looks redundant — confirm before removing.
sns.set(palette = palette, font_scale=2.0, style="white", rc={"lines.linewidth": 2.0})

In [2]:
def LoadDat(dat, sym, start_idx, ntrain, ntest):
    """Cut one symbol's closing prices into adjacent train and test windows.

    Args:
        dat: DataFrame with at least a ``symbol`` and a ``close_price`` column.
        sym: Value of ``symbol`` whose rows are selected.
        start_idx: Position (within the selected rows) where training starts.
        ntrain: Number of prices in the training window.
        ntest: Number of prices in the test window, which begins right after
            the training window.

    Returns:
        Tuple ``(train_y, test_y)`` of squeezed float tensors.
    """
    closes = dat.loc[dat.symbol == sym, "close_price"].to_numpy()
    prices = torch.FloatTensor(closes)

    split = start_idx + ntrain   # first index of the test window
    stop = split + ntest         # one past the last test index
    return prices[start_idx:split].squeeze(), prices[split:stop].squeeze()

def LoadSims(SPDR, sym, kernel, mean, k=100):
    """Load the saved simulation file for one symbol/kernel/mean/k combination.

    The file is read from ``./saved-outputs/<SPDR>/<sym>_<kernel>_<mean><k>.pt``.

    Args:
        SPDR: Name of the sub-directory under ``./saved-outputs/``.
        sym: Symbol part of the file name.
        kernel: Kernel part of the file name.
        mean: Mean-function part of the file name.
        k: Suffix appended (via ``str``) directly after ``mean``.

    Returns:
        Whatever object ``torch.load`` deserialises from that file.
    """
    directory = "".join(("./saved-outputs/", SPDR, "/"))
    file_name = "".join((sym, "_", kernel, "_", mean, str(k), ".pt"))
    return torch.load(directory + file_name)

In [3]:
# Read the shared experiment settings (window lengths and window start
# offsets) from ./saved-outputs/metadata.pt.
hypers = torch.load("./saved-outputs/metadata.pt")
ntrain = hypers['ntrain']
ntest = hypers['ntest']
start_idxs = hypers['start_idxs']

# Select the "XLE" group and load its price table; `syms` holds the distinct
# values of the `symbol` column.
SPDR = "XLE"
# NOTE(review): `dpath` is not defined in any cell visible here — it must be
# set before this cell runs, otherwise this line raises NameError.
dat = pd.read_pickle(dpath + SPDR + ".pkl")
syms = dat.symbol.unique()
print(syms)

# Time axes with a step of 1/252 (presumably one trading day expressed in
# years — confirm). test_x starts one step (train_x[1]) after the last
# training time.
train_x = torch.arange(ntrain) * 1./252
test_x = torch.arange(ntest) * 1./252 + train_x[-1] + train_x[1]
# 19 evenly spaced levels from 0.05 to 0.95.
percentiles = np.linspace(0.05, 0.95, 19)

['XOM' 'CVX' 'EOG' 'COP' 'SLB']


## Examples

In [4]:
# Same setup as the "XLE" cell earlier in the notebook, repeated for "XLK".
# Running it rebinds the globals SPDR, dat and syms, so later cells see XLK.
hypers = torch.load("./saved-outputs/metadata.pt")
ntrain = hypers['ntrain']
ntest = hypers['ntest']
start_idxs = hypers['start_idxs']

SPDR = "XLK"
# NOTE(review): `dpath` is not defined in any cell visible here — it must be
# set before this cell runs, otherwise this line raises NameError.
dat = pd.read_pickle(dpath + SPDR + ".pkl")
syms = dat.symbol.unique()
print(syms)
# Time axes with a step of 1/252; test_x starts one step after train_x ends.
train_x = torch.arange(ntrain) * 1./252
test_x = torch.arange(ntest) * 1./252 + train_x[-1] + train_x[1]

['AAPL' 'MSFT' 'NVDA' 'V' 'PYPL']


In [5]:
def ECDF(sample_pxs, true_px):
    """Empirical CDF of the samples, evaluated at the true price.

    Args:
        sample_pxs: Tensor of sampled prices; dimension 0 indexes the samples.
        true_px: Tensor (or scalar) broadcastable against ``sample_pxs``.

    Returns:
        Tensor holding, for each position along the remaining dimensions, the
        fraction of samples that are strictly below ``true_px``.
    """
    n_samples = sample_pxs.shape[0]
    n_below = (sample_pxs < true_px).sum(dim=0)
    return n_below / n_samples
 
def Calibration(pcts, percentile=0.95):
    """Fraction of entries in ``pcts`` that lie strictly below ``percentile``.

    Args:
        pcts: Array-like of values (the callers in this notebook pass a
            flattened, one-dimensional collection).
        percentile: Threshold to compare against.

    Returns:
        Number of entries below the threshold divided by ``pcts.shape[0]``.
    """
    hits = pcts < percentile
    # np.where on a boolean mask yields the indices of the True entries.
    n_below = len(np.where(hits)[0])
    return n_below / pcts.shape[0]

def GetCalibration(kernel, mean, k=100, horizon=np.arange(75,100), logger=None, exp=True):
    """Append calibration rows for one kernel/mean/k configuration.

    For every symbol in the global ``syms`` whose simulation file exists under
    ``./saved-outputs/<SPDR>/``, and for every window in the global
    ``start_idxs``, the empirical CDF of the simulated prices is evaluated at
    the realised test prices on the ``horizon`` steps. The pooled ECDF values
    are then turned into one calibration figure per level in
    ``np.linspace(0.05, 0.95, 19)``.

    Reads the notebook globals ``syms``, ``start_idxs``, ``SPDR``, ``dat``,
    ``ntrain`` and ``ntest``.

    Args:
        kernel: Kernel name; part of the simulation file name and logged.
        mean: Mean-function name; part of the simulation file name and logged.
        k: Suffix of the simulation file name; logged.
        horizon: Integer indices of the test steps to evaluate.
        logger: List to append rows to. A new list is created when omitted,
            so separate calls never share state through the default.
        exp: When True, ``.exp()`` is applied to the loaded simulations before
            comparison (presumably they are stored as log-prices — confirm).

    Returns:
        ``logger`` with one ``[calibration, level, kernel, mean, k]`` row
        appended per level. Nothing is appended when no simulation file was
        found for any symbol.
    """
    if logger is None:
        logger = []

    fpath = "./saved-outputs/" + SPDR + "/"
    ecdf_vals = []
    for sym in syms:
        fname = sym + "_" + kernel + "_" + mean + str(k) + ".pt"
        if not os.path.exists(fpath + fname):
            continue

        # The file holds every window for this symbol, so load it once.
        sims = LoadSims(SPDR, sym, kernel, mean, k=k)
        for idx, start_idx in enumerate(start_idxs):
            _, test_y = LoadDat(dat, sym, start_idx, ntrain, ntest)
            preds = sims[idx, :, horizon]
            if exp:
                preds = preds.exp()
            # Fraction of simulated paths below the realised price at each
            # horizon step. Without this the pooled values stay at zero and
            # every level reports a calibration of 1.0.
            ecdf_vals.append(ECDF(preds, test_y[horizon]).flatten())

    # Only windows that were actually evaluated are pooled; symbols without a
    # simulation file contribute nothing instead of zero placeholders.
    if not ecdf_vals:
        return logger
    pcts = torch.cat(ecdf_vals)

    for pct in np.linspace(0.05, 0.95, 19):
        clb = Calibration(pcts, pct)
        logger.append([clb, np.round(pct, 2), kernel, mean, k])

    return logger

In [20]:
# Select the "XLRE" group and load its price table.
SPDR = "XLRE"
# NOTE(review): `dpath` is not defined in any cell visible here — it must be
# set before this cell runs.
dat = pd.read_pickle(dpath + SPDR + ".pkl")
syms = dat.symbol.unique()
print(syms)
horizon = np.arange(75, 100)
train_x = torch.arange(ntrain) * 1./252
test_x = torch.arange(ntest) * 1./252 + train_x[-1] + train_x[1]

# Calibration for every moving-average mean at each k, followed by the
# constant-mean baseline (evaluated without exponentiating the simulations).
# The rows are appended in the same order as the individual calls would give.
logger = []
for mean_name in ('ewma', 'dewma', 'tewma'):
    for k_val in (100, 200, 400):
        logger = GetCalibration('matern', mean_name, k_val, logger=logger, horizon=horizon)
logger = GetCalibration('matern', 'constant', 100, logger=logger, horizon=horizon, exp=False)
# Passing the labels to the constructor keeps this valid for an empty logger.
xlre_df = pd.DataFrame(logger, columns=["Calibration", "Percentile", "Type", 'Mean', "k"])

['AMT' 'PLD' 'CCI' 'EQIX' 'PSA']


In [22]:
# Persist the calibration table built above to the notebook's working directory.
pd.to_pickle(xlre_df, "./new_matern_calib.pkl")

## Get NLL

In [94]:
def GetNLL(kernel, mean, k=100, horizon=np.arange(75,100), logger=None, exp=True):
    """Append the pooled Gaussian negative log-likelihood for one configuration.

    For every group in the global ``SPDRS``, every symbol in that group's
    price table and every window in the global ``start_idxs``, a Normal is
    fitted to the per-step mean and standard deviation of the simulated
    prices on the ``horizon`` steps, and the log-probability of the realised
    test prices under it is accumulated.

    Reads the notebook globals ``SPDRS``, ``dpath``, ``start_idxs``,
    ``ntrain`` and ``ntest``.

    Args:
        kernel: Kernel name; part of the simulation file name and logged.
        mean: Mean-function name; part of the simulation file name and logged.
        k: Suffix of the simulation file name; logged.
        horizon: Integer indices of the test steps to score.
        logger: List to append the result row to. A new list is created when
            omitted, so separate calls never share state through the default.
        exp: When True, ``.exp()`` is applied to the loaded simulations before
            scoring (presumably they are stored as log-prices — confirm).

    Returns:
        ``logger`` with one ``[nll, N, kernel, mean, k]`` row appended, where
        ``N`` is the number of scored test points. Nothing is appended when no
        point could be scored.
    """
    if logger is None:
        logger = []

    N = 0
    nll = 0.
    n_failed = 0
    for spdr in SPDRS:
        dat = pd.read_pickle(dpath + spdr + ".pkl")
        fpath = "./saved-outputs/" + spdr + "/"
        for sym in dat.symbol.unique():
            fname = sym + "_" + kernel + "_" + mean + str(k) + ".pt"
            if not os.path.exists(fpath + fname):
                continue

            # The file holds every window for this symbol, so load it once.
            sims = LoadSims(spdr, sym, kernel, mean, k=k)
            for idx, start_idx in enumerate(start_idxs):
                _, test_y = LoadDat(dat, sym, start_idx, ntrain, ntest)
                test_y = test_y[horizon]
                preds = sims[idx, :, horizon]
                if exp:
                    preds = preds.exp()
                try:
                    fitted = torch.distributions.Normal(preds.mean(0), preds.std(0))
                    nll -= fitted.log_prob(test_y).sum()
                    N += test_y.numel()
                except Exception:
                    # Best effort: a window that cannot be scored is skipped,
                    # but counted so the omission is reported below.
                    n_failed += 1

    if n_failed:
        warnings.warn(
            "GetNLL({}, {}, k={}): skipped {} window(s) that could not be scored".format(
                kernel, mean, k, n_failed
            )
        )

    # With nothing scored, `nll` is still the float 0. and has no `.item()`.
    if N > 0:
        logger.append([nll.item(), N, kernel, mean, k])
    return logger

In [105]:
# Groups pooled into each NLL figure.
SPDRS = ["XLRE", "XLY", "XLF", "XLE", "XLK"]
horizon = np.arange(75, 100)
train_x = torch.arange(ntrain) * 1./252
test_x = torch.arange(ntest) * 1./252 + train_x[-1] + train_x[1]

# NLL for every moving-average mean at each k, followed by the constant-mean
# baseline (scored without exponentiating the simulations). The rows are
# appended in the same order as the individual calls would give.
logger = []
for mean_name in ('ewma', 'dewma', 'tewma'):
    for k_val in (100, 200, 400):
        logger = GetNLL('matern', mean_name, k_val, logger=logger, horizon=horizon)
logger = GetNLL('matern', 'constant', 100, logger=logger, horizon=horizon, exp=False)

In [106]:
# One row per GetNLL call. Passing the labels to the constructor also gives a
# correctly labelled empty frame when `logger` is empty, where assigning
# `.columns` afterwards would raise a length-mismatch error.
df = pd.DataFrame(logger, columns=["NLL", "N", "Kernel", "Mean", "K"])

In [107]:
# Per-point NLL: total NLL divided by the number of scored test points.
# This adds a column to `df` in place.
df['Mean_NLL'] = df["NLL"]/df['N']

In [108]:
# Display the NLL table (one row per configuration logged above).
df

Unnamed: 0,NLL,N,Kernel,Mean,K,Mean_NLL
0,67267.55,4350,matern,ewma,100,15.463804
1,57043.42,4200,matern,ewma,200,13.581766
2,61801.94,6300,matern,ewma,400,9.809832
3,513817.7,4800,matern,dewma,100,107.045358
4,110954.7,4200,matern,dewma,200,26.417781
5,74891.09,6450,matern,dewma,400,11.611021
6,3585653.0,5250,matern,tewma,100,682.981476
7,121662.5,3900,matern,tewma,200,31.195507
8,74964.2,6450,matern,tewma,400,11.622357
9,57384.97,7500,matern,constant,100,7.65133


In [110]:
# Persist the NLL table, including the Mean_NLL column, to the working directory.
pd.to_pickle(df, "./matern_nll.pkl")

In [56]:
# Scratch check of torch.distributions.Normal: two unit-variance Normals
# centred at 1 and 5.
# NOTE(review): the execution counts of this and the following cells are out
# of order relative to the cells above — confirm they are still needed.
mean = torch.tensor([1., 5.])
std = torch.tensor([1., 1.])
nrml = torch.distributions.Normal(mean, std)

In [57]:
# Density of each Normal at its own mean; with std 1 this is 1/sqrt(2*pi),
# about 0.3989.
nrml.log_prob(torch.tensor([1., 5.])).exp()

tensor([0.3989, 0.3989])

In [68]:
# NOTE(review): this unpacks two values, but GetNLL as defined in this
# notebook returns the `logger` list. The cell appears to have been run
# against a different version of the function (its execution count, 68,
# precedes the definition cell's 94) — confirm or delete.
preds, y = GetNLL('matern', 'tewma', 400, logger=logger, horizon=horizon)

In [69]:
# Inspect the shape of `preds` obtained in the previous cell.
preds.shape

torch.Size([1000, 25])

In [73]:
# Reset the scratch NLL accumulator.
nll = 0.

In [78]:
# Subtract the summed log-probability of `y` under a Normal fitted to the
# mean and std of `preds` along dimension 0.
# The in-place `-=` makes this cell non-idempotent: re-running it without
# resetting `nll` first accumulates the value again.
nll -= torch.distributions.Normal(preds.mean(0), preds.std(0)).log_prob(y).sum()

In [79]:
# Show the accumulated scratch NLL.
nll

tensor(242.3718)