In [2]:
import pickle as pkl
import pandas as pd
import numpy as np
import torch
import matplotlib.pyplot as plt
import seaborn as sns
import os
from voltron.data import make_ticker_list, GetStockHistory

# Global plot styling for the notebook.
sns.set_style('white')
# Eight hex colours handed to seaborn as the default palette.
palette = ["#1b4079", "#C6DDF0", "#048A81", "#B9E28C", "#8C2155", "#AF7595", "#E6480F", "#FA9500"]
# Apply palette, font scale, style and line width in one call.
# NOTE(review): style="white" is passed again here, so the set_style call
# above looks redundant -- confirm before removing.
sns.set(palette = palette, font_scale=2.0, style="white", rc={"lines.linewidth": 4.0})



In [30]:
def ECDF(sample_pxs, true_px):
    """Empirical CDF of the samples evaluated at ``true_px``.

    Counts, along dim 0 of ``sample_pxs``, the entries that are strictly
    below ``true_px`` and divides that count by ``sample_pxs.shape[0]``.
    """
    n_samples = sample_pxs.shape[0]
    below_mask = sample_pxs < true_px
    n_below = torch.sum(below_mask, dim=0)
    return n_below / n_samples
 
def Calibration(pcts, percentile=0.95):
    """Share of ``pcts`` entries that fall strictly below ``percentile``.

    The numerator counts every element below the threshold; the denominator
    is ``pcts.shape[0]`` (the length of the first axis).
    """
    below_idx = np.nonzero(np.asarray(pcts < percentile))[0]
    return len(below_idx) / pcts.shape[0]

def GetNLL(model, mean='ewma', k=100, horizon=np.arange(75,100), 
                   logger=None, exp=True, fdir="./saved-outputs/"):
    """Score saved forecast samples against realised closes and log the NLL.

    Depends on two notebook-level names that must exist before the call:
    ``ticker_list`` (iterated for ticker symbols) and ``end_date`` (forwarded
    to ``GetStockHistory``).

    For every ticker whose history can be fetched, and for every date in that
    history, the file ``fdir + ticker + "/" + <model>_[<mean><k>_]<date>.pt``
    is loaded when it exists.  The forecast samples are summarised by a
    Normal with their per-step mean and standard deviation (over dim 0), and
    the log-probability of the realised ``Close`` values at the ``horizon``
    steps is accumulated.

    Parameters
    ----------
    model : str
        Model tag used as the file-name prefix.
    mean, k :
        Appended to the file name (``mean + str(k)``) only when ``model`` is
        one of ``'volt'``, ``'matern'`` or ``'sm'``; always copied into the
        logged row.
    horizon : index array
        Positions inside the 100-step window that are scored.
    logger : list or None
        List that receives the result row.  A fresh list is created when
        omitted, so separate calls no longer share one default list.
    exp : bool
        When true the samples are exponentiated before scoring.
        NOTE(review): presumably the saved samples are log-prices -- confirm.
    fdir : str
        Root directory of the saved forecasts (must end with a separator).

    Returns
    -------
    list
        ``logger``; when at least one forecast was scored it gains the row
        ``[summed NLL, mean NLL, std of log-probs, model, mean, k]``.
    """
    if logger is None:
        logger = []

    # Forecast files and the matching slice of closes must both span this many steps.
    window = 100

    # The file-name prefix does not depend on ticker or date, so build it once.
    fname_prefix = model + "_"
    if model in ['volt', 'matern', 'sm']:
        fname_prefix += mean + str(k) + "_"

    n_skipped = 0
    nlls = torch.tensor([])
    for tckr in ticker_list:
        try:
            data = GetStockHistory(tckr, history=1000, end_date=end_date)
        except Exception:
            # Best effort: delisted or unavailable symbols are reported and skipped.
            print("failed", tckr)
            continue
        if data is None:
            continue

        fpath = fdir + tckr + "/"
        for idx, date in enumerate(data.index):
            fname = fname_prefix + str(date.date()) + ".pt"
            if not os.path.exists(fpath + fname):
                continue

            # torch.load unpickles the file -- only point fdir at trusted outputs.
            preds = torch.load(fpath + fname)
            if isinstance(preds, tuple):
                preds = preds[0]
            if preds.shape[-1] != window:
                continue
            # assumes preds is (n_samples, window) -- dim 1 is indexed by horizon
            preds = preds[:, horizon]

            test_y = torch.tensor(data.iloc[idx:idx+window].Close.to_numpy())
            if test_y.shape[0] != window:
                # Not enough realised prices after this date to score the forecast.
                continue
            if exp:
                preds = preds.exp()

            try:
                curr = torch.distributions.Normal(preds.mean(0), preds.std(0)).log_prob(test_y[horizon])
                # Drop windows whose mean log-prob is huge in magnitude
                # (NaN also fails this comparison and is dropped).
                if curr.mean().abs() < 500:
                    nlls = torch.cat((curr, nlls))
            except Exception:
                # Keep the best-effort behaviour but make the skips visible.
                n_skipped += 1

    if n_skipped:
        print("skipped", n_skipped, "forecasts that could not be scored")

    if nlls.numel() > 0:
        logger.append([-nlls.sum().item(), -nlls.mean().item(), nlls.std().item(), model, mean, k])

    return logger

In [25]:
# Relative location of the voltron data files.
data_path = "../../voltron/data/"
# Tickers read from the NASDAQ-100 listing; GetNLL iterates over this global.
ticker_list = make_ticker_list(f"{data_path}nasdaq100.txt")

In [27]:
# Score the saved 'volt' forecasts and collect one summary row per setting.
log = []
# GetNLL reads this global when it fetches price history, so it must be set
# before the call below.
end_date = "2022-01-20"
for k in [400]:
    for mean in ['ewma']:
        # Steps 75..99 of each 100-step forecast are scored.
        log = GetNLL('volt', mean=mean, k=k, horizon=np.arange(75,100), 
                       logger=log, exp=True, fdir="../trading/saved-outputs/")
        
# Disabled: the same scoring for the 'matern' model with a constant mean.
# log = GetNLL('matern', mean='constant', k=100, horizon=np.arange(75,100), 
#                logger=log, exp=True)


1 Failed download:
- ALXN: No data found, symbol may be delisted
failed ALXN
failed CA

1 Failed download:
- CELG: No data found, symbol may be delisted
failed CELG

1 Failed download:
- CTRP: No data found, symbol may be delisted
failed CTRP
failed ESRX

1 Failed download:
- LVNTA: No data found for this date range, symbol may be delisted
failed LVNTA

1 Failed download:
- QVCA: No data found for this date range, symbol may be delisted
failed QVCA

1 Failed download:
- LMCA: No data found for this date range, symbol may be delisted
failed LMCA

1 Failed download:
- LMCK: No data found, symbol may be delisted
failed LMCK

1 Failed download:
- LLTC: No data found for this date range, symbol may be delisted
failed LLTC

1 Failed download:
- MXIM: No data found, symbol may be delisted
failed MXIM

1 Failed download:
- MYL: No data found, symbol may be delisted
failed MYL

1 Failed download:
- SYMC: No data found, symbol may be delisted
failed SYMC

1 Failed download:
- PCLN: No data foun

In [31]:
# Score the saved 'sm' forecasts for two (mean, k) settings, one row each.
# end_date is a global that GetNLL reads, so assign it before the calls.
end_date = "2022-01-20"
log = []
for sm_mean, sm_k in [('constant', 100), ('ewma', 400)]:
    log = GetNLL('sm', mean=sm_mean, k=sm_k, horizon=np.arange(75,100),
                 logger=log, exp=True)



1 Failed download:
- ALXN: No data found, symbol may be delisted
failed ALXN
failed CA

1 Failed download:
- CELG: No data found, symbol may be delisted
failed CELG

1 Failed download:
- CTRP: No data found, symbol may be delisted
failed CTRP
failed ESRX

1 Failed download:
- LVNTA: No data found for this date range, symbol may be delisted
failed LVNTA

1 Failed download:
- QVCA: No data found for this date range, symbol may be delisted
failed QVCA

1 Failed download:
- LMCA: No data found for this date range, symbol may be delisted
failed LMCA

1 Failed download:
- LMCK: No data found, symbol may be delisted
failed LMCK

1 Failed download:
- LLTC: No data found for this date range, symbol may be delisted
failed LLTC

1 Failed download:
- MXIM: No data found, symbol may be delisted
failed MXIM

1 Failed download:
- MYL: No data found, symbol may be delisted
failed MYL

1 Failed download:
- SYMC: No data found, symbol may be delisted
failed SYMC

1 Failed download:
- PCLN: No data foun

In [32]:
# One row per GetNLL call.  Passing the column names to the constructor keeps
# this cell working when `log` is empty (assigning df.columns afterwards
# raises a length-mismatch error on a frame with no columns).
# NOTE(review): GetNLL logs [summed NLL, mean NLL, std, ...], so "Mean_NLL"
# actually holds the sum and "NLL" the mean; the labels are kept unchanged so
# the saved table stays compatible with the other NLL pickles.
df = pd.DataFrame(log, columns=["Mean_NLL", "NLL", "Std_NLL", "Model", "Mean", "k"])

In [33]:
# Persist the summary table next to the notebook.
df.to_pickle("./sm_nll.pkl")

In [34]:
# Display the NLL summary table.
df

Unnamed: 0,Mean_NLL,NLL,Std_NLL,Model,Mean,k
0,3201143.0,80.430728,113.82574,sm,constant,100
1,1134694.0,147.842929,161.222031,sm,ewma,400


## NLL Plotter

In [48]:
# Build one NLL table from the saved per-model summaries.
nll = pd.concat(
    (pd.read_pickle("./volt_matern_const_nll.pkl"), pd.read_pickle("volt_const_nll.pkl"))
)
mat_nll = pd.read_pickle("matern_nll.pkl")
# Keep only the non-constant-mean rows.  The original cell evaluated this
# filter but discarded the result, so it never took effect.
# NOTE(review): assumes the constant-mean matern row is meant to come from
# volt_matern_const_nll.pkl only -- confirm.
mat_nll = mat_nll[mat_nll["Mean"] != 'constant'].copy()
# Align the header with the other tables before stacking.
mat_nll.columns = nll.columns
nll = pd.concat((mat_nll, nll))

In [49]:
# Display the combined NLL table.
nll

Unnamed: 0,Mean_NLL,NLL,Std_NLL,Model,Mean,k
0,67267.56,15.463807,82.817116,matern,ewma,100
1,57043.43,13.581769,62.178253,matern,ewma,200
2,61801.95,9.809834,21.42758,matern,ewma,400
3,513817.8,107.04538,558.429443,matern,dewma,100
4,110954.7,26.417788,160.354996,matern,dewma,200
5,74891.07,11.611018,42.626156,matern,dewma,400
6,3585652.0,682.981384,2605.378174,matern,tewma,100
7,121662.4,31.195498,169.649368,matern,tewma,200
8,74964.16,11.622351,42.427406,matern,tewma,400
0,4061.322,7.735851,4.734124,matern,constant,100


In [52]:
# Keep the constant- and ewma-mean rows.  Take an explicit copy so the .loc
# assignment below acts on temp_df itself and cannot raise
# SettingWithCopyWarning or write through to `nll`.
temp_df = nll[nll['Mean'].isin(['constant', 'ewma'])].copy()

# Relabel constant-mean volt/matern rows as k=400 so they pass the k filter.
# NOTE(review): presumably k is not meaningful for a constant mean -- confirm.
temp_df.loc[
    (temp_df['Mean'] == 'constant') & temp_df['Model'].isin(['volt', 'matern']),
    'k',
] = 400

temp_df = temp_df[temp_df['k'] == 400]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)


In [53]:
# Display the k == 400 comparison table.
temp_df

Unnamed: 0,Mean_NLL,NLL,Std_NLL,Model,Mean,k
2,61801.953125,9.809834,21.42758,matern,ewma,400
0,4061.321999,7.735851,4.734124,matern,constant,400
0,1173.271529,4.693086,0.389815,volt,constant,400


In [51]:
# Display temp_df again (this cell carries an earlier execution count, 51).
temp_df

Unnamed: 0,Mean_NLL,NLL,Std_NLL,Model,Mean,k
2,61801.953125,9.809834,21.42758,matern,ewma,400
0,4061.321999,7.735851,4.734124,matern,constant,400
0,1173.271529,4.693086,0.389815,volt,constant,400
