# Timeseries Predictions

## Imports

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import scipy as sp
import pandas as pd
import pandas_datareader.data as web
import datetime as dt

import warnings
import tqdm

import statsmodels.api as sm
import linearmodels as lm

import matplotlib.pyplot as plt
import seaborn as sns

import mypack.data as data
import mypack.calc as calc
import mypack.plot as plot
import mypack.clas as clas

In [3]:
# Render every float in pandas output with four decimal places.
pd.options.display.float_format = lambda value: '%.4f' % value

In [4]:
# Apply the seaborn default theme and the wide default figure size in a single
# call; sns.set() re-applies all theme defaults whenever it is invoked.
sns.set(rc={'figure.figsize': (17, 6)})

In [5]:
%matplotlib inline

## Data

In [6]:
# Read the pickled estimates frame that the prediction cells below take their
# model inputs from.
market_estimates_file = 'df_market.pkl'
df_market_estimates = pd.read_pickle(market_estimates_file)

In [7]:
# Load previously saved predictions from 'df_market_predictions.pkl', the same
# file that the to_pickle cells further down write.
# NOTE(review): the "Calculate Predictions" section re-creates
# df_market_predictions as an empty frame, so on a top-to-bottom run the frame
# loaded here is discarded. Presumably this cell is a shortcut for skipping the
# slow state-probability computation - confirm, and guard one path or the other.
df_market_predictions = pd.read_pickle('df_market_predictions.pkl')

## Calculate Predictions
### State Probabilities

In [8]:
# Top-level column key in df_market_estimates selecting the model whose
# estimates (filtered probabilities, transition probabilities, component
# means and variances, convergence flag) are read by the cells below.
hmm_name = 'HMM_mmss_20'

In [9]:
# Horizons for which outputs are stored: every step from 1 to 21, followed by
# a handful of longer horizons.
prediction_horizons = [*range(1, 22), 31, 42, 63, 126, 189, 252]

In [8]:
# Start from an empty frame that shares the estimates' row index and has an
# empty two-level column index named ('horizon', 'variable').
empty_prediction_columns = pd.MultiIndex.from_product(
    [[], []],
    names=['horizon', 'variable'],
)
df_market_predictions = pd.DataFrame(
    index=df_market_estimates.index,
    columns=empty_prediction_columns,
)

In [10]:
# Carry the raw return series over into the predictions frame under
# the column ('data', 'return').
raw_returns = df_market_estimates['raw_data', 'return']
df_market_predictions['data', 'return'] = raw_returns

In [9]:
%%time

# For every horizon h = 0 .. max(prediction_horizons), iterate each
# observation's filtered state probabilities h steps through that observation's
# transition matrix and store the resulting state vector in the columns
# (h, 'p0') and (h, 'p1') of df_market_predictions.

# The column lookups depend neither on h nor on the observation: do them once
# instead of on every inner iteration.
filt_prob_0 = df_market_estimates[hmm_name, 'filt_prob_0']
filt_prob_1 = df_market_estimates[hmm_name, 'filt_prob_1']
p00 = df_market_estimates[hmm_name, 'p00']
p10 = df_market_estimates[hmm_name, 'p10']

for h in tqdm.trange(0, max(prediction_horizons) + 1):
    p0 = []
    p1 = []
    for iObs in df_market_estimates.index:
        state_probabilities = [filt_prob_0.at[iObs], filt_prob_1.at[iObs]]

        # Each row is completed with the complement of its first entry.
        p00_obs = p00.at[iObs]
        p10_obs = p10.at[iObs]
        transition_matrix = [[p00_obs, 1 - p00_obs],
                             [p10_obs, 1 - p10_obs]]

        # Build and iterate the chain once and read both entries from the same
        # state vector (previously an identical chain was built and iterated
        # twice, once per entry).
        state_vector = clas.MarkovChain(transition_matrix, state_probabilities).iterate(h, return_state_vector=True)
        p0.append(state_vector[0])
        p1.append(state_vector[1])

    df_market_predictions[(h, 'p0')] = p0
    df_market_predictions[(h, 'p1')] = p1

100%|████████████████████████████████████████████████████████████████████████████████| 252/252 [41:51<00:00,  9.91s/it]


In [10]:
# Persist the predictions frame (state probabilities computed above) to disk.
df_market_predictions.to_pickle(path='df_market_predictions.pkl')

### Distribution Moments

In [29]:
%%time

# For every observation flagged as converged, build the two-component forecast
# mixture for each horizon 1 .. max(prediction_horizons), feed its moments into
# a running product distribution, and store the one-period and cumulative
# statistics for the horizons listed in prediction_horizons.

# Column lookups are independent of the loops below, so do them once.
mu_0 = df_market_estimates[hmm_name, 'mu_0']
mu_1 = df_market_estimates[hmm_name, 'mu_1']
sigma2_0 = df_market_estimates[hmm_name, 'sigma2_0']
sigma2_1 = df_market_estimates[hmm_name, 'sigma2_1']

# NOTE(review): the flag is compared against the string 'True', not the boolean
# True - confirm the 'convergence' column really stores strings.
converged = df_market_estimates[hmm_name, 'convergence'] == 'True'

# iterate through each period
for iObs in tqdm.tqdm(df_market_estimates.index[converged]):
    product = clas.ProductDistribution([])
    ent, l2, mins, posit = [], [], [], []

    # Component parameters and the KS distance depend only on the observation,
    # not on the horizon: compute them once per observation.
    mus = [mu_0.at[iObs], mu_1.at[iObs]]
    sigmas = [sigma2_0.at[iObs]**0.5, sigma2_1.at[iObs]**0.5]  # variance -> standard deviation
    ks = calc.calculate_kolmogorov_smirnov_distance(mus[0], mus[1], sigmas[0], sigmas[1])

    # iterate through all forecast horizons
    for h in range(1, max(prediction_horizons) + 1):
        # horizon-specific state probabilities act as the mixture weights
        weights = [df_market_predictions[(h, 'p0')].at[iObs], df_market_predictions[(h, 'p1')].at[iObs]]

        # create forecast mixture distribution
        mixture = clas.MixtureDistribution([])
        for component in zip(mus, sigmas, weights):
            mixture.add_component(component)

        # calculate mixture moments and the per-horizon statistics
        mean, var, skew, kurt = mixture.mvsk()
        weight_0 = mixture.components[0][2]  # third entry of the first component tuple
        ent.append(mixture.entropy())
        l2.append(1 - 4 * (weight_0 - 0.5)**2)
        mins.append(min(weight_0, 1 - weight_0))
        posit.append(1 - mixture.cdf(0))

        # add central moments to factor distribution
        # (assumes mvsk() returns excess kurtosis, hence the +3 - TODO confirm)
        product.add_factor((mean + 1, var, skew * var**1.5, (kurt + 3) * var**2))  # central moments as inputs

        # save outputs
        if h in prediction_horizons:
            # one-period distribution
            df_market_predictions.at[iObs, (h, 'mean')] = mean
            df_market_predictions.at[iObs, (h, 'var')] = var
            df_market_predictions.at[iObs, (h, 'skew')] = skew
            df_market_predictions.at[iObs, (h, 'kurt')] = kurt
            df_market_predictions.at[iObs, (h, 'std')] = mixture.std()
            df_market_predictions.at[iObs, (h, 'entropy')] = ent[-1]
            df_market_predictions.at[iObs, (h, 'L2')] = l2[-1]
            df_market_predictions.at[iObs, (h, 'min')] = mins[-1]
            df_market_predictions.at[iObs, (h, 'positive')] = posit[-1]
            df_market_predictions.at[iObs, (h, 'ks')] = ks

            # cumulative distribution: product moments, plus averages of the
            # per-horizon statistics over horizons 1..h
            df_market_predictions.at[iObs, (h, 'cummean')] = product.mean() - 1
            df_market_predictions.at[iObs, (h, 'cumvar')] = product.var()
            df_market_predictions.at[iObs, (h, 'cumskew')] = product.skew()
            df_market_predictions.at[iObs, (h, 'cumkurt')] = product.kurt()
            df_market_predictions.at[iObs, (h, 'cumstd')] = product.std()
            df_market_predictions.at[iObs, (h, 'cumentropy')] = sum(ent) / len(ent)
            df_market_predictions.at[iObs, (h, 'cumL2')] = sum(l2) / len(l2)
            df_market_predictions.at[iObs, (h, 'cummin')] = sum(mins) / len(mins)
            df_market_predictions.at[iObs, (h, 'cumpositive')] = sum(posit) / len(posit)

100%|██████████████████████████████████████████████████████████████████████████| 19495/19495 [1:45:39<00:00,  3.07it/s]


Wall time: 1h 45min 39s


In [None]:
# Save the predictions frame again, now including the distribution moments;
# this overwrites the file written after the state-probability step.
df_market_predictions.to_pickle(path='df_market_predictions.pkl')