# Cross-Section Predictions

## Imports

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import scipy as sp
import pandas as pd
import pandas_datareader.data as web
import datetime as dt

import warnings
import tqdm

import statsmodels.api as sm
import linearmodels as lm

import matplotlib.pyplot as plt
import seaborn as sns

import mypack.data as data
import mypack.calc as calc
import mypack.plot as plot
import mypack.clas as clas

In [3]:
pd.set_option('display.float_format', lambda x: '%.4f' % x) #sets decimals to be displayed

In [4]:
sns.set()
sns.set(rc={'figure.figsize': (17, 6)})

In [5]:
%matplotlib inline

## Data

In [6]:
df_xs_estimates = pd.read_pickle('df_xs_estimates.pkl')

In [7]:
df_xs_predictions = pd.read_pickle('df_xs_predictions.pkl')

## Calculate Predictions

In [8]:
hmm_name = 'HMM_mmss_20'

In [9]:
prediction_horizons = list(range(1,22))#+[31,42,63,126,189,252]

### State Probabilities

In [10]:
df_xs_predictions = pd.DataFrame(index=df_xs_estimates.index, columns = pd.MultiIndex.from_product([[],[]], names=['horizon','variable']))

In [11]:
df_xs_predictions[('data','return')] = df_xs_estimates[('data','return')]

In [12]:
%%time

for h in tqdm.trange(0,max(prediction_horizons)+1):
    p0 = []
    p1 = []
    for iObs in df_xs_predictions.index:
        state_probabilities = [df_xs_estimates.at[iObs,(hmm_name,'filt_prob_0')], df_xs_estimates.at[iObs,(hmm_name,'filt_prob_1')]]
        transition_matrix = [[df_xs_estimates.at[iObs,(hmm_name,'p00')], 1-df_xs_estimates.at[iObs,(hmm_name,'p00')]],
                             [df_xs_estimates.at[iObs,(hmm_name,'p10')], 1-df_xs_estimates.at[iObs,(hmm_name,'p10')]]]
        p0 += [clas.MarkovChain(transition_matrix, state_probabilities).iterate(h, return_state_vector=True)[0]]
        p1 += [clas.MarkovChain(transition_matrix, state_probabilities).iterate(h, return_state_vector=True)[1]]

    df_xs_predictions[(h,'p0')] = p0
    df_xs_predictions[(h,'p1')] = p1

    #if h % 5 == 0:
        # due to the size of the dataframe the saved file has to be split
        #df_xs_predictions.iloc[0:int(len(df_xs_predictions)/2)].to_pickle('df_xs_predictions_1.pkl')
        #df_xs_predictions.iloc[int(len(df_xs_predictions)/2):].to_pickle('df_xs_predictions_2.pkl')

df_xs_predictions.to_pickle('df_xs_predictions.pkl')

100%|███████████████████████████████████████████████████████████████████████████████| 22/22 [3:08:24<00:00, 595.10s/it]


Wall time: 3h 8min 55s


In [29]:
#df_xs_predictions.iloc[0:int(len(df_xs_predictions)/2)].to_pickle('df_xs_predictions1.pkl')
#df_xs_predictions.iloc[int(len(df_xs_predictions)/2):].to_pickle('df_xs_predictions2.pkl')

### Distribution Moments

In [10]:
prediction_horizons = [1]

In [11]:
for h in prediction_horizons:
    df_xs_predictions[h,'mean'] = np.nan
    df_xs_predictions[h,'var'] = np.nan
    df_xs_predictions[h,'skew'] = np.nan
    df_xs_predictions[h,'kurt'] = np.nan
    df_xs_predictions[h,'std'] = np.nan
    df_xs_predictions[h,'entropy'] = np.nan
    df_xs_predictions[h,'L2'] = np.nan
    df_xs_predictions[h,'min'] = np.nan
    df_xs_predictions[h,'positive'] = np.nan
    df_xs_predictions[h,'ks'] = np.nan
    
    df_xs_predictions[h,'cummean'] = np.nan
    df_xs_predictions[h,'cumvar'] = np.nan
    df_xs_predictions[h,'cumskew'] = np.nan
    df_xs_predictions[h,'cumkurt'] = np.nan
    df_xs_predictions[h,'cumstd'] = np.nan
    df_xs_predictions[h,'cumentropy'] = np.nan
    df_xs_predictions[h,'cumL2'] = np.nan
    df_xs_predictions[h,'cummin'] = np.nan
    df_xs_predictions[h,'cumpositive'] = np.nan

In [12]:
%%time

count = 0

# iterate through each period
for iObs in tqdm.tqdm(df_xs_estimates.index[df_xs_estimates[hmm_name,'convergence']=='True']):
    product = clas.ProductDistribution([])
    ent, l2, mins, posit = [], [], [], []
    
    # iterate through all forecast horizons
    for h in range(1,max(prediction_horizons)+1):
        # get component inputs for mixture distribution
        mus = [df_xs_estimates.at[iObs,(hmm_name,'mu_0')], df_xs_estimates.at[iObs,(hmm_name,'mu_1')]]
        sigmas = [df_xs_estimates.at[iObs,(hmm_name,'sigma2_0')]**0.5, df_xs_estimates.at[iObs,(hmm_name,'sigma2_1')]**0.5]
        weights = [df_xs_predictions.at[iObs,(h,'p0')], df_xs_predictions.at[iObs,(h,'p1')]]
        
        if h == 1:
            ks = calc.calculate_kolmogorov_smirnov_distance(mus[0], mus[1], sigmas[0], sigmas[1])
        
        # create forecast mixture distribution
        mixture = clas.MixtureDistribution([])
        for (m,s,p) in zip(mus,sigmas,weights):
            mixture.add_component((m,s,p))
        
        # calculate mixture moments and add central moments to factor distribution
        m,v,s,k = mixture.mvsk()
        ent += [mixture.entropy()]
        l2 += [1-4*(mixture.components[0][2]-0.5)**2]
        mins += [min(mixture.components[0][2], 1-mixture.components[0][2])]
        posit += [1-mixture.cdf(0)]
        
        product.add_factor((m+1,v,s*v**1.5,(k+3)*v**2)) #central moments as inputs
        
        # save outputs
        if h in prediction_horizons:
            # one-period distribution
            df_xs_predictions.at[iObs,(h,'mean')] = m
            df_xs_predictions.at[iObs,(h,'var')] = v
            df_xs_predictions.at[iObs,(h,'skew')] = s
            df_xs_predictions.at[iObs,(h,'kurt')] = k
            df_xs_predictions.at[iObs,(h,'std')] = mixture.std()
            df_xs_predictions.at[iObs,(h,'entropy')] = ent[-1]
            df_xs_predictions.at[iObs,(h,'L2')] = l2[-1]
            df_xs_predictions.at[iObs,(h,'min')] = mins[-1]
            df_xs_predictions.at[iObs,(h,'positive')] = posit[-1]
            df_xs_predictions.at[iObs,(h,'ks')] = ks
            
            # cumulative distribution
            df_xs_predictions.at[iObs,(h,'cummean')] = product.mean()-1
            df_xs_predictions.at[iObs,(h,'cumvar')] = product.var()
            df_xs_predictions.at[iObs,(h,'cumskew')] = product.skew()
            df_xs_predictions.at[iObs,(h,'cumkurt')] = product.kurt()
            df_xs_predictions.at[iObs,(h,'cumstd')] = product.std()
            df_xs_predictions.at[iObs,(h,'cumentropy')] = sum(ent)/len(ent)
            df_xs_predictions.at[iObs,(h,'cumL2')] = sum(l2)/len(l2)
            df_xs_predictions.at[iObs,(h,'cummin')] = sum(mins)/len(mins)
            df_xs_predictions.at[iObs,(h,'cumpositive')] = sum(posit)/len(posit)
      
    #if count % 50000 == 0:
    #    df_xs_predictions.to_pickle('df_xs_predictions.pkl')
    #count += 1
    
df_xs_predictions.to_pickle('df_xs_predictions.pkl')

100%|█████████████████████████████████████████████████████████████████████████| 847125/847125 [44:30<00:00, 317.19it/s]


Wall time: 44min 31s
