In [36]:
%matplotlib inline
import theano
floatX = theano.config.floatX
import pymc3 as pm
import theano.tensor as T
import sklearn
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from warnings import filterwarnings
filterwarnings('ignore')
sns.set_style('white')
from sklearn import datasets
from sklearn.preprocessing import scale
from sklearn.cross_validation import train_test_split
from sklearn.datasets import make_moons

In [37]:
# Load the `combined` frame from its on-disk pickle cache; when the cache is
# missing, unreadable or truncated, rebuild it and write the cache.
# NOTE(review): `build_prices_dfs` is not defined in the visible part of this
# notebook -- it must already exist in the kernel for the rebuild path to work.
# NOTE: unpickling can execute arbitrary code; only load a cache file that this
# notebook itself produced.
COMBINED_CACHE = 'combined.pkl'
try:
    combined = pd.read_pickle(COMBINED_CACHE)
except (IOError, OSError, EOFError):
    # Only I/O-level failures trigger a rebuild. Programming errors (e.g. a
    # NameError) and KeyboardInterrupt now propagate instead of being hidden.
    prices, combined = build_prices_dfs()
    pd.to_pickle(combined, COMBINED_CACHE)

In [38]:
# Column names selected as model input features.
FEATURE_COLS = [
    'beta_abs',
    'momentum',
]

In [39]:
# Seed for the train/test split so the partition (and everything fitted on it)
# is identical after a kernel restart.
SPLIT_SEED = 42

# Feature matrix: the selected predictor columns of `combined`.
X = combined[FEATURE_COLS]
# Binary target: 1 where `ra_month_return` is strictly positive, otherwise 0
# (a NaN return compares False against 0, so it would be labelled 0).
y = np.where(combined['ra_month_return'] > 0, 1, 0)
# Hold out 30% of the rows as the test set.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=.3, random_state=SPLIT_SEED)

In [43]:
def construct_nn(ann_input, ann_output, n_hidden=64, n_features=None,
                 total_size=None):
    """Build a PyMC3 model of a Bayesian neural network for binary classification.

    Architecture: input -> tanh hidden layer -> tanh hidden layer -> sigmoid
    output, with no bias terms. Every weight has an independent Normal(0, 1)
    prior and is initialised (``testval``) from a standard-normal draw.

    Parameters
    ----------
    ann_input : tensor-like
        Input features, multiplied against a ``(n_features, n_hidden)``
        weight matrix.
    ann_output : tensor-like
        Observed binary labels passed to the Bernoulli likelihood.
    n_hidden : int, optional
        Number of units in each of the two hidden layers.
    n_features : int, optional
        Number of input columns. When omitted, falls back to the notebook
        global ``X.shape[1]`` (the original behaviour).
    total_size : int, optional
        Full training-set size handed to the likelihood so minibatch
        log-probabilities are rescaled correctly. When omitted, falls back to
        the notebook global ``y_train.shape[0]`` (the original behaviour).

    Returns
    -------
    pm.Model
        The constructed model; nothing has been fitted yet.
    """
    # Prefer explicit sizes; only reach for notebook globals when the caller
    # did not supply them, so existing calls keep working unchanged.
    if n_features is None:
        n_features = X.shape[1]
    if total_size is None:
        total_size = y_train.shape[0]

    # Initialize random weights between each layer
    init_1 = np.random.randn(n_features, n_hidden).astype(floatX)
    init_2 = np.random.randn(n_hidden, n_hidden).astype(floatX)
    init_out = np.random.randn(n_hidden).astype(floatX)

    with pm.Model() as neural_network:
        # Weights from input to hidden layer
        weights_in_1 = pm.Normal('w_in_1', 0, sd=1,
                                 shape=(n_features, n_hidden),
                                 testval=init_1)

        # Weights from 1st to 2nd layer
        weights_1_2 = pm.Normal('w_1_2', 0, sd=1,
                                shape=(n_hidden, n_hidden),
                                testval=init_2)

        # Weights from hidden layer to output
        weights_2_out = pm.Normal('w_2_out', 0, sd=1,
                                  shape=(n_hidden,),
                                  testval=init_out)

        # Build neural-network using tanh activation function
        act_1 = pm.math.tanh(pm.math.dot(ann_input,
                                         weights_in_1))
        act_2 = pm.math.tanh(pm.math.dot(act_1,
                                         weights_1_2))
        # Sigmoid squashes the final activation into a probability in (0, 1).
        act_out = pm.math.sigmoid(pm.math.dot(act_2,
                                              weights_2_out))

        # Binary classification -> Bernoulli likelihood. The variable is
        # registered on the model by name, so no local binding is needed.
        pm.Bernoulli('out',
                     act_out,
                     observed=ann_output,
                     total_size=total_size  # IMPORTANT for minibatches
                     )
    return neural_network


In [47]:
# Wrap the training data in Theano shared variables and build the model on them.
# Features are cast to `floatX` so their dtype matches the network weights
# (which are initialised with `.astype(floatX)`); otherwise a float32
# configuration would be fed float64 inputs.
ann_input = theano.shared(X_train.values.astype(floatX))
ann_output = theano.shared(y_train)
neural_network = construct_nn(ann_input, ann_output)

In [None]:
with neural_network:
    # Run ADVI to estimate posterior means, standard deviations, and the
    # evidence lower bound (ELBO).
    #
    # This also demonstrates the `cost_part_grad_scale` parameter, motivated by
    # approximateinference.org/accepted/RoederEtAl2016.pdf: in short, it is
    # used to reduce the variance of the gradient on the final iterations.
    # The scale lives in a shared variable so it can be changed between fits.
    s = theano.shared(pm.floatX(1))
    inference = pm.ADVI(cost_part_grad_scale=s)

    # Phase 1: fit with the scale at 1 until ADVI has nearly converged.
    inference.fit(n=20000)

    # Phase 2: set `s` to zero and CONTINUE the same inference object.
    # Calling `pm.fit(n=...)` without `method=inference` would start a brand
    # new ADVI run, discarding phase 1 and ignoring `s` entirely.
    s.set_value(pm.floatX(0))
    approx = inference.fit(n=30000)

Average Loss = 3.5455e+05:   1%|          | 169/20000 [01:08<2:11:52,  2.51it/s]