In [1]:
import pyfolio as pf
import pandas as pd
import pymc3 as pm
import scipy as sp

from pandas_datareader import data as web
from datetime import datetime



In [2]:
# get google ohlc data
# using pyfolio.utils.get_symbol_rets does not work

start = datetime(2017, 1, 1)
end = datetime(2018, 9, 24)
ohlc = web.DataReader("GOOG", "iex", start, end)

ohlc

Unnamed: 0_level_0,open,high,low,close,volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2017-01-03,778.81,789.6300,775.8000,786.140,1657268
2017-01-04,788.36,791.3400,783.1600,786.900,1072958
2017-01-05,786.08,794.4800,785.0200,794.020,1335167
2017-01-06,795.26,807.9000,792.2041,806.150,1640170
2017-01-09,806.40,809.9664,802.8300,806.650,1274645
2017-01-10,807.86,809.1299,803.5100,804.790,1176780
2017-01-11,805.00,808.1500,801.3700,807.910,1065936
2017-01-12,807.14,807.3900,799.1700,806.360,1353057
2017-01-13,807.48,811.2244,806.6900,807.880,1099215
2017-01-17,807.08,807.1400,800.3700,804.610,1362115


In [3]:
# convert ohlc data to daily returns: (today close - yesterday close) / yesterday close
# convert time index format to use numpy.datetime64

closing = ohlc.iloc[:, 3]
stock_rets = [0]
for i in range(1, closing.size):
    stock_rets.append((closing[i] - closing[i - 1]) / closing[i - 1])

# convert indices to pd.Timestamp format, following the format used by pyfolio.utils.get_symbol_rets
time_index = pd.to_datetime(ohlc.index)
stock_rets = pd.Series(stock_rets, index=time_index, name='daily_returns')

stock_rets

date
2017-01-03    0.000000
2017-01-04    0.000967
2017-01-05    0.009048
2017-01-06    0.015277
2017-01-09    0.000620
2017-01-10   -0.002306
2017-01-11    0.003877
2017-01-12   -0.001919
2017-01-13    0.001885
2017-01-17   -0.004048
2017-01-18    0.001815
2017-01-19   -0.004832
2017-01-20    0.003547
2017-01-23    0.017751
2017-01-24    0.005566
2017-01-25    0.014323
2017-01-26   -0.004212
2017-01-27   -0.010623
2017-01-30   -0.025495
2017-01-31   -0.006893
2017-02-01   -0.001374
2017-02-02    0.003563
2017-02-03    0.003707
2017-02-06   -0.000187
2017-02-07    0.007026
2017-02-08    0.001747
2017-02-09    0.001460
2017-02-10    0.005077
2017-02-13    0.006846
2017-02-14    0.001477
                ...   
2018-08-10   -0.009199
2018-08-13   -0.002101
2018-08-14    0.005741
2018-08-15   -0.022317
2018-08-16   -0.006497
2018-08-17   -0.004584
2018-08-20    0.005670
2018-08-21   -0.005092
2018-08-22    0.004752
2018-08-23   -0.001615
2018-08-24    0.012668
2018-08-27    0.017343
2018-0

In [4]:
# use pyfolio api
# https://quantopian.github.io/pyfolio/notebooks/bayesian/

# 80/20 split for training and testing
test_date = stock_rets.index[int(len(stock_rets.index) * 0.8)]
pf.create_bayesian_tear_sheet(stock_rets, live_start_date=test_date)



TypeError: Cannot compare tz-naive and tz-aware datetime-like objects.

In [10]:
# use pymc3
# Normal Model

data = stock_rets
n_samples = 3000

with pm.Model():
    mu = pm.Normal('mean returns', mu=0, sd=.01, testval=data.mean())
    sigma = pm.HalfCauchy('volatility', beta=1, testval=data.std())
    returns = pm.Normal('returns', mu=mu, sd=sigma, observed=data)

    # Fit the model
    start = pm.find_MAP()
    step = pm.NUTS(scaling=start)
    trace = pm.sample(n_samples, step, start=start)

logp = 1,286.5, ||grad|| = 9.9337: 100%|██████████| 5/5 [00:00<00:00, 1361.70it/s]
Multiprocess sampling (2 chains in 2 jobs)
NUTS: [volatility, mean returns]
Sampling 2 chains: 100%|██████████| 7000/7000 [00:03<00:00, 1877.11draws/s]


returns

In [9]:
# use pymc3
# StudentT Model

with pm.Model():
    mu = pm.Normal('mean returns', mu=0, sd=.01)
    sigma = pm.HalfCauchy('volatility', beta=1)
    nu = pm.Exponential('nu_minus_two', 1. / 10.)

    returns = pm.StudentT('returns', nu=nu + 2, mu=mu, sd=sigma, 
                   observed=data)

    # Fit model to data
    start = pm.find_MAP(fmin=sp.optimize.fmin_powell)
    step = pm.NUTS(scaling=start)
    trace = pm.sample(n_samples, step, start=start)

logp = 1,313:   3%|▎         | 163/5000 [00:00<00:02, 1665.01it/s]   


Optimization terminated successfully.
         Current function value: -1312.995725
         Iterations: 4
         Function evaluations: 163


Multiprocess sampling (2 chains in 2 jobs)
NUTS: [nu_minus_two, volatility, mean returns]
Sampling 2 chains: 100%|██████████| 7000/7000 [00:06<00:00, 1164.29draws/s]
