In [1]:
# -*- coding: utf-8 -*-
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pymc3 as pm
from pymc3 import Model, Normal, GaussianRandomWalk, HalfNormal
from pymc3 import NUTS, sample
from scipy import optimize
from pymc3 import traceplot
from pymc3 import summary
import theano.tensor as tt
import theano as T

# Load the series from disk. Column 0 of the table goes into `T` and column 1 into `Y`
# (the values later passed as `observed` to the likelihood), both cast to float32.
# NOTE(review): this rebinds `T`, which the import section bound to the theano module
# (`import theano as T`); after this line `T` is the NumPy array, not theano.
df = pd.read_csv("../data/data-ss1.txt")
T, Y = (df.values[:, col].astype(np.float32) for col in (0, 1))

# Number of distinct values in the "X" column; used below as the length of the state vector.
# NOTE(review): presumably every row has a distinct X so this equals len(Y) -- confirm.
n_times = len(df["X"].unique())

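# Note: some models pass a vector with one entry per data point (holding each point's class
# index) into a random variable declared with shape = number of classes. For a detailed
# explanation of that pattern see https://pymc-devs.github.io/pymc3/notebooks/GLM-hierarchical.html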

# The model is implemented twice: once without and once with GaussianRandomWalk.

# Approach 1: build the random walk by hand (no GaussianRandomWalk).
basic_model = Model()
with basic_model:
    # Priors
    s_mu = HalfNormal('s_mu', sd=100)    # scale of the step between adjacent time points
    s_Y = HalfNormal('s_Y', sd=100)      # scale of the state-vs-observation noise at each time point
    mu_0 = Normal('mu_0', mu=0, sd=100)  # initial state

    # Innovation terms: one per transition between consecutive time points
    e_mu = Normal('e_mu', mu=0, sd=s_mu, shape=n_times - 1)

    # State vector built by the recurrence mu[0] = mu_0, mu[t] = mu[t-1] + e_mu[t-1].
    # set_subtensor returns a new symbolic vector each time, so `mu` is rebound at every step.
    mu = tt.set_subtensor(tt.zeros((n_times))[0], mu_0)
    for t in range(1, n_times):
        mu = tt.set_subtensor(mu[t], mu[t - 1] + e_mu[t - 1])

    # Likelihood
    Y_obs = Normal('Y_obs', mu=mu, sd=s_Y, observed=Y)

    # Sampling
    trace = sample(1000)
    summary(trace)
    
# Approach 2: the same state-space model expressed with GaussianRandomWalk.
# It must live in its own Model: re-entering `basic_model` would try to register
# 's_mu', 's_Y' and 'Y_obs' a second time in a model that already contains them.
rw_model = Model()
with rw_model:
    # Priors
    s_mu = HalfNormal('s_mu', sd=100)  # scale of the step between adjacent time points
    s_Y = HalfNormal('s_Y', sd=100)    # scale of the state-vs-observation noise at each time point

    # Latent state. `sd` is passed by keyword on purpose: the first positional
    # distribution parameter of GaussianRandomWalk is `tau` (precision), so passing
    # s_mu positionally would treat the standard deviation as a precision.
    # The initial state keeps GaussianRandomWalk's default `init` prior.
    mu = GaussianRandomWalk('mu', sd=s_mu, shape=n_times)

    # Likelihood
    Y_obs = Normal('Y_obs', mu=mu, sd=s_Y, observed=Y)

    # Sampling
    trace = sample(1000)
    summary(trace)

Auto-assigning NUTS sampler...
Initializing NUTS using advi...
Average ELBO = -43.663: 100%|██████████| 200000/200000 [01:08<00:00, 2937.33it/s]
Finished [100%]: Average ELBO = -43.406
100%|██████████| 1000/1000 [02:43<00:00, 16.63it/s]


mu_0:

  Mean             SD               MC Error         95% HPD interval
  -------------------------------------------------------------------
  
  11.167           0.162            0.006            [10.849, 11.514]

  Posterior quantiles:
  2.5            25             50             75             97.5
  
  10.825         11.078         11.178         11.257         11.501


e_mu:

  Mean             SD               MC Error         95% HPD interval
  -------------------------------------------------------------------
  
  -0.120           0.198            0.008            [-0.510, 0.267]
  0.157            0.209            0.011            [-0.254, 0.529]
  -0.332           0.220            0.013            [-0.697, 0.158]
  0.012            0.171            0.007            [-0.307, 0.390]
  0.332            0.219            0.011            [-0.110, 0.790]
  -0.095           0.200            0.009            [-0.484, 0.318]
  -0.028           0.189            0.009         


