## 46. Nonparametric Bayesian Models

### <font color=blue>**1.** </font> Dirichlet Process

#### <font color=green>**1.1.** </font> 棒折り過程（SBP : Stick-Breaking Process）

In [None]:
## 出典 : https://github.com/Ma-sa-ue/practice/tree/master/machine%20learning(python)/nonparabayes

In [None]:
import numpy as np
import scipy as sp
import matplotlib.pyplot as plt

In [None]:
#### stick-breaking weights: c_k = v_k * prod_{j<k} (1 - v_j)

def get_ck(v,_N):
  """Convert stick-breaking fractions into weights.

  Args:
    v: indexable sequence of break fractions; v[0] .. v[_N-1] are read.
    _N: number of weights to produce.

  Returns:
    A list of length _N whose i-th entry is v[i] times the product of
    (1 - v[j]) for all j < i.
  """
  c = []
  # Length of stick still unbroken; carried across iterations so the
  # product is not recomputed from scratch for every index.
  remaining = 1.0
  for i in range(_N):
    c.append(remaining*v[i])
    remaining = remaining*(1 - v[i])
  return c

In [None]:
### Stick breaking process
def sbp(alpha, N):
  """Draw N stick-breaking weights.

  The break fractions are sampled from Beta(1, alpha) and turned into
  weights by get_ck.
  """
  fractions = np.random.beta(1, alpha, N)
  return get_ck(fractions, N)

In [None]:
N = int(5e2)

plt.figure(figsize=(12,6))
plt.subplot(1,2,1)
plt.plot(sbp(100.0, N))
plt.ylim(0, 0.05)
plt.title("alpha=100.0")

plt.subplot(1,2,2)
plt.plot(sbp(1.0, N))
plt.title("alpha=1.0")
plt.ylim(0, 0.6)

plt.show()

#### <font color=green>**1.2.** </font> SBP 別の実装例

In [None]:
## 出典 : https://gist.github.com/tok41/d3548e481d7e4fbb98ccb3ce5d21e075#file-sbp_try1-ipynb

概要
- 棒折り過程（SBP; Stick Breaking Process）を実装してみる
- SBPはディリクレ過程の実現例の一つ
- クラス割当（データがどのクラスに属するかを示す変数）を積分消去し、無限次元の混合モデルを考える
 - 無限次元の混合比を生成するための確率過程としてSBPが利用できる

In [None]:
import numpy as np
import scipy as sp
import matplotlib.pyplot as plt
import seaborn as sns

sns.set()
np.random.seed(100)

In [None]:
alphas = [2, 5, 10]

xt = np.linspace(0, 1, 100)

fig = plt.figure(figsize=(7, 4))
ax = fig.subplots(1, 1)
for alp in alphas:
    p = sp.stats.beta(1, alp).pdf(xt)
    ax.plot(xt, p, label = f'(1, {alp})')
ax.legend()
plt.show()

In [None]:
def SBP(K, alpha):
  """Sample K mixture weights by truncated stick breaking.

  Args:
    K: number of sticks (truncation level).
    alpha: second shape parameter of the Beta(1, alpha) break fractions.

  Returns:
    Array of K weights; weight k is the k-th break fraction times the
    stick length left over after the previous breaks.
  """
  breaks = sp.stats.beta(1, alpha).rvs(size=K)
  # Stick left before each break: 1, (1-v_1), (1-v_1)(1-v_2), ...
  remaining = np.cumprod(np.concatenate([[1], (1 - breaks)[:-1]]))
  return breaks * remaining

In [None]:
K = 100
alphas = [2, 10]

lst_pis = []
for a in alphas:
  pis = SBP(K, a)
  lst_pis.append(pis)
  print(f'alpha={a}, {sum(pis)}')

In [None]:
fig = plt.figure(figsize=(10, 6))
ax = fig.subplots(1, 1)

for i, pis in enumerate(lst_pis):
  ax.plot(np.cumsum(pis), label=f'$\\alpha={alphas[i]}$')
ax.legend()
plt.show()

In [None]:
fig = plt.figure(figsize=(7, 8))
ax = fig.subplots(2, 1)

for i, pis in enumerate(lst_pis):
  #ax.plot(np.cumsum(pis), label=f'$\\alpha={alps[i]}$')
  ax[i].bar(np.arange(K), pis, label=f'$\\alpha={alphas[i]}$')
  ax[i].legend()
plt.show()

$\alpha$が大きい場合、混合比がそこまで大きくないケースが多数現れる。これが、CRPにおけるテーブルの数が多くなることに対応している。

$\alpha$が小さいと、大きい確率が割り当てられたクラスが少数現れる。実際SBPでは、計算の都合上クラスの上限を決めるため、微小な混合比が割り当てられたクラスがあるが、非常に微小。

#### <font color=green>**1.3.** </font> 中華料理店過程（CRP : Chinese Restaurant Process）

In [None]:
## 出典 : https://learning-with-machine.hatenablog.com/entry/2020/08/26/193000
##        https://gist.github.com/tok41/31e18bea891ddb31ebe2c9e0435d5a70#file-crp_try1-ipynb

概要
- 中華料理店過程（Chinese Restaurant Process;CRP）を実装して動作を確認してみる
- CRPはディリクレ過程の一つの実現例
- 分割の仕方（クラス割り当て）の事前分布として利用できる
  - n個のデータを分割するとすればどのような分割がどのような確率として起こり得るのかのモデル（「続 わかりやすいパターン認識」, p.226 より）
- CRPを使う場合、混合数を積分消去し、無限次元の混合モデルに拡張される
  - このときのクラスの割当についての事前分布としてCRPが使われる

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

sns.set()
np.random.seed(100)

$
  p(z_l | \overline{\alpha}) = \begin{cases}
    \dfrac{n_l(z_l)}{l + \overline{\alpha}} & (着席済みテーブルに座る、それぞれの確率) \\
    \dfrac{\overline{\alpha}}{l + \overline{\alpha}} & (新しいテーブルに一人目として座る確率)
  \end{cases}
$

In [None]:
def prob_table_choice(lst_n, alpha=1.0):
  """Return the seating probabilities for the next customer.

  Args:
    lst_n: number of customers currently seated at each table.
    alpha: concentration parameter.

  Returns:
    A list with one probability per existing table followed by the
    probability of opening a new table.
  """
  denom = sum(lst_n) + alpha
  probs = [count / denom for count in lst_n]
  probs.append(alpha / denom)
  return probs

In [None]:
def CRP(n, alpha=1.0):
  """Simulate n customers of a Chinese Restaurant Process.

  Args:
    n: number of customers to seat.
    alpha: concentration parameter forwarded to prob_table_choice.

  Returns:
    (counts, history): counts is the list of customers per table;
    history is a dict with 'table_nums' (number of tables after each
    customer) and 'chosen_tables' (table index picked by each customer).
  """
  counts = []
  table_nums = []
  chosen_tables = []
  for _ in range(n):
    probs = prob_table_choice(counts, alpha=alpha)
    # The last candidate index stands for "open a new table".
    pick = np.random.choice(np.arange(len(counts) + 1), p=probs)
    if pick == len(counts):
      counts.append(1)
    else:
      counts[pick] += 1
    table_nums.append(len(counts))
    chosen_tables.append(pick)
  return counts, {'table_nums': table_nums, 'chosen_tables': chosen_tables}

In [None]:
N = 1000
customer_counts1, history1 = CRP(N, alpha=10)
customer_counts2, history2 = CRP(N, alpha=2)

fig = plt.figure(figsize=(7, 4))
ax = fig.subplots(1, 1)
ax.plot(history1['table_nums'], label=f'$\\alpha=10$')
ax.plot(history2['table_nums'], label=f'$\\alpha=2$')
ax.legend()
ax.set_xlabel('number of customer')
ax.set_ylabel('number of table')

In [None]:
fig = plt.figure(figsize=(12, 4))
ax = fig.subplots(1, 2, sharex=True, sharey=True)
ax[0].bar(np.arange(1, len(customer_counts1)+1), sorted(customer_counts1, reverse=True), label=f'$\\alpha=10$')
ax[1].bar(np.arange(1, len(customer_counts2)+1), sorted(customer_counts2, reverse=True), label=f'$\\alpha=2$')
ax[0].legend()
ax[1].legend()

#### <font color=green>**1.4.** </font> Pitman-Yor process

In [None]:
## 出典 : https://github.com/Ma-sa-ue/practice/tree/master/machine%20learning(python)/nonparabayes

In [None]:
import numpy as np
import matplotlib.pyplot as plt

$
  p(z_l | \overline{\alpha}) = \begin{cases}
    \dfrac{n_l(z_l) - d}{\sum{n_l(z_l)} + \overline{\alpha}} & (着席済みテーブルに座る、それぞれの確率) \\
    \dfrac{\overline{\alpha} + dK}{\sum{n_l(z_l)} + \overline{\alpha}} & (新しいテーブルに一人目として座る確率)
  \end{cases}
$

ここで $K$ は着席済みテーブルの数、$d$ はディスカウント係数（$d=0$ のとき CRP に一致する）。

In [None]:
def alec(x,alpha,d):
  """Build the cumulative interval boundaries used to pick a table.

  Args:
    x: number of customers at each existing table.
    alpha: concentration parameter.
    d: discount subtracted once per table.

  Returns:
    A list starting with 0 and ending with 1. Entry i+1 is
    (customers at tables 0..i minus d*(i+1)) / (total customers + alpha),
    so the final gap up to 1 corresponds to opening a new table.
  """
  total = sum(x)
  bounds = [0]
  seated = 0
  for rank, count in enumerate(x, start=1):
    seated = seated + count
    bounds.append((seated*1.0 - d*rank)/(total+alpha)*1.0)
  bounds.append(1)
  return bounds

In [None]:
#### d=0 : Chinese Restaurant Process
def CRP(_N,alpha = 30):
  """Seat _N customers with a CRP and return the table sizes.

  The first customer always sits at table 0. Each later customer draws
  a uniform number and joins the table whose interval (from alec with
  d = 0) contains it; the last interval opens a new table.

  Returns:
    List with the number of customers at each table.
  """
  table_sizes = [1]
  edges = [0, 1.0/((alpha+1)*1.0), 1]
  for _ in range(_N-1):
    u = np.random.uniform()
    new_table_slot = len(edges) - 2
    for slot in range(len(edges) - 1):
      if edges[slot] < u < edges[slot+1]:
        if slot == new_table_slot:
          table_sizes.append(0)
        table_sizes[slot] += 1
        break  # intervals are disjoint, no later slot can match
    edges = alec(table_sizes, alpha, 0.0)
  return table_sizes

In [None]:
### draw the sample 
xxx =[]
yyy =[]
for k in range(20):
  x = [len(CRP(100*k,30)) for j in range(5)]
  xxx.extend(x)
  y = [ k*100 for j in range(5)]
  yyy.extend(y)

In [None]:
#### calculate the expectation
def  expect(_i,alpha=30):
  """Return alpha * log(1 + _i / alpha).

  Plotted as the expected-table-count curve against the CRP samples.
  """
  return alpha * np.log(1+ _i/alpha)

# Customer counts 0, 0.25, 0.5, ... and the matching curve values.
yy = [0.25*i for i in range(8000)]
xx = [expect(t) for t in yy]

In [None]:
plt.figure(figsize=(12,6))
plt.plot(yy,xx) ## expectation
plt.scatter(yyy,xxx,c="r") ## sample
plt.title("CRP alpha=30.0")
plt.show()

In [None]:
### Pitman-Yor process
def PYR(_N, alpha=10, d=0.0):
  """Seat _N customers with a Pitman-Yor process and return table sizes.

  Args:
    _N: number of customers; the first one always sits at table 0.
    alpha: concentration parameter.
    d: discount parameter; d = 0 gives the Chinese Restaurant Process.

  Returns:
    List with the number of customers at each table.
  """
  custom_number_list = [1]
  # Build the first set of intervals with alec() as well, so that the
  # discount d is already applied for the second customer
  # ((1 - d) / (1 + alpha) instead of 1 / (1 + alpha)).
  custom_judge = alec(custom_number_list, alpha, d)
  for _ in range(_N-1):
    judge = np.random.uniform()  # exactly one draw per customer
    for j in range(len(custom_judge)-1):
      if custom_judge[j] < judge < custom_judge[j+1]:
        if j == len(custom_judge)-2:
          # the last interval means "open a new table"
          custom_number_list.append(0)
        custom_number_list[j] += 1
        break
    custom_judge = alec(custom_number_list, alpha, d)
  return custom_number_list

In [None]:
def _pyr_table_counts(d, repeats=4, steps=20, alpha=10):
  """Sample table counts from PYR for customer counts 0, 100, ..., 1900.

  Returns (tables, customers): two parallel lists with `repeats`
  entries per customer count.
  """
  tables, customers = [], []
  for k in range(steps):
    tables.extend([len(PYR(k*100, alpha, d)) for j in range(repeats)])
    customers.extend([k*100 for j in range(repeats)])
  return tables, customers

# Same sampling order as before: d = 0.5, then 0.1, then 0.9.
xxx2, yyy2 = _pyr_table_counts(0.5)
xxx3, yyy3 = _pyr_table_counts(0.1)
xxx4, yyy4 = _pyr_table_counts(0.9)

In [None]:
### convert log scale
def convert_log(_g):
  """Return the element-wise base-10 logarithm of _g as a NumPy array."""
  return np.log10(np.asarray(_g))

In [None]:
# Log-log scatter of table count against customer count, one colour per
# discount parameter. Each series is plotted against its own x values
# (the third series previously used yyy3 instead of yyy4).
plt.figure(figsize=(10,6))
plt.scatter(convert_log(yyy2),convert_log(xxx2), c="blue")    # d = 0.5
plt.scatter(convert_log(yyy3),convert_log(xxx3), c="yellow")  # d = 0.1
plt.scatter(convert_log(yyy4),convert_log(xxx4), c="green")   # d = 0.9
plt.xlim(0,4)
plt.ylim(0,4)
plt.title("pitman-yor")
plt.xlabel("#customers(log scale)")
plt.ylabel("#tables (log scale)")
plt.show()

### <font color=blue>**2.** </font> infiniteGMM

#### <font color=green>**2.1.** </font> CRPでクラスタリング

In [None]:
# 出典 : https://github.com/tokky-cpp/machine_learning

In [None]:
# CRP(中華料理店過程)に基づいてクラス数未定のクラスタリングのための事前分布を決定する。

import random
import numpy as np
import matplotlib.pyplot as plt
import math

In [None]:
alpha = 1.
n = 100

In [None]:
def crp(alpha=alpha, n=n):  # alpha: concentration parameter  n: total number of customers
  """Seat n customers with a Chinese Restaurant Process.

  Args:
    alpha: concentration parameter.
    n: total number of customers.

  Returns:
    (s, table): s is the list of table indices, one per customer;
    table maps each table index to its number of customers.
  """
  s = []      # table index chosen by each customer
  table = {}  # number of customers per table
  for i in range(n):
    if i == 0:
      # the first customer always sits at table 0
      s.append(0)
      table[0] = 1
      continue
    prob = random.random()  # in [0, 1)
    denom = i + alpha
    # Accumulate the seating probabilities and sit down as soon as the
    # running total reaches the random draw. The new table comes first.
    cumulative = alpha / denom
    if cumulative >= prob:
      s.append(len(table))
      table[len(table)] = 1
      continue
    chosen = None
    for t, count in table.items():
      cumulative += count / denom
      chosen = t
      if cumulative >= prob:
        break
    # If rounding keeps the total just below the draw, the loop ends
    # without a break; the customer then takes the last table instead of
    # being dropped, so len(s) == n always holds.
    s.append(chosen)
    table[chosen] += 1
  return s, table

$
  p(z_l | \overline{\alpha}) = \begin{cases}
    \dfrac{n_l(z_l)}{l + \overline{\alpha}} & (着席済みテーブルに座る、それぞれの確率) \\
    \dfrac{\overline{\alpha}}{l + \overline{\alpha}} & (新しいテーブルに一人目として座る確率)
  \end{cases}
$

In [None]:
def graph(table): # takes a dict and draws it as a line graph
  """Plot the table occupancies in descending order.

  Args:
    table: dict mapping table index to number of customers.
  """
  ranked = sorted(table.items(), key=lambda item: item[1], reverse=True)
  people = [count for _, count in ranked]
  plt.plot(list(range(len(people))), people)
  plt.show()
  return

In [None]:
s, table = crp(10, 10000)

#print(s)
#print(table)

graph(table)

In [None]:
# 崩壊型ギブスサンプリング(続・パタp264)に基づいて記述

def cut(A, i):  # return A with row i and column i removed, i = 0..N-1
  """Delete row i and column i from matrix A.

  A warning and the matrix are printed when A is not square; the
  deletion is attempted regardless.
  """
  n_rows, n_cols = A.shape[0], A.shape[1]
  if n_rows != n_cols:
    print("[Error]正方行列ではありません")
    print(A)
  without_row = np.delete(A, i, 0)
  return np.delete(without_row, i, 1)

In [None]:
def clustering(K=None, M=1., W=None, alpha=1., beta=1.):
  """Compute one log-score per data point from the similarity matrix.

  For every index i a sum of log terms is accumulated from determinants
  of K, W and of the same matrices with row/column i removed (see cut),
  and the list of these sums is returned.

  NOTE(review): no cluster assignment is updated or returned here; s, c
  and P_max are computed but not used afterwards.
  """
  # K : similarity matrix
  # M : dimensionality of the feature vectors
  # W : (N x N matrix) prior for the covariance matrix of the centered Wishart distribution
  # alpha : passed to crp() as its first argument (original note: latent variable for class membership)
  # beta : weight used in the determinant terms below (original note: "what is this?")

  N = K.shape[0]

  # Step 1: initial setup
  s, s_i = crp(alpha, N)  # initialise the latent class assignments
  c = len(s_i)  # total number of classes
  P_max = 0.0 # initialise the maximum posterior probability

  # Step 2: update of the class assignments
  prob = []
  for i in range(N):
    p = 0.0
    p += -1*(M/2.0)*math.log(2)
    p += -1*((N-1)/4)*math.log(math.pi)
    p += -1*math.log(math.gamma((M-N+1)/2))
    p += ((M+beta)/2)*math.log((np.linalg.det(K+np.linalg.inv(W)))/(np.linalg.det(cut(K,i)+np.linalg.inv(cut(W,i)))))
    p += -1*(beta/2)*math.log(np.linalg.det(W)/np.linalg.det(cut(W,i)))
    p += (M-N+1)/2.0*math.log(np.linalg.det(K)/np.linalg.det(cut(K,i)))
    p += -1*(1/2.0)*math.log(np.linalg.det(cut(K,i)))
    #print(i, p)
    prob.append(p)

  return prob

In [None]:
# ガウスガンマ分布に基づいてデータ生成
# 生成モデルにおいて、クラスごとのガウス分布のパラメータを決定するために使用

def normal_gamma(a=5, b=6, mu0=0, beta=2, POINT=4, see=False):
  """Draw (mean, precision) pairs from a normal-gamma distribution.

  For each pair the precision is drawn from Gamma(shape=a, rate=b) and
  the mean from a normal with mean mu0 and variance 1 / (beta * precision).

  Args:
    a: gamma shape parameter.
    b: gamma rate parameter (1/b is passed to NumPy as the scale).
    mu0: location of the mean.
    beta: scaling of the precision of the mean.
    POINT: number of pairs to draw.
    see: when True, scatter-plot the drawn pairs.

  Returns:
    List of POINT tuples (mean, precision), both plain floats.
  """
  ll = []
  mumu = []
  for _ in range(POINT):
    lamb = float(np.random.gamma(a, 1/b))
    # np.random.normal takes a standard deviation, so the variance
    # 1 / (beta * lamb) has to go through a square root.
    mu = float(np.random.normal(mu0, 1/np.sqrt(beta*lamb)))
    ll.append(lamb)
    mumu.append(mu)

  if see:
    plt.plot(mumu, ll, "o")
    plt.show()

  return list(zip(mumu, ll))

In [None]:
# 生成モデルに基づいた特徴ベクトル生成
# 特徴ベクトル間の類似度K(行列)を生成

def make_K(N=5, M=8, alpha=1.0, a=5, b=10, visual=False):
  """Generate clustered data from the generative model and return its Gram matrix.

  Cluster assignments come from crp(), one (mean, precision) pair per
  cluster from normal_gamma(), and every data point is drawn from the
  Gaussian of its cluster.

  Args:
    N: number of data points.
    M: dimensionality of each data point.
    alpha: parameter handed to crp().
    a: shape parameter handed to normal_gamma().
    b: rate parameter handed to normal_gamma().
    visual: when True, scatter-plot the first two dimensions per cluster.

  Returns:
    2-D array of inner products between all pairs of data points, with
    one row/column per assignment returned by crp().
  """
  # prior over classes (CRP)
  (s, table) = crp(alpha, N)
  cls_num = len(table)
  print("{} clusters".format(cls_num))

  # prior over parameters (normal-gamma); a and b are forwarded so that
  # the arguments of make_K actually take effect
  theta = normal_gamma(a=a, b=b, POINT=cls_num, see=False)

  # draw every data point from the Gaussian of its cluster
  labels = np.asarray(s)
  data = np.empty((len(s), M))
  for row, cls in enumerate(s):
    mean, precision = theta[cls]
    # ravel() accepts both a scalar and a 1-element array;
    # the second entry is a precision, so the std-dev is 1/sqrt(precision)
    data[row] = np.random.normal(np.ravel(mean)[0],
                                 1.0/np.sqrt(np.ravel(precision)[0]),
                                 size=M)

  # data visualize
  if visual:
    for i in range(len(table)):
      members = data[labels == i]
      plt.plot(members[:, 0], members[:, 1], "o")
    print(table)
    plt.show()

  # similarity = inner product between every pair of data points
  return data @ data.T

In [None]:
#------------[start] data generation -> similarity matrix ----------
K = make_K() # without options the defaults of make_K are used (N=5, M=8)
if len(K)>1:
  K = K.reshape(len(K[0]), len(K[1]))
#print("K : {}".format(K))
#------------[end] data generation -> similarity matrix ----------

#------------ clustering ----------
# Perform collapsed Gibbs sampling given the similarity matrix and the other parameters
# NOTE(review): M=5 here while make_K() above generated data with its default M=8 — confirm which is intended
M=5
clustering(K=K, M=M, W=K, alpha=1, beta=2)

#### <font color=green>**2.2.** </font> scikit-learnを使用、irisデータで試す

In [None]:
import numpy as np
from scipy import linalg
import matplotlib.pyplot as plt
import matplotlib as mpl

from sklearn import datasets, mixture

In [None]:
colors = ['navy', 
          'c', 
          'cornflowerblue', 
          'gold',
          'darkorange']

In [None]:
def plot_results(X, Y_, means, covariances, size, index):
  """Scatter the points of each used component and draw its covariance ellipse.

  Args:
    X: data array; columns 0 and 1 are plotted.
    Y_: predicted component index per row of X.
    means: one mean per component.
    covariances: one covariance matrix per component.
    size: marker size passed to plt.scatter.
    index: zero-based position of the subplot in a 1x3 grid.
  """
  splot = plt.subplot(1, 3, 1 + index)
  for i, (mean, covar) in enumerate(zip(means, covariances)):
    # components that received no points are not drawn
    if not np.any(Y_ == i):
      continue
    v, w = linalg.eigh(covar)
    v = 2. * np.sqrt(2.) * np.sqrt(v)
    u = w[0] / linalg.norm(w[0])
    shade = colors[i % len(colors)]
    plt.scatter(X[Y_ == i, 0], X[Y_ == i, 1], size, color=shade)
    angle = np.arctan(u[1] / u[0])
    angle = 180. * angle / np.pi  # convert to degrees
    # angle is keyword-only in recent Matplotlib releases
    ell = mpl.patches.Ellipse(mean, v[0], v[1], angle=180. + angle, color=shade)
    ell.set_clip_box(splot.bbox)
    ell.set_alpha(0.5)
    splot.add_artist(ell)
In [None]:
# Number of samples per component (also reused below as n_components of the DP model)
n_samples = 500

# Fix the seed, then generate random samples from two components
np.random.seed(0)

C = np.array([[0., -0.1], [1.7, .4]])
X = np.r_[np.dot(np.random.randn(n_samples, 2), C),
          .7 * np.random.randn(n_samples, 2) + np.array([-6, 3])]

plt.figure(figsize=(17,5))

# Fit a Gaussian mixture with EM using five components
gmm = mixture.GaussianMixture(n_components=5, covariance_type='full').fit(X)
plot_results(X, gmm.predict(X), gmm.means_, 
             gmm.covariances_, 1.0, 0)

# Fit a Bayesian Gaussian mixture with a Dirichlet-distribution prior using five components
dpgmm = mixture.BayesianGaussianMixture(n_components=5,
                                        weight_concentration_prior_type='dirichlet_distribution', 
                                        covariance_type='full').fit(X)
plot_results(X, dpgmm.predict(X), dpgmm.means_, 
             dpgmm.covariances_, 1.0, 1)

# Fit a Bayesian Gaussian mixture with a Dirichlet-process prior;
# the number of components is capped at n_samples (500), not five
dpgmm = mixture.BayesianGaussianMixture(n_components=n_samples,
                                        weight_concentration_prior_type='dirichlet_process', 
                                        covariance_type='full').fit(X)
plot_results(X, dpgmm.predict(X), dpgmm.means_, 
             dpgmm.covariances_, 1.0, 2)

plt.show()

In [None]:
def plot_results_2(X, Y_, means, covariances, size, index):
  """Scatter the first two features per component and draw a 2-D covariance ellipse.

  The data may have more than two features; only features 0 and 1 are
  plotted, so the ellipse is built from the matching 2x2 block of each
  covariance matrix and the first two entries of each mean.

  Args:
    X: data array; columns 0 and 1 are plotted.
    Y_: predicted component index per row of X.
    means: one mean per component.
    covariances: one covariance matrix per component.
    size: marker size passed to plt.scatter.
    index: zero-based position of the subplot in a 1x3 grid.
  """
  splot = plt.subplot(1, 3, 1 + index)
  for i, (mean, covar) in enumerate(zip(means, covariances)):
    # components that received no points are not drawn
    if not np.any(Y_ == i):
      continue
    center = np.asarray(mean)[:2]
    v, w = linalg.eigh(np.asarray(covar)[:2, :2])
    v = 2. * np.sqrt(2.) * np.sqrt(v)
    u = w[0] / linalg.norm(w[0])
    shade = color(i % color.N)  # wrap around instead of clamping to the last colour
    plt.scatter(X[Y_ == i, 0], X[Y_ == i, 1], size, color=shade)
    angle = np.arctan(u[1] / u[0])
    angle = 180. * angle / np.pi  # convert to degrees
    # angle is keyword-only in recent Matplotlib releases
    ell = mpl.patches.Ellipse(center, v[0], v[1], angle=180. + angle, color=shade)
    ell.set_clip_box(splot.bbox)
    ell.set_alpha(0.5)
    splot.add_artist(ell)

In [None]:
# Colormap called by plot_results_2 to colour each component
color = plt.get_cmap('tab20')

# Used below as n_components of the Dirichlet-process model
n_samples = 150

# Load the iris features and the ground-truth labels
X = datasets.load_iris().data
Y = datasets.load_iris().target

In [None]:
plt.figure(figsize=(17,5))

# Fit a Gaussian mixture with EM using five components
gmm = mixture.GaussianMixture(n_components=5, covariance_type='full').fit(X)
plot_results_2(X, gmm.predict(X), gmm.means_, 
               gmm.covariances_, 30.0, 0)

# Fit a Bayesian Gaussian mixture with a Dirichlet-distribution prior using five components
dpgmm = mixture.BayesianGaussianMixture(n_components=5,
                                        weight_concentration_prior_type='dirichlet_distribution', 
                                        covariance_type='full').fit(X)
plot_results_2(X, dpgmm.predict(X), dpgmm.means_, 
               dpgmm.covariances_, 30.0, 1)

# Fit a Bayesian Gaussian mixture with a Dirichlet-process prior;
# the number of components is capped at n_samples, not five
dpgmm = mixture.BayesianGaussianMixture(n_components=n_samples,
                                        weight_concentration_prior_type='dirichlet_process', 
                                        covariance_type='full').fit(X)
plot_results_2(X, dpgmm.predict(X), dpgmm.means_, 
               dpgmm.covariances_, 30.0, 2)

plt.show()

In [None]:
# 正解ラベル
plt.figure(figsize=(4.5,4.5))
plt.scatter(X[:,0],X[:,1], c=Y)
plt.show()

#### <font color=green>**2.3.** </font> pymc3を使用、irisデータで試す
<font color=red>注意：サンプリングに時間がかかる</font>

In [None]:
## 出典 : https://medium.com/@albertoarrigoni/dirichlet-processes-917f376b02d2

In [None]:
from scipy import stats
import numpy as np
import matplotlib.pyplot as plt

In [None]:
def DP(h, alpha):
  """Draw a truncated sample G = sum_k pi_k * delta(theta_k) from DP(alpha, h).

  Args:
    h: sampler of the base distribution, called as h(size=n).
    alpha: concentration parameter.

  Returns:
    (pi, theta): stick-breaking weights and the atoms drawn from h,
    both of length max(int(5 * alpha + 2), 500).
  """
  n = max(int(5 * alpha + 2), 500)
  breaks = stats.beta(1, alpha).rvs(size=n)  # break fractions
  # stick left before each break: 1, (1-b_1), (1-b_1)(1-b_2), ...
  leftover = np.concatenate(([1.0], np.cumprod(1 - breaks[:-1])))
  weights = breaks * leftover
  atoms = h(size=n)  # random draws from h
  return weights, atoms
        
def plot_normal_dp_approximation(alpha, n=2):
  """Plot n independent draws from DP(alpha, N(0, 1)) side by side.

  Each panel shows the atoms of one draw as vertical lines, rescaled so
  that the largest weight reaches the peak of the standard normal pdf,
  together with the pdf itself.

  Args:
    alpha: concentration parameter passed to DP.
    n: number of draws / panels.
  """
  x = np.linspace(-3, 3, 100)

  plt.figure(figsize=(14, 4))
  plt.suptitle(r'Two samples from DP($\alpha$). $\alpha$ = {}'.format(alpha))

  for i in range(n):
    # explicit (rows, cols, index) form also works for n >= 10
    plt.subplot(1, n, i + 1)
    pi, theta = DP(stats.norm.rvs, alpha)
    pi = pi * (stats.norm.pdf(0) / pi.max())
    plt.vlines(theta, 0, pi, alpha=0.5)
    plt.ylim(0, 1)
    plt.plot(x, stats.norm.pdf(x))
    # label each panel; labels set before any subplot exists would land
    # on an extra axes
    plt.ylabel(r'$\pi$')
    plt.xlabel(r'$\theta$')
  plt.show()

np.random.seed(3)
for alpha in [1, 10, 100]:
  plot_normal_dp_approximation(alpha)

In [None]:
import random
import matplotlib.pyplot as plt
from pylab import rcParams

rcParams['figure.figsize'] = 18, 6
fig, axs = plt.subplots(1, 3)
plot_count = 0
fig.suptitle('Chinese Restaurant Process customers distribution')

# Play with different concentrations
for concentration in [0.1, 1.0, 10]:
  # First customer always sits at the first table
  tables = [1]
  for n in range(2,100):
    # Get random number 0~1
    rand = random.random()
    p_total = 0
    existing_table = False
    # When customer n arrives, n - 1 customers are already seated, so
    # the normaliser is (n - 1 + concentration)
    denom = n - 1 + concentration
    for index, count in enumerate(tables):
      p_total += count / denom
      if rand < p_total:
        tables[index] += 1
        existing_table = True
        break
    # New table!! (remaining probability mass: concentration / denom)
    if not existing_table:
      tables.append(1)
  axs[plot_count].bar([i for i in range(len(tables))], tables)
  axs[plot_count].set_title(r'Concentration ($\alpha$) = {}'.format(concentration))
  plot_count+= 1

for ax in axs.flat:
  ax.set(xlabel='Table number', ylabel='N customers')

In [None]:
from sklearn.datasets import load_iris
import pandas as pd

df = pd.DataFrame(load_iris()['data'])
y = df.values
# Standardize the data
y = (y - y.mean(axis=0)) / y.std(axis=0)

In [None]:
import seaborn as sns

plt.figure(figsize=(12, 6))
plt.title('Histogram of the 3rd column of the (standardized) Iris dataset.')
plt.xlabel('x')
plt.ylabel('count')
sns.distplot(y[:, 2], ### 3 -> 2 
             bins=20, kde=False, rug=True)

In [None]:
import pymc3 as pm
from theano import tensor as tt

def stick_breaking(beta):
  """Turn break fractions into stick-breaking weights (symbolic).

  Weight k is beta[k] times the cumulative product of (1 - beta) over
  all earlier entries; the first weight is beta[0] itself.
  """
  survivors = tt.extra_ops.cumprod(1 - beta)[:-1]
  portion_remaining = tt.concatenate([[1], survivors])
  return beta * portion_remaining

# Truncation level: number of mixture components kept in the model
K = 30
with pm.Model() as model:
  # concentration parameter and the K stick-breaking fractions
  alpha = pm.Gamma('alpha', 1., 1.)
  beta = pm.Beta('beta', 1., alpha, shape=K)
  # mixture weights derived from the fractions
  w = pm.Deterministic('w', stick_breaking(beta))
  # per-component precision factors; lambda_ * tau is used as the
  # precision of both the component means and the observation model
  tau = pm.Gamma('tau', 1., 1., shape=K)
  lambda_ = pm.Uniform('lambda', 0, 5, shape=K)
  mu = pm.Normal('mu', 0, tau=lambda_ * tau, shape=K)
  # likelihood: the standardized 3rd iris column (index 2)
  obs = pm.NormalMixture('obs', w, mu, tau=lambda_ * tau,
                         observed=y[:, 2])

with model:
  # step=None is passed through to pm.sample
  step = None
  trace = pm.sample(500, tune=500, init='advi', random_seed=35171, step=step)

In [None]:
# Grid on which the posterior densities are evaluated
x_plot = np.linspace(-2.4, 2.4, 200)

# Calculate pdf for points in x_plot: np.atleast_3d and the inserted
# axes broadcast the grid against the traced per-component parameters;
# the standard deviation is 1 / sqrt(lambda * tau)
post_pdf_contribs = stats.norm.pdf(np.atleast_3d(x_plot),
                                   trace['mu'][:, np.newaxis, :],
                                   1. / np.sqrt(trace['lambda'] * trace['tau'])[:, np.newaxis, :])

# Weight (Gaussian) posterior probabilities by the posterior of w
# and sum over the last (component) axis
post_pdfs = (trace['w'][:, np.newaxis, :] * post_pdf_contribs).sum(axis=-1)

In [None]:
import seaborn as sns

# fig, ax = plt.subplots(figsize=(8, 6))
rcParams['figure.figsize'] = 12, 6
sns.distplot(y[:, 2], rug=True, label='Original dataset', bins=20)
plt.plot(x_plot, post_pdfs[0],
         c='#CD5C5C', label='Posterior samples') # Add this to plot the legend
plt.plot(x_plot, post_pdfs[::100].T, c='#CD5C5C')
plt.xlabel('Iris dataset (3rd column values)')
# plt.yticklabels([]);
plt.ylabel('Density')
plt.legend()
plt.show()

### <font color=blue>**3.** </font> 構造変化推定

In [None]:
## 出典 : https://gist.github.com/narrowlyapplicable/0922b733fa2cc75167f71eff448bf1a4

In [None]:
# ライブラリ

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.stats as st
import seaborn as sns

plt.style.use('ggplot')
np.random.seed(1234)

In [None]:
# シミュレーションデータ

x = np.arange(1, 91, 1)
y_tru = np.zeros(90)
y = np.zeros(90)

# Three linear regimes of 30 points each:
# (index range, intercept, slope, noise variance)
segments = [
  (slice(None, 30), 1, 0.5, 0.3),
  (slice(30, 60), 25, -0.3, 0.1),
  (slice(60, None), 1, 0.1, 0.2),
]
for seg, intercept, slope, noise_var in segments:
  # noise-free line, then the same line plus Gaussian noise
  y_tru[seg] = intercept + slope*x[seg]
  y[seg] = intercept + slope*x[seg] + st.norm.rvs(loc=0, scale=np.sqrt(noise_var), size=30)

In [None]:
fig, ax = plt.subplots(figsize = (7,5))
ax.plot(x, y, marker=".", color="k", linewidth=0)
ax.plot(x, y_tru, color="b")
ax.set_xlabel('x'); ax.set_ylabel('y')
ax.set_title('simulation data')
plt.show()

In [None]:
# 潜在変数z

def sampler_z(t, yt, xt, theta, sigma_y, z, alpha, T, mu, sigma, n0, tau):
  """Resample the cluster label of observation t.

  Every existing cluster is weighted by the normal likelihood of yt under
  that cluster's regression line times its size excluding observation t;
  a freshly drawn (theta_new, sigma_new) pair competes with weight alpha.

  Returns:
    (z_sample, theta_new, sigma_new): the sampled label and the
    parameters proposed for a new cluster (returned whether or not the
    new cluster was chosen).

  NOTE(review): prob_z is indexed by the label value k, so this assumes
  the labels in z are exactly 0..K-1 — confirm against the caller.
  """
  k_plus, n_t = np.unique(z, return_counts=True)
  prob_z = np.empty(k_plus.shape[0]+1)    # P(z_t = k), not normalized yet
  for k in k_plus:
    ## calculate n_k^{\t}: size of cluster k without observation t
    if(z[t]==k):
      n_kt = n_t[k_plus==k] - 1
    else:
      n_kt = n_t[k_plus==k]
    ## calculate the (unnormalized) probability that z_t = k
    prob_z[k] = st.norm.pdf(x=yt, loc = np.dot(theta[k_plus==k], np.r_[xt,1]),\
                            scale=sigma_y[k_plus==k])*(n_kt/(T-1+alpha))

  ## create new cluster: draw its parameters and weight it by alpha
  theta_new = st.multivariate_normal.rvs(mean=mu, cov=sigma, size=1)
  sigma_new = np.sqrt(st.invgamma.rvs(a=n0/2, scale=tau/2, size=1))
  prob_z[-1] = st.norm.pdf(x=yt, loc = np.dot(theta_new, np.r_[xt,1]), scale=sigma_new)*(alpha/(T-1+alpha))
    
  ## sampling: normalize, then pick among existing labels and one new label
  prob_z /= np.sum(prob_z)
  z_sample = np.random.choice(np.r_[k_plus, k_plus[-1]+1], size=1, p=prob_z)[0]   # random.choice?
  return z_sample, theta_new, sigma_new
  ### return new z[t]

In [None]:
# パラメータ

def sampler_theta(k, y, x, z, sigma_y, mu, sigma_inv):
  """Draw new regression coefficients for cluster k.

  Starting from the precision sigma_inv and the vector sigma_inv @ mu,
  the contributions of every observation assigned to cluster k are
  added, each scaled by 1 / sigma_y[k]**2; the coefficients are then
  drawn from the resulting multivariate normal. The inputs are not
  modified.

  Returns:
    The sampled coefficient vector for cluster k.
  """
  noise_var = sigma_y[k]**2
  design = np.c_[x.copy(), np.ones(x.shape[0])]  # rows are [x_t, 1]
  precision = sigma_inv.copy()
  weighted = np.dot(sigma_inv.copy(), mu)  # Sigma^-1 * mu
  for idx in np.where(z == k)[0]:  # observations in cluster k
    column = design[idx][:, np.newaxis]
    precision += np.dot(column, column.T) / noise_var
    weighted += (y[idx]*design[idx]) / noise_var
  cov = np.linalg.inv(precision)
  mean = np.dot(cov, weighted)
  return st.multivariate_normal.rvs(mean=mean, cov=cov, size=1)
  ### return new theta[k]

In [None]:
# 観測時ノイズの分散

def sampler_sigma_y(k, y, x, z, theta, n0, tau):
  """Draw a new observation-noise standard deviation for cluster k.

  The squared residuals of the observations assigned to cluster k are
  added to tau; the variance is drawn from an inverse gamma with shape
  (n0 + cluster size) / 2 and scale (tau + squared residuals) / 2, and
  its square root is returned.
  """
  members = np.where(z == k)[0]
  coef = theta[k]
  scale_sum = tau
  for idx in members:
    err = y[idx] - np.dot(coef, np.r_[x[idx], 1])
    scale_sum += np.dot(err, err)
  shape = (n0 + members.shape[0]) / 2
  return np.sqrt(st.invgamma.rvs(a=shape, scale=scale_sum/2))

In [None]:
# μ（θ_new の平均）

def sampler_mu(theta, sigma_inv, mu0, v0_inv):
  """Draw mu given the cluster coefficients.

  Args:
    theta: array with one coefficient vector per cluster.
    sigma_inv: precision matrix of the coefficients.
    mu0: prior mean of mu.
    v0_inv: prior precision of mu.

  Returns:
    A sample from the multivariate normal with covariance
    inv(n_clusters * sigma_inv + v0_inv) and the matching mean.
  """
  n_clusters = theta.shape[0]
  post_cov = np.linalg.inv(n_clusters * sigma_inv + v0_inv)
  linear_term = np.dot(sigma_inv, np.sum(theta, axis=0)) + np.dot(v0_inv, mu0)
  post_mean = np.dot(post_cov, linear_term)
  return st.multivariate_normal.rvs(mean=post_mean, cov=post_cov)

In [None]:
# Σ（θ_new の共分散行列）

def sampler_Sigma_inv(theta, mu, nu0, sigma0_inv):
  """Draw the precision matrix of the cluster coefficients from a Wishart.

  Args:
    theta: array with one coefficient vector per cluster.
    mu: current mean of the coefficients.
    nu0: prior degrees of freedom.
    sigma0_inv: prior matrix to which the scatter of theta around mu is
      added; it is left unmodified.

  Returns:
    A Wishart sample with nu0 + n_clusters degrees of freedom and scale
    inv(sigma0_inv + scatter).
  """
  nup = nu0 + theta.shape[0]
  # Work on a private copy: accumulating into the argument itself would
  # change the caller's prior matrix on every call.
  sigmap_inv = np.array(sigma0_inv, dtype=float)
  for row in theta:
    tmp = (row - mu)[:,np.newaxis]
    sigmap_inv += np.dot(tmp, tmp.T)
  return st.wishart.rvs(df=nup, scale=np.linalg.inv(sigmap_inv))

In [None]:
# τ（σy の生成過程のパラメータ）

def sampler_tau(sigma_y, n0, m0, tau0):
  """Draw tau given the per-cluster noise standard deviations.

  The sample comes from a gamma distribution with shape
  (m0 + n0 * n_clusters) / 2 and SciPy scale (tau0 + sum(1 / sigma_y**2)) / 2.

  NOTE(review): the second term is passed as SciPy's `scale`; if the
  model specifies it as a rate, the scale should be its reciprocal —
  confirm against the derivation.
  """
  n_clusters = sigma_y.shape[0]
  shape_x2 = m0 + n0*n_clusters
  precisions = 1/sigma_y**2
  scale_x2 = tau0 + np.sum(precisions)
  return st.gamma.rvs(a=shape_x2/2, scale=scale_x2/2)

In [None]:
# サンプリング
# 初期値設定
alpha = 1
n0 = 10

In [None]:
# 事前分布のパラメータ群
mu0 = np.array([0, 0])
v0_tmp = np.random.uniform(-10, 10, (2, 2))   #np.array([[0.5, 0], [0,4]])
v0 = np.dot(v0_tmp, v0_tmp.T)
v0_inv = np.linalg.inv(v0)

nu0 = 2
sigma0_tmp = np.random.uniform(-10, 10, (2, 2))   #np.array([[0.5, 0], [0,10]])   #np.random.uniform(-1, 1, (2, 2))
sigma0 = np.dot(sigma0_tmp, sigma0_tmp.T)   # positive definite
sigma0_inv = np.linalg.inv(sigma0)

m0 = 0.5
tau0 = 2

In [None]:
# パラメータの初期値

T = x.shape[0]

theta = np.array([[0.5,1], [-0.3, 25]])#, [0.1, 1]])
sigma_y = np.sqrt(np.array([0.2, 0.2]))#, 0.2]))
z = np.repeat(np.array([0,1]), 45)#np.repeat(np.array([0,1,2]), 30) #np.zeros(T, dtype="int")
print(z)
mu = st.multivariate_normal.rvs(mean=mu0, cov=v0)
sigma = st.wishart.rvs(df=nu0, scale=sigma0)
sigma_inv = np.linalg.inv(sigma)
tau = st.gamma.rvs(a=m0/2, scale=tau0/2)
print("mu : ", mu)
print("Sigma : ", sigma)
print("tau :", tau)

In [None]:
# ギブスサンプリング
# 本では少なくとも12000ステップのサンプリングを行っていたが、ここでは2000ステップ（うちバーンイン1000ステップ）と設定した
%%time
n_step = 2000
burnin = 1000
z_sample = np.zeros((n_step, T))
theta_sample = np.zeros((n_step, T, theta.shape[1]))

n_cluster = theta.shape[0]
z_unique = np.unique(z)
for step in range(n_step):
  #print(step)
  for tt in range(T):
    z_new, theta_new, sigma_new = sampler_z(tt, y[tt], x[tt], theta, sigma_y, z, alpha, T, mu, sigma, n0, tau)
    z[tt] = z_new
    if z_unique is not np.unique(z):
      z_unique = np.unique(z)
      # クラスタ数が増えた場合の処理
      if n_cluster < z_unique.shape[0]:
        n_cluster += 1
        theta = np.r_[theta, [theta_new]]
        sigma_y = np.r_[sigma_y, sigma_new]
        #print(z_new, theta, sigma_y)
      # クラスタ数が減った場合の処理
      elif n_cluster > z_unique.shape[0]:
        n_cluster -= 1
        theta = theta[z_unique]
        sigma_y = sigma_y[z_unique]
      #Zを0,1,2,...に置き換える処理
      for ii, z_val in zip(range(z_unique.shape[0]), z_unique):
        z[z==z_val] = ii

  for kk in range(n_cluster):
    # sampler_thetaに渡すsigma_invは、copy()を取らないと内部で値が変化してしまう
    theta[kk] = sampler_theta(kk, y, x, z, sigma_y, mu, sigma_inv.copy())
    sigma_y[kk] = sampler_sigma_y(kk, y, x, z, theta, n0, tau)
  mu = sampler_mu(theta, sigma_inv, mu0, v0_inv)
  sigma_inv = sampler_Sigma_inv(theta, mu, nu0, sigma0_inv.copy())
  tau = sampler_tau(sigma_y, n0, m0, tau0)

  z_sample[step] = z
  theta_sample[step] = np.r_[theta, np.repeat([np.repeat(np.nan,theta.shape[1])], T-theta.shape[0], axis=0)]

In [None]:
np.unique(z)

In [None]:
# 結果
# クラスタ数の分布
print(np.unique(np.max(z_sample[burnin:].astype('int')+1, axis=1), return_counts=True))

In [None]:
fig, ax = plt.subplots()
ax.hist(np.max(z_sample[burnin:].astype('int'), axis=1)+1)
ax.set_xlabel('n_cluster')
fig.tight_layout()

In [None]:
# 傾き（勾配）の事後分布
z_after_burnin = z_sample[burnin:,:].astype('int')
theta_after_burnin = theta_sample[burnin:,:,0]

In [None]:
# 時刻0における傾きの事後分布
grad = [theta_after_burnin[ii,z_after_burnin[ii,0]] for ii in range(z_after_burnin.shape[0])]
grad = np.array(grad)
grad.shape

In [None]:
sns.distplot(grad)

In [None]:
# この結果を全時刻で統合する
grad_all = np.empty((T,n_step - burnin))
for tt in range(T):
  grad = [theta_after_burnin[ii,z_after_burnin[ii,tt]] for ii in range(z_after_burnin.shape[0])]
  grad_all[tt] = np.array(grad)

In [None]:
fig, ax = plt.subplots()
ax.plot(x, np.mean(grad_all, axis=1))
ax.fill_between(x, np.mean(grad_all, axis=1) +np.std(grad_all, axis=1), np.mean(grad_all, axis=1) -np.std(grad_all, axis=1),
               alpha=0.3, color="purple")
ax.set_xlabel('x')
ax.set_ylabel('grad')
fig.tight_layout()

### <font color=blue>**4.** </font> 階層ディリクレ過程（HDP-LDA : Hierarchical Dirichlet Process - Latent Dirichlet Allocation）

#### <font color=green>**4.1.** </font> GENSIM core-tutorials

In [None]:
## https://radimrehurek.com/gensim/models/hdpmodel.html
## https://radimrehurek.com/gensim/auto_examples/index.html#core-tutorials-new-users-start-here

In [None]:
!pip install gensim

In [None]:
from gensim.test.utils import common_corpus, common_dictionary
from gensim.models import HdpModel

hdp = HdpModel(common_corpus, common_dictionary)

In [None]:
# You can then infer topic distributions on new, unseen documents, with
unseen_document = [(1, 3.), (2, 4)]
doc_hdp = hdp[unseen_document]

In [None]:
doc_hdp

In [None]:
# To print 20 topics with top 10 most probable words.
topic_info = hdp.print_topics(num_topics=20, num_words=10)

In [None]:
topic_info[0]

In [None]:
# The model can be updated (trained) with new documents via
hdp.update([[(1, 2)], [(1, 1), (4, 5)]])

In [None]:
doc_hdp_2 = hdp[unseen_document]
doc_hdp_2

In [None]:
topic_info_2 = hdp.print_topics(num_topics=20, num_words=10)

In [None]:
topic_info_2[0]

In [None]:
for i in range(20):
  print(i, '\t', topic_info[i] == topic_info_2[i])

#### <font color=green>**4.2.** </font> トピックモデルへの適用例

In [None]:
## 出典 : https://qiita.com/u6k/items/5170b8d8e3f41531f08a

In [None]:
import gensim
from gensim import corpora

In [None]:
documents = ["Human machine interface for lab abc computer applications",
             "A survey of user opinion of computer system response time",
             "The EPS user interface management system",
             "System and human system engineering testing of EPS",
             "Relation of user perceived response time to error measurement",
             "The generation of random binary unordered trees",
             "The intersection graph of paths in trees",
             "Graph minors IV Widths of trees and well quasi ordering",
             "Graph minors A survey"]

In [None]:
# ストップワードを定義
stop_words = set('for a of the and to in'.split())

# 文を単語に分割し、ストップワードを除去した配列を作成
texts = [[word for word in document.lower().split() if word not in stop_words] for document in documents]

In [None]:
print(texts)

In [None]:
from pprint import pprint
pprint(texts)

In [None]:
# 単語の出現回数を格納するfrequency変数を定義
from collections import defaultdict
frequency = defaultdict(int)

# 単語の出現回数をfrequency変数でカウント
for text in texts:
  for token in text:
    frequency[token] += 1

# frequency変数で1より上の単語のみを配列に構築
texts_2 = [[token for token in text if frequency[token] > 1] for text in texts]

In [None]:
pprint(texts_2)

In [None]:
dictionary = corpora.Dictionary(texts_2)

# ファイルに保存できます
#dictionary.save('/tmp/deerwester.dict')

# テキストファイルに保存することもできます
#dictionary.save_as_text('/tmp/deerwester.dict.txt')

In [None]:
# Convert every token list into its bag-of-words form using the dictionary
corpus = [dictionary.doc2bow(text) for text in texts_2]

# The corpus can be saved to a file
#corpora.MmCorpus.serialize('/tmp/deerwester.mm', corpus)

In [None]:
pprint(corpus)

In [None]:
# Create an LDA model with five topics (num_topics=5)
lda = gensim.models.ldamodel.LdaModel(corpus=corpus, num_topics=5, id2word=dictionary)

In [None]:
pprint(lda.show_topics())

In [None]:
# Define the sentence to analyse
test_documents = ["Computer themselves and software yet to be developed will revolutionize the way we learn"]

# Lower-case each sentence and split it on whitespace (no stop-word removal here)
test_texts = [document.lower().split() for document in test_documents]

# Build the test corpus with the existing dictionary
test_corpus = [dictionary.doc2bow(text) for text in test_texts]

In [None]:
pprint(test_corpus)

In [None]:
# Pretty-print what the LDA model returns for each document of the test corpus
for topics_per_document in lda[test_corpus]:
  pprint(topics_per_document)

#### <font color=green>**4.3.** </font> トピックモデルへの適用例　その２

In [None]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.datasets import fetch_20newsgroups

# Fetch the 20 Newsgroups posts with headers, footers and quoted text removed.
# return_X_y=True yields a (data, target) pair; the target part is discarded.
data_samples, _ = fetch_20newsgroups(shuffle=True, random_state=1,
                             remove=('headers', 'footers', 'quotes'),
                             return_X_y=True)

In [None]:
# NOTE(review): '¥n' looks like a mis-encoded '\n'. str.split() already
# discards newlines, so this set is effectively a no-op filter -- confirm intent.
stop_words = set('¥n'.split())

# Lower-case every post, split it on whitespace and drop the stop words.
# The loop variable is named `doc` so it does not shadow `data_samples`.
texts = [
  [word for word in doc.lower().split() if word not in stop_words]
  for doc in data_samples
]

In [None]:
# Count how often each token occurs over all token lists
from collections import defaultdict
frequency = defaultdict(int)

for text in texts:
  for token in text:
    frequency[token] += 1

# NOTE(review): every counted token has frequency >= 1, so this filter keeps
# all tokens -- confirm whether `> 1` was intended.
texts = [[token for token in text if frequency[token] >= 1] for text in texts]

In [None]:
import gensim
from gensim import corpora

In [None]:
# Build the gensim dictionary from the tokenised posts
dictionary = corpora.Dictionary(texts)

In [None]:
# Convert every tokenised post into its bag-of-words form
corpus = [dictionary.doc2bow(text) for text in texts]

In [None]:
# Topic count passed to the LDA model; draw_graph also reads it as the number of bars
num_topics=100

In [None]:
# Sentence used to compare the topic assignments of the two models
test_documents = ["In a new ad for Omaze, George Clooney pokes fun at the nipples on his Batsuit costume from Joel Schumacher’s notorious Batman & Robin from 1997"]

# Lower-case each sentence and split it on whitespace
test_texts = [document.lower().split() for document in test_documents]

# Build the test corpus with the existing dictionary
test_corpus = [dictionary.doc2bow(text) for text in test_texts]

In [None]:
# Fit both models on the same corpus: LDA receives an explicit topic count,
# the HDP model is constructed without one.
lda = gensim.models.ldamodel.LdaModel(corpus=corpus, num_topics=num_topics, id2word=dictionary)
hdp = gensim.models.hdpmodel.HdpModel(corpus=corpus, id2word=dictionary)

In [None]:
import matplotlib.pyplot as plt
def draw_graph(test_corpus,index):
  """Draw one document's topic weights as a bar chart in a 1x2 subplot grid.

  Args:
    test_corpus: sequence of (topic_id, weight) pairs for a single document
      (the callers pass one element of `model[corpus]`).
    index: subplot position handed to `plt.subplot(1, 2, index)`.

  Reads the module-level `num_topics` for the number of bars. Pairs whose
  topic id lies outside `range(num_topics)` are not plotted; when an id
  occurs more than once the last weight wins.
  """
  y = [0] * num_topics
  # Single pass over the pairs instead of scanning them once per topic id.
  for pair in test_corpus:
    topic_id, weight = pair[0], pair[1]
    if 0 <= topic_id < num_topics:
      y[topic_id] = weight
  x = list(range(num_topics))
  plt.subplot(1, 2, index)
  plt.bar(x, y)


In [None]:
# Bar charts for the first test document: LDA result in subplot 1, HDP result in subplot 2
plt.figure(figsize=(20,5))
draw_graph(list(lda[test_corpus])[0],1)
draw_graph(list(hdp[test_corpus])[0],2)
plt.show()

### <font color=blue>**5.** </font> インド料理店過程 : Indian buffet process

In [None]:
## 出典 : https://github.com/Ma-sa-ue/practice/tree/master/machine%20learning(python)/nonparabayes

In [None]:
import numpy as np
import scipy as sp
import scipy.stats as st
import matplotlib.pyplot as plt

np.random.seed(100)

In [None]:
def get_ibp(alpha, n_customers=15, max_dishes=30):
  """Draw one binary matrix from the Indian buffet process.

  Customers enter one at a time. Customer j (0-based) takes each dish k that
  has already been sampled with probability m_k / (j + 1), where m_k is the
  number of earlier customers who took it, and then tries
  Poisson(alpha / (j + 1)) brand-new dishes.

  Args:
    alpha: rate parameter controlling how many new dishes appear.
    n_customers: number of rows (customers) to simulate.
    max_dishes: initial number of columns. The matrix is widened when more
      dishes are sampled, instead of truncating or raising IndexError.

  Returns:
    Array of shape (n_customers, K) with K >= max_dishes; entry [j, k] is 1
    when customer j took dish k and 0 otherwise. Unused columns stay zero.
  """
  z = np.zeros([n_customers, max_dishes])
  n_dishes = 0
  for j in range(n_customers):
    ### old phase: revisit the dishes sampled by earlier customers
    dish_counts = np.sum(z, 0)
    for k in range(n_dishes):
      z[j, k] = np.random.binomial(1, dish_counts[k] * 1.0 / (j + 1))
    ### new phase: number of dishes nobody has tried yet
    n_new = np.random.poisson(alpha * 1.0 / (j + 1))
    overflow = n_dishes + n_new - z.shape[1]
    if overflow > 0:
      # Not enough columns left: append exactly as many as are missing.
      z = np.concatenate([z, np.zeros([n_customers, overflow])], 1)
    z[j, n_dishes:n_dishes + n_new] = 1
    n_dishes += n_new
  return z

In [None]:
# Draw one IBP sample per alpha value side by side, save the figure, then show it
para = [1.0, 4.0, 9.0]
plt.figure(figsize=(16, 5))
for i, alpha_value in enumerate(para):
  plt.subplot(1, 3, i + 1)
  plt.title("alpha=" + str(alpha_value))
  hoge = get_ibp(alpha_value)
  plt.imshow(hoge, interpolation='none')
plt.savefig("IBP.png")
plt.show()

In [None]:
#### generate sample
n = 20

# One entry per 5x5 binary pattern: the (rows, cols, value) assignments that
# paint it, applied in the listed order.
pattern_specs = [
  [(slice(0, 3), slice(0, 3), 1)],
  [(slice(0, 3), slice(3, 5), 1), (0, 2, 1), (3, 4, 1)],
  [(slice(3, 5), slice(0, 3), 1), (0, 0, 1), (4, 2, 0)],
  [(slice(3, 5), slice(2, 5), 1)],
]

true_z = []
for i, spec in enumerate(pattern_specs):
  pattern = np.zeros((5, 5))
  for rows, cols, value in spec:
    pattern[rows, cols] = value
  true_z.append(pattern)

In [None]:
# Flatten each pattern to a (1, 25) row and stack them
true_z_flat2 = np.array([i.reshape(1,25) for i in true_z])
print(true_z_flat2.shape)
# Drop the singleton axis and transpose: 25 rows (pixels) by 4 columns (patterns)
true_z_flat2 = np.transpose(true_z_flat2.reshape(4,25))

In [None]:
# Show the four patterns in a 2x2 grid
for i in range(4):
  plt.subplot(2,2,i+1)
  plt.imshow(true_z[i],interpolation='none')
plt.show()

In [None]:
# Show the flattened pattern matrix as an image
plt.imshow(true_z_flat2,interpolation='none')
plt.show()

In [None]:
# Feature values (4 x n) and observations: pattern matrix times features plus
# small Gaussian noise (25 x n)
xxx = np.random.normal(0,2.0,4*n).reshape(4,n)
yyy = np.dot(true_z_flat2,xxx) + np.random.normal(0,0.1,25*n).reshape((25,n))

# Show observations, pattern matrix and feature values side by side.
# Tick labels are hidden with booleans: the string 'off' is truthy, so current
# Matplotlib would keep the labels visible.
plt.figure(figsize=(8,8))
plt.subplot(1,3,1)
plt.tick_params(labelbottom=False, labelleft=False)
plt.imshow(yyy,interpolation='none')

plt.subplot(1,3,2)
plt.tick_params(labelbottom=False, labelleft=False)
plt.imshow(true_z_flat2,interpolation='none')
#plt.savefig("situation.png")

plt.subplot(1,3,3)
plt.tick_params(labelbottom=False, labelleft=False)
plt.imshow(xxx,interpolation='none')

plt.show()

In [None]:
def factorial(n):
  """Return n! for a non-negative integer-valued n.

  Computed iteratively, so large n does not hit the recursion limit. The
  factors 1, 2, ..., n are derived from n itself, which keeps the result in
  the numeric type of the argument (e.g. factorial(5.0) == 120.0).

  Raises:
    ValueError: if n is negative or not integer-valued.
  """
  if n < 0 or n != int(n):
    raise ValueError("factorial() is only defined for non-negative integers")
  result = 1
  # step runs n-1, ..., 0, so (n - step) runs 1, ..., n
  for step in range(int(n) - 1, -1, -1):
    result = (n - step) * result
  return result

In [None]:
def poisson_pdf(_lambda,x):
  """Return the Poisson probability mass P(X = x) for rate `_lambda`.

  Delegates to scipy.stats.poisson, which stays finite for large x where
  evaluating lambda**x * exp(-lambda) / x! directly overflows.

  Args:
    _lambda: rate (mean) of the distribution, >= 0.
    x: count at which the mass function is evaluated.
  """
  return st.poisson.pmf(x, _lambda)

In [None]:
def gibbs_for_z(i,k,z,Y,X,sigma_y,alpha,N):
  """Resample the single assignment z[i, k] from its conditional distribution.

  The prior probability of z[i, k] = 1 is n_i_k / N, where n_i_k counts the
  objects other than i that currently use feature k. The likelihood of row
  Y[i] is Gaussian with mean z[i] . X and covariance sigma_y * I. The two
  cases are compared in log space so that very small densities cannot turn
  the probability into NaN.

  Args:
    i: row (object) index.
    k: column (feature) index.
    z: binary assignment matrix; it is not modified.
    Y: observations, one row per object.
    X: feature matrix, one row per feature.
    sigma_y: observation noise variance (scales the identity covariance).
    alpha: unused here; kept so the samplers share one signature.
    N: number of objects used in the prior n_i_k / N.

  Returns:
    0 or 1, the new value for z[i, k].
  """
  ###### leave-one-out count: object i itself must not vote for feature k
  n_i_k = np.sum(z[:, k]) - z[i, k]
  prior_on = n_i_k * 1.0 / N
  if prior_on <= 0:
    # nobody else uses the feature: the conditional probability is zero
    return 0
  if prior_on >= 1:
    return 1

  ##### log-likelihood of Y[i] with the feature switched on / off
  z_on = np.copy(z[i, :])
  z_on[k] = 1
  z_off = np.copy(z[i, :])
  z_off[k] = 0
  cov = np.identity(Y.shape[1]) * sigma_y
  loglik_on = st.multivariate_normal.logpdf(Y[i], np.dot(z_on, X), cov)
  loglik_off = st.multivariate_normal.logpdf(Y[i], np.dot(z_off, X), cov)

  log_odds = (loglik_on + np.log(prior_on)) - (loglik_off + np.log(1 - prior_on))
  # numerically stable logistic function: 1 / (1 + exp(-log_odds))
  p_on = np.exp(-np.logaddexp(0.0, -log_odds))
  return np.random.binomial(1, p_on)

In [None]:
def gibbs_for_m(i,z,Y,X,sigma_y,sigma_x,alpha,N):
  """Sample how many brand-new features object i receives and append them.

  Candidate feature rows are drawn from Normal(0, 2.0). Each count
  m in {0, 1, 2, 3} is weighted by Poisson(m; alpha / N) times the Gaussian
  likelihood of Y[i] when the first m candidates are added to its mean. The
  weights are normalised in log space, so tiny densities cannot produce NaN
  probabilities.

  Args:
    i: row (object) index.
    z: binary assignment matrix; it is not modified.
    Y: observations, one row per object.
    X: current feature matrix, one row per feature.
    sigma_y: observation noise variance.
    sigma_x: accepted for signature compatibility; the candidate rows use a
      fixed standard deviation of 2.0.
    alpha: IBP rate parameter.
    N: number of objects used in the Poisson rate alpha / N.

  Returns:
    [newx, newz]: X with the accepted candidate rows appended, and z with the
    same number of columns appended (set to 1 in row i, 0 elsewhere).
  """
  n_dims = Y.shape[1]
  # Five candidate rows are drawn although at most three can be accepted;
  # for 20-dimensional data this consumes the same random numbers as before.
  new_x = np.random.normal(0,2.0,5*n_dims).reshape((5,n_dims))
  base_mean = np.dot(z[i,:],X)
  log_weights = []
  for j in range(4):
    log_prior = st.poisson.logpmf(j, alpha*1.0/N)
    log_like = st.multivariate_normal.logpdf(Y[i], base_mean+np.sum(new_x[0:j],0), sigma_y)
    log_weights.append(log_prior + log_like)
  log_weights = np.array(log_weights)
  # subtract the maximum before exponentiating so at least one weight is 1
  weights = np.exp(log_weights - np.max(log_weights))
  weights = weights/np.sum(weights)
  n_new = int(np.argmax(np.random.multinomial(1, weights)))
  newx = np.concatenate([X, new_x[0:n_new]])
  newz = np.concatenate([z, np.zeros((Y.shape[0], n_new))], 1)
  newz[i, z.shape[1]:] = 1
  return [newx, newz]

In [None]:
def gibbs_for_x(z,Y,X,sigma_y,sigma_x,alpha,N):
  """Draw a fresh feature matrix given the assignments z and the data Y.

  For every column y of Y one vector is sampled from a multivariate normal
  with mean V z^T y and covariance sigma_y * V, where
  V = (z^T z + (sigma_y / sigma_x) I)^-1.

  The arguments X, alpha and N are not used; they are accepted so the
  samplers share one call signature.

  Returns:
    Array with one row per column of z and one column per column of Y.
  """
  z_t = np.transpose(z)
  n_features = z.shape[1]
  precision = np.dot(z_t, z) + sigma_y*1.0/sigma_x*np.identity(n_features)
  vx = np.linalg.inv(precision)
  cov = sigma_y*vx
  # one draw per data column, in column order
  draws = [
    np.random.multivariate_normal(np.dot(vx, np.dot(z_t, Y[:, col])), cov)
    for col in range(Y.shape[1])
  ]
  return np.transpose(np.array(draws))

In [None]:
# Initial sampler state: one random feature row of length n and a sparse
# random 25x1 binary assignment column
x = np.random.normal(0,2.0,1*n).reshape(1,n)
true_z_flat = np.random.binomial(1,0.1,25).reshape((25,1))

In [None]:
alpha = 1   ### IBP rate parameter handed to the samplers

# The rows of yyy are the objects that receive features. The prior terms of
# the samplers (count / N and Poisson(alpha / N)) are defined over that
# number, not over the number of data columns n.
n_objects = yyy.shape[0]

plt.figure(figsize=(16,5))
for sss in range(36):
  n_rows, n_cols = true_z_flat.shape
  pesdo_z = np.zeros((n_rows, n_cols))
  ##### resample every existing assignment from the current state
  for kk1 in range(n_rows):
    for kk2 in range(n_cols):
      pesdo_z[kk1,kk2] = gibbs_for_z(kk1,kk2,true_z_flat,yyy,x,0.3,alpha,n_objects)
  true_z_flat = pesdo_z
  ####### let every object propose brand-new features
  for row in range(n_rows):
    x,true_z_flat = gibbs_for_m(row,true_z_flat,yyy,x,0.3,2.0,alpha,n_objects)
  if sss%3==0:
    # integer division: a subplot index has to be an int under Python 3
    plt.subplot(1,12,sss//3+1)
    plt.imshow(true_z_flat,interpolation='none')
    plt.title(str(sss)+"steps")
    plt.axis('off')
  ############ resample the feature values given the new assignments
  x= gibbs_for_x(true_z_flat,yyy,x,0.3,2.0,alpha,n_objects)
plt.show()

In [None]:
# Show the pattern matrix used to generate the data, titled "true z", for comparison
plt.imshow(true_z_flat2,interpolation='none')
plt.title("true z")
plt.axis('off')
plt.show()