# 一般化線形モデル　ポアソン回帰モデル

Ref. 
 Poisson GLM, https://onlinecourses.science.psu.edu/stat504/node/169  
 
$\lambda = \exp(\beta_0 + \beta_1 x_1)$

In [None]:
# -*- coding: utf-8 -*-
import numpy as np
import pandas as pd

import statsmodels.api as sm
import statsmodels.formula.api as smf
import matplotlib.pyplot as plt
%matplotlib inline

# Notebook-wide switch: when True, the plotting cells also save their figure as a PNG file.
FLAG_fig = False

# Seed NumPy's global random generator so the sampled data are reproducible.
np.random.seed(123)

#### ポアソン分布に従う確率変数ｙのデータ生成
$\lambda = \exp(\beta_0), \hspace{3mm} \beta_1 = 0$ の場合  
平均値 $\lambda$ をコード中では変数 `lam` とおいた

In [None]:
# Draw Num independent samples from a Poisson distribution whose mean is lam.
lam = 5  # lambda, the mean of the Poisson distribution
Num = 1000  # number of samples
y = np.random.poisson(lam=lam, size=Num)

In [None]:
# Histogram of the sampled counts: 14 bins, raw frequencies (not normalised).
count, bins, ignored = plt.hist(y, bins=14, density=False)

# Optionally keep a copy of the figure on disk before showing it.
if FLAG_fig:
    plt.savefig('fig_REG_GLM_Poisson1_hist.png')
plt.show()

#### 一部をプロット

In [None]:
# Line plot of the first n samples only, so that individual values stay readable.
n = 100
plt.plot(y[:n])

# Optionally keep a copy of the figure on disk before showing it.
if FLAG_fig:
    plt.savefig('fig_REG_GLM_Poisson1_plot.png')
plt.show()

一般化線形モデル問題を解く

In [None]:
# Use the sample index 0 .. len(y)-1 as the explanatory variable x and
# collect both variables in a DataFrame for the formula interface.
x = range(len(y))
df = pd.DataFrame(dict(x=x, y=y))

In [None]:
# Fit a Poisson GLM of y on x via the formula interface.
# The log link is the Poisson family's default, so it is not passed
# explicitly: recent statsmodels releases reject a link *class* such as
# `sm.families.links.log` (an instance is required) and deprecate the
# lowercase alias, while the default works on every version.
glm_model = 'y ~ x'
result = smf.glm(formula=glm_model, data=df, family=sm.families.Poisson()).fit()
print(result.summary())

In [None]:
# Pull the fitted coefficients out by name: intercept (b0) and slope on x (b1).
b0 = result.params['Intercept']
b1 = result.params['x']
# With the log link, exp(b0) is the fitted mean at x = 0.
print('exp(b0) =', np.exp(b0))

In [None]:
# Sample mean of y, for comparison with exp(b0) from the fitted model.
print('Mean of y =', df['y'].mean())

#### ポアソン分布に従う確率変数ｙのデータ生成
$\lambda = \exp(\beta_0 + \beta_1 x_1)$ の場合  

In [None]:
# Number of samples, and zero-filled float buffers for the explanatory
# variable (x) and the response (y); they are filled in by the next cell.
Num = 1000
x, y = (np.zeros(Num) for _ in range(2))

In [None]:
# Simulate Poisson counts whose log-mean grows linearly with the sample index:
#     lambda_i = exp(b0 + (b1 / Num) * i),   i = 0, ..., Num - 1
# Because the slope is divided by Num, the linear predictor runs from b0 up to
# just below b0 + b1 over the whole data set.
b0, b1 = 0.5, 3.5
x[:] = np.arange(Num)  # explanatory variable: the sample index itself
lam = np.exp(b0 + (b1 / float(Num)) * x)  # per-sample Poisson mean
# Passing the array of means yields one draw per mean.  Calling
# poisson(lam, 1) per sample instead returns a length-1 array, and storing that
# in a scalar slot (y[i] = ...) is deprecated since NumPy 1.25.
y[:] = np.random.poisson(lam)

In [None]:
# Histogram of the simulated counts: 14 bins, raw frequencies (not normalised).
count, bins, ignored = plt.hist(y, bins=14, density=False)

# Optionally keep a copy of the figure on disk before showing it.
if FLAG_fig:
    plt.savefig('fig_REG_GLM_Poisson2_hist.png')
plt.show()

In [None]:
# Scatter plot of the simulated counts y against the explanatory variable x.
plt.scatter(x=x, y=y)

In [None]:
# Fit a Poisson GLM of y on x to the simulated data.
# The log link is the Poisson family's default, so it is not passed
# explicitly: recent statsmodels releases reject a link *class* such as
# `sm.families.links.log` (an instance is required) and deprecate the
# lowercase alias, while the default works on every version.
df = pd.DataFrame({'x': x, 'y': y})
glm_model = 'y ~ x'
result = smf.glm(formula=glm_model, data=df, family=sm.families.Poisson()).fit()
print(result.summary())

下記で b1 = b1 * Num としているのは，GLM が推定するのは lam の生成式にある係数 (b1/float(Num)) であり，この分母を払って元の b1 に戻すためである

In [None]:
# Pull the fitted coefficients out by name: intercept (b0) and slope on x (b1).
b0 = result.params['Intercept']
b1 = result.params['x']
# The fitted slope corresponds to b1/Num in the data-generating formula,
# so multiply by Num to clear that denominator.
b1 *= Num
print("b0 = %f  b1 = %f" % (b0, b1))
# Optional: also show the coefficients on the exponentiated scale.
#e_b0 = np.exp(b0)
#e_b1 = np.exp(b1)
#print("exp(b0) = %f  exp(b1) = %f" % (e_b0, e_b1))

In [None]:
# Fitted mean curve exp(b0 + (b1/Num) * x); b1 was rescaled by Num earlier,
# so it is divided by Num again here.
y_pre = np.exp(b0 + b1 / float(Num) * x)

# Data as a scatter plot with the fitted curve drawn on top of it.
plt.scatter(x[:Num], y[:Num])
plt.plot(x, y_pre, color='white')

# Optionally keep a copy of the figure on disk before showing it.
if FLAG_fig:
    plt.savefig('fig_REG_GLM_Poisson2_plot.png')
plt.show()

#### データの前半500個[0:499]と後半500個[500:999]を入れ替える

In [None]:
# Swap the first and second halves of x and y, producing new arrays xx and yy.
# For an odd length the middle element stays where it is.
nlen = len(x)
n2 = nlen // 2         # length of each half
nst = n2 + nlen % 2    # start of the second half (skips the middle element if nlen is odd)

# x[n2:nst] is empty for an even length and holds the single middle element
# for an odd length, so one expression covers both cases.  Building xx and yy
# the same way keeps the middle element of yy, which would otherwise be left
# at zero for odd lengths.  np.concatenate returns new arrays, so x and y are
# not modified.
xx = np.concatenate((x[nst:], x[n2:nst], x[:n2]))
yy = np.concatenate((y[nst:], y[n2:nst], y[:n2]))

# Debug aid: uncomment to inspect the head, the middle and the tail of xx.
# print(xx[0:5])
# print(xx[(n2-1):(n2+4)])
# print(xx[-5:])

# Plot yy against its position.  Note: plt.plot(xx, yy) would reproduce the
# earlier graph, because each y value would still be paired with its own x.
plt.plot(yy[0:nlen])

# Optionally keep a copy of the figure on disk before showing it.
if FLAG_fig:
    plt.savefig('fig_REG_GLM_Poisson3_plot.png')
plt.show()

In [None]:
# Fit a Poisson GLM of y on x to the data with its two halves swapped.
# The log link is the Poisson family's default, so it is not passed
# explicitly: recent statsmodels releases reject a link *class* such as
# `sm.families.links.log` (an instance is required) and deprecate the
# lowercase alias, while the default works on every version.
df = pd.DataFrame({'x': xx, 'y': yy})
glm_model = 'y ~ x'
result = smf.glm(formula=glm_model, data=df, family=sm.families.Poisson()).fit()
print(result.summary())