$p_{\theta}(s_{t+1}|s_t, a_t) = \{_{pdf(x_{t+1}, \; (x_t+a_t^0), \; \theta_2) \; \times \; pdf(y_{t+1}, \; (y_t+a_t^1), \; \theta_2) \; if \; x_t >= \theta_0}^{pdf(x_{t+1}, \; (x_t+a_t^0), \; \theta_1) \; \times \; pdf(y_{t+1}, \; (y_t+a_t^1), \; \theta_1) \; if \; x_t < \theta_0}$

In [147]:
import numpy as np
import random

theta = [5, (2,4)]

def transiction(s, a, p=2):
    return tuple(np.array(s) + np.array(a) + np.random.normal(0, p, 2))

def pickModel(s, theta):
    psi, sigma = theta
    x,_ = s
    return sigma[int(x >= psi)]

def generate(s, n, theta):   
    for _ in range(n):
        a = random.choice([(1,0), (-1,0), (0,1), (0,-1)])
        yield s, a
        s = transiction(s, a, pickModel(s, theta))

s_0 = (1,1)
O = [(s, a) for (s, a) in generate(s_0, 100, theta)]
O

[((1, 1), (-1, 0)),
 ((-1.0044556550899455, 2.5110750635543395), (0, 1)),
 ((-1.9546126352606636, 3.7882751058923954), (-1, 0)),
 ((-3.491839121199425, 2.940984080296791), (1, 0)),
 ((0.896809395152534, 0.892995240517521), (1, 0)),
 ((0.20853468042521905, 1.0678692804397274), (1, 0)),
 ((-0.29523757020887476, 1.4982501401111097), (1, 0)),
 ((0.2628925359383549, 1.4302179384062719), (1, 0)),
 ((3.364851790978209, 2.1709790129032553), (-1, 0)),
 ((-0.00817241033535332, 2.9724229283627785), (1, 0)),
 ((0.8333534288326957, 1.632852851417949), (-1, 0)),
 ((-1.670129786892399, 2.8680177091104504), (1, 0)),
 ((3.672606141984072, 2.3035917530748717), (0, -1)),
 ((1.3987286936132528, 3.2548170819228757), (-1, 0)),
 ((-1.851891299680121, 1.2695756872051283), (0, -1)),
 ((-3.247097982699901, 2.6932998045088112), (0, -1)),
 ((-3.7065714342699576, 1.9680212436398568), (0, 1)),
 ((-1.206567840979889, 1.4152686791196096), (0, 1)),
 ((-2.2311013034162284, 0.4512263698908565), (1, 0)),
 ((-3.8573661675

$ \theta = \{\psi, \sigma \} $

$\psi$: Parametros para definição do modelo que será utilizado em cada estado<br>
$\sigma$: Parametros para cada Modelo $\{\sigma_1, \sigma_2, ...\}$ <br>

$p_{\theta}(s_{t+1}|s_t, a_t) = \mu(s_{t+1}, s_t, a_t, \sigma_1) \; \times \; \beta(s_t) + \mu(s_{t+1}, s_t, a_t, \sigma_2) \; \times \; \beta(s_t) + ...$



$p_{\theta}(s_{t+1}|s_t, a_t) = \sum_{i=1}^k \mu(s_{t+1}, s_t, a_t, \sigma_i) \beta(s_t)_i \;\;\;\;\; for \;\; k = |\sigma_{\theta}|$  


$\mu: S \times S \times A \times \sigma_{\theta} \mapsto [0,1]; \;\; \mu(s', s, a, \sigma) = pdf(s'_x, (s_x+a_x), \sigma) \; \times \; pdf(s'_y, (s_y+a_y), \sigma)$

##### 2 modelos: Sigmoid

$ \beta: S \mapsto [0,1]; \;\; \beta(s; \psi_{\theta})_i = (i-1) + \frac{1}{1+e^{\psi_{s}}}(-1)^{i-1} $;  <br>
$ for \;\; i = \{1,2\} $

##### N modelos: Softmax

$ \beta: S \mapsto [0,1]; \;\; \beta(s; \psi_{\theta})_i = argmax[\frac{e^{\psi_{s,i}}}{\sum_{j=1}^k e^{\psi_{s,j}}}] $;   <br>
$ for \;\; k = |\sigma_{\theta}| \;\; ; \;\; i=1,...,k $

$\cal{L}[\xi | \theta] = \sum_{t=0}^{\mathrm{T}-1} log \; p_{\theta}(s_{t+1}|s_t, a_t)$

In [148]:
from scipy.stats import norm

def ll(O, theta):
    psi, sigma = theta

    def mi(x_,y_, x,y, ax, ay, sig):
        return norm.pdf(x_, (x + ax), sig) * norm.pdf(y_, (y + ay), sig) 
    def beta(x,_, i):
        return i + 1/(1+np.e**(10*(x-psi))) * (-1)**i
    def p(s_,s,a):
        return np.sum([mi(*s_,*s,*a, sig)*beta(*s, i) for i,sig in enumerate(sigma)])
    
    return np.sum([np.log(p(st, *O[t-1])) for t,(st,_) in enumerate(O) if t>0])

ll(O, (5, (2, 4)))

-424.67399909458106

In [149]:
# best_theta = 0
# best_ll = -np.inf

# for psi in range(10):
#     for p1 in range(10):
#         for p2 in range(10):
#             theta = (psi+1, (p1+1, p2+1))
#             new_ll = ll(O, theta)
#             if new_ll > best_ll:
#                 best_ll = new_ll
#                 best_theta = theta

# print(best_theta, best_ll)

In [150]:
import torch
from torch.autograd import Variable
from torch.distributions import normal

s = Variable(torch.from_numpy(np.array(O)[:-1,0])).type(torch.DoubleTensor)
a = Variable(torch.from_numpy(np.array(O)[:-1,1])).type(torch.IntTensor)
s_ = Variable(torch.from_numpy(np.array(O)[1:,0])).type(torch.DoubleTensor)

# p = torch.nn.parameter.Parameter(torch.Tensor(3).uniform_(1, 10))
p = torch.nn.parameter.Parameter(torch.Tensor([5,2,4]))

mi = torch.cat([(torch.exp(normal.Normal(s[:,0] + a[:,0], i).log_prob(s_[:,0])) * 
                 torch.exp(normal.Normal(s[:,1] + a[:,1], i).log_prob(s_[:,1]))
               ) for i in p[1:]])
beta =  torch.cat((torch.ones(len(O)-1) + torch.sigmoid(-10 * (p[0] - s[:,0])) * (-1)**torch.ones(len(O)-1),
                   torch.zeros(len(O)-1) + torch.sigmoid(-10 * (p[0] - s[:,0])) * (-1)**torch.zeros(len(O)-1)
                ))
p_theta = torch.sum((mi*beta).reshape(2, len(O)-1), 0)
ll = torch.sum(torch.log(p_theta))
ll
# beta.size()

tensor(-424.6740, dtype=torch.float64, grad_fn=<SumBackward0>)

In [170]:
import torch
from torch.autograd import Variable
from torch.distributions import normal

s = Variable(torch.from_numpy(np.array(O)[:-1,0])).type(torch.DoubleTensor)
a = Variable(torch.from_numpy(np.array(O)[:-1,1])).type(torch.IntTensor)
s_ = Variable(torch.from_numpy(np.array(O)[1:,0])).type(torch.DoubleTensor)

p = torch.nn.parameter.Parameter(torch.Tensor([2,4]))
# beta
beta_in = nn.Linear(2, 2)
beta_out = nn.Linear(2, 2, bias=False)
beta_activation_function = nn.Softmax(dim=1)
psi = list(beta_in.parameters()) + list(beta_out.parameters())

mi = torch.stack([(torch.exp(normal.Normal(s[:,0] + a[:,0], i).log_prob(s_[:,0])) * torch.exp(normal.Normal(s[:,1] + a[:,1], i).log_prob(s_[:,1]))) for i in p],
                  dim=1
                )
beta = beta_out(beta_activation_function(beta_in(s.float())))
# p_theta = torch.sum((mi * torch.cat([beta, 1-beta])).reshape(2, len(O)-1), 0)
p_theta = torch.sum((mi * beta), 1)
ll = torch.sum(torch.log(p_theta))
ll

tensor(nan, dtype=torch.float64, grad_fn=<SumBackward0>)

In [185]:
import torch
from torch.autograd import Variable
from torch.distributions import normal
import torch.nn as nn

def estimate(init_p=None):
    s = Variable(torch.from_numpy(np.array(O)[:-1,0])).type(torch.DoubleTensor)
    a = Variable(torch.from_numpy(np.array(O)[:-1,1])).type(torch.IntTensor)
    s_ = Variable(torch.from_numpy(np.array(O)[1:,0])).type(torch.DoubleTensor)

    if init_p is not None:
        sigma = torch.nn.parameter.Parameter(torch.tensor(init_p))
    else:
        sigma = torch.nn.parameter.Parameter(torch.Tensor(2).uniform_(1, 10))

    # beta
    beta_in = nn.Linear(2, 2)
    beta_out = nn.Linear(2, 2, bias=False)
    # beta_activation_function = nn.Sigmoid()
    beta_activation_function = nn.Softmax(dim=1)
    psi = list(beta_in.parameters()) + list(beta_out.parameters())

    def loglike():
        mi = torch.stack([(torch.exp(normal.Normal(s[:,0] + a[:,0], sig).log_prob(s_[:,0])) * 
                    torch.exp(normal.Normal(s[:,1] + a[:,1], sig).log_prob(s_[:,1]))
                ) for sig in sigma], dim=1)
        beta = beta_out(beta_activation_function(beta_in(s.float())))
        p_theta = torch.sum((mi * beta), 1)
        return -torch.sum(torch.log(p_theta))


    # optim = torch.optim.SGD([p], lr=1e-2, momentum=0.9)
    optim = torch.optim.SGD(psi+[sigma], lr=1e-2)

    for _ in range(1000):
        # print(i, p)
        ll = loglike()
        optim.zero_grad()
        ll.backward()
        optim.step() 

        with torch.no_grad():
            sigma[:] = sigma.clamp(min=1e-1)

    return sigma, lambda x: beta_out(beta_activation_function(beta_in(x.float())))[0]


p, beta = estimate()
p

Parameter containing:
tensor([2.2794, 0.1000], requires_grad=True)

In [186]:
beta(torch.Tensor([[3,2]]))

tensor([-44.0586,   6.5992], grad_fn=<SelectBackward0>)