$p_{\theta}(s_{t+1}|s_t, a_t) = \begin{cases} pdf(x_{t+1}, \; (x_t+a_t^0), \; \theta_1) \; \times \; pdf(y_{t+1}, \; (y_t+a_t^1), \; \theta_1) & \text{if } x_t < \theta_0 \\ pdf(x_{t+1}, \; (x_t+a_t^0), \; \theta_2) \; \times \; pdf(y_{t+1}, \; (y_t+a_t^1), \; \theta_2) & \text{if } x_t \geq \theta_0 \end{cases}$

In [305]:
import numpy as np
import random

# theta = [psi, (sigma_1, sigma_2)]: psi is the x-threshold that selects the
# model, sigma_i is the noise scale of model i.
theta = [5, (2, 4)]

def transiction(s, a, p=2):
    """Return the next state: ``s + a`` plus zero-mean Gaussian noise.

    Args:
        s: Current state as a pair of numbers.
        a: Action as a pair of displacements.
        p: Scale (standard deviation) of the noise added to each coordinate.

    Returns:
        A tuple with the two noisy coordinates of the next state.
    """
    displaced = np.array(s) + np.array(a)
    noise = np.random.normal(0, p, 2)
    return tuple(displaced + noise)

def pickModel(s, theta):
    """Select the noise scale that applies in state ``s``.

    Args:
        s: State as an ``(x, y)`` pair; only ``x`` is used.
        theta: ``(psi, sigma)`` where ``psi`` is the x-threshold and ``sigma``
            is an indexable pair of scales.

    Returns:
        ``sigma[0]`` when ``x < psi``, otherwise ``sigma[1]``.
    """
    threshold, scales = theta
    x_coord, _unused = s
    index = 1 if x_coord >= threshold else 0
    return scales[index]

def generate(s, n, theta):
    """Yield ``n`` (state, action) pairs of a random walk that starts at ``s``.

    At every step one of the four unit moves is picked uniformly at random,
    the current pair is yielded, and the state is advanced with
    ``transiction`` using the noise scale chosen by ``pickModel``.

    Args:
        s: Initial state as an ``(x, y)`` pair.
        n: Number of pairs to yield.
        theta: ``(psi, sigma)`` model parameters passed on to ``pickModel``.

    Yields:
        ``(state, action)`` tuples.
    """
    moves = [(1, 0), (-1, 0), (0, 1), (0, -1)]
    state = s
    for _step in range(n):
        action = random.choice(moves)
        yield state, action
        noise_scale = pickModel(state, theta)
        state = transiction(state, action, noise_scale)

# Sample a 100-step trajectory of (state, action) pairs starting at s_0.
s_0 = (1, 1)
O = list(generate(s_0, 100, theta))
O

[((1, 1), (-1, 0)),
 ((0.9959485774055967, 4.607723646725001), (1, 0)),
 ((0.4744647441088885, 0.4105614202052088), (0, 1)),
 ((-0.7996525387207853, 3.6544994583264083), (1, 0)),
 ((1.882817063702133, 4.8614541759435035), (0, 1)),
 ((1.6780352903267675, 5.193383872884208), (-1, 0)),
 ((2.737440318019689, 4.494209423559863), (-1, 0)),
 ((-2.740755581633972, 3.859616740888901), (0, 1)),
 ((-3.9839237944011936, 3.699222405743485), (0, -1)),
 ((-2.9085441193933916, 4.988268300956815), (0, -1)),
 ((-2.9115285372154367, 4.508961769219203), (0, 1)),
 ((-5.433400819240807, 3.904973784566475), (-1, 0)),
 ((-5.301183155499765, 1.7798686529787844), (0, -1)),
 ((-1.0237870158496927, 0.12509951385072904), (-1, 0)),
 ((0.38444359380589566, -1.8094598364119725), (0, 1)),
 ((-4.433043440356112, 1.4107716502282162), (0, 1)),
 ((-6.201022206975728, 2.6010352075889815), (1, 0)),
 ((-8.407851681818169, 5.44531951835546), (-1, 0)),
 ((-6.650731401403752, 2.988092242828417), (1, 0)),
 ((-5.077049429854704, 

$ \theta = \{\psi, \sigma \} $

$\psi$: Parâmetros para definição do modelo que será utilizado em cada estado<br>
$\sigma$: Parâmetros para cada modelo $\{\sigma_1, \sigma_2, ...\}$ <br>

$p_{\theta}(s_{t+1}|s_t, a_t) = \mu(s_{t+1}, s_t, a_t, \sigma_1) \; \times \; \beta(s_t)_1 + \mu(s_{t+1}, s_t, a_t, \sigma_2) \; \times \; \beta(s_t)_2 + ...$



$p_{\theta}(s_{t+1}|s_t, a_t) = \sum_{i=1}^k \mu(s_{t+1}, s_t, a_t, \sigma_i) \beta(s_t)_i \;\;\;\;\; for \;\; k = |\sigma_{\theta}|$  


$\mu: S \times S \times A \times \sigma_{\theta} \mapsto [0,\infty); \;\; \mu(s', s, a, \sigma) = pdf(s'_x, (s_x+a_x), \sigma) \; \times \; pdf(s'_y, (s_y+a_y), \sigma)$

##### 2 modelos: Sigmoid

$ \beta: S \mapsto [0,1]; \;\; \beta(s; \psi_{\theta})_i = (i-1) + \frac{1}{1+e^{\psi_{s}}}(-1)^{i-1} $;  <br>
$ for \;\; i \in \{1,2\} $

##### N modelos: Softmax

$ \beta: S \mapsto [0,1]; \;\; \beta(s; \psi_{\theta})_i = argmax[\frac{e^{\psi_{s,i}}}{\sum_{j=1}^k e^{\psi_{s,j}}}] $;   <br>
$ for \;\; k = |\sigma_{\theta}| \;\; ; \;\; i=1,...,k $

$\mathcal{L}[\xi | \theta] = \sum_{t=0}^{\mathrm{T}-1} \log \; p_{\theta}(s_{t+1}|s_t, a_t)$

In [306]:
from scipy.stats import norm

def ll(O, theta):
    """Log-likelihood of the trajectory ``O`` under the gated two-model mixture.

    Each transition ``(s, a) -> s'`` has density
    ``sum_i mi(s', s, a, sigma_i) * beta_i(s)``, where ``mi`` is the product of
    two normal pdfs centred at ``s + a`` with scale ``sigma_i`` and ``beta`` is
    a sigmoid gate on ``10 * (x - psi)``.

    Everything is evaluated in log space. The direct form overflowed in
    ``e**(10*(x-psi))`` for states far above ``psi`` and returned ``-inf``
    once the product of pdfs underflowed to 0.

    Args:
        O: Sequence of ``((x, y), (ax, ay))`` pairs in time order.
        theta: ``(psi, sigma)`` with ``psi`` the gate threshold on ``x`` and
            ``sigma`` a sequence of at most two scales.

    Returns:
        The sum over consecutive pairs of ``log p(s_{t+1} | s_t, a_t)``;
        ``0.0`` when ``O`` has fewer than two entries.

    Raises:
        ValueError: If ``sigma`` has more than two entries; the sigmoid gate
            only defines weights for two models.
    """
    psi, sigma = theta
    if len(sigma) > 2:
        raise ValueError("the sigmoid gate supports at most two models")

    def log_mi(s_, s, a, sig):
        # log of pdf(x') * pdf(y'), each centred at the displaced coordinate.
        (x_, y_), (x, y), (ax, ay) = s_, s, a
        return norm.logpdf(x_, x + ax, sig) + norm.logpdf(y_, y + ay, sig)

    def log_beta(s, i):
        x, _ = s
        z = 10 * (x - psi)
        # i == 0: log(1 / (1 + e^z)) = -log(1 + e^z)
        # i == 1: log(e^z / (1 + e^z)) = -log(1 + e^-z)
        return -np.logaddexp(0, z if i == 0 else -z)

    def log_p(s_, s, a):
        terms = [log_mi(s_, s, a, sig) + log_beta(s, i)
                 for i, sig in enumerate(sigma)]
        # log(sum_i exp(term_i)) without leaving log space.
        return np.logaddexp.reduce(terms)

    return np.sum([log_p(nxt[0], *prev) for prev, nxt in zip(O, O[1:])])

# Log-likelihood of O at psi=5, sigma=(2, 4), the same values as `theta` used to generate it.
ll(O, (5, (2, 4)))

-460.2748975735503

In [307]:
# best_theta = 0
# best_ll = -np.inf

# for psi in range(10):
#     for p1 in range(10):
#         for p2 in range(10):
#             theta = (psi+1, (p1+1, p2+1))
#             new_ll = ll(O, theta)
#             if new_ll > best_ll:
#                 best_ll = new_ll
#                 best_theta = theta

# print(best_theta, best_ll)

In [308]:
import torch
from torch.autograd import Variable
from torch.distributions import normal

# Torch version of the log-likelihood, evaluated at psi=5, sigma=(2, 4) so the
# value can be compared with the NumPy `ll` function.
_traj = torch.from_numpy(np.array(O))  # converted once instead of three times

s = _traj[:-1, 0].type(torch.DoubleTensor)   # states s_t
a = _traj[:-1, 1].type(torch.IntTensor)      # actions a_t
s_ = _traj[1:, 0].type(torch.DoubleTensor)   # next states s_{t+1}

# Alternative random start: torch.nn.parameter.Parameter(torch.Tensor(3).uniform_(1, 10))
p = torch.nn.parameter.Parameter(torch.Tensor([5, 2, 4]))

# Transition density under each scale, concatenated as [model 1 | model 2].
mi = torch.cat([
    torch.exp(normal.Normal(s[:, 0] + a[:, 0], sig).log_prob(s_[:, 0]))
    * torch.exp(normal.Normal(s[:, 1] + a[:, 1], sig).log_prob(s_[:, 1]))
    for sig in p[1:]
])
# Gate weights in the same layout: model 1 gets 1 - gate, model 2 gets gate.
gate = torch.sigmoid(-10 * (p[0] - s[:, 0]))
beta = torch.cat((1 - gate, gate))
p_theta = torch.sum((mi * beta).reshape(2, len(O) - 1), 0)
# Stored as `ll_torch` so the function `ll` is not overwritten by a tensor.
ll_torch = torch.sum(torch.log(p_theta))
ll_torch

tensor(-460.2749, dtype=torch.float64, grad_fn=<SumBackward0>)

In [311]:
import torch
from torch.autograd import Variable
from torch.distributions import normal

def estimate(init_p=None, data=None, n_iter=1000, lr=1e-2):
    """Fit ``[psi, sigma_1, sigma_2]`` by SGD on the negative log-likelihood.

    The model is a two-component mixture: the next state is Gaussian around
    ``s + a`` with scale ``sigma_1`` or ``sigma_2``, and the components are
    weighted by a sigmoid gate on ``10 * (x - psi)``.

    The likelihood is evaluated in log space (``logsigmoid`` + ``logsumexp``).
    Multiplying exponentiated log-probabilities underflows to 0 for unlikely
    transitions, which turns the loss into ``inf`` and the gradients into NaN.

    Args:
        init_p: Optional initial ``[psi, sigma_1, sigma_2]``. Integer values
            are promoted to floats so the parameter can carry gradients. When
            omitted, the three values are drawn uniformly from ``[1, 10)``.
        data: Sequence of ``((x, y), (ax, ay))`` pairs in time order. Defaults
            to the module-level trajectory ``O``.
        n_iter: Number of SGD steps.
        lr: SGD learning rate.

    Returns:
        The optimised ``torch.nn.Parameter`` holding ``[psi, sigma_1, sigma_2]``.

    Raises:
        ValueError: If the parameter vector does not have exactly 3 entries.
    """
    if data is None:
        data = O  # module-level trajectory

    # Shape (T, 2, 2): [t, 0] is the state and [t, 1] the action at step t.
    # Actions stay floating point, so fractional displacements are not truncated.
    traj = torch.from_numpy(np.array(data, dtype=np.float64))
    s = traj[:-1, 0]
    a = traj[:-1, 1]
    s_ = traj[1:, 0]
    mean = s + a

    if init_p is not None:
        start = torch.as_tensor(init_p).detach().clone()
        if not start.is_floating_point():
            # Only floating point tensors can require gradients.
            start = start.to(torch.get_default_dtype())
    else:
        start = torch.empty(3).uniform_(1, 10)
    if tuple(start.shape) != (3,):
        raise ValueError("init_p must contain exactly [psi, sigma_1, sigma_2]")
    p = torch.nn.parameter.Parameter(start)

    def neg_loglike():
        z = 10 * (s[:, 0] - p[0])
        log_sigmoid = torch.nn.functional.logsigmoid
        # log of the gate weights: model 1 gets sigmoid(-z), model 2 sigmoid(z).
        log_beta = torch.stack((log_sigmoid(-z), log_sigmoid(z)))
        # log of pdf(x') * pdf(y') for each scale, one row per model.
        log_mi = torch.stack([
            normal.Normal(mean, sig).log_prob(s_).sum(dim=1) for sig in p[1:]
        ])
        return -torch.logsumexp(log_mi + log_beta, dim=0).sum()

    optim = torch.optim.SGD([p], lr=lr)

    for _ in range(n_iter):
        loss = neg_loglike()
        optim.zero_grad()
        loss.backward()
        optim.step()

        with torch.no_grad():
            # Keep the scales positive, as Normal requires.
            # NOTE(review): this also bounds psi (p[0]) below by 0.1; confirm
            # that restricting the threshold is intended.
            p.clamp_(min=1e-1)

    return p


# Fit the parameters from a random starting point (no init_p is given).
estimate()

Parameter containing:
tensor([2.7292, 2.0920, 3.6426], requires_grad=True)