In [3]:
import torch
import numpy as np
from torch.distributions import StudentT, Exponential
from sampler import TEST_SAMPLER
from tqdm import tqdm

#print('\n'*10)
class EM:
    def __init__(self, T=100, _alpha_r=0, _beta_r=1, _d=1, _alpha_u=0, _beta_u=1, _gamma=1, _theta=0, __lambda=1,rfilename="./r.npy"):
        self.alpha_r = torch.tensor(_alpha_r, dtype=torch.float64, requires_grad=True)
        self.beta_r = torch.tensor(_beta_r, dtype=torch.float64, requires_grad=True)
        self.alpha_u = torch.tensor(_alpha_u, dtype=torch.float64, requires_grad=True)
        self.beta_u = torch.tensor(_beta_u, dtype=torch.float64, requires_grad=True)
        self.gamma = torch.tensor(_gamma, dtype=torch.float64, requires_grad=True)
        self.theta = torch.tensor(_theta, dtype=torch.float64, requires_grad=True)
        self._lambda = torch.tensor(__lambda, dtype=torch.float64, requires_grad=True)
        self.d = torch.tensor(_d, dtype=torch.float64, requires_grad=True)
        self.parameters=[self.alpha_r, self.beta_r, self.alpha_u, self.beta_u, self.gamma, self.theta, self._lambda, self.d]
        self.names=["alpha_r", "beta_r", "alpha_u", "beta_u", "gamma", "theta", "_lambda", "d"]
        self.T=T
        self.r=np.load(rfilename)
        self.r=torch.tensor(self.r).reshape(1,-1)
        assert self.T ==self.r.shape[1]
    def singularlikelihood(self,epsilon):
        ''' Compute log joint likelihood of l=log p(eps_1,...,eps_T,r_1,...r_T)'''
        #Input:  epsilon  (n,T)
        #Output: log_prob (n,)

        n,T=epsilon.shape
        prior=0 #do we need prior on r_0, eps_0?

        ''' Calculate u, w, nu and eta'''
        epsilon_shift=torch.zeros(n,T)
        epsilon_shift[:,1:]=epsilon[:,:T-1] #eps_{t-1}
        u=(self.r-epsilon-self.alpha_r)/self.beta_r
        u_shift=torch.zeros_like(u) #u_{t-1}
        u_shift[:,1:]=u[:,:T-1]
        w=self.alpha_u+self.beta_u*u_shift+(self.gamma+self.theta*(epsilon_shift<0))*(epsilon_shift**2)
        nu=torch.sqrt(self.beta_r/(self.r-epsilon-self.alpha_r))*epsilon
        eta=(self.r-epsilon-self.alpha_r)/self.beta_r-w
        eta=torch.maximum(torch.zeros_like(eta),eta)+1e-6
        #print(eta)
        assert eta.min()>=0 #eta should follow exponential distribution

        ''' Calculate each component of the log-likelihood'''
        t_distr=StudentT(self.d)
        exp_distr=Exponential(self._lambda) #remains to be checked: lambda or 1/lambda
        logp_t=t_distr.log_prob(nu)
        logp_exp=exp_distr.log_prob(eta)
        log_joint=torch.sum(logp_t+logp_exp -0.5*(torch.log(self.beta_r)+torch.log(self.r-epsilon-self.alpha_r)), dim=-1)+prior


        return log_joint
    def call_sampler(self,n):
        #remember to add .item() at final version
        params=(self.alpha_r.item(), self.beta_r.item(), self.d.item(), self.alpha_u.item(), self.beta_u.item(), self.gamma.item(), self.theta.item(), self._lambda.item())
        sampler = TEST_SAMPLER(self.T, params)
        samples, weights=sampler.sample(n, np.array(self.r.T),exp_scale=1/self._lambda.item())
        #print("Weights&Samples",weights,samples)
        return torch.tensor(weights),torch.tensor(samples)
    def upd_param(self,lr=1e-4):
        with torch.no_grad():
            for param in self.parameters:
                param += lr * param.grad
                param.grad.zero_()
        #print(self.parameters)
    def log_total(self,weights,epsilon):
        likelihood=self.singularlikelihood(epsilon)
        #print("Sing l",likelihood)
        normed=likelihood+torch.log(weights) #item for removing normalization from gradients
        return torch.log(torch.sum(torch.exp(normed)))
    def state_EM(self):
        for i,param in enumerate(self.parameters):
            print(self.names[i],param.item(),param.grad.item())
    def optimize(self,num_steps=10,num_steps_hidden=50):
        for _ in tqdm(range(num_steps)):
            n=10000
            weights,epsilon=self.call_sampler(n)
            #print(_)
            for __ in range(num_steps_hidden):
                likelihood=self.log_total(weights,epsilon)
                #print("Likelihood:",likelihood)
                likelihood.backward()
                self.upd_param(lr=1e-5)
            if _%10==1:
                print("step",_,":",likelihood)
                self.state_EM()
    
        

    
    

# EM_sampler=EM(5,0.2, 0.2, 6.0, 0.6, 0.4, 0.1, 0.02, 2.5,rfilename="./Bayes_temp/r.npy")
EM_sampler=EM(5,0.1, 0.1, 3.0, 0.2, 0.2, 0.1, 0.02, 2.5,rfilename="./r.npy")
EM_sampler.optimize(num_steps=100)
print(EM_sampler.parameters)


  2%|▏         | 2/100 [00:01<00:57,  1.69it/s]

step 1 : tensor(0.5312, dtype=torch.float64, grad_fn=<LogBackward0>)
alpha_r 0.13034296236922885 0.0
beta_r 0.09713763504097611 0.0
alpha_u 0.20908475755163664 0.0
beta_u 0.20618544456611615 0.0
gamma 0.12650661877927707 0.0
theta 0.03156057554525556 0.0
_lambda 2.5010146438436625 0.0
d 2.9998455272874933 0.0


 12%|█▏        | 12/100 [00:05<00:40,  2.19it/s]

step 11 : tensor(-0.5151, dtype=torch.float64, grad_fn=<LogBackward0>)
alpha_r 0.2755457216326264 0.0
beta_r 0.15666622983359144 0.0
alpha_u 0.2505712943278626 0.0
beta_u 0.2437426336951329 0.0
gamma 0.24119591149661185 0.0
theta 0.08704281206781793 0.0
_lambda 2.5068783479339083 0.0
d 2.9996222845135163 0.0


 22%|██▏       | 22/100 [00:10<00:37,  2.07it/s]

step 21 : tensor(-3.5354, dtype=torch.float64, grad_fn=<LogBackward0>)
alpha_r 0.38014186702567976 0.0
beta_r 0.28223742364525106 0.0
alpha_u 0.293927870245602 0.0
beta_u 0.3093438747473389 0.0
gamma 0.34470448940332205 0.0
theta 0.15292381623699128 0.0
_lambda 2.5128223886988286 0.0
d 2.9996374919437754 0.0


 32%|███▏      | 32/100 [00:15<00:32,  2.07it/s]

step 31 : tensor(-6.1632, dtype=torch.float64, grad_fn=<LogBackward0>)
alpha_r 0.44452635873522367 0.0
beta_r 0.4094493647944498 0.0
alpha_u 0.33830065470696535 0.0
beta_u 0.4115795498008545 0.0
gamma 0.4359202721244157 0.0
theta 0.23284273281958953 0.0
_lambda 2.5189768255404474 0.0
d 2.9993539981147177 0.0


 42%|████▏     | 42/100 [00:20<00:27,  2.11it/s]

step 41 : tensor(-14.1165, dtype=torch.float64, grad_fn=<LogBackward0>)
alpha_r 0.47278282438475866 0.0
beta_r 0.5406348963765129 0.0
alpha_u 0.3740966959115111 0.0
beta_u 0.504879354730365 0.0
gamma 0.5379279601116264 0.0
theta 0.3342153855976852 0.0
_lambda 2.5264959046622453 0.0
d 2.9982739450839078 0.0


 52%|█████▏    | 52/100 [00:24<00:22,  2.15it/s]

step 51 : tensor(-31.7368, dtype=torch.float64, grad_fn=<LogBackward0>)
alpha_r 0.4868561135394113 0.0
beta_r 0.6622113554385666 0.0
alpha_u 0.4028744401581622 0.0
beta_u 0.5673336195336528 0.0
gamma 0.6427861808804568 0.0
theta 0.43907360636651804 0.0
_lambda 2.5346354173072094 0.0
d 2.990249472497555 0.0


 62%|██████▏   | 62/100 [00:29<00:18,  2.09it/s]

step 61 : tensor(-57.1987, dtype=torch.float64, grad_fn=<LogBackward0>)
alpha_r 0.49335640712965617 0.0
beta_r 0.8003874334251756 0.0
alpha_u 0.42421570155379756 0.0
beta_u 0.5978380912547311 0.0
gamma 0.7603116598986736 0.0
theta 0.5565990853847352 0.0
_lambda 2.5434302249170577 0.0
d 2.9631053796011346 0.0


 64%|██████▍   | 64/100 [00:30<00:17,  2.08it/s]


AssertionError: 

In [10]:
import torch
import torch.nn as nn

class VIScaler(nn.Module):
    def __init__(self, hidden_size=16):
        super().__init__()
        self.fc1 = nn.Linear(3, hidden_size)
        self.fc21 = nn.Linear(hidden_size, 1)
        self.fc22 = nn.Linear(hidden_size, 1)
        self.fc23 = nn.Linear(hidden_size, 1)
    
    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x1 = self.fc21(x)
        x2 = self.fc22(x)
        x3 = self.fc23(x)
        x1 = 1e-2+torch.sigmoid(x1)
        x2 = 1e-4+0.4*torch.sigmoid(x2)
        x3 = 1e-4+0.1*torch.sigmoid(x3)
        return x1,x2,x3
    
model=VIScaler(hidden_size=16)
model=torch.load("VIScaler_test1_153_loss_94.6197280883789.pth")

input=torch.tensor([[2,2,4.]])

model(input)

(tensor([[0.1532]], grad_fn=<AddBackward0>),
 tensor([[0.0002]], grad_fn=<AddBackward0>),
 tensor([[0.0877]], grad_fn=<AddBackward0>))

In [44]:
import torch

class EM:
    def __init__(self, T, _alpha_r=0, _beta_r=1, _d=1, _alpha_u=0, _beta_u=1, _gamma=1, _theta=0, __lambda=1):
        self.alpha_r = torch.tensor(_alpha_r, dtype=torch.float32, requires_grad=True)
        self.beta_r = torch.tensor(_beta_r, dtype=torch.float32, requires_grad=True)
        self.alpha_u = torch.tensor(_alpha_u, dtype=torch.float32, requires_grad=True)
        self.beta_u = torch.tensor(_beta_u, dtype=torch.float32, requires_grad=True)
        self.gamma = torch.tensor(_gamma, dtype=torch.float32, requires_grad=True)
        self.theta = torch.tensor(_theta, dtype=torch.float32, requires_grad=True)
        self._lambda = torch.tensor(__lambda, dtype=torch.float32, requires_grad=True)
        self.d = torch.tensor(_d, dtype=torch.float32, requires_grad=True)
        self.parameters=[self.alpha_r, self.beta_r, self.alpha_u, self.beta_u, self.gamma, self.theta, self._lambda, self.d]
        self.T=T
        self.load_data()
    def call_sampler(self,n):
        #remember to add .item() at final version
        return torch.randn(10), torch.randn(10,self.T)
        #The below is currently not working
        params=(self.alpha_r.item(), self.beta_r.item(), self.d.item(), self.alpha_u.item(), self.beta_u.item(), self.gamma.item(), self.theta.item(), self._lambda.item())
        sampler = TEST_SAMPLER(self.T, params)
        samples, weights=sampler.sample(n, self.r)
        return weights,samples.T
    def load_data(self):
        DG = Data_generator(0.2, 0.2, 6.0, 0.6, 0.4, 0.1, 0.02, 2.5)
        self.r=torch.tensor(DG.gen_data(1, self.T))
    def upd_param(self,lr=0.01):
        with torch.no_grad():
            for param in self.parameters:
                param -= lr * param.grad #check + or -
                param.grad.zero_()
    def singularlikelihood(self,epsilon):
        #calculate log-likelihood of each set of epsilon, (n,T) -> (n,)
        return torch.randn(epsilon.shape[0])
    def log_total(self):
        n=10
        weights,epsilon=self.call_sampler(n)
        likelihood=self.singularlikelihood(epsilon)
        normed=likelihood-torch.sum(likelihood).item()/n+torch.log(weights) #item for removing normalization from gradients
        return torch.log(torch.sum(torch.exp(normed)))
    def optimize(self,num_steps=10):
        for _ in range(num_steps):
            likelihood=self.log_total()
            likelihood.backward()
            self.upd_param(lr=0.01)
    
        

    
    

EM_sampler=EM(10,0.2, 0.2, 6.0, 0.6, 0.4, 0.1, 0.02, 2.5)
EM_sampler.r.shape

torch.Size([10, 1])