In [1]:
import numpy as np
import torch 

import matplotlib.pyplot as plt
%matplotlib inline

import torch, torch.nn as nn

import time 
from IPython import  display

# Dataset

In [12]:
def f(x, a, w, b):
    return a[0]*np.sin(w[0]*x+b[0])+a[1]*np.sin(2*w[1]*x+b[1])
    
    

In [31]:
def generate_dataset(N):
    
    param_set = {'w':[], 'a':[], 'b':[], 'mu_x':[]}
    datasets = []
    for i in range(N):
        Size = np.random.randint(low=4, high=50)
        w = np.random.uniform(low=5, high=7, size=2)
        b = np.random.uniform(low=0, high=2*np.pi, size=2)
        a = np.random.normal(loc=0, scale=1.0, size=2)
        mu_x = np.random.uniform(low=-4, high=4, size=1)

        param_set['w'].append(w); param_set['b'].append(b); param_set['a'].append(a); param_set['mu_x'].append(mu_x); 
        
        x = np.random.normal(loc=mu_x, scale=1.0, size=Size)
        y = np.random.normal(loc=f(x, a, w, b), scale=1.0, size=Size)
    
        datasets.append((x, y))
        
    return datasets, param_set

In [32]:
datasets, param_set = generate_dataset(5)

In [68]:
def iterate_minibatches(datasets, batchsize):
    
    for d in datasets:
        indices = np.random.permutation(np.arange(len(d[0])))
        for start in range(0, len(indices), batchsize):
            ix = indices[start: start + batchsize]
            
            yield  torch.FloatTensor(d[0][ix]), torch.FloatTensor(d[1][ix])

In [69]:
generator = iterate_minibatches(datasets, 4)

In [70]:
next(generator)

(tensor([ 3.3031,  1.7793,  4.3289,  2.4372]),
 tensor([ 0.1408,  0.6248,  0.1443,  0.0805]))

# Model

In [71]:
class fc_block(nn.Module):
    def __init__(self, dim_in, dim_out):
        
        super(self.__class__, self).__init__()
        self.fc = nn.Sequential(nn.Linear(dim_in, dim_out), 
                                nn.LayerNorm(dim_out),
                                nn.ReLU())
        def forward(self, X, z):
            
            C = torch.cat([X, z], dim=-1)
            return self.fc(C), X
            

class Decoder(nn.Module):
    def __init__(self, in_z, in_x, dim_middle, dim_out, n_layers):
        """ 
        nn that maps from latent space Z and the sample X to the target variable Y
        
        """
        super(self.__class__, self).__init__()
        
        self.fc1 = fc_block(in_z+in_x, dim_middle)
                
        self.layer_stack = nn.ModuleList([
            fc_block(in_x+in_x, dim_middle)
            for _ in range(n_layers)])
        
        self.fc_last_mu = nn.Linear(dim_middle, dim_out)
        self.fc_last_sigma = nn.Linear(dim_middle, dim_out)

        self.sigmoid = nn.Sigmoid()
    
 
    def forward(self, X, z):
        """
        takes x and latent z and produces target y
        """
  
        c, c_prev = self.fc1(X, z)

        for fc in self.layer_stack:
            c, c_prev = l(c, c_prev)

        self.mu_y = self.fc_last_mu(c)
        self.sigma_y = self.sigmoid(self.fc_last_sigma(c))*0.1+0.001


        return self.mu_y, self.sigma_y
    


In [72]:
class Encoder(nn.Module):
    def __init__(self, N_batch, in_x, dim_middle, dim_out, n_layers):
        """ 
        nn that maps dataset to the latent z that represent it      
        """
        super(self.__class__, self).__init__()
        
        # пока самая простая 
        self.fc1 = nn.Linear(in_x, dim_middle)
        
        self.fc_last_mu = nn.Linear(dim_middle, dim_out)
        self.fc_last_sigma = nn.Linear(dim_middle, dim_out)
        
 
    def forward(self, S):
        """
        takes dataset S and produces mu and sigma of normal distribution z
        """
  
        S = self.fc1(S)    
        S = S.mean(0)
        
        self.mu_z = self.fc_last_mu(S)
        self.sigma_z = self.fc_last_sigma(S).exp() # экспонента чтобы не было отрицательных значений дисперсии

        return self.mu_z, self.sigma_z
    


$$\ln p(D) \ge \sum_j E_{N(z | 0, 1)} \sum_i \ln p(y_{ij} | x_{ij}, h_\alpha(\mu_z+\Sigma_z^{1/2} z)) - \sum_j KL(q_{\theta_j}(z_j, S_j) || p(z_j)) $$

${KL({\mathcal {N}}_{0}\|{\mathcal {N}}_{1})={1 \over 2}\left\{\operatorname {tr} \left({\boldsymbol {\Sigma }}_{1}^{-1}{\boldsymbol {\Sigma }}_{0}\right)+\left({\boldsymbol {\mu }}_{1}-{\boldsymbol {\mu }}_{0}\right)^{\rm {T}}{\boldsymbol {\Sigma }}_{1}^{-1}({\boldsymbol {\mu }}_{1}-{\boldsymbol {\mu }}_{0})-k+\ln {|{\boldsymbol {\Sigma }}_{1}| \over |{\boldsymbol {\Sigma }}_{0}|}\right\},}$

$$\Sigma_0 = \Sigma_z \\ 
\mu_0 = \mu_z, \\ 
\Sigma_1 = I, \\ 
\mu_1 = 0$$

$$KL(q_{\theta_j}(z_j, S_j) || p(z_j))= \frac 1 2 ( \sum_i \sigma_i + \mu_z^T\mu_z - 2D -\sum_i\ln{\sigma_i}) $$


In [73]:
class DeepPrior():
    def __init__(self, N_batch, in_x, in_z, dim_middle, dim_out, n_layers):
        
        super(self.__class__, self).__init__()
    
        self.dim_z = in_z
        
        self.encoder = Encoder(N_batch, in_x, dim_middle, dim_out, n_layers)
        self.decoder = Decoder(in_z, in_x, dim_middle, dim_out, n_layers)
        
        self.ELBO = []
 
    def forward(self, X, y):
        """
        takes dataset S and produces ELBO lower bound for p(D) where D - dataset
        """
        
        # now let compute stohastic part
        S = torch.cat([X,y], dim = -1)
        mu_z, sigma_z = self.encoder(S)
        sigma_z = sigma_z.pow(1/2)
   
        # firstly let compute not stochastic component that corresponds to KL term
        KL_part = -1/2 * (sigma_z.sum()+(mu_z*mu_z).sum() - 2*self.dim_z - sigma_z.log().sum())
        
        #sampling
        N_batch = S.shape[0]
        z = torch.normal(mean = torch.zeros((N_batch, self.dim_z)), 
                          std = torch.ones((N_batch,  self.dim_z)))
        
        mu_y, sigma_y = self.decoder(X, z)
        
        log_likelihood = (-1/2*(y - mu_y)*(y-mu_y)* sigma_y.pow(2)).sum()-sigma_y.log().sum()
        
        loss = log_likelihood+KL_part
        
        self.ELBO.append(loss.cpu().data.numpy())
        
        return -loss


In [74]:
# оптимайзер
from torch import optim
model = DeepPrior(N_batch=4, in_x=1, in_z=1, dim_middle=128, dim_out=1, n_layers=2)
optimizer = optim.Adam(model.parameters(), lr=0.001)

AttributeError: 'DeepPrior' object has no attribute 'parameters'

# Training

In [None]:
def train_model(num_epochs=20, lr=1e-3, verbose=True):
    
    num_epochs = num_epochs # total amount of full passes over training data

    for epoch in range(num_epochs):
        start_time = time.time()
        model.train(True) # enable dropout / batch_norm training behavior
        for (X_batch, y_batch) in train_loader:
            
            loss = model(torch.Tensor(X_batch), torch.Tensor(y_batch))
            
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
        

          # Visualize
        # Then we print the results for this epoch:
        display.clear_output(wait=True)
        print("Epoch {} of {} took {:.3f}s".format(
            epoch + 1, num_epochs, time.time() - start_time)) 
        print('current elbo: {}'.format(model.ELBO[-1]))
        
#         print('acc hard', accuracy((model.hard_predict(X_batch)>0.5).data.numpy(), y_batch.data.numpy() ))
#         print('acc soft', accuracy((model.soft_predict(X_batch)>0.5).data.numpy(), y_batch.data.numpy() ))

        if verbose:
            plt.figure(figsize=(16, 6))
            plt.subplot(221)
            plt.title("ELBO")
            plt.xlabel("#iteration")
            plt.ylabel("elbo")
            plt.plot(model.ELBO, 'b', label = 'train_elbo')
            plt.show()
           