In [1]:
import matplotlib
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline  


import torch
import torch.nn as nn
from torch.autograd import Variable
from torch.nn import Parameter
import torch.optim as optim
import torch.nn.functional as F

from collections import Counter, defaultdict, OrderedDict
from tqdm import tqdm
import math
import os, pickle, gc
import seaborn as sns
from scipy.stats import norm

from IPython.core.display import display, HTML
display(HTML("<style>.container { width:98% !important; }</style>"))

In [2]:
def log_sum_exp(A, dim=-1, keepdim=False, sum_op=torch.sum):
    r"""Compute ``log(sum_op(exp(A), dim))`` using the log-sum-exp trick.

    Direct calculation of :math:`\log \sum_i \exp A_i` can result in underflow or overflow
    numerical issues. Big positive values can cause overflow :math:`\exp A_i = \inf`, and big
    negative values can cause underflow :math:`\exp A_i = 0`. The latter can eventually cause
    the sum to go to zero, finally resulting in :math:`\log 0 = -\inf`.
    The log-sum-exp trick avoids these issues by using the identity,

    .. math::
        \log \sum_i \exp A_i = \log \sum_i \exp(A_i - c) + c, \text{using},  \\
        c = \max A.

    This avoids overflow, and while underflow can still happen for individual elements it
    avoids the sum being zero.

    Parameters
    ----------
    A : tensor
        Tensor of which we wish to compute the log-sum-exp.
    dim : int
        Dimension to reduce over; defaults to the last dimension.
    keepdim : bool
        If True the reduced dimension is kept with size 1, otherwise it is squeezed out.
    sum_op : function
        Reduction applied to ``exp(A - c)``; it is called with ``dim`` and ``keepdim``
        keyword arguments. Default is torch.sum, but torch.mean gives log-mean-exp.

    Returns
    -------
    tensor
        The log-sum-exp of `A` along `dim`.
    """
    A_max = torch.max(A, dim=dim, keepdim=True)[0]
    # When the maximum is +/-inf, subtracting it would produce inf - inf = NaN.
    # Shifting by 0 instead lets exp/log propagate the infinity correctly.
    nonfinite = A_max.abs() == float('inf')
    shift = A_max.masked_fill(nonfinite, 0.0)

    B = torch.log(sum_op(torch.exp(A - shift), dim=dim, keepdim=True)) + shift

    if not keepdim:
        B = B.squeeze(dim)

    return B



class Distribution(object):
    """Abstract interface for the probability distributions used in this notebook.

    Every method is a placeholder that raises ``NotImplementedError``; concrete
    distributions override the ones they support.
    """

    def pdf(self, x):
        """Density evaluated at ``x``."""
        raise NotImplementedError()

    def logpdf(self, x):
        """Log-density evaluated at ``x``."""
        raise NotImplementedError()

    def cdf(self, x):
        """Cumulative distribution function evaluated at ``x``."""
        raise NotImplementedError()

    def logcdf(self, x):
        """Log of the cumulative distribution function evaluated at ``x``."""
        raise NotImplementedError()

    def sample(self):
        """Draw a sample from the distribution."""
        raise NotImplementedError()

    
class Normal(Distribution):
    """Element-wise Gaussian parameterized by a mean and a log-variance tensor.

    Parameters
    ----------
    loc : tensor
        Mean of every element.
    logvar : tensor
        Log-variance of every element (``var = exp(logvar)``).
    """

    def __init__(self, loc, logvar):
        self.loc = loc
        self.logvar = logvar
        # Kept for backward compatibility; sample() reads the shape from `loc` directly.
        self.shp = loc.size()

        super(Normal,self).__init__()

    def logpdf(self, x, eps=0.0):
        """Element-wise log-density at ``x``.

        ``eps`` is added to the denominator ``2 * exp(logvar)`` as an optional
        guard against division by a vanishing variance.
        """
        c = -0.5 * math.log(2 * math.pi)
        return c - 0.5*self.logvar - (x - self.loc).pow(2) / (2 * torch.exp(self.logvar) + eps)

    def pdf(self, x):
        """Element-wise density at ``x``."""
        return torch.exp(self.logpdf(x))

    def sample(self):
        """Draw one reparameterized sample ``loc + exp(0.5 * logvar) * noise``.

        The standard-normal noise is created from a copy of ``loc``'s data, so it
        always has the same shape, dtype and device as ``loc``. The previous
        implementation hard-coded float32 on the CPU or the current CUDA device.
        """
        noise = self.loc.data.clone().normal_()
        return self.loc + torch.exp(0.5*self.logvar) * Variable(noise)

    def entropy(self):
        """Element-wise differential entropy ``0.5 * log(2*pi*e) + 0.5 * logvar``."""
        return 0.5 * math.log(2. * math.pi * math.e) + 0.5*self.logvar



def kl_normal2_normal2(mean1, log_var1, mean2, log_var2, eps=0.0):
    r"""
    Compute the closed-form KL-divergence between two Gaussians parameterized
    with diagonal log variance, element-wise.

    With :math:`q = \mathcal{N}(\mu_1, \sigma^2_1)` and :math:`p = \mathcal{N}(\mu_2, \sigma^2_2)`:

    .. math::
       D_{KL}[q||p] &= -\int q(x) \log p(x) dx + \int q(x) \log q(x) dx     \\
                    &= -\int \mathcal{N}(x; \mu_1, \sigma^2_1) \log \mathcal{N}(x; \mu_2, \sigma^2_2) dx
                        + \int \mathcal{N}(x; \mu_1, \sigma^2_1) \log \mathcal{N}(x; \mu_1, \sigma^2_1) dx     \\
                    &= \frac{1}{2} \log(2\pi\sigma^2_2) + \frac{\sigma^2_1 + (\mu_1 - \mu_2)^2}{2\sigma^2_2}
                        - \frac{1}{2}( 1 + \log(2\pi\sigma^2_1) )      \\
                    &= \log \frac{\sigma_2}{\sigma_1} + \frac{\sigma^2_1 + (\mu_1 - \mu_2)^2}{2\sigma^2_2} - \frac{1}{2}

    Parameters
    ----------
    mean1, log_var1 : tensor
        Mean and log-variance of the first Gaussian (``q``).
    mean2, log_var2 : tensor
        Mean and log-variance of the second Gaussian (``p``).
    eps : float
        Added to the denominator ``2 * exp(log_var2)``; default 0.0.

    Returns
    -------
    tensor
        Element-wise KL divergence; it is not summed over any dimension.
    """
    return 0.5*log_var2 - 0.5*log_var1 + (torch.exp(log_var1) + (mean1 - mean2)**2) / (2*torch.exp(log_var2) + eps) - 0.5

    
class FixedMixtureNormal(nn.Module):   # needs to be an nn.Module to register the parameters correctly
    """Mixture of Gaussians with fixed (non-trainable) parameters.

    Takes ``loc``, ``logvar`` and ``pi`` as lists of float values, one entry per
    mixture component, and shares them across all dimensions of the input.
    The values are stored as ``Parameter`` objects with ``requires_grad=False``.

    Parameters
    ----------
    loc : sequence of float
        Component means.
    logvar : sequence of float
        Component log-variances.
    pi : sequence of float
        Mixture weights; must sum to 1 (within 1e-4).
    """

    def __init__(self, loc, logvar, pi):
        super(FixedMixtureNormal,self).__init__()
        # abs() is required: without it any weights summing to less than 1 pass the check.
        assert abs(sum(pi) - 1) < 0.0001
        self.loc = Parameter(torch.from_numpy(np.array(loc)).float(),requires_grad=False)
        self.logvar = Parameter(torch.from_numpy(np.array(logvar)).float(),requires_grad=False)
        self.pi = Parameter(torch.from_numpy(np.array(pi)).float(),requires_grad=False)

    def _component_logpdf(self, x, eps=0.0):
        """Log-density of ``x`` under every component; output shape is ``x.shape + (num_components,)``."""
        # The trailing singleton axis broadcasts against the (num_components,) parameter vectors.
        x = x.unsqueeze(-1)

        c = -0.5 * math.log(2 * math.pi)
        return c - 0.5*self.logvar - (x - self.loc).pow(2) / (2 * torch.exp(self.logvar) + eps)

    def logpdf(self,x):
        """Element-wise log-density of the mixture at ``x`` (same shape as ``x``).

        Evaluated as a log-sum-exp over ``log(pi_k) + log N_k(x)`` so that points
        far from every component do not underflow to ``log(0) = -inf``.
        """
        log_joint = torch.log(self.pi) + self._component_logpdf(x)   #... x num_components
        peak = torch.max(log_joint, -1, keepdim=True)[0]
        log_mix = peak + torch.log(torch.sum(torch.exp(log_joint - peak), -1, keepdim=True))
        return log_mix.squeeze(-1)

    
    
class FixedNormal(Distribution):
    """Gaussian whose mean and log-variance are plain floats.

    The two scalars are shared across all dimensions of whatever tensor is
    passed to :meth:`logpdf`.
    """

    def __init__(self, loc, logvar):
        super(FixedNormal, self).__init__()
        self.loc = loc
        self.logvar = logvar

    def logpdf(self, x, eps=0.0):
        """Element-wise log-density at ``x``; ``eps`` is added to ``2 * exp(logvar)``."""
        log_norm_const = - float(0.5 * math.log(2 * math.pi))
        denominator = 2 * math.exp(self.logvar) + eps
        squared_dev = (x - self.loc).pow(2)
        return log_norm_const - 0.5*self.logvar - squared_dev / denominator

    

In [3]:
def distribution_selector(loc, logvar, pi):
    """Build the prior distribution matching the kind of arguments given.

    Parameters
    ----------
    loc : float or list/tuple of float
        Prior mean(s). A scalar is repeated for every mixture component.
    logvar : float or list/tuple of float
        Prior log-variance(s).
    pi : float or list/tuple of float
        Mixture weights. Only used when `logvar` is a list/tuple as well.

    Returns
    -------
    FixedMixtureNormal when both `logvar` and `pi` are lists/tuples, otherwise FixedNormal.

    Raises
    ------
    TypeError
        If `logvar` is a list/tuple but `pi` is not. This used to build a
        FixedNormal holding a list, which only failed later inside ``logpdf``.
    """
    logvar_is_seq = isinstance(logvar,(list,tuple))
    pi_is_seq = isinstance(pi,(list,tuple))

    if logvar_is_seq and not pi_is_seq:
        raise TypeError('logvar is a list/tuple of %i components, so pi must be a '
                        'list/tuple of mixture weights as well (got %r)' % (len(logvar), pi))

    if logvar_is_seq and pi_is_seq:
        assert len(logvar) == len(pi)
        num_components = len(logvar)
        print('Mixture of Normal prior, nc:',num_components)
        if not isinstance(loc,(list,tuple)):
            loc = (loc,)*num_components
        return FixedMixtureNormal(loc,logvar,pi)

    print('Normal prior')
    return FixedNormal(loc,logvar)


class BBBLinearFactorial(nn.Module):
    """Linear layer with a factorized Gaussian approximate posterior over weights and bias.

    Parameters
    ----------
    in_features, out_features : int
        Input and output widths of the layer.
    p_logvar_init, p_pi : float or list/tuple
        Prior log-variance(s) and mixture weights. With lists/tuples of equal
        length the prior is a mixture of Gaussians, otherwise a single Gaussian.
    q_logvar_init : float
        Offset added to the initial posterior log-variances.
    """

    def __init__(self,in_features, out_features, p_logvar_init=-3, p_pi=1.0, q_logvar_init=-5):
        super(BBBLinearFactorial,self).__init__()

        self.in_features = in_features
        self.out_features = out_features
        self.p_logvar_init = p_logvar_init
        self.q_logvar_init = q_logvar_init

        # Approximate posterior: trainable mean / log-variance per weight and per bias.
        weight_shape = (out_features, in_features)
        self.qw_mean = Parameter(torch.Tensor(*weight_shape))
        self.qw_logvar = Parameter(torch.Tensor(*weight_shape))
        self.qb_mean = Parameter(torch.Tensor(out_features))
        self.qb_logvar = Parameter(torch.Tensor(out_features))

        self.qw = Normal(loc=self.qw_mean, logvar=self.qw_logvar)
        self.qb = Normal(loc=self.qb_mean, logvar=self.qb_logvar)

        # Prior: no trainable parameters, so the fixed distribution variants are used.
        prior_kwargs = dict(loc=0.0, logvar=p_logvar_init, pi=p_pi)
        self.pw = distribution_selector(**prior_kwargs)
        self.pb = distribution_selector(**prior_kwargs)

        # The tensors above are allocated uninitialized; fill them now.
        self.reset_parameters()

    def reset_parameters(self):
        """Initialize the posterior parameters uniformly in ``[-stdv, stdv]``.

        ``stdv = 10 / sqrt(in_features)``; the log-variances are additionally
        shifted by ``q_logvar_init``.
        """
        bound = 10. / math.sqrt(self.in_features)
        posterior_pairs = ((self.qw_mean, self.qw_logvar),
                           (self.qb_mean, self.qb_logvar))
        for mean, logvar in posterior_pairs:
            mean.data.uniform_(-bound, bound)
            logvar.data.uniform_(-bound, bound).add_(self.q_logvar_init)

    def forward(self, input):
        """Not supported; call :meth:`probforward` instead."""
        raise NotImplementedError()

    def probforward(self, input, MAP=False):
        """Apply the layer with sampled (or mean) weights.

        Parameters
        ----------
        input : tensor
            Passed to ``F.linear``; presumably (batch, in_features).
        MAP : bool
            If True use the posterior means instead of sampling.

        Returns
        -------
        (output, kl, diagnostics)
            ``kl`` is the sum of ``log q - log p`` evaluated at the weights and
            bias that were used. ``diagnostics`` holds ``kl_w``, ``kl_b`` and the
            mean differential entropies ``Hq_w`` and ``Hq_b``.
        """
        if MAP:
            w_sample, b_sample = self.qw.loc, self.qb.loc
        else:
            w_sample = self.qw.sample()
            b_sample = self.qb.sample()

        kl_w = (self.qw.logpdf(w_sample) - self.pw.logpdf(w_sample)).sum()
        kl_b = (self.qb.logpdf(b_sample) - self.pb.logpdf(b_sample)).sum()

        diagnostics = {
            'kl_w': kl_w.data.mean(),
            'kl_b': kl_b.data.mean(),
            'Hq_w': self.qw.entropy().data.mean(),
            'Hq_b': self.qb.entropy().data.mean(),
        }

        return F.linear(input, w_sample, b_sample), kl_w + kl_b, diagnostics

    def __repr__(self):
        return '{} ({} -> {})'.format(self.__class__.__name__, self.in_features, self.out_features)
    
    
    
class BBBMLP(nn.Module):
    """Simple MLP classifier whose linear layers have probabilistic weights.

    The network is ``num_layers`` hidden ``BBBLinearFactorial`` layers of width
    ``num_hidden`` (each followed by ELU) and a final ``BBBLinearFactorial``
    layer producing ``num_class`` logits.
    """

    def __init__(self,in_features, num_class, num_hidden, num_layers, p_logvar_init=-3, p_pi=1.0, q_logvar_init=-5):
        super(BBBMLP, self).__init__()
        layer_kwargs = dict(p_logvar_init=p_logvar_init, p_pi=p_pi, q_logvar_init=q_logvar_init)

        layers = [BBBLinearFactorial(in_features=in_features,out_features=num_hidden, **layer_kwargs), nn.ELU()]
        for _ in range(num_layers-1):
            layers += [BBBLinearFactorial(in_features=num_hidden,out_features=num_hidden, **layer_kwargs), nn.ELU()]
        layers += [BBBLinearFactorial(in_features=num_hidden,out_features=num_class, **layer_kwargs)]

        self.layers = nn.ModuleList(layers)
        self.loss = nn.CrossEntropyLoss()

    def probforward(self,x, MAP=False):
        """Run ``x`` through all layers.

        Layers exposing a callable ``probforward`` contribute to the KL term and
        the diagnostics; all other layers are applied as ordinary modules.

        Returns
        -------
        (logits, kl, diagnostics)
            ``kl`` is the sum of the per-layer KL terms. ``diagnostics`` maps each
            diagnostic name to a list with one entry per probabilistic layer.
        """
        diagnostics = defaultdict(list)
        kl = 0
        for l in self.layers:
            if hasattr(l, 'probforward' ) and callable( l.probforward ):
                x, _kl, _diagnostics = l.probforward(x,MAP=MAP)
                # Out-of-place add: never modifies a tensor returned by a layer.
                kl = kl + _kl
                for k,v in _diagnostics.items():
                    diagnostics[k].append(v)
            else:
                x = l(x)
        logits = x
        return logits, kl, diagnostics

    def getloss(self,x,y, dataset_size, MAP=False):
        """Compute the negative per-sample ELBO and diagnostics for one batch.

        Parameters
        ----------
        x, y : tensor
            Inputs and integer class targets for the batch.
        dataset_size : int
            Number of training samples. The KL term is divided by it to spread
            it across all samples, see Eq. (8) in Blundell et al. 2015.
        MAP : bool
            Forwarded to :meth:`probforward`.

        Returns
        -------
        (logits, loss, diagnostics)
            ``diagnostics`` is a dict of lists: single-element lists for the batch
            level values and one entry per probabilistic layer for
            ``kl_w``, ``kl_b``, ``Hq_w`` and ``Hq_b``.
        """
        logits, kl, _diagnostics = self.probforward(x, MAP=MAP)

        logpy = -self.loss(logits,y) #sample average
        kl = kl / dataset_size  # see EQ (8) in Blundell et al 2015

        ll = logpy - kl   #ELBO
        loss = -ll

        # Compare against the targets passed in. The previous code read the
        # notebook-global buffer `th_y`, which raised NameError (or silently used
        # the wrong labels) whenever getloss was called with other data.
        acc = (logits.max(1)[1].data == y.data).float().mean() #accuracy

        #the xxx.data.mean() is just an easy way to strip the graph and reduce to a scalar
        diagnostics = {'loss': [loss.data.mean()],
                       'll': [ll.data.mean()],
                       'kl': [kl.data.mean()],
                       'logpy': [logpy.data.mean()],
                       'acc': [acc],
                       'kl_w': _diagnostics['kl_w'],
                       'kl_b': _diagnostics['kl_b'],
                       'Hq_w': _diagnostics['Hq_w'],
                       'Hq_b': _diagnostics['Hq_b'],}
        return logits, loss, diagnostics
    
    

def plothist(model, filename, p_logvar=None):
    """Save histograms of the posterior means of all weights and biases.

    Parameters
    ----------
    model : module with a ``layers`` sequence
        Every layer that has a ``qw_mean`` attribute contributes its
        ``qw_mean`` / ``qb_mean`` values.
    filename : str
        Path the figure is written to.
    p_logvar : float, optional
        Log-variance of the zero-mean Gaussian prior drawn on top of the
        histograms. Defaults to ``p_logvar_init`` of the first probabilistic
        layer. The prior curve is omitted when this is a list/tuple (mixture prior).
    """
    bayes_layers = [l for l in model.layers if hasattr(l, 'qw_mean')]
    if p_logvar is None:
        # Read the prior from the model rather than from a notebook global.
        p_logvar = bayes_layers[0].p_logvar_init

    x = np.linspace(-0.5,0.5,100)
    if isinstance(p_logvar,(list,tuple)):
        prior_pdf = None
    else:
        # scipy's `scale` is a standard deviation: sigma = exp(0.5 * logvar).
        prior_pdf = norm(loc=0,scale=np.exp(0.5*p_logvar)).pdf(x)

    W = torch.cat([l.qw_mean.view(-1) for l in bayes_layers]).data.cpu().numpy()
    b = torch.cat([l.qb_mean.view(-1) for l in bayes_layers]).data.cpu().numpy()

    plt.figure(figsize=(10,5))
    for position, values, title in ((121, W, 'Weights'), (122, b, 'bias')):
        plt.subplot(position)
        # `density` replaces the `normed` keyword, which newer matplotlib releases removed.
        _ = plt.hist(values,x,density=True,label='q samples')
        if prior_pdf is not None:
            plt.plot(x, prior_pdf,label='prior pdf')
        plt.xlim([-0.5,0.5])
        plt.ylim([0,10])
        plt.legend()
        plt.title(title)

    plt.savefig(filename)
    plt.close('all')

def addres(old,new):
    """Accumulate the lists in `new` element-wise into `old` and return `old`.

    Keys missing from `old` are inserted holding the very list object from
    `new`; existing keys are replaced by a fresh list of pairwise sums.
    `old` is modified in place.
    """
    for key, incoming in new.items():
        if key not in old:
            old[key] = incoming
            continue
        old[key] = [current + added for current, added in zip(old[key], incoming)]
    return old
    
def averres(res,num_batch):
    """Divide every entry of every list in `res` by `num_batch`, in place, and return `res`."""
    for key, totals in list(res.items()):
        res[key] = [total/num_batch for total in totals]
    return res
    
def listdict2dictlist(LD):
    """Turn a list of dicts into a dict of tuples.

    The result's keys are the sorted keys of the first dict. Values are taken
    from every dict in key-sorted order and grouped by position, so the dicts
    are expected to share the same keys.
    """
    names = sorted(LD[0].keys())
    rows = [[d[k] for k in sorted(d)] for d in LD]
    columns = zip(*rows)
    return dict(zip(names, columns))
    

In [4]:
# Download from https://www.dropbox.com/s/k92825vinroxh6i/mnist.npz?dl=0
# The context manager closes the archive once the arrays have been read.
with np.load('mnist.npz') as data:
    xtrain, ytrain = data['x_train'],data['y_train']
    xvalid, yvalid = data['x_valid'],data['y_valid']
    xtest, ytest = data['x_test'],data['y_test']


#MU, SIGMA = xtrain.mean(keepdims=True), xtrain.std(keepdims=True)
#xtrain = (xtrain-0.5)/0.5
#xvalid = (xvalid-0.5)/0.5
#xtest = (xtest-0.5)/0.5


In [5]:
# Experiment configuration.
batch_size = 128
nsamples = 16            # each mini-batch is tiled this many times before the forward pass
dataset_size, in_features = xtrain.shape
num_hidden = 512
num_layers = 2
# NOTE: the sweep loops below rebind the next two names, so these values only
# matter if the sweep is removed.
p_logvar_init = -5 #[0.,-1,-3,-5,-7]
q_logvar_init = -5 #[-4,-5,-6]
p_pi = 1.0 #(1./10.,2./10.,4./10.,2./10.,1./10.)

num_class = 10
learningrate = 1e-3
num_epochs = 100
# Use the GPU when there is one instead of failing on CPU-only machines.
cuda = torch.cuda.is_available()
save_every_epoch = 1

# Seed the generators used for shuffling, initialization and weight sampling
# so that a fresh Restart & Run All is repeatable.
seed = 0
np.random.seed(seed)
torch.manual_seed(seed)

def format_diagnostics(tag, epoch, diag):
    """Render one epoch's diagnostics as ``'<tag> <epoch> | key: v1|v2, ...'`` with keys sorted."""
    fields = ", ".join(["%s: %s"%(k,"|".join(["%0.3f"%(_v) for _v in v])) for k,v in sorted(diag.items())])
    return "%s %i | "%(tag, epoch) + fields


def run_epoch(np_x, np_y, batch_size, dataset_size, MAP=False, is_training=False):
    """Run one pass over ``(np_x, np_y)`` and return the batch-averaged diagnostics.

    Uses the notebook globals ``model``, ``optimizer``, ``th_x``, ``th_y`` and
    ``nsamples`` that the sweep loop below sets up before calling it.
    Samples are shuffled, and the trailing ``N % batch_size`` samples are dropped
    because the input buffers have a fixed size.
    The optimizer is only stepped when ``is_training`` is True.
    """
    N = np_x.shape[0]
    idx = np.arange(N)
    np.random.shuffle(idx)
    diagnostics = {}
    nbatch_per_epoch = N//batch_size

    for i in tqdm(range(nbatch_per_epoch)):
        batch_idx = idx[i*batch_size : (i+1)*batch_size]

        # NOTE(review): the batch is tiled `nsamples` times, but each layer draws a
        # single weight sample per forward call, so the copies appear to produce
        # identical logits. Confirm whether per-copy weight samples were intended.
        th_x.copy_(torch.from_numpy(np.tile(np_x[batch_idx],[nsamples,1])))
        th_y.copy_(torch.from_numpy(np.tile(np_y[batch_idx],[nsamples])))

        logits, loss, _diagnostics = model.getloss(Variable(th_x),Variable(th_y), dataset_size=dataset_size, MAP=MAP)
        diagnostics = addres(diagnostics, _diagnostics)
        if is_training:
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
    diagnostics = averres(diagnostics,nbatch_per_epoch)
    return diagnostics


# Sweep over prior / posterior log-variance initializations; every combination
# gets its own experiment directory with a log, diagnostics, weights and figures.
for p_logvar_init in [0.,-1,-3,-5,-7]:
    for q_logvar_init in [-4,-5,-6]:

        expname = 'normal_prior_qlogvar%0.1f_plogvar%0.1f_lr%0.5f_bs%i_ns%i'%(q_logvar_init,p_logvar_init,learningrate,batch_size,nsamples)
        weights_dir = os.path.join(expname,'weights')
        figure_dir = os.path.join(expname,'figures')

        logfile = os.path.join(expname,'logfile.txt')
        diagnosticsfile = os.path.join(expname,'diagnostics.pkl')
        weightsfile = os.path.join(weights_dir,'model_epoch%i.pkl')
        histfigurefile = os.path.join(figure_dir,'weighthistogram_epoch%i.png')

        for directory in (expname, weights_dir, figure_dir):
            os.makedirs(directory, exist_ok=True)
        with open(logfile,'w') as fh: fh.write('')   # truncate a log left by a previous run

        model = BBBMLP(in_features=in_features, num_class=num_class, num_hidden=num_hidden, num_layers=num_layers, p_logvar_init=p_logvar_init, p_pi=p_pi, q_logvar_init=q_logvar_init)
        optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=learningrate)
        # Fixed-size input buffers that run_epoch refills for every batch.
        th_x = torch.FloatTensor(batch_size*nsamples,in_features).fill_(0)
        th_y = torch.LongTensor(batch_size*nsamples).fill_(0)

        if cuda:
            model.cuda()
            th_x = th_x.cuda()
            th_y = th_y.cuda()

        diagnostics_batch_train, diagnostics_batch_valid, diagnostics_batch_valid_MAP = [],[],[]
        for e in range(num_epochs):

            if e % save_every_epoch == 0:
                #get weights before training
                weights = model.state_dict()
                for k in weights:
                    # clone(): on CPU `.cpu()` returns the live parameter storage, so
                    # without a copy the snapshot would change during the epoch.
                    weights[k] = weights[k].cpu().clone()
                plothist(model,histfigurefile%(e))

            diagnostics_batch_train += [run_epoch(xtrain, ytrain, batch_size, dataset_size, MAP=False, is_training=True)]
            diagnostics_batch_valid += [run_epoch(xvalid, yvalid, batch_size, dataset_size, MAP=False, is_training=False)]
            diagnostics_batch_valid_MAP += [run_epoch(xvalid, yvalid, batch_size, dataset_size, MAP=True, is_training=False)]

            # The "Test" lines report the validation split.
            ltr = format_diagnostics("Train", e, diagnostics_batch_train[-1])
            lte = format_diagnostics("Test", e, diagnostics_batch_valid[-1])
            ltemap = format_diagnostics("MAP-Test", e, diagnostics_batch_valid_MAP[-1])

            print('\n'.join([ltr,lte,ltemap]))
            with open(logfile,'a') as fh: fh.write('\n'.join([ltr,lte,ltemap]) + '\n')

            if e % save_every_epoch == 0:
                with open(diagnosticsfile,'wb') as fh:
                    pickle.dump({'train':listdict2dictlist(diagnostics_batch_train),
                                 'valid':listdict2dictlist(diagnostics_batch_valid),
                                 'validMAP':listdict2dictlist(diagnostics_batch_valid_MAP)},fh)
                with open(weightsfile%(e),'wb') as fh: pickle.dump(weights,fh)
            gc.collect()
          

Normal prior
Normal prior
Normal prior
Normal prior
Normal prior
Normal prior


100%|██████████| 390/390 [00:09<00:00, 40.78it/s]
100%|██████████| 78/78 [00:00<00:00, 83.11it/s]
100%|██████████| 78/78 [00:00<00:00, 90.16it/s]


Train 0 | Hq_b: -0.584|-0.578|-0.570, Hq_w: -0.558|-0.581|-0.583, acc: 0.717, kl: 20.362, kl_b: 785.552|788.714|15.265, kl_w: 604334.179|404266.976|7897.876, ll: -31.343, logpy: -10.981, loss: 31.343
Test 0 | Hq_b: -0.585|-0.578|-0.570, Hq_w: -0.536|-0.581|-0.585, acc: 0.843, kl: 20.177, kl_b: 785.325|790.718|15.344, kl_w: 595220.730|404133.183|7917.562, ll: -24.212, logpy: -4.035, loss: 24.212
MAP-Test 0 | Hq_b: -0.585|-0.578|-0.570, Hq_w: -0.536|-0.581|-0.585, acc: 0.922, kl: 26.742, kl_b: 1036.586|1039.103|20.190, kl_w: 791808.000|532750.000|10421.385, ll: -28.504, logpy: -1.762, loss: 28.504


100%|██████████| 390/390 [00:08<00:00, 44.60it/s]
100%|██████████| 78/78 [00:00<00:00, 83.54it/s]
100%|██████████| 78/78 [00:00<00:00, 89.92it/s]


Train 1 | Hq_b: -0.585|-0.577|-0.571, Hq_w: -0.514|-0.580|-0.587, acc: 0.842, kl: 20.003, kl_b: 786.075|788.194|15.357, kl_w: 586600.789|404038.599|7910.718, ll: -23.576, logpy: -3.573, loss: 23.576
Test 1 | Hq_b: -0.586|-0.577|-0.572, Hq_w: -0.492|-0.580|-0.588, acc: 0.864, kl: 19.825, kl_b: 787.846|787.787|15.179, kl_w: 577863.279|403868.620|7921.231, ll: -22.379, logpy: -2.554, loss: 22.379
MAP-Test 1 | Hq_b: -0.586|-0.577|-0.572, Hq_w: -0.492|-0.580|-0.588, acc: 0.937, kl: 26.378, kl_b: 1037.294|1038.572|20.198, kl_w: 773898.000|532474.875|10433.107, ll: -27.246, logpy: -0.868, loss: 27.246


100%|██████████| 390/390 [00:08<00:00, 44.75it/s]
100%|██████████| 78/78 [00:00<00:00, 85.12it/s]
100%|██████████| 78/78 [00:00<00:00, 91.96it/s]


Train 2 | Hq_b: -0.586|-0.576|-0.573, Hq_w: -0.469|-0.579|-0.589, acc: 0.861, kl: 19.648, kl_b: 786.359|787.436|15.293, kl_w: 569182.378|403684.239|7927.828, ll: -21.880, logpy: -2.233, loss: 21.880
Test 2 | Hq_b: -0.587|-0.575|-0.574, Hq_w: -0.446|-0.578|-0.591, acc: 0.877, kl: 19.468, kl_b: 787.748|787.351|14.869, kl_w: 560390.442|403470.725|7934.500, ll: -21.142, logpy: -1.675, loss: 21.142
MAP-Test 2 | Hq_b: -0.587|-0.575|-0.574, Hq_w: -0.446|-0.578|-0.591, acc: 0.935, kl: 26.003, kl_b: 1037.856|1037.924|20.210, kl_w: 755568.750|532064.250|10443.592, ll: -26.558, logpy: -0.554, loss: 26.558


100%|██████████| 390/390 [00:08<00:00, 44.88it/s]
100%|██████████| 78/78 [00:00<00:00, 85.36it/s]
100%|██████████| 78/78 [00:00<00:00, 91.81it/s]


Train 3 | Hq_b: -0.587|-0.574|-0.576, Hq_w: -0.423|-0.577|-0.592, acc: 0.872, kl: 19.285, kl_b: 786.404|786.662|15.362, kl_w: 551468.707|403232.153|7938.733, ll: -20.775, logpy: -1.490, loss: 20.775
Test 3 | Hq_b: -0.587|-0.573|-0.579, Hq_w: -0.399|-0.576|-0.593, acc: 0.880, kl: 19.101, kl_b: 783.721|784.858|15.168, kl_w: 542497.872|403020.322|7937.644, ll: -20.346, logpy: -1.245, loss: 20.346
MAP-Test 3 | Hq_b: -0.587|-0.573|-0.579, Hq_w: -0.399|-0.576|-0.593, acc: 0.923, kl: 25.614, kl_b: 1038.234|1036.944|20.250, kl_w: 736657.312|531507.125|10453.146, ll: -26.067, logpy: -0.453, loss: 26.067


100%|██████████| 390/390 [00:08<00:00, 44.98it/s]
100%|██████████| 78/78 [00:00<00:00, 85.61it/s]
100%|██████████| 78/78 [00:00<00:00, 91.68it/s]


Train 4 | Hq_b: -0.587|-0.571|-0.580, Hq_w: -0.375|-0.575|-0.594, acc: 0.877, kl: 18.915, kl_b: 787.294|784.874|15.257, kl_w: 533628.167|402613.150|7945.496, ll: -20.074, logpy: -1.159, loss: 20.074
Test 4 | Hq_b: -0.587|-0.570|-0.579, Hq_w: -0.351|-0.573|-0.595, acc: 0.890, kl: 18.730, kl_b: 786.036|784.295|15.864, kl_w: 524711.940|402247.516|7946.140, ll: -19.759, logpy: -1.029, loss: 19.759
MAP-Test 4 | Hq_b: -0.587|-0.570|-0.579, Hq_w: -0.351|-0.573|-0.595, acc: 0.921, kl: 25.211, kl_b: 1038.287|1035.405|20.245, kl_w: 717220.250|530796.000|10463.997, ll: -25.644, logpy: -0.433, loss: 25.644


100%|██████████| 390/390 [00:08<00:00, 44.95it/s]
100%|██████████| 78/78 [00:00<00:00, 85.36it/s]
100%|██████████| 78/78 [00:00<00:00, 92.44it/s]


Train 5 | Hq_b: -0.587|-0.568|-0.580, Hq_w: -0.326|-0.572|-0.597, acc: 0.888, kl: 18.543, kl_b: 786.892|783.108|15.364, kl_w: 515808.646|401823.953|7955.377, ll: -19.549, logpy: -1.005, loss: 19.549
Test 5 | Hq_b: -0.587|-0.566|-0.581, Hq_w: -0.302|-0.570|-0.598, acc: 0.902, kl: 18.361, kl_b: 785.984|782.333|15.509, kl_w: 507087.536|401409.974|7968.576, ll: -19.256, logpy: -0.895, loss: 19.256
MAP-Test 5 | Hq_b: -0.587|-0.566|-0.581, Hq_w: -0.302|-0.570|-0.598, acc: 0.929, kl: 24.799, kl_b: 1038.404|1033.626|20.263, kl_w: 697460.500|529913.875|10474.982, ll: -25.197, logpy: -0.398, loss: 25.197


100%|██████████| 390/390 [00:08<00:00, 43.84it/s]
100%|██████████| 78/78 [00:00<00:00, 84.99it/s]
100%|██████████| 78/78 [00:00<00:00, 90.71it/s]


Train 6 | Hq_b: -0.587|-0.564|-0.582, Hq_w: -0.277|-0.568|-0.599, acc: 0.899, kl: 18.178, kl_b: 785.115|781.753|15.443, kl_w: 498460.666|400878.091|7966.451, ll: -19.023, logpy: -0.845, loss: 19.023
Test 6 | Hq_b: -0.587|-0.562|-0.583, Hq_w: -0.252|-0.566|-0.600, acc: 0.910, kl: 17.994, kl_b: 785.647|779.685|15.602, kl_w: 489920.314|400251.207|7958.681, ll: -18.789, logpy: -0.795, loss: 18.789
MAP-Test 6 | Hq_b: -0.587|-0.562|-0.583, Hq_w: -0.252|-0.566|-0.600, acc: 0.927, kl: 24.379, kl_b: 1038.177|1031.420|20.270, kl_w: 677552.562|528840.938|10486.215, ll: -24.762, logpy: -0.383, loss: 24.762


100%|██████████| 390/390 [00:08<00:00, 43.85it/s]
100%|██████████| 78/78 [00:01<00:00, 75.28it/s]
100%|██████████| 78/78 [00:01<00:00, 75.16it/s]


Train 7 | Hq_b: -0.587|-0.559|-0.586, Hq_w: -0.228|-0.564|-0.602, acc: 0.905, kl: 17.821, kl_b: 787.431|780.169|15.346, kl_w: 481799.603|399710.805|7978.494, ll: -18.584, logpy: -0.762, loss: 18.584
Test 7 | Hq_b: -0.586|-0.557|-0.589, Hq_w: -0.203|-0.561|-0.603, acc: 0.910, kl: 17.651, kl_b: 789.220|775.778|15.330, kl_w: 473872.610|399121.319|7986.727, ll: -18.412, logpy: -0.761, loss: 18.412
MAP-Test 7 | Hq_b: -0.586|-0.557|-0.589, Hq_w: -0.203|-0.561|-0.603, acc: 0.937, kl: 23.959, kl_b: 1037.912|1029.027|20.328, kl_w: 657800.688|527576.375|10498.726, ll: -24.286, logpy: -0.327, loss: 24.286


100%|██████████| 390/390 [00:09<00:00, 40.92it/s]
100%|██████████| 78/78 [00:01<00:00, 77.28it/s]
100%|██████████| 78/78 [00:00<00:00, 83.81it/s]


Train 8 | Hq_b: -0.586|-0.554|-0.591, Hq_w: -0.179|-0.558|-0.604, acc: 0.912, kl: 17.486, kl_b: 786.668|776.266|15.518, kl_w: 466364.799|398352.941|7988.922, ll: -18.153, logpy: -0.667, loss: 18.153
Test 8 | Hq_b: -0.586|-0.551|-0.592, Hq_w: -0.155|-0.555|-0.605, acc: 0.917, kl: 17.325, kl_b: 786.624|775.714|15.592, kl_w: 459018.885|397632.821|7999.553, ll: -17.965, logpy: -0.641, loss: 17.965
MAP-Test 8 | Hq_b: -0.586|-0.551|-0.592, Hq_w: -0.155|-0.555|-0.605, acc: 0.931, kl: 23.546, kl_b: 1037.770|1026.144|20.358, kl_w: 638616.375|526074.812|10511.262, ll: -23.896, logpy: -0.351, loss: 23.896


100%|██████████| 390/390 [00:08<00:00, 44.07it/s]
100%|██████████| 78/78 [00:00<00:00, 84.05it/s]
100%|██████████| 78/78 [00:00<00:00, 91.13it/s]


Train 9 | Hq_b: -0.585|-0.548|-0.592, Hq_w: -0.133|-0.552|-0.607, acc: 0.921, kl: 17.172, kl_b: 786.752|773.092|15.726, kl_w: 452187.242|396815.006|8004.323, ll: -17.783, logpy: -0.612, loss: 17.783
Test 9 | Hq_b: -0.585|-0.544|-0.592, Hq_w: -0.110|-0.549|-0.608, acc: 0.921, kl: 17.024, kl_b: 786.760|769.509|15.291, kl_w: 445677.910|395937.019|8014.588, ll: -17.666, logpy: -0.642, loss: 17.666
MAP-Test 9 | Hq_b: -0.585|-0.544|-0.592, Hq_w: -0.110|-0.549|-0.608, acc: 0.943, kl: 23.147, kl_b: 1037.253|1022.481|20.355, kl_w: 620422.312|524339.000|10523.962, ll: -23.449, logpy: -0.301, loss: 23.449


100%|██████████| 390/390 [00:08<00:00, 44.75it/s]
100%|██████████| 78/78 [00:00<00:00, 85.11it/s]
100%|██████████| 78/78 [00:00<00:00, 91.92it/s]


Train 10 | Hq_b: -0.584|-0.540|-0.596, Hq_w: -0.089|-0.545|-0.610, acc: 0.922, kl: 16.879, kl_b: 785.761|769.285|15.422, kl_w: 439422.658|394954.922|8017.637, ll: -17.465, logpy: -0.586, loss: 17.465
Test 10 | Hq_b: -0.584|-0.536|-0.600, Hq_w: -0.068|-0.541|-0.611, acc: 0.929, kl: 16.739, kl_b: 786.913|766.838|15.560, kl_w: 433408.133|393967.446|8022.770, ll: -17.351, logpy: -0.612, loss: 17.351
MAP-Test 10 | Hq_b: -0.584|-0.536|-0.600, Hq_w: -0.068|-0.541|-0.611, acc: 0.956, kl: 22.769, kl_b: 1036.788|1018.313|20.434, kl_w: 603498.625|522356.531|10537.883, ll: -23.009, logpy: -0.239, loss: 23.009


100%|██████████| 390/390 [00:08<00:00, 44.92it/s]
100%|██████████| 78/78 [00:00<00:00, 84.86it/s]
100%|██████████| 78/78 [00:00<00:00, 92.19it/s]


Train 11 | Hq_b: -0.583|-0.531|-0.603, Hq_w: -0.049|-0.537|-0.612, acc: 0.931, kl: 16.606, kl_b: 785.366|766.072|15.632, kl_w: 427802.251|392896.690|8032.167, ll: -17.110, logpy: -0.504, loss: 17.110
Test 11 | Hq_b: -0.582|-0.527|-0.607, Hq_w: -0.030|-0.533|-0.614, acc: 0.931, kl: 16.471, kl_b: 783.579|760.426|15.981, kl_w: 422213.390|391753.693|8032.552, ll: -17.046, logpy: -0.575, loss: 17.046
MAP-Test 11 | Hq_b: -0.582|-0.527|-0.607, Hq_w: -0.030|-0.533|-0.614, acc: 0.954, kl: 22.412, kl_b: 1036.119|1013.605|20.507, kl_w: 587861.812|520092.250|10550.822, ll: -22.667, logpy: -0.256, loss: 22.667


100%|██████████| 390/390 [00:08<00:00, 44.75it/s]
100%|██████████| 78/78 [00:00<00:00, 84.82it/s]
100%|██████████| 78/78 [00:00<00:00, 92.14it/s]


Train 12 | Hq_b: -0.582|-0.522|-0.607, Hq_w: -0.011|-0.528|-0.615, acc: 0.935, kl: 16.341, kl_b: 785.802|759.937|15.501, kl_w: 416930.495|390516.677|8041.380, ll: -16.817, logpy: -0.476, loss: 16.817
Test 12 | Hq_b: -0.581|-0.517|-0.609, Hq_w: 0.007|-0.523|-0.616, acc: 0.933, kl: 16.211, kl_b: 782.085|760.468|15.724, kl_w: 411652.993|389298.012|8049.648, ll: -16.732, logpy: -0.521, loss: 16.732
MAP-Test 12 | Hq_b: -0.581|-0.517|-0.609, Hq_w: 0.007|-0.523|-0.616, acc: 0.949, kl: 22.070, kl_b: 1035.354|1008.446|20.520, kl_w: 573307.250|517560.281|10563.621, ll: -22.345, logpy: -0.275, loss: 22.345


100%|██████████| 390/390 [00:08<00:00, 44.80it/s]
100%|██████████| 78/78 [00:00<00:00, 84.40it/s]
100%|██████████| 78/78 [00:00<00:00, 90.14it/s]


Train 13 | Hq_b: -0.580|-0.511|-0.608, Hq_w: 0.024|-0.518|-0.618, acc: 0.936, kl: 16.082, kl_b: 784.902|754.954|15.500, kl_w: 406576.731|387917.550|8056.216, ll: -16.537, logpy: -0.455, loss: 16.537
Test 13 | Hq_b: -0.578|-0.505|-0.606, Hq_w: 0.040|-0.512|-0.619, acc: 0.940, kl: 15.953, kl_b: 785.196|750.528|15.856, kl_w: 401597.034|386447.481|8062.489, ll: -16.504, logpy: -0.551, loss: 16.504
MAP-Test 13 | Hq_b: -0.578|-0.505|-0.606, Hq_w: 0.040|-0.512|-0.619, acc: 0.950, kl: 21.740, kl_b: 1034.024|1002.632|20.491, kl_w: 559672.062|514715.781|10577.925, ll: -22.034, logpy: -0.293, loss: 22.034


100%|██████████| 390/390 [00:08<00:00, 44.96it/s]
100%|██████████| 78/78 [00:00<00:00, 84.58it/s]
100%|██████████| 78/78 [00:00<00:00, 91.06it/s]


Train 14 | Hq_b: -0.577|-0.499|-0.608, Hq_w: 0.056|-0.507|-0.621, acc: 0.938, kl: 15.825, kl_b: 781.791|750.247|15.577, kl_w: 396640.834|384974.346|8070.491, ll: -16.275, logpy: -0.450, loss: 16.275
Test 14 | Hq_b: -0.576|-0.492|-0.609, Hq_w: 0.072|-0.500|-0.622, acc: 0.941, kl: 15.695, kl_b: 780.279|743.839|15.652, kl_w: 391756.022|383388.690|8077.705, ll: -16.215, logpy: -0.520, loss: 16.215
MAP-Test 14 | Hq_b: -0.576|-0.492|-0.609, Hq_w: 0.072|-0.500|-0.622, acc: 0.954, kl: 21.420, kl_b: 1032.894|996.079|20.525, kl_w: 546766.500|511574.375|10593.683, ll: -21.697, logpy: -0.277, loss: 21.697


100%|██████████| 390/390 [00:08<00:00, 44.80it/s]
100%|██████████| 78/78 [00:00<00:00, 85.31it/s]
100%|██████████| 78/78 [00:00<00:00, 92.12it/s]


Train 15 | Hq_b: -0.574|-0.486|-0.608, Hq_w: 0.088|-0.494|-0.624, acc: 0.945, kl: 15.568, kl_b: 781.058|741.187|15.615, kl_w: 387033.428|381719.534|8085.481, ll: -15.963, logpy: -0.395, loss: 15.963
Test 15 | Hq_b: -0.572|-0.478|-0.607, Hq_w: 0.103|-0.487|-0.625, acc: 0.942, kl: 15.439, kl_b: 778.919|739.280|15.593, kl_w: 382328.300|380018.664|8078.680, ll: -15.917, logpy: -0.478, loss: 15.917
MAP-Test 15 | Hq_b: -0.572|-0.478|-0.607, Hq_w: 0.103|-0.487|-0.625, acc: 0.961, kl: 21.104, kl_b: 1031.265|988.817|20.502, kl_w: 534450.250|508091.312|10606.473, ll: -21.329, logpy: -0.225, loss: 21.329


100%|██████████| 390/390 [00:08<00:00, 45.04it/s]
100%|██████████| 78/78 [00:00<00:00, 85.15it/s]
100%|██████████| 78/78 [00:00<00:00, 92.07it/s]


Train 16 | Hq_b: -0.571|-0.471|-0.608, Hq_w: 0.118|-0.480|-0.626, acc: 0.946, kl: 15.310, kl_b: 779.593|735.819|15.630, kl_w: 377706.087|378169.184|8096.129, ll: -15.701, logpy: -0.391, loss: 15.701
Test 16 | Hq_b: -0.569|-0.463|-0.607, Hq_w: 0.132|-0.473|-0.628, acc: 0.947, kl: 15.182, kl_b: 778.881|733.998|15.465, kl_w: 373141.569|376337.554|8087.593, ll: -15.630, logpy: -0.448, loss: 15.630
MAP-Test 16 | Hq_b: -0.569|-0.463|-0.607, Hq_w: 0.132|-0.473|-0.628, acc: 0.959, kl: 20.793, kl_b: 1029.814|980.990|20.498, kl_w: 522681.406|504310.000|10619.579, ll: -21.047, logpy: -0.255, loss: 21.047


100%|██████████| 390/390 [00:08<00:00, 45.08it/s]
100%|██████████| 78/78 [00:00<00:00, 85.00it/s]
100%|██████████| 78/78 [00:00<00:00, 92.13it/s]


Train 17 | Hq_b: -0.568|-0.455|-0.607, Hq_w: 0.146|-0.465|-0.629, acc: 0.950, kl: 15.054, kl_b: 778.848|728.316|15.736, kl_w: 368727.133|374337.114|8114.403, ll: -15.408, logpy: -0.354, loss: 15.408
Test 17 | Hq_b: -0.566|-0.447|-0.607, Hq_w: 0.160|-0.457|-0.630, acc: 0.952, kl: 14.925, kl_b: 776.720|723.136|15.308, kl_w: 364236.084|372383.407|8110.397, ll: -15.335, logpy: -0.410, loss: 15.335
MAP-Test 17 | Hq_b: -0.566|-0.447|-0.607, Hq_w: 0.160|-0.457|-0.630, acc: 0.958, kl: 20.485, kl_b: 1028.155|972.467|20.502, kl_w: 511392.188|500206.219|10631.969, ll: -20.744, logpy: -0.259, loss: 20.744


100%|██████████| 390/390 [00:08<00:00, 45.03it/s]
100%|██████████| 78/78 [00:00<00:00, 85.43it/s]
100%|██████████| 78/78 [00:00<00:00, 92.03it/s]


Train 18 | Hq_b: -0.564|-0.438|-0.608, Hq_w: 0.174|-0.449|-0.631, acc: 0.952, kl: 14.796, kl_b: 776.045|719.409|15.687, kl_w: 359965.614|370203.490|8121.554, ll: -15.138, logpy: -0.342, loss: 15.138
Test 18 | Hq_b: -0.563|-0.429|-0.609, Hq_w: 0.187|-0.441|-0.633, acc: 0.951, kl: 14.667, kl_b: 773.617|711.324|15.729, kl_w: 355669.622|368034.425|8121.714, ll: -15.086, logpy: -0.419, loss: 15.086
MAP-Test 18 | Hq_b: -0.563|-0.429|-0.609, Hq_w: 0.187|-0.441|-0.633, acc: 0.960, kl: 20.178, kl_b: 1026.410|963.266|20.513, kl_w: 500431.469|495823.531|10644.473, ll: -20.416, logpy: -0.237, loss: 20.416


100%|██████████| 390/390 [00:08<00:00, 44.85it/s]
100%|██████████| 78/78 [00:00<00:00, 83.25it/s]
100%|██████████| 78/78 [00:00<00:00, 90.99it/s]


Train 19 | Hq_b: -0.560|-0.420|-0.607, Hq_w: 0.200|-0.432|-0.634, acc: 0.953, kl: 14.537, kl_b: 774.856|708.943|15.566, kl_w: 351427.482|365791.864|8134.312, ll: -14.868, logpy: -0.331, loss: 14.868
Test 19 | Hq_b: -0.558|-0.410|-0.605, Hq_w: 0.213|-0.423|-0.635, acc: 0.951, kl: 14.408, kl_b: 774.099|703.432|15.738, kl_w: 347283.101|363487.970|8134.754, ll: -14.827, logpy: -0.419, loss: 14.827
MAP-Test 19 | Hq_b: -0.558|-0.410|-0.605, Hq_w: 0.213|-0.423|-0.635, acc: 0.964, kl: 19.875, kl_b: 1024.115|953.649|20.477, kl_w: 489943.219|491156.844|10654.965, ll: -20.086, logpy: -0.211, loss: 20.086


100%|██████████| 390/390 [00:08<00:00, 45.04it/s]
100%|██████████| 78/78 [00:00<00:00, 84.72it/s]
100%|██████████| 78/78 [00:00<00:00, 91.55it/s]


Train 20 | Hq_b: -0.556|-0.400|-0.605, Hq_w: 0.226|-0.414|-0.636, acc: 0.954, kl: 14.281, kl_b: 770.920|700.654|15.563, kl_w: 343290.954|361145.160|8138.906, ll: -14.606, logpy: -0.325, loss: 14.606
Test 20 | Hq_b: -0.553|-0.390|-0.605, Hq_w: 0.238|-0.405|-0.637, acc: 0.951, kl: 14.153, kl_b: 769.102|693.009|15.733, kl_w: 339367.264|358651.387|8141.465, ll: -14.608, logpy: -0.455, loss: 14.608
MAP-Test 20 | Hq_b: -0.553|-0.390|-0.605, Hq_w: 0.238|-0.405|-0.637, acc: 0.966, kl: 19.576, kl_b: 1021.764|943.183|20.468, kl_w: 479911.562|486227.250|10665.796, ll: -19.790, logpy: -0.214, loss: 19.790


100%|██████████| 390/390 [00:08<00:00, 45.05it/s]
100%|██████████| 78/78 [00:00<00:00, 85.23it/s]
100%|██████████| 78/78 [00:00<00:00, 91.51it/s]


Train 21 | Hq_b: -0.551|-0.379|-0.602, Hq_w: 0.250|-0.395|-0.638, acc: 0.957, kl: 14.027, kl_b: 770.817|689.701|15.582, kl_w: 335507.369|356233.762|8153.622, ll: -14.327, logpy: -0.300, loss: 14.327
Test 21 | Hq_b: -0.549|-0.369|-0.602, Hq_w: 0.262|-0.385|-0.639, acc: 0.953, kl: 13.902, kl_b: 768.053|685.595|15.757, kl_w: 331766.097|353705.823|8163.998, ll: -14.326, logpy: -0.424, loss: 14.326
MAP-Test 21 | Hq_b: -0.549|-0.369|-0.602, Hq_w: 0.262|-0.385|-0.639, acc: 0.964, kl: 19.283, kl_b: 1019.416|932.305|20.436, kl_w: 470440.344|481073.250|10676.503, ll: -19.509, logpy: -0.226, loss: 19.509


100%|██████████| 390/390 [00:08<00:00, 44.92it/s]
100%|██████████| 78/78 [00:00<00:00, 84.92it/s]
100%|██████████| 78/78 [00:00<00:00, 92.01it/s]


Train 22 | Hq_b: -0.546|-0.358|-0.602, Hq_w: 0.273|-0.375|-0.640, acc: 0.959, kl: 13.776, kl_b: 768.250|677.786|15.295, kl_w: 328091.994|351099.304|8163.717, ll: -14.060, logpy: -0.284, loss: 14.060
Test 22 | Hq_b: -0.544|-0.347|-0.602, Hq_w: 0.284|-0.365|-0.641, acc: 0.945, kl: 13.652, kl_b: 766.729|670.820|15.067, kl_w: 324506.478|348477.352|8175.554, ll: -14.112, logpy: -0.460, loss: 14.112
MAP-Test 22 | Hq_b: -0.544|-0.347|-0.602, Hq_w: 0.284|-0.365|-0.641, acc: 0.956, kl: 18.995, kl_b: 1016.853|920.847|20.437, kl_w: 461410.344|475714.375|10687.141, ll: -19.260, logpy: -0.264, loss: 19.260


100%|██████████| 390/390 [00:08<00:00, 45.00it/s]
100%|██████████| 78/78 [00:00<00:00, 84.94it/s]
100%|██████████| 78/78 [00:00<00:00, 91.49it/s]


Train 23 | Hq_b: -0.541|-0.335|-0.600, Hq_w: 0.295|-0.355|-0.642, acc: 0.961, kl: 13.530, kl_b: 764.710|667.928|15.517, kl_w: 321021.785|345843.415|8177.080, ll: -13.814, logpy: -0.285, loss: 13.814
Test 23 | Hq_b: -0.538|-0.324|-0.598, Hq_w: 0.305|-0.344|-0.643, acc: 0.957, kl: 13.405, kl_b: 762.521|661.065|15.595, kl_w: 317551.922|343086.071|8177.366, ll: -13.791, logpy: -0.386, loss: 13.791
MAP-Test 23 | Hq_b: -0.538|-0.324|-0.598, Hq_w: 0.305|-0.344|-0.643, acc: 0.964, kl: 18.712, kl_b: 1014.159|908.962|20.397, kl_w: 452798.125|470162.719|10695.205, ll: -18.945, logpy: -0.233, loss: 18.945


100%|██████████| 390/390 [00:08<00:00, 45.00it/s]
100%|██████████| 78/78 [00:00<00:00, 84.69it/s]
100%|██████████| 78/78 [00:00<00:00, 91.59it/s]


Train 24 | Hq_b: -0.536|-0.312|-0.597, Hq_w: 0.316|-0.333|-0.644, acc: 0.961, kl: 13.285, kl_b: 762.950|656.582|15.522, kl_w: 314241.017|340405.367|8185.199, ll: -13.560, logpy: -0.275, loss: 13.560
Test 24 | Hq_b: -0.533|-0.301|-0.598, Hq_w: 0.326|-0.323|-0.644, acc: 0.956, kl: 13.167, kl_b: 760.973|648.064|15.416, kl_w: 311009.710|337730.014|8187.403, ll: -13.591, logpy: -0.424, loss: 13.591
MAP-Test 24 | Hq_b: -0.533|-0.301|-0.598, Hq_w: 0.326|-0.323|-0.644, acc: 0.962, kl: 18.436, kl_b: 1011.594|896.934|20.402, kl_w: 444636.562|464516.031|10703.702, ll: -18.719, logpy: -0.283, loss: 18.719


100%|██████████| 390/390 [00:08<00:00, 45.04it/s]
100%|██████████| 78/78 [00:00<00:00, 85.11it/s]
100%|██████████| 78/78 [00:00<00:00, 91.82it/s]


Train 25 | Hq_b: -0.530|-0.289|-0.597, Hq_w: 0.335|-0.312|-0.645, acc: 0.963, kl: 13.046, kl_b: 758.132|643.245|15.565, kl_w: 307790.622|334919.939|8192.809, ll: -13.315, logpy: -0.269, loss: 13.315
Test 25 | Hq_b: -0.527|-0.277|-0.597, Hq_w: 0.345|-0.301|-0.646, acc: 0.959, kl: 12.928, kl_b: 759.106|637.299|15.636, kl_w: 304634.948|332168.749|8204.275, ll: -13.331, logpy: -0.402, loss: 13.331
MAP-Test 25 | Hq_b: -0.527|-0.277|-0.597, Hq_w: 0.345|-0.301|-0.646, acc: 0.962, kl: 18.165, kl_b: 1008.394|884.935|20.385, kl_w: 436822.438|458809.781|10712.188, ll: -18.444, logpy: -0.279, loss: 18.444


100%|██████████| 390/390 [00:08<00:00, 45.00it/s]
100%|██████████| 78/78 [00:00<00:00, 85.42it/s]
100%|██████████| 78/78 [00:00<00:00, 92.17it/s]


Train 26 | Hq_b: -0.524|-0.266|-0.597, Hq_w: 0.354|-0.290|-0.647, acc: 0.966, kl: 12.814, kl_b: 757.253|632.023|15.523, kl_w: 301615.116|329462.892|8195.536, ll: -13.061, logpy: -0.248, loss: 13.061
Test 26 | Hq_b: -0.521|-0.254|-0.597, Hq_w: 0.364|-0.280|-0.648, acc: 0.958, kl: 12.698, kl_b: 754.679|625.215|15.775, kl_w: 298594.288|326698.191|8207.397, ll: -13.105, logpy: -0.407, loss: 13.105
MAP-Test 26 | Hq_b: -0.521|-0.254|-0.597, Hq_w: 0.364|-0.280|-0.648, acc: 0.969, kl: 17.901, kl_b: 1005.572|872.594|20.385, kl_w: 429352.531|453103.000|10719.117, ll: -18.108, logpy: -0.207, loss: 18.108


100%|██████████| 390/390 [00:08<00:00, 44.89it/s]
100%|██████████| 78/78 [00:00<00:00, 84.30it/s]
100%|██████████| 78/78 [00:00<00:00, 91.97it/s]


Train 27 | Hq_b: -0.518|-0.241|-0.599, Hq_w: 0.373|-0.269|-0.648, acc: 0.965, kl: 12.583, kl_b: 753.357|620.749|15.459, kl_w: 295656.909|323875.185|8205.748, ll: -12.828, logpy: -0.245, loss: 12.828
Test 27 | Hq_b: -0.516|-0.229|-0.600, Hq_w: 0.382|-0.257|-0.649, acc: 0.956, kl: 12.468, kl_b: 750.676|612.353|15.691, kl_w: 292755.347|321081.730|8206.989, ll: -12.873, logpy: -0.405, loss: 12.873
MAP-Test 27 | Hq_b: -0.516|-0.229|-0.600, Hq_w: 0.382|-0.257|-0.649, acc: 0.965, kl: 17.640, kl_b: 1002.797|859.913|20.409, kl_w: 422137.406|447249.281|10726.590, ll: -17.885, logpy: -0.245, loss: 17.885


100%|██████████| 390/390 [00:08<00:00, 45.05it/s]
100%|██████████| 78/78 [00:00<00:00, 85.31it/s]
100%|██████████| 78/78 [00:00<00:00, 91.70it/s]


Train 28 | Hq_b: -0.512|-0.217|-0.598, Hq_w: 0.390|-0.246|-0.650, acc: 0.966, kl: 12.354, kl_b: 750.480|608.948|15.464, kl_w: 289889.997|318217.770|8216.665, ll: -12.594, logpy: -0.241, loss: 12.594
Test 28 | Hq_b: -0.509|-0.204|-0.597, Hq_w: 0.399|-0.235|-0.650, acc: 0.962, kl: 12.238, kl_b: 748.619|599.893|15.517, kl_w: 287057.465|315280.070|8216.406, ll: -12.628, logpy: -0.390, loss: 12.628
MAP-Test 28 | Hq_b: -0.509|-0.204|-0.597, Hq_w: 0.399|-0.235|-0.650, acc: 0.970, kl: 17.379, kl_b: 999.400|846.859|20.390, kl_w: 415099.656|441231.312|10733.731, ll: -17.594, logpy: -0.215, loss: 17.594


100%|██████████| 390/390 [00:08<00:00, 44.97it/s]
100%|██████████| 78/78 [00:00<00:00, 84.83it/s]
100%|██████████| 78/78 [00:00<00:00, 91.87it/s]


Train 29 | Hq_b: -0.506|-0.192|-0.597, Hq_w: 0.407|-0.224|-0.651, acc: 0.968, kl: 12.128, kl_b: 747.671|596.650|15.634, kl_w: 284288.052|312528.107|8221.555, ll: -12.363, logpy: -0.235, loss: 12.363
Test 29 | Hq_b: -0.503|-0.179|-0.596, Hq_w: 0.416|-0.212|-0.652, acc: 0.960, kl: 12.016, kl_b: 748.452|590.731|15.505, kl_w: 281572.704|309636.036|8211.881, ll: -12.460, logpy: -0.444, loss: 12.460
MAP-Test 29 | Hq_b: -0.503|-0.179|-0.596, Hq_w: 0.416|-0.212|-0.652, acc: 0.968, kl: 17.126, kl_b: 996.584|833.691|20.377, kl_w: 408366.375|435318.719|10739.463, ll: -17.359, logpy: -0.234, loss: 17.359


100%|██████████| 390/390 [00:08<00:00, 44.81it/s]
100%|██████████| 78/78 [00:00<00:00, 85.11it/s]
100%|██████████| 78/78 [00:00<00:00, 91.75it/s]


Train 30 | Hq_b: -0.500|-0.167|-0.597, Hq_w: 0.424|-0.202|-0.652, acc: 0.970, kl: 11.908, kl_b: 745.605|582.220|15.331, kl_w: 278909.531|306923.918|8220.945, ll: -12.133, logpy: -0.225, loss: 12.133
Test 30 | Hq_b: -0.497|-0.155|-0.598, Hq_w: 0.432|-0.191|-0.652, acc: 0.961, kl: 11.800, kl_b: 744.329|574.552|15.632, kl_w: 276198.348|304226.493|8226.365, ll: -12.239, logpy: -0.440, loss: 12.239
MAP-Test 30 | Hq_b: -0.497|-0.155|-0.598, Hq_w: 0.432|-0.191|-0.652, acc: 0.968, kl: 16.878, kl_b: 993.307|821.582|20.393, kl_w: 401823.562|429517.812|10743.516, ll: -17.134, logpy: -0.256, loss: 17.134


100%|██████████| 390/390 [00:08<00:00, 44.95it/s]
100%|██████████| 78/78 [00:00<00:00, 85.03it/s]
100%|██████████| 78/78 [00:00<00:00, 91.58it/s]


Train 31 | Hq_b: -0.494|-0.144|-0.598, Hq_w: 0.440|-0.180|-0.653, acc: 0.970, kl: 11.693, kl_b: 740.539|572.513|15.253, kl_w: 273656.007|301431.765|8226.633, ll: -11.920, logpy: -0.227, loss: 11.920
Test 31 | Hq_b: -0.490|-0.132|-0.598, Hq_w: 0.448|-0.169|-0.653, acc: 0.962, kl: 11.586, kl_b: 742.647|564.302|15.485, kl_w: 271074.724|298688.486|8225.429, ll: -11.959, logpy: -0.373, loss: 11.959
MAP-Test 31 | Hq_b: -0.490|-0.132|-0.598, Hq_w: 0.448|-0.169|-0.653, acc: 0.968, kl: 16.636, kl_b: 990.191|809.291|20.399, kl_w: 395492.062|423747.656|10746.992, ll: -16.861, logpy: -0.225, loss: 16.861


100%|██████████| 390/390 [00:08<00:00, 45.07it/s]
100%|██████████| 78/78 [00:00<00:00, 84.95it/s]
100%|██████████| 78/78 [00:00<00:00, 91.54it/s]


Train 32 | Hq_b: -0.487|-0.119|-0.595, Hq_w: 0.455|-0.158|-0.654, acc: 0.969, kl: 11.482, kl_b: 738.316|559.346|15.512, kl_w: 268580.255|295974.496|8231.744, ll: -11.696, logpy: -0.214, loss: 11.696
Test 32 | Hq_b: -0.484|-0.107|-0.593, Hq_w: 0.463|-0.147|-0.654, acc: 0.960, kl: 11.376, kl_b: 736.333|552.278|15.133, kl_w: 266056.788|293202.416|8228.234, ll: -11.796, logpy: -0.420, loss: 11.796
MAP-Test 32 | Hq_b: -0.484|-0.107|-0.593, Hq_w: 0.463|-0.147|-0.654, acc: 0.971, kl: 16.397, kl_b: 986.645|796.606|20.341, kl_w: 389354.031|417928.969|10751.521, ll: -16.624, logpy: -0.228, loss: 16.624


100%|██████████| 390/390 [00:08<00:00, 45.04it/s]
100%|██████████| 78/78 [00:00<00:00, 84.54it/s]
100%|██████████| 78/78 [00:00<00:00, 92.12it/s]


Train 33 | Hq_b: -0.480|-0.095|-0.590, Hq_w: 0.470|-0.136|-0.654, acc: 0.970, kl: 11.274, kl_b: 733.496|546.251|15.525, kl_w: 263689.549|290471.231|8231.464, ll: -11.486, logpy: -0.212, loss: 11.486
Test 33 | Hq_b: -0.476|-0.082|-0.589, Hq_w: 0.478|-0.125|-0.655, acc: 0.961, kl: 11.171, kl_b: 730.693|543.888|15.601, kl_w: 261333.351|287683.975|8244.517, ll: -11.554, logpy: -0.383, loss: 11.554
MAP-Test 33 | Hq_b: -0.476|-0.082|-0.589, Hq_w: 0.478|-0.125|-0.655, acc: 0.971, kl: 16.161, kl_b: 983.109|783.818|20.302, kl_w: 383452.875|412076.156|10754.422, ll: -16.364, logpy: -0.203, loss: 16.364


100%|██████████| 390/390 [00:10<00:00, 36.22it/s]
100%|██████████| 78/78 [00:00<00:00, 85.41it/s]
100%|██████████| 78/78 [00:00<00:00, 92.42it/s]


Train 34 | Hq_b: -0.473|-0.070|-0.590, Hq_w: 0.485|-0.114|-0.655, acc: 0.970, kl: 11.069, kl_b: 730.619|536.256|15.408, kl_w: 258975.864|284966.881|8236.798, ll: -11.276, logpy: -0.207, loss: 11.276
Test 34 | Hq_b: -0.470|-0.058|-0.590, Hq_w: 0.492|-0.103|-0.656, acc: 0.958, kl: 10.970, kl_b: 729.189|529.892|15.377, kl_w: 256694.507|282265.502|8252.952, ll: -11.384, logpy: -0.415, loss: 11.384
MAP-Test 34 | Hq_b: -0.470|-0.058|-0.590, Hq_w: 0.492|-0.103|-0.656, acc: 0.972, kl: 15.929, kl_b: 979.791|771.219|20.314, kl_w: 377728.562|406206.812|10758.240, ll: -16.131, logpy: -0.202, loss: 16.131


100%|██████████| 390/390 [00:08<00:00, 45.00it/s]
100%|██████████| 78/78 [00:00<00:00, 85.26it/s]
100%|██████████| 78/78 [00:00<00:00, 92.07it/s]


Train 35 | Hq_b: -0.466|-0.045|-0.588, Hq_w: 0.499|-0.091|-0.656, acc: 0.971, kl: 10.866, kl_b: 728.630|525.224|15.326, kl_w: 254358.125|279433.308|8240.043, ll: -11.076, logpy: -0.210, loss: 11.076
Test 35 | Hq_b: -0.462|-0.033|-0.589, Hq_w: 0.506|-0.080|-0.656, acc: 0.963, kl: 10.764, kl_b: 728.169|518.701|15.262, kl_w: 252114.688|276613.209|8229.694, ll: -11.171, logpy: -0.407, loss: 11.171
MAP-Test 35 | Hq_b: -0.462|-0.033|-0.589, Hq_w: 0.506|-0.080|-0.656, acc: 0.969, kl: 15.699, kl_b: 976.103|758.397|20.299, kl_w: 372148.094|400273.469|10760.841, ll: -15.936, logpy: -0.237, loss: 15.936


100%|██████████| 390/390 [00:08<00:00, 45.04it/s]
100%|██████████| 78/78 [00:00<00:00, 85.66it/s]
100%|██████████| 78/78 [00:00<00:00, 92.55it/s]


Train 36 | Hq_b: -0.459|-0.021|-0.589, Hq_w: 0.513|-0.069|-0.656, acc: 0.971, kl: 10.667, kl_b: 725.824|512.372|15.287, kl_w: 249921.106|273920.576|8245.015, ll: -10.878, logpy: -0.212, loss: 10.878
Test 36 | Hq_b: -0.456|-0.009|-0.590, Hq_w: 0.519|-0.058|-0.657, acc: 0.963, kl: 10.567, kl_b: 724.095|507.880|15.252, kl_w: 247693.600|271173.786|8240.672, ll: -11.015, logpy: -0.447, loss: 11.015
MAP-Test 36 | Hq_b: -0.456|-0.009|-0.590, Hq_w: 0.519|-0.058|-0.657, acc: 0.975, kl: 15.473, kl_b: 972.628|745.871|20.309, kl_w: 366769.125|394385.906|10764.729, ll: -15.687, logpy: -0.214, loss: 15.687


100%|██████████| 390/390 [00:08<00:00, 45.11it/s]
100%|██████████| 78/78 [00:00<00:00, 84.85it/s]
100%|██████████| 78/78 [00:00<00:00, 91.28it/s]


Train 37 | Hq_b: -0.452|0.004|-0.589, Hq_w: 0.526|-0.047|-0.657, acc: 0.973, kl: 10.472, kl_b: 721.101|501.686|15.493, kl_w: 245620.298|268477.865|8251.712, ll: -10.677, logpy: -0.205, loss: 10.677
Test 37 | Hq_b: -0.449|0.016|-0.588, Hq_w: 0.532|-0.036|-0.657, acc: 0.963, kl: 10.374, kl_b: 719.079|491.796|15.386, kl_w: 243477.241|265778.763|8239.093, ll: -10.756, logpy: -0.382, loss: 10.756
MAP-Test 37 | Hq_b: -0.449|0.016|-0.588, Hq_w: 0.532|-0.036|-0.657, acc: 0.970, kl: 15.253, kl_b: 969.347|733.157|20.297, kl_w: 361575.219|388569.125|10765.334, ll: -15.489, logpy: -0.237, loss: 15.489


100%|██████████| 390/390 [00:08<00:00, 44.87it/s]
100%|██████████| 78/78 [00:00<00:00, 84.59it/s]
100%|██████████| 78/78 [00:00<00:00, 91.15it/s]


Train 38 | Hq_b: -0.445|0.028|-0.588, Hq_w: 0.539|-0.025|-0.657, acc: 0.972, kl: 10.281, kl_b: 718.497|488.515|15.397, kl_w: 241435.575|263137.595|8247.215, ll: -10.491, logpy: -0.210, loss: 10.491
Test 38 | Hq_b: -0.442|0.040|-0.586, Hq_w: 0.545|-0.014|-0.658, acc: 0.963, kl: 10.187, kl_b: 716.472|481.231|15.142, kl_w: 239415.658|260492.585|8244.855, ll: -10.587, logpy: -0.400, loss: 10.587
MAP-Test 38 | Hq_b: -0.442|0.040|-0.586, Hq_w: 0.545|-0.014|-0.658, acc: 0.969, kl: 15.037, kl_b: 965.642|720.768|20.271, kl_w: 356497.000|382862.562|10767.473, ll: -15.265, logpy: -0.228, loss: 15.265


100%|██████████| 390/390 [00:08<00:00, 45.09it/s]
100%|██████████| 78/78 [00:00<00:00, 84.65it/s]
100%|██████████| 78/78 [00:00<00:00, 92.12it/s]


Train 39 | Hq_b: -0.439|0.051|-0.590, Hq_w: 0.551|-0.004|-0.658, acc: 0.972, kl: 10.096, kl_b: 714.504|477.980|15.603, kl_w: 237412.877|257919.515|8257.039, ll: -10.308, logpy: -0.212, loss: 10.308
Test 39 | Hq_b: -0.435|0.064|-0.595, Hq_w: 0.557|0.007|-0.659, acc: 0.966, kl: 10.004, kl_b: 713.575|471.426|15.424, kl_w: 235416.343|255344.482|8260.576, ll: -10.419, logpy: -0.415, loss: 10.419
MAP-Test 39 | Hq_b: -0.435|0.064|-0.595, Hq_w: 0.557|0.007|-0.659, acc: 0.974, kl: 14.826, kl_b: 962.571|708.462|20.357, kl_w: 351593.312|377238.312|10772.188, ll: -15.034, logpy: -0.208, loss: 15.034


100%|██████████| 390/390 [00:08<00:00, 44.85it/s]
100%|██████████| 78/78 [00:00<00:00, 85.27it/s]
100%|██████████| 78/78 [00:00<00:00, 92.06it/s]


Train 40 | Hq_b: -0.432|0.075|-0.594, Hq_w: 0.563|0.017|-0.659, acc: 0.974, kl: 9.916, kl_b: 711.658|466.575|15.270, kl_w: 233521.658|252832.680|8256.524, ll: -10.119, logpy: -0.203, loss: 10.119
Test 40 | Hq_b: -0.429|0.087|-0.594, Hq_w: 0.569|0.028|-0.659, acc: 0.964, kl: 9.828, kl_b: 712.193|461.552|14.760, kl_w: 231631.741|250340.402|8252.185, ll: -10.243, logpy: -0.415, loss: 10.243
MAP-Test 40 | Hq_b: -0.429|0.087|-0.594, Hq_w: 0.569|0.028|-0.659, acc: 0.975, kl: 14.621, kl_b: 959.418|696.570|20.349, kl_w: 346864.312|371725.062|10772.147, ll: -14.832, logpy: -0.211, loss: 14.832


100%|██████████| 390/390 [00:08<00:00, 45.01it/s]
100%|██████████| 78/78 [00:00<00:00, 84.66it/s]
100%|██████████| 78/78 [00:00<00:00, 91.89it/s]


Train 41 | Hq_b: -0.426|0.098|-0.592, Hq_w: 0.575|0.038|-0.659, acc: 0.974, kl: 9.742, kl_b: 708.124|457.678|15.440, kl_w: 229779.358|247860.683|8255.272, ll: -9.939, logpy: -0.197, loss: 9.939
Test 41 | Hq_b: -0.422|0.109|-0.590, Hq_w: 0.580|0.048|-0.659, acc: 0.966, kl: 9.653, kl_b: 703.774|451.326|15.563, kl_w: 227827.667|245383.498|8258.606, ll: -10.055, logpy: -0.402, loss: 10.055
MAP-Test 41 | Hq_b: -0.422|0.109|-0.590, Hq_w: 0.580|0.048|-0.659, acc: 0.967, kl: 14.421, kl_b: 955.786|684.816|20.312, kl_w: 342280.375|366335.875|10773.423, ll: -14.697, logpy: -0.276, loss: 14.697


100%|██████████| 390/390 [00:08<00:00, 44.89it/s]
100%|██████████| 78/78 [00:00<00:00, 85.20it/s]
100%|██████████| 78/78 [00:00<00:00, 91.64it/s]


Train 42 | Hq_b: -0.418|0.120|-0.592, Hq_w: 0.586|0.058|-0.659, acc: 0.974, kl: 9.570, kl_b: 705.840|444.947|15.410, kl_w: 226099.445|242963.569|8255.365, ll: -9.763, logpy: -0.194, loss: 9.763
Test 42 | Hq_b: -0.415|0.132|-0.592, Hq_w: 0.592|0.068|-0.659, acc: 0.964, kl: 9.485, kl_b: 701.767|440.996|15.743, kl_w: 224306.177|240558.621|8248.756, ll: -9.905, logpy: -0.420, loss: 9.905
MAP-Test 42 | Hq_b: -0.415|0.132|-0.592, Hq_w: 0.592|0.068|-0.659, acc: 0.970, kl: 14.223, kl_b: 952.138|673.390|20.322, kl_w: 337754.844|360977.094|10773.963, ll: -14.491, logpy: -0.268, loss: 14.491


100%|██████████| 390/390 [00:08<00:00, 44.91it/s]
100%|██████████| 78/78 [00:00<00:00, 84.89it/s]
100%|██████████| 78/78 [00:00<00:00, 91.94it/s]


Train 43 | Hq_b: -0.411|0.142|-0.591, Hq_w: 0.597|0.078|-0.659, acc: 0.974, kl: 9.400, kl_b: 701.726|435.269|15.503, kl_w: 222447.242|238162.632|8257.426, ll: -9.601, logpy: -0.200, loss: 9.601
Test 43 | Hq_b: -0.408|0.153|-0.592, Hq_w: 0.603|0.088|-0.659, acc: 0.961, kl: 9.320, kl_b: 701.577|429.094|15.271, kl_w: 220747.292|235846.930|8246.575, ll: -9.732, logpy: -0.412, loss: 9.732
MAP-Test 43 | Hq_b: -0.408|0.153|-0.592, Hq_w: 0.603|0.088|-0.659, acc: 0.973, kl: 14.032, kl_b: 948.672|662.165|20.324, kl_w: 333385.125|355814.938|10774.837, ll: -14.249, logpy: -0.217, loss: 14.249


100%|██████████| 390/390 [00:08<00:00, 45.00it/s]
100%|██████████| 78/78 [00:00<00:00, 84.87it/s]
100%|██████████| 78/78 [00:00<00:00, 91.80it/s]


Train 44 | Hq_b: -0.404|0.164|-0.591, Hq_w: 0.608|0.097|-0.660, acc: 0.974, kl: 9.240, kl_b: 696.401|425.543|15.460, kl_w: 219002.645|233582.059|8259.051, ll: -9.433, logpy: -0.193, loss: 9.433
Test 44 | Hq_b: -0.400|0.174|-0.589, Hq_w: 0.613|0.107|-0.660, acc: 0.964, kl: 9.160, kl_b: 696.178|420.968|15.196, kl_w: 217268.033|231334.862|8263.849, ll: -9.586, logpy: -0.426, loss: 9.586
MAP-Test 44 | Hq_b: -0.400|0.174|-0.589, Hq_w: 0.613|0.107|-0.660, acc: 0.975, kl: 13.845, kl_b: 944.643|651.363|20.297, kl_w: 329114.688|350735.438|10776.687, ll: -14.065, logpy: -0.221, loss: 14.065


100%|██████████| 390/390 [00:08<00:00, 44.94it/s]
100%|██████████| 78/78 [00:00<00:00, 85.04it/s]
100%|██████████| 78/78 [00:00<00:00, 91.97it/s]


Train 45 | Hq_b: -0.397|0.184|-0.588, Hq_w: 0.618|0.116|-0.660, acc: 0.975, kl: 9.080, kl_b: 693.890|414.838|15.527, kl_w: 215603.965|229034.637|8257.788, ll: -9.268, logpy: -0.188, loss: 9.268
Test 45 | Hq_b: -0.393|0.195|-0.590, Hq_w: 0.624|0.126|-0.660, acc: 0.965, kl: 9.003, kl_b: 693.994|411.898|15.168, kl_w: 213919.722|226862.944|8259.065, ll: -9.397, logpy: -0.394, loss: 9.397
MAP-Test 45 | Hq_b: -0.393|0.195|-0.590, Hq_w: 0.624|0.126|-0.660, acc: 0.969, kl: 13.662, kl_b: 941.379|640.674|20.309, kl_w: 324968.125|345766.438|10775.205, ll: -13.909, logpy: -0.247, loss: 13.909


100%|██████████| 390/390 [00:08<00:00, 45.00it/s]
100%|██████████| 78/78 [00:00<00:00, 85.15it/s]
100%|██████████| 78/78 [00:00<00:00, 91.65it/s]


Train 46 | Hq_b: -0.390|0.205|-0.587, Hq_w: 0.629|0.135|-0.660, acc: 0.974, kl: 8.927, kl_b: 691.392|407.197|15.365, kl_w: 212307.101|224653.306|8255.775, ll: -9.113, logpy: -0.187, loss: 9.113
Test 46 | Hq_b: -0.386|0.216|-0.585, Hq_w: 0.634|0.144|-0.660, acc: 0.966, kl: 8.852, kl_b: 690.026|402.949|15.171, kl_w: 210698.922|222525.263|8261.376, ll: -9.310, logpy: -0.458, loss: 9.310
MAP-Test 46 | Hq_b: -0.386|0.216|-0.585, Hq_w: 0.634|0.144|-0.660, acc: 0.975, kl: 13.483, kl_b: 937.614|629.878|20.255, kl_w: 320889.250|340882.750|10776.107, ll: -13.728, logpy: -0.245, loss: 13.728


100%|██████████| 390/390 [00:08<00:00, 45.00it/s]
100%|██████████| 78/78 [00:00<00:00, 85.36it/s]
100%|██████████| 78/78 [00:00<00:00, 91.82it/s]


Train 47 | Hq_b: -0.383|0.226|-0.584, Hq_w: 0.639|0.153|-0.660, acc: 0.976, kl: 8.778, kl_b: 687.607|398.331|15.426, kl_w: 209107.350|220420.048|8257.708, ll: -8.968, logpy: -0.190, loss: 8.968
Test 47 | Hq_b: -0.380|0.236|-0.583, Hq_w: 0.644|0.162|-0.660, acc: 0.963, kl: 8.705, kl_b: 686.785|390.129|15.678, kl_w: 207540.915|218334.694|8262.593, ll: -9.137, logpy: -0.432, loss: 9.137
MAP-Test 47 | Hq_b: -0.380|0.236|-0.583, Hq_w: 0.644|0.162|-0.660, acc: 0.974, kl: 13.309, kl_b: 934.267|619.379|20.239, kl_w: 316955.219|336161.219|10774.499, ll: -13.534, logpy: -0.225, loss: 13.534


100%|██████████| 390/390 [00:08<00:00, 45.09it/s]
100%|██████████| 78/78 [00:00<00:00, 84.80it/s]
100%|██████████| 78/78 [00:00<00:00, 91.88it/s]


Train 48 | Hq_b: -0.376|0.246|-0.580, Hq_w: 0.648|0.171|-0.660, acc: 0.974, kl: 8.632, kl_b: 683.342|387.603|15.370, kl_w: 205986.320|216277.403|8257.929, ll: -8.833, logpy: -0.201, loss: 8.833
Test 48 | Hq_b: -0.373|0.256|-0.578, Hq_w: 0.653|0.180|-0.660, acc: 0.965, kl: 8.561, kl_b: 682.147|383.909|14.930, kl_w: 204490.760|214235.080|8255.224, ll: -8.981, logpy: -0.420, loss: 8.981
MAP-Test 48 | Hq_b: -0.373|0.256|-0.578, Hq_w: 0.653|0.180|-0.660, acc: 0.969, kl: 13.142, kl_b: 931.181|609.002|20.186, kl_w: 313224.969|331550.750|10776.649, ll: -13.380, logpy: -0.237, loss: 13.380


100%|██████████| 390/390 [00:08<00:00, 45.03it/s]
100%|██████████| 78/78 [00:00<00:00, 85.58it/s]
100%|██████████| 78/78 [00:00<00:00, 91.77it/s]


Train 49 | Hq_b: -0.370|0.266|-0.581, Hq_w: 0.658|0.188|-0.660, acc: 0.975, kl: 8.493, kl_b: 679.671|380.207|15.262, kl_w: 203063.105|212252.875|8262.831, ll: -8.678, logpy: -0.185, loss: 8.678
Test 49 | Hq_b: -0.367|0.276|-0.582, Hq_w: 0.662|0.197|-0.660, acc: 0.963, kl: 8.423, kl_b: 677.872|374.756|15.367, kl_w: 201539.001|210264.543|8257.893, ll: -8.836, logpy: -0.414, loss: 8.836
MAP-Test 49 | Hq_b: -0.367|0.276|-0.582, Hq_w: 0.662|0.197|-0.660, acc: 0.968, kl: 12.977, kl_b: 927.940|598.819|20.224, kl_w: 309541.250|327001.438|10776.203, ll: -13.233, logpy: -0.256, loss: 13.233


100%|██████████| 390/390 [00:08<00:00, 43.73it/s]
100%|██████████| 78/78 [00:00<00:00, 85.37it/s]
100%|██████████| 78/78 [00:00<00:00, 92.67it/s]


Train 50 | Hq_b: -0.363|0.286|-0.581, Hq_w: 0.667|0.205|-0.660, acc: 0.976, kl: 8.354, kl_b: 678.733|371.328|15.355, kl_w: 200074.088|208290.533|8259.349, ll: -8.529, logpy: -0.175, loss: 8.529
Test 50 | Hq_b: -0.360|0.297|-0.579, Hq_w: 0.671|0.214|-0.660, acc: 0.966, kl: 8.285, kl_b: 680.206|368.068|15.268, kl_w: 198666.303|206234.850|8264.008, ll: -8.671, logpy: -0.386, loss: 8.671
MAP-Test 50 | Hq_b: -0.360|0.297|-0.579, Hq_w: 0.671|0.214|-0.660, acc: 0.970, kl: 12.812, kl_b: 924.591|588.258|20.191, kl_w: 305868.969|322428.844|10775.771, ll: -13.031, logpy: -0.219, loss: 13.031


100%|██████████| 390/390 [00:08<00:00, 45.05it/s]
100%|██████████| 78/78 [00:00<00:00, 85.20it/s]
100%|██████████| 78/78 [00:00<00:00, 92.24it/s]


Train 51 | Hq_b: -0.356|0.306|-0.578, Hq_w: 0.676|0.223|-0.660, acc: 0.976, kl: 8.216, kl_b: 673.623|362.561|15.247, kl_w: 197200.452|204272.680|8256.701, ll: -8.400, logpy: -0.184, loss: 8.400
Test 51 | Hq_b: -0.352|0.315|-0.577, Hq_w: 0.680|0.231|-0.660, acc: 0.961, kl: 8.149, kl_b: 675.170|355.601|15.342, kl_w: 195793.360|202334.575|8255.721, ll: -8.565, logpy: -0.416, loss: 8.565
MAP-Test 51 | Hq_b: -0.352|0.315|-0.577, Hq_w: 0.680|0.231|-0.660, acc: 0.975, kl: 12.648, kl_b: 920.903|578.503|20.166, kl_w: 302235.406|317874.188|10775.021, ll: -12.845, logpy: -0.197, loss: 12.845


100%|██████████| 390/390 [00:09<00:00, 41.78it/s]
100%|██████████| 78/78 [00:01<00:00, 68.15it/s]
100%|██████████| 78/78 [00:00<00:00, 82.62it/s]


Train 52 | Hq_b: -0.349|0.325|-0.577, Hq_w: 0.685|0.240|-0.660, acc: 0.977, kl: 8.080, kl_b: 671.211|353.986|15.335, kl_w: 194324.298|200367.768|8253.920, ll: -8.255, logpy: -0.175, loss: 8.255
Test 52 | Hq_b: -0.346|0.335|-0.576, Hq_w: 0.689|0.248|-0.660, acc: 0.964, kl: 8.012, kl_b: 671.403|348.552|15.266, kl_w: 192934.938|198397.844|8250.873, ll: -8.413, logpy: -0.401, loss: 8.413
MAP-Test 52 | Hq_b: -0.346|0.335|-0.576, Hq_w: 0.689|0.248|-0.660, acc: 0.971, kl: 12.487, kl_b: 917.624|568.658|20.160, kl_w: 298712.344|313374.500|10773.867, ll: -12.736, logpy: -0.248, loss: 12.736


100%|██████████| 390/390 [00:08<00:00, 44.94it/s]
100%|██████████| 78/78 [00:00<00:00, 84.60it/s]
100%|██████████| 78/78 [00:00<00:00, 91.70it/s]


Train 53 | Hq_b: -0.342|0.344|-0.575, Hq_w: 0.694|0.257|-0.660, acc: 0.976, kl: 7.946, kl_b: 668.650|344.303|15.264, kl_w: 191544.909|196484.336|8257.187, ll: -8.134, logpy: -0.187, loss: 8.134
Test 53 | Hq_b: -0.339|0.354|-0.575, Hq_w: 0.698|0.265|-0.660, acc: 0.965, kl: 7.880, kl_b: 666.668|340.338|15.070, kl_w: 190140.626|194567.162|8249.080, ll: -8.304, logpy: -0.424, loss: 8.304
MAP-Test 53 | Hq_b: -0.339|0.354|-0.575, Hq_w: 0.698|0.265|-0.660, acc: 0.972, kl: 12.329, kl_b: 914.496|558.868|20.151, kl_w: 295275.531|308908.219|10773.488, ll: -12.574, logpy: -0.245, loss: 12.574


100%|██████████| 390/390 [00:08<00:00, 44.96it/s]
100%|██████████| 78/78 [00:00<00:00, 85.00it/s]
100%|██████████| 78/78 [00:00<00:00, 91.40it/s]


Train 54 | Hq_b: -0.336|0.363|-0.578, Hq_w: 0.702|0.274|-0.660, acc: 0.977, kl: 7.816, kl_b: 664.730|336.515|15.320, kl_w: 188843.018|192685.841|8253.987, ll: -7.997, logpy: -0.181, loss: 7.997
Test 54 | Hq_b: -0.333|0.371|-0.579, Hq_w: 0.706|0.282|-0.660, acc: 0.962, kl: 7.752, kl_b: 663.358|333.645|15.160, kl_w: 187497.421|190826.699|8258.411, ll: -8.189, logpy: -0.437, loss: 8.189
MAP-Test 54 | Hq_b: -0.333|0.371|-0.579, Hq_w: 0.706|0.282|-0.660, acc: 0.973, kl: 12.174, kl_b: 911.333|549.798|20.189, kl_w: 291914.469|304543.250|10772.559, ll: -12.407, logpy: -0.233, loss: 12.407


100%|██████████| 390/390 [00:08<00:00, 45.01it/s]
100%|██████████| 78/78 [00:00<00:00, 85.01it/s]
100%|██████████| 78/78 [00:00<00:00, 91.65it/s]


Train 55 | Hq_b: -0.330|0.381|-0.577, Hq_w: 0.710|0.290|-0.660, acc: 0.977, kl: 7.690, kl_b: 662.176|329.819|15.441, kl_w: 186232.890|188989.894|8248.984, ll: -7.875, logpy: -0.185, loss: 7.875
Test 55 | Hq_b: -0.326|0.390|-0.574, Hq_w: 0.715|0.298|-0.660, acc: 0.965, kl: 7.626, kl_b: 661.160|326.135|14.955, kl_w: 184905.642|187124.724|8247.412, ll: -8.045, logpy: -0.420, loss: 8.045
MAP-Test 55 | Hq_b: -0.326|0.390|-0.574, Hq_w: 0.715|0.298|-0.660, acc: 0.968, kl: 12.021, kl_b: 907.860|540.087|20.140, kl_w: 288602.469|300199.156|10771.428, ll: -12.286, logpy: -0.265, loss: 12.286


100%|██████████| 390/390 [00:08<00:00, 45.02it/s]
100%|██████████| 78/78 [00:00<00:00, 85.19it/s]
100%|██████████| 78/78 [00:00<00:00, 91.69it/s]


Train 56 | Hq_b: -0.322|0.399|-0.574, Hq_w: 0.719|0.306|-0.660, acc: 0.977, kl: 7.564, kl_b: 658.577|322.437|15.261, kl_w: 183628.150|185319.756|8255.898, ll: -7.739, logpy: -0.175, loss: 7.739
Test 56 | Hq_b: -0.319|0.408|-0.575, Hq_w: 0.723|0.315|-0.660, acc: 0.964, kl: 7.500, kl_b: 658.092|316.805|15.152, kl_w: 182267.026|183490.369|8242.675, ll: -7.959, logpy: -0.459, loss: 7.959
MAP-Test 56 | Hq_b: -0.319|0.408|-0.575, Hq_w: 0.723|0.315|-0.660, acc: 0.974, kl: 11.867, kl_b: 904.190|530.791|20.150, kl_w: 285296.406|295848.500|10770.029, ll: -12.099, logpy: -0.232, loss: 12.099


100%|██████████| 390/390 [00:08<00:00, 45.03it/s]
100%|██████████| 78/78 [00:00<00:00, 84.98it/s]
100%|██████████| 78/78 [00:00<00:00, 91.26it/s]


Train 57 | Hq_b: -0.316|0.417|-0.574, Hq_w: 0.727|0.323|-0.660, acc: 0.977, kl: 7.439, kl_b: 655.792|314.712|15.126, kl_w: 181039.957|181676.269|8252.430, ll: -7.618, logpy: -0.179, loss: 7.618
Test 57 | Hq_b: -0.313|0.426|-0.570, Hq_w: 0.731|0.331|-0.660, acc: 0.967, kl: 7.378, kl_b: 653.508|310.343|15.300, kl_w: 179748.517|179907.679|8247.818, ll: -7.805, logpy: -0.427, loss: 7.805
MAP-Test 57 | Hq_b: -0.313|0.426|-0.570, Hq_w: 0.731|0.331|-0.660, acc: 0.971, kl: 11.718, kl_b: 901.312|521.836|20.103, kl_w: 282063.500|291600.406|10768.549, ll: -11.962, logpy: -0.245, loss: 11.962


100%|██████████| 390/390 [00:08<00:00, 44.94it/s]
100%|██████████| 78/78 [00:00<00:00, 85.31it/s]
100%|██████████| 78/78 [00:00<00:00, 91.27it/s]


Train 58 | Hq_b: -0.309|0.434|-0.570, Hq_w: 0.735|0.339|-0.660, acc: 0.977, kl: 7.319, kl_b: 650.792|306.243|15.187, kl_w: 178524.219|178181.036|8251.407, ll: -7.511, logpy: -0.192, loss: 7.511
Test 58 | Hq_b: -0.306|0.442|-0.569, Hq_w: 0.739|0.346|-0.659, acc: 0.966, kl: 7.260, kl_b: 651.153|303.229|15.313, kl_w: 177325.794|176432.590|8253.826, ll: -7.675, logpy: -0.415, loss: 7.675
MAP-Test 58 | Hq_b: -0.306|0.442|-0.569, Hq_w: 0.739|0.346|-0.659, acc: 0.974, kl: 11.572, kl_b: 898.086|513.321|20.094, kl_w: 278933.438|287480.219|10767.447, ll: -11.786, logpy: -0.214, loss: 11.786


100%|██████████| 390/390 [00:09<00:00, 42.99it/s]
100%|██████████| 78/78 [00:01<00:00, 75.72it/s]
100%|██████████| 78/78 [00:00<00:00, 79.35it/s]


Train 59 | Hq_b: -0.303|0.450|-0.567, Hq_w: 0.743|0.354|-0.660, acc: 0.976, kl: 7.203, kl_b: 648.986|300.375|15.090, kl_w: 176150.182|174800.333|8247.045, ll: -7.399, logpy: -0.196, loss: 7.399
Test 59 | Hq_b: -0.300|0.459|-0.563, Hq_w: 0.746|0.361|-0.659, acc: 0.965, kl: 7.145, kl_b: 647.775|298.458|14.493, kl_w: 174892.739|173137.665|8248.160, ll: -7.589, logpy: -0.444, loss: 7.589
MAP-Test 59 | Hq_b: -0.300|0.459|-0.563, Hq_w: 0.746|0.361|-0.659, acc: 0.977, kl: 11.432, kl_b: 894.765|504.996|20.033, kl_w: 275913.062|283515.938|10766.375, ll: -11.632, logpy: -0.199, loss: 11.632


100%|██████████| 390/390 [00:08<00:00, 43.99it/s]
100%|██████████| 78/78 [00:00<00:00, 84.56it/s]
100%|██████████| 78/78 [00:00<00:00, 91.91it/s]


Train 60 | Hq_b: -0.297|0.467|-0.565, Hq_w: 0.750|0.369|-0.659, acc: 0.977, kl: 7.091, kl_b: 646.778|293.650|15.200, kl_w: 173795.429|171538.294|8252.577, ll: -7.281, logpy: -0.190, loss: 7.281
Test 60 | Hq_b: -0.293|0.475|-0.564, Hq_w: 0.754|0.376|-0.659, acc: 0.964, kl: 7.035, kl_b: 644.371|289.563|14.972, kl_w: 172637.584|169923.289|8236.196, ll: -7.494, logpy: -0.460, loss: 7.494
MAP-Test 60 | Hq_b: -0.293|0.475|-0.564, Hq_w: 0.754|0.376|-0.659, acc: 0.968, kl: 11.294, kl_b: 891.752|496.562|20.040, kl_w: 272943.812|279592.156|10765.469, ll: -11.583, logpy: -0.289, loss: 11.583


100%|██████████| 390/390 [00:08<00:00, 44.83it/s]
100%|██████████| 78/78 [00:00<00:00, 85.41it/s]
100%|██████████| 78/78 [00:00<00:00, 91.84it/s]


Train 61 | Hq_b: -0.290|0.482|-0.564, Hq_w: 0.757|0.384|-0.659, acc: 0.977, kl: 6.981, kl_b: 643.822|287.162|15.211, kl_w: 171513.258|168324.572|8249.772, ll: -7.155, logpy: -0.174, loss: 7.155
Test 61 | Hq_b: -0.287|0.490|-0.563, Hq_w: 0.761|0.391|-0.659, acc: 0.966, kl: 6.925, kl_b: 641.380|283.064|15.070, kl_w: 170363.134|166711.949|8246.633, ll: -7.393, logpy: -0.467, loss: 7.393
MAP-Test 61 | Hq_b: -0.287|0.490|-0.563, Hq_w: 0.761|0.391|-0.659, acc: 0.973, kl: 11.158, kl_b: 888.610|488.669|20.035, kl_w: 270033.875|275717.469|10763.520, ll: -11.403, logpy: -0.245, loss: 11.403


100%|██████████| 390/390 [00:08<00:00, 45.09it/s]
100%|██████████| 78/78 [00:00<00:00, 84.71it/s]
100%|██████████| 78/78 [00:00<00:00, 91.71it/s]


Train 62 | Hq_b: -0.284|0.498|-0.563, Hq_w: 0.765|0.398|-0.659, acc: 0.978, kl: 6.871, kl_b: 640.439|281.026|15.013, kl_w: 169213.309|165161.497|8245.933, ll: -7.055, logpy: -0.184, loss: 7.055
Test 62 | Hq_b: -0.280|0.506|-0.564, Hq_w: 0.768|0.405|-0.659, acc: 0.965, kl: 6.818, kl_b: 639.081|275.925|14.824, kl_w: 168102.660|163637.977|8245.364, ll: -7.251, logpy: -0.433, loss: 7.251
MAP-Test 62 | Hq_b: -0.280|0.506|-0.564, Hq_w: 0.768|0.405|-0.659, acc: 0.975, kl: 11.024, kl_b: 885.250|480.676|20.039, kl_w: 267121.094|271908.031|10760.965, ll: -11.243, logpy: -0.219, loss: 11.243


100%|██████████| 390/390 [00:08<00:00, 44.99it/s]
100%|██████████| 78/78 [00:00<00:00, 84.83it/s]
100%|██████████| 78/78 [00:00<00:00, 91.68it/s]


Train 63 | Hq_b: -0.277|0.514|-0.565, Hq_w: 0.772|0.413|-0.659, acc: 0.977, kl: 6.766, kl_b: 637.530|274.658|15.456, kl_w: 167015.021|162127.976|8242.147, ll: -6.941, logpy: -0.175, loss: 6.941
Test 63 | Hq_b: -0.274|0.522|-0.567, Hq_w: 0.776|0.420|-0.659, acc: 0.968, kl: 6.713, kl_b: 637.440|272.549|14.603, kl_w: 165852.696|160603.470|8244.700, ll: -7.137, logpy: -0.424, loss: 7.137
MAP-Test 63 | Hq_b: -0.274|0.522|-0.567, Hq_w: 0.776|0.420|-0.659, acc: 0.972, kl: 10.891, kl_b: 882.323|472.611|20.068, kl_w: 264248.688|268173.750|10759.727, ll: -11.118, logpy: -0.227, loss: 11.118


100%|██████████| 390/390 [00:08<00:00, 45.06it/s]
100%|██████████| 78/78 [00:00<00:00, 85.05it/s]
100%|██████████| 78/78 [00:00<00:00, 91.33it/s]


Train 64 | Hq_b: -0.270|0.529|-0.567, Hq_w: 0.779|0.427|-0.658, acc: 0.978, kl: 6.660, kl_b: 635.454|267.608|15.190, kl_w: 164780.369|159082.201|8237.584, ll: -6.828, logpy: -0.168, loss: 6.828
Test 64 | Hq_b: -0.267|0.537|-0.565, Hq_w: 0.783|0.434|-0.658, acc: 0.961, kl: 6.609, kl_b: 632.049|266.818|15.081, kl_w: 163722.972|157587.738|8239.539, ll: -7.126, logpy: -0.517, loss: 7.126
MAP-Test 64 | Hq_b: -0.267|0.537|-0.565, Hq_w: 0.783|0.434|-0.658, acc: 0.969, kl: 10.760, kl_b: 878.932|464.724|20.049, kl_w: 261423.094|264432.281|10756.215, ll: -11.017, logpy: -0.258, loss: 11.017


100%|██████████| 390/390 [00:08<00:00, 45.01it/s]
100%|██████████| 78/78 [00:00<00:00, 85.20it/s]
100%|██████████| 78/78 [00:00<00:00, 92.62it/s]


Train 65 | Hq_b: -0.264|0.545|-0.563, Hq_w: 0.786|0.441|-0.658, acc: 0.977, kl: 6.558, kl_b: 630.963|261.963|15.194, kl_w: 162612.304|156135.612|8236.073, ll: -6.741, logpy: -0.183, loss: 6.741
Test 65 | Hq_b: -0.260|0.552|-0.562, Hq_w: 0.790|0.448|-0.658, acc: 0.967, kl: 6.508, kl_b: 628.912|258.957|15.357, kl_w: 161603.014|154673.240|8232.805, ll: -6.966, logpy: -0.457, loss: 6.966
MAP-Test 65 | Hq_b: -0.260|0.552|-0.562, Hq_w: 0.790|0.448|-0.658, acc: 0.971, kl: 10.630, kl_b: 875.417|457.028|20.018, kl_w: 258635.516|260735.500|10754.413, ll: -10.915, logpy: -0.285, loss: 10.915


100%|██████████| 390/390 [00:08<00:00, 44.09it/s]
100%|██████████| 78/78 [00:00<00:00, 78.05it/s]
100%|██████████| 78/78 [00:00<00:00, 87.04it/s]


Train 66 | Hq_b: -0.257|0.559|-0.558, Hq_w: 0.793|0.455|-0.658, acc: 0.978, kl: 6.456, kl_b: 628.832|255.943|15.095, kl_w: 160515.712|153175.600|8233.787, ll: -6.630, logpy: -0.173, loss: 6.630
Test 66 | Hq_b: -0.254|0.567|-0.556, Hq_w: 0.797|0.462|-0.657, acc: 0.964, kl: 6.405, kl_b: 628.383|255.769|14.980, kl_w: 159426.089|151715.418|8231.377, ll: -6.894, logpy: -0.488, loss: 6.894
MAP-Test 66 | Hq_b: -0.254|0.567|-0.556, Hq_w: 0.797|0.462|-0.657, acc: 0.971, kl: 10.502, kl_b: 872.453|449.517|19.951, kl_w: 255898.656|257089.469|10751.615, ll: -10.769, logpy: -0.268, loss: 10.769


100%|██████████| 390/390 [00:08<00:00, 44.62it/s]
100%|██████████| 78/78 [00:00<00:00, 84.02it/s]
100%|██████████| 78/78 [00:00<00:00, 90.38it/s]


Train 67 | Hq_b: -0.251|0.574|-0.552, Hq_w: 0.800|0.469|-0.657, acc: 0.977, kl: 6.358, kl_b: 625.789|250.779|14.967, kl_w: 158439.159|150313.456|8235.675, ll: -6.543, logpy: -0.185, loss: 6.543
Test 67 | Hq_b: -0.248|0.581|-0.552, Hq_w: 0.803|0.475|-0.657, acc: 0.966, kl: 6.309, kl_b: 625.653|248.237|15.007, kl_w: 157431.581|148889.308|8234.615, ll: -6.741, logpy: -0.433, loss: 6.741
MAP-Test 67 | Hq_b: -0.248|0.581|-0.552, Hq_w: 0.803|0.475|-0.657, acc: 0.975, kl: 10.376, kl_b: 869.413|442.039|19.915, kl_w: 253237.812|253493.641|10749.107, ll: -10.596, logpy: -0.219, loss: 10.596


100%|██████████| 390/390 [00:08<00:00, 44.54it/s]
100%|██████████| 78/78 [00:00<00:00, 85.32it/s]
100%|██████████| 78/78 [00:00<00:00, 90.59it/s]


Train 68 | Hq_b: -0.245|0.588|-0.549, Hq_w: 0.806|0.482|-0.657, acc: 0.977, kl: 6.261, kl_b: 622.391|246.024|14.844, kl_w: 156422.882|147515.021|8227.019, ll: -6.442, logpy: -0.181, loss: 6.442
Test 68 | Hq_b: -0.242|0.595|-0.543, Hq_w: 0.810|0.489|-0.657, acc: 0.967, kl: 6.213, kl_b: 619.813|242.916|14.590, kl_w: 155397.828|146136.156|8226.895, ll: -6.689, logpy: -0.476, loss: 6.689
MAP-Test 68 | Hq_b: -0.242|0.595|-0.543, Hq_w: 0.810|0.489|-0.657, acc: 0.976, kl: 10.254, kl_b: 866.444|434.964|19.829, kl_w: 250638.453|250009.859|10746.076, ll: -10.465, logpy: -0.211, loss: 10.465


100%|██████████| 390/390 [00:08<00:00, 44.66it/s]
100%|██████████| 78/78 [00:00<00:00, 84.88it/s]
100%|██████████| 78/78 [00:00<00:00, 90.42it/s]


Train 69 | Hq_b: -0.238|0.602|-0.539, Hq_w: 0.813|0.495|-0.656, acc: 0.979, kl: 6.166, kl_b: 619.384|239.631|14.983, kl_w: 154417.279|144771.251|8227.243, ll: -6.333, logpy: -0.167, loss: 6.333
Test 69 | Hq_b: -0.235|0.609|-0.535, Hq_w: 0.816|0.502|-0.656, acc: 0.966, kl: 6.117, kl_b: 619.262|234.348|14.971, kl_w: 153357.996|143392.227|8230.671, ll: -6.607, logpy: -0.490, loss: 6.607
MAP-Test 69 | Hq_b: -0.235|0.609|-0.535, Hq_w: 0.816|0.502|-0.656, acc: 0.976, kl: 10.131, kl_b: 863.543|427.621|19.753, kl_w: 248020.047|246493.844|10742.570, ll: -10.355, logpy: -0.224, loss: 10.355


100%|██████████| 390/390 [00:08<00:00, 44.49it/s]
100%|██████████| 78/78 [00:00<00:00, 81.85it/s]
100%|██████████| 78/78 [00:00<00:00, 90.69it/s]


Train 70 | Hq_b: -0.232|0.616|-0.535, Hq_w: 0.820|0.509|-0.656, acc: 0.978, kl: 6.072, kl_b: 616.264|234.344|14.833, kl_w: 152455.550|142045.912|8223.944, ll: -6.242, logpy: -0.171, loss: 6.242
Test 70 | Hq_b: -0.229|0.624|-0.538, Hq_w: 0.823|0.515|-0.656, acc: 0.966, kl: 6.024, kl_b: 616.614|233.373|15.251, kl_w: 151420.349|140720.608|8214.240, ll: -6.495, logpy: -0.470, loss: 6.495
MAP-Test 70 | Hq_b: -0.229|0.624|-0.538, Hq_w: 0.823|0.515|-0.656, acc: 0.972, kl: 10.011, kl_b: 860.350|420.416|19.780, kl_w: 245447.422|243040.531|10741.215, ll: -10.249, logpy: -0.238, loss: 10.249


100%|██████████| 390/390 [00:08<00:00, 44.65it/s]
100%|██████████| 78/78 [00:00<00:00, 84.17it/s]
100%|██████████| 78/78 [00:00<00:00, 91.52it/s]


Train 71 | Hq_b: -0.226|0.630|-0.537, Hq_w: 0.826|0.522|-0.656, acc: 0.978, kl: 5.980, kl_b: 612.855|229.458|14.726, kl_w: 150487.652|139422.182|8220.693, ll: -6.153, logpy: -0.174, loss: 6.153
Test 71 | Hq_b: -0.223|0.637|-0.539, Hq_w: 0.829|0.528|-0.656, acc: 0.964, kl: 5.934, kl_b: 612.037|225.183|15.070, kl_w: 149529.741|138088.488|8213.741, ll: -6.372, logpy: -0.439, loss: 6.372
MAP-Test 71 | Hq_b: -0.223|0.637|-0.539, Hq_w: 0.829|0.528|-0.656, acc: 0.976, kl: 9.889, kl_b: 857.387|413.367|19.784, kl_w: 242881.125|239563.984|10738.533, ll: -10.100, logpy: -0.211, loss: 10.100


100%|██████████| 390/390 [00:08<00:00, 44.81it/s]
100%|██████████| 78/78 [00:00<00:00, 84.75it/s]
100%|██████████| 78/78 [00:00<00:00, 90.95it/s]


Train 72 | Hq_b: -0.219|0.644|-0.537, Hq_w: 0.832|0.535|-0.655, acc: 0.977, kl: 5.887, kl_b: 610.830|224.952|14.741, kl_w: 148533.184|136742.603|8215.512, ll: -6.068, logpy: -0.181, loss: 6.068
Test 72 | Hq_b: -0.216|0.651|-0.538, Hq_w: 0.836|0.542|-0.655, acc: 0.969, kl: 5.842, kl_b: 610.791|222.774|14.373, kl_w: 147599.215|135460.285|8214.079, ll: -6.261, logpy: -0.419, loss: 6.261
MAP-Test 72 | Hq_b: -0.216|0.651|-0.538, Hq_w: 0.836|0.542|-0.655, acc: 0.973, kl: 9.770, kl_b: 854.323|406.490|19.770, kl_w: 240370.078|236109.234|10735.613, ll: -10.019, logpy: -0.249, loss: 10.019


100%|██████████| 390/390 [00:08<00:00, 44.67it/s]
100%|██████████| 78/78 [00:00<00:00, 84.02it/s]
100%|██████████| 78/78 [00:00<00:00, 89.63it/s]


Train 73 | Hq_b: -0.213|0.658|-0.537, Hq_w: 0.839|0.548|-0.655, acc: 0.979, kl: 5.796, kl_b: 608.722|219.484|14.983, kl_w: 146655.086|134107.890|8217.833, ll: -5.965, logpy: -0.168, loss: 5.965
Test 73 | Hq_b: -0.209|0.665|-0.537, Hq_w: 0.842|0.555|-0.655, acc: 0.966, kl: 5.753, kl_b: 607.827|218.616|14.603, kl_w: 145787.119|132808.224|8217.736, ll: -6.181, logpy: -0.428, loss: 6.181
MAP-Test 73 | Hq_b: -0.209|0.665|-0.537, Hq_w: 0.842|0.555|-0.655, acc: 0.973, kl: 9.651, kl_b: 850.886|399.281|19.767, kl_w: 237881.266|232672.609|10731.953, ll: -9.897, logpy: -0.245, loss: 9.897


100%|██████████| 390/390 [00:08<00:00, 44.70it/s]
100%|██████████| 78/78 [00:00<00:00, 84.84it/s]
100%|██████████| 78/78 [00:00<00:00, 92.16it/s]


Train 74 | Hq_b: -0.206|0.671|-0.538, Hq_w: 0.845|0.561|-0.654, acc: 0.977, kl: 5.710, kl_b: 605.758|214.433|14.886, kl_w: 144841.868|131609.839|8216.716, ll: -5.897, logpy: -0.186, loss: 5.897
Test 74 | Hq_b: -0.203|0.678|-0.539, Hq_w: 0.848|0.567|-0.654, acc: 0.965, kl: 5.666, kl_b: 605.739|209.909|15.008, kl_w: 143955.953|130312.366|8223.394, ll: -6.122, logpy: -0.455, loss: 6.122
MAP-Test 74 | Hq_b: -0.203|0.678|-0.539, Hq_w: 0.848|0.567|-0.654, acc: 0.973, kl: 9.536, kl_b: 847.924|392.695|19.777, kl_w: 235464.984|229349.750|10730.137, ll: -9.784, logpy: -0.247, loss: 9.784


100%|██████████| 390/390 [00:08<00:00, 44.78it/s]
100%|██████████| 78/78 [00:00<00:00, 85.08it/s]
100%|██████████| 78/78 [00:00<00:00, 90.53it/s]


Train 75 | Hq_b: -0.200|0.684|-0.536, Hq_w: 0.851|0.573|-0.654, acc: 0.977, kl: 5.624, kl_b: 602.138|209.538|15.015, kl_w: 143003.788|129130.635|8213.958, ll: -5.800, logpy: -0.177, loss: 5.800
Test 75 | Hq_b: -0.196|0.691|-0.535, Hq_w: 0.854|0.580|-0.654, acc: 0.966, kl: 5.581, kl_b: 600.202|204.468|14.700, kl_w: 142134.184|127898.135|8205.487, ll: -6.079, logpy: -0.497, loss: 6.079
MAP-Test 75 | Hq_b: -0.196|0.691|-0.535, Hq_w: 0.854|0.580|-0.654, acc: 0.971, kl: 9.425, kl_b: 844.632|385.828|19.739, kl_w: 233136.672|226114.188|10727.761, ll: -9.712, logpy: -0.287, loss: 9.712


100%|██████████| 390/390 [00:08<00:00, 44.63it/s]
100%|██████████| 78/78 [00:00<00:00, 83.73it/s]
100%|██████████| 78/78 [00:00<00:00, 90.27it/s]


Train 76 | Hq_b: -0.194|0.697|-0.531, Hq_w: 0.857|0.585|-0.654, acc: 0.977, kl: 5.542, kl_b: 599.399|204.879|14.733, kl_w: 141315.196|126775.093|8206.478, ll: -5.733, logpy: -0.191, loss: 5.733
Test 76 | Hq_b: -0.191|0.703|-0.527, Hq_w: 0.859|0.591|-0.654, acc: 0.966, kl: 5.503, kl_b: 598.232|204.371|15.011, kl_w: 140537.493|125606.077|8208.745, ll: -5.954, logpy: -0.450, loss: 5.954
MAP-Test 76 | Hq_b: -0.191|0.703|-0.527, Hq_w: 0.859|0.591|-0.654, acc: 0.975, kl: 9.318, kl_b: 841.954|379.859|19.656, kl_w: 230899.984|223043.906|10726.076, ll: -9.548, logpy: -0.230, loss: 9.548


100%|██████████| 390/390 [00:08<00:00, 44.73it/s]
100%|██████████| 78/78 [00:00<00:00, 84.49it/s]
100%|██████████| 78/78 [00:00<00:00, 91.13it/s]


Train 77 | Hq_b: -0.188|0.709|-0.525, Hq_w: 0.862|0.597|-0.654, acc: 0.978, kl: 5.463, kl_b: 596.331|201.242|14.684, kl_w: 139645.089|124483.889|8207.340, ll: -5.638, logpy: -0.175, loss: 5.638
Test 77 | Hq_b: -0.184|0.715|-0.523, Hq_w: 0.865|0.603|-0.653, acc: 0.965, kl: 5.422, kl_b: 595.637|199.158|14.827, kl_w: 138860.286|123248.994|8188.230, ll: -5.896, logpy: -0.474, loss: 5.896
MAP-Test 77 | Hq_b: -0.184|0.715|-0.523, Hq_w: 0.865|0.603|-0.653, acc: 0.975, kl: 9.209, kl_b: 838.652|373.603|19.616, kl_w: 228620.938|219886.422|10722.564, ll: -9.439, logpy: -0.229, loss: 9.439


100%|██████████| 390/390 [00:08<00:00, 44.63it/s]
100%|██████████| 78/78 [00:00<00:00, 84.60it/s]
100%|██████████| 78/78 [00:00<00:00, 91.47it/s]


Train 78 | Hq_b: -0.181|0.721|-0.528, Hq_w: 0.868|0.609|-0.653, acc: 0.977, kl: 5.383, kl_b: 594.541|195.884|14.697, kl_w: 137940.492|122179.483|8201.502, ll: -5.568, logpy: -0.186, loss: 5.568
Test 78 | Hq_b: -0.178|0.727|-0.529, Hq_w: 0.871|0.615|-0.653, acc: 0.964, kl: 5.344, kl_b: 590.639|195.018|14.379, kl_w: 137122.577|121064.429|8209.361, ll: -5.821, logpy: -0.477, loss: 5.821
MAP-Test 78 | Hq_b: -0.178|0.727|-0.529, Hq_w: 0.871|0.615|-0.653, acc: 0.969, kl: 9.102, kl_b: 835.709|367.054|19.680, kl_w: 226400.125|216760.453|10720.025, ll: -9.379, logpy: -0.277, loss: 9.379


100%|██████████| 390/390 [00:08<00:00, 44.64it/s]
100%|██████████| 78/78 [00:00<00:00, 84.85it/s]
100%|██████████| 78/78 [00:00<00:00, 91.43it/s]


Train 79 | Hq_b: -0.174|0.734|-0.532, Hq_w: 0.873|0.621|-0.653, acc: 0.979, kl: 5.306, kl_b: 590.818|193.108|14.913, kl_w: 136340.316|119946.663|8204.747, ll: -5.487, logpy: -0.181, loss: 5.487
Test 79 | Hq_b: -0.171|0.739|-0.531, Hq_w: 0.876|0.627|-0.653, acc: 0.967, kl: 5.266, kl_b: 588.344|189.899|14.802, kl_w: 135477.996|118831.918|8196.970, ll: -5.736, logpy: -0.470, loss: 5.736
MAP-Test 79 | Hq_b: -0.171|0.739|-0.531, Hq_w: 0.876|0.627|-0.653, acc: 0.973, kl: 9.000, kl_b: 832.385|360.963|19.697, kl_w: 224263.891|213784.219|10718.584, ll: -9.253, logpy: -0.253, loss: 9.253


100%|██████████| 390/390 [00:08<00:00, 44.57it/s]
100%|██████████| 78/78 [00:00<00:00, 84.63it/s]
100%|██████████| 78/78 [00:00<00:00, 91.75it/s]


Train 80 | Hq_b: -0.168|0.745|-0.529, Hq_w: 0.879|0.632|-0.653, acc: 0.979, kl: 5.230, kl_b: 588.860|188.146|14.822, kl_w: 134725.613|117776.855|8200.477, ll: -5.403, logpy: -0.173, loss: 5.403
Test 80 | Hq_b: -0.164|0.751|-0.525, Hq_w: 0.882|0.638|-0.652, acc: 0.967, kl: 5.192, kl_b: 587.101|185.523|14.646, kl_w: 133879.770|116748.777|8202.635, ll: -5.642, logpy: -0.450, loss: 5.642
MAP-Test 80 | Hq_b: -0.164|0.751|-0.525, Hq_w: 0.882|0.638|-0.652, acc: 0.976, kl: 8.897, kl_b: 829.307|355.192|19.637, kl_w: 222100.328|210816.875|10716.117, ll: -9.133, logpy: -0.237, loss: 9.133


100%|██████████| 390/390 [00:08<00:00, 44.71it/s]
100%|██████████| 78/78 [00:00<00:00, 84.42it/s]
100%|██████████| 78/78 [00:00<00:00, 92.16it/s]


Train 81 | Hq_b: -0.161|0.756|-0.524, Hq_w: 0.884|0.643|-0.652, acc: 0.980, kl: 5.156, kl_b: 585.948|184.246|14.648, kl_w: 133130.333|115705.580|8199.463, ll: -5.329, logpy: -0.172, loss: 5.329
Test 81 | Hq_b: -0.158|0.762|-0.524, Hq_w: 0.887|0.649|-0.652, acc: 0.966, kl: 5.120, kl_b: 583.419|182.999|15.202, kl_w: 132317.046|114693.562|8207.699, ll: -5.520, logpy: -0.400, loss: 5.520
MAP-Test 81 | Hq_b: -0.158|0.762|-0.524, Hq_w: 0.887|0.649|-0.652, acc: 0.972, kl: 8.796, kl_b: 826.389|349.358|19.625, kl_w: 219975.328|207909.672|10713.025, ll: -9.026, logpy: -0.231, loss: 9.026


100%|██████████| 390/390 [00:08<00:00, 44.67it/s]
100%|██████████| 78/78 [00:00<00:00, 83.86it/s]
100%|██████████| 78/78 [00:00<00:00, 90.42it/s]


Train 82 | Hq_b: -0.155|0.768|-0.525, Hq_w: 0.890|0.654|-0.652, acc: 0.978, kl: 5.084, kl_b: 582.530|180.645|14.791, kl_w: 131579.370|113648.699|8193.443, ll: -5.267, logpy: -0.183, loss: 5.267
Test 82 | Hq_b: -0.153|0.773|-0.523, Hq_w: 0.892|0.660|-0.652, acc: 0.966, kl: 5.051, kl_b: 581.826|180.498|14.699, kl_w: 130895.682|112698.481|8192.967, ll: -5.521, logpy: -0.470, loss: 5.521
MAP-Test 82 | Hq_b: -0.153|0.773|-0.523, Hq_w: 0.892|0.660|-0.652, acc: 0.973, kl: 8.699, kl_b: 823.959|343.891|19.619, kl_w: 217939.906|205132.859|10710.386, ll: -8.935, logpy: -0.236, loss: 8.935


100%|██████████| 390/390 [00:08<00:00, 44.66it/s]
100%|██████████| 78/78 [00:00<00:00, 85.34it/s]
100%|██████████| 78/78 [00:00<00:00, 91.33it/s]


Train 83 | Hq_b: -0.150|0.779|-0.520, Hq_w: 0.895|0.665|-0.652, acc: 0.978, kl: 5.015, kl_b: 580.217|177.390|14.682, kl_w: 130110.072|111695.603|8193.064, ll: -5.194, logpy: -0.178, loss: 5.194
Test 83 | Hq_b: -0.148|0.784|-0.518, Hq_w: 0.897|0.670|-0.651, acc: 0.968, kl: 4.981, kl_b: 578.564|175.971|14.811, kl_w: 129385.526|110709.176|8183.128, ll: -5.420, logpy: -0.439, loss: 5.420
MAP-Test 83 | Hq_b: -0.148|0.784|-0.518, Hq_w: 0.897|0.670|-0.651, acc: 0.975, kl: 8.603, kl_b: 821.404|338.022|19.566, kl_w: 215904.203|202352.672|10707.776, ll: -8.840, logpy: -0.238, loss: 8.840


100%|██████████| 390/390 [00:08<00:00, 44.64it/s]
100%|██████████| 78/78 [00:00<00:00, 83.95it/s]
100%|██████████| 78/78 [00:00<00:00, 91.61it/s]


Train 84 | Hq_b: -0.145|0.790|-0.518, Hq_w: 0.900|0.675|-0.651, acc: 0.979, kl: 4.946, kl_b: 577.552|173.264|14.811, kl_w: 128595.173|109757.614|8192.646, ll: -5.122, logpy: -0.176, loss: 5.122
Test 84 | Hq_b: -0.142|0.796|-0.516, Hq_w: 0.902|0.681|-0.651, acc: 0.966, kl: 4.911, kl_b: 574.665|171.698|14.566, kl_w: 127832.365|108741.524|8197.126, ll: -5.373, logpy: -0.462, loss: 5.373
MAP-Test 84 | Hq_b: -0.142|0.796|-0.516, Hq_w: 0.902|0.681|-0.651, acc: 0.977, kl: 8.507, kl_b: 818.593|332.309|19.548, kl_w: 213879.281|199587.141|10703.621, ll: -8.726, logpy: -0.219, loss: 8.726


100%|██████████| 390/390 [00:08<00:00, 44.69it/s]
100%|██████████| 78/78 [00:00<00:00, 84.14it/s]
100%|██████████| 78/78 [00:00<00:00, 91.20it/s]


Train 85 | Hq_b: -0.138|0.801|-0.515, Hq_w: 0.905|0.686|-0.650, acc: 0.979, kl: 4.877, kl_b: 575.191|170.318|14.571, kl_w: 127101.822|107792.952|8184.653, ll: -5.050, logpy: -0.173, loss: 5.050
Test 85 | Hq_b: -0.135|0.807|-0.512, Hq_w: 0.907|0.691|-0.650, acc: 0.967, kl: 4.843, kl_b: 575.988|167.839|14.666, kl_w: 126386.349|106838.764|8176.895, ll: -5.329, logpy: -0.486, loss: 5.329
MAP-Test 85 | Hq_b: -0.135|0.807|-0.512, Hq_w: 0.907|0.691|-0.650, acc: 0.972, kl: 8.411, kl_b: 815.206|326.621|19.507, kl_w: 211865.047|196804.469|10699.422, ll: -8.664, logpy: -0.254, loss: 8.664


100%|██████████| 390/390 [00:08<00:00, 44.55it/s]
100%|██████████| 78/78 [00:00<00:00, 83.72it/s]
100%|██████████| 78/78 [00:00<00:00, 91.56it/s]


Train 86 | Hq_b: -0.131|0.813|-0.513, Hq_w: 0.910|0.697|-0.650, acc: 0.978, kl: 4.810, kl_b: 571.992|164.877|14.554, kl_w: 125650.608|105899.181|8185.846, ll: -4.991, logpy: -0.181, loss: 4.991
Test 86 | Hq_b: -0.128|0.818|-0.511, Hq_w: 0.912|0.702|-0.650, acc: 0.965, kl: 4.777, kl_b: 570.086|164.961|13.973, kl_w: 124948.438|104966.658|8181.960, ll: -5.297, logpy: -0.520, loss: 5.297
MAP-Test 86 | Hq_b: -0.128|0.818|-0.511, Hq_w: 0.912|0.702|-0.650, acc: 0.975, kl: 8.317, kl_b: 811.970|321.145|19.493, kl_w: 209897.203|194098.656|10697.088, ll: -8.572, logpy: -0.255, loss: 8.572


100%|██████████| 390/390 [00:08<00:00, 44.61it/s]
100%|██████████| 78/78 [00:00<00:00, 84.17it/s]
100%|██████████| 78/78 [00:00<00:00, 91.73it/s]


Train 87 | Hq_b: -0.125|0.823|-0.511, Hq_w: 0.915|0.707|-0.649, acc: 0.977, kl: 4.744, kl_b: 567.967|161.129|14.672, kl_w: 124222.892|104071.620|8184.410, ll: -4.933, logpy: -0.189, loss: 4.933
Test 87 | Hq_b: -0.122|0.828|-0.510, Hq_w: 0.917|0.712|-0.649, acc: 0.964, kl: 4.713, kl_b: 566.122|159.608|14.321, kl_w: 123509.276|103218.572|8173.446, ll: -5.171, logpy: -0.458, loss: 5.171
MAP-Test 87 | Hq_b: -0.122|0.828|-0.510, Hq_w: 0.917|0.712|-0.649, acc: 0.974, kl: 8.226, kl_b: 809.237|315.942|19.481, kl_w: 207981.500|191471.641|10693.688, ll: -8.465, logpy: -0.239, loss: 8.465


100%|██████████| 390/390 [00:08<00:00, 44.57it/s]
100%|██████████| 78/78 [00:00<00:00, 85.24it/s]
100%|██████████| 78/78 [00:00<00:00, 91.70it/s]


Train 88 | Hq_b: -0.119|0.833|-0.508, Hq_w: 0.920|0.717|-0.649, acc: 0.977, kl: 4.681, kl_b: 566.522|159.466|14.675, kl_w: 122850.977|102278.623|8177.261, ll: -4.868, logpy: -0.187, loss: 4.868
Test 88 | Hq_b: -0.115|0.837|-0.505, Hq_w: 0.922|0.722|-0.649, acc: 0.967, kl: 4.649, kl_b: 562.943|154.700|14.787, kl_w: 122131.429|101424.977|8175.955, ll: -5.102, logpy: -0.453, loss: 5.102
MAP-Test 88 | Hq_b: -0.115|0.837|-0.505, Hq_w: 0.922|0.722|-0.649, acc: 0.974, kl: 8.135, kl_b: 806.223|310.992|19.435, kl_w: 206084.281|188841.641|10690.023, ll: -8.366, logpy: -0.231, loss: 8.366


100%|██████████| 390/390 [00:08<00:00, 44.68it/s]
100%|██████████| 78/78 [00:00<00:00, 84.08it/s]
100%|██████████| 78/78 [00:00<00:00, 91.30it/s]


Train 89 | Hq_b: -0.112|0.842|-0.507, Hq_w: 0.924|0.727|-0.649, acc: 0.978, kl: 4.618, kl_b: 562.625|156.720|14.410, kl_w: 121475.831|100516.669|8176.040, ll: -4.804, logpy: -0.186, loss: 4.804
Test 89 | Hq_b: -0.109|0.847|-0.506, Hq_w: 0.927|0.731|-0.648, acc: 0.965, kl: 4.587, kl_b: 563.425|156.317|14.894, kl_w: 120795.241|99667.165|8177.952, ll: -5.103, logpy: -0.515, loss: 5.103
MAP-Test 89 | Hq_b: -0.109|0.847|-0.506, Hq_w: 0.927|0.731|-0.648, acc: 0.973, kl: 8.046, kl_b: 803.259|305.972|19.442, kl_w: 204209.625|186276.609|10688.035, ll: -8.335, logpy: -0.289, loss: 8.335


100%|██████████| 390/390 [00:08<00:00, 44.43it/s]
100%|██████████| 78/78 [00:00<00:00, 84.70it/s]
100%|██████████| 78/78 [00:00<00:00, 91.16it/s]


Train 90 | Hq_b: -0.106|0.852|-0.506, Hq_w: 0.929|0.736|-0.648, acc: 0.978, kl: 4.557, kl_b: 561.193|153.539|14.597, kl_w: 120165.410|98793.131|8172.393, ll: -4.746, logpy: -0.189, loss: 4.746
Test 90 | Hq_b: -0.103|0.857|-0.508, Hq_w: 0.931|0.741|-0.648, acc: 0.967, kl: 4.527, kl_b: 557.327|152.251|14.873, kl_w: 119473.139|97987.362|8180.501, ll: -5.005, logpy: -0.477, loss: 5.005
MAP-Test 90 | Hq_b: -0.103|0.857|-0.508, Hq_w: 0.931|0.741|-0.648, acc: 0.974, kl: 7.960, kl_b: 800.231|300.966|19.457, kl_w: 202419.203|183755.109|10685.708, ll: -8.203, logpy: -0.243, loss: 8.203


100%|██████████| 390/390 [00:08<00:00, 43.68it/s]
100%|██████████| 78/78 [00:01<00:00, 54.70it/s]
100%|██████████| 78/78 [00:01<00:00, 57.84it/s]


Train 91 | Hq_b: -0.100|0.862|-0.506, Hq_w: 0.934|0.746|-0.648, acc: 0.979, kl: 4.498, kl_b: 558.346|149.449|14.481, kl_w: 118854.287|97135.236|8165.193, ll: -4.673, logpy: -0.175, loss: 4.673
Test 91 | Hq_b: -0.097|0.867|-0.505, Hq_w: 0.936|0.751|-0.647, acc: 0.965, kl: 4.469, kl_b: 556.871|147.030|14.798, kl_w: 118220.680|96322.336|8163.701, ll: -4.940, logpy: -0.472, loss: 4.940
MAP-Test 91 | Hq_b: -0.097|0.867|-0.505, Hq_w: 0.936|0.751|-0.647, acc: 0.973, kl: 7.874, kl_b: 797.570|295.870|19.436, kl_w: 200631.000|181257.078|10681.957, ll: -8.117, logpy: -0.243, loss: 8.117


100%|██████████| 390/390 [00:10<00:00, 37.44it/s]
100%|██████████| 78/78 [00:00<00:00, 89.48it/s]
100%|██████████| 78/78 [00:00<00:00, 97.24it/s]


Train 92 | Hq_b: -0.094|0.872|-0.502, Hq_w: 0.938|0.755|-0.647, acc: 0.979, kl: 4.440, kl_b: 556.032|146.428|14.407, kl_w: 117601.229|95494.639|8167.662, ll: -4.616, logpy: -0.176, loss: 4.616
Test 92 | Hq_b: -0.091|0.876|-0.498, Hq_w: 0.940|0.760|-0.647, acc: 0.965, kl: 4.411, kl_b: 555.212|144.348|14.835, kl_w: 116972.652|94708.166|8162.487, ll: -4.880, logpy: -0.469, loss: 4.880
MAP-Test 92 | Hq_b: -0.091|0.876|-0.498, Hq_w: 0.940|0.760|-0.647, acc: 0.974, kl: 7.789, kl_b: 795.335|291.008|19.357, kl_w: 198854.016|178805.312|10678.887, ll: -8.033, logpy: -0.244, loss: 8.033


100%|██████████| 390/390 [00:08<00:00, 46.53it/s]
100%|██████████| 78/78 [00:00<00:00, 90.20it/s]
100%|██████████| 78/78 [00:00<00:00, 96.39it/s]


Train 93 | Hq_b: -0.088|0.881|-0.497, Hq_w: 0.943|0.765|-0.647, acc: 0.979, kl: 4.380, kl_b: 554.124|143.125|14.451, kl_w: 116269.860|93862.523|8162.314, ll: -4.557, logpy: -0.177, loss: 4.557
Test 93 | Hq_b: -0.085|0.886|-0.499, Hq_w: 0.945|0.769|-0.647, acc: 0.966, kl: 4.353, kl_b: 553.818|142.825|14.610, kl_w: 115655.809|93123.098|8167.184, ll: -4.807, logpy: -0.454, loss: 4.807
MAP-Test 93 | Hq_b: -0.085|0.886|-0.499, Hq_w: 0.945|0.769|-0.647, acc: 0.974, kl: 7.704, kl_b: 792.008|286.159|19.364, kl_w: 197043.766|176361.750|10676.730, ll: -7.951, logpy: -0.247, loss: 7.951


100%|██████████| 390/390 [00:09<00:00, 41.83it/s]
100%|██████████| 78/78 [00:01<00:00, 75.26it/s]
100%|██████████| 78/78 [00:00<00:00, 91.44it/s]


Train 94 | Hq_b: -0.081|0.890|-0.497, Hq_w: 0.947|0.774|-0.647, acc: 0.978, kl: 4.322, kl_b: 550.181|139.920|14.471, kl_w: 114980.519|92267.439|8156.082, ll: -4.508, logpy: -0.186, loss: 4.508
Test 94 | Hq_b: -0.078|0.895|-0.497, Hq_w: 0.949|0.779|-0.646, acc: 0.967, kl: 4.294, kl_b: 550.657|139.753|14.576, kl_w: 114363.836|91481.284|8157.250, ll: -4.713, logpy: -0.419, loss: 4.713
MAP-Test 94 | Hq_b: -0.078|0.895|-0.497, Hq_w: 0.949|0.779|-0.646, acc: 0.975, kl: 7.620, kl_b: 788.977|281.258|19.345, kl_w: 195258.000|173956.391|10673.282, ll: -7.851, logpy: -0.231, loss: 7.851


100%|██████████| 390/390 [00:08<00:00, 44.84it/s]
100%|██████████| 78/78 [00:00<00:00, 84.45it/s]
100%|██████████| 78/78 [00:00<00:00, 91.89it/s]


Train 95 | Hq_b: -0.076|0.899|-0.499, Hq_w: 0.952|0.783|-0.646, acc: 0.978, kl: 4.267, kl_b: 549.441|137.786|14.619, kl_w: 113722.510|90762.196|8158.218, ll: -4.453, logpy: -0.186, loss: 4.453
Test 95 | Hq_b: -0.073|0.903|-0.497, Hq_w: 0.954|0.788|-0.646, acc: 0.967, kl: 4.239, kl_b: 546.582|138.215|14.450, kl_w: 113062.777|90025.802|8143.799, ll: -4.681, logpy: -0.442, loss: 4.681
MAP-Test 95 | Hq_b: -0.073|0.903|-0.497, Hq_w: 0.954|0.788|-0.646, acc: 0.973, kl: 7.537, kl_b: 786.602|277.208|19.343, kl_w: 193497.688|171606.781|10669.383, ll: -7.805, logpy: -0.268, loss: 7.805


100%|██████████| 390/390 [00:08<00:00, 44.98it/s]
100%|██████████| 78/78 [00:00<00:00, 85.38it/s]
100%|██████████| 78/78 [00:00<00:00, 91.85it/s]


Train 96 | Hq_b: -0.070|0.907|-0.495, Hq_w: 0.956|0.792|-0.646, acc: 0.978, kl: 4.212, kl_b: 544.843|136.021|14.552, kl_w: 112484.209|89245.519|8153.353, ll: -4.400, logpy: -0.188, loss: 4.400
Test 96 | Hq_b: -0.067|0.911|-0.491, Hq_w: 0.958|0.796|-0.645, acc: 0.969, kl: 4.183, kl_b: 544.641|136.239|13.990, kl_w: 111817.852|88497.139|8143.944, ll: -4.640, logpy: -0.457, loss: 4.640
MAP-Test 96 | Hq_b: -0.067|0.911|-0.491, Hq_w: 0.958|0.796|-0.645, acc: 0.974, kl: 7.455, kl_b: 783.798|272.813|19.283, kl_w: 191758.688|169255.672|10667.877, ll: -7.729, logpy: -0.274, loss: 7.729


100%|██████████| 390/390 [00:09<00:00, 41.34it/s]
100%|██████████| 78/78 [00:01<00:00, 76.07it/s]
100%|██████████| 78/78 [00:00<00:00, 82.57it/s]


Train 97 | Hq_b: -0.064|0.916|-0.491, Hq_w: 0.960|0.801|-0.645, acc: 0.978, kl: 4.157, kl_b: 542.806|133.310|14.287, kl_w: 111267.648|87737.857|8151.449, ll: -4.343, logpy: -0.186, loss: 4.343
Test 97 | Hq_b: -0.061|0.920|-0.490, Hq_w: 0.962|0.805|-0.645, acc: 0.967, kl: 4.132, kl_b: 545.426|130.996|14.629, kl_w: 110694.201|87049.206|8145.090, ll: -4.623, logpy: -0.491, loss: 4.623
MAP-Test 97 | Hq_b: -0.061|0.920|-0.490, Hq_w: 0.962|0.805|-0.645, acc: 0.974, kl: 7.377, kl_b: 781.155|268.417|19.265, kl_w: 190078.891|167014.781|10665.768, ll: -7.656, logpy: -0.279, loss: 7.656


100%|██████████| 390/390 [00:08<00:00, 43.36it/s]
100%|██████████| 78/78 [00:00<00:00, 84.01it/s]
100%|██████████| 78/78 [00:00<00:00, 91.14it/s]


Train 98 | Hq_b: -0.058|0.924|-0.490, Hq_w: 0.965|0.809|-0.645, acc: 0.980, kl: 4.105, kl_b: 541.058|129.924|14.242, kl_w: 110069.597|86357.869|8146.433, ll: -4.285, logpy: -0.180, loss: 4.285
Test 98 | Hq_b: -0.055|0.929|-0.489, Hq_w: 0.967|0.813|-0.645, acc: 0.967, kl: 4.081, kl_b: 539.736|125.912|14.630, kl_w: 109514.061|85679.210|8153.438, ll: -4.595, logpy: -0.515, loss: 4.595
MAP-Test 98 | Hq_b: -0.055|0.929|-0.489, Hq_w: 0.967|0.813|-0.645, acc: 0.973, kl: 7.299, kl_b: 778.457|264.004|19.255, kl_w: 188428.938|164805.734|10661.649, ll: -7.576, logpy: -0.277, loss: 7.576


100%|██████████| 390/390 [00:08<00:00, 45.10it/s]
100%|██████████| 78/78 [00:00<00:00, 84.77it/s]
100%|██████████| 78/78 [00:00<00:00, 91.54it/s]


Train 99 | Hq_b: -0.052|0.933|-0.487, Hq_w: 0.969|0.818|-0.644, acc: 0.977, kl: 4.054, kl_b: 537.821|128.237|14.414, kl_w: 108891.632|84976.298|8143.925, ll: -4.244, logpy: -0.190, loss: 4.244
Test 99 | Hq_b: -0.049|0.937|-0.484, Hq_w: 0.971|0.822|-0.644, acc: 0.966, kl: 4.029, kl_b: 536.219|126.814|14.275, kl_w: 108330.173|84296.741|8151.286, ll: -4.528, logpy: -0.499, loss: 4.528
MAP-Test 99 | Hq_b: -0.049|0.937|-0.484, Hq_w: 0.971|0.822|-0.644, acc: 0.977, kl: 7.221, kl_b: 775.453|259.736|19.210, kl_w: 186740.766|162590.984|10659.203, ll: -7.451, logpy: -0.230, loss: 7.451
Normal prior
Normal prior
Normal prior
Normal prior
Normal prior
Normal prior


100%|██████████| 390/390 [00:08<00:00, 45.03it/s]
100%|██████████| 78/78 [00:00<00:00, 85.01it/s]
100%|██████████| 78/78 [00:00<00:00, 91.96it/s]


Train 0 | Hq_b: -1.086|-1.078|-1.068, Hq_w: -1.054|-1.080|-1.081, acc: 0.807, kl: 26.932, kl_b: 1040.091|1039.652|20.190, kl_w: 800640.453|533427.943|10418.792, ll: -31.969, logpy: -5.037, loss: 31.969
Test 0 | Hq_b: -1.086|-1.077|-1.068, Hq_w: -1.027|-1.078|-1.082, acc: 0.890, kl: 26.699, kl_b: 1042.137|1040.863|19.989, kl_w: 789417.816|533023.595|10426.289, ll: -28.776, logpy: -2.077, loss: 28.776
MAP-Test 0 | Hq_b: -1.086|-1.077|-1.068, Hq_w: -1.027|-1.078|-1.082, acc: 0.938, kl: 33.346, kl_b: 1294.189|1292.323|25.100, kl_w: 988586.875|663157.375|12963.498, ll: -34.443, logpy: -1.097, loss: 34.443


100%|██████████| 390/390 [00:08<00:00, 45.02it/s]
100%|██████████| 78/78 [00:00<00:00, 85.18it/s]
100%|██████████| 78/78 [00:00<00:00, 92.17it/s]


Train 1 | Hq_b: -1.086|-1.075|-1.067, Hq_w: -0.999|-1.076|-1.083, acc: 0.900, kl: 26.468, kl_b: 1041.139|1037.892|20.090, kl_w: 778485.087|532414.549|10422.682, ll: -28.020, logpy: -1.552, loss: 28.020
Test 1 | Hq_b: -1.087|-1.072|-1.067, Hq_w: -0.971|-1.074|-1.084, acc: 0.922, kl: 26.233, kl_b: 1041.064|1038.330|20.392, kl_w: 767296.016|531804.179|10428.847, ll: -27.384, logpy: -1.152, loss: 27.384
MAP-Test 1 | Hq_b: -1.087|-1.072|-1.067, Hq_w: -0.971|-1.074|-1.084, acc: 0.953, kl: 32.875, kl_b: 1294.383|1290.137|25.094, kl_w: 966227.625|661950.000|12968.926, ll: -33.507, logpy: -0.632, loss: 33.507


100%|██████████| 390/390 [00:08<00:00, 44.63it/s]
100%|██████████| 78/78 [00:00<00:00, 85.17it/s]
100%|██████████| 78/78 [00:00<00:00, 92.47it/s]


Train 2 | Hq_b: -1.086|-1.069|-1.067, Hq_w: -0.942|-1.071|-1.084, acc: 0.923, kl: 25.990, kl_b: 1039.786|1034.202|20.043, kl_w: 755919.943|531050.076|10424.200, ll: -26.868, logpy: -0.879, loss: 26.868
Test 2 | Hq_b: -1.086|-1.066|-1.067, Hq_w: -0.913|-1.068|-1.085, acc: 0.922, kl: 25.742, kl_b: 1042.126|1031.241|20.207, kl_w: 744326.494|530237.188|10433.250, ll: -26.583, logpy: -0.841, loss: 26.583
MAP-Test 2 | Hq_b: -1.086|-1.066|-1.067, Hq_w: -0.913|-1.068|-1.085, acc: 0.957, kl: 32.376, kl_b: 1294.242|1287.243|25.093, kl_w: 942838.062|660383.375|12972.019, ll: -32.822, logpy: -0.446, loss: 32.822


100%|██████████| 390/390 [00:08<00:00, 45.09it/s]
100%|██████████| 78/78 [00:00<00:00, 85.59it/s]
100%|██████████| 78/78 [00:00<00:00, 92.52it/s]


Train 3 | Hq_b: -1.086|-1.063|-1.066, Hq_w: -0.883|-1.064|-1.085, acc: 0.932, kl: 25.486, kl_b: 1039.428|1032.483|20.177, kl_w: 732439.601|529352.026|10432.652, ll: -26.114, logpy: -0.628, loss: 26.114
Test 3 | Hq_b: -1.086|-1.059|-1.063, Hq_w: -0.852|-1.061|-1.086, acc: 0.930, kl: 25.224, kl_b: 1040.226|1029.577|20.298, kl_w: 720383.316|528312.270|10435.735, ll: -25.876, logpy: -0.652, loss: 25.876
MAP-Test 3 | Hq_b: -1.086|-1.059|-1.063, Hq_w: -0.852|-1.061|-1.086, acc: 0.963, kl: 31.849, kl_b: 1294.028|1283.573|25.057, kl_w: 918429.375|658459.625|12974.252, ll: -32.172, logpy: -0.323, loss: 32.172


100%|██████████| 390/390 [00:08<00:00, 45.06it/s]
100%|██████████| 78/78 [00:00<00:00, 85.34it/s]
100%|██████████| 78/78 [00:00<00:00, 92.02it/s]


Train 4 | Hq_b: -1.085|-1.055|-1.062, Hq_w: -0.821|-1.056|-1.086, acc: 0.938, kl: 24.958, kl_b: 1038.237|1027.389|20.071, kl_w: 708162.134|527228.956|10431.831, ll: -25.457, logpy: -0.499, loss: 25.457
Test 4 | Hq_b: -1.085|-1.050|-1.060, Hq_w: -0.789|-1.052|-1.087, acc: 0.936, kl: 24.685, kl_b: 1035.985|1022.202|20.510, kl_w: 695683.573|526044.401|10436.397, ll: -25.236, logpy: -0.551, loss: 25.236
MAP-Test 4 | Hq_b: -1.085|-1.050|-1.060, Hq_w: -0.789|-1.052|-1.087, acc: 0.964, kl: 31.295, kl_b: 1293.477|1279.079|25.029, kl_w: 893040.375|656156.062|12976.600, ll: -31.570, logpy: -0.275, loss: 31.570


100%|██████████| 390/390 [00:08<00:00, 44.91it/s]
100%|██████████| 78/78 [00:00<00:00, 85.12it/s]
100%|██████████| 78/78 [00:00<00:00, 92.03it/s]


Train 5 | Hq_b: -1.084|-1.045|-1.061, Hq_w: -0.757|-1.047|-1.087, acc: 0.945, kl: 24.410, kl_b: 1039.322|1022.235|20.185, kl_w: 683211.912|524756.076|10434.325, ll: -24.793, logpy: -0.384, loss: 24.793
Test 5 | Hq_b: -1.083|-1.040|-1.059, Hq_w: -0.724|-1.041|-1.087, acc: 0.941, kl: 24.129, kl_b: 1037.831|1018.143|20.773, kl_w: 670553.885|523370.960|10438.021, ll: -24.657, logpy: -0.528, loss: 24.657
MAP-Test 5 | Hq_b: -1.083|-1.040|-1.059, Hq_w: -0.724|-1.041|-1.087, acc: 0.963, kl: 30.716, kl_b: 1292.525|1273.804|25.012, kl_w: 866786.438|653444.312|12978.381, ll: -30.984, logpy: -0.268, loss: 30.984


100%|██████████| 390/390 [00:08<00:00, 44.86it/s]
100%|██████████| 78/78 [00:00<00:00, 85.16it/s]
100%|██████████| 78/78 [00:00<00:00, 91.92it/s]


Train 6 | Hq_b: -1.082|-1.034|-1.060, Hq_w: -0.691|-1.036|-1.087, acc: 0.952, kl: 23.843, kl_b: 1038.854|1017.142|20.217, kl_w: 657822.830|521825.226|10436.068, ll: -24.167, logpy: -0.324, loss: 24.167
Test 6 | Hq_b: -1.081|-1.027|-1.062, Hq_w: -0.657|-1.030|-1.088, acc: 0.947, kl: 23.557, kl_b: 1039.161|1011.150|19.652, kl_w: 645057.091|520277.668|10444.216, ll: -23.990, logpy: -0.433, loss: 23.990
MAP-Test 6 | Hq_b: -1.081|-1.027|-1.062, Hq_w: -0.657|-1.030|-1.088, acc: 0.967, kl: 30.112, kl_b: 1291.527|1267.446|25.040, kl_w: 839737.625|650291.562|12978.756, ll: -30.335, logpy: -0.223, loss: 30.335


100%|██████████| 390/390 [00:09<00:00, 43.07it/s]
100%|██████████| 78/78 [00:00<00:00, 83.34it/s]
100%|██████████| 78/78 [00:00<00:00, 91.17it/s]


Train 7 | Hq_b: -1.080|-1.021|-1.058, Hq_w: -0.623|-1.023|-1.088, acc: 0.956, kl: 23.266, kl_b: 1036.197|1010.107|19.908, kl_w: 632271.733|518500.799|10436.767, ll: -23.536, logpy: -0.271, loss: 23.536
Test 7 | Hq_b: -1.078|-1.013|-1.054, Hq_w: -0.589|-1.016|-1.088, acc: 0.951, kl: 22.974, kl_b: 1033.162|1004.905|19.859, kl_w: 619529.535|516685.702|10439.426, ll: -23.389, logpy: -0.415, loss: 23.389
MAP-Test 7 | Hq_b: -1.078|-1.013|-1.054, Hq_w: -0.589|-1.016|-1.088, acc: 0.968, kl: 29.488, kl_b: 1290.090|1260.220|24.958, kl_w: 812157.812|646678.125|12978.942, ll: -29.710, logpy: -0.222, loss: 29.710


100%|██████████| 390/390 [00:08<00:00, 44.57it/s]
100%|██████████| 78/78 [00:00<00:00, 84.37it/s]
100%|██████████| 78/78 [00:00<00:00, 90.95it/s]


Train 8 | Hq_b: -1.076|-1.006|-1.051, Hq_w: -0.554|-1.008|-1.088, acc: 0.960, kl: 22.683, kl_b: 1035.151|1001.955|19.994, kl_w: 606985.602|514696.724|10434.522, ll: -22.934, logpy: -0.251, loss: 22.934
Test 8 | Hq_b: -1.075|-0.998|-1.050, Hq_w: -0.519|-1.000|-1.088, acc: 0.950, kl: 22.391, kl_b: 1033.873|998.715|19.707, kl_w: 594491.769|512578.200|10428.861, ll: -22.773, logpy: -0.382, loss: 22.773
MAP-Test 8 | Hq_b: -1.075|-0.998|-1.050, Hq_w: -0.519|-1.000|-1.088, acc: 0.969, kl: 28.849, kl_b: 1288.469|1252.208|24.919, kl_w: 784352.125|642568.438|12978.713, ll: -29.059, logpy: -0.210, loss: 29.059


100%|██████████| 390/390 [00:08<00:00, 44.37it/s]
100%|██████████| 78/78 [00:00<00:00, 82.22it/s]
100%|██████████| 78/78 [00:00<00:00, 90.43it/s]


Train 9 | Hq_b: -1.073|-0.988|-1.047, Hq_w: -0.485|-0.992|-1.088, acc: 0.963, kl: 22.105, kl_b: 1033.419|994.743|19.937, kl_w: 582449.043|510305.091|10432.786, ll: -22.324, logpy: -0.220, loss: 22.324
Test 9 | Hq_b: -1.071|-0.979|-1.044, Hq_w: -0.451|-0.983|-1.088, acc: 0.953, kl: 21.820, kl_b: 1030.842|987.191|19.924, kl_w: 570516.321|508011.389|10440.419, ll: -22.189, logpy: -0.369, loss: 22.189
MAP-Test 9 | Hq_b: -1.071|-0.979|-1.044, Hq_w: -0.451|-0.983|-1.088, acc: 0.971, kl: 28.203, kl_b: 1286.470|1242.652|24.862, kl_w: 756646.062|637948.625|12977.020, ll: -28.404, logpy: -0.202, loss: 28.404


100%|██████████| 390/390 [00:08<00:00, 44.53it/s]
100%|██████████| 78/78 [00:00<00:00, 84.37it/s]
100%|██████████| 78/78 [00:00<00:00, 91.58it/s]


Train 10 | Hq_b: -1.069|-0.969|-1.040, Hq_w: -0.417|-0.973|-1.088, acc: 0.964, kl: 21.545, kl_b: 1033.045|984.046|19.963, kl_w: 559223.569|505538.400|10430.215, ll: -21.758, logpy: -0.213, loss: 21.758
Test 10 | Hq_b: -1.067|-0.959|-1.037, Hq_w: -0.384|-0.963|-1.088, acc: 0.956, kl: 21.271, kl_b: 1028.391|976.622|19.548, kl_w: 548137.816|502936.345|10429.679, ll: -21.637, logpy: -0.367, loss: 21.637
MAP-Test 10 | Hq_b: -1.067|-0.959|-1.037, Hq_w: -0.384|-0.963|-1.088, acc: 0.971, kl: 27.560, kl_b: 1284.481|1232.176|24.794, kl_w: 729653.875|632831.750|12974.863, ll: -27.759, logpy: -0.199, loss: 27.759


100%|██████████| 390/390 [00:09<00:00, 41.65it/s]
100%|██████████| 78/78 [00:00<00:00, 84.32it/s]
100%|██████████| 78/78 [00:00<00:00, 90.79it/s]


Train 11 | Hq_b: -1.065|-0.948|-1.036, Hq_w: -0.351|-0.953|-1.087, acc: 0.966, kl: 21.009, kl_b: 1028.805|972.377|19.880, kl_w: 537831.330|500185.902|10431.673, ll: -21.206, logpy: -0.197, loss: 21.206
Test 11 | Hq_b: -1.062|-0.936|-1.035, Hq_w: -0.320|-0.942|-1.087, acc: 0.958, kl: 20.755, kl_b: 1024.755|970.335|19.787, kl_w: 527820.135|497470.421|10429.413, ll: -21.127, logpy: -0.373, loss: 21.127
MAP-Test 11 | Hq_b: -1.062|-0.936|-1.035, Hq_w: -0.320|-0.942|-1.087, acc: 0.971, kl: 26.933, kl_b: 1282.147|1220.772|24.772, kl_w: 703930.438|627243.562|12971.922, ll: -27.168, logpy: -0.234, loss: 27.168


100%|██████████| 390/390 [00:08<00:00, 43.55it/s]
100%|██████████| 78/78 [00:00<00:00, 84.24it/s]
100%|██████████| 78/78 [00:00<00:00, 90.22it/s]


Train 12 | Hq_b: -1.060|-0.925|-1.034, Hq_w: -0.290|-0.931|-1.087, acc: 0.970, kl: 20.508, kl_b: 1026.171|962.037|19.572, kl_w: 518534.022|494429.523|10426.199, ll: -20.686, logpy: -0.178, loss: 20.686
Test 12 | Hq_b: -1.057|-0.913|-1.031, Hq_w: -0.260|-0.919|-1.087, acc: 0.957, kl: 20.267, kl_b: 1024.185|955.995|19.515, kl_w: 509535.297|491377.291|10425.367, ll: -20.656, logpy: -0.389, loss: 20.656
MAP-Test 12 | Hq_b: -1.057|-0.913|-1.031, Hq_w: -0.260|-0.919|-1.087, acc: 0.972, kl: 26.334, kl_b: 1279.367|1208.541|24.734, kl_w: 680014.000|621184.500|12968.402, ll: -26.556, logpy: -0.222, loss: 26.556


100%|██████████| 390/390 [00:08<00:00, 44.51it/s]
100%|██████████| 78/78 [00:00<00:00, 83.64it/s]
100%|██████████| 78/78 [00:00<00:00, 91.83it/s]


Train 13 | Hq_b: -1.054|-0.900|-1.030, Hq_w: -0.233|-0.907|-1.086, acc: 0.972, kl: 20.039, kl_b: 1023.425|948.023|19.789, kl_w: 501278.619|488249.386|10420.961, ll: -20.202, logpy: -0.163, loss: 20.202
Test 13 | Hq_b: -1.051|-0.887|-1.027, Hq_w: -0.206|-0.895|-1.086, acc: 0.960, kl: 19.810, kl_b: 1019.332|940.034|19.662, kl_w: 493133.067|484963.340|10424.737, ll: -20.178, logpy: -0.368, loss: 20.178
MAP-Test 13 | Hq_b: -1.051|-0.887|-1.027, Hq_w: -0.206|-0.895|-1.086, acc: 0.975, kl: 25.766, kl_b: 1276.219|1195.198|24.692, kl_w: 658132.375|614712.750|12963.711, ll: -25.973, logpy: -0.207, loss: 25.973


100%|██████████| 390/390 [00:08<00:00, 44.71it/s]
100%|██████████| 78/78 [00:00<00:00, 84.38it/s]
100%|██████████| 78/78 [00:00<00:00, 90.66it/s]


Train 14 | Hq_b: -1.048|-0.873|-1.024, Hq_w: -0.181|-0.882|-1.085, acc: 0.973, kl: 19.595, kl_b: 1020.532|935.483|19.708, kl_w: 485741.063|481622.442|10419.259, ll: -19.754, logpy: -0.159, loss: 19.754
Test 14 | Hq_b: -1.045|-0.859|-1.021, Hq_w: -0.157|-0.869|-1.085, acc: 0.961, kl: 19.381, kl_b: 1017.996|927.161|19.705, kl_w: 478516.030|478136.152|10411.139, ll: -19.769, logpy: -0.389, loss: 19.769
MAP-Test 14 | Hq_b: -1.045|-0.859|-1.021, Hq_w: -0.157|-0.869|-1.085, acc: 0.972, kl: 25.231, kl_b: 1273.176|1181.181|24.629, kl_w: 638291.875|607846.750|12956.587, ll: -25.463, logpy: -0.232, loss: 25.463


100%|██████████| 390/390 [00:08<00:00, 43.95it/s]
100%|██████████| 78/78 [00:01<00:00, 77.85it/s]
100%|██████████| 78/78 [00:00<00:00, 92.26it/s]


Train 15 | Hq_b: -1.041|-0.845|-1.018, Hq_w: -0.134|-0.855|-1.084, acc: 0.974, kl: 19.174, kl_b: 1016.159|922.824|19.689, kl_w: 471676.463|474638.166|10410.901, ll: -19.328, logpy: -0.154, loss: 19.328
Test 15 | Hq_b: -1.037|-0.831|-1.013, Hq_w: -0.113|-0.841|-1.083, acc: 0.959, kl: 18.971, kl_b: 1012.284|914.192|19.474, kl_w: 465099.130|471106.700|10397.807, ll: -19.328, logpy: -0.357, loss: 19.328
MAP-Test 15 | Hq_b: -1.037|-0.831|-1.013, Hq_w: -0.113|-0.841|-1.083, acc: 0.972, kl: 24.733, kl_b: 1269.065|1166.418|24.549, kl_w: 620562.500|600669.312|12949.507, ll: -24.945, logpy: -0.212, loss: 24.945


100%|██████████| 390/390 [00:08<00:00, 45.03it/s]
100%|██████████| 78/78 [00:00<00:00, 85.31it/s]
100%|██████████| 78/78 [00:00<00:00, 90.89it/s]


Train 16 | Hq_b: -1.033|-0.816|-1.012, Hq_w: -0.093|-0.828|-1.083, acc: 0.976, kl: 18.774, kl_b: 1012.964|905.252|19.470, kl_w: 458829.357|467538.728|10406.095, ll: -18.918, logpy: -0.144, loss: 18.918
Test 16 | Hq_b: -1.030|-0.801|-1.014, Hq_w: -0.073|-0.814|-1.082, acc: 0.966, kl: 18.576, kl_b: 1009.481|897.285|19.568, kl_w: 452719.824|463763.050|10397.253, ll: -18.918, logpy: -0.342, loss: 18.918
MAP-Test 16 | Hq_b: -1.030|-0.801|-1.014, Hq_w: -0.073|-0.814|-1.082, acc: 0.975, kl: 24.263, kl_b: 1265.376|1150.950|24.559, kl_w: 604487.875|593294.875|12940.653, ll: -24.479, logpy: -0.215, loss: 24.479


100%|██████████| 390/390 [00:08<00:00, 44.95it/s]
100%|██████████| 78/78 [00:00<00:00, 84.98it/s]
100%|██████████| 78/78 [00:00<00:00, 91.88it/s]


Train 17 | Hq_b: -1.026|-0.785|-1.011, Hq_w: -0.054|-0.799|-1.081, acc: 0.975, kl: 18.386, kl_b: 1008.693|890.296|19.428, kl_w: 446924.947|460065.195|10398.248, ll: -18.534, logpy: -0.148, loss: 18.534
Test 17 | Hq_b: -1.022|-0.770|-1.009, Hq_w: -0.036|-0.785|-1.080, acc: 0.963, kl: 18.198, kl_b: 1009.024|881.212|19.597, kl_w: 441184.097|456408.917|10384.175, ll: -18.536, logpy: -0.338, loss: 18.536
MAP-Test 17 | Hq_b: -1.022|-0.770|-1.009, Hq_w: -0.036|-0.785|-1.080, acc: 0.977, kl: 23.819, kl_b: 1261.567|1135.031|24.512, kl_w: 589827.312|585757.438|12931.430, ll: -24.014, logpy: -0.196, loss: 24.014


100%|██████████| 390/390 [00:08<00:00, 44.90it/s]
100%|██████████| 78/78 [00:00<00:00, 85.13it/s]
100%|██████████| 78/78 [00:00<00:00, 91.62it/s]


Train 18 | Hq_b: -1.018|-0.754|-1.009, Hq_w: -0.019|-0.770|-1.079, acc: 0.975, kl: 18.013, kl_b: 1006.790|873.744|19.455, kl_w: 435771.405|452569.323|10385.310, ll: -18.154, logpy: -0.141, loss: 18.154
Test 18 | Hq_b: -1.014|-0.738|-1.007, Hq_w: -0.003|-0.756|-1.079, acc: 0.963, kl: 17.828, kl_b: 1002.574|864.069|19.841, kl_w: 430371.929|448778.110|10384.156, ll: -18.164, logpy: -0.335, loss: 18.164
MAP-Test 18 | Hq_b: -1.014|-0.738|-1.007, Hq_w: -0.003|-0.756|-1.079, acc: 0.974, kl: 23.391, kl_b: 1257.287|1118.508|24.489, kl_w: 576218.562|578020.188|12923.682, ll: -23.597, logpy: -0.206, loss: 23.597


100%|██████████| 390/390 [00:08<00:00, 43.41it/s]
100%|██████████| 78/78 [00:01<00:00, 71.18it/s]
100%|██████████| 78/78 [00:00<00:00, 79.39it/s]


Train 19 | Hq_b: -1.009|-0.721|-1.002, Hq_w: 0.013|-0.741|-1.078, acc: 0.976, kl: 17.645, kl_b: 1001.922|857.495|19.433, kl_w: 425147.114|444867.741|10374.533, ll: -17.787, logpy: -0.142, loss: 17.787
Test 19 | Hq_b: -1.004|-0.704|-0.996, Hq_w: 0.029|-0.726|-1.077, acc: 0.964, kl: 17.463, kl_b: 997.694|848.829|19.720, kl_w: 419935.974|440996.608|10375.349, ll: -17.796, logpy: -0.333, loss: 17.796
MAP-Test 19 | Hq_b: -1.004|-0.704|-0.996, Hq_w: 0.029|-0.726|-1.077, acc: 0.976, kl: 22.973, kl_b: 1252.580|1101.500|24.384, kl_w: 563297.688|570080.562|12914.703, ll: -23.171, logpy: -0.198, loss: 23.171


100%|██████████| 390/390 [00:09<00:00, 42.32it/s]
100%|██████████| 78/78 [00:00<00:00, 83.71it/s]
100%|██████████| 78/78 [00:00<00:00, 89.75it/s]


Train 20 | Hq_b: -1.000|-0.688|-0.993, Hq_w: 0.044|-0.710|-1.076, acc: 0.978, kl: 17.284, kl_b: 995.649|840.985|19.454, kl_w: 414958.210|437019.994|10370.675, ll: -17.414, logpy: -0.130, loss: 17.414
Test 20 | Hq_b: -0.995|-0.671|-0.989, Hq_w: 0.059|-0.695|-1.075, acc: 0.963, kl: 17.108, kl_b: 992.458|830.438|19.653, kl_w: 410121.389|433076.571|10366.546, ll: -17.498, logpy: -0.390, loss: 17.498
MAP-Test 20 | Hq_b: -0.995|-0.671|-0.989, Hq_w: 0.059|-0.695|-1.075, acc: 0.970, kl: 22.573, kl_b: 1247.670|1084.384|24.310, kl_w: 551312.250|562084.562|12903.746, ll: -22.828, logpy: -0.255, loss: 22.828


100%|██████████| 390/390 [00:08<00:00, 44.50it/s]
100%|██████████| 78/78 [00:00<00:00, 79.99it/s]
100%|██████████| 78/78 [00:00<00:00, 88.18it/s]


Train 21 | Hq_b: -0.990|-0.655|-0.988, Hq_w: 0.073|-0.680|-1.074, acc: 0.978, kl: 16.934, kl_b: 991.118|823.571|19.225, kl_w: 405377.877|429135.226|10359.295, ll: -17.070, logpy: -0.136, loss: 17.070
Test 21 | Hq_b: -0.985|-0.638|-0.987, Hq_w: 0.087|-0.665|-1.073, acc: 0.962, kl: 16.761, kl_b: 991.727|817.731|19.191, kl_w: 400753.402|425118.727|10349.697, ll: -17.109, logpy: -0.348, loss: 17.109
MAP-Test 21 | Hq_b: -0.985|-0.638|-0.987, Hq_w: 0.087|-0.665|-1.073, acc: 0.975, kl: 22.186, kl_b: 1242.895|1067.367|24.284, kl_w: 540010.688|554065.062|12890.720, ll: -22.393, logpy: -0.207, loss: 22.393


100%|██████████| 390/390 [00:08<00:00, 43.89it/s]
100%|██████████| 78/78 [00:00<00:00, 82.28it/s]
100%|██████████| 78/78 [00:00<00:00, 90.00it/s]


Train 22 | Hq_b: -0.981|-0.622|-0.988, Hq_w: 0.101|-0.650|-1.071, acc: 0.979, kl: 16.593, kl_b: 986.356|808.254|19.341, kl_w: 396221.788|421249.971|10341.068, ll: -16.720, logpy: -0.127, loss: 16.720
Test 22 | Hq_b: -0.976|-0.605|-0.987, Hq_w: 0.114|-0.635|-1.070, acc: 0.966, kl: 16.424, kl_b: 982.212|798.694|19.269, kl_w: 391740.630|417339.827|10341.730, ll: -16.755, logpy: -0.331, loss: 16.755
MAP-Test 22 | Hq_b: -0.976|-0.605|-0.987, Hq_w: 0.114|-0.635|-1.070, acc: 0.975, kl: 21.810, kl_b: 1237.855|1050.558|24.288, kl_w: 529242.812|546075.875|12877.807, ll: -22.029, logpy: -0.219, loss: 22.029


100%|██████████| 390/390 [00:08<00:00, 44.52it/s]
100%|██████████| 78/78 [00:00<00:00, 85.23it/s]
100%|██████████| 78/78 [00:00<00:00, 89.63it/s]


Train 23 | Hq_b: -0.971|-0.589|-0.981, Hq_w: 0.127|-0.620|-1.069, acc: 0.980, kl: 16.260, kl_b: 981.551|789.964|19.235, kl_w: 387406.048|413491.272|10331.227, ll: -16.384, logpy: -0.124, loss: 16.384
Test 23 | Hq_b: -0.966|-0.573|-0.977, Hq_w: 0.139|-0.605|-1.068, acc: 0.965, kl: 16.096, kl_b: 980.046|782.831|19.521, kl_w: 383126.297|409569.740|10317.752, ll: -16.499, logpy: -0.403, loss: 16.499
MAP-Test 23 | Hq_b: -0.966|-0.573|-0.977, Hq_w: 0.139|-0.605|-1.068, acc: 0.975, kl: 21.445, kl_b: 1232.882|1033.692|24.183, kl_w: 518960.594|538154.250|12866.468, ll: -21.682, logpy: -0.237, loss: 21.682


 96%|█████████▌| 373/390 [00:08<00:00, 41.86it/s]


KeyboardInterrupt: 