In [34]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import random
from uuid import uuid4
import warnings
warnings.filterwarnings('ignore')

import numpy as np
import getopt
import sys
import os
import math
import time
import argparse
from visdom import Visdom

from tqdm import tqdm

import pandas as pd
import plotly.graph_objects as go

sys.path.insert(0, os.path.join('..', '..'))

import torch as T
from torch.autograd import Variable as var
import torch.nn.functional as F
import torch.optim as optim

from torch.nn.utils import clip_grad_norm_

from dnc.dnc import DNC
from dnc.sdnc import SDNC
from dnc.sam import SAM
from dnc.util import *

from dnc.lib import *

import copy
import pickle

In [35]:
st = InputStorage()

viz = Visdom()
# assert viz.check_connection()



def llprint(message):
  sys.stdout.write(message)
  sys.stdout.flush()

def debprint(*args):
  DEBUG = False
  if DEBUG:
    print(*args)

def genSeq(sizeTpl, min=None, max=None, default=True):
  if default:
    return np.random.binomial(1, 0.5, sizeTpl)
  assert min is not None and max is not None
  return np.random.randint(min, max, sizeTpl)
  


def generate_data(batch_size, length, maxnumberofcopies=3, currentmaxnocopies=3, testoccurance=True):
  length=length+1
  size=1 # ignore size

  minseq = 1
  maxseq = 9

  
  
  numberOfCopies = np.random.randint(low=2, high=currentmaxnocopies, size=(batch_size, 1))


  input_data = np.zeros((batch_size, length*maxnumberofcopies, size), dtype=np.float32)
  target_output = np.zeros((batch_size, length*maxnumberofcopies, size), dtype=np.float32)

  sequence = genSeq((batch_size, length-1, size), min=minseq, max=maxseq, default=False)




  #print(batch_size, length, size, maxnumberofcopies, currentmaxnocopies)

  finputdata = np.zeros((batch_size, length*maxnumberofcopies, size))
  finputdata[:, :length-1, :] = sequence
  finputdata[:, length-1, :] = ENDSYM  # the end symbol
  finputdata[:, -2, :] = ENDSYM
  finputdata[:, -1, :] = numberOfCopies  # the end symbol

  for i in range(batch_size): # assure no empty sequences
    while np.sum(sequence[i]) == 0 or (testoccurance and st.isSaved(finputdata[i], flag="testData")):
      sequence[i] = genSeq((1, length-1, size), min=minseq, max=maxseq, default=False)
    
  input_data[:, :length-1, :] = sequence
  input_data[:, length-1, :] = ENDSYM  # the end symbol
  input_data[:, -2, :] = ENDSYM
  input_data[:, -1, :] = numberOfCopies  # the end symbol

  for i in range(batch_size): # save input data to avoid intersection between train, test and validation data
    for j in range(numberOfCopies[i][0]):
      target_output[i, (j*length)-j:((j+1)*length) - (j+1), :] = sequence[i]

  #debprint("inputdata:", input_data.shape)
  #debprint("outpudtata:",target_output.shape)


  return input_data, target_output 


def combLoss(output, target):
  return mse(output, target)

def incrementCurriculum(trainError, epoch, sequence_length, maxsequence_length, curriculum_fre):
  return epoch != 0 and sequence_length < maxsequence_length and epoch % curriculum_fre == 0



Setting up a new session...


In [36]:
batch_size = 100
sequence_max_length = 5
iterations = 1*10**4
summarize_freq = 100
check_freq = 500
curriculum_freq = 5000
curriculumMaxNoCopies_freq = 5000


# input_size = output_size = args.input_size
mem_slot = 16 # number of memory slots
mem_size = 1 # size of each memory slot
read_heads = 1


maxnumberofcopies=5
currentmaxnocopies=3

input_length = 6
input_size = 1#input_length*maxnumberofcopies #+10 memory = input
output_size = 64

In [37]:

#   dirname = os.path.dirname(__file__)
#   ckpts_dir = os.path.join(dirname, 'checkpoints')
#   if not os.path.isdir(ckpts_dir):
#     os.mkdir(ckpts_dir)

datas = []

name = 'mc_' + str(uuid4().hex)[:3] + ''

import os
def create_directory_if_not_exists(directory_path):
    if not os.path.exists(directory_path):
        os.makedirs(directory_path)
        print("Directory created successfully!")
    else:
        print("Directory already exists.")

lastcp = None

create_directory_if_not_exists(name)

print(name)


debprint(input_size, output_size)

rnn = DNC(
        input_size=input_size,
        hidden_size=output_size,
        #rnn_type='lstm',
        rnn_type='rnn',
        num_layers=3,
        num_hidden_layers=1,
        dropout=0,
        nr_cells=mem_slot,
        cell_size=mem_size,
        read_heads=read_heads,
        gpu_id=-1,
        debug='store_true',
        batch_first=True,
        independent_linears=True
    )

with open(f'{name}/output.txt', 'w') as f:
  loadcp = False #= 'checkpoint_mc_631_66000.pth'
  if loadcp != False:
    rnn.load_state_dict(T.load(loadcp, weights_only=True))
    rnn.eval()

  print(rnn)

  last_save_losses = []



  optimizer = optim.Adam(rnn.parameters(), lr=0.001, eps=1e-9, betas=[0.9, 0.98]) # 0.0001

  print(sequence_max_length)

  for i in range(1, sequence_max_length+1):
    inputdataspace = 2**i*maxnumberofcopies # i bit numbers amd up to maxnumberofcopies
    testdatasize = int(inputdataspace*0.1)+1 # at least 1
    input_data, target_output = generate_data(testdatasize, i, maxnumberofcopies=maxnumberofcopies, currentmaxnocopies=maxnumberofcopies, testoccurance=False)

    for i in range(testdatasize):
      st.saveInput(input_data[i], output=target_output[i], withoutIncrement=True, flag="testData") 



  (chx, mhx, rv) = (None, None, None)
  Testloss = 0
  for epoch in tqdm(range(iterations + 1)):
    #llprint("\rIteration {ep}/{tot}".format(ep=epoch, tot=iterations))
    optimizer.zero_grad()

    random_length = np.random.randint(1, sequence_max_length + 1)
    input_data, target_output = generate_data(batch_size, random_length, maxnumberofcopies=maxnumberofcopies, currentmaxnocopies=currentmaxnocopies)
    input_data = var(T.from_numpy(input_data))
    target_output = var(T.from_numpy(target_output)) 
    # generate test data

    if rnn.debug:
      output, (chx, mhx, rv), v = rnn(input_data, (None, mhx, None), reset_experience=True, pass_through_memory=True)
    else:
      output, (chx, mhx, rv) = rnn(input_data, (None, mhx, None), reset_experience=True, pass_through_memory=True)


    loss = combLoss((output), target_output)

    if epoch % 100 == 0: # calculate test loss
      testset = st.getDataByFlag("testData") # get test data
      testlosses = []

      for k in range(int(len(testset) / batch_size)+1): # split to batches
        input_TEST_data = np.zeros((batch_size, (sequence_max_length+1)*maxnumberofcopies, 1), dtype=np.float32)
        target_TEST_output = np.zeros((batch_size, (sequence_max_length+1)*maxnumberofcopies, 1), dtype=np.float32)
        for i in range(batch_size):
          if i + k * batch_size < len(testset):
            sh1 = testset[k*batch_size+i]["input"].shape[0]
            sh2 = testset[k*batch_size+i]["output"].shape[0]
            input_TEST_data[i,:sh1] = testset[k*batch_size+i]["input"]
            target_TEST_output[i,:sh2] = testset[k*batch_size+i]["output"]
          else: # fill batch with random elements
            rel = random.choice(testset)
            sh1 = rel["input"].shape[0]
            sh2 = rel["output"].shape[0]
            input_TEST_data[i, :sh1] = rel["input"]
            target_TEST_output[i, :sh1] = rel["output"]
        input_TEST_data = var(T.from_numpy(input_TEST_data))
        target_TEST_output = var(T.from_numpy(target_TEST_output))

        TEST_output = np.zeros(target_TEST_output.shape)
        if rnn.debug:
          eTEST_output, _, _ = rnn(input_data, (None, mhx, None), reset_experience=True, pass_through_memory=True)
        else:
          eTEST_output, _ = rnn(input_data, (None, mhx, None), reset_experience=True, pass_through_memory=True)

        TEST_output[:eTEST_output.shape[0], :eTEST_output.shape[1], :eTEST_output.shape[2]] = eTEST_output.data.cpu().numpy()
        TEST_output = var(T.from_numpy(TEST_output))
        MyTestloss = combLoss((TEST_output), target_TEST_output).item() # calculate test loss
        testlosses.append(MyTestloss)
      Testloss = np.mean(testlosses) # calculate average

    datas.append({"epoch": epoch, "loss": loss.item(), "testloss": Testloss, "sequencelength": input_length})



    loss.backward()

    T.nn.utils.clip_grad_norm_(rnn.parameters(), 50)
    optimizer.step()
    loss_value = loss.item()

    summarize = (epoch % summarize_freq == 0)
    take_checkpoint = (epoch != 0) and (epoch % check_freq == 0)
    

    # detach memory from graph
    mhx = { k : (v.detach() if isinstance(v, var) else v) for k, v in mhx.items() }

    last_save_losses.append(loss_value)

    if summarize:
      loss = np.mean(last_save_losses)
      llprint("\n\tAvg. Loss: %.4f\n" % (loss))
      if np.isnan(loss):
        raise Exception('nan Loss')

    if summarize and rnn.debug:

      loss = np.mean(last_save_losses)
      last_save_losses = []

      viz.heatmap(
            v['memory'],
            opts=dict(
                xtickstep=10,
                ytickstep=2,
                title=name + 'Memory, t: ' + str(epoch) + ', loss: ' + str(loss),
                ylabel='layer * time',
                xlabel='mem_slot * mem_size'
            )
        )

      viz.heatmap(
            v['link_matrix'][-1].reshape(mem_slot, mem_slot),
            opts=dict(
                xtickstep=10,
                ytickstep=2,
                title=name + 'Link Matrix, t: ' + str(epoch) + ', loss: ' + str(loss),
                ylabel='mem_slot',
                xlabel='mem_slot'
            )
      )
     
      viz.heatmap(
            v['precedence'],
            opts=dict(
                xtickstep=10,
                ytickstep=2,
                title=name + 'Precedence, t: ' + str(epoch) + ', loss: ' + str(loss),
                ylabel='layer * time',
                xlabel='mem_slot'
            )
      )

    if incrementCurriculum(loss, epoch, currentmaxnocopies, maxnumberofcopies, curriculumMaxNoCopies_freq):
      currentmaxnocopies = currentmaxnocopies + 1
      if currentmaxnocopies > maxnumberofcopies:
        currentmaxnocopies = maxnumberofcopies
      print("Increasing max number of copies to " + str(currentmaxnocopies))


    if incrementCurriculum(loss, epoch, sequence_max_length, 5, curriculum_freq):
      sequence_max_length = sequence_max_length + 1
      print("Increasing max length to " + str(sequence_max_length))

    
    if summarize:
      random_length = np.random.randint(2, sequence_max_length + 1)
      input_data, target_output = generate_data(batch_size, random_length,  maxnumberofcopies=maxnumberofcopies, currentmaxnocopies=currentmaxnocopies)

      input_data = var(T.from_numpy(input_data))
      target_output = var(T.from_numpy(target_output))

      if rnn.debug:
        output, (chx, mhx, rv), v = rnn(input_data, (None, mhx, None), reset_experience=True, pass_through_memory=True)
      else:
        output, (chx, mhx, rv) = rnn(input_data, (None, mhx, None), reset_experience=True, pass_through_memory=True)

      print("\n\n")
      print("Input: ", torch.flatten(input_data[0]))
      print("Output: ", torch.flatten(torch.round(output[0], decimals=1)))
      print("Target: ", torch.flatten(target_output[0]))
      print("MSE Loss: ", str(mse(output, target_output).item()))
      print("CE Loss: ", str(criterion(output, target_output).item()))
      print("EXP Loss: ", str(exp_loss(output, target_output).item()))
      print("\n\n")
    
    if take_checkpoint:
      cur_weights = rnn.state_dict()
      T.save(cur_weights, f'{name}/checkpoint_{epoch}.pth')
      lastcp = f'{name}/checkpoint_{epoch}.pth'
      df = pd.DataFrame(datas)
      pickle.dump(df, open(f"{name}/df_{epoch}.pkl", "wb"))


  df = pd.DataFrame(datas) # plot loss 
  pickle.dump(df, open(f"{name}/df_total.pkl", "wb"))


  fig = go.Figure()
  fig.add_trace(go.Scatter(x=df["epoch"], y=df["loss"], mode='lines', name='Train Data'))
  fig.add_trace(go.Scatter(x=df["epoch"], y=df["testloss"], mode='lines', name='Test Data'))
  fig.update_layout(title='Losses', xaxis_title='Epoch', yaxis_title='Loss')
  fig.show()
  fig.write_html(f"{name}/losses.html")





Directory created successfully!
mc_915

----------------------------------------
DNC(1, 64, rnn_type=rnn, num_layers=3, num_hidden_layers=1, nr_cells=16, read_heads=1, cell_size=1, independent_linears=True, debug=store_true)
DNC(
  (rnn_layer_0): RNN(2, 64, batch_first=True)
  (rnn_layer_1): RNN(65, 64, batch_first=True)
  (rnn_layer_2): RNN(65, 64, batch_first=True)
  (rnn_layer_memory_shared): Memory(
    (read_keys_transform): Linear(in_features=64, out_features=1, bias=True)
    (read_strengths_transform): Linear(in_features=64, out_features=1, bias=True)
    (write_key_transform): Linear(in_features=64, out_features=1, bias=True)
    (write_strength_transform): Linear(in_features=64, out_features=1, bias=True)
    (erase_vector_transform): Linear(in_features=64, out_features=1, bias=True)
    (write_vector_transform): Linear(in_features=64, out_features=1, bias=True)
    (free_gates_transform): Linear(in_features=64, out_features=1, bias=True)
    (allocation_gate_transform): Line

  0%|          | 0/10001 [00:00<?, ?it/s]


	Avg. Loss: 7.2999


  0%|          | 1/10001 [00:01<4:28:53,  1.61s/it]




Input:  tensor([ 8.,  6., -9.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0., -9.,
         2.])
Output:  tensor([0.3000, 0.4000, 0.4000, 0.3000, 0.3000, 0.4000, 0.3000, 0.3000, 0.4000,
        0.4000, 0.4000, 0.4000, 0.4000, 0.3000, 0.2000],
       grad_fn=<ViewBackward0>)
Target:  tensor([8., 6., 8., 6., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
MSE Loss:  6.452544689178467
CE Loss:  0.4688805639743805
EXP Loss:  117.891845703125





  1%|          | 100/10001 [00:47<1:11:25,  2.31it/s]


	Avg. Loss: 2.3515


  1%|          | 101/10001 [00:48<1:35:55,  1.72it/s]




Input:  tensor([ 4.,  4.,  7., -9.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0., -9.,  2.])
Output:  tensor([4.0000, 5.0000, 5.2000, 4.6000, 5.1000, 4.5000, 2.9000, 1.1000, 0.4000,
        0.3000, 0.3000, 0.2000, 0.2000, 0.2000, 0.2000, 0.2000, 0.2000, 0.2000,
        -0.0000, 0.2000], grad_fn=<ViewBackward0>)
Target:  tensor([4., 4., 7., 4., 4., 7., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0.])
MSE Loss:  1.748117208480835
CE Loss:  -4.298061847686768
EXP Loss:  4.24885368347168





  2%|▏         | 200/10001 [01:28<1:12:23,  2.26it/s]


	Avg. Loss: 1.4857


  2%|▏         | 201/10001 [01:29<1:52:29,  1.45it/s]




Input:  tensor([ 4.,  3.,  7.,  3.,  3., -9.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        -9.,  2.])
Output:  tensor([ 4.5000,  4.8000,  5.0000,  4.6000,  4.7000,  4.5000,  5.4000,  4.9000,
         4.4000,  1.6000,  0.1000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000,
        -0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000,
        -0.0000, -0.0000, -0.0000,  0.0000, -0.1000,  0.1000],
       grad_fn=<ViewBackward0>)
Target:  tensor([4., 3., 7., 3., 3., 4., 3., 7., 3., 3., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
MSE Loss:  1.5839447975158691
CE Loss:  -4.817359447479248
EXP Loss:  4.163693428039551





  3%|▎         | 300/10001 [02:12<59:12,  2.73it/s]  


	Avg. Loss: 1.1982


  3%|▎         | 301/10001 [02:13<1:32:23,  1.75it/s]




Input:  tensor([ 2.,  3.,  3.,  2., -9.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0., -9.,  2.])
Output:  tensor([ 2.5000,  3.8000,  3.2000,  3.3000,  4.2000,  5.0000,  4.6000,  1.5000,
         0.1000, -0.1000, -0.1000, -0.1000, -0.1000, -0.0000, -0.0000, -0.0000,
        -0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000,
         0.1000], grad_fn=<ViewBackward0>)
Target:  tensor([2., 3., 3., 2., 2., 3., 3., 2., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0.])
MSE Loss:  1.4169195890426636
CE Loss:  -4.9470367431640625
EXP Loss:  3.380463123321533





  4%|▍         | 400/10001 [02:53<1:05:03,  2.46it/s]


	Avg. Loss: 0.8294


  4%|▍         | 401/10001 [02:55<2:07:09,  1.26it/s]




Input:  tensor([ 2.,  6.,  3.,  2.,  1., -9.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        -9.,  2.])
Output:  tensor([ 1.8000,  6.4000,  2.3000,  1.6000,  2.0000,  3.1000,  3.3000,  4.5000,
         3.4000,  0.1000, -0.2000, -0.1000, -0.0000, -0.0000, -0.0000, -0.0000,
        -0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000,
        -0.0000, -0.0000, -0.0000, -0.0000, -0.0000,  0.0000],
       grad_fn=<ViewBackward0>)
Target:  tensor([2., 6., 3., 2., 1., 2., 6., 3., 2., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
MSE Loss:  0.8806023001670837
CE Loss:  -5.07850456237793
EXP Loss:  2.2617127895355225





  5%|▍         | 500/10001 [03:36<45:01,  3.52it/s]  


	Avg. Loss: 0.6119


  5%|▌         | 501/10001 [03:37<1:09:40,  2.27it/s]




Input:  tensor([ 1.,  1.,  7.,  4.,  5., -9.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        -9.,  2.])
Output:  tensor([ 0.8000,  0.9000,  4.2000,  3.0000,  3.8000,  2.2000,  3.1000,  3.4000,
         4.3000,  4.9000, -0.5000,  0.0000, -0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000, -0.0000,  0.1000],
       grad_fn=<ViewBackward0>)
Target:  tensor([1., 1., 7., 4., 5., 1., 1., 7., 4., 5., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
MSE Loss:  0.7427048087120056
CE Loss:  -5.439847469329834
EXP Loss:  1.802642822265625





  6%|▌         | 600/10001 [04:15<1:06:05,  2.37it/s]


	Avg. Loss: 0.3786


  6%|▌         | 601/10001 [04:15<1:23:25,  1.88it/s]




Input:  tensor([ 4.,  8.,  2.,  2.,  3., -9.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        -9.,  2.])
Output:  tensor([ 4.4000,  7.9000,  1.6000,  1.3000,  3.1000,  5.1000,  3.9000,  3.0000,
         3.7000,  1.8000, -0.3000, -0.0000,  0.0000,  0.1000,  0.1000,  0.1000,
         0.1000,  0.1000,  0.1000,  0.1000,  0.1000,  0.1000,  0.1000,  0.1000,
         0.1000,  0.1000,  0.1000,  0.1000, -0.0000,  0.1000],
       grad_fn=<ViewBackward0>)
Target:  tensor([4., 8., 2., 2., 3., 4., 8., 2., 2., 3., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
MSE Loss:  0.5593560934066772
CE Loss:  -5.682557106018066
EXP Loss:  1.2560842037200928





  7%|▋         | 700/10001 [04:56<56:19,  2.75it/s]  


	Avg. Loss: 0.4170


  7%|▋         | 701/10001 [04:56<1:00:04,  2.58it/s]




Input:  tensor([ 2.,  3., -9.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0., -9.,
         2.])
Output:  tensor([ 1.8000,  3.0000,  2.1000,  3.5000,  0.0000,  0.1000, -0.0000, -0.0000,
        -0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.1000,  0.0000],
       grad_fn=<ViewBackward0>)
Target:  tensor([2., 3., 2., 3., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
MSE Loss:  0.13827800750732422
CE Loss:  -4.381918907165527
EXP Loss:  0.29077351093292236





  8%|▊         | 800/10001 [05:39<1:09:09,  2.22it/s]


	Avg. Loss: 0.2901


  8%|▊         | 801/10001 [05:40<1:29:08,  1.72it/s]




Input:  tensor([ 5.,  1.,  4.,  5.,  5., -9.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        -9.,  2.])
Output:  tensor([5.6000, 1.9000, 4.6000, 6.1000, 6.4000, 3.7000, 2.0000, 2.9000, 4.4000,
        7.3000, 0.1000, 0.3000, 0.2000, 0.2000, 0.2000, 0.2000, 0.2000, 0.1000,
        0.1000, 0.1000, 0.1000, 0.1000, 0.1000, 0.1000, 0.1000, 0.1000, 0.1000,
        0.1000, 0.1000, 0.1000], grad_fn=<ViewBackward0>)
Target:  tensor([5., 1., 4., 5., 5., 5., 1., 4., 5., 5., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
MSE Loss:  0.43477165699005127
CE Loss:  -6.393901824951172
EXP Loss:  0.8916178941726685





  9%|▉         | 900/10001 [06:20<1:07:43,  2.24it/s]


	Avg. Loss: 0.2388


  9%|▉         | 901/10001 [06:22<1:38:56,  1.53it/s]




Input:  tensor([ 8.,  3.,  3.,  8.,  8., -9.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        -9.,  2.])
Output:  tensor([ 7.2000,  2.8000,  2.9000,  7.5000,  8.2000,  5.8000,  2.8000,  3.6000,
         5.4000,  7.2000, -0.2000,  0.1000, -0.0000,  0.1000,  0.1000,  0.1000,
         0.1000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.1000],
       grad_fn=<ViewBackward0>)
Target:  tensor([8., 3., 3., 8., 8., 8., 3., 3., 8., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
MSE Loss:  0.403519868850708
CE Loss:  -6.154924392700195
EXP Loss:  0.7999351024627686





 10%|▉         | 1000/10001 [07:00<57:11,  2.62it/s] 


	Avg. Loss: 0.1756


 10%|█         | 1001/10001 [07:01<1:17:03,  1.95it/s]




Input:  tensor([ 2.,  1.,  5.,  2., -9.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0., -9.,  2.])
Output:  tensor([ 1.6000,  0.7000,  4.6000,  1.8000,  0.6000,  2.3000,  3.1000,  3.3000,
        -0.1000, -0.1000, -0.0000, -0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000], grad_fn=<ViewBackward0>)
Target:  tensor([2., 1., 5., 2., 2., 1., 5., 2., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0.])
MSE Loss:  0.4469752013683319
CE Loss:  -4.945995330810547
EXP Loss:  0.9431663751602173





 11%|█         | 1100/10001 [07:39<41:58,  3.53it/s]  


	Avg. Loss: 0.1465


 11%|█         | 1101/10001 [07:39<49:40,  2.99it/s]




Input:  tensor([ 4.,  2., -9.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0., -9.,
         2.])
Output:  tensor([ 4.1000,  2.3000,  4.8000,  1.9000, -0.1000,  0.0000, -0.1000, -0.1000,
        -0.1000, -0.1000, -0.1000, -0.0000, -0.0000, -0.1000, -0.0000],
       grad_fn=<ViewBackward0>)
Target:  tensor([4., 2., 4., 2., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
MSE Loss:  0.044460445642471313
CE Loss:  -5.048004627227783
EXP Loss:  0.1557455062866211





 12%|█▏        | 1200/10001 [08:20<1:12:28,  2.02it/s]


	Avg. Loss: 0.1991


 12%|█▏        | 1201/10001 [08:21<1:16:46,  1.91it/s]




Input:  tensor([ 3.,  5.,  2.,  7.,  5., -9.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        -9.,  2.])
Output:  tensor([3.2000, 5.4000, 2.0000, 6.8000, 3.7000, 3.5000, 4.4000, 3.0000, 5.7000,
        5.5000, 0.9000, 0.2000, 0.1000, 0.1000, 0.2000, 0.1000, 0.1000, 0.1000,
        0.1000, 0.1000, 0.1000, 0.1000, 0.1000, 0.1000, 0.1000, 0.1000, 0.1000,
        0.1000, 0.1000, 0.1000], grad_fn=<ViewBackward0>)
Target:  tensor([3., 5., 2., 7., 5., 3., 5., 2., 7., 5., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
MSE Loss:  0.3190845251083374
CE Loss:  -6.472901821136475
EXP Loss:  0.6273237466812134





 13%|█▎        | 1300/10001 [09:01<1:12:32,  2.00it/s]


	Avg. Loss: 0.1029


 13%|█▎        | 1301/10001 [09:02<1:16:32,  1.89it/s]




Input:  tensor([ 3.,  8.,  3.,  5., -9.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0., -9.,  2.])
Output:  tensor([2.9000, 7.7000, 2.8000, 4.9000, 2.8000, 7.4000, 3.4000, 4.7000, 0.2000,
        -0.0000, -0.0000, -0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
       grad_fn=<ViewBackward0>)
Target:  tensor([3., 8., 3., 5., 3., 8., 3., 5., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0.])
MSE Loss:  0.07746512442827225
CE Loss:  -5.54736328125
EXP Loss:  0.1775836944580078





 14%|█▍        | 1400/10001 [09:44<1:08:03,  2.11it/s]


	Avg. Loss: 0.0885


 14%|█▍        | 1401/10001 [09:45<1:33:13,  1.54it/s]




Input:  tensor([ 5.,  5.,  4.,  6.,  1., -9.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        -9.,  2.])
Output:  tensor([5.5000, 5.1000, 3.8000, 6.3000, 0.7000, 4.7000, 5.4000, 3.5000, 6.7000,
        1.3000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000], grad_fn=<ViewBackward0>)
Target:  tensor([5., 5., 4., 6., 1., 5., 5., 4., 6., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
MSE Loss:  0.07017352432012558
CE Loss:  -6.929930210113525
EXP Loss:  0.1859428882598877





 15%|█▍        | 1500/10001 [10:23<58:51,  2.41it/s]  


	Avg. Loss: 0.0442


 15%|█▌        | 1501/10001 [10:24<1:18:34,  1.80it/s]




Input:  tensor([ 3.,  4.,  3.,  3.,  4., -9.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        -9.,  2.])
Output:  tensor([ 2.7000,  3.8000,  2.9000,  2.5000,  4.5000,  3.6000,  5.7000,  2.5000,
         2.4000,  3.8000, -0.3000,  0.0000, -0.1000, -0.1000, -0.0000, -0.0000,
        -0.0000, -0.1000, -0.1000, -0.1000, -0.1000, -0.1000, -0.1000, -0.1000,
        -0.1000, -0.1000, -0.1000, -0.1000, -0.1000, -0.1000],
       grad_fn=<ViewBackward0>)
Target:  tensor([3., 4., 3., 3., 4., 3., 4., 3., 3., 4., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
MSE Loss:  0.09546205401420593
CE Loss:  -6.593924522399902
EXP Loss:  0.23866140842437744





 16%|█▌        | 1600/10001 [11:05<1:00:11,  2.33it/s]


	Avg. Loss: 0.0817


 16%|█▌        | 1601/10001 [11:06<1:26:45,  1.61it/s]




Input:  tensor([ 1.,  2.,  7.,  2., -9.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0., -9.,  2.])
Output:  tensor([1.1000, 2.0000, 6.6000, 2.0000, 0.8000, 2.4000, 6.4000, 2.1000, 0.1000,
        0.0000, 0.0000, 0.0000, 0.1000, 0.1000, 0.1000, 0.1000, 0.1000, 0.1000,
        0.1000, 0.1000, 0.1000, 0.1000, 0.1000, 0.0000, 0.1000],
       grad_fn=<ViewBackward0>)
Target:  tensor([1., 2., 7., 2., 1., 2., 7., 2., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0.])
MSE Loss:  0.044556546956300735
CE Loss:  -6.118283748626709
EXP Loss:  0.164880633354187





 17%|█▋        | 1700/10001 [11:47<58:21,  2.37it/s]  


	Avg. Loss: 0.0692


 17%|█▋        | 1701/10001 [11:48<1:02:57,  2.20it/s]




Input:  tensor([ 1.,  1.,  6.,  5., -9.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0., -9.,  2.])
Output:  tensor([ 1.0000,  1.1000,  5.6000,  4.8000,  1.0000,  2.0000,  6.5000,  4.1000,
        -0.1000, -0.1000, -0.1000, -0.1000, -0.1000, -0.0000, -0.0000, -0.0000,
        -0.1000, -0.1000, -0.1000, -0.1000, -0.1000, -0.1000, -0.1000, -0.0000,
        -0.1000], grad_fn=<ViewBackward0>)
Target:  tensor([1., 1., 6., 5., 1., 1., 6., 5., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0.])
MSE Loss:  0.18026253581047058
CE Loss:  -5.840744972229004
EXP Loss:  0.38291072845458984





 18%|█▊        | 1800/10001 [12:30<55:09,  2.48it/s]  


	Avg. Loss: 0.0440


 18%|█▊        | 1801/10001 [12:30<1:16:57,  1.78it/s]




Input:  tensor([ 3.,  8., -9.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0., -9.,
         2.])
Output:  tensor([3.2000, 7.9000, 2.7000, 7.8000, 0.1000, 0.1000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
       grad_fn=<ViewBackward0>)
Target:  tensor([3., 8., 3., 8., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
MSE Loss:  0.006616190541535616
CE Loss:  -4.960816860198975
EXP Loss:  0.053437232971191406





 19%|█▉        | 1900/10001 [13:15<49:56,  2.70it/s]  


	Avg. Loss: 0.0446


 19%|█▉        | 1901/10001 [13:16<1:13:27,  1.84it/s]




Input:  tensor([ 8.,  6., -9.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0., -9.,
         2.])
Output:  tensor([7.9000, 6.1000, 8.1000, 6.0000, 0.0000, -0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
       grad_fn=<ViewBackward0>)
Target:  tensor([8., 6., 8., 6., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
MSE Loss:  0.016184519976377487
CE Loss:  -4.981228828430176
EXP Loss:  0.06941449642181396





 20%|█▉        | 2000/10001 [14:27<1:20:43,  1.65it/s]


	Avg. Loss: 0.0468


 20%|██        | 2001/10001 [14:30<2:25:27,  1.09s/it]




Input:  tensor([ 3.,  4.,  5., -9.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0., -9.,  2.])
Output:  tensor([ 2.8000,  3.6000,  4.9000,  2.7000,  3.9000,  5.4000, -0.2000, -0.0000,
        -0.2000, -0.1000, -0.1000, -0.1000, -0.1000, -0.1000, -0.1000, -0.1000,
        -0.1000, -0.1000, -0.1000, -0.1000], grad_fn=<ViewBackward0>)
Target:  tensor([3., 4., 5., 3., 4., 5., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0.])
MSE Loss:  0.09958479553461075
CE Loss:  -5.570570945739746
EXP Loss:  0.2798633575439453





 21%|██        | 2100/10001 [15:43<50:28,  2.61it/s]  


	Avg. Loss: 0.0374


 21%|██        | 2101/10001 [15:44<1:13:45,  1.79it/s]




Input:  tensor([ 5.,  7.,  4.,  5., -9.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0., -9.,  2.])
Output:  tensor([ 5.0000,  6.9000,  3.9000,  4.9000,  5.1000,  7.3000,  3.8000,  5.2000,
        -0.1000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000,
        -0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000,
        -0.0000], grad_fn=<ViewBackward0>)
Target:  tensor([5., 7., 4., 5., 5., 7., 4., 5., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0.])
MSE Loss:  0.016299040988087654
CE Loss:  -6.079854488372803
EXP Loss:  0.07696986198425293





 22%|██▏       | 2200/10001 [16:52<1:17:03,  1.69it/s]


	Avg. Loss: 0.0483


 22%|██▏       | 2201/10001 [16:53<1:40:38,  1.29it/s]




Input:  tensor([ 7.,  2.,  5.,  4.,  8., -9.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        -9.,  2.])
Output:  tensor([7.0000, 1.7000, 5.1000, 4.2000, 7.4000, 7.0000, 1.3000, 4.5000, 3.6000,
        8.1000, 0.1000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        -0.0000, -0.0000, 0.0000], grad_fn=<ViewBackward0>)
Target:  tensor([7., 2., 5., 4., 8., 7., 2., 5., 4., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
MSE Loss:  0.070185586810112
CE Loss:  -6.214542388916016
EXP Loss:  0.1704387664794922





 23%|██▎       | 2300/10001 [18:01<1:18:07,  1.64it/s]


	Avg. Loss: 0.0214


 23%|██▎       | 2301/10001 [18:02<1:40:06,  1.28it/s]




Input:  tensor([ 6.,  5., -9.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0., -9.,
         2.])
Output:  tensor([6.2000, 5.0000, 6.0000, 4.8000, -0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
       grad_fn=<ViewBackward0>)
Target:  tensor([6., 5., 6., 5., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
MSE Loss:  0.005744979716837406
CE Loss:  -5.14164400100708
EXP Loss:  0.04119682312011719





 24%|██▍       | 2400/10001 [19:09<1:06:42,  1.90it/s]


	Avg. Loss: 0.0494


 24%|██▍       | 2401/10001 [19:10<1:34:30,  1.34it/s]




Input:  tensor([ 4.,  4.,  5.,  3., -9.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0., -9.,  2.])
Output:  tensor([ 3.9000,  3.8000,  4.8000,  2.7000,  3.1000,  4.3000,  4.1000,  5.7000,
         2.1000, -0.1000, -0.1000, -0.1000, -0.1000, -0.1000, -0.1000, -0.1000,
        -0.1000, -0.1000, -0.1000, -0.1000, -0.1000, -0.1000, -0.1000, -0.1000,
        -0.1000], grad_fn=<ViewBackward0>)
Target:  tensor([4., 4., 5., 3., 4., 4., 5., 3., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0.])
MSE Loss:  0.9803547859191895
CE Loss:  -5.737139701843262
EXP Loss:  7.0630950927734375





 25%|██▍       | 2500/10001 [20:17<1:42:57,  1.21it/s]


	Avg. Loss: 0.0488


 25%|██▌       | 2501/10001 [20:18<1:51:51,  1.12it/s]




Input:  tensor([ 5.,  3., -9.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0., -9.,
         2.])
Output:  tensor([5.2000, 3.4000, 5.0000, 3.1000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
       grad_fn=<ViewBackward0>)
Target:  tensor([5., 3., 5., 3., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
MSE Loss:  0.011586567386984825
CE Loss:  -5.122898578643799
EXP Loss:  0.06689083576202393





 26%|██▌       | 2600/10001 [21:28<1:47:27,  1.15it/s]


	Avg. Loss: 0.0332


 26%|██▌       | 2601/10001 [21:31<2:36:54,  1.27s/it]




Input:  tensor([ 2.,  7.,  7.,  8.,  5., -9.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        -9.,  2.])
Output:  tensor([2.3000, 7.1000, 7.0000, 7.6000, 4.8000, 2.1000, 6.5000, 6.7000, 7.3000,
        4.7000, -0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, -0.0000,
        -0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000,
        -0.0000, -0.0000, -0.0000], grad_fn=<ViewBackward0>)
Target:  tensor([2., 7., 7., 8., 5., 2., 7., 7., 8., 5., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
MSE Loss:  0.03251608461141586
CE Loss:  -6.4777302742004395
EXP Loss:  0.10326361656188965





 27%|██▋       | 2700/10001 [22:33<1:29:51,  1.35it/s]


	Avg. Loss: 0.0138


 27%|██▋       | 2701/10001 [22:35<2:00:12,  1.01it/s]




Input:  tensor([ 1.,  6.,  7.,  6.,  2., -9.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        -9.,  2.])
Output:  tensor([1.0000, 5.8000, 6.9000, 6.1000, 2.1000, 1.3000, 6.8000, 8.0000, 6.5000,
        1.9000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000], grad_fn=<ViewBackward0>)
Target:  tensor([1., 6., 7., 6., 2., 1., 6., 7., 6., 2., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
MSE Loss:  0.049570232629776
CE Loss:  -6.336184024810791
EXP Loss:  0.1418827772140503





 28%|██▊       | 2800/10001 [23:43<1:07:12,  1.79it/s]


	Avg. Loss: 0.0376


 28%|██▊       | 2801/10001 [23:44<1:17:21,  1.55it/s]




Input:  tensor([ 2.,  3.,  6.,  5.,  4., -9.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        -9.,  2.])
Output:  tensor([ 2.2000,  3.0000,  6.1000,  5.2000,  4.6000,  1.5000,  2.4000,  5.7000,
         5.0000,  3.8000, -0.2000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000,
        -0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000,
        -0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000],
       grad_fn=<ViewBackward0>)
Target:  tensor([2., 3., 6., 5., 4., 2., 3., 6., 5., 4., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
MSE Loss:  0.03167504817247391
CE Loss:  -6.37960147857666
EXP Loss:  0.11891436576843262





 29%|██▉       | 2900/10001 [24:49<1:13:17,  1.61it/s]


	Avg. Loss: 0.0397


 29%|██▉       | 2901/10001 [24:51<1:37:41,  1.21it/s]




Input:  tensor([ 2.,  5.,  4.,  6.,  8., -9.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        -9.,  2.])
Output:  tensor([ 2.1000,  5.1000,  4.1000,  6.2000,  7.9000,  2.2000,  4.6000,  3.9000,
         5.6000,  7.7000, -0.1000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       grad_fn=<ViewBackward0>)
Target:  tensor([2., 5., 4., 6., 8., 2., 5., 4., 6., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
MSE Loss:  0.023137662559747696
CE Loss:  -6.369584560394287
EXP Loss:  0.08621633052825928





 30%|██▉       | 3000/10001 [25:55<1:09:47,  1.67it/s]


	Avg. Loss: 0.0185



Input:  tensor([ 3.,  1., -9.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0., -9.,
         2.])
Output:  tensor([ 3.0000,  1.0000,  2.9000,  1.3000, -0.1000,  0.0000, -0.0000,  0.0000,
        -0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000],
       grad_fn=<ViewBackward0>)
Target:  tensor([3., 1., 3., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
MSE Loss:  0.011965952813625336
CE Loss:  -4.96260929107666
EXP Loss:  0.05484163761138916





 31%|███       | 3100/10001 [27:02<58:53,  1.95it/s]  


	Avg. Loss: 0.0343


 31%|███       | 3101/10001 [27:04<1:24:46,  1.36it/s]




Input:  tensor([ 6.,  7.,  1., -9.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0., -9.,  2.])
Output:  tensor([5.9000, 7.1000, 1.2000, 5.9000, 7.1000, 1.3000, 0.0000, -0.0000, -0.0000,
        0.0000, 0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000,
        -0.0000, -0.0000], grad_fn=<ViewBackward0>)
Target:  tensor([6., 7., 1., 6., 7., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0.])
MSE Loss:  0.008666068315505981
CE Loss:  -5.610889911651611
EXP Loss:  0.048546671867370605





 32%|███▏      | 3200/10001 [28:20<1:41:16,  1.12it/s]


	Avg. Loss: 0.0511


 32%|███▏      | 3201/10001 [28:22<2:17:56,  1.22s/it]




Input:  tensor([ 8.,  6., -9.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0., -9.,
         2.])
Output:  tensor([8.0000, 6.1000, 8.3000, 5.8000, -0.0000, 0.0000, -0.0000, -0.0000, -0.0000,
        -0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000],
       grad_fn=<ViewBackward0>)
Target:  tensor([8., 6., 8., 6., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
MSE Loss:  0.012355665676295757
CE Loss:  -4.651134967803955
EXP Loss:  0.05789780616760254





 33%|███▎      | 3300/10001 [29:21<54:44,  2.04it/s]  


	Avg. Loss: 0.0108


 33%|███▎      | 3301/10001 [29:22<58:27,  1.91it/s]




Input:  tensor([ 4.,  6.,  1., -9.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0., -9.,  2.])
Output:  tensor([3.9000, 6.1000, 1.2000, 3.9000, 6.3000, 1.5000, 0.0000, -0.0000, -0.0000,
        -0.0000, 0.0000, 0.0000, 0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000,
        -0.0000, -0.0000], grad_fn=<ViewBackward0>)
Target:  tensor([4., 6., 1., 4., 6., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0.])
MSE Loss:  0.018585428595542908
CE Loss:  -6.000415802001953
EXP Loss:  0.07255089282989502





 34%|███▍      | 3400/10001 [30:13<45:41,  2.41it/s]  


	Avg. Loss: 0.0388


 34%|███▍      | 3401/10001 [30:13<1:01:22,  1.79it/s]




Input:  tensor([ 4.,  7.,  4.,  2., -9.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0., -9.,  2.])
Output:  tensor([3.9000, 6.8000, 3.7000, 1.7000, 4.1000, 7.0000, 4.2000, 1.9000, -0.0000,
        -0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000,
        -0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000],
       grad_fn=<ViewBackward0>)
Target:  tensor([4., 7., 4., 2., 4., 7., 4., 2., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0.])
MSE Loss:  0.027270376682281494
CE Loss:  -6.381206035614014
EXP Loss:  0.10325872898101807





 35%|███▍      | 3500/10001 [31:01<1:05:25,  1.66it/s]


	Avg. Loss: 0.0301


 35%|███▌      | 3501/10001 [31:02<1:11:33,  1.51it/s]




Input:  tensor([ 1.,  6.,  6., -9.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0., -9.,  2.])
Output:  tensor([1.1000, 6.0000, 6.1000, 1.0000, 5.8000, 6.5000, -0.0000, -0.0000, -0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        -0.0000, 0.0000], grad_fn=<ViewBackward0>)
Target:  tensor([1., 6., 6., 1., 6., 6., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0.])
MSE Loss:  0.009392627514898777
CE Loss:  -6.025081157684326
EXP Loss:  0.049999356269836426





 36%|███▌      | 3600/10001 [31:52<48:01,  2.22it/s]  


	Avg. Loss: 0.0148


 36%|███▌      | 3601/10001 [31:53<1:07:16,  1.59it/s]




Input:  tensor([ 5.,  5.,  5.,  8.,  7., -9.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        -9.,  2.])
Output:  tensor([4.9000, 4.9000, 4.9000, 7.9000, 7.1000, 5.1000, 4.8000, 4.8000, 8.1000,
        7.2000, 0.0000, -0.0000, -0.0000, -0.0000, 0.0000, -0.0000, -0.0000, -0.0000,
        -0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000,
        -0.0000, -0.0000, -0.0000], grad_fn=<ViewBackward0>)
Target:  tensor([5., 5., 5., 8., 7., 5., 5., 5., 8., 7., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
MSE Loss:  0.014966725371778011
CE Loss:  -6.49824857711792
EXP Loss:  0.06844115257263184





 37%|███▋      | 3700/10001 [32:38<38:00,  2.76it/s]  


	Avg. Loss: 0.0219


 37%|███▋      | 3701/10001 [32:39<45:24,  2.31it/s]




Input:  tensor([ 4.,  8.,  8., -9.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0., -9.,  2.])
Output:  tensor([ 3.8000,  7.8000,  7.2000,  3.4000,  7.9000,  7.2000, -0.1000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000], grad_fn=<ViewBackward0>)
Target:  tensor([4., 8., 8., 4., 8., 8., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0.])
MSE Loss:  0.04869192838668823
CE Loss:  -6.020100116729736
EXP Loss:  0.13255274295806885





 38%|███▊      | 3800/10001 [33:25<45:54,  2.25it/s]  


	Avg. Loss: 0.0341


 38%|███▊      | 3801/10001 [33:26<1:03:08,  1.64it/s]




Input:  tensor([ 4.,  6.,  2., -9.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0., -9.,  2.])
Output:  tensor([3.9000, 6.0000, 2.1000, 3.8000, 6.4000, 2.7000, 0.1000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000], grad_fn=<ViewBackward0>)
Target:  tensor([4., 6., 2., 4., 6., 2., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0.])
MSE Loss:  0.029180660843849182
CE Loss:  -5.773486137390137
EXP Loss:  0.1034090518951416





 39%|███▉      | 3900/10001 [34:11<48:43,  2.09it/s]  


	Avg. Loss: 0.0154


 39%|███▉      | 3901/10001 [34:12<1:19:25,  1.28it/s]




Input:  tensor([ 4.,  7.,  7.,  8.,  6., -9.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        -9.,  2.])
Output:  tensor([ 4.0000,  7.0000,  6.8000,  7.8000,  6.0000,  3.1000,  6.4000,  6.2000,
         7.7000,  5.7000, -0.1000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000,
        -0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000,
        -0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000],
       grad_fn=<ViewBackward0>)
Target:  tensor([4., 7., 7., 8., 6., 4., 7., 7., 8., 6., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
MSE Loss:  0.07139602303504944
CE Loss:  -6.17616081237793
EXP Loss:  0.1864837408065796





 40%|███▉      | 4000/10001 [34:56<46:22,  2.16it/s]  


	Avg. Loss: 0.0352


 40%|████      | 4001/10001 [34:57<1:03:57,  1.56it/s]




Input:  tensor([ 5.,  8.,  2., -9.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0., -9.,  2.])
Output:  tensor([5.1000, 8.0000, 1.9000, 5.6000, 7.9000, 1.7000, 0.1000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000], grad_fn=<ViewBackward0>)
Target:  tensor([5., 8., 2., 5., 8., 2., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0.])
MSE Loss:  0.00760385999456048
CE Loss:  -5.996551036834717
EXP Loss:  0.05282151699066162





 41%|████      | 4100/10001 [35:45<47:05,  2.09it/s]  


	Avg. Loss: 0.0118


 41%|████      | 4101/10001 [35:46<1:07:00,  1.47it/s]




Input:  tensor([ 4.,  8.,  3.,  1.,  3., -9.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        -9.,  2.])
Output:  tensor([3.9000, 8.0000, 2.9000, 1.0000, 2.9000, 3.9000, 7.6000, 2.6000, 1.2000,
        3.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000,
        -0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000,
        -0.0000, -0.0000, -0.0000], grad_fn=<ViewBackward0>)
Target:  tensor([4., 8., 3., 1., 3., 4., 8., 3., 1., 3., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
MSE Loss:  0.015337864868342876
CE Loss:  -6.409402847290039
EXP Loss:  0.07203781604766846





 42%|████▏     | 4200/10001 [36:35<44:34,  2.17it/s]  


	Avg. Loss: 0.0311


 42%|████▏     | 4201/10001 [36:36<57:34,  1.68it/s]




Input:  tensor([ 4.,  2.,  5., -9.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0., -9.,  2.])
Output:  tensor([ 3.7000,  1.4000,  4.5000,  4.9000,  1.8000,  4.6000, -0.1000, -0.1000,
        -0.1000, -0.1000, -0.1000, -0.1000, -0.1000, -0.1000, -0.1000, -0.1000,
        -0.1000, -0.1000, -0.1000, -0.1000], grad_fn=<ViewBackward0>)
Target:  tensor([4., 2., 5., 4., 2., 5., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0.])
MSE Loss:  0.06735306978225708
CE Loss:  -5.417069435119629
EXP Loss:  0.22889304161071777





 43%|████▎     | 4300/10001 [37:21<33:07,  2.87it/s]


	Avg. Loss: 0.0153


 43%|████▎     | 4301/10001 [37:22<1:04:35,  1.47it/s]




Input:  tensor([ 4.,  3.,  5.,  2., -9.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0., -9.,  2.])
Output:  tensor([3.9000, 3.0000, 4.8000, 1.8000, 4.1000, 2.9000, 4.7000, 1.6000, 0.0000,
        -0.0000, -0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, -0.0000, -0.0000, -0.0000],
       grad_fn=<ViewBackward0>)
Target:  tensor([4., 3., 5., 2., 4., 3., 5., 2., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0.])
MSE Loss:  0.01654261350631714
CE Loss:  -5.670762062072754
EXP Loss:  0.06738638877868652





 44%|████▍     | 4400/10001 [38:13<39:03,  2.39it/s]  


	Avg. Loss: 0.0158


 44%|████▍     | 4401/10001 [38:14<54:29,  1.71it/s]




Input:  tensor([ 2.,  7.,  6.,  6., -9.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0., -9.,  2.])
Output:  tensor([2.1000, 7.1000, 6.0000, 6.0000, 1.7000, 7.1000, 5.8000, 5.7000, -0.0000,
        -0.0000, 0.0000, -0.0000, 0.0000, 0.0000, 0.0000, 0.0000, -0.0000, -0.0000,
        -0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000],
       grad_fn=<ViewBackward0>)
Target:  tensor([2., 7., 6., 6., 2., 7., 6., 6., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0.])
MSE Loss:  0.015120355412364006
CE Loss:  -6.697145462036133
EXP Loss:  0.06615591049194336





 45%|████▍     | 4500/10001 [39:05<49:06,  1.87it/s]  


	Avg. Loss: 0.0367


 45%|████▌     | 4501/10001 [39:06<1:08:51,  1.33it/s]




Input:  tensor([ 8.,  3.,  5.,  8.,  1., -9.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        -9.,  2.])
Output:  tensor([7.9000, 3.1000, 5.1000, 7.8000, 1.6000, 8.0000, 3.2000, 5.3000, 7.6000,
        1.2000, 0.1000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        -0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000,
        -0.0000, -0.0000, -0.0000], grad_fn=<ViewBackward0>)
Target:  tensor([8., 3., 5., 8., 1., 8., 3., 5., 8., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
MSE Loss:  0.014307595789432526
CE Loss:  -6.884422302246094
EXP Loss:  0.06545233726501465





 46%|████▌     | 4600/10001 [39:55<39:59,  2.25it/s]  


	Avg. Loss: 0.0143


 46%|████▌     | 4601/10001 [39:56<43:19,  2.08it/s]




Input:  tensor([ 5.,  7., -9.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0., -9.,
         2.])
Output:  tensor([4.8000, 7.0000, 5.6000, 6.4000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000,
        -0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000],
       grad_fn=<ViewBackward0>)
Target:  tensor([5., 7., 5., 7., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
MSE Loss:  0.03385835140943527
CE Loss:  -4.935748100280762
EXP Loss:  0.10140466690063477





 47%|████▋     | 4700/10001 [40:44<45:31,  1.94it/s]  


	Avg. Loss: 0.0180


 47%|████▋     | 4701/10001 [40:45<57:51,  1.53it/s]




Input:  tensor([ 3.,  3., -9.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0., -9.,
         2.])
Output:  tensor([2.9000, 2.9000, 3.1000, 2.8000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000,
        -0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000],
       grad_fn=<ViewBackward0>)
Target:  tensor([3., 3., 3., 3., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
MSE Loss:  0.01113063283264637
CE Loss:  -5.274181842803955
EXP Loss:  0.061723947525024414





 48%|████▊     | 4800/10001 [41:34<36:31,  2.37it/s]


	Avg. Loss: 0.0158


 48%|████▊     | 4801/10001 [41:35<43:51,  1.98it/s]




Input:  tensor([ 1.,  1.,  3.,  5.,  5., -9.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        -9.,  2.])
Output:  tensor([0.9000, 1.1000, 3.0000, 5.3000, 5.0000, 1.5000, 0.8000, 3.7000, 5.0000,
        3.0000, -0.0000, 0.0000, 0.1000, 0.1000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000], grad_fn=<ViewBackward0>)
Target:  tensor([1., 1., 3., 5., 5., 1., 1., 3., 5., 5., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
MSE Loss:  0.3195382356643677
CE Loss:  -5.849493026733398
EXP Loss:  0.8152092695236206





 49%|████▉     | 4900/10001 [42:25<47:32,  1.79it/s]


	Avg. Loss: 0.0303


 49%|████▉     | 4901/10001 [42:26<1:05:48,  1.29it/s]




Input:  tensor([ 2.,  8.,  6.,  4., -9.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0., -9.,  2.])
Output:  tensor([2.1000, 8.1000, 6.3000, 4.2000, 2.0000, 8.0000, 6.2000, 4.2000, -0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
       grad_fn=<ViewBackward0>)
Target:  tensor([2., 8., 6., 4., 2., 8., 6., 4., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0.])
MSE Loss:  0.010616587474942207
CE Loss:  -6.714508533477783
EXP Loss:  0.07175576686859131





 50%|████▉     | 5000/10001 [43:10<38:27,  2.17it/s]  


	Avg. Loss: 0.0209


 50%|█████     | 5001/10001 [43:11<48:40,  1.71it/s]

Increasing max number of copies to 4



Input:  tensor([ 5.,  1., -9.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0., -9.,
         2.])
Output:  tensor([5.2000, 1.2000, 5.0000, 1.4000, -0.0000, -0.0000, 0.0000, 0.0000, 0.0000,
        -0.0000, -0.0000, -0.0000, -0.0000, 0.0000, 0.0000],
       grad_fn=<ViewBackward0>)
Target:  tensor([5., 1., 5., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
MSE Loss:  1.8168104887008667
CE Loss:  -5.142518520355225
EXP Loss:  38.07468032836914





 51%|█████     | 5100/10001 [44:01<39:01,  2.09it/s]  


	Avg. Loss: 1.3526


 51%|█████     | 5101/10001 [44:02<50:49,  1.61it/s]




Input:  tensor([ 3.,  5.,  3.,  8., -9.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0., -9.,  3.])
Output:  tensor([ 2.9000,  5.0000,  3.3000,  7.9000,  3.1000,  4.5000,  2.9000,  6.8000,
         2.5000,  2.1000,  1.6000,  2.4000,  1.7000,  0.7000,  0.0000, -0.0000,
        -0.0000, -0.0000, -0.0000, -0.1000, -0.1000, -0.1000, -0.1000, -0.1000,
        -0.1000], grad_fn=<ViewBackward0>)
Target:  tensor([3., 5., 3., 8., 3., 5., 3., 8., 3., 5., 3., 8., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0.])
MSE Loss:  1.1517412662506104
CE Loss:  -6.3814873695373535
EXP Loss:  4.5183424949646





 52%|█████▏    | 5200/10001 [45:01<1:28:26,  1.11s/it]


	Avg. Loss: 1.0646


 52%|█████▏    | 5201/10001 [45:03<1:36:16,  1.20s/it]




Input:  tensor([ 2.,  8.,  1.,  5., -9.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0., -9.,  3.])
Output:  tensor([2.1000, 8.0000, 0.6000, 5.2000, 1.5000, 7.9000, 1.1000, 5.8000, 1.9000,
        3.1000, 2.3000, 2.2000, 0.6000, 0.2000, 0.1000, 0.1000, 0.1000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, -0.0000, -0.0000],
       grad_fn=<ViewBackward0>)
Target:  tensor([2., 8., 1., 5., 2., 8., 1., 5., 2., 8., 1., 5., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0.])
MSE Loss:  1.2175792455673218
CE Loss:  -6.948426723480225
EXP Loss:  3.8654537200927734





 53%|█████▎    | 5300/10001 [46:13<1:01:52,  1.27it/s]


	Avg. Loss: 1.0047


 53%|█████▎    | 5301/10001 [46:15<1:27:12,  1.11s/it]




Input:  tensor([ 4.,  2.,  4., -9.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0., -9.,  3.])
Output:  tensor([ 3.7000,  1.9000,  4.0000,  4.1000,  1.8000,  3.2000,  2.5000,  0.7000,
         1.5000, -0.1000,  0.1000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000,
        -0.0000, -0.0000, -0.1000, -0.1000], grad_fn=<ViewBackward0>)
Target:  tensor([4., 2., 4., 4., 2., 4., 4., 2., 4., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0.])
MSE Loss:  1.0025670528411865
CE Loss:  -5.982541561126709
EXP Loss:  3.1619296073913574





 54%|█████▍    | 5400/10001 [47:17<53:34,  1.43it/s]  


	Avg. Loss: 0.9514





 54%|█████▍    | 5401/10001 [47:18<1:06:45,  1.15it/s]

Input:  tensor([ 6.,  4., -9.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0., -9.,
         2.])
Output:  tensor([ 6.1000,  4.2000,  6.1000,  4.2000,  2.7000,  2.0000,  0.0000, -0.0000,
        -0.0000,  0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.1000],
       grad_fn=<ViewBackward0>)
Target:  tensor([6., 4., 6., 4., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
MSE Loss:  0.899151623249054
CE Loss:  -5.8292460441589355
EXP Loss:  2.715914726257324





 55%|█████▍    | 5500/10001 [48:23<50:24,  1.49it/s]  


	Avg. Loss: 0.9426


 55%|█████▌    | 5501/10001 [48:24<1:02:52,  1.19it/s]




Input:  tensor([ 2.,  5.,  6.,  4.,  4., -9.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        -9.,  2.])
Output:  tensor([2.1000, 5.1000, 6.3000, 4.0000, 4.1000, 1.8000, 5.1000, 7.0000, 4.8000,
        4.1000, 1.4000, 2.9000, 3.5000, 2.6000, 3.0000, 0.0000, 0.1000, 0.0000,
        0.0000, 0.1000, 0.1000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        -0.0000, 0.0000, 0.0000], grad_fn=<ViewBackward0>)
Target:  tensor([2., 5., 6., 4., 4., 2., 5., 6., 4., 4., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
MSE Loss:  1.0425100326538086
CE Loss:  -6.68549108505249
EXP Loss:  2.7738776206970215





 56%|█████▌    | 5600/10001 [49:38<49:34,  1.48it/s]  


	Avg. Loss: 0.9650


 56%|█████▌    | 5601/10001 [49:39<1:02:58,  1.16it/s]




Input:  tensor([ 1.,  7.,  7.,  5., -9.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0., -9.,  3.])
Output:  tensor([0.9000, 7.0000, 7.0000, 5.1000, 1.2000, 6.3000, 7.0000, 5.1000, 0.8000,
        2.3000, 2.9000, 3.6000, 0.1000, 0.2000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, -0.0000, -0.0000],
       grad_fn=<ViewBackward0>)
Target:  tensor([1., 7., 7., 5., 1., 7., 7., 5., 1., 7., 7., 5., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0.])
MSE Loss:  1.2001312971115112
CE Loss:  -7.2529802322387695
EXP Loss:  3.974625587463379





 57%|█████▋    | 5700/10001 [50:57<59:52,  1.20it/s]  


	Avg. Loss: 0.9889


 57%|█████▋    | 5701/10001 [51:00<1:32:03,  1.28s/it]




Input:  tensor([ 7.,  7., -9.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0., -9.,
         3.])
Output:  tensor([ 6.8000,  6.8000,  7.0000,  6.3000,  3.5000,  3.3000,  0.2000,  0.0000,
        -0.0000,  0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.1000],
       grad_fn=<ViewBackward0>)
Target:  tensor([7., 7., 7., 7., 7., 7., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
MSE Loss:  0.9743863940238953
CE Loss:  -6.02826452255249
EXP Loss:  2.5774338245391846





 58%|█████▊    | 5800/10001 [52:08<40:30,  1.73it/s]  


	Avg. Loss: 0.9612


 58%|█████▊    | 5801/10001 [52:10<1:04:27,  1.09it/s]




Input:  tensor([ 1.,  1.,  3., -9.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0., -9.,  2.])
Output:  tensor([ 1.1000,  1.1000,  2.8000,  0.8000,  1.2000,  3.4000,  1.1000,  0.3000,
         1.0000, -0.1000, -0.0000, -0.0000, -0.0000, -0.1000, -0.1000, -0.1000,
        -0.1000, -0.1000, -0.0000, -0.0000], grad_fn=<ViewBackward0>)
Target:  tensor([1., 1., 3., 1., 1., 3., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0.])
MSE Loss:  0.8446449637413025
CE Loss:  -5.4585981369018555
EXP Loss:  2.163804531097412





 59%|█████▉    | 5900/10001 [53:17<56:35,  1.21it/s]  


	Avg. Loss: 0.9646


 59%|█████▉    | 5901/10001 [53:19<1:12:37,  1.06s/it]




Input:  tensor([ 5.,  1.,  7.,  2.,  7., -9.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        -9.,  2.])
Output:  tensor([ 5.1000,  1.2000,  7.2000,  2.2000,  6.9000,  5.4000,  1.9000,  7.6000,
         2.1000,  7.8000,  2.9000,  1.5000,  3.6000,  1.3000,  3.9000, -0.3000,
        -0.0000, -0.1000,  0.1000,  0.0000,  0.1000,  0.1000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000, -0.0000,  0.0000],
       grad_fn=<ViewBackward0>)
Target:  tensor([5., 1., 7., 2., 7., 5., 1., 7., 2., 7., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
MSE Loss:  1.1429346799850464
CE Loss:  -7.5819807052612305
EXP Loss:  3.0427746772766113





 60%|█████▉    | 6000/10001 [54:27<44:36,  1.49it/s]  


	Avg. Loss: 0.9576


 60%|██████    | 6001/10001 [54:29<1:00:27,  1.10it/s]




Input:  tensor([ 8.,  4.,  8., -9.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0., -9.,  3.])
Output:  tensor([ 7.9000,  4.2000,  8.1000,  7.8000,  3.4000,  7.8000,  3.7000,  2.3000,
         4.1000,  0.2000,  0.1000, -0.0000, -0.0000, -0.1000, -0.1000, -0.1000,
        -0.1000, -0.1000, -0.1000, -0.1000], grad_fn=<ViewBackward0>)
Target:  tensor([8., 4., 8., 8., 4., 8., 8., 4., 8., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0.])
MSE Loss:  1.0661389827728271
CE Loss:  -7.080301284790039
EXP Loss:  2.9098165035247803





 61%|██████    | 6100/10001 [55:38<52:49,  1.23it/s]  


	Avg. Loss: 0.9551


 61%|██████    | 6101/10001 [55:39<1:05:12,  1.00s/it]




Input:  tensor([ 1.,  8., -9.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0., -9.,
         3.])
Output:  tensor([ 0.9000,  7.8000,  0.9000,  7.8000,  0.5000,  3.4000, -0.1000,  0.1000,
        -0.0000,  0.0000, -0.0000,  0.0000, -0.0000,  0.0000, -0.0000],
       grad_fn=<ViewBackward0>)
Target:  tensor([1., 8., 1., 8., 1., 8., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
MSE Loss:  0.9474391341209412
CE Loss:  -5.994887828826904
EXP Loss:  2.6789088249206543





 62%|██████▏   | 6200/10001 [56:52<49:03,  1.29it/s]  


	Avg. Loss: 0.9548


 62%|██████▏   | 6201/10001 [56:53<52:57,  1.20it/s]




Input:  tensor([ 4.,  8.,  8.,  8., -9.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0., -9.,  2.])
Output:  tensor([3.9000, 7.7000, 8.1000, 7.9000, 3.6000, 7.9000, 7.9000, 7.7000, 1.9000,
        2.8000, 3.8000, 3.5000, -0.0000, 0.2000, -0.0000, -0.0000, -0.0000, -0.0000,
        0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000],
       grad_fn=<ViewBackward0>)
Target:  tensor([4., 8., 8., 8., 4., 8., 8., 8., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0.])
MSE Loss:  1.0150996446609497
CE Loss:  -6.497311115264893
EXP Loss:  2.689213991165161





 63%|██████▎   | 6300/10001 [58:01<41:21,  1.49it/s]  


	Avg. Loss: 0.9734


 63%|██████▎   | 6301/10001 [58:02<48:51,  1.26it/s]




Input:  tensor([ 1.,  4.,  8.,  1., -9.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0., -9.,  2.])
Output:  tensor([1.1000, 3.8000, 7.5000, 0.7000, 0.7000, 4.2000, 6.9000, 0.9000, 0.8000,
        2.5000, 3.7000, 0.4000, 0.1000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000,
        -0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000],
       grad_fn=<ViewBackward0>)
Target:  tensor([1., 4., 8., 1., 1., 4., 8., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0.])
MSE Loss:  1.0182390213012695
CE Loss:  -6.550673484802246
EXP Loss:  2.664003849029541





 64%|██████▍   | 6400/10001 [59:11<52:44,  1.14it/s]  


	Avg. Loss: 0.9498


 64%|██████▍   | 6401/10001 [59:12<1:05:15,  1.09s/it]




Input:  tensor([ 2.,  4.,  3.,  7., -9.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0., -9.,  3.])
Output:  tensor([ 2.0000,  3.9000,  3.0000,  6.8000,  1.9000,  3.8000,  2.5000,  6.8000,
         0.7000,  1.6000,  1.0000,  3.5000, -0.1000,  0.1000, -0.0000, -0.0000,
        -0.0000, -0.0000,  0.0000,  0.0000, -0.0000, -0.0000, -0.0000, -0.0000,
        -0.1000], grad_fn=<ViewBackward0>)
Target:  tensor([2., 4., 3., 7., 2., 4., 3., 7., 2., 4., 3., 7., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0.])
MSE Loss:  1.1024949550628662
CE Loss:  -7.254317760467529
EXP Loss:  3.0334811210632324





 65%|██████▍   | 6500/10001 [1:00:22<46:52,  1.24it/s]  


	Avg. Loss: 0.9693


 65%|██████▌   | 6501/10001 [1:00:24<59:27,  1.02s/it]




Input:  tensor([ 5.,  5.,  7.,  7., -9.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0., -9.,  3.])
Output:  tensor([5.0000, 5.1000, 7.4000, 7.2000, 5.1000, 5.4000, 7.1000, 7.5000, 2.9000,
        2.8000, 4.2000, 3.9000, 0.1000, 0.4000, 0.1000, 0.0000, 0.0000, 0.1000,
        0.1000, 0.1000, 0.1000, 0.0000, 0.0000, 0.0000, 0.0000],
       grad_fn=<ViewBackward0>)
Target:  tensor([5., 5., 7., 7., 5., 5., 7., 7., 5., 5., 7., 7., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0.])
MSE Loss:  1.0234004259109497
CE Loss:  -7.30188512802124
EXP Loss:  2.737107515335083





 66%|██████▌   | 6600/10001 [1:01:32<35:37,  1.59it/s]


	Avg. Loss: 0.9662


 66%|██████▌   | 6601/10001 [1:01:33<46:55,  1.21it/s]




Input:  tensor([ 5.,  2.,  2.,  3.,  8., -9.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        -9.,  2.])
Output:  tensor([5.0000, 2.0000, 1.8000, 3.3000, 8.1000, 5.7000, 2.1000, 2.3000, 2.7000,
        7.6000, 2.4000, 0.1000, 1.1000, 1.6000, 3.5000, 0.1000, 0.0000, 0.1000,
        0.1000, 0.1000, 0.1000, 0.1000, 0.1000, 0.1000, 0.1000, 0.1000, 0.1000,
        0.0000, 0.1000, 0.1000], grad_fn=<ViewBackward0>)
Target:  tensor([5., 2., 2., 3., 8., 5., 2., 2., 3., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
MSE Loss:  1.1549636125564575
CE Loss:  -7.103672981262207
EXP Loss:  3.498983860015869





 67%|██████▋   | 6700/10001 [1:02:41<36:41,  1.50it/s]


	Avg. Loss: 0.9514


 67%|██████▋   | 6701/10001 [1:02:42<37:18,  1.47it/s]




Input:  tensor([ 4.,  6.,  4., -9.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0., -9.,  3.])
Output:  tensor([4.0000, 6.1000, 4.0000, 4.4000, 6.4000, 4.1000, 2.1000, 3.6000, 2.6000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, -0.0000, -0.0000, -0.0000, -0.0000,
        0.0000, -0.0000], grad_fn=<ViewBackward0>)
Target:  tensor([4., 6., 4., 4., 6., 4., 4., 6., 4., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0.])
MSE Loss:  0.9790703058242798
CE Loss:  -6.86155366897583
EXP Loss:  2.6733055114746094





 68%|██████▊   | 6800/10001 [1:03:48<26:02,  2.05it/s]


	Avg. Loss: 0.9198


 68%|██████▊   | 6801/10001 [1:03:49<37:00,  1.44it/s]




Input:  tensor([ 7.,  2., -9.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0., -9.,
         3.])
Output:  tensor([6.8000, 1.9000, 6.9000, 1.7000, 3.2000, 0.6000, 0.0000, -0.0000, -0.0000,
        -0.0000, -0.0000, 0.0000, -0.0000, -0.0000, -0.0000],
       grad_fn=<ViewBackward0>)
Target:  tensor([7., 2., 7., 2., 7., 2., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
MSE Loss:  0.8961275815963745
CE Loss:  -5.562831401824951
EXP Loss:  2.427802801132202





 69%|██████▉   | 6900/10001 [1:04:56<36:05,  1.43it/s]


	Avg. Loss: 0.9869


 69%|██████▉   | 6901/10001 [1:04:58<51:48,  1.00s/it]




Input:  tensor([ 3.,  3.,  3.,  8.,  4., -9.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        -9.,  3.])
Output:  tensor([ 3.1000,  2.9000,  2.9000,  7.7000,  4.1000,  2.9000,  2.5000,  2.8000,
         7.9000,  4.0000,  2.0000,  1.6000,  1.5000,  3.9000,  2.0000, -0.1000,
        -0.0000, -0.0000,  0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000,
        -0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.1000],
       grad_fn=<ViewBackward0>)
Target:  tensor([3., 3., 3., 8., 4., 3., 3., 3., 8., 4., 3., 3., 3., 8., 4., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
MSE Loss:  1.0472792387008667
CE Loss:  -6.537229061126709
EXP Loss:  2.6433072090148926





 70%|██████▉   | 7000/10001 [1:06:05<27:24,  1.83it/s]


	Avg. Loss: 0.9473


 70%|███████   | 7001/10001 [1:06:07<42:25,  1.18it/s]




Input:  tensor([ 1.,  7.,  8., -9.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0., -9.,  2.])
Output:  tensor([ 1.0000,  7.0000,  7.7000,  1.1000,  6.8000,  7.2000,  0.4000,  3.3000,
         2.9000,  0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.1000, -0.1000,
        -0.0000, -0.0000,  0.0000,  0.0000], grad_fn=<ViewBackward0>)
Target:  tensor([1., 7., 8., 1., 7., 8., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0.])
MSE Loss:  0.9184750318527222
CE Loss:  -5.745304584503174
EXP Loss:  2.435617685317993





 71%|███████   | 7100/10001 [1:07:12<28:17,  1.71it/s]


	Avg. Loss: 0.9337


 71%|███████   | 7101/10001 [1:07:14<43:06,  1.12it/s]




Input:  tensor([ 7.,  5.,  3.,  2.,  1., -9.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        -9.,  3.])
Output:  tensor([ 7.1000,  4.9000,  3.0000,  2.0000,  1.4000,  7.5000,  4.9000,  2.8000,
         2.1000,  1.3000,  3.6000,  2.7000,  1.7000,  1.4000,  0.9000, -0.1000,
        -0.0000,  0.1000,  0.0000,  0.1000,  0.1000,  0.1000,  0.0000,  0.0000,
         0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
       grad_fn=<ViewBackward0>)
Target:  tensor([7., 5., 3., 2., 1., 7., 5., 3., 2., 1., 7., 5., 3., 2., 1., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
MSE Loss:  1.0899204015731812
CE Loss:  -6.9992475509643555
EXP Loss:  2.893421173095703





 72%|███████▏  | 7200/10001 [1:08:21<26:31,  1.76it/s]


	Avg. Loss: 0.9492


 72%|███████▏  | 7201/10001 [1:08:22<38:10,  1.22it/s]




Input:  tensor([ 5.,  2.,  7.,  7.,  6., -9.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        -9.,  3.])
Output:  tensor([5.0000, 1.9000, 7.1000, 7.0000, 5.9000, 4.9000, 1.3000, 7.4000, 6.6000,
        5.1000, 1.9000, 0.6000, 3.1000, 3.4000, 3.2000, -0.0000, -0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, -0.0000], grad_fn=<ViewBackward0>)
Target:  tensor([5., 2., 7., 7., 6., 5., 2., 7., 7., 6., 5., 2., 7., 7., 6., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
MSE Loss:  1.1292997598648071
CE Loss:  -6.816775321960449
EXP Loss:  3.1450066566467285





 73%|███████▎  | 7300/10001 [1:09:27<31:02,  1.45it/s]


	Avg. Loss: 0.9550


 73%|███████▎  | 7301/10001 [1:09:29<47:20,  1.05s/it]




Input:  tensor([ 1.,  7., -9.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0., -9.,
         3.])
Output:  tensor([1.0000, 7.0000, 1.1000, 7.1000, 0.4000, 3.7000, -0.0000, 0.0000, -0.0000,
        0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000],
       grad_fn=<ViewBackward0>)
Target:  tensor([1., 7., 1., 7., 1., 7., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
MSE Loss:  0.8375602960586548
CE Loss:  -5.544831275939941
EXP Loss:  2.144122362136841





 74%|███████▍  | 7400/10001 [1:10:33<30:30,  1.42it/s]


	Avg. Loss: 0.9274


 74%|███████▍  | 7401/10001 [1:10:34<37:53,  1.14it/s]




Input:  tensor([ 4.,  3., -9.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0., -9.,
         2.])
Output:  tensor([4.1000, 3.0000, 3.7000, 3.2000, 2.2000, 1.5000, 0.2000, -0.0000, -0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, -0.0000, 0.0000],
       grad_fn=<ViewBackward0>)
Target:  tensor([4., 3., 4., 3., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
MSE Loss:  0.9622087478637695
CE Loss:  -6.589472770690918
EXP Loss:  2.6316921710968018





 75%|███████▍  | 7500/10001 [1:11:39<19:55,  2.09it/s]


	Avg. Loss: 0.9351


 75%|███████▌  | 7501/10001 [1:11:41<36:58,  1.13it/s]




Input:  tensor([ 1.,  6., -9.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0., -9.,
         2.])
Output:  tensor([1.0000, 5.9000, 0.9000, 5.9000, 0.4000, 2.5000, 0.1000, -0.0000, -0.0000,
        0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000],
       grad_fn=<ViewBackward0>)
Target:  tensor([1., 6., 1., 6., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
MSE Loss:  0.7976317405700684
CE Loss:  -5.274265766143799
EXP Loss:  2.090040922164917





 76%|███████▌  | 7600/10001 [1:12:49<31:09,  1.28it/s]


	Avg. Loss: 0.9424


 76%|███████▌  | 7601/10001 [1:12:50<40:30,  1.01s/it]




Input:  tensor([ 5.,  4.,  7.,  7., -9.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0., -9.,  3.])
Output:  tensor([4.9000, 3.9000, 7.0000, 6.8000, 4.7000, 4.1000, 6.9000, 7.1000, 2.2000,
        1.7000, 3.7000, 3.3000, 0.1000, 0.1000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, -0.0000],
       grad_fn=<ViewBackward0>)
Target:  tensor([5., 4., 7., 7., 5., 4., 7., 7., 5., 4., 7., 7., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0.])
MSE Loss:  1.0296058654785156
CE Loss:  -6.726104736328125
EXP Loss:  2.699688196182251





 77%|███████▋  | 7700/10001 [1:13:56<29:17,  1.31it/s]


	Avg. Loss: 0.9486


 77%|███████▋  | 7701/10001 [1:13:58<39:59,  1.04s/it]




Input:  tensor([ 7.,  3.,  7., -9.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0., -9.,  3.])
Output:  tensor([ 7.0000,  3.1000,  7.1000,  7.0000,  2.9000,  7.2000,  4.1000,  1.7000,
         4.0000, -0.0000,  0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000,
        -0.1000, -0.1000, -0.1000, -0.1000], grad_fn=<ViewBackward0>)
Target:  tensor([7., 3., 7., 7., 3., 7., 7., 3., 7., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0.])
MSE Loss:  1.0811575651168823
CE Loss:  -7.237365245819092
EXP Loss:  2.9851396083831787





 78%|███████▊  | 7800/10001 [1:15:04<26:52,  1.36it/s]


	Avg. Loss: 0.9401


 78%|███████▊  | 7801/10001 [1:15:05<33:13,  1.10it/s]




Input:  tensor([ 6.,  5.,  3., -9.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0., -9.,  2.])
Output:  tensor([ 6.1000,  4.9000,  3.2000,  5.9000,  5.2000,  3.2000,  3.2000,  3.0000,
         1.8000, -0.1000,  0.0000,  0.0000, -0.0000, -0.0000, -0.0000, -0.0000,
        -0.0000, -0.0000,  0.0000, -0.0000], grad_fn=<ViewBackward0>)
Target:  tensor([6., 5., 3., 6., 5., 3., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0.])
MSE Loss:  0.9470325708389282
CE Loss:  -6.596796989440918
EXP Loss:  2.388040781021118





 79%|███████▉  | 7900/10001 [1:16:14<21:29,  1.63it/s]


	Avg. Loss: 0.9408


 79%|███████▉  | 7901/10001 [1:16:16<34:00,  1.03it/s]




Input:  tensor([ 4.,  1.,  1.,  3., -9.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0., -9.,  2.])
Output:  tensor([3.9000, 1.1000, 1.0000, 3.0000, 4.2000, 1.1000, 0.8000, 3.6000, 2.4000,
        1.1000, 0.9000, 1.5000, -0.0000, 0.0000, -0.0000, 0.0000, -0.0000, -0.0000,
        -0.0000, -0.0000, -0.0000, -0.0000, -0.0000, 0.0000, 0.0000],
       grad_fn=<ViewBackward0>)
Target:  tensor([4., 1., 1., 3., 4., 1., 1., 3., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0.])
MSE Loss:  1.0670872926712036
CE Loss:  -6.570842266082764
EXP Loss:  2.8594508171081543





 80%|███████▉  | 8000/10001 [1:17:21<23:53,  1.40it/s]


	Avg. Loss: 0.9282


 80%|████████  | 8001/10001 [1:17:22<30:26,  1.09it/s]




Input:  tensor([ 8.,  7.,  3.,  3.,  6., -9.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        -9.,  3.])
Output:  tensor([7.9000, 6.9000, 2.9000, 2.9000, 6.2000, 7.9000, 6.9000, 3.2000, 3.3000,
        6.4000, 3.8000, 3.6000, 1.1000, 1.4000, 3.2000, -0.0000, -0.0000, 0.0000,
        0.0000, 0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000,
        -0.0000, -0.0000, -0.0000], grad_fn=<ViewBackward0>)
Target:  tensor([8., 7., 3., 3., 6., 8., 7., 3., 3., 6., 8., 7., 3., 3., 6., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
MSE Loss:  1.0321377515792847
CE Loss:  -6.575685977935791
EXP Loss:  2.6929385662078857





 81%|████████  | 8100/10001 [1:18:34<26:48,  1.18it/s]


	Avg. Loss: 0.9620


 81%|████████  | 8101/10001 [1:18:36<38:07,  1.20s/it]




Input:  tensor([ 1.,  7.,  4., -9.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0., -9.,  3.])
Output:  tensor([1.1000, 7.3000, 4.5000, 0.9000, 6.7000, 4.1000, 0.1000, 2.8000, 1.5000,
        0.1000, 0.0000, 0.1000, 0.1000, 0.1000, 0.1000, 0.1000, 0.1000, 0.1000,
        0.1000, 0.0000], grad_fn=<ViewBackward0>)
Target:  tensor([1., 7., 4., 1., 7., 4., 1., 7., 4., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0.])
MSE Loss:  1.0339572429656982
CE Loss:  -6.649835109710693
EXP Loss:  2.9335031509399414





 82%|████████▏ | 8200/10001 [1:19:48<24:12,  1.24it/s]


	Avg. Loss: 0.9770


 82%|████████▏ | 8201/10001 [1:19:50<34:26,  1.15s/it]




Input:  tensor([ 5.,  7.,  4., -9.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0., -9.,  3.])
Output:  tensor([5.0000, 7.1000, 3.8000, 5.4000, 6.9000, 3.9000, 2.6000, 3.0000, 1.7000,
        -0.0000, 0.0000, 0.0000, 0.0000, 0.0000, -0.0000, -0.0000, -0.0000, -0.0000,
        -0.0000, -0.0000], grad_fn=<ViewBackward0>)
Target:  tensor([5., 7., 4., 5., 7., 4., 5., 7., 4., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0.])
MSE Loss:  1.0010768175125122
CE Loss:  -6.351258277893066
EXP Loss:  2.87148380279541





 83%|████████▎ | 8300/10001 [1:21:00<19:21,  1.46it/s]


	Avg. Loss: 0.9577


 83%|████████▎ | 8301/10001 [1:21:02<32:56,  1.16s/it]




Input:  tensor([ 3.,  4.,  2.,  6.,  7., -9.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        -9.,  2.])
Output:  tensor([ 3.0000,  4.0000,  2.2000,  6.1000,  7.3000,  3.0000,  4.1000,  1.8000,
         6.4000,  7.1000,  1.7000,  1.5000,  0.8000,  3.5000,  3.9000, -0.1000,
        -0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000,
        -0.0000, -0.0000, -0.0000, -0.1000, -0.1000, -0.0000],
       grad_fn=<ViewBackward0>)
Target:  tensor([3., 4., 2., 6., 7., 3., 4., 2., 6., 7., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
MSE Loss:  1.1134318113327026
CE Loss:  -7.636111259460449
EXP Loss:  3.0267629623413086





 84%|████████▍ | 8400/10001 [1:22:12<24:10,  1.10it/s]


	Avg. Loss: 0.9782


 84%|████████▍ | 8401/10001 [1:22:14<31:58,  1.20s/it]




Input:  tensor([ 7.,  1., -9.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0., -9.,
         3.])
Output:  tensor([7.1000, 0.8000, 7.1000, 1.2000, 3.4000, 0.5000, 0.1000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
       grad_fn=<ViewBackward0>)
Target:  tensor([7., 1., 7., 1., 7., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
MSE Loss:  0.8975803852081299
CE Loss:  -6.399409294128418
EXP Loss:  2.365278959274292





 85%|████████▍ | 8500/10001 [1:23:23<17:20,  1.44it/s]


	Avg. Loss: 0.9320


 85%|████████▌ | 8501/10001 [1:23:24<21:50,  1.14it/s]




Input:  tensor([ 1.,  8., -9.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0., -9.,
         2.])
Output:  tensor([0.9000, 7.8000, 0.9000, 7.6000, 0.8000, 4.3000, -0.0000, 0.1000, -0.0000,
        -0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000],
       grad_fn=<ViewBackward0>)
Target:  tensor([1., 8., 1., 8., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
MSE Loss:  0.9298115968704224
CE Loss:  -5.8160858154296875
EXP Loss:  2.7438998222351074





 86%|████████▌ | 8600/10001 [1:24:32<12:34,  1.86it/s]


	Avg. Loss: 0.9329


 86%|████████▌ | 8601/10001 [1:24:33<15:38,  1.49it/s]




Input:  tensor([ 5.,  3.,  4.,  1., -9.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0., -9.,  3.])
Output:  tensor([4.9000, 3.0000, 3.9000, 1.2000, 4.9000, 3.3000, 4.2000, 1.7000, 2.8000,
        1.6000, 2.4000, 1.0000, -0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
       grad_fn=<ViewBackward0>)
Target:  tensor([5., 3., 4., 1., 5., 3., 4., 1., 5., 3., 4., 1., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0.])
MSE Loss:  1.1250238418579102
CE Loss:  -7.5783820152282715
EXP Loss:  3.0508575439453125





 87%|████████▋ | 8700/10001 [1:25:43<13:13,  1.64it/s]


	Avg. Loss: 0.9497


 87%|████████▋ | 8701/10001 [1:25:45<18:47,  1.15it/s]




Input:  tensor([ 6.,  6.,  7.,  1.,  5., -9.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        -9.,  2.])
Output:  tensor([6.0000, 6.0000, 7.3000, 0.7000, 5.1000, 5.5000, 6.1000, 6.8000, 1.2000,
        5.4000, 3.5000, 3.2000, 3.6000, 0.6000, 3.0000, -0.0000, -0.0000, -0.0000,
        -0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000,
        -0.0000, -0.0000, -0.0000], grad_fn=<ViewBackward0>)
Target:  tensor([6., 6., 7., 1., 5., 6., 6., 7., 1., 5., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
MSE Loss:  1.055667519569397
CE Loss:  -6.822183132171631
EXP Loss:  2.7006473541259766





 88%|████████▊ | 8800/10001 [1:26:57<16:35,  1.21it/s]


	Avg. Loss: 0.9543


 88%|████████▊ | 8801/10001 [1:26:58<17:48,  1.12it/s]




Input:  tensor([ 8.,  4., -9.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0., -9.,
         3.])
Output:  tensor([8.1000, 4.1000, 8.0000, 4.1000, 4.0000, 2.1000, -0.0000, -0.0000, -0.0000,
        -0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000],
       grad_fn=<ViewBackward0>)
Target:  tensor([8., 4., 8., 4., 8., 4., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
MSE Loss:  0.8817911148071289
CE Loss:  -6.007511615753174
EXP Loss:  2.3000898361206055





 89%|████████▉ | 8900/10001 [1:28:10<15:33,  1.18it/s]


	Avg. Loss: 0.9534


 89%|████████▉ | 8901/10001 [1:28:11<17:28,  1.05it/s]




Input:  tensor([ 3.,  4.,  8.,  1., -9.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0., -9.,  2.])
Output:  tensor([ 3.1000,  4.0000,  8.1000,  0.9000,  3.3000,  3.8000,  7.8000,  1.5000,
         1.6000,  1.5000,  3.6000,  0.6000, -0.0000, -0.0000, -0.0000, -0.1000,
        -0.1000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.1000,
        -0.0000], grad_fn=<ViewBackward0>)
Target:  tensor([3., 4., 8., 1., 3., 4., 8., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0.])
MSE Loss:  1.00103759765625
CE Loss:  -6.560877323150635
EXP Loss:  2.632927417755127





 90%|████████▉ | 9000/10001 [1:29:16<14:02,  1.19it/s]


	Avg. Loss: 0.9162


 90%|█████████ | 9001/10001 [1:29:18<19:08,  1.15s/it]




Input:  tensor([ 5.,  2.,  2.,  1., -9.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0., -9.,  3.])
Output:  tensor([5.0000, 2.0000, 2.1000, 1.3000, 5.0000, 2.1000, 2.3000, 1.5000, 2.9000,
        1.6000, 1.5000, 1.1000, 0.0000, 0.1000, 0.1000, 0.1000, 0.1000, 0.1000,
        0.1000, 0.1000, 0.1000, 0.1000, 0.1000, 0.1000, 0.0000],
       grad_fn=<ViewBackward0>)
Target:  tensor([5., 2., 2., 1., 5., 2., 2., 1., 5., 2., 2., 1., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0.])
MSE Loss:  1.130255103111267
CE Loss:  -7.243037700653076
EXP Loss:  3.1087541580200195





 91%|█████████ | 9100/10001 [1:30:26<10:32,  1.42it/s]


	Avg. Loss: 0.9313


 91%|█████████ | 9101/10001 [1:30:27<12:37,  1.19it/s]




Input:  tensor([ 3.,  1.,  7.,  3., -9.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0., -9.,  3.])
Output:  tensor([ 2.9000,  0.9000,  7.1000,  2.9000,  3.3000,  1.1000,  6.6000,  3.3000,
         1.7000,  0.4000,  3.0000,  1.4000,  0.2000,  0.0000, -0.1000, -0.0000,
        -0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000,
        -0.0000], grad_fn=<ViewBackward0>)
Target:  tensor([3., 1., 7., 3., 3., 1., 7., 3., 3., 1., 7., 3., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0.])
MSE Loss:  1.0409364700317383
CE Loss:  -6.732506275177002
EXP Loss:  2.7846391201019287





 92%|█████████▏| 9200/10001 [1:31:33<07:33,  1.77it/s]


	Avg. Loss: 0.9816


 92%|█████████▏| 9201/10001 [1:31:34<09:23,  1.42it/s]




Input:  tensor([ 1.,  7.,  7.,  4.,  1., -9.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        -9.,  2.])
Output:  tensor([1.0000, 7.2000, 7.2000, 4.0000, 1.2000, 1.4000, 7.2000, 7.4000, 3.3000,
        1.5000, 1.1000, 3.9000, 3.9000, 2.1000, 0.4000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, -0.0000, -0.0000,
        -0.0000, -0.0000, -0.0000], grad_fn=<ViewBackward0>)
Target:  tensor([1., 7., 7., 4., 1., 1., 7., 7., 4., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
MSE Loss:  1.236475944519043
CE Loss:  -7.697922706604004
EXP Loss:  3.382633686065674





 93%|█████████▎| 9300/10001 [1:32:49<08:27,  1.38it/s]


	Avg. Loss: 0.9537


 93%|█████████▎| 9301/10001 [1:32:50<10:38,  1.10it/s]




Input:  tensor([ 4.,  2.,  6., -9.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0., -9.,  2.])
Output:  tensor([ 3.9000,  2.0000,  6.1000,  4.0000,  2.3000,  6.3000,  1.8000,  0.8000,
         2.9000, -0.1000,  0.0000, -0.0000, -0.0000, -0.1000, -0.1000, -0.1000,
        -0.1000, -0.1000, -0.1000, -0.1000], grad_fn=<ViewBackward0>)
Target:  tensor([4., 2., 6., 4., 2., 6., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0.])
MSE Loss:  0.9475763440132141
CE Loss:  -6.31904935836792
EXP Loss:  2.4349758625030518





 94%|█████████▍| 9400/10001 [1:33:56<06:45,  1.48it/s]


	Avg. Loss: 0.9230


 94%|█████████▍| 9401/10001 [1:33:57<08:37,  1.16it/s]




Input:  tensor([ 5.,  2., -9.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0., -9.,
         2.])
Output:  tensor([5.0000, 2.2000, 4.8000, 2.3000, 2.4000, 0.8000, 0.1000, -0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.1000],
       grad_fn=<ViewBackward0>)
Target:  tensor([5., 2., 5., 2., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
MSE Loss:  0.9026778340339661
CE Loss:  -6.090052604675293
EXP Loss:  2.4731132984161377





 95%|█████████▍| 9500/10001 [1:35:09<05:37,  1.48it/s]


	Avg. Loss: 0.9493


 95%|█████████▌| 9501/10001 [1:35:11<08:10,  1.02it/s]




Input:  tensor([ 2.,  1.,  8., -9.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0., -9.,  3.])
Output:  tensor([1.9000, 1.1000, 7.8000, 1.9000, 1.2000, 7.8000, 1.5000, 0.7000, 3.7000,
        0.1000, 0.1000, -0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        -0.0000, -0.0000], grad_fn=<ViewBackward0>)
Target:  tensor([2., 1., 8., 2., 1., 8., 2., 1., 8., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0.])
MSE Loss:  0.9718286395072937
CE Loss:  -6.367647171020508
EXP Loss:  2.570054054260254





 96%|█████████▌| 9600/10001 [1:36:21<05:50,  1.14it/s]


	Avg. Loss: 0.9221


 96%|█████████▌| 9601/10001 [1:36:23<06:32,  1.02it/s]




Input:  tensor([ 1.,  7.,  7.,  5., -9.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0., -9.,  3.])
Output:  tensor([ 0.9000,  6.8000,  7.1000,  5.2000,  1.2000,  7.3000,  7.1000,  5.8000,
         0.9000,  3.5000,  3.4000,  2.9000, -0.2000, -0.1000, -0.1000, -0.1000,
        -0.1000, -0.1000, -0.1000, -0.1000, -0.1000, -0.1000, -0.1000, -0.1000,
        -0.1000], grad_fn=<ViewBackward0>)
Target:  tensor([1., 7., 7., 5., 1., 7., 7., 5., 1., 7., 7., 5., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0.])
MSE Loss:  1.024007797241211
CE Loss:  -7.059571743011475
EXP Loss:  2.6223561763763428





 97%|█████████▋| 9700/10001 [1:37:33<02:50,  1.76it/s]


	Avg. Loss: 0.9329


 97%|█████████▋| 9701/10001 [1:37:35<05:24,  1.08s/it]




Input:  tensor([ 7.,  3.,  5.,  7., -9.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0., -9.,  3.])
Output:  tensor([7.0000, 3.0000, 5.1000, 7.0000, 7.1000, 3.1000, 5.1000, 6.9000, 3.2000,
        1.2000, 2.6000, 3.7000, 0.1000, 0.0000, -0.0000, -0.0000, -0.0000, -0.0000,
        -0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000, -0.0000],
       grad_fn=<ViewBackward0>)
Target:  tensor([7., 3., 5., 7., 7., 3., 5., 7., 7., 3., 5., 7., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0.])
MSE Loss:  1.0763434171676636
CE Loss:  -7.022580623626709
EXP Loss:  2.8518285751342773





 98%|█████████▊| 9800/10001 [1:38:40<02:10,  1.54it/s]


	Avg. Loss: 0.9328


 98%|█████████▊| 9801/10001 [1:38:42<02:43,  1.22it/s]




Input:  tensor([ 1.,  8., -9.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0., -9.,
         2.])
Output:  tensor([ 0.9000,  8.0000,  0.9000,  7.6000,  0.5000,  3.6000, -0.2000, -0.0000,
        -0.1000, -0.0000, -0.0000, -0.0000, -0.0000, -0.1000, -0.0000],
       grad_fn=<ViewBackward0>)
Target:  tensor([1., 8., 1., 8., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
MSE Loss:  0.8975296020507812
CE Loss:  -6.2001447677612305
EXP Loss:  2.337646484375





 99%|█████████▉| 9900/10001 [1:39:52<00:54,  1.84it/s]


	Avg. Loss: 0.9398


 99%|█████████▉| 9901/10001 [1:39:54<01:20,  1.24it/s]




Input:  tensor([ 8.,  4.,  3.,  3., -9.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0., -9.,  2.])
Output:  tensor([ 8.0000,  4.2000,  3.1000,  2.6000,  7.7000,  3.9000,  2.6000,  2.8000,
         3.9000,  1.4000,  1.5000,  0.7000, -0.1000, -0.0000, -0.1000, -0.1000,
        -0.1000, -0.1000, -0.1000, -0.1000, -0.1000, -0.1000, -0.1000, -0.1000,
        -0.1000], grad_fn=<ViewBackward0>)
Target:  tensor([8., 4., 3., 3., 8., 4., 3., 3., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0.])
MSE Loss:  1.0808510780334473
CE Loss:  -6.666878700256348
EXP Loss:  3.0408401489257812





100%|█████████▉| 10000/10001 [1:41:01<00:00,  1.61it/s]


	Avg. Loss: 0.9237
Increasing max number of copies to 5


100%|██████████| 10001/10001 [1:41:02<00:00,  1.65it/s]




Input:  tensor([ 5.,  6., -9.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0., -9.,
         3.])
Output:  tensor([4.9000, 6.0000, 4.7000, 5.9000, 2.3000, 2.7000, -0.0000, 0.0000, -0.0000,
        0.0000, 0.0000, 0.0000, -0.0000, -0.0000, 0.0000],
       grad_fn=<ViewBackward0>)
Target:  tensor([5., 6., 5., 6., 5., 6., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
MSE Loss:  1.9385112524032593
CE Loss:  -5.405060768127441
EXP Loss:  27.467405319213867








In [38]:
#from dnc.lib import calcAccuracy
print(name)

if 'rnn' in locals() or 'rnn' in globals():
  del rnn

rnn = DNC(
        input_size=input_size,
        hidden_size=output_size,
        rnn_type='rnn',
        #rnn_type='lstm',
        num_layers=3,
        num_hidden_layers=1,
        dropout=0,
        nr_cells=mem_slot,
        cell_size=mem_size,
        read_heads=read_heads,
        gpu_id=-1,
        debug='store_true',
        batch_first=True,
        independent_linears=True,
        nonlinearity='tanh',
    )

#name = 'mc_7ac'
lastcp = f'{name}/checkpoint_{iterations}.pth'

with open(f"{name}/output_2.txt", "w") as f:
  batch_size=1
  rnn.load_state_dict(T.load(lastcp, weights_only=True))
  rnn.eval()

  maxnumberofcopies = 3
  input_data, target_output = generate_data(1, np.random.randint(2, 4), maxnumberofcopies=maxnumberofcopies, currentmaxnocopies=maxnumberofcopies)

  input_data = var(T.from_numpy(input_data))
  target_output = var(T.from_numpy(target_output))
  print(input_data)
  print(target_output)

  stepByStep = copy.deepcopy(STEPBYSTEPOBJ)
  stepByStep["CurrI"] = 0
  stepByStep["input"] = input_data.detach().numpy()
  stepByStep["target"] = target_output.detach().numpy()
  stepByStep["currentObj"] = copy.deepcopy(stepByStep["defObj"])
  stepByStep["currentObj"]["i"] = 0 
  stepByStep["MEMORYCOLUMNS"] = mem_slot
  stepByStep["INPUTSIZE"] = input_size
  stepByStep["OUTPUTSIZE"] = output_size

  output, (chx, mhx, rv), v = rnn(input_data, (None, None, None), reset_experience=True, pass_through_memory=True, stepByStep=stepByStep)

  stepByStep['output'] = output.detach().numpy()
  stepByStep["objects"].append(copy.deepcopy(stepByStep["currentObj"]))
  stepByStep['loss'] = str(mse(output, target_output).item())

  pickle.dump(stepByStep, open(f"{name}/stepByStep.pkl", "wb"))



  for i in range(1):#range(int((iterations + 1) / 100)):
    llprint("\nIteration %d/%d" % (i, iterations))
    # We test now the learned generalization using sequence_max_length examples
    random_length = np.random.randint(2, sequence_max_length + 1)
    input_data, target_output = generate_data(batch_size, random_length, maxnumberofcopies=maxnumberofcopies, currentmaxnocopies=maxnumberofcopies)


    input_data = var(T.from_numpy(input_data))
    target_output = var(T.from_numpy(target_output))
    if rnn.debug:
      output, (chx, mhx, rv), v = rnn(input_data, (None, mhx, None), reset_experience=True, pass_through_memory=True)
    else:
      output, (chx, mhx, rv) = rnn(input_data, (None, mhx, None), reset_experience=True, pass_through_memory=True)

    print("\n\n")
    print("Input: ", torch.flatten(input_data[0]))
    print("Output: ", torch.flatten(torch.round(output[0], decimals=1)))
    print("Target: ", torch.flatten(target_output[0]))
    print("MSE Loss: ", str(mse(output, target_output).item()))
    print("CE Loss: ", str(criterion(output, target_output).item()))
    print("EXP Loss: ", str(exp_loss(output, target_output).item()))
    #print("accuracy: ", str(calcAccuracy(output, target_output).item()))
    print("\n\n")
    output = output[:, -1, :].sum().data.cpu().numpy()
    target_output = target_output.sum().data.cpu().numpy()

    
    try:
      print("\nReal value: ", ' = ' + str(int(target_output[0])))
      print("Predicted:  ", ' = ' + str(int(output // 1)) + " [" + str(output) + "]")
    except Exception as e:
      pass

mc_915
tensor([[[ 5.],
         [ 5.],
         [-9.],
         [ 0.],
         [ 0.],
         [ 0.],
         [ 0.],
         [-9.],
         [ 2.]]])
tensor([[[5.],
         [5.],
         [5.],
         [5.],
         [0.],
         [0.],
         [0.],
         [0.],
         [0.]]])

Iteration 0/10000


Input:  tensor([ 5.,  2.,  6.,  4., -9.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0., -9.,
         2.])
Output:  tensor([5.7000, 2.2000, 5.8000, 4.2000, 5.3000, 2.5000, 5.6000, 3.8000, 2.8000,
        0.8000, 2.9000, 1.9000, -0.0000, -0.0000, 0.0000],
       grad_fn=<ViewBackward0>)
Target:  tensor([5., 2., 6., 4., 5., 2., 6., 4., 0., 0., 0., 0., 0., 0., 0.])
MSE Loss:  1.4620453119277954
CE Loss:  -7.879790306091309
EXP Loss:  2.9134232997894287



