In [1]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import random
from uuid import uuid4
import warnings
warnings.filterwarnings('ignore')

import numpy as np
import getopt
import sys
import os
import math
import time
import argparse
from visdom import Visdom
from tqdm import tqdm
import plotly.express as px
import plotly.graph_objects as go
import pandas as pd

sys.path.insert(0, os.path.join('..', '..'))

import torch as T
from torch.autograd import Variable as var
import torch.nn.functional as F
import torch.optim as optim

from torch.nn.utils import clip_grad_norm_

from dnc.dnc import DNC
from dnc.sdnc import SDNC
from dnc.sam import SAM
from dnc.util import *

from dnc.lib import exp_loss, InputStorage, mse, criterion, ENDSYM, tensor2string, LEARNABLEOBJECTIVES, LEARNTHISOBJECTIVES, RETURNOTHEROBJ

T.autograd.set_detect_anomaly(True)

<torch.autograd.anomaly_mode.set_detect_anomaly at 0x7218096bcf40>

# Todo:
- generate_data, InputStorage to Torch objects

In [2]:
viz = Visdom()
# assert viz.check_connection()


def llprint(message):
  sys.stdout.write(message)
  sys.stdout.flush()



st = InputStorage()

def genSeq(sizeTpl, min=None, max=None, default=True):
  if default:
    return np.random.binomial(1, 0.5, sizeTpl)
  assert min is not None and max is not None
  return np.random.randint(min, max, sizeTpl)


def generate_data(batch_size, length, maxlength, testoccurance=True, transposeInput=False, transposeOutput=False):
  minSeq = 0
  maxSeq = 10 # 2= binary, 10=decimal
  input_data = np.zeros((batch_size, maxlength, maxlength), dtype=np.float32)
  target_output = np.zeros((batch_size, maxlength, maxlength), dtype=np.float32)
  sequence1 = genSeq((batch_size, length, 1), min=minSeq, max=maxSeq, default=False)
  sequence2 = genSeq((batch_size, length, 1), min=minSeq, max=maxSeq, default=False)

  if testoccurance: # test if the sequence is in the test data, replace if so
    for i in range(batch_size):
      input_test_data = np.zeros((1, maxlength, maxlength), dtype=np.float32)
      input_test_data[0, 0:length, 0:1] = sequence1[i] #first sequence
      input_test_data[0, length, 1] = ENDSYM  #pause
      input_test_data[0, length+1:length*2+1, 2:3] = sequence2[i] #second sequence
      input_test_data[0, length*2+1, 3] = ENDSYM  #pause
      while st.isSaved(input_test_data[0], flag="testData"):
        if np.random.binomial(1, 0.5, 1) == 1: # replace first sequence
          sequence1[i] = genSeq((length, 1), min=minSeq, max=maxSeq, default=False)
          input_test_data[0, 0:length, 0:1] = sequence1[i]
        else: # replace second sequence
          sequence2[i] = genSeq((length, 1), min=minSeq, max=maxSeq, default=False)
          input_test_data[0, length+1:length*2+1, 2:3] = sequence2[i]

  input_data[:, 0:length, 0:1] = sequence1 #first sequence
  input_data[:, length, 1] = ENDSYM  #pause
  input_data[:, length+1:length*2+1, 2:3] = sequence2 #second sequence
  input_data[:, length*2+1, 3] = ENDSYM  #pause
  if transposeInput:
    for i in range(batch_size):
      input_data[i] = input_data[i].T

  def calcsum(sequenceA, sequenceB, maxval=(maxSeq)): #calculate sum of two binary numbers
    sumsequence = np.zeros((batch_size, length + 1, length +1))
    assert len(sequenceA) == len(sequenceB)
    for k in range(len(sequenceA)):
      carry = 0 # carry bit
      for j in reversed(range(len(sequenceA[k]))):
          sumsequence[k][j+1][-1] = (sequenceA[k][j][0] + sequenceB[k][j][0] + carry) % maxval
          carry = (sequenceA[k][j][0] + sequenceB[k][j][0] + carry) // maxval
      sumsequence[k][0][-1] = carry
    return sumsequence
  
  cs = calcsum(sequence1, sequence2)
  for i in range(batch_size):
    target_output[i, -(length+1):, -(length+1):] = cs[i] #write sum to target output
    if transposeOutput:
      target_output[i] = target_output[i].T

  return input_data, target_output




def combLoss(prediction, target):
  return mse(prediction, target)

def incrementCurriculum(trainError, epoch, sequence_length, maxsequence_length, curriculum_fre):
  return epoch != 0 and sequence_length < maxsequence_length and epoch % curriculum_fre == 0

Setting up a new session...


In [3]:
#d = generate_data(1, 3, 9, testoccurance=False)
#print(d)

In [4]:
import copy
from dnc.lib import STEPBYSTEPOBJ
import pickle

import os

batch_size = 100
sequence_length = 4
sequence_max_length = 6
iterations = 1*10**3 #200000
summarize_freq = int(iterations/100)
check_freq = int(iterations/20)
curriculum_freq = int(iterations/10)


  # input_size = output_size = args.input_size
mem_slot = 85#48 #112
mem_size = 1
read_heads = 1
curriculum_increment = 1
input_size = 3*sequence_max_length + 2
output_size = 64

replaceWithWrong = True

num_layers = 4 #5

# mem operations = input_size*num_layers

In [5]:
def create_directory_if_not_exists(directory_path):
    if not os.path.exists(directory_path):
        os.makedirs(directory_path)
        print("Directory created successfully!")
    else:
        print("Directory already exists.")

name = 'add_' + str(uuid4().hex)[:3] + ''

lastcp = None

create_directory_if_not_exists(name)

datas = []

loadcp = False #= 'checkpoint_add_46_242000.pth

print(input_size, output_size)

Directory created successfully!
20 64


In [6]:


def lossfnwithReturnOther(output, target, otherReturn, OneAndZero=False, writegateThreshold=0.0, printlosses=False, returnTuple=False, learnthisobjective=LEARNTHISOBJECTIVES, alwaysone=True, epoch=1):
  
  if epoch % 50 == 0:
    with open(f'{name}/output_lossfn.txt', 'a') as lossfile:
      print("EPOCH: ", epoch, file=lossfile)
      for key in otherReturn.keys():
        if isinstance(otherReturn[key], T.Tensor):
          print(key, otherReturn[key].shape, file=lossfile)
          for i in range(otherReturn[key].shape[1] // num_layers):
            print("Input ", i, file=lossfile)
            print(otherReturn[key][0, i*num_layers:(i+1)*num_layers], file=lossfile)

      
  sf = 100 # scale factor for softmax
  base = 0
  if learnthisobjective["general_loss"]:
    base = (exp_loss(output, target) + mse(output, target))*output_size

  allocation_weight_loss = 0
  if learnthisobjective["allocation_weights"]  and isinstance(otherReturn["allocation_weights"], T.Tensor):
    zarr = T.zeros_like(otherReturn["allocation_weights"])
    for b in range(otherReturn["allocation_weights"].shape[0]):
      for i in range(otherReturn["allocation_weights"].shape[1]):
        #zarr[b, i, :, (i % input_size) % zarr.shape[3]] = 1
        zarr[b, i, :, (i // num_layers) % zarr.shape[3]] = 1

    if epoch % 50 == 0:
      with open(f'{name}/output_lossfn.txt', 'a') as lossfile:
        print("Allocation Weights (target): ", zarr.shape, file=lossfile)
        for i in range(zarr.shape[1] // num_layers):
            print("Input ", i, file=lossfile)
            print(zarr[0, i*num_layers:(i+1)*num_layers], file=lossfile)
    allocation_weight_loss = T.sum(T.abs(otherReturn["allocation_weights"] - zarr)) / batch_size

  allocation_gate_loss = 0
  if learnthisobjective["allocation_gate"]  and isinstance(otherReturn["allocation_gate"], T.Tensor):
    if OneAndZero:
      allocation_gate_loss = T.sum(T.abs(T.pow(((otherReturn["allocation_gate"]-0.5)*2),2) - T.ones(otherReturn["allocation_gate"].shape))) / batch_size
    else: # no writing by content similarity
      allocation_gate_loss = T.sum(T.abs(otherReturn["allocation_gate"]-T.ones(otherReturn["allocation_gate"].shape))) / batch_size

  write_gate_loss = 0
  if learnthisobjective["write_gate"]  and isinstance(otherReturn["write_gate"], T.Tensor):
    write_gate_loss = T.sum(T.abs(otherReturn["write_gate"] - T.ones(otherReturn["write_gate"].shape))) / batch_size

  read_modes_loss = 0
  if learnthisobjective["read_modes"]  and isinstance(otherReturn["read_modes"], T.Tensor):
    read_modes_loss = T.sum(T.abs(otherReturn["read_modes"] - T.nn.functional.softmax(otherReturn["read_modes"].clone().detach()*sf, 3))) / batch_size

  write_weights_loss = 0
  if learnthisobjective["write_weights"]  and isinstance(otherReturn["write_weights"], T.Tensor):
    #write_weights_loss = T.sum(T.abs(otherReturn["write_weights"] - T.nn.functional.softmax(otherReturn["write_weights"].clone().detach()*sf, 3))) / batch_size
    zarr = T.zeros_like(otherReturn["write_weights"])
    for b in range(otherReturn["write_weights"].shape[0]):
      for i in range(otherReturn["write_weights"].shape[1]):
        zarr[b, i, :, (i // num_layers) % zarr.shape[3]] = 1
    write_weights_loss = T.sum(T.abs(otherReturn["write_weights"] - zarr)) / batch_size
  
  usage_vector_loss = 0
  if learnthisobjective["usage_vector"]  and isinstance(otherReturn["usage_vector"], T.Tensor):
    uv = T.zeros_like(otherReturn["usage_vector"])
    for b in range(otherReturn["usage_vector"].shape[0]):
      for i in range(otherReturn["usage_vector"].shape[1]-1):
        i+=1
        uv[b, i:, (i-1 // num_layers) % uv.shape[2]] = 1
    usage_vector_loss = T.sum(T.abs(otherReturn["usage_vector"] - uv)) / batch_size
    #print("Usage Vector LOSS: ", usage_vector_loss)
    #print("Usage Vector (output): ", otherReturn["usage_vector"])
    #print("Usage Vector (target): ", uv)
    if epoch % 10 == 0:
      with open(f'{name}/output_lossfn.txt', 'a') as lossfile:
        print("Usage Vector (target): ", uv.shape, file=lossfile)
        for i in range(uv.shape[1] // num_layers):
            print("Input ", i, file=lossfile)
            print(uv[0, i*num_layers:(i+1)*num_layers], file=lossfile)


  if printlosses:
    print("losses: ", base, "\n Allocation Weight ", allocation_weight_loss,  "\n allocation gate", allocation_gate_loss, "\n write gate", write_gate_loss, "\n read modes", read_modes_loss,"\n write weights", write_weights_loss, "\n usage vector", usage_vector_loss)
  
  
  if returnTuple:
    return base, allocation_weight_loss, usage_vector_loss, allocation_gate_loss, write_gate_loss, read_modes_loss, write_weights_loss
  return base + allocation_weight_loss + allocation_gate_loss + write_gate_loss + read_modes_loss + write_weights_loss + usage_vector_loss



otherkey:  allocation_weights
torch.Size([100, 80, 1, 48])
otherkey:  allocation_gate
torch.Size([100, 80, 1])
otherkey:  write_gate
torch.Size([100, 80, 1])
otherkey:  write_weights
torch.Size([100, 80, 1, 48])
otherkey:  read_modes
torch.Size([100, 80, 1, 48])
otherkey:  read_weights
torch.Size([100, 80, 1, 48])
otherkey:  free_gates
torch.Size([100, 80, 1])
otherkey:  erase_vector
torch.Size([100, 80, 1, 1])
otherkey:  write_vector
torch.Size([100, 80, 1, 1])
otherkey:  usage_vector
torch.Size([100, 80, 48])

In [7]:
class CaclulateFactors:
  def __init__(self, iterations, nofactors=7, justOnes=False, softmax=False, softmaxTemp=1):
    self.losses = T.zeros((iterations+1, nofactors))
    self.factors = T.ones(nofactors)
    self.noFactors = nofactors
    self.justOnes = justOnes
    self.softmax = softmax
    self.softmaxTemp = softmaxTemp

  def setFactors(self, factors):
    self.factors = factors
  
  def setJustOnes(self, justOnes):
    self.justOnes = justOnes

  def __call__(self, epoch, currentlosses, lookback=3, resetinterval=int(iterations//100), rescalebiggerthan=200, rescaleby=3):
    if self.justOnes:
      return T.ones(self.noFactors)
    
    for i in range(len(currentlosses)):
      if isinstance(currentlosses[i], T.Tensor):
        self.losses[epoch,i] = currentlosses[i].copy().item()
    
    if epoch <= 1 or (epoch % resetinterval) == 0 or self.justOnes:
      self.factors = T.ones(self.noFactors)
      if self.softmax:
        return T.nn.functional.softmax(self.factors*self.softmaxTemp, 0)
      return self.factors

    oldlosses = self.losses[max(0,epoch-lookback):epoch]
    oldlosses = T.where(oldlosses == 0, self.losses[epoch].unsqueeze(1).T.expand(oldlosses.shape), oldlosses)
    omeandist = T.abs(T.mean(oldlosses, dim=0)-self.losses[epoch])
    firstsignificant = T.where(omeandist > 1.2, 0, T.floor(T.nan_to_num(T.log10(T.mean(T.abs(oldlosses), dim=0)), nan=0, posinf=0, neginf=0)))

    omeandist = omeandist / 10**firstsignificant
    meandist = T.where(omeandist > 1.2, 2.01, omeandist)    
    meandist = (2-meandist)+1

    newfactors = self.factors*meandist
    if T.any((newfactors > rescalebiggerthan)):
      newfactors = newfactors / rescaleby

    self.factors = newfactors
    if self.softmax:
      return T.nn.functional.softmax(self.factors*self.softmaxTemp, 0)
    return self.factors



In [8]:
def CalcLossonValidationData(st, rnn, mhx, batch_size, input_size):
  testset = st.getDataByFlag("testData") # get test data
  testlosses = []
  if len(testset) == 0:
    raise ValueError("No test data available")
  for k in range(int(len(testset) / batch_size)+1): # split testdata into batch_size chunks
    input_TEST_data = np.zeros((batch_size, input_size, input_size))
    target_TEST_output = np.zeros((batch_size, input_size, input_size))
    for i in range(batch_size):
      if i + k * batch_size < len(testset):
        input_TEST_data[i] = testset[k*batch_size+i]["input"]
        target_TEST_output[i] = testset[k*batch_size+i]["output"]
      else: # if there is not enough test data fill the remaining slots with random entries
        robj = random.choice(testset)
        input_TEST_data[i] = robj["input"]
        target_TEST_output[i] = robj["output"]

    input_TEST_data = var(T.from_numpy(input_TEST_data)).type(T.float32)
    target_TEST_output = var(T.from_numpy(target_TEST_output)).type(T.float32)
    if rnn.debug:
      TEST_output, _, _ = rnn(input_TEST_data, (None, mhx, None), reset_experience=True, pass_through_memory=True)
    else:
      TEST_output, _ = rnn(input_TEST_data, (None, mhx, None), reset_experience=True, pass_through_memory=True)

    MyTestloss = combLoss((TEST_output), target_TEST_output).item() # calculate test loss
    testlosses.append(MyTestloss)
  Testloss = np.mean(testlosses) # calculate test loss mean
  return Testloss
    

In [9]:
import select


rnn = DNC(
        input_size=input_size,
        hidden_size=output_size*2, #new *2
        output_size=input_size, #new
        #rnn_type='rnn',
        rnn_type='lstm',
        num_layers=num_layers,
        num_hidden_layers=3, #1
        dropout=0,
        nr_cells=mem_slot,
        cell_size=mem_size,
        read_heads=read_heads,
        gpu_id=-1,
        debug='store_true',
        batch_first=True,
        independent_linears=True,
        nonlinearity='tanh',
    )

with open(f'{name}/output.txt', 'a') as f:
  print(name)
  print(name, file=f)
  
  
  
  if loadcp != False:
    rnn.load_state_dict(T.load(loadcp, weights_only=True))
    rnn.eval()
  
  print(rnn)
  print(rnn, file=f)

  last_save_losses = []

  optimizer = optim.Adam(rnn.parameters(), lr=0.001, eps=1e-9, betas=[0.9, 0.98]) # 0.0001
 
  for i in range(3, sequence_max_length,1): # generate test data
    inputdataspace = 2**i*2 # 2 i bit sequences
    testdatasize = int(inputdataspace*0.15)+1 #15%
    input_data, target_output = generate_data(testdatasize, i, input_size)
    for i in range(testdatasize):
      st.saveInput(input_data[i], output=target_output[i], withoutIncrement=True, flag="testData") #saveData


  (chx, mhx, rv) = (None, None, None)
  Testloss = 0 # loss of test data
  
  learnthisobjective = copy.deepcopy(LEARNTHISOBJECTIVES)
  for key in learnthisobjective.keys():
    learnthisobjective[key] = False

  learnthisobjective["usage_vector"] = True

  #  "general_loss": True,
  #   "allocation_weights": False,
  #   "allocation_gate": False,
  #   "write_gate": False,
  #   "write_weights": False,
  #   "read_modes": False,
  #   "read_weights": False,
  #   "free_gates": False,
  #   "usage_vector": False,
  #   "write_weights": False,


  factors = CaclulateFactors(iterations, nofactors=7, justOnes=True)

  lastobjectivechange = 0

  optimizerdict = optimizer.state_dict()

  losschanged = False

  for epoch in tqdm(range(iterations + 1)):
    summarize = (epoch % summarize_freq == 0)
    take_checkpoint = (epoch != 0) and (epoch % check_freq == 0)
    #llprint("\rIteration {ep}/{tot}".format(ep=epoch, tot=iterations))
    optimizer.zero_grad()


    input_data, target_output = generate_data(batch_size, sequence_length, input_size) # generate data
    input_data = var(T.from_numpy(input_data))
    target_output = var(T.from_numpy(target_output))


    if rnn.debug:
      output, (chx, mhx, rv), v, otherReturn = rnn(input_data, (None, mhx, None), reset_experience=True, pass_through_memory=True, retOther=True)
    else:
      output, (chx, mhx, rv), otherReturn = rnn(input_data, (None, mhx, None), reset_experience=True, pass_through_memory=True, retOther=True)

    #print(otherReturn)
    #print(otherReturn, file=f)
    loss = combLoss((output), target_output)

    if epoch % summarize_freq == 0:
      Testloss = CalcLossonValidationData(st, rnn, mhx, batch_size, input_size)
     

    datas.append({"epoch": epoch, "loss": loss.item(), "testloss": Testloss, "sequencelength": sequence_length}) #append to the datas df
    
    

    if epoch == 10:
      for mybool in RETURNOTHEROBJ["bools"]:
        otherkey = [kc[1] for kc in RETURNOTHEROBJ["keycombs"] if kc[0] == mybool]
        #print("otherkey: ", otherkey)
        if len(otherkey) == 0:
          continue
        otherkey = otherkey[0]
        print("otherkey: ", otherkey)
        if otherReturn[otherkey] is not None and isinstance(otherReturn[otherkey], T.Tensor):
          print(otherReturn[otherkey].shape)


    #print(learnthisobjective, isinstance(otherReturn["usage_vector"], T.Tensor))
    currentlosses = lossfnwithReturnOther(output, target_output, otherReturn, printlosses=epoch % 10 == 0, returnTuple=True, learnthisobjective=learnthisobjective, epoch=epoch)
    base, allocation_weight_loss, cwl, allocation_gate_loss, write_gate_loss, read_modes_loss, write_content_weights_loss = currentlosses
    
    #print("currentlosses: ", currentlosses)
    currfactors = factors(epoch, currentlosses)
    Rloss = 0
    for i in range(currfactors.shape[0]):
      Rloss += currfactors[i] * currentlosses[i]
    print("REAL Loss: ", Rloss.item(), file=f)
    if summarize:
      print("REAL Loss: ", Rloss.item())
      print("Factors: ", currfactors)
      print("losses: ", currentlosses)


    objectivesorder = ["allocation_weights", "allocation_gate", "write_gate", "read_modes", "write_weights", "usage_vector", "general_loss"]
    if epoch > (int(iterations//12) + lastobjectivechange) and len(objectivesorder) > 0:
         startlearningthis = objectivesorder.pop(0)
         learnthisobjective[startlearningthis] = True
         losschanged = True
         print(f"Learning {startlearningthis} at {epoch}", file=f)
         print(f"Learning {startlearningthis} at {epoch}")
         
      
    
    if losschanged:
      lastobjectivechange = epoch
      optimizer.load_state_dict(optimizerdict)
      optimizer.zero_grad()
      losschanged = False

    


    if np.isnan(Rloss.item()) or np.isinf(Rloss.item()) or np.isclose(Rloss.item(), 0):
      continue
    Rloss.backward()


    T.nn.utils.clip_grad_norm_(rnn.parameters(), 100)
    optimizer.step()
    loss_value = loss.item()

    
    

    # detach memory from graph
    mhx = { k : (v.detach() if isinstance(v, var) else v) for k, v in mhx.items() }

    last_save_losses.append(loss_value)
    loss = np.mean(last_save_losses)

    if summarize:
      llprint("\n\tAvg. Loss: %.4f\n" % (loss))
      llprint("\n\tAvg. Test Loss: %.4f\n" % (Testloss))
      if np.isnan(loss):
        continue
        #raise Exception('nan Loss')
      print("\n")

    if summarize and rnn.debug:
      last_save_losses = []

      viz.heatmap(
            v['memory'],
            opts=dict(
                xtickstep=10,
                ytickstep=2,
                title= name + 'Memory, t: ' + str(epoch) + ', loss: ' + str(loss),
                ylabel='layer * time',
                xlabel='mem_slot * mem_size'
            )
        )

      viz.heatmap(
            v['link_matrix'][-1].reshape(mem_slot, mem_slot),
            opts=dict(
                xtickstep=10,
                ytickstep=2,
                title=name + 'Link Matrix, t: ' + str(epoch) + ', loss: ' + str(loss),
                ylabel='mem_slot',
                xlabel='mem_slot'
            )
      )
     
      viz.heatmap(
            v['precedence'],
            opts=dict(
                xtickstep=10,
                ytickstep=2,
                title=name + 'Precedence, t: ' + str(epoch) + ', loss: ' + str(loss),
                ylabel='layer * time',
                xlabel='mem_slot'
            )
      )

    if incrementCurriculum(loss, epoch, sequence_length, sequence_max_length, curriculum_freq):
      sequence_length = sequence_length + curriculum_increment
      print("Increasing max length to " + str(sequence_length))

    if take_checkpoint:
      cur_weights = rnn.state_dict()
      T.save(cur_weights, f'{name}/checkpoint_{epoch}.pth')
      lastcp = f'{name}/checkpoint_{epoch}.pth'
      df = pd.DataFrame(datas)
      pickle.dump(df, open(f"{name}/df_{epoch}.pkl", "wb"))


  df = pd.DataFrame(datas) # plot loss 
  pickle.dump(df, open(f"{name}/df_total.pkl", "wb"))

  fig = go.Figure()
  fig.add_trace(go.Scatter(x=df["epoch"], y=df["loss"], mode='lines', name='Train Data'))
  fig.add_trace(go.Scatter(x=df["epoch"], y=df["testloss"], mode='lines', name='Test Data'))
  fig.update_layout(title='Losses', xaxis_title='Epoch', yaxis_title='Loss')
  fig.show()
  fig.write_html(f"{name}/losses.html")




add_f4a

----------------------------------------
DNC(20, 128, num_layers=4, num_hidden_layers=3, nr_cells=85, read_heads=1, cell_size=1, independent_linears=True, debug=store_true)
DNC(
  (lstm_layer_0): LSTM(21, 128, num_layers=3, batch_first=True)
  (lstm_layer_1): LSTM(129, 128, num_layers=3, batch_first=True)
  (lstm_layer_2): LSTM(129, 128, num_layers=3, batch_first=True)
  (lstm_layer_3): LSTM(129, 128, num_layers=3, batch_first=True)
  (rnn_layer_memory_shared): Memory(
    (allocation_weight_transform): Sequential(
      (0): Linear(in_features=85, out_features=85, bias=True)
      (1): LSTM(85, 85, num_layers=7, batch_first=True)
      (2): extract_tensor()
      (3): Sigmoid()
      (4): Linear(in_features=85, out_features=85, bias=True)
      (5): LSTM(85, 85, num_layers=7, batch_first=True)
      (6): extract_tensor()
      (7): Sigmoid()
      (8): Linear(in_features=85, out_features=85, bias=True)
      (9): LSTM(85, 85, num_layers=7, batch_first=True)
      (10): extrac

  0%|          | 1/1001 [00:26<7:22:51, 26.57s/it]

losses:  0 
 Allocation Weight  0 
 allocation gate 0 
 write gate 0 
 read modes 0 
 write weights 0 
 usage vector 0
REAL Loss:  0.0
Factors:  tensor([1., 1., 1., 1., 1., 1., 1.])
losses:  (0, 0, 0, 0, 0, 0, 0)


  1%|          | 11/1001 [03:01<4:19:27, 15.72s/it]

otherkey:  allocation_weights
otherkey:  allocation_gate
otherkey:  write_gate
otherkey:  write_weights
otherkey:  read_modes
otherkey:  read_weights
otherkey:  free_gates
otherkey:  erase_vector
otherkey:  write_vector
otherkey:  usage_vector
losses:  0 
 Allocation Weight  0 
 allocation gate 0 
 write gate 0 
 read modes 0 
 write weights 0 
 usage vector 0
REAL Loss:  0.0
Factors:  tensor([1., 1., 1., 1., 1., 1., 1.])
losses:  (0, 0, 0, 0, 0, 0, 0)


  2%|▏         | 21/1001 [05:30<4:13:56, 15.55s/it]

losses:  0 
 Allocation Weight  0 
 allocation gate 0 
 write gate 0 
 read modes 0 
 write weights 0 
 usage vector 0
REAL Loss:  0.0
Factors:  tensor([1., 1., 1., 1., 1., 1., 1.])
losses:  (0, 0, 0, 0, 0, 0, 0)


  3%|▎         | 31/1001 [07:59<4:08:07, 15.35s/it]

losses:  0 
 Allocation Weight  0 
 allocation gate 0 
 write gate 0 
 read modes 0 
 write weights 0 
 usage vector 0
REAL Loss:  0.0
Factors:  tensor([1., 1., 1., 1., 1., 1., 1.])
losses:  (0, 0, 0, 0, 0, 0, 0)


  4%|▍         | 41/1001 [10:28<4:08:27, 15.53s/it]

losses:  0 
 Allocation Weight  0 
 allocation gate 0 
 write gate 0 
 read modes 0 
 write weights 0 
 usage vector 0
REAL Loss:  0.0
Factors:  tensor([1., 1., 1., 1., 1., 1., 1.])
losses:  (0, 0, 0, 0, 0, 0, 0)


  5%|▌         | 51/1001 [12:53<3:58:52, 15.09s/it]

losses:  0 
 Allocation Weight  0 
 allocation gate 0 
 write gate 0 
 read modes 0 
 write weights 0 
 usage vector 0
REAL Loss:  0.0
Factors:  tensor([1., 1., 1., 1., 1., 1., 1.])
losses:  (0, 0, 0, 0, 0, 0, 0)


  6%|▌         | 61/1001 [15:21<4:00:29, 15.35s/it]

losses:  0 
 Allocation Weight  0 
 allocation gate 0 
 write gate 0 
 read modes 0 
 write weights 0 
 usage vector 0
REAL Loss:  0.0
Factors:  tensor([1., 1., 1., 1., 1., 1., 1.])
losses:  (0, 0, 0, 0, 0, 0, 0)


  7%|▋         | 71/1001 [17:53<4:07:23, 15.96s/it]

losses:  0 
 Allocation Weight  0 
 allocation gate 0 
 write gate 0 
 read modes 0 
 write weights 0 
 usage vector 0
REAL Loss:  0.0
Factors:  tensor([1., 1., 1., 1., 1., 1., 1.])
losses:  (0, 0, 0, 0, 0, 0, 0)


  8%|▊         | 81/1001 [20:23<4:04:03, 15.92s/it]

losses:  0 
 Allocation Weight  0 
 allocation gate 0 
 write gate 0 
 read modes 0 
 write weights 0 
 usage vector 0
REAL Loss:  0.0
Factors:  tensor([1., 1., 1., 1., 1., 1., 1.])
losses:  (0, 0, 0, 0, 0, 0, 0)


  8%|▊         | 84/1001 [21:10<3:54:19, 15.33s/it]

Learning allocation_weights at 84


  9%|▉         | 91/1001 [23:05<4:18:45, 17.06s/it]

losses:  0 
 Allocation Weight  0 
 allocation gate 0 
 write gate 0 
 read modes 0 
 write weights 0 
 usage vector 0
REAL Loss:  0.0
Factors:  tensor([1., 1., 1., 1., 1., 1., 1.])
losses:  (0, 0, 0, 0, 0, 0, 0)


 10%|█         | 101/1001 [26:03<4:25:30, 17.70s/it]

losses:  0 
 Allocation Weight  0 
 allocation gate 0 
 write gate 0 
 read modes 0 
 write weights 0 
 usage vector 0
REAL Loss:  0.0
Factors:  tensor([1., 1., 1., 1., 1., 1., 1.])
losses:  (0, 0, 0, 0, 0, 0, 0)


 10%|█         | 104/1001 [27:07<3:53:58, 15.65s/it]


KeyboardInterrupt: 

In [None]:
# #from dnc.dnc import DNC

# if 'rnn' in locals() or 'rnn' in globals():
#   del rnn

# rnn = DNC(
#         input_size=input_size,
#         hidden_size=output_size,
#         rnn_type='rnn',
#         #rnn_type='lstm',
#         num_layers=num_layers,
#         num_hidden_layers=1,
#         dropout=0,
#         nr_cells=mem_slot,
#         cell_size=mem_size,
#         read_heads=read_heads,
#         gpu_id=-1,
#         debug='store_true',
#         batch_first=True,
#         independent_linears=True,
#         nonlinearity='tanh',
#     )

# if not 'name' in locals() or not 'name' in globals():
#   name = 'add_9b2'
# if not 'lastcp' in locals() or not 'lastcp' in globals():
#   lastcp = f'{name}/checkpoint_1000.pth'
  
print(name)

with open(f"{name}/output_2.txt", "w") as f:
  batch_size=1
  rnn.load_state_dict(T.load(lastcp, weights_only=True))
  rnn.eval()
  
  stepByStep = copy.deepcopy(STEPBYSTEPOBJ)

  i=0
  llprint("\nIteration %d/%d" % (i, iterations))
  # We test now the learned generalization using sequence_max_length examples
  random_length = np.random.randint(2, sequence_length  + 1)
  input_data, target_output = generate_data(1, random_length, input_size)

  #print (input_data, target_output)

  
  input_data = var(T.from_numpy(input_data))
  target_output = var(T.from_numpy(target_output))

  stepByStep["CurrI"] = i
  stepByStep["currentObj"] = copy.deepcopy(stepByStep["defObj"])
  stepByStep["currentObj"]["i"] = i 
  stepByStep["input"] = input_data.detach().numpy()
  stepByStep["target"] = target_output.detach().numpy()
  stepByStep["MEMORYCOLUMNS"] = mem_slot
  stepByStep["INPUTSIZE"] = input_size
  stepByStep["OUTPUTSIZE"] = output_size
  stepByStep["read_heads"] = read_heads
    
  if rnn.debug:
    output, (chx, mhx, rv), v = rnn(input_data, (None, None, None), reset_experience=True, pass_through_memory=True, stepByStep=stepByStep)
  else:
    output, (chx, mhx, rv) = rnn(input_data, (None, None, None), reset_experience=True, pass_through_memory=True, stepByStep=stepByStep)

  stepByStep["output"] = output
  stepByStep["objects"].append(copy.deepcopy(stepByStep["currentObj"]))
  stepByStep['loss'] = str(mse(output, target_output).item())
  #output = output[:, -1, :].sum().data.cpu().numpy()
  #target_output = target_output.sum().data.cpu().numpy()
  print("loss", mse(output, target_output).item())
  print(stepByStep["input"].shape)
  print(stepByStep["output"].shape)
  print(stepByStep["target"].shape)
  #raise Exception("STOP")

  print(stepByStep)

  pickle.dump(stepByStep, open(f"{name}/stepByStep.pkl", "wb"))

  print("\n\n")
  print("Input: ", tensor2string(input_data[0]), file=f)
  print("Output: ", tensor2string(output[0]), file=f)
  print("Target: ", tensor2string(target_output[0]), file=f)
  print("CE Loss: ", str(mse(output[0].to(dtype=T.float32), target_output[0]).item()), file=f)
  print("Log Loss: ", str(criterion(output[0].to(dtype=T.float32), target_output[0]).item()), file=f)
  print("Exp Loss: ", str(exp_loss(output[0].to(dtype=T.float32), target_output[0]).item()), file=f)
  print("\n\n")
  print("CE Loss: ", str(mse(output.to(dtype=T.float32), target_output).item()), file=f)
  print("Log Loss: ", str(criterion(output.to(dtype=T.float32), target_output).item()), file=f)
  print("Exp Loss: ", str(exp_loss(output.to(dtype=T.float32), target_output).item()), file=f)
  print("\n\n")

  try:
    print("\nReal value: ", ' = ' + str(int(target_output[0])))
    print("Predicted:  ", ' = ' + str(int(output // 1)) + " [" + str(output) + "]")
  except Exception as e:
    pass

  

add_8e5

Iteration 0/3000loss 0.019999999552965164
(1, 20, 20)
torch.Size([1, 20, 20])
(1, 20, 20)
{'stepByStep': True, 'CurrI': 0, 'time': 19, 'layer': 2, 'currentObj': {'i': 0, 'time': 19, 'layer': 2, 'inputs': array([[ 9.91816223e-01, -9.94312286e-01,  9.90811944e-01,
         7.97125101e-01,  9.03635658e-03, -2.69453228e-03,
         2.81421235e-04,  4.62055253e-03, -9.65788603e-01,
        -9.99353945e-01, -1.14789675e-03,  2.03004535e-02,
         1.18128804e-03, -9.87029731e-01, -9.93462145e-01,
         9.45764482e-01, -1.75820815e-03,  9.99072731e-01,
        -8.78133595e-01, -7.17715314e-03,  4.39590305e-01,
         2.01094761e-01,  6.97500587e-01, -9.99353349e-01,
         7.35875545e-03, -1.70307979e-01,  9.96370077e-01,
        -9.05499220e-01, -9.99168038e-01, -9.99915302e-01,
         9.71591890e-01,  9.96556520e-01,  4.56004813e-02,
        -7.20250309e-01,  5.49733303e-02,  9.98406827e-01,
         9.94412184e-01,  2.83380941e-04, -9.98255014e-01,
        -9.59943794e