In [1]:
import json
from glob import glob
from tqdm.notebook import tqdm
import os
import random
import re


def read_json(path):
    """Read json file.

    Args
    ----
    path: path to the json

    Returns
    -------
    loaded: loaded dict

    """
    with open(path, "r") as stream:
        loaded = json.load(stream)

    return loaded

def atoi(text):
    return int(text) if text.isdigit() else text


def natural_keys(text):
    """
    copied from https://stackoverflow.com/questions/5967500/how-to-correctly-sort-a-string-with-a-number-inside
    alist.sort(key=natural_keys) sorts in human order
    http://nedbatchelder.com/blog/200712/human_sorting.html
    (See Toothy's implementation in the comments)

    """
    return [atoi(c) for c in re.split(r"(\d+)", text)]


In [2]:
data_all = {}

paths = glob("../data/*.json")
paths.sort(key=natural_keys)
for path in tqdm(paths):
    data = read_json(path)

    print(path, "\t", list(data.keys()))
    data_all[path] = data

    maximum_history = os.path.basename(path).split("_")[0]
    memory_capacity = os.path.basename(path).split("_")[1].split(".json")[0]

    print(
        f"maximum_history: {maximum_history}\nmemory_capacity per system: {memory_capacity}\n"
    )

HBox(children=(FloatProgress(value=0.0, max=7.0), HTML(value='')))

../data\128_1.json 	 ['val', 'test', 'max_history', 'capacity', 'rewards', 'accuracy']
maximum_history: 128
memory_capacity per system: 1

../data\128_2.json 	 ['val', 'test', 'max_history', 'capacity', 'rewards', 'accuracy']
maximum_history: 128
memory_capacity per system: 2

../data\128_4.json 	 ['val', 'test', 'max_history', 'capacity', 'rewards', 'accuracy']
maximum_history: 128
memory_capacity per system: 4

../data\128_8.json 	 ['val', 'test', 'max_history', 'capacity', 'rewards', 'accuracy']
maximum_history: 128
memory_capacity per system: 8

../data\128_16.json 	 ['val', 'test', 'max_history', 'capacity', 'rewards', 'accuracy']
maximum_history: 128
memory_capacity per system: 16

../data\128_32.json 	 ['val', 'test', 'max_history', 'capacity', 'rewards', 'accuracy']
maximum_history: 128
memory_capacity per system: 32

../data\128_64.json 	 ['val', 'test', 'max_history', 'capacity', 'rewards', 'accuracy']
maximum_history: 128
memory_capacity per system: 64




In [3]:
import os

os.environ["CUDA_VISIBLE_DEVICES"] = ""

from transformers import T5Tokenizer, T5ForConditionalGeneration

tokenizer = T5Tokenizer.from_pretrained("tscholak/t5.1.1.lm100k.base")
model = T5ForConditionalGeneration.from_pretrained("tscholak/t5.1.1.lm100k.base")


# We want to do this but this is too big. Tae'll run your prompts on a beefy server later.
# tokenizer = AutoTokenizer.from_pretrained("bigscience/T0pp")
# model = AutoModelForSeq2SeqLM.from_pretrained("bigscience/T0pp")

## Prompts using semantic memories only

In [4]:
j = 0
k = 1
scores = []
while j <= 6:
    prompts = []
    answers = []
    for i in range(len(data_all["../data\\128_"+str(k)+".json"]["test"])):
        random_point = data_all["../data\\128_"+str(k)+".json"]["test"][i]
        random_point["episodic_memory_system"] = sorted(random_point["episodic_memory_system"], key=lambda x: x[-1])
        for idx, mem in enumerate(random_point['episodic_memory_system']):
            max_len = len(random_point["episodic_memory_system"])
            days = len(random_point['episodic_memory_system']) - idx - 1
            if days == 0:
                timestamp = "today"
            else:
                timestamp = f"{days} days ago"
            random_point['episodic_memory_system'][idx][-1] = timestamp

        prompt = []

        for mem in random_point["semantic_memory_system"]:
            prompt.append(f"{mem[-1]} {mem[0]} were found at {mem[2]}.")
        prompt.append(f"Where is {random_point['question'][0]}?")
        prompt = ' '.join(prompt)

        prompts.append(prompt)
        answers.append(random_point['correct_answer'])

        predictions = []

    for prompt in prompts:
        input_ids = tokenizer(prompt, return_tensors="pt").input_ids
        outputs = model.generate(input_ids)
        prediction = tokenizer.decode(outputs[0], skip_special_tokens=True)
        predictions.append(prediction)
        
    correct = 0
    wrong = 0
    for answer, pred in zip(answers, predictions):
        if answer in pred:
            correct += 1
        else:
            wrong += 1
    scores.append(correct / (correct+ wrong))
    
    j+=1
    k*=2

In [5]:
j=0
k=1
while j<=6:
    print("128_"+str(k)+".json test accuracy: "+str(scores[j]))
    j+=1
    k*=2

128_1.json test accuracy: 0.0390625
128_2.json test accuracy: 0.0625
128_4.json test accuracy: 0.0234375
128_8.json test accuracy: 0.0859375
128_16.json test accuracy: 0.09375
128_32.json test accuracy: 0.09375
128_64.json test accuracy: 0.0390625


## converted all episodic to semantic and added semantic memories to the prompt based on the question

In [6]:
import pandas as pd

In [8]:
j = 0
k = 1
scores = []
while j <= 6:
    cols = ["object", "location"]
    lss = []

    for i in range(len(data_all["../data\\128_"+str(k)+".json"]["test"])):
        random_point = data_all["../data\\128_"+str(k)+".json"]["test"][i]
        random_point["episodic_memory_system"] = sorted(random_point["episodic_memory_system"], key=lambda x: x[-1])

        for mem in random_point["episodic_memory_system"]:
            lss.append(["".join(mem[0].split()[1:2]), "".join(mem[2].split()[1:2])])
    observations = pd.DataFrame(lss, columns = cols)
    
    obs = observations.groupby(observations.columns.tolist()).size().reset_index().rename(columns={0:'ranking'})
    obs_rank_sorted = obs.sort_values('ranking', ascending = False).reset_index(drop=True)
    
    prompts = []
    answers = []
    for i in range(len(data_all["../data\\128_"+str(k)+".json"]["test"])):
        random_point = data_all["../data\\128_"+str(k)+".json"]["test"][i]
        for idx, mem in enumerate(random_point['episodic_memory_system']):
            max_len = len(random_point["episodic_memory_system"])
            days = len(random_point['episodic_memory_system']) - idx - 1
            if days == 0:
                timestamp = "today"
            else:
                timestamp = f"{days} days ago"
            random_point['episodic_memory_system'][idx][-1] = timestamp

        prompt = []
        episodic_object = obs_rank_sorted.loc[obs_rank_sorted['object'] == "".join(random_point['question'][0].split()[1:2])].reset_index()

        for mem in random_point["episodic_memory_system"]:
            prompt.append(f"{mem[0]} was at {mem[2]}, {mem[3]}.")

        if len(episodic_object) >= k:
            for i in range(k):
                prompt.append(str(episodic_object["ranking"][i]) +" " +str(episodic_object["object"][i]) + " were found at "+str(episodic_object["location"][i])+".")
        else:
            for i in range(len(episodic_object)):
                prompt.append(str(episodic_object["ranking"][i]) +" " +str(episodic_object["object"][i]) + " were found at "+str(episodic_object["location"][i])+".")

        prompt.append(f"Where is {random_point['question'][0]}?")
        prompt = ' '.join(prompt)

        prompts.append(prompt)
        answers.append(random_point['correct_answer'])

        predictions = []

    for prompt in prompts:
        input_ids = tokenizer(prompt, return_tensors="pt").input_ids
        outputs = model.generate(input_ids)
        prediction = tokenizer.decode(outputs[0], skip_special_tokens=True)
        predictions.append(prediction)
        
    correct = 0
    wrong = 0
    for answer, pred in zip(answers, predictions):
        if answer in pred:
            correct += 1
        else:
            wrong += 1
            
    scores.append(correct / (correct+ wrong))
    j+=1
    k*=2

Token indices sequence length is longer than the specified maximum sequence length for this model (526 > 512). Running this sequence through the model will result in indexing errors


In [9]:
j=0
k=1
while j<=6:
    print("128_"+str(k)+".json test accuracy: "+str(scores[j]))
    j+=1
    k*=2

128_1.json test accuracy: 0.5703125
128_2.json test accuracy: 0.3671875
128_4.json test accuracy: 0.375
128_8.json test accuracy: 0.359375
128_16.json test accuracy: 0.421875
128_32.json test accuracy: 0.4453125
128_64.json test accuracy: 0.5546875
