# Experiment 1 (The red/blue/green/yellow house)



In [1]:
from IPython.display import display, Markdown as md
import ipywidgets as widgets
import itertools
import math
import matplotlib
import matplotlib.pyplot as plt
import numpy
import numpy as np
import random

import pandas as pd
import pathlib
from preprocess import preprocess_text

from src.lib.DSDM import DSDM

from sklearn.metrics import pairwise_distances
from sklearn.neighbors import LocalOutlierFactor

import torch
import torchhd as thd
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F 

# Type checking
from typing import List

In [17]:
# Inject a CSS rule that sets the width of the `.container` element to 50%,
# so the rendered notebook output below is narrower.
from IPython.display import display, HTML
display(HTML("<style>.container { width:50% !important; }</style>"))

In [3]:
def fix_seed():
    """Seed the Python, NumPy and PyTorch RNGs with the constant 42.

    Also puts cuDNN into deterministic mode and turns its benchmark mode
    off. The seed in use is printed.
    """
    seed = 42
    print("[ Using Seed : ", seed, " ]")

    # Python and NumPy generators.
    random.seed(seed)
    numpy.random.seed(seed)

    # PyTorch generators (CPU and CUDA).
    for seed_torch in (torch.manual_seed, torch.cuda.manual_seed, torch.cuda.manual_seed_all):
        seed_torch(seed)

    cudnn = torch.backends.cudnn
    cudnn.benchmark = False
    cudnn.deterministic = True
    
def load_data(path, bs=0, shuffle=False):
    """Read the UTF-8 text file at `path` and return its lines.

    Line terminators are stripped. `bs` and `shuffle` are accepted but not
    used.
    """
    with open(path, encoding='utf-8') as handle:
        contents = handle.read()
    return contents.splitlines()


def compute_distances_gpu(X, Y):
    """Compute the pairwise Euclidean distances between rows of X and rows of Y.

    Uses the expansion ||x - y||^2 = -2 x.y + ||y||^2 + ||x||^2, so the whole
    computation is one matrix product plus two row-norm reductions.

    Args:
        X: 2-D tensor, one vector per row.
        Y: 2-D tensor, one vector per row, same number of columns as X.

    Returns:
        Tensor of shape (X.shape[0], Y.shape[0]) holding the distances.
    """
    squared = (-2 * torch.mm(X, Y.T) +
               torch.sum(torch.pow(Y, 2), dim=1) +
               torch.sum(torch.pow(X, 2), dim=1).view(-1, 1))
    # Rounding in the expanded form can leave a tiny negative value for
    # (near-)identical rows; without the clamp sqrt would return NaN there.
    return torch.sqrt(torch.clamp(squared, min=0))

In [4]:
# Seed every RNG before anything random is generated.
fix_seed()

# Use the GPU when torch reports one, otherwise fall back to the CPU.
device = torch.device("cpu") if not torch.cuda.is_available() else torch.device("cuda")

# Dimensionality of every hypervector.
dim = 2_000

# Cleanup memory: maps each token to its atomic HV.
cleanup = dict()

[ Using Seed :  42  ]


In [5]:
# DSDM class
class SONN(nn.Module):
    """Online memory of address vectors with an adaptive novelty threshold.

    `save` either appends the observed vector as a new address or nudges the
    existing addresses towards it, depending on how the cosine distance to
    the closest address compares with an exponential moving average (EMA) of
    such distances. `retrieve` returns a softmin-weighted sum of the stored
    addresses.

    Args:
        address_size: Stored on the instance; not otherwise used here.
        ema_time_period: EMA period; the smoothing factor is
            ``2 / (ema_time_period + 1)``.
        learning_rate_update: Step size used when existing addresses are
            pulled towards a saved vector.
        temperature: Divisor applied to the distances before the softmin.
        normalize: When True, distances are computed between the
            element-wise signs of the addresses and of the query.
    """

    def __init__(self, address_size, ema_time_period, learning_rate_update, temperature, normalize=False):
        super().__init__()
        self.address_size = address_size
        # Starts empty; the first call to `save` instantiates the memory.
        self.addresses = torch.tensor([]).to(device)

        self.normalize = normalize

        self.ema = 0
        self.ema_time_period = ema_time_period
        self.ema_temperature = 2 / (self.ema_time_period + 1)

        self.learning_rate_update = learning_rate_update

        self.temperature = temperature

    def _cosine_distances(self, query_address):
        """Return the cosine distance from every stored address to the query."""
        cos = torch.nn.CosineSimilarity()
        if self.normalize:
            similarities = cos(self.addresses.sgn(), query_address.sgn())
        else:
            similarities = cos(self.addresses, query_address)
        return 1 - similarities

    def retrieve(self, query_address):
        """Return the softmin-weighted combination of the stored addresses.

        The weights come from the cosine distances to `query_address`
        (computed on sign vectors when `normalize` is set); the weighted sum
        itself always uses the raw stored addresses.

        Raises:
            RuntimeError: If nothing has been saved yet.
        """
        if self.addresses.shape[0] == 0:
            raise RuntimeError("Cannot retrieve from an empty memory; call save() first.")

        with torch.no_grad():
            distances = self._cosine_distances(query_address)

            # Calculate the softmin weights.
            softmin_weights = F.softmin(distances/self.temperature, dim=-1)

            # Weight and pool the memory addresses in one matrix product.
            retrieved_content = torch.matmul(softmin_weights, self.addresses.to(device)).view(-1)

        return retrieved_content

    def save(self, query_address):
        """Store `query_address`, as a new address or as an update.

        The first observation always becomes the first address. Afterwards
        the EMA is updated with the distance to the closest address; the
        observation is appended when that distance exceeds the updated EMA,
        otherwise all addresses are moved towards the observation, weighted
        by their softmin weights.
        """
        # The memory is instantiated with the first observation.
        if self.addresses.shape[0] == 0:
            self.addresses = torch.cat((self.addresses, query_address.view(1, -1)))
            return

        distances = self._cosine_distances(query_address)
        # Distance to the closest stored address.
        min_distance = torch.min(distances, dim=0)[0].item()

        # The EMA is updated before it is used as the threshold below.
        self.ema += self.ema_temperature * (min_distance - self.ema)

        if min_distance > self.ema:
            # Novel enough: add a new address equal to the observation.
            self.addresses = torch.cat((self.addresses, query_address.view(1, -1)))
        else:
            # Otherwise pull every address towards the observation, weighted by
            # its softmin weight.
            softmin_weights = F.softmin(distances/self.temperature, dim=-1)
            self.addresses += self.learning_rate_update * torch.mul(softmin_weights.view(-1, 1), query_address - self.addresses)

        return

In [6]:
def generate_atomic_HVs_from_tokens_and_add_them_to_cleanup(tokens: List[str]) -> None:
    """Give every token not yet in `cleanup` a fresh random atomic HV.

    Tokens already present in the module-level `cleanup` dict are left
    untouched. New entries are drawn with `thd.MAPTensor.random(1, dim)[0]`,
    in the order the tokens appear.

    Args:
        tokens: Tokens of one sentence.
    """
    for token in tokens:
        # Membership test instead of `cleanup.get(token) == None`, which would
        # compare a stored hypervector against None with `==`.
        if token not in cleanup:
            # Generate a random HV representation and add it to the cleanup memory.
            cleanup[token] = thd.MAPTensor.random(1, dim)[0]

    return


def generate_chunk_representations_and_save_them_to_memory(memory, tokens, chunk_lengths=[], output=False):
    """Save every contiguous chunk of `tokens` to `memory`.

    For each requested chunk length and each start position, the atomic HVs
    of the chunk's tokens (looked up in `cleanup`) are added onto a fresh
    `thd.MAPTensor.empty(1, dim)[0]` vector, and the result is passed to
    `memory.save`.

    Args:
        memory: Object exposing `save(vector)`.
        tokens: Tokens of one sentence.
        chunk_lengths: Chunk sizes to build. Empty means every size from 1 to
            `len(tokens)`; sizes larger than the sentence are dropped.
        output: When True, print a trace of the chunks being built.
    """
    def trace(*parts):
        # Debug printing, active only when `output` is set.
        if output:
            print(*parts)

    # The sentence length is also the largest possible chunk size.
    n = len(tokens)
    requested = np.array(chunk_lengths, dtype=int)

    if len(requested) == 0:
        # Nothing requested: build chunks of every possible size.
        sizes = np.arange(1, n + 1)
    else:
        # Keep only the sizes that fit into the sentence.
        sizes = requested[requested <= n]

    for no_tokens in sizes:
        trace("no. of tokens: ", no_tokens)
        for start in range(n):
            trace("start index: ", start)
            stop = start + no_tokens
            # Stop once the remaining tokens cannot fill a chunk of this size.
            if stop > n:
                trace("Not enough tokens left.")
                break

            chunk_hv = thd.MAPTensor.empty(1, dim)[0]
            # Superpose the atomic HVs of the chunk's tokens.
            for token in tokens[start:stop]:
                trace(token)
                chunk_hv += cleanup[token]

            memory.save(chunk_hv)

    return


def generate_query(tokens: list):
    """Build the query hypervector for a tokenised sentence.

    The atomic HVs of all tokens are added onto
    `thd.MAPTensor.empty(1, dim)`, so the result keeps that (1, dim) layout.
    A token that is not yet in `cleanup` gets a fresh random atomic HV, which
    is also stored in `cleanup` (i.e. querying can grow the cleanup memory).

    Args:
        tokens: Tokens of the query sentence.

    Returns:
        The superposed query hypervector.
    """
    HC_representation = thd.MAPTensor.empty(1, dim)

    for token in tokens:
        # Membership test instead of `cleanup.get(token) == None`, which would
        # compare a stored hypervector against None with `==`.
        if token not in cleanup:
            # Unseen token: generate an atomic HC and remember it.
            cleanup[token] = thd.MAPTensor.random(1, dim)[0]
        # Add (i.e., superpose) the token's atomic HC to the query.
        HC_representation += cleanup[token]

    return HC_representation

In [7]:
# Note: pruning code copied from the original DSDM implementation.
def prune(self):
    """Drop addresses (and their content rows) once the memory exceeds `self.N_prune`.

    Nothing happens while `len(self.Address) <= self.N_prune`. Otherwise a
    LocalOutlierFactor model is fitted on the addresses and its
    `negative_outlier_factor_` scores rank the candidates, highest score
    first. Exactly `len(self.Address) - self.N_prune` rows are removed from
    both `self.Address` and `self.M`:

    * ``prune_mode == "naive"``: the rows with the highest scores.
    * ``prune_mode == "balance"``: rows in descending score order, but only
      from classes that still hold more than ``N_prune // n_class`` rows.
      The class of a row is the argmax over `self.M` along dim 1
      (NOTE(review): presumably M holds per-class content - confirm).

    Reads `self.N_prune`, `self.M`, `self.Address`, `self.n_neighbors`,
    `self.contamination` and `self.prune_mode`.
    """
    N_pruning = self.N_prune  # Maximum no. of (address) nodes the memory can have.
    n_class = self.M.size(1)

    # Nothing to prune until the maximum number of nodes is exceeded.
    if len(self.Address) <= N_pruning:
        return

    # Apply LOF to get the scores used for ranking.
    clf = LocalOutlierFactor(n_neighbors=min(len(self.Address), self.n_neighbors), contamination=self.contamination)
    A = self.Address
    M = self.M
    clf.fit_predict(A.cpu())  # Fitting populates negative_outlier_factor_.
    x_scor = torch.tensor(clf.negative_outlier_factor_)

    prun_N_addr = len(A) - N_pruning  # No. of addresses that must be pruned out.

    # "Naive" pruning mode.
    if self.prune_mode == "naive":
        _, ind = torch.topk(x_scor, prun_N_addr)
        keep = [True] * len(A)  # True = row survives.
        for i in ind:
            keep[i] = False
        self.M = self.M[keep]  # Delete content from address.
        self.Address = self.Address[keep]  # Delete address.

    # "Balance" pruning mode.
    # Idea: Prune from each class instead of the nodes with the highest densities.
    if self.prune_mode == "balance":
        per_class_cap = N_pruning // n_class  # Max. number of allowed nodes per class.
        _, ind = torch.sort(x_scor, descending=True)

        keep = [True] * len(A)  # True = row survives.
        arg_m = torch.argmax(M, axis=1)  # Get predicted class.
        N_remaining = torch.bincount(arg_m)  # No. of rows currently assigned to each class.
        count = prun_N_addr
        # Walk the candidates from the top-ranked one on. Iterating `ind`
        # directly includes ind[0] (previously skipped by a pre-increment)
        # and cannot index past the end of `ind`.
        for indice in ind:
            if count == 0:
                break
            if N_remaining[arg_m[indice]] > per_class_cap:
                N_remaining[arg_m[indice]] -= 1
                keep[indice] = False
                count -= 1
        self.M = self.M[keep]
        self.Address = self.Address[keep]
    return

## Run experiment

In [8]:
def get_similarities_to_atomic_HVs(memory, sentence):
    """Query `memory` with `sentence` and compare the result with every atomic HV.

    The sentence is tokenised with `preprocess_text`, turned into a query by
    `generate_query` and passed to `memory.retrieve`. The retrieved vector is
    then compared (cosine similarity) with each entry of `cleanup`.

    Args:
        memory: Object exposing `retrieve(query)`.
        sentence: Raw sentence text.

    Returns:
        DataFrame with columns 'sentence', 'token', 'similarity', one row per
        cleanup entry, in cleanup order, with a default 0..n-1 index.
    """
    # Actual inference.
    retrieved_content = memory.retrieve(generate_query(preprocess_text(sentence)))

    # Collect all rows first and build the frame once; growing a frame with
    # pd.concat per row copies the accumulated data on every iteration.
    rows = [
        {
            'sentence': sentence,
            'token': token,
            'similarity': thd.cosine_similarity(atomic_HC, retrieved_content).item(),
        }
        for token, atomic_HC in cleanup.items()
    ]

    return pd.DataFrame(rows, columns=['sentence', 'token', 'similarity'])

def get_most_similar_HVs(sims_df, delta_threshold=0.15):
    """Return the tokens that sit above the first large drop in similarity.

    Rows are ranked by descending 'similarity'. The ranking is cut in front
    of the first row whose similarity is more than `delta_threshold` below
    its predecessor's. When no such drop exists, all tokens are considered
    equally represented and every token is returned.

    Args:
        sims_df: DataFrame with 'token' and 'similarity' columns.
        delta_threshold: Minimum drop between neighbouring ranks that counts
            as a gap.

    Returns:
        NumPy array with the selected tokens, sorted alphabetically.
    """
    # Rank the tokens; the deltas below only make sense on a sorted frame.
    ranked = sims_df.sort_values('similarity', ascending=False).reset_index(drop=True)
    similarity = ranked['similarity'].astype(float)

    # Drop from the previous (more similar) token to the current one. The
    # first token has no predecessor, so its delta is set to 0.
    delta = (similarity.shift(1) - similarity).fillna(0)

    # Position of the first gap. Without a gap all tokens are kept; the
    # previous fallback of 0 returned an empty concept in that case.
    gaps = delta[delta > delta_threshold]
    idx_cut_in = gaps.index[0] if len(gaps) > 0 else len(ranked)

    # np.sort returns a sorted copy, so this also works when pandas hands out
    # a read-only array.
    concept = np.sort(ranked['token'].to_numpy()[:idx_cut_in])
    return concept
    

def display_and_get_memory_addresses(memory):
    """Display, for every memory address, its similarity to each atomic HV.

    For each row of `memory.addresses` the cosine similarity to every
    `cleanup` entry is computed and displayed (most similar first). The
    tokens selected by `get_most_similar_HVs` are recorded as that address's
    concept.

    Args:
        memory: Object exposing an iterable `addresses` attribute.

    Returns:
        DataFrame with columns 'memory_address' and 'memory_concept', one row
        per address, indexed 0..n-1.
    """
    print("Number of constructed addresses/abstract concepts: ", len(memory.addresses))

    # Rows are gathered in lists and turned into frames once, instead of
    # re-copying a growing frame with pd.concat on every iteration.
    concept_rows = []
    for address in memory.addresses:
        sims_df = pd.DataFrame(
            [
                {'token': key, 'similarity': thd.cosine_similarity(item, address).item()}
                for key, item in cleanup.items()
            ],
            columns=['token', 'similarity'],
        )

        display(sims_df.sort_values('similarity', ascending=False).reset_index(drop=True))
        concept = get_most_similar_HVs(sims_df)
        concept_rows.append({'memory_address': address, 'memory_concept': concept})

    return pd.DataFrame(concept_rows, columns=['memory_address', 'memory_concept'])

In [9]:
# Hyperparameters shared by both memories.
address_size = dim
# EMA time period; SONN derives its smoothing factor as 2 / (period + 1).
ema_time_period = 5000
learning_rate_update = 0.1
temperature = 0.2

# Two memories that differ only in the `normalize` flag.
memory_unnormalized = SONN(
    address_size=address_size,
    ema_time_period=ema_time_period,
    learning_rate_update=learning_rate_update,
    temperature=temperature,
)
memory_normalized = SONN(
    address_size=address_size,
    ema_time_period=ema_time_period,
    learning_rate_update=learning_rate_update,
    temperature=temperature,
    normalize=True,
)

memories = dict(normalized=memory_normalized, unnormalized=memory_unnormalized)

In [10]:
# Load initial training data.
lines_raw = load_data('../data/initial_training_data.txt')

# Preprocess initial training data, skipping empty lines.
lines_tokens = [preprocess_text(line_raw) for line_raw in lines_raw if line_raw.rstrip()]

# Flush cleanup memory.
cleanup = {}

# Define chunk lengths.
chunk_lengths = [1, 2, 3]

# Train memories (normalized & unnormalized) with the initial training data.
for sentence_tokens in lines_tokens:
    generate_atomic_HVs_from_tokens_and_add_them_to_cleanup(sentence_tokens)
    for memory in memories.values():
        # Construct the chunks of each sentence and save them to each memory.
        # Uses the `chunk_lengths` setting above rather than a repeated literal,
        # so changing the setting actually affects the initial training.
        generate_chunk_representations_and_save_them_to_memory(memory, sentence_tokens, chunk_lengths=chunk_lengths)

### Inference

In [11]:
def column_output(memories, tables: dict):
    """Show one table per memory, side by side.

    Args:
        memories: Dict keyed by memory type; only the keys are used.
        tables: Dict mapping each memory type to the object to display.
    """
    panels = []
    for memory_type in memories:
        panel = widgets.Output()
        # Everything displayed inside the context is captured by this panel.
        with panel:
            display(md(f"### <ins>{memory_type.capitalize()}</ins>"))
            display(tables[memory_type])
        panels.append(panel)

    # Lay the panels out horizontally.
    display(widgets.HBox(panels))
    return

In [12]:
### TODOs ####
# 1. Separate file for inference sentences.
def infer(memory, inference_sentences: List[str], output=False):
    """Query `memory` with every sentence and tabulate token similarities.

    Args:
        memory: Memory passed on to `get_similarities_to_atomic_HVs`.
        inference_sentences: Raw sentences to query with.
        output: When True, display the resulting table.

    Returns:
        DataFrame with a single 'similarity' column, indexed by
        ('sentence', 'token') and sorted by sentence and similarity, both
        descending.
    """
    frames = [
        get_similarities_to_atomic_HVs(memory, inference_sentence)
        for inference_sentence in inference_sentences
    ]

    # One concat over all sentences instead of repeatedly appending to an
    # initially empty frame. pd.concat rejects an empty list, hence the
    # explicit empty table.
    if frames:
        sims_df = pd.concat(frames)
    else:
        sims_df = pd.DataFrame(columns=['sentence', 'token', 'similarity'])

    sims_df = sims_df.sort_values(['sentence', 'similarity'], ascending=False).set_index(['sentence', 'token'])

    if output:
        display(sims_df)
    return sims_df

In [13]:
def online_learning_with_inference(memories, inference_sentences, index, tracked_tokens_sims_dfs, chunk_lengths=[1, 2, 3], epochs=10):
    """Alternate learning and inference on the sentences of '../data/data.txt'.

    Each non-empty line of the data file is saved to every memory `epochs`
    times. After each save the memory is queried with `inference_sentences`,
    the similarity table is displayed, and the similarities selected by
    `index` are written to column 'similarity_<epoch + 1>' of that memory's
    tracking table. Finally the tracking tables are shown side by side.

    Args:
        memories: Dict mapping memory type to memory.
        inference_sentences: Sentences used for querying.
        index: (sentence, token) pairs whose similarities are tracked.
        tracked_tokens_sims_dfs: Dict mapping memory type to its tracking
            DataFrame; updated in place.
        chunk_lengths: Chunk sizes handed to the chunk generator.
        epochs: Number of repetitions per sentence.
    """
    # Tokenise every non-empty line of the data file.
    lines_tokens = [
        preprocess_text(raw_line)
        for raw_line in load_data('../data/data.txt')
        if raw_line.rstrip()
    ]

    for sentence_tokens in lines_tokens:
        for epoch in range(epochs):
            tracked_column = f"similarity_{epoch + 1}"
            for memory_type, memory in memories.items():
                # Learning: save the sentence's chunks to this memory.
                generate_chunk_representations_and_save_them_to_memory(memory, sentence_tokens, chunk_lengths=chunk_lengths)

                # Inference: query the memory and show the result.
                sims_df = infer(memory, inference_sentences)
                display(md(f" <ins>{memory_type.capitalize()}</ins>"))
                display(md(f"epoch: {epoch}"))
                display(sims_df)

                # Record the tracked similarities for this epoch.
                tracked_tokens_sims_dfs[memory_type][tracked_column] = sims_df.loc[index]['similarity']

    column_output(memories, tracked_tokens_sims_dfs)
    return

In [14]:
inference_sentences = [
    "The red house.",
    "The house.",
    "House.",
    "The purple house.",
]
tokens_to_keep_track_of = ['red']

# Every (sentence, token) pair whose similarity is tracked across epochs.
index = [
    (sentence, token)
    for sentence in inference_sentences
    for token in tokens_to_keep_track_of
]

In [15]:
# Baseline per memory: similarities of the tracked (sentence, token) pairs
# before any online learning has taken place.
tracked_tokens_sims_dfs = dict()
for memory_type, memory in memories.items():
    tracked_tokens_sims_dfs[memory_type] = infer(memory, inference_sentences).loc[index]

# Run the online-learning loop; it adds one similarity column per epoch.
online_learning_with_inference(
    memories=memories,
    inference_sentences=inference_sentences,
    index=index,
    tracked_tokens_sims_dfs=tracked_tokens_sims_dfs,
    chunk_lengths=chunk_lengths,
)

 <ins>Normalized</ins>

epoch: 0

Unnamed: 0_level_0,Unnamed: 1_level_0,similarity
sentence,token,Unnamed: 2_level_1
The red house.,red,0.738053
The red house.,the,0.471178
The red house.,house,0.46216
The red house.,blue,0.061105
The red house.,purple,0.028926
The red house.,green,-0.009104
The purple house.,the,0.60455
The purple house.,house,0.557373
The purple house.,red,0.50396
The purple house.,blue,0.203475


 <ins>Unnormalized</ins>

epoch: 0

Unnamed: 0_level_0,Unnamed: 1_level_0,similarity
sentence,token,Unnamed: 2_level_1
The red house.,the,0.582949
The red house.,house,0.575664
The red house.,red,0.545368
The red house.,blue,0.126312
The red house.,green,0.056021
The red house.,purple,0.031191
The purple house.,the,0.660221
The purple house.,house,0.639569
The purple house.,blue,0.258861
The purple house.,red,0.221317


 <ins>Normalized</ins>

epoch: 1

Unnamed: 0_level_0,Unnamed: 1_level_0,similarity
sentence,token,Unnamed: 2_level_1
The red house.,red,0.751916
The red house.,the,0.460181
The red house.,house,0.451297
The red house.,blue,0.055597
The red house.,purple,0.028654
The red house.,green,-0.014865
The purple house.,the,0.587912
The purple house.,red,0.547347
The purple house.,house,0.545169
The purple house.,blue,0.182153


 <ins>Unnormalized</ins>

epoch: 1

Unnamed: 0_level_0,Unnamed: 1_level_0,similarity
sentence,token,Unnamed: 2_level_1
The red house.,the,0.58124
The red house.,house,0.573899
The red house.,red,0.548912
The red house.,blue,0.126554
The red house.,green,0.056017
The red house.,purple,0.031162
The purple house.,the,0.659748
The purple house.,house,0.639084
The purple house.,blue,0.257162
The purple house.,red,0.22675


 <ins>Normalized</ins>

epoch: 2

Unnamed: 0_level_0,Unnamed: 1_level_0,similarity
sentence,token,Unnamed: 2_level_1
The red house.,red,0.760792
The red house.,the,0.452868
The red house.,house,0.444069
The red house.,blue,0.051946
The red house.,purple,0.028469
The red house.,green,-0.01866
The purple house.,red,0.580312
The purple house.,the,0.573453
The purple house.,house,0.5343
The purple house.,blue,0.165005


 <ins>Unnormalized</ins>

epoch: 2

Unnamed: 0_level_0,Unnamed: 1_level_0,similarity
sentence,token,Unnamed: 2_level_1
The red house.,red,0.572548
The red house.,the,0.569842
The red house.,house,0.562265
The red house.,blue,0.124879
The red house.,green,0.053175
The red house.,purple,0.030978
The purple house.,the,0.658747
The purple house.,house,0.638013
The purple house.,blue,0.254125
The purple house.,red,0.237414


 <ins>Normalized</ins>

epoch: 3

Unnamed: 0_level_0,Unnamed: 1_level_0,similarity
sentence,token,Unnamed: 2_level_1
The red house.,red,0.767022
The red house.,the,0.447437
The red house.,house,0.439023
The red house.,blue,0.049352
The red house.,purple,0.028327
The red house.,green,-0.021351
The purple house.,red,0.606114
The purple house.,the,0.560804
The purple house.,house,0.524842
The purple house.,blue,0.150988


 <ins>Unnormalized</ins>

epoch: 3

Unnamed: 0_level_0,Unnamed: 1_level_0,similarity
sentence,token,Unnamed: 2_level_1
The red house.,the,0.579233
The red house.,house,0.570951
The red house.,red,0.55568
The red house.,blue,0.120705
The red house.,green,0.050604
The red house.,purple,0.031169
The purple house.,the,0.67361
The purple house.,house,0.646521
The purple house.,blue,0.227389
The purple house.,red,0.217087


 <ins>Normalized</ins>

epoch: 4

Unnamed: 0_level_0,Unnamed: 1_level_0,similarity
sentence,token,Unnamed: 2_level_1
The red house.,red,0.768584
The red house.,the,0.470291
The red house.,house,0.412122
The red house.,blue,0.04786
The red house.,purple,0.029213
The red house.,green,-0.022329
The purple house.,red,0.616591
The purple house.,the,0.573007
The purple house.,house,0.502042
The purple house.,blue,0.143481


 <ins>Unnormalized</ins>

epoch: 4

Unnamed: 0_level_0,Unnamed: 1_level_0,similarity
sentence,token,Unnamed: 2_level_1
The red house.,house,0.596745
The red house.,red,0.587206
The red house.,the,0.520625
The red house.,blue,0.111939
The red house.,green,0.04056
The red house.,purple,0.029276
The purple house.,house,0.658159
The purple house.,the,0.655358
The purple house.,red,0.246302
The purple house.,blue,0.220579


 <ins>Normalized</ins>

epoch: 5

Unnamed: 0_level_0,Unnamed: 1_level_0,similarity
sentence,token,Unnamed: 2_level_1
The red house.,red,0.772351
The red house.,the,0.464066
The red house.,house,0.412189
The red house.,blue,0.046231
The red house.,purple,0.029016
The red house.,green,-0.024066
The purple house.,red,0.634826
The purple house.,the,0.561484
The purple house.,house,0.496161
The purple house.,blue,0.13307


 <ins>Unnormalized</ins>

epoch: 5

Unnamed: 0_level_0,Unnamed: 1_level_0,similarity
sentence,token,Unnamed: 2_level_1
The red house.,red,0.613445
The red house.,the,0.551451
The red house.,house,0.541838
The red house.,blue,0.104259
The red house.,green,0.033191
The red house.,purple,0.030739
The purple house.,the,0.667098
The purple house.,house,0.638424
The purple house.,red,0.274342
The purple house.,blue,0.213315


 <ins>Normalized</ins>

epoch: 6

Unnamed: 0_level_0,Unnamed: 1_level_0,similarity
sentence,token,Unnamed: 2_level_1
The red house.,red,0.775222
The red house.,the,0.459261
The red house.,house,0.412218
The red house.,blue,0.04495
The red house.,purple,0.028863
The red house.,green,-0.025426
The purple house.,red,0.649776
The purple house.,the,0.551546
The purple house.,house,0.490908
The purple house.,blue,0.124287


 <ins>Unnormalized</ins>

epoch: 6

Unnamed: 0_level_0,Unnamed: 1_level_0,similarity
sentence,token,Unnamed: 2_level_1
The red house.,red,0.614341
The red house.,the,0.550984
The red house.,house,0.541361
The red house.,blue,0.103931
The red house.,green,0.032871
The red house.,purple,0.03073
The purple house.,the,0.666433
The purple house.,house,0.637902
The purple house.,red,0.279048
The purple house.,blue,0.211532


 <ins>Normalized</ins>

epoch: 7

Unnamed: 0_level_0,Unnamed: 1_level_0,similarity
sentence,token,Unnamed: 2_level_1
The red house.,red,0.777486
The red house.,the,0.455408
The red house.,house,0.412251
The red house.,blue,0.043917
The red house.,purple,0.02874
The red house.,green,-0.026518
The purple house.,red,0.66224
The purple house.,the,0.542877
The purple house.,house,0.486237
The purple house.,blue,0.116799


 <ins>Unnormalized</ins>

epoch: 7

Unnamed: 0_level_0,Unnamed: 1_level_0,similarity
sentence,token,Unnamed: 2_level_1
The red house.,red,0.615152
The red house.,the,0.550565
The red house.,house,0.54093
The red house.,blue,0.103604
The red house.,green,0.032558
The red house.,purple,0.030723
The purple house.,the,0.665758
The purple house.,house,0.637371
The purple house.,red,0.283717
The purple house.,blue,0.209731


 <ins>Normalized</ins>

epoch: 8

Unnamed: 0_level_0,Unnamed: 1_level_0,similarity
sentence,token,Unnamed: 2_level_1
The red house.,red,0.779324
The red house.,the,0.452282
The red house.,house,0.412239
The red house.,blue,0.043067
The red house.,purple,0.028641
The red house.,green,-0.027415
The purple house.,red,0.672784
The purple house.,the,0.535284
The purple house.,house,0.482031
The purple house.,blue,0.110345


 <ins>Unnormalized</ins>

epoch: 8

Unnamed: 0_level_0,Unnamed: 1_level_0,similarity
sentence,token,Unnamed: 2_level_1
The red house.,red,0.6287
The red house.,the,0.542907
The red house.,house,0.533174
The red house.,blue,0.102297
The red house.,green,0.030665
The red house.,purple,0.03057
The purple house.,the,0.664347
The purple house.,house,0.63607
The purple house.,red,0.292301
The purple house.,blue,0.207286


 <ins>Normalized</ins>

epoch: 9

Unnamed: 0_level_0,Unnamed: 1_level_0,similarity
sentence,token,Unnamed: 2_level_1
The red house.,red,0.780835
The red house.,the,0.449741
The red house.,house,0.412174
The red house.,blue,0.042355
The red house.,purple,0.02856
The red house.,green,-0.028162
The purple house.,red,0.681802
The purple house.,the,0.528619
The purple house.,house,0.478212
The purple house.,blue,0.104737


 <ins>Unnormalized</ins>

epoch: 9

Unnamed: 0_level_0,Unnamed: 1_level_0,similarity
sentence,token,Unnamed: 2_level_1
The red house.,red,0.629322
The red house.,the,0.54259
The red house.,house,0.532846
The red house.,blue,0.10189
The red house.,purple,0.030565
The red house.,green,0.030301
The purple house.,the,0.663671
The purple house.,house,0.635529
The purple house.,red,0.296864
The purple house.,blue,0.205371


HBox(children=(Output(), Output()))

### Memory state

In [16]:
for memory_type, memory in memories.items():
    display(md(f"### <ins>{memory_type.capitalize()}</ins>"))
    concepts_df = display_and_get_memory_addresses(memory)

    # Space-joined token list of each concept, used as the grouping key.
    concepts_df['memory_concept_str'] = concepts_df['memory_concept'].apply(" ".join)
    print(concepts_df['memory_concept_str'].values)

    # Gather the memory addresses that map to the same concept.
    tmp_df = pd.DataFrame(concepts_df.groupby('memory_concept_str')['memory_address'].apply(list)).reset_index()
    for address_list in tmp_df['memory_address']:
        # A concept with a single address has nothing to compare.
        if len(address_list) <= 1:
            continue

        # Pairwise cosine similarities between the addresses of one concept.
        stacked_tensor = torch.stack(address_list, dim=0)
        pairwise_similarities = F.cosine_similarity(stacked_tensor.unsqueeze(1), stacked_tensor.unsqueeze(0), dim=2)
        print(pairwise_similarities)
        # Open question: why are they all (nearly) identical?
        # NOTE(review): presumably because these addresses superpose the same
        # atomic HVs with similar weights - confirm.

### <ins>Normalized</ins>

Number of constructed addresses/abstract concepts:  38


Unnamed: 0,token,similarity
0,the,0.999822
1,purple,0.04019
2,house,0.018593
3,blue,0.004658
4,red,0.003803
5,green,0.001332


Unnamed: 0,token,similarity
0,red,0.999911
1,blue,0.034176
2,purple,0.012397
3,the,-0.000559
4,house,-0.010498
5,green,-0.042046


Unnamed: 0,token,similarity
0,house,0.999841
1,the,0.017978
2,blue,0.014499
3,purple,0.002643
4,green,-0.006533
5,red,-0.00687


Unnamed: 0,token,similarity
0,the,0.799732
1,red,0.592241
2,purple,0.039454
3,blue,0.023785
4,house,0.003198
5,green,-0.023672


Unnamed: 0,token,similarity
0,house,0.79839
1,red,0.585972
2,blue,0.031862
3,purple,0.009241
4,the,0.008664
5,green,-0.030138


Unnamed: 0,token,similarity
0,the,0.576633
1,house,0.57094
2,red,0.57052
3,purple,0.031375
4,blue,0.030414
5,green,-0.027009


Unnamed: 0,token,similarity
0,green,0.999982
1,the,0.004783
2,blue,-0.001799
3,house,-0.002707
4,purple,-0.031836
5,red,-0.037907


Unnamed: 0,token,similarity
0,the,0.793493
1,green,0.609987
2,purple,0.012361
3,house,0.010731
4,blue,0.002468
5,red,-0.022559


Unnamed: 0,token,similarity
0,house,0.792592
1,green,0.604824
2,the,0.015261
3,blue,0.010326
4,purple,-0.017427
5,red,-0.031093


Unnamed: 0,token,similarity
0,the,0.588311
1,house,0.583788
2,green,0.560799
3,blue,0.009671
4,purple,0.006588
5,red,-0.032688


Unnamed: 0,token,similarity
0,blue,0.999973
1,red,0.039872
2,purple,0.026183
3,house,0.01716
4,the,0.006754
5,green,-0.002264


Unnamed: 0,token,similarity
0,the,0.784028
1,blue,0.62369
2,purple,0.047536
3,red,0.024058
4,house,0.022684
5,green,-0.000195


Unnamed: 0,token,similarity
0,house,0.781638
1,blue,0.634448
2,purple,0.018253
3,red,0.016774
4,the,0.016262
5,green,-0.006334


Unnamed: 0,token,similarity
0,house,0.590818
1,the,0.58426
2,blue,0.570477
3,purple,0.03898
4,red,0.011846
5,green,-0.003869


Unnamed: 0,token,similarity
0,red,0.706681
1,the,0.694943
2,house,0.077875
3,purple,0.036824
4,blue,0.028363
5,green,-0.029174


Unnamed: 0,token,similarity
0,red,0.703689
1,house,0.690779
2,the,0.085399
3,blue,0.034653
4,purple,0.013568
5,green,-0.034235


Unnamed: 0,token,similarity
0,red,0.706757
1,the,0.696971
2,house,0.059033
3,purple,0.036869
4,blue,0.028098
5,green,-0.029045


Unnamed: 0,token,similarity
0,red,0.703739
1,house,0.692819
2,the,0.066715
3,blue,0.034605
4,purple,0.012823
5,green,-0.034281


Unnamed: 0,token,similarity
0,red,0.706383
1,the,0.698472
2,house,0.046463
3,purple,0.036899
4,blue,0.027908
5,green,-0.02894


Unnamed: 0,token,similarity
0,red,0.70327
1,house,0.694429
2,the,0.054054
3,blue,0.034558
4,purple,0.012312
5,green,-0.034293


Unnamed: 0,token,similarity
0,red,0.705888
1,the,0.699684
2,purple,0.036922
3,house,0.03673
4,blue,0.027753
5,green,-0.028851


Unnamed: 0,token,similarity
0,red,0.702695
1,house,0.695731
2,the,0.044236
3,blue,0.034516
4,purple,0.011914
5,green,-0.034294


Unnamed: 0,token,similarity
0,red,0.705401
1,the,0.70072
2,purple,0.036939
3,house,0.027617
4,blue,0.027607
5,green,-0.028766


Unnamed: 0,token,similarity
0,red,0.755006
1,house,0.640457
2,blue,0.035324
3,purple,0.010777
4,the,0.002424
5,green,-0.036186


Unnamed: 0,token,similarity
0,red,0.704883
1,the,0.701589
2,purple,0.036953
3,blue,0.027487
4,house,0.020356
5,green,-0.028693


Unnamed: 0,token,similarity
0,red,0.702057
1,house,0.697233
2,blue,0.03445
3,the,0.028766
4,purple,0.011289
5,green,-0.034302


Unnamed: 0,token,similarity
0,red,0.704349
1,the,0.702338
2,purple,0.036965
3,blue,0.027392
4,house,0.014934
5,green,-0.028632


Unnamed: 0,token,similarity
0,red,0.701
1,house,0.698554
2,blue,0.034406
3,the,0.022289
4,purple,0.011018
5,green,-0.034277


Unnamed: 0,token,similarity
0,red,0.703472
1,the,0.703394
2,purple,0.036985
3,blue,0.027282
4,house,0.009206
5,green,-0.028554


Unnamed: 0,token,similarity
0,red,0.700823
1,house,0.698913
2,blue,0.03438
3,the,0.016494
4,purple,0.010784
5,green,-0.034282


Unnamed: 0,token,similarity
0,the,0.703647
1,red,0.70334
2,purple,0.036983
3,blue,0.027203
4,house,0.004037
5,green,-0.028513


Unnamed: 0,token,similarity
0,red,0.700617
1,house,0.699243
2,blue,0.034356
3,the,0.011257
4,purple,0.010572
5,green,-0.034284


Unnamed: 0,token,similarity
0,the,0.703859
1,red,0.703216
2,purple,0.036979
3,blue,0.027117
4,house,-0.001605
5,green,-0.028469


Unnamed: 0,token,similarity
0,red,0.700405
1,house,0.699534
2,blue,0.034332
3,purple,0.010378
4,the,0.006469
5,green,-0.034284


Unnamed: 0,token,similarity
0,the,0.704016
1,red,0.703108
2,purple,0.03697
3,blue,0.027008
4,house,-0.008834
5,green,-0.028415


Unnamed: 0,token,similarity
0,red,0.700199
1,house,0.699784
2,blue,0.03431
3,purple,0.010199
4,the,0.002052
5,green,-0.034285


Unnamed: 0,token,similarity
0,the,0.703571
1,red,0.703553
2,purple,0.036956
3,blue,0.027016
4,house,-0.009233
5,green,-0.028431


Unnamed: 0,token,similarity
0,house,0.700005
1,red,0.699994
2,blue,0.034289
3,purple,0.010028
4,the,-0.00215
5,green,-0.034284


['the' 'red' 'house' 'the' 'house' 'house red the' 'green' 'the' 'house'
 'green house the' 'blue' 'the' 'blue house' 'blue house the' 'red the'
 'house red' 'red the' 'house red' 'red the' 'house red' 'red the'
 'house red' 'red the' 'house red' 'red the' 'house red' 'red the'
 'house red' 'red the' 'house red' 'red the' 'house red' 'red the'
 'house red' 'red the' 'house red' 'red the' 'house red']
MAPTensor([[1.0000, 0.8063, 0.7924],
           [0.8063, 1.0000, 0.6237],
           [0.7924, 0.6237, 1.0000]])
MAPTensor([[1.0000, 0.9998, 0.9995, 0.9991, 0.9941, 0.9984, 0.9980, 0.9976,
            0.9972, 0.9968, 0.9965, 0.9961],
           [0.9998, 1.0000, 0.9999, 0.9997, 0.9954, 0.9993, 0.9990, 0.9987,
            0.9984, 0.9982, 0.9979, 0.9976],
           [0.9995, 0.9999, 1.0000, 1.0000, 0.9960, 0.9997, 0.9995, 0.9993,
            0.9991, 0.9988, 0.9986, 0.9984],
           [0.9991, 0.9997, 1.0000, 1.0000, 0.9963, 0.9999, 0.9998, 0.9996,
            0.9994, 0.9993, 0.9991, 0.9989],


### <ins>Unnormalized</ins>

Number of constructed addresses/abstract concepts:  20


Unnamed: 0,token,similarity
0,the,0.990751
1,red,0.117282
2,house,0.050661
3,purple,0.041302
4,blue,0.008986
5,green,-0.003682


Unnamed: 0,token,similarity
0,red,0.985333
1,the,0.112387
2,house,0.100037
3,blue,0.035777
4,purple,0.016968
5,green,-0.042006


Unnamed: 0,token,similarity
0,house,0.991323
1,red,0.103412
2,the,0.05038
3,blue,0.018299
4,purple,0.005298
5,green,-0.011067


Unnamed: 0,token,similarity
0,red,0.70321
1,the,0.69549
2,house,0.099119
3,purple,0.036846
4,blue,0.028558
5,green,-0.029172


Unnamed: 0,token,similarity
0,red,0.700718
1,house,0.690788
2,the,0.106845
3,blue,0.034642
4,purple,0.014391
5,green,-0.034075


Unnamed: 0,token,similarity
0,red,0.610955
1,the,0.556465
2,house,0.54789
3,blue,0.031378
4,purple,0.031022
5,green,-0.028587


Unnamed: 0,token,similarity
0,green,0.999902
1,the,0.009541
2,house,0.001711
3,blue,-0.001554
4,purple,-0.031583
5,red,-0.033221


Unnamed: 0,token,similarity
0,the,0.767052
1,green,0.641613
2,house,0.017539
3,purple,0.010625
4,red,0.005648
5,blue,0.003408


Unnamed: 0,token,similarity
0,house,0.763634
1,green,0.639621
2,the,0.022994
3,blue,0.010877
4,red,-0.003149
5,purple,-0.017957


Unnamed: 0,token,similarity
0,the,0.609284
1,house,0.603395
2,green,0.512136
3,red,0.025997
4,blue,0.012084
5,purple,0.009656


Unnamed: 0,token,similarity
0,blue,0.999843
1,red,0.046281
2,purple,0.026497
3,house,0.023109
4,the,0.012601
5,green,-0.002565


Unnamed: 0,token,similarity
0,the,0.763944
1,blue,0.646184
2,red,0.062602
3,purple,0.047768
4,house,0.033278
5,green,-0.001934


Unnamed: 0,token,similarity
0,house,0.766708
1,blue,0.650628
2,red,0.053686
3,the,0.026493
4,purple,0.019476
5,green,-0.007777


Unnamed: 0,token,similarity
0,house,0.609606
1,the,0.604107
2,blue,0.522888
3,red,0.074405
4,purple,0.039318
5,green,-0.006576


Unnamed: 0,token,similarity
0,red,0.99554
1,the,0.057768
2,house,0.046139
3,blue,0.035105
4,purple,0.014796
5,green,-0.042156


Unnamed: 0,token,similarity
0,the,0.998125
1,red,0.047166
2,purple,0.040678
3,house,0.026759
4,blue,0.006259
5,green,-0.000553


Unnamed: 0,token,similarity
0,house,0.998297
1,red,0.034397
2,the,0.026591
3,blue,0.01593
4,purple,0.003499
5,green,-0.008246


Unnamed: 0,token,similarity
0,red,0.700239
1,house,0.698514
2,the,0.039304
3,blue,0.034452
4,purple,0.011691
5,green,-0.034217


Unnamed: 0,token,similarity
0,red,0.703995
1,the,0.702177
2,purple,0.036978
3,blue,0.027553
4,house,0.02679
5,green,-0.028699


Unnamed: 0,token,similarity
0,red,0.999767
1,blue,0.034282
2,purple,0.012639
3,the,0.005228
4,house,-0.004622
5,green,-0.042071


['the' 'red' 'house' 'red the' 'house red' 'house red the' 'green'
 'green the' 'green house' 'green house the' 'blue' 'blue the'
 'blue house' 'blue house the' 'red' 'the' 'house' 'house red' 'red the'
 'red']
MAPTensor([[1.0000, 0.9973],
           [0.9973, 1.0000]])
MAPTensor([[1.0000, 0.9977],
           [0.9977, 1.0000]])
MAPTensor([[1.0000, 0.9970, 0.9888],
           [0.9970, 1.0000, 0.9973],
           [0.9888, 0.9973, 1.0000]])
MAPTensor([[1.0000, 0.9974],
           [0.9974, 1.0000]])
MAPTensor([[1.0000, 0.9972],
           [0.9972, 1.0000]])
