# Encoding functions

This notebook includes the functions for encoding a dictionary into HD vectors and an associative memory. In order to develop the program I'm using a toy set of object descriptions from McRae's dataset.

In [5]:
%run HDComputing_basics.ipynb  #The HDvector class and other helpful functions are defined here...

import pandas

df = pandas.read_excel('sample.xlsx')

# Dictionary of definitions
Dict_defs = {}
# Obtaining concept names from column 0
concepts = [x for x in map(str, df[df.columns[0]].values) if x != 'nan']
print 'List of concepts: ', concepts

# Loop for each concept...
for concept in concepts:
    val_dic = list(map(list, zip([x for x in map(str, df[concept + '_feature']) if x !='nan'], 
                       [y for y in map(str, df[concept + '_val']) if y !='nan'])))
    Dict_defs[concept] = val_dic

feature_vectors = []  # List of feature vectors such as 'is', 'used_for', 'shape', etc...

List of concepts:  ['apple', 'bowl', 'cup', 'fork', 'knife', 'plate', 'pot', 'spatula', 'spoon', 'handle', 'metal', 'ceramic', 'plastic', 'glass', 'steel']


## Auxiliary functions

In [6]:
def flat_list(L):
    """Flatten a list of lists (nested to any depth) into a single flat list.

    Elements come out in left-to-right, depth-first order. Only values whose
    type is exactly ``list`` are expanded; anything else (strings, tuples,
    numbers, ...) is kept as a single element.

    The traversal is iterative: an explicit stack of iterators replaces
    element-by-element recursion, which fails with "maximum recursion depth
    exceeded" once the input is longer than the interpreter's recursion limit
    and re-copies the tail of the list at every step.

    Parameters
    ----------
    L : list
        The (possibly nested) list to flatten. It is not modified.

    Returns
    -------
    list
        A new list with the non-list elements of ``L``.
    """
    flat = []
    pending = [iter(L)]  # the list currently being walked sits on top
    while pending:
        for item in pending[-1]:
            if type(item) is list:
                # Descend; the parent iterator remembers where to resume.
                pending.append(iter(item))
                break
            flat.append(item)
        else:
            # Current level exhausted: return to the enclosing list.
            pending.pop()
    return flat

## Memory functions

In [7]:
def SaveConcepts(Dic):
    """Create one HDvector for every distinct label found in a definitions dictionary.

    The labels are the dictionary keys plus every item found inside the
    (nested) values, i.e. both the feature names and the feature values of
    each [feature, value] pair. Each distinct label is passed once to
    ``HDvector(N, label)``; ``N`` is a module-level name defined outside this
    function (presumably the vector dimensionality - TODO confirm).

    Parameters
    ----------
    Dic : dict
        Maps a concept label to a list of [feature, value] pairs.
    """
    # list(...) makes the concatenation work whether keys()/values() return
    # plain lists or dict views.
    labels = flat_list(list(Dic.values())) + list(Dic.keys())
    # Process for storing the set of distinct labels in memory
    for label in set(labels):
        HDvector(N, label)  # This creates an object and stores it in memory

def FeatureVectors(Dic):
    """Collect the distinct feature names used in a definitions dictionary.

    Every value of ``Dic`` is walked as a sequence of pairs and the first
    item of each pair is taken as a feature name. The de-duplicated names
    replace the contents of the module-level list ``feature_vectors``;
    nothing is returned.
    """
    global feature_vectors
    names = [pair[0] for definition in Dic.values() for pair in definition]
    feature_vectors = list(set(names))
            
def CreateSemanticPointer(PairList):
    """Turn a list of [feature, value] label pairs into a single vector.

    For input such as [[feat1, val1], [feat2, val2], ...] both labels of each
    pair are looked up in ``Dict``, the two vectors are combined with ``*``,
    and the list of products is handed to ``ADD``. The value returned by
    ``ADD`` is returned unchanged:  feat1*val1 + feat2*val2 + ...
    """
    products = [Dict[pair[0]] * Dict[pair[1]] for pair in PairList]
    return ADD(products)
        
def SaveDefinitions(Dic):
    """Attach a definition vector (semantic pointer) to every concept in the dictionary.

    For each ``concept -> [[feature, value], ...]`` entry, the pair list is
    turned into a single vector with ``CreateSemanticPointer`` and assigned,
    through ``setPointer``, to the object stored under ``Dict[concept]``.
    All labels involved must already be present in ``Dict`` (see
    ``SaveConcepts``), otherwise the lookup fails.

    Parameters
    ----------
    Dic : dict
        Maps a concept label to a list of [feature, value] pairs.
    """
    # items() iterates the entries on both Python 2 and Python 3; this
    # function never touches the module-level feature_vectors list, so no
    # global declaration is needed.
    for concept, pairs in Dic.items():
        Dict[concept].setPointer(CreateSemanticPointer(pairs))

## Testing encoding

In [8]:
def EncodingTests():
    """Encode Dict_defs into memory and run a few printed sanity checks.

    Calls init() (defined elsewhere; presumably resets the memory - TODO
    confirm), then FeatureVectors, SaveConcepts and SaveDefinitions on
    Dict_defs. Afterwards it prints several distances and label look-ups
    and asserts two of the distances (d1, d2) against the module-level
    threshold `thr`. The remaining queries are only printed.
    """
    init()
    FeatureVectors(Dict_defs)
    SaveConcepts(Dict_defs)
    SaveDefinitions(Dict_defs)
    print 'Feature vectors:', feature_vectors,'\n'
    
    # Comparing a definition vs a single feature vector  -> orthogonal...
    d1 = Dict['bowl'].getPointer().dist( Dict['shape'] )  
    print "Distance from semantic pointer of 'bowl' to 'shape':", d1
    assert d1 > thr
    

    # Query 'bowl' with the 'shape' feature and compare the result with 'round'.
    # NOTE(review): the original comment read "equivalent to previous line", but that line is no longer present.
    d2 = Dict['round'].dist( Dict['bowl'] ** Dict['shape'])
    print "Distance from semantic pointer 'bowl'*'shape' to 'round':", d2
    print "Assigned label to bowl ** shape: ", HDvector.getLabelID( Dict['bowl'] ** Dict['shape'] ), "\n"
    assert d2 < thr
    
    # NOTE(review): d22 is computed but never printed or asserted.
    d22 = Dict['red'].dist( Dict['apple'] ** Dict['color']) 
    print "Labels for apple ** color : ", HDvector.getLabelID( Dict['apple'] ** Dict['color'] ), "\n"

    # Comparing two semantic pointer vectors:
    d3 = HDvector.dist( Dict['knife'].getPointer(), Dict['spoon'].getPointer() )
    print "Distance from knife to spoon (definitions):", d3
    d4 = HDvector.dist( Dict['knife'].getPointer(), Dict['pot'].getPointer() )
    print "Distance from knife to pot (definitions):", d4, "\n"
    
    # Asking for closest concept
    vec = CreateSemanticPointer( [['shape','round'], ['material','metal'],['used_for','eating']] )
    print "Closest concepts for definition [['shape','round'], ['material','metal'],['used_for','eating']]: ", HDvector.getLabelSP(vec)
    
    # TODO: it would also be worth writing a routine that builds a distance matrix of every semantic pointer against all the others...
    
    # TODO: it would be interesting to try encoding a graph following Gayler:  G = A * P(B) + A * P(C) + B * P(D)
    # (a permuted vector is the child, the non-permuted one is the parent)
    # What are the advantages? It looks simpler, but who knows what drawbacks it may have...
    
EncodingTests()

Feature vectors: ['origin', 'info', 'infested_by', 'is ', 'consistency', 'color', 'is', 'material', 'made_of', 'surface', 'used_for', 'texture', 'taste', 'shape', 'location', 'eaten_in', 'has', 'used_for ', 'example'] 

Distance from semantic pointer of 'bowl' to 'shape': 5044
Distance from semantic pointer 'bowl'*'shape' to 'round': 3854
Assigned label to bowl ** shape:  [['curved', 3824], ['round', 3854]] 

Labels for apple ** color :  [['yellow', 4048], ['green', 4063], ['red', 4086]] 

Distance from knife to spoon (definitions): 4168
Distance from knife to pot (definitions): 4050 

Closest concepts for definition [['shape','round'], ['material','metal'],['used_for','eating']]:  [['pot', 3604], ['plate', 3775], ['handle', 4123], ['fork', 4378], ['bowl', 4435], ['spoon', 4438], ['knife', 4450], ['spatula', 4471]]
