## Encoding McRae's dataset in hyperdimensional memory

### Libraries and HDComputing class

In [1]:
%run HDComputing_basics.ipynb
import pandas as pd

### Functions for reading dataset

In [2]:
def TranslateFeats(ListFeat):
    """Translate raw feature labels into [feature_type, feature_value] pairs.

    Example: ['is_blue', 'is_rectangular'] -> [['color', 'blue'], ['shape', 'rectangular']]

    The mapping is read from the 'Feature', 'feat_name' and 'feat_value' columns of
    FEATS_brm.xlsx. When a label occurs in several rows, the first row is used.

    Args:
        ListFeat: iterable of feature labels as they appear in the 'Feature' column.

    Returns:
        A list with one [str(feat_name), str(feat_value)] pair per input label,
        in the same order as ListFeat.

    Raises:
        IndexError: if a label is not present in the 'Feature' column (the exception
            type is kept from the previous implementation; the message now names
            the offending label).
    """
    # NOTE(review): this path spells the folder 'McRaedataset' while the other readers
    # in this notebook use 'McRaeDataset'. On a case-sensitive file system only one of
    # the spellings can be right -- confirm against the real folder name.
    df = pd.read_excel('../McRaedataset/FEATS_brm.xlsx')

    # Build the label -> pair table in a single pass instead of scanning the whole
    # column once per requested feature. setdefault keeps the FIRST row for a label.
    lookup = {}
    for feature, name, value in zip(df['Feature'], df['feat_name'], df['feat_value']):
        lookup.setdefault(feature, [str(name), str(value)])

    ListPairs = []
    for feat in ListFeat:
        if feat not in lookup:
            raise IndexError("Feature %r not found in FEATS_brm.xlsx" % (feat,))
        # Copy so that callers never share (and accidentally mutate) the same pair list.
        ListPairs.append(list(lookup[feat]))
    return ListPairs

def ReadDefinitions():
    """Read every concept and its raw feature labels from the McRae concept file.

    Reads the 'Concept' and 'Feature' columns of CONCS_FEATS_concstats_brm.xlsx and
    groups the feature labels by concept.

    Returns:
        A list of [concept_name, [feature_label, ...]] entries (all strings).
        Concepts are listed in order of first appearance in the file and the
        features of each concept keep their row order.
    """
    # NOTE(review): the original line carried a '#MINI_' marker, presumably the prefix
    # of a reduced copy of the file used for quick runs -- confirm.
    df = pd.read_excel('../McRaeDataset/CONCS_FEATS_concstats_brm.xlsx') #MINI_

    # One grouping pass replaces a full-frame scan per concept; sort=False keeps the
    # file order, so the result no longer depends on arbitrary set ordering.
    Concepts = []
    for name, rows in df.groupby('Concept', sort=False):
        # Materialise a real list: a lazy map() could only be iterated once on Python 3.
        Concepts.append([str(name), [str(feature) for feature in rows['Feature']]])
    return Concepts


def ClosestConcepts (concept, nc):
    """Return the 'nc' top-ranked concepts for 'concept' from McRae's matrix file.

    Reads cos_matrix_brm_IFR.xlsx, sorts its rows by the column named after
    'concept' in descending order and keeps the first 'nc' rows.

    Args:
        concept: column label to rank by (a concept name such as 'airplane').
        nc: number of rows to keep.

    Returns:
        A list of [label, value] lists, where label is str() of the row's 'CONCEPT'
        cell and value is the matching cell of the 'concept' column, largest first.
    """
    df = pd.read_excel('../McRaeDataset/cos_matrix_brm_IFR.xlsx')

    # Largest values first; only the label column and the requested column are needed.
    ordered = df.sort_values(by=concept, ascending=False)[['CONCEPT', concept]]
    top = ordered.iloc[0:nc]

    # Build a concrete list of lists (map/zip would be lazy iterators on Python 3).
    return [[str(label), value] for label, value in zip(top['CONCEPT'], top[concept])]

### Creating definitions dictionary

In [3]:
def CreateDictionary():
    """Fill the global Dict_defs with {concept: [[feature_type, feature_value], ...]}.

    Concepts and their raw feature labels come from ReadDefinitions(); the labels are
    converted to pairs by TranslateFeats(). Dict_defs must already exist as a mapping
    (it is updated in place, not created here).
    """
    global Dict_defs
    data = [[name, list(feats)] for name, feats in ReadDefinitions()]

    # Translate every label in ONE call: TranslateFeats re-reads its Excel file on each
    # call, so calling it once per concept parsed the same workbook hundreds of times.
    all_feats = [feat for _, feats in data for feat in feats]
    all_pairs = TranslateFeats(all_feats)

    # TranslateFeats returns exactly one pair per label, in input order, so the flat
    # result can be cut back into per-concept slices.
    start = 0
    for name, feats in data:
        stop = start + len(feats)
        Dict_defs[name] = all_pairs[start:stop]
        start = stop

## Storing ID vectors into memory

### Memory functions

In [4]:
def flat_list (L):
    """Flatten a list of lists (nested to any depth) into a single flat list.

    Only 'list' instances are expanded; any other element (strings, tuples, numbers...)
    is kept as a single item. Element order is preserved.

    Implemented iteratively: the previous recursive version recursed once per element,
    so long inputs could exceed the interpreter's recursion limit, and it copied the
    tail of the list at every step.

    Args:
        L: the (possibly nested) list to flatten.

    Returns:
        A new flat list with the leaf elements in left-to-right order.
    """
    flat = []
    # Stack of iterators: the top one is the list currently being walked.
    stack = [iter(L)]
    while stack:
        for item in stack[-1]:
            if isinstance(item, list):
                # Descend into the nested list; the outer iterator resumes afterwards.
                stack.append(iter(item))
                break
            flat.append(item)
        else:
            # Current list exhausted: go back to its parent.
            stack.pop()
    return flat

def SaveConcepts(Dic):
    """Create one HDvector for every distinct label found in a definitions dictionary.

    The labels are the dictionary keys (concept names) plus every string nested inside
    the values (feature types and feature values). Each label is used once, even if it
    appears in many definitions.

    Args:
        Dic: mapping of concept name -> nested lists of labels.
    """
    # list(...) on the views keeps this working where keys()/values() are not lists.
    nested_labels = flat_list(list(Dic.values()))
    all_concepts = list(set(nested_labels + list(Dic)))
    for concept in all_concepts:
        # Constructing the object is the whole point: per the original comment,
        # HDvector stores itself in memory on creation.
        HDvector(N, concept)
        
def FeatureVectors(Dic):
    """Collect the distinct feature types ('is', 'has', 'color', ...) of all definitions.

    The first element of every pair in every definition is gathered, duplicates are
    dropped, and the result is stored in the global list 'feature_vectors'.
    Nothing is returned.
    """
    global feature_vectors
    feature_types = [pair[0] for definition in Dic.values() for pair in definition]
    feature_vectors = list(set(feature_types))
    
def CreateSemanticPointer (PairList):
    """Build the vector feat1*val1 + feat2*val2 + ... from a list of label pairs.

    For every [feature, value] pair the two vectors are looked up in 'Dict' and
    combined with '*'; the resulting vectors are then passed to ADD as one list.
    """
    bound = [Dict[pair[0]] * Dict[pair[1]] for pair in PairList]
    return ADD(bound)

def SaveDefinitions(Dic):
    """Attach a definition vector (semantic pointer) to every concept in 'Dic'.

    For each concept the definition vector is built with CreateSemanticPointer from
    the concept's list of [feature, value] pairs and assigned, through setPointer, to
    the concept's entry in 'Dict'. All labels involved must already be in 'Dict'.

    Args:
        Dic: mapping of concept name -> list of [feature, value] pairs.
    """
    # items() instead of the Python-2-only iteritems(); the unused
    # 'global feature_vectors' declaration was dropped.
    for key, value in Dic.items():
        Dict[key].setPointer(CreateSemanticPointer(value))

## Main function

In [5]:
def EncodingTests():
    """Run sanity checks and print example queries against the encoded memory.

    Tests 1-3 are assertions; tests 4-6b only print values for inspection. Relies on
    the globals 'Memory', 'Dict', 'Dict_defs', 'thr' and on the HDvector class, which
    are not defined in this notebook section.
    """
    # Test 1: Distance between any two ID vectors
    # replace=False: sampling with replacement could draw the same label twice, giving
    # a distance of 0 and a spurious assertion failure.
    # NOTE(review): Memory[1] is presumably the list of stored labels -- confirm.
    IDvecs = np.random.choice(Memory[1], 2, replace=False)
    assert Dict[IDvecs[0]].dist(Dict[IDvecs[1]]) > thr
    print("Test 1 passed!")

    # Test 2: Comparing a definition vs a random vector
    first_concept = next(iter(Dict_defs))  # same entry keys()[0] gave, without needing a list
    d1 = Dict[first_concept].getPointer().dist(Dict[IDvecs[1]])
    assert d1 > thr   #Should be greater than threshold (0.45 * N)
    print("Test 2 passed!")

    # Test 3: Getting labels for accordion ** has  (should return all features that accordion has)
    L = HDvector.getLabelID( Dict['accordion'] ** Dict['has'] )
    assert ( 'keys' in L[0][0] or 'keys' in L[1][0] ) or  ( 'buttons' in L[0][0] or 'buttons' in L[1][0] )
    print("Test 3 passed!")

    # The prints below use %-formatting so the text is the same whether 'print' is the
    # statement or the function; spacing matches the previous comma-separated output.

    # Test 4: Comparing two semantic pointers
    Dist = HDvector.dist( Dict['accordion'].getPointer(), Dict['airplane'].getPointer() )
    print("Test 4: Distance from accordion to airplane (definitions) -> %s" % (Dist,))

    # Test 5: Asking closest concept of another concept's definition...
    Def = Dict['airplane'].getPointer()
    print("\n\nClosest concepts to 'airplane' definition:  %s" % (HDvector.getLabelSP(Def),))

    # Test 6: Asking for closest concept
    vec = CreateSemanticPointer( [['has','wings'], ['is','reptile'],['made_of','metal']] )
    print("\n\nClosest concepts for definition [['has','wings'], ['is','reptile'],['made_of','metal']]:  %s" % (HDvector.getLabelSP(vec),))

    # Test 6b: Asking for closest concept
    vec = CreateSemanticPointer( [['associated_with','polkas']] )
    print("\n\nClosest concepts for definition [['associated_with','polkas']]:  %s" % (HDvector.getLabelSP(vec),))
    
    
    
def Main():
    """Encode the whole dataset into memory and then run the encoding tests.

    Steps, in order: init(), build the definitions dictionary, collect the feature
    types, create the ID vectors, attach the definition vectors, run EncodingTests().
    """
    init()
    # NOTE: a local 'thr = 0.4 * N' used to be assigned here. Being local to Main it was
    # never seen by EncodingTests (which reads the global 'thr'), so it had no effect
    # and was removed. If a 0.4 * N threshold is really wanted, it has to be set on the
    # global -- TODO confirm the intended value.

    # Read dataset and create definition dictionary
    CreateDictionary()
    # Feature vectors
    FeatureVectors(Dict_defs)
    # Save concepts into memory (ID vectors)
    SaveConcepts(Dict_defs)
    # Associate definitions to concepts into memory (SP vectors)
    SaveDefinitions(Dict_defs)
    print("End of encoding")

    print("Starting tests...\n")
    EncodingTests()
    
# Encode the dataset and run the checks.
Main()


# Reference ranking read straight from the dataset's matrix file, for comparison with
# the ranking printed by the tests. %-formatting keeps the text identical whether
# 'print' is the statement or the function.
print("\nClosest concepts to airplane (from Dataset) %s" % (ClosestConcepts('airplane', 20),))


End of initialization
End of encoding
Starting tests...

Test 1 passed!
Test 2 passed!
Test 3 passed!
Test 4: Distance from accordion to airplane (definitions) -> 5049


Closest concepts to 'airplane' definition:  [['airplane', 0], ['jet', 2441], ['train', 3807], ['rocket', 3925], ['helicopter', 3931], ['trolley', 4123], ['taxi', 4144], ['ship', 4190], ['raven', 4219], ['submarine', 4238], ['crow', 4238], ['housefly', 4253], ['scooter', 4265], ['pelican', 4275], ['beetle', 4275], ['car', 4284], ['bin_(waste)', 4286], ['buzzard', 4294], ['hornet', 4299], ['goose', 4316], ['hawk', 4319], ['van', 4332], ['stork', 4333], ['ambulance', 4338], ['horse', 4342], ['subway', 4345], ['bus', 4348], ['eagle', 4356], ['falcon', 4361], ['nightingale', 4362], ['sled', 4367], ['oriole', 4372], ['vulture', 4380], ['tank_(container)', 4381], ['leopard', 4386], ['umbrella', 4386], ['pheasant', 4399], ['bike', 4401], ['sledgehammer', 4410], ['wheelbarrow', 4411], ['owl', 4422], ['building', 4425], ['dove',

In [6]:
# Show two encoded definitions as a spot check. A single parenthesised argument
# prints the same text whether 'print' is the statement or the function.
print(Dict_defs['alligator'])
print(Dict_defs['zucchini'])

[['has', 'teeth'], ['color', 'green'], ['is', 'animal'], ['shape', 'long'], ['is', 'reptile'], ['lives_in', 'swamps'], ['has', 'scales'], ['lives_in', 'water'], ['has', 'tail'], ['adjective', 'scary'], ['behaviour', 'swims'], ['has', 'mouth'], ['lives_in', 'Florida'], ['behaviour', 'eats_people'], ['has', 'jaws'], ['adjective', 'dangerous']]
[['is', 'vegetable'], ['color', 'green'], ['shape', 'long'], ['has', 'inside'], ['eaten_by', 'cooking'], ['eaten_by', 'frying'], ['eaten_in', 'breads'], ['like_a', 'cucumber'], ['adjective', 'tastes_good'], ['has', 'seeds'], ['color', 'white_inside'], ['eaten', 'raw'], ['has', 'green_outside'], ['has', 'outside'], ['adjective', 'nutritious']]
