In [1]:
import numpy as np
import pickle
import scipy.spatial.distance as sd
import json

In [2]:
# load models
# One entry per decade start year (1800, 1810, ..., 1990). Each entry is read from
# "<year>-vocab.pkl" and "<year>-w.npy" in the current working directory and stored
# in semantic_models under the integer year.
semantic_models = dict()
for i in range(1800,2000,10):
    # NOTE(review): pickle.load can execute arbitrary code while unpickling;
    # only point this at vocab files from a trusted source.
    # The with-block closes the file handle as soon as the vocab has been read.
    with open(str(i) + "-vocab.pkl","rb") as vocab_file:
        vocab = pickle.load(vocab_file)
    vectors = np.load(str(i) + "-w.npy")
    data = {
        'name': i,
        'vocab': vocab,      # presumably a list of words aligned with the rows of `vectors` -- confirm
        'vectors': vectors,
    }
    semantic_models[i] = data

In [3]:
def get_neighbors(word,model,n_neighbors=24):
    """Return the words closest to ``word`` in one model, by cosine distance.

    Parameters
    ----------
    word : entry to look up in the model's vocab.
    model : key into the module-level ``semantic_models`` dict.
    n_neighbors : maximum number of neighbors to return. The default of 24
        matches the number previously produced by the fixed ``[1:25]`` slice.

    Returns
    -------
    list of ``[neighbor_word, distance]`` pairs ordered by increasing cosine
    distance; each distance is rounded to 9 decimal places.

    Raises
    ------
    KeyError if ``model`` is not a key of ``semantic_models``; whatever
    ``vocab.index`` raises when ``word`` is absent (ValueError for a list).
    """
    vectors = semantic_models[model]['vectors']
    vocab = semantic_models[model]['vocab']
    idx = vocab.index(word)
    # A single cdist call yields the distance from the query row to every row;
    # these values are reused below instead of being recomputed per neighbor.
    distances = sd.cdist([vectors[idx]],vectors,"cosine")[0]
    ranked = np.argsort(distances)
    # Remove the query row explicitly: with duplicate or tied vectors it is not
    # guaranteed to sort into position 0, so slicing off the first element
    # could drop a real neighbor and keep the query word itself.
    ranked = ranked[ranked != idx][:n_neighbors]
    neighbors = [[vocab[i],np.round(distances[i],9)] for i in ranked]
    return(neighbors)

In [4]:
def get_distance(word1,word2,model):
    """Cosine distance between two vocabulary words within a single model.

    ``model`` selects an entry of the module-level ``semantic_models`` dict;
    both words are located with the vocab's ``index`` method and the distance
    between their vector rows is returned rounded to 9 decimal places.
    """
    entry = semantic_models[model]
    matrix = entry['vectors']
    words = entry['vocab']
    # Resolve both row positions before touching the vectors.
    row_a, row_b = [words.index(w) for w in (word1, word2)]
    # cdist returns a 1x1 matrix for a single pair; [0][0] extracts the scalar.
    pairwise = sd.cdist([matrix[row_a]],[matrix[row_b]],"cosine")
    return np.round(pairwise[0][0],9)

In [None]:
#get_distance("beautiful","pretty",1950)

In [None]:
#get_neighbors("beautiful",1990)

In [None]:
#get_neighbors("cute",1990)

In [20]:
# Export, for every loaded model, the vectors of the key terms and their nearest
# neighbors as a pair of TSV files (vectors + one-word-per-line metadata), and
# collect one config entry per model into a single combined JSON file.
key_terms = ["beautiful","cute","ugly"]

# Path of the combined config. It must NOT be reassigned inside the loop below,
# otherwise the final dump lands in the last model's per-model filename.
outfilejson = "../../tensors/tensors_all.json"

json_data = dict()
json_data['embeddings'] = list()

for model_name in semantic_models.keys():
    print(model_name)
    model_vocab = semantic_models[model_name]['vocab']
    model_vectors = semantic_models[model_name]['vectors']

    tensor_filename = "../../tensors/" + "concepts-" + str(model_name)
    outfiletsv = tensor_filename + '_tensor.tsv'
    outfiletsvmeta = tensor_filename + '_metadata.tsv'

    # word -> row position, built once per model so each lookup below is O(1).
    # setdefault keeps the first occurrence, matching what list.index returns.
    word_to_idx = dict()
    for position, token in enumerate(model_vocab):
        word_to_idx.setdefault(token, position)

    # Only key terms this model actually contains can be exported; a missing
    # term would otherwise fail at the vector lookup.
    present_terms = [t for t in key_terms if t in word_to_idx]

    vocab = list()
    for t in present_terms:
        vocab.extend(x[0] for x in get_neighbors(t,model_name))

    # add key terms to list, de-duplicate, and sort so the row order of the
    # output files is the same on every run
    vocab = sorted(set(vocab + present_terms))

    with open(outfiletsv, 'wt') as file_vector:
        with open(outfiletsvmeta, 'wt',encoding='utf-8') as file_metadata:
            for word in vocab:
                file_metadata.write(word + '\n')
                vector_row = '\t'.join(str(x) for x in model_vectors[word_to_idx[word]])
                file_vector.write(vector_row + '\n')

    # Shape of what was just written: [rows, vector width].
    # NOTE(review): presumably the consumer of this JSON expects
    # [number of points, dimensions] here -- confirm against the projector docs.
    shape = [len(vocab), model_vectors.shape[1]]
    json_data['embeddings'].append({
        'tensorName': str(model_name),
        'tensorPath': 'https://raw.githubusercontent.com/jeddobson/vector-space-mappings/master/tensors/concepts-' + str(model_name) + '_tensor.tsv',
        'metadataPath': 'https://raw.githubusercontent.com/jeddobson/vector-space-mappings/master/tensors/concepts-' + str(model_name) + '_metadata.tsv',
        'tensorShape' : shape
    })

with open(outfilejson, 'wt') as fp:
    json.dump(json_data, fp)

1920
1890
1860
1990
1830
1800
1980
1930
1900
1970
1870
1840
1960
1810
1940
1910
1880
1850
1820
1950


In [21]:
# Serialize the collected per-model entries and write them to the combined
# config file in one go.
with open("../../tensors/tensors_all.json", 'wt') as fp:
    fp.write(json.dumps(json_data))