In [6]:
from random import randint

import numpy as np
import torch

# Load model
from InferSent.models import InferSent
model_version = 1
MODEL_PATH = "./InferSent/encoder/infersent2.pickle"
params_model = {'bsize': 64, 'word_emb_dim': 300, 'enc_lstm_dim': 2048,
                'pool_type': 'max', 'dpout_model': 0.0, 'version': model_version}
model = InferSent(params_model)
model.load_state_dict(torch.load(MODEL_PATH))

# Keep it on CPU or put it on GPU
use_cuda = False
model = model.cuda() if use_cuda else model


# If infersent1 -> use GloVe embeddings. If infersent2 -> use InferSent embeddings.
W2V_PATH = './InferSent/dataset/GloVe/glove.840B.300d.txt' if model_version == 1 else './InferSent/dataset/fastText/crawl-300d-2M.vec'
model.set_w2v_path(W2V_PATH)


# Load embeddings of K most frequent words
model.build_vocab_k_words(K=100000)
# Load some sentences
sentences = []
with open('./InferSent/encoder/samples.txt') as f:
    for line in f:
        sentences.append(line.strip())
# print(len(sentences))

sentences[:5]

embeddings = model.encode(sentences, bsize=128, tokenize=False, verbose=True)
print('nb sentences encoded : {0}'.format(len(embeddings)))

np.linalg.norm(model.encode(['the cat eats.']))



Vocab size : 100000
Nb words kept : 128201/130068 (98.6%)
Speed : 64.0 sentences/s (cpu mode, bsize=128)
nb sentences encoded : 9815


4.573626

In [None]:

def cosine(u, v):
    return np.dot(u, v) / (np.linalg.norm(u) * np.linalg.norm(v))

# print(cosine(model.encode(['multi action light day cream'])[0], model.encode(['a fresh fast absorbing serum'])[0]))
functions = []
m = {}
i = 0
with open('func.txt') as f:
    for line in f:
        out = line.split('.')
        m[i]=out[0]
        i += 1
        functions.append(out[1].strip())
# print(len(m))
similar_func = np.zeros((len(functions), len(functions)))
for i in range(len(functions)):
    for j in range(len(functions)):
        similar_func[i][j] = cosine(model.encode([functions[i]])[0], model.encode([functions[j]])[0])

print(similar_func)

top = 3
most_similar = np.zeros((len(functions), 3),dtype=np.int32)
for i in range(len(functions)):
    row = similar_func[i]
    ind = np.argpartition(row, -4)[-4:]
    for k in range(3):
        most_similar[i][k] = ind[k]

print("------------------------")
print(most_similar)
print("------------------------")


descriptions = []
with open('desc.txt') as f:
    for line in f:
        out = line.split('.')
        descriptions.append(out[1].strip())

similar_desc = np.zeros((len(descriptions), len(descriptions)))
for i in range(len(descriptions)):
    for j in range(len(descriptions)):
        similar_desc[i][j] = cosine(model.encode([descriptions[i]])[0], model.encode([descriptions[j]])[0])
print('--------------------------------')
print(similar_desc)
print('--------------------------------')
recommendoutput = open('recommend.txt','w')

functodec = np.zeros((len(descriptions),3),dtype=np.int32)
print(len(descriptions))
for i in range(len(descriptions)):
    recommendoutput.write(str(m[i]))
    recommendoutput.write(' ')
    dict={}
    for j in range(3):
        functodec[i][j] = int(similar_desc[i][most_similar[i][j]]*1000)
        dict[functodec[i][j]]=most_similar[i][j]
    functodec[i].sort()
    for j in range(3):
        index=dict[functodec[i][j]]
        print("index=%s",m[index])
        recommendoutput.write(str(m[index]))
        recommendoutput.write(' ')
    recommendoutput.write('\n')
recommendoutput.close()


[[0.99999988 0.99999988 0.99999988 ... 0.38680312 0.66330665 0.40878397]
 [0.99999988 0.99999988 0.99999988 ... 0.38680312 0.66330665 0.40878397]
 [0.99999988 0.99999988 0.99999988 ... 0.38680312 0.66330665 0.40878397]
 ...
 [0.38680312 0.38680312 0.38680312 ... 0.99999994 0.7774061  0.90737301]
 [0.66330665 0.66330665 0.66330665 ... 0.7774061  0.99999994 0.80034721]
 [0.40878397 0.40878397 0.40878397 ... 0.90737301 0.80034721 1.00000012]]
------------------------
[[15 10 11]
 [15 10 11]
 [15 10 11]
 [15 10 11]
 [15 10 11]
 [15 10 11]
 [15 10 11]
 [15 10 11]
 [15 10 11]
 [15 10 11]
 [15 10 11]
 [15 10 11]
 [15 10 11]
 [15 10 11]
 [15 10 11]
 [15 10 11]
 [15 10 11]
 [15 10 11]
 [15 10 11]
 [15 10 11]
 [15 10 11]
 [15 10 11]
 [15 10 11]
 [15 10 11]
 [33 27 24]
 [31 33 29]
 [33 30 28]
 [33 24 30]
 [30 33 31]
 [28 33 25]
 [24 31 27]
 [33 30 31]
 [33 28 32]
 [24 31 28]]
------------------------
