# Getting embeddings for queries and snippets

This notebook demonstrates how to compute embeddings for queries and code snippets with pre-trained models.

© 2020 Nokia

Licensed under the BSD 3 Clause license

SPDX-License-Identifier: BSD-3-Clause

In [1]:
from codesearch.utils import load_model
from codesearch.data_config import MODELS

### List which pretrained models are available

In [2]:
# Show the available model names in sorted order; one of these names is
# passed to load_model in the next step.
sorted(MODELS)

['ncs-embedder-so.feb20', 'tnbow-embedder-so.feb20', 'use-embedder-so.feb20']

### Load one of the models

In [3]:
# Pick one of the model names listed above and load it.
# NOTE: in the recorded run this call downloaded the model archive over HTTP
# (see the output below), so it needs network access to that host.
modelname = "ncs-embedder-so.feb20"
embedder = load_model(modelname)

Downloading ncs-embedder-so.feb20 model from http://codebook.dyn.nesc.nokia.net:8089/codesearch-experiments/models/ncs-embedder-so.feb20.tar.gz
codesearch.ncs.ncs_embedder.NcsEmbedder
codesearch.encoders.BasicEncoder




### Embed queries

In [4]:
# embed_queries is given a list of query strings; with the single query used
# here the recorded result has shape (1, 100).
query_embs = embedder.embed_queries(["create a bar plot"])
query_embs.shape

Initializing spacy nlp \
Initialized spacy nlp


(1, 100)

### Embed snippets

In [5]:
language = "python"
# Source code of the snippet to embed, kept as a plain string (it is not
# executed by this notebook).
code = """
import matplotlib.pyplot as plt
import numpy as np

y = np.random.rand(8)
idxs = np.arange(len(y))
plt.bar(idxs, y)
plt.show()
"""
# A snippet is a plain dict holding a natural-language description, the code
# itself, and the programming language of that code.
snippet = {
    "description": "plot a bar chart",
    "code": code,
    # Reuse the variable defined above rather than repeating the literal, so
    # changing `language` actually changes what is embedded.
    "language": language,
}
# embed_snippets is given a list of snippet dicts; with one snippet the
# recorded result has shape (1, 100).
snippet_embs = embedder.embed_snippets([snippet])
snippet_embs.shape

(1, 100)

### Compute the similarity between a query and a snippet

In [6]:
import numpy as np


def _unit_rows(embs):
    """Divide ``embs`` by its L2 norm taken along axis 1 (kept as a column)."""
    row_norms = np.linalg.norm(embs, axis=1, keepdims=True)
    return embs / row_norms


# Cosine similarity equals the dot product of L2-normalised vectors, so both
# embedding matrices are normalised first and the query matrix is then
# multiplied by the transposed snippet matrix.
query_embs_n = _unit_rows(query_embs)
snippet_embs_n = _unit_rows(snippet_embs)
cosine_sims = np.dot(query_embs_n, snippet_embs_n.T)
cosine_sims

array([[0.739]], dtype=float16)