In [None]:
import torch
from transformers import RobertaTokenizer, RobertaConfig, RobertaModel
from sentence_transformers import util

# The tokenizer and the encoder weights are loaded from the same checkpoint id.
tokenizer = RobertaTokenizer.from_pretrained("microsoft/codebert-base")
model = RobertaModel.from_pretrained("microsoft/codebert-base")

# Use CUDA when torch reports it as available, otherwise stay on the CPU.
# `device` is reused later to place the input tensors next to the model.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

In [None]:
def get_embeddings(text):
    """
    Extract embeddings from a code snippet or a natural language query.

    Parameters
    ----------
    text : str
        Source code or natural-language text to embed. Input longer than
        512 tokens is truncated.

    Returns
    -------
    numpy.ndarray
        1-D, L2-normalised embedding vector for ``text``.
    """
    # `RobertaModel` has no `tokenize` method; encoding is the job of the
    # module-level `tokenizer`. It also produces the attention mask used below.
    encoded = tokenizer(
        text, max_length=512, truncation=True, return_tensors="pt"
    ).to(device)

    # Inference only, so skip autograd bookkeeping.
    with torch.no_grad():
        outputs = model(**encoded)

    # Read the result by attribute: unpacking the returned ModelOutput like a
    # tuple would iterate over its keys rather than its tensors.
    token_embeddings = outputs.last_hidden_state

    # Masked mean over the token axis gives a single vector for the whole input.
    mask = encoded["attention_mask"].unsqueeze(-1).to(token_embeddings.dtype)
    nl_embedding = (token_embeddings * mask).sum(dim=1) / mask.sum(dim=1)

    # Unit length, so a dot product between two results equals their cosine.
    norm_nl_embedding = torch.nn.functional.normalize(nl_embedding, p=2, dim=1)
    return norm_nl_embedding.cpu().numpy()[0]

In [None]:

# NOTE: this cell reads `code_corpus` and `vector_database`, which are assigned
# in the corpus cell further down in this notebook. Run that cell first.
nl_query = 'Plot a histogram'
nlq_emb = get_embeddings(nl_query)

# Cosine similarity between the query and every stored embedding.
# Row 0 is the (only) query.
cos_scores = util.cos_sim(nlq_emb, vector_database)[0]

# `torch.topk` already returns a (values, indices) result, so it is used
# directly instead of being re-wrapped.
top_results = torch.topk(cos_scores, k=2)
print(top_results)

# By default topk orders its results from the highest score down, so position 0
# is the best match. The name avoids rebinding the builtin `max`.
best_idx = int(top_results.indices[0])
print(best_idx)

print(code_corpus[best_idx])

In [None]:
# Snippets that can be retrieved by a natural-language query. Each entry is
# embedded as-is, so the text must be exactly the code it is meant to represent.
# (A trailing backslash inside a triple-quoted string joins two lines, which is
# why none is used here.)
code_corpus = [
"""
class Queue:
    def __init__(self):
        self.items = []
    def is_empty(self):
        return self.items == []
    def enqueue(self, item):
        self.items.insert(0, item)
    def dequeue(self):
        return self.items.pop()
    def size(self):
        return len(self.items)
    def peek(self):
        return self.items[-1]
""",
"""
print('Hello, world!')
""",
"""
num = input('Enter a number: ')

print('You Entered:', num)

print('Data type of num:', type(num))
""",
"""
num1 = 5 
num2 = 10 
sum = num1 + num2
print("The sum of", num1, "and", num2, "is", sum)

""",
"""
num1 = 5 
num2 = 10 
num3  = 22
sum = num1 + num2 + num3
print("The sum of", num1, ",",num3 ,"and", num2, "is", sum)

"""
]
# One embedding per snippet, kept in the same order as `code_corpus`, so a
# position in this list identifies the snippet it was computed from.
vector_database = [get_embeddings(snippet) for snippet in code_corpus]