# Function Summarization Evaluation

*Note: This notebook is modified from the CodeSearchNet [ExploreData.ipynb](https://github.com/github/CodeSearchNet/blob/master/notebooks/ExploreData.ipynb) notebook

In [1]:
%pip install datasets transformers sentence_transformers pandas

Note: you may need to restart the kernel to use updated packages.


In [2]:
from datasets import load_dataset
from transformers import T5ForConditionalGeneration, RobertaTokenizer
from sentence_transformers import SentenceTransformer, util

import pandas as pd

In [3]:
dataset = load_dataset("code_x_glue_ct_code_to_text", "python")

Found cached dataset code_x_glue_ct_code_to_text (/Users/cjwilliams/.cache/huggingface/datasets/code_x_glue_ct_code_to_text/python/0.0.0/f8b7e9d51f609a87e7ec7c7431706d4ee0b402e3398560410313d4acc67060a0)


  0%|          | 0/3 [00:00<?, ?it/s]

In [26]:
df = pd.DataFrame(dataset['test'])

id                  14918
repo                14918
path                14918
func_name           14918
original_string     14918
language            14918
code                14918
code_tokens         14918
docstring           14918
docstring_tokens    14918
sha                 14918
url                 14918
dtype: int64

In [20]:
tokenizer = RobertaTokenizer.from_pretrained("Salesforce/codet5-base")
model = T5ForConditionalGeneration.from_pretrained("Salesforce/codet5-base-multi-sum")
cosine_model = SentenceTransformer("sentence-transformers/multi-qa-distilbert-cos-v1")
finetuned_model = T5ForConditionalGeneration.from_pretrained(
    "stmnk/codet5-small-code-summarization-python")

In [21]:
def summarize(code_tokens):
    input_ids = tokenizer(' '.join(code_tokens), return_tensors='pt').input_ids
    generated_ids = model.generate(input_ids, max_length=200)
    return tokenizer.decode(generated_ids[0], skip_special_tokens=True)

In [22]:
def summarize_finetuned(code_tokens):
    input_ids = tokenizer(' '.join(code_tokens), return_tensors='pt').input_ids
    generated_ids = finetuned_model.generate(input_ids, max_length=200)
    return tokenizer.decode(generated_ids[0], skip_special_tokens=True)

In [23]:
%%time
df['summarization'] = df['code_tokens'].apply(summarize)
df['summarization_finetuned'] = df['code_tokens'].apply(summarize_finetuned)

CPU times: user 7.03 s, sys: 859 ms, total: 7.89 s
Wall time: 6.83 s


In [10]:
def compare(summarization, docstring):
    embedding_1 = cosine_model.encode(summarization)
    embedding_2 = cosine_model.encode([docstring, ""])
    score = util.dot_score(embedding_1, embedding_2)[0].cpu().tolist()[0]
    return abs(score)

In [11]:
df['score'] = df.apply(lambda x: compare(x.summarization, ' '.join(x.docstring_tokens)), axis=1)
df['finetuned_score'] = df.apply(
    lambda x: compare(x.summarization_finetuned, ' '.join(x.docstring_tokens)), axis=1)

In [12]:
df['score'].mean()

0.5839301273226738

In [13]:
df['finetuned_score'].mean()

0.4003072716295719