# Function Summarization Evaluation

In [None]:
%pip install datasets transformers sentence_transformers pandas

In [None]:
from datasets import load_dataset
from transformers import T5ForConditionalGeneration, RobertaTokenizer
from sentence_transformers import SentenceTransformer, util

import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Load the "python" configuration of the code_x_glue_ct_code_to_text dataset.
dataset = load_dataset("code_x_glue_ct_code_to_text", "python")

In [None]:
# Work on the test split as a DataFrame so summaries and scores can be added
# as extra columns.
df = pd.DataFrame(dataset['test'])

In [None]:
# Tokenizer used for encoding inputs and decoding outputs of every model below.
tokenizer = RobertaTokenizer.from_pretrained("Salesforce/codet5-base")

# The three summarization checkpoints being compared.
model = T5ForConditionalGeneration.from_pretrained(
    "Salesforce/codet5-base-multi-sum"
)
finetuned_model = T5ForConditionalGeneration.from_pretrained(
    "cjwilliams/codet5_base_python_sum"
)
finetuned_model_small = T5ForConditionalGeneration.from_pretrained(
    "stmnk/codet5-small-code-summarization-python"
)

# Sentence encoder used to embed summaries and docstrings for scoring.
cosine_model = SentenceTransformer(
    "sentence-transformers/multi-qa-distilbert-cos-v1"
)

In [None]:
# 1-based progress counters, one per summarization function.
counter = counter_fine = counter_fine_small = 1
# Number of rows to process; printed as the denominator of the progress output.
size = len(df)

In [None]:
def summarize(code_tokens):
    """Summarize one function with the base model.

    The tokens are joined with single spaces, encoded with the module-level
    ``tokenizer`` and passed to ``model.generate`` (``max_length=200``).

    Side effects: prints a ``Base i/size`` progress line and increments the
    module-level ``counter``.

    Args:
        code_tokens: Iterable of source-code token strings.

    Returns:
        The first generated sequence decoded to text, special tokens removed.
    """
    global counter
    print(f"Base {counter}/{size}")
    counter += 1

    source_text = ' '.join(code_tokens)
    encoded = tokenizer(source_text, return_tensors='pt')
    output_ids = model.generate(encoded.input_ids, max_length=200)
    first_sequence = output_ids[0]
    return tokenizer.decode(first_sequence, skip_special_tokens=True)

In [None]:
def summarize_finetuned(code_tokens):
    """Summarize one function with the fine-tuned base-size model.

    The tokens are joined with single spaces, encoded with the module-level
    ``tokenizer`` and passed to ``finetuned_model.generate``
    (``max_length=200``).

    Side effects: prints a ``Fine tuned i/size`` progress line and increments
    the module-level ``counter_fine``.

    Args:
        code_tokens: Iterable of source-code token strings.

    Returns:
        The first generated sequence decoded to text, special tokens removed.
    """
    global counter_fine
    print(f"Fine tuned {counter_fine}/{size}")
    counter_fine += 1

    source_text = ' '.join(code_tokens)
    encoded = tokenizer(source_text, return_tensors='pt')
    output_ids = finetuned_model.generate(encoded.input_ids, max_length=200)
    first_sequence = output_ids[0]
    return tokenizer.decode(first_sequence, skip_special_tokens=True)

In [None]:
def summarize_finetuned_small(code_tokens):
    """Summarize one function with the small fine-tuned model.

    The tokens are joined with single spaces, encoded with the module-level
    ``tokenizer`` and passed to ``finetuned_model_small.generate``
    (``max_length=200``).

    Side effects: prints a ``Small fine tuned i/size`` progress line and
    increments the module-level ``counter_fine_small``.

    Args:
        code_tokens: Iterable of source-code token strings.

    Returns:
        The first generated sequence decoded to text, special tokens removed.
    """
    global counter_fine_small
    print(f"Small fine tuned {counter_fine_small}/{size}")
    counter_fine_small += 1

    source_text = ' '.join(code_tokens)
    encoded = tokenizer(source_text, return_tensors='pt')
    output_ids = finetuned_model_small.generate(
        encoded.input_ids, max_length=200
    )
    first_sequence = output_ids[0]
    return tokenizer.decode(first_sequence, skip_special_tokens=True)

In [None]:
%%time
# Summarize every row with the base model; %%time reports the cost of the
# whole pass.
df['summarization'] = df['code_tokens'].apply(summarize)

In [None]:
%%time
# Summarize every row with the fine-tuned base-size model; %%time reports the
# cost of the whole pass.
df['summarization_finetuned'] = df['code_tokens'].apply(summarize_finetuned)

In [None]:
%%time
# Summarize every row with the small fine-tuned model; %%time reports the cost
# of the whole pass.
df['summarization_finetuned_small'] = df['code_tokens'].apply(summarize_finetuned_small)

In [None]:
def compare(summarization, docstring):
    """Score how close a generated summary is to the reference docstring.

    Both texts are embedded with the module-level ``cosine_model`` and the
    dot product of the two embeddings is returned.

    Args:
        summarization: Generated summary text.
        docstring: Reference docstring text.

    Returns:
        The dot-product score of the two embeddings as a Python float.
    """
    summary_embedding = cosine_model.encode(summarization)
    # Encode only the docstring: dot_score promotes 1-D embeddings to a batch
    # of one, so no extra padding sentence has to be embedded per call.
    docstring_embedding = cosine_model.encode(docstring)
    # The result is a 1x1 matrix; unwrap it to a plain float.
    return util.dot_score(summary_embedding, docstring_embedding).item()

In [None]:
# Score each model's summaries against the space-joined reference docstring
# tokens. Pairs are (summary column, score column).
for summary_column, score_column in (
    ('summarization', 'score'),
    ('summarization_finetuned', 'finetuned_score'),
    ('summarization_finetuned_small', 'finetuned_small_score'),
):
    df[score_column] = df.apply(
        lambda row: compare(row[summary_column], ' '.join(row.docstring_tokens)),
        axis=1,
    )

In [None]:
# Median similarity score of the base model's summaries.
df['score'].median()

In [None]:
# Median similarity score of the fine-tuned base-size model's summaries.
df['finetuned_score'].median()

In [None]:
# Median similarity score of the small fine-tuned model's summaries.
df['finetuned_small_score'].median()

In [None]:
# Persist the summaries and scores so the plots below can be produced without
# re-running generation. The DataFrame index is written too (pandas default).
df.to_csv("summarization_results.csv", sep=',', encoding='utf-8')

In [None]:
import ast

# Reload the saved results. The CSV carries the DataFrame index as its first
# (unnamed) column and stores the token-list columns as their string repr, so
# restore the index and parse the lists back; otherwise later
# ' '.join(row['code_tokens']) calls would join individual characters.
df = pd.read_csv(
    'summarization_results.csv',
    index_col=0,
    converters={
        'code_tokens': ast.literal_eval,
        'docstring_tokens': ast.literal_eval,
    },
)

In [None]:
# One histogram of similarity scores per model, stacked vertically.
fig, (ax1, ax2, ax3) = plt.subplots(3, 1, figsize=(10, 10))
panels = (
    (ax1, "score", "Salesforce/codet5-base-multi-sum"),
    (ax2, "finetuned_score", "cjwilliams/codet5_base_python_sum"),
    (ax3, "finetuned_small_score", "stmnk/codet5-small-code-summarization-python"),
)

for axis, column, _ in panels:
    df[column].hist(ax=axis)

# Layout is computed before titles and labels are attached, as before.
fig.tight_layout(pad=6.0)

for axis, _, title in panels:
    axis.set_title(title)
    axis.set_xlabel("Cosine Similarity")
    axis.set_ylabel("Count")

fig.suptitle("Function Summarization Performance")
fig.savefig("summarization-performance-histogram.png", bbox_inches='tight')

In [None]:
# Side-by-side box plot of the three score columns, labelled by model name.
fig, ax = plt.subplots(1, figsize=(15, 15))
score_labels = {
    "score": "Salesforce/codet5-base-multi-sum",
    "finetuned_score": "cjwilliams/codet5_base_python_sum",
    "finetuned_small_score": "stmnk/codet5-small-code-summarization-python",
}
labelled_scores = df.rename(columns=score_labels)
labelled_scores.boxplot(column=list(score_labels.values()), ax=ax)
ax.set_xlabel("Models")
ax.set_ylabel("Cosine Similarity")
fig.suptitle("Function Summarization Performance")
fig.savefig("summarization-performance-boxplot.png", bbox_inches='tight')

## Assessing duration of summarization execution

In [None]:
import ast
import time

# Models to time; the keys double as the column names of ``res`` and as the
# labels in the duration box plot.
timed_models = {
    'Salesforce/codet5-base-multi-sum': model,
    'cjwilliams/codet5_base_python_sum': finetuned_model,
    'stmnk/codet5-small-code-summarization-python': finetuned_model_small,
}

# Collect one dict of durations per sampled row and build the DataFrame once
# at the end (DataFrame.append no longer exists in pandas 2.x).
timings = []
for _, row in df.sample(n=50).iterrows():
    code_tokens = row['code_tokens']
    # After a CSV round trip the token list comes back as its string repr;
    # parse it so the join below works on tokens rather than characters.
    if isinstance(code_tokens, str):
        code_tokens = ast.literal_eval(code_tokens)
    input_ids = tokenizer(' '.join(code_tokens), return_tensors='pt').input_ids

    sample_timings = {}
    for model_name, timed_model in timed_models.items():
        # perf_counter is the monotonic, high-resolution clock for intervals.
        start = time.perf_counter()
        timed_model.generate(input_ids, max_length=200)
        sample_timings[model_name] = time.perf_counter() - start
    timings.append(sample_timings)

# Seconds per generate() call: one row per sampled function, one column per model.
res = pd.DataFrame(timings, columns=list(timed_models))

In [None]:
# Box plot of per-sample generation time for each model.
fig, ax = plt.subplots(1, figsize=(15, 15))
duration_columns = [
    'Salesforce/codet5-base-multi-sum',
    'cjwilliams/codet5_base_python_sum',
    'stmnk/codet5-small-code-summarization-python',
]
res.boxplot(column=duration_columns, ax=ax)
ax.set_xlabel("Models")
ax.set_ylabel("Summarization Duration (seconds)")
fig.suptitle("Function Summarization Execution Duration")
fig.savefig("summarization-speed-boxplot.png", bbox_inches='tight')

In [None]:
# Print the median similarity score of the base model's summaries.
print(df['score'].median())