# Semantic Evaluation of Summary Generation

This demo uses a subset of https://www.kaggle.com/datasets/gowrishankarp/newspaper-text-summarization-cnn-dailymail

In [84]:
# parameters 

file_name = "articles.txt"

tuning_examples = 1 # right now tuning has not been implemented
testing_examples = 50
temperature_list = [0,0.5,1.0]
top_k_list = [40]
top_p_list = [0.8]
max_output_tokens_list = [200, 1000]
prompts = ["summarize the following: ", "create a short abstract from the following article: "] # test different variations of your prompts

In [85]:
# import local dataset

import pandas as pd
headers = ["id", "context", "ground_truth_response"]
df_tune = pd.read_csv(file_name, nrows=tuning_examples, skiprows=1,names=headers).drop(columns="id")
df_train = pd.read_csv(file_name, nrows=testing_examples, skiprows=tuning_examples,names=headers).drop(columns="id")

In [73]:
# define functions

def generate_response(row, prompt_, model_, parameters_):
    response = model.predict(
        prompt  + row["context"],
        **parameters_,
    )
    return response.text

def evaluate(row):
    if row["generated_response"] == '':
        return 0
    embedding_response = emb_model.get_embeddings([row["ground_truth_response"], row["generated_response"]])
    embeddings = [embedding.values for embedding in embedding_response]    
    return np.dot(embeddings[0], embeddings[1])

In [74]:
# evaluate base model

from vertexai.preview.language_models import TextEmbeddingModel
import numpy as np
from vertexai.preview.language_models import TextGenerationModel

model = TextGenerationModel.from_pretrained("text-bison@001")
emb_model = TextEmbeddingModel.from_pretrained("textembedding-gecko@001")

eval_scores = []
model_number = 1

for temperature in temperature_list:
    for top_k in top_k_list:
        for top_p in top_p_list:
            for max_output_tokens in max_output_tokens_list:
                for prompt in prompts:
                    parameters = {
                        "temperature": temperature,  # Temperature controls the degree of randomness in token selection.
                        "max_output_tokens": max_output_tokens,  # Token limit determines the maximum amount of text output.
                        "top_p": top_p,  # Tokens are selected from most probable to least until the sum of their probabilities equals the top_p value.
                        "top_k": top_k,  # A top_k of 1 means the selected token is the most probable among all tokens.
                    }
                    df_train["generated_response"] = df_train.apply(lambda x: generate_response(x, prompt, model, parameters), axis=1)    
                    df_train["similarity_score"] = df_train.apply(evaluate, axis =1)
                    similarity_score = df_train["similarity_score"].mean()
                    
                    eval_scores.append([str(model_number),str(temperature),str(top_k),str(top_p), str(max_output_tokens),prompt,str(similarity_score)])
                    
                    print("finished evaluating model: " + str(model_number))
                    model_number +=1
                    
                        
         

finished evaluating model: 1
finished evaluating model: 2
finished evaluating model: 3
finished evaluating model: 4
finished evaluating model: 5
finished evaluating model: 6
finished evaluating model: 7
finished evaluating model: 8
finished evaluating model: 9
finished evaluating model: 10
finished evaluating model: 11
finished evaluating model: 12


In [75]:
df_eval = pd.DataFrame(eval_scores)
df_eval.columns = ["model_number","temperature","top_k","top_p","max_output_tokens", "prompt", "similarity_score"]
df_eval

Unnamed: 0,model_number,temperature,top_k,top_p,max_output_tokens,prompt,similarity_score
0,1,0.0,40,0.8,200,summarize the following:,0.5001347131100727
1,2,0.0,40,0.8,200,create a short abstract from the following art...,0.65245284270237
2,3,0.0,40,0.8,1000,summarize the following:,0.4819216166617734
3,4,0.0,40,0.8,1000,create a short abstract from the following art...,0.65245284270237
4,5,0.5,40,0.8,200,summarize the following:,0.6461484487826463
5,6,0.5,40,0.8,200,create a short abstract from the following art...,0.6851358014146902
6,7,0.5,40,0.8,1000,summarize the following:,0.6236201826520367
7,8,0.5,40,0.8,1000,create a short abstract from the following art...,0.6680973028388406
8,9,1.0,40,0.8,200,summarize the following:,0.6431095503512387
9,10,1.0,40,0.8,200,create a short abstract from the following art...,0.6997829324371893
