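Download this dataset from Kaggle and place its `test.csv` file in the same directory as this notebook (the first cell reads it by relative path): https://www.kaggle.com/datasets/gowrishankarp/newspaper-text-summarization-cnn-dailymail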

In [52]:
import pandas as pd
# Load just the first five rows of the local CSV; the "id" column is
# discarded because nothing below uses it.
df = (
    pd.read_csv("test.csv", nrows=5)
    .drop("id", axis="columns")
)
df.head()

Unnamed: 0,article,highlights
0,Ever noticed how plane seats appear to be gett...,Experts question if packed out planes are put...
1,A drunk teenage boy had to be rescued by secur...,Drunk teenage boy climbed into lion enclosure ...
2,Dougie Freedman is on the verge of agreeing a ...,Nottingham Forest are close to extending Dougi...
3,Liverpool target Neto is also wanted by PSG an...,Fiorentina goalkeeper Neto has been linked wit...
4,Bruce Jenner will break his silence in a two-h...,"Tell-all interview with the reality TV star, 6..."


In [54]:
# Set up the text-generation model together with its decoding settings.
from vertexai.preview.language_models import TextGenerationModel

# Keyword arguments forwarded to every model.predict() call in this notebook.
parameters = dict(
    # Degree of randomness used when selecting tokens.
    temperature=0.2,
    # Upper bound on the amount of text the model may return.
    max_output_tokens=256,
    # Tokens are taken from most to least probable until their
    # probabilities add up to this value.
    top_p=0.8,
    # For reference: a top_k of 1 would always select the single most
    # probable token.
    top_k=40,
)

model = TextGenerationModel.from_pretrained("text-bison@001")

# Smoke test: send one throwaway prompt and print what comes back.
response = model.predict(
    "Give me 5 interview questions for the role of program manager.",
    **parameters,
)
print("Response from Model: {}".format(response.text))


Response from Model: 1. What is your experience with project management methodologies and tools?
2. How do you handle multiple projects at once and prioritize tasks?
3. What is your approach to risk management?
4. How do you communicate with stakeholders and keep them updated on the progress of your projects?
5. What are your strengths and weaknesses as a project manager?


In [116]:
def generate_response(article):
    """Ask the text model to summarise ``article`` and return the generated text.

    The prompt is a fixed instruction followed by the article body. The
    request goes through the notebook-level ``model`` using the shared
    ``parameters`` settings.
    """
    prompt = "create summary of this article: " + article
    return model.predict(prompt, **parameters).text


# One model call per row; the summaries are stored in a new column.
df["generated_answer"] = df["article"].map(generate_response)
df.head()

Unnamed: 0,article,highlights,generated_answer,similarity_score
0,Ever noticed how plane seats appear to be gett...,Experts question if packed out planes are put...,The author discusses how the shrinking space o...,0.813708
1,A drunk teenage boy had to be rescued by secur...,Drunk teenage boy climbed into lion enclosure ...,"In Ahmedabad, India, a drunk 17-year-old boy j...",0.936408
2,Dougie Freedman is on the verge of agreeing a ...,Nottingham Forest are close to extending Dougi...,Dougie Freedman is on the verge of signing a n...,0.932517
3,Liverpool target Neto is also wanted by PSG an...,Fiorentina goalkeeper Neto has been linked wit...,Liverpool are interested in signing Fiorentina...,0.92657
4,Bruce Jenner will break his silence in a two-h...,"Tell-all interview with the reality TV star, 6...",Bruce Jenner will speak out in a 'far-reaching...,0.922733


In [117]:
# Full source article for the third row (position 2).
df["article"].iloc[2]

"Dougie Freedman is on the verge of agreeing a new two-year deal to remain at Nottingham Forest. Freedman has stabilised Forest since he replaced cult hero Stuart Pearce and the club's owners are pleased with the job he has done at the City Ground. Dougie Freedman is set to sign a new deal at Nottingham Forest . Freedman has impressed at the City Ground since replacing Stuart Pearce in February . They made an audacious attempt on the play-off places when Freedman replaced Pearce but have tailed off in recent weeks. That has not prevented Forest's ownership making moves to secure Freedman on a contract for the next two seasons."

In [118]:
# Reference highlights for the third row (position 2).
df["highlights"].iloc[2]

"Nottingham Forest are close to extending Dougie Freedman's contract .\nThe Forest boss took over from former manager Stuart Pearce in February .\nFreedman has since lead the club to ninth in the Championship ."

In [119]:
# Model-generated summary for the third row (position 2).
df["generated_answer"].iloc[2]

'Dougie Freedman is on the verge of signing a new two-year deal to remain at Nottingham Forest. Freedman has impressed since replacing Stuart Pearce in February and has helped Forest to stabilise.'

In [120]:
from vertexai.preview.language_models import TextEmbeddingModel
import numpy as np

# Embedding model used to compare each generated summary with its reference.
emb_model = TextEmbeddingModel.from_pretrained("textembedding-gecko@001")


def check_similarity(row):
    """Return the cosine similarity between a row's reference and generated summary.

    Both texts are embedded in a single ``emb_model.get_embeddings`` call and
    the two vectors are compared with cosine similarity.

    Args:
        row: A mapping-like object with ``"highlights"`` and
            ``"generated_answer"`` entries (``DataFrame.apply`` with
            ``axis=1`` passes one row at a time).

    Returns:
        The cosine similarity of the two embeddings, or ``0.0`` when either
        embedding has zero length (the cosine is undefined in that case).
    """
    response = emb_model.get_embeddings([row["highlights"], row["generated_answer"]])
    vectors = [np.asarray(embedding.values, dtype=float) for embedding in response]
    reference_vec, generated_vec = vectors[0], vectors[1]

    # Divide by the vector lengths so the score does not depend on embedding
    # magnitude. If the service already returns unit-length vectors this is
    # numerically the same as the plain dot product.
    norm_product = np.linalg.norm(reference_vec) * np.linalg.norm(generated_vec)
    if norm_product == 0.0:
        return 0.0
    return np.dot(reference_vec, generated_vec) / norm_product


# Two texts are embedded per row, in one service call.
df["similarity_score"] = df.apply(check_similarity, axis=1)

df
    

Unnamed: 0,article,highlights,generated_answer,similarity_score
0,Ever noticed how plane seats appear to be gett...,Experts question if packed out planes are put...,The author discusses how the shrinking space o...,0.813708
1,A drunk teenage boy had to be rescued by secur...,Drunk teenage boy climbed into lion enclosure ...,"In Ahmedabad, India, a drunk 17-year-old boy j...",0.907429
2,Dougie Freedman is on the verge of agreeing a ...,Nottingham Forest are close to extending Dougi...,Dougie Freedman is on the verge of signing a n...,0.938819
3,Liverpool target Neto is also wanted by PSG an...,Fiorentina goalkeeper Neto has been linked wit...,Liverpool are interested in signing Fiorentina...,0.924862
4,Bruce Jenner will break his silence in a two-h...,"Tell-all interview with the reality TV star, 6...",Bruce Jenner will speak out in a 'far-reaching...,0.923909


In [121]:
# Headline number for the sample. Despite the variable name, this is the
# mean of the per-row scores, not their sum.
total_similarity_score = df.loc[:, "similarity_score"].mean()
print(total_similarity_score)

0.9017453729520287
