In [15]:
from transformers import PegasusTokenizer, PegasusForConditionalGeneration

In [16]:
# Load model and tokenizer
model_name = "google/pegasus-cnn_dailymail"
tokenizer = PegasusTokenizer.from_pretrained(model_name)
model = PegasusForConditionalGeneration.from_pretrained(model_name)

Some weights of PegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-cnn_dailymail and are newly initialized: ['model.decoder.embed_positions.weight', 'model.encoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


In [34]:
def summarize(text: str) -> str:

    # Tokenize the text
    inputs = tokenizer(text, truncation=True, padding="longest", return_tensors="pt")

    # Generate summary
    summary_ids = model.generate(**inputs, early_stopping=True)

    # Decode and print summary
    summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
    return summary

In [35]:
# Your input text
text = """
Climate change is affecting every country on every continent. It is disrupting national economies 
and affecting lives, costing people, communities, and countries dearly today and even more tomorrow. 
Weather patterns are changing, sea levels are rising, and weather events are becoming more extreme.
"""

print(summarize(text))

Climate change is affecting every country on every continent.<n>It is disrupting national economies and affecting lives, costing people, communities, and countries dearly today and even more tomorrow.


In [36]:
example2 = """Temporarily restored to his normal size, Shinichi joins Ran and their friends on a trip to Kyoto.
There, he meets Keiko, an old actress friend of his mother, who has received a strange code from a friend who committed suicide years ago.
"""

print(summarize(example2))

Shinichi joins Ran and their friends on a trip to Kyoto .<n>There, he meets Keiko, an old actress friend of his mother .


In [37]:
example3 = "Five gay men from different walks of life are confronted with important choices that could change everything for them."
print(summarize(example3))

Five gay men from different walks of life are confronted with important choices that could change everything for them.<n>Five gay men from different walks of life are confronted with important choices that could change everything for them.


In [39]:
import pandas as pd

# Read the CSV file
df = pd.read_csv('clean_db.csv')

# Display the first few rows of the dataframe
df.head()

Unnamed: 0,titleLink,storyLine
0,https://www.imdb.com/title/tt1727434/,"Ethan Morgan is a geeky freshman, not trusted ..."
1,https://www.imdb.com/title/tt0084855/,Frank Galvin was once a promising Boston lawye...
2,https://www.imdb.com/title/tt1971325/,2044. The climate change and a series of solar...
3,https://www.imdb.com/title/tt0372824/,Fond de l'Etang is a boarding school for troub...
4,https://www.imdb.com/title/tt4276820/,1954. Having worked as a salesman most of his ...


In [40]:
# Add a new column to the dataframe
df['pegasusSummary'] = ''
df.head()

Unnamed: 0,titleLink,storyLine,pegasusSummary
0,https://www.imdb.com/title/tt1727434/,"Ethan Morgan is a geeky freshman, not trusted ...",
1,https://www.imdb.com/title/tt0084855/,Frank Galvin was once a promising Boston lawye...,
2,https://www.imdb.com/title/tt1971325/,2044. The climate change and a series of solar...,
3,https://www.imdb.com/title/tt0372824/,Fond de l'Etang is a boarding school for troub...,
4,https://www.imdb.com/title/tt4276820/,1954. Having worked as a salesman most of his ...,


In [41]:
df['pegasusSummary'] = df['storyLine'].apply(summarize)

df.to_csv('final_db_with_pegasus_summaries.csv', index=False)
