# Setup

In [None]:
# installations

!pip install -q sentencepiece
!pip install -q transformers
!pip install -q evaluate
!pip install -q rouge_score
!pip install -q textsum

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m17.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.8/6.8 MB[0m [31m48.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m91.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m200.1/200.1 KB[0m [31m19.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m81.4/81.4 KB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m132.9/132.9 KB[0m [31m5.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m110.5/110.5 KB[0m [31m9.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m212.2/212.2 KB[0m [31m20.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━

In [None]:
# imports

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import random
import re
import os
import json
import zipfile
import evaluate
import torch
import functools as ft
import nltk
import networkx as nx

from collections import defaultdict
from nltk import sent_tokenize, word_tokenize
from nltk.corpus import stopwords
from pprint import pprint
from io import BytesIO, StringIO
from google.colab import drive, files
from sklearn.model_selection import train_test_split
from sklearn.metrics.pairwise import cosine_similarity

from transformers import pipeline
from transformers import AutoConfig, AutoTokenizer, AutoModelForSeq2SeqLM
from transformers import PegasusTokenizer, TFPegasusForConditionalGeneration



In [None]:
# mount Google Drive at /content/drive (the data paths read later live under /content/drive/MyDrive)
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# downloads
# - 'punkt': tokenizer data (unzipped to tokenizers/punkt)
# - 'stopwords': stop-word corpus read through stopwords.words('english')

nltk.download('punkt')
nltk.download('stopwords')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


True

In [None]:
!wget http://nlp.stanford.edu/data/glove.6B.zip
!unzip glove*.zip

!ls
!pwd

--2023-04-08 18:26:17--  http://nlp.stanford.edu/data/glove.6B.zip
Resolving nlp.stanford.edu (nlp.stanford.edu)... 171.64.67.140
Connecting to nlp.stanford.edu (nlp.stanford.edu)|171.64.67.140|:80... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://nlp.stanford.edu/data/glove.6B.zip [following]
--2023-04-08 18:26:17--  https://nlp.stanford.edu/data/glove.6B.zip
Connecting to nlp.stanford.edu (nlp.stanford.edu)|171.64.67.140|:443... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: https://downloads.cs.stanford.edu/nlp/data/glove.6B.zip [following]
--2023-04-08 18:26:18--  https://downloads.cs.stanford.edu/nlp/data/glove.6B.zip
Resolving downloads.cs.stanford.edu (downloads.cs.stanford.edu)... 171.64.64.22
Connecting to downloads.cs.stanford.edu (downloads.cs.stanford.edu)|171.64.64.22|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 862182613 (822M) [application/zip]
Saving to: ‘glove.6B.zip’


202

In [None]:
# Extract word vectors
# every non-empty line is parsed as: <word> <coef> <coef> ... (whitespace separated)
word_embeddings = {}
# the context manager closes the file even when parsing a line raises
with open('glove.6B.100d.txt', encoding='utf-8') as f:
    for line in f:
        values = line.split()
        if not values:
            # skip blank lines instead of failing on values[0]
            continue
        word = values[0]
        coefs = np.asarray(values[1:], dtype='float32')
        word_embeddings[word] = coefs

# Functions

### Preprocessing functions

In [None]:
# Preprocessing functions

def concatenate_script_element(file_data, script_element):
    '''
    concatenate only a specific script element such as dialog, text, speaker_heading, scene_heading

    - file_data is the path of a file whose lines look like '<element>:<content>'
    - script_element is the element label to keep; it is compared with the stripped
      text in front of the first ':' of each line
    - returns the kept content as one string, empty lines removed, joined with os.linesep
    '''
    combined_script_element = ''
    with open(file_data, 'r') as script_file:
        for line in script_file:
            # cut at the FIRST ':' only, so content that itself contains ':' (e.g. "10:30")
            # is kept whole together with its newline; lines without any ':' are skipped
            label, separator, content = line.partition(':')
            if separator and label.strip() == script_element:
                combined_script_element += content

    # remove empty lines
    text = os.linesep.join([s for s in combined_script_element.splitlines() if s])

    return text

#####

def concatenate_dialog(file_data):
    '''
    concatenate only on dialog

    - file_data is the path of a file whose lines look like '<element>:<content>'
    - keeps the content of every line whose stripped label (text before the first ':') is 'dialog'
    - returns the kept content as one string, empty lines removed, joined with os.linesep
    '''
    combined_dialog = ''
    with open(file_data, 'r') as script_file:
        for line in script_file:
            # cut at the FIRST ':' only, so dialog that itself contains ':' (e.g. "10:30")
            # is kept whole together with its newline; lines without any ':' are skipped
            label, separator, content = line.partition(':')
            if separator and label.strip() == 'dialog':
                combined_dialog += content

    # remove empty lines
    text = os.linesep.join([s for s in combined_dialog.splitlines() if s])

    return text

#####

def concatenate_text(file_data):
    '''
    concatenate only on text

    - file_data is the path of a file whose lines look like '<element>:<content>'
    - keeps the content of every line whose stripped label (text before the first ':') is 'text'
    - returns the kept content as one string, empty lines removed, joined with os.linesep
    '''
    combined_text = ''
    with open(file_data, 'r') as script_file:
        for line in script_file:
            # cut at the FIRST ':' only, so text that itself contains ':' (e.g. "10:30")
            # is kept whole together with its newline; lines without any ':' are skipped
            label, separator, content = line.partition(':')
            if separator and label.strip() == 'text':
                combined_text += content

    # remove empty lines
    text = os.linesep.join([s for s in combined_text.splitlines() if s])

    return text

#####

def remove_stopwords(sen):
    '''
    remove useless stopwords

    - sen is an iterable of word tokens
    - returns the tokens that are not English stopwords, joined by single spaces
    '''
    # a set gives constant-time membership tests; scanning the list returned by
    # stopwords.words() for every token was linear in the size of that list
    stop_words = set(stopwords.words('english'))
    sen_new = " ".join([i for i in sen if i not in stop_words])

    return sen_new

#####

def clean_text(text):
    ''' drop punctuation, special characters, digits and underscores, then trim both ends '''
    unwanted_characters = r'[^\w\s]|[\d_]+'
    without_unwanted = re.sub(unwanted_characters, '', text)
    return without_unwanted.strip()

### Model functions

In [None]:
@ft.lru_cache(maxsize=None)
def _load_rouge_metric():
    '''load the ROUGE metric once; later calls reuse the same metric object'''
    return evaluate.load('rouge')


def calculate_rouge_scores(prediction, reference):
    '''
    calculate ROUGE scores
    - prediction is a single string of long text
    - reference is the wikipedia plot_outline
    - returns whatever the metric's compute() returns for this single prediction/reference pair
    '''

    # this function is called once per movie, so the metric is cached instead of
    # being loaded again on every call
    rouge = _load_rouge_metric()
    results = rouge.compute(predictions=[prediction], references=[reference])

    return results

In [None]:
def run_textrank_model(script, num_sentences_to_keep = 50):
    '''
    - PageRank to score sentences for extractive summary
    - script is one string; it is split into sentences with sent_tokenize
    - num_sentences_to_keep is the MAXIMUM number of top-ranked sentences returned
      (scripts with fewer sentences return all of them instead of raising IndexError)
    - returns the kept sentences, highest score first, joined by single spaces
      ('' when the script contains no sentences)
    '''

    ### STEP 1 ###
    # TOKENIZE
    sentences = sent_tokenize(script)
    print(len(sentences))

    # nothing to rank: return early instead of building an empty similarity matrix
    if not sentences:
        return ''

    # PREPROCESS
    # remove punctuations, numbers and special characters, then make alphabets lowercase
    clean_sentences = [clean_text(s).lower() for s in sentences]

    # remove stopwords
    clean_sentences = [remove_stopwords(r.split()) for r in clean_sentences]

    # create sentence vectors with 100 elements each then average to get consolidated vector
    # https://www.kaggle.com/code/kunjan6902/text-summarization-textrank
    embedding_dim = 100
    sentence_vectors = []
    for sentence in clean_sentences:
        words = sentence.split()
        if len(sentence) != 0:
            # the +0.001 keeps the original smoothing of the average
            v = sum([word_embeddings.get(w, np.zeros((embedding_dim,))) for w in words]) / (len(words) + 0.001)
        else:
            v = np.zeros((embedding_dim,))
        sentence_vectors.append(v)

    ### STEP 2 ###
    # COSINE SIMILARITY MATRIX
    # one call on the stacked vectors computes every pair at once (replaces one
    # cosine_similarity call per sentence pair); the diagonal is zeroed because
    # a sentence must not vote for itself
    sim_mat = cosine_similarity(np.vstack(sentence_vectors).astype(np.float64))
    np.fill_diagonal(sim_mat, 0.0)

    ### STEP 3 ###
    # PAGERANK / TEXT RANK ALGORITHM
    # https://networkx.org/documentation/networkx-1.0/reference/generated/networkx.pagerank.html
    # https://arxiv.org/pdf/2108.02997.pdf
    nx_graph = nx.from_numpy_array(sim_mat)
    scores = nx.pagerank(nx_graph, alpha=0.85, tol=1e-02, max_iter=100)

    # ranked sentences (ties on score fall back to comparing the sentence strings)
    ranked_sentences = sorted(((scores[i], s) for i, s in enumerate(sentences)), reverse=True)

    # get top X sentences based on provided function arguments
    # - slicing copes with scripts shorter than num_sentences_to_keep
    # - joining with a space keeps sentence boundaries ("end.Next" -> "end. Next")
    top_sentences = ' '.join(s for _, s in ranked_sentences[:num_sentences_to_keep])

    return top_sentences

In [None]:
def run_pegasus_short_model(text_to_summarize, max_chars=1000):
    '''
    - max token length is 512
    - Pegasus for Headline Generation
    - text_to_summarize is cut to its first max_chars CHARACTERS (not tokens) before tokenization
    - prints and returns the generated summary string
    '''

    # only use the first max_chars characters (string slicing counts characters, not tokens)
    text_to_summarize = text_to_summarize.strip()[:max_chars]

    # NOTE: model and tokenizer are loaded again on every call
    pmodel = TFPegasusForConditionalGeneration.from_pretrained("google/pegasus-xsum")
    ptokenizer = PegasusTokenizer.from_pretrained("google/pegasus-xsum")
    inputs = ptokenizer(text_to_summarize, max_length=512, truncation=True, return_tensors="tf")

    # generate summary
    summary_ids = pmodel.generate(inputs["input_ids"],
        max_length=256,
        min_length=94,
        no_repeat_ngram_size=3,
        num_beams=4)

    prediction = ptokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
    pprint(prediction, compact=True)

    return prediction

In [None]:
def run_pegasus_long_model(text_to_summarize, max_chars=1000):
    '''
    - max token length is 1024
    - Pegasus for Longer Generation
    - text_to_summarize is cut to its first max_chars CHARACTERS (not tokens) before tokenization
    - prints and returns the generated summary string
    '''

    # only use the first max_chars characters (string slicing counts characters, not tokens)
    text_to_summarize = text_to_summarize.strip()[:max_chars]

    # NOTE: model and tokenizer are loaded again on every call
    # from_pt is a model-loading option, so it is passed to the model only
    cnnmodel = TFPegasusForConditionalGeneration.from_pretrained("google/pegasus-cnn_dailymail", from_pt=True)
    cnntokenizer = PegasusTokenizer.from_pretrained("google/pegasus-cnn_dailymail")
    cnninputs = cnntokenizer(text_to_summarize, max_length=1024, truncation=True, return_tensors="tf")

    # generate summary
    summary_ids = cnnmodel.generate(cnninputs["input_ids"],
        max_length=256,
        min_length=94,
        no_repeat_ngram_size=3,
        num_beams=4)

    prediction = cnntokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
    pprint(prediction, compact=True)

    return prediction

In [None]:
def run_pegasus_large_model(text_to_summarize, max_chars=1000):
    '''
    - max token length is 1024
    - Pegasus Large
    - text_to_summarize is cut to its first max_chars CHARACTERS (not tokens) before tokenization
    - prints and returns the generated summary string
    '''

    # only use the first max_chars characters (string slicing counts characters, not tokens)
    text_to_summarize = text_to_summarize.strip()[:max_chars]

    # NOTE: model and tokenizer are loaded again on every call
    # from_pt is a model-loading option, so it is passed to the model only
    large_model = TFPegasusForConditionalGeneration.from_pretrained("google/pegasus-large", from_pt=True)
    large_tokenizer = PegasusTokenizer.from_pretrained("google/pegasus-large")
    large_inputs = large_tokenizer(text_to_summarize, max_length=1024, truncation=True, return_tensors="tf")

    # generate summary
    summary_ids = large_model.generate(large_inputs["input_ids"],
        max_length=256,
        min_length=94,
        no_repeat_ngram_size=3,
        num_beams=4)

    prediction = large_tokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
    pprint(prediction, compact=True)

    return prediction

In [None]:
def run_longt5_model(text_to_summarize, max_chars=1000):
    '''
    - max token length is 16384
    - LongT5 Booksum
    - text_to_summarize is cut to its first max_chars CHARACTERS (not tokens) before summarization
    - prints and returns the generated summary string
    '''

    # only use the first max_chars characters (string slicing counts characters, not tokens)
    text_to_summarize = text_to_summarize.strip()[:max_chars]

    # NOTE: the pipeline (and with it the model) is built again on every call
    summarizer = pipeline(
        "summarization",
        "pszemraj/long-t5-tglobal-base-16384-book-summary",
        max_length=256,
        min_length=94,
        no_repeat_ngram_size=3,
        num_beams=4)

    prediction = summarizer(text_to_summarize)[0]["summary_text"]
    pprint(prediction, compact=True)

    return prediction

In [None]:
def execute_two_stage_short_model(start, stop, rouge_dict, summary_dict, df):
    '''
    two-stage pipeline over the rows at positions start .. stop-1 of df:
    1. textrank on the row's 'bert_dialog'
    2. abstractive summarization (pegasus short model) of the ranked sentences
    3. rouge scoring against the row's 'plot_outline'

    rouge_dict and summary_dict are filled in place, keyed by the row's 'title',
    and both are returned
    '''

    for position in range(start, stop):
        movie = df.iloc[position]

        extractive_summary = run_textrank_model(movie['bert_dialog'])
        abstractive_summary = run_pegasus_short_model(extractive_summary)
        scores = calculate_rouge_scores(abstractive_summary, movie['plot_outline'])

        # store the scores and the summary under the movie title
        movie_title = movie['title']
        rouge_dict[movie_title] = scores
        summary_dict[movie_title] = abstractive_summary

    return rouge_dict, summary_dict

In [None]:
def execute_two_stage_long_model(start, stop, rouge_dict, summary_dict, df):
    '''
    two-stage pipeline over the rows at positions start .. stop-1 of df:
    1. textrank on the row's 'bert_dialog'
    2. abstractive summarization (pegasus long model) of the ranked sentences
    3. rouge scoring against the row's 'plot_outline'

    rouge_dict and summary_dict are filled in place, keyed by the row's 'title',
    and both are returned
    '''

    for row_number in range(start, stop):
        record = df.iloc[row_number]

        top_ranked = run_textrank_model(record['bert_dialog'])
        generated = run_pegasus_long_model(top_ranked)
        rouge_result = calculate_rouge_scores(generated, record['plot_outline'])

        # store the scores and the summary under the movie title
        key = record['title']
        rouge_dict[key] = rouge_result
        summary_dict[key] = generated

    return rouge_dict, summary_dict

In [None]:
def execute_two_stage_large_model(start, stop, rouge_dict, summary_dict, df):
    '''
    two-stage pipeline over the rows at positions start .. stop-1 of df:
    1. textrank on the row's 'bert_dialog'
    2. abstractive summarization (pegasus large model) of the ranked sentences
    3. rouge scoring against the row's 'plot_outline'

    rouge_dict and summary_dict are filled in place, keyed by the row's 'title',
    and both are returned
    '''

    for idx in range(start, stop):
        sample = df.iloc[idx]
        sample_title = sample['title']

        key_sentences = run_textrank_model(sample['bert_dialog'])
        summary = run_pegasus_large_model(key_sentences)

        # store the scores and the summary under the movie title
        rouge_dict[sample_title] = calculate_rouge_scores(summary, sample['plot_outline'])
        summary_dict[sample_title] = summary

    return rouge_dict, summary_dict

In [None]:
def execute_two_stage_t5_model(start, stop, rouge_dict, summary_dict, df):
    '''
    two-stage pipeline over the rows at positions start .. stop-1 of df:
    1. textrank on the row's 'bert_dialog'
    2. abstractive summarization (long t5 model) of the ranked sentences
    3. rouge scoring against the row's 'plot_outline'

    rouge_dict and summary_dict are filled in place, keyed by the row's 'title',
    and both are returned
    '''

    for row_idx in range(start, stop):
        entry = df.iloc[row_idx]

        ranked_text = run_textrank_model(entry['bert_dialog'])
        t5_summary = run_longt5_model(ranked_text)
        t5_scores = calculate_rouge_scores(t5_summary, entry['plot_outline'])

        # store the scores and the summary under the movie title
        entry_title = entry['title']
        rouge_dict[entry_title] = t5_scores
        summary_dict[entry_title] = t5_summary

    return rouge_dict, summary_dict

In [None]:
def execute_two_stage_text_short_model(start, stop, rouge_dict, summary_dict, df):
    '''
    two-stage pipeline over the rows at positions start .. stop-1 of df:
    1. textrank on the row's 'bert_text'
    2. abstractive summarization (pegasus short model) of the ranked sentences
    3. rouge scoring against the row's 'plot_outline'

    rouge_dict and summary_dict are filled in place, keyed by the row's 'title',
    and both are returned
    '''

    for position in range(start, stop):
        movie = df.iloc[position]

        extractive_summary = run_textrank_model(movie['bert_text'])
        abstractive_summary = run_pegasus_short_model(extractive_summary)
        scores = calculate_rouge_scores(abstractive_summary, movie['plot_outline'])

        # store the scores and the summary under the movie title
        movie_title = movie['title']
        rouge_dict[movie_title] = scores
        summary_dict[movie_title] = abstractive_summary

    return rouge_dict, summary_dict

In [None]:
def execute_two_stage_text_long_model(start, stop, rouge_dict, summary_dict, df):
    '''
    this function executes, for the rows at positions start .. stop-1 of df,
    1. textrank on the row's 'bert_text'
    2. abstractive summarization (pegasus long models)
    3. rouge scoring against the row's 'plot_outline'

    rouge_dict and summary_dict are filled in place, keyed by the row's 'title',
    and both are returned
    '''

    for i in range(start, stop):

        ranked_sentences = run_textrank_model(df.iloc[i]['bert_text'])
        # the pegasus long model is the summarizer this function is named and documented for
        # (it previously called run_longt5_model, a copy-paste slip)
        prediction = run_pegasus_long_model(ranked_sentences)
        reference = df.iloc[i]['plot_outline']
        rouge_scores = calculate_rouge_scores(prediction, reference)

        # add rouge scores and summary to a dictionary
        title = df.iloc[i]['title']
        rouge_dict[title] = rouge_scores
        summary_dict[title] = prediction

    return rouge_dict, summary_dict

In [None]:
def execute_two_stage_text_large_model(start, stop, rouge_dict, summary_dict, df):
    '''
    two-stage pipeline over the rows at positions start .. stop-1 of df:
    1. textrank on the row's 'bert_text'
    2. abstractive summarization (pegasus large model) of the ranked sentences
    3. rouge scoring against the row's 'plot_outline'

    rouge_dict and summary_dict are filled in place, keyed by the row's 'title',
    and both are returned
    '''

    for idx in range(start, stop):
        sample = df.iloc[idx]
        sample_title = sample['title']

        key_sentences = run_textrank_model(sample['bert_text'])
        summary = run_pegasus_large_model(key_sentences)

        # store the scores and the summary under the movie title
        rouge_dict[sample_title] = calculate_rouge_scores(summary, sample['plot_outline'])
        summary_dict[sample_title] = summary

    return rouge_dict, summary_dict

In [None]:
def execute_two_stage_text_t5_model(start, stop, rouge_dict, summary_dict, df):
    '''
    this function executes, for the rows at positions start .. stop-1 of df,
    1. textrank on the row's 'bert_text'
    2. abstractive summarization (long t5 models)
    3. rouge scoring against the row's 'plot_outline'

    rouge_dict and summary_dict are filled in place, keyed by the row's 'title',
    and both are returned
    '''

    for i in range(start, stop):

        # stage 1 is textrank and stage 2 is long t5, as documented above
        # (it previously ran long t5 on the raw text and then pegasus large on that output)
        ranked_sentences = run_textrank_model(df.iloc[i]['bert_text'])
        prediction = run_longt5_model(ranked_sentences)
        reference = df.iloc[i]['plot_outline']
        rouge_scores = calculate_rouge_scores(prediction, reference)

        # add rouge scores and summary to a dictionary
        title = df.iloc[i]['title']
        rouge_dict[title] = rouge_scores
        summary_dict[title] = prediction

    return rouge_dict, summary_dict

In [None]:
def combine_rouge_and_summary_results(rouge_dict, summary_dict, abstractive_model_name):
    '''
    build one dataframe for a given model: one row per key of the dictionaries,
    the rouge scores as columns plus a '<abstractive_model_name>_summary' column
    '''

    # summaries as a named series; the name becomes the column label after the merge
    summary_column = pd.Series(summary_dict, name=f'{abstractive_model_name}_summary')

    # rouge results: transpose so that every key of rouge_dict becomes a row
    scores_by_title = pd.DataFrame(rouge_dict).transpose()

    # align both on their index
    return pd.merge(scores_by_title, summary_column, left_index=True, right_index=True)

# Data

### Import

#### Wikipedia references

In [None]:
try:
    wikipedia_summary = pd.read_csv('/content/drive/MyDrive/Wikipedia_movie_meta_data.csv')
    # NOTE: a bare `wikipedia_summary.sort_values(by=['Title'])` used to sit here; sort_values
    # returns a new frame and the result was discarded, so the data was never sorted.
    # The no-op is removed and the file's row order is kept.
    display(wikipedia_summary.head())
except FileNotFoundError:
    print('wikipedia_summary not found')

Unnamed: 0,Age Restrict,Akas,Awards,Budget,Cast,Cast1,Casting Directors,Countries,Director,Directors,...,Title1,Wiki Page,Writers,Year,Count Matches,F1,Imdb User Rating,Imdbid,Metascore,Number Of Imdb User Votes
0,"Argentina:Atp, Australia:M, Brazil:Livre, Cana...",Spider-Man 2: The IMAX Experience (United Stat...,"Oscar 2005, BAFTA Film Award 2005, Movies for ...","$200,000,000 (estimated)","Tobey Maguire, Kirsten Dunst, James Franco, Al...","Tobey Maguire, Kirsten Dunst, James Franco, Al...",Dianne Crittenden,United States,Sam Raimi,Sam Raimi,...,Spider-Man 2,https://en.wikipedia.org/wiki/Spider-Man_2,"Stan Lee, Steve Ditko, Alfred Gough, Miles Mil...",2004,1,0.0,7,316654,83,538496
1,,,,,"Hanna Lee, Bernard Siegel, Templar Saxe, Nita ...",,,United States,,Lester Park,...,,,"Willard King Bradley, Willard King Bradley",1923,0,1.0,-1,164167,-1,-1
2,,,,,,,,,,,...,,,,2012,0,2.0,-1,2761612,-1,-1
3,,Shining The Light (United States),,,"Denise Jaxon, Alan Santana",,,United States,,,...,,,"Kevin Karp, Alan Santana",2019,0,3.0,-1,9106126,-1,-1
4,,Smoke Gets in Your Eyes (United States),,,"William Conrad, Joe Penny, Alan Campbell, Mark...",,"Sally Powers, Susan Scudder",United States,,Harvey S. Laidman,...,,,"Dean Hargrove, Joel Steiger, Douglas Stefen Bo...",1987,0,4.0,7,614236,-1,19


#### BERT annotations

In [None]:
bert_annotations_file_path = '/content/drive/MyDrive/W266_Movie_Data/BERT_annotations/'

try:
    # os.listdir returns entries in arbitrary order; sorting (as is done for the raw text
    # lemmas) makes the row order, and every seeded sample drawn from it, reproducible
    all_files_bert = sorted(os.listdir(bert_annotations_file_path))
    print(f'{len(all_files_bert)} files found')
    print(all_files_bert)
except FileNotFoundError:
    print('bert_annotations not found')

1998 files found


#### Raw text lemmas

In [None]:
raw_text_lemmas_file_path = '/content/drive/MyDrive/W266_Movie_Data/raw_text_lemmas/raw_text_lemmas/'

# list the lemma files in sorted order and report how many were found
try:
    all_files_lemmas = os.listdir(raw_text_lemmas_file_path)
    all_files_lemmas.sort()
except FileNotFoundError:
    print('raw_text_lemmas not found')
else:
    print(f'{len(all_files_lemmas)} files found')
    print(all_files_lemmas)

2941 files found


## Format data

#### Wikipedia references

In [None]:
# Wikipedia
# .copy() gives wiki_df its own data, so adding a column below no longer
# triggers pandas' SettingWithCopyWarning
wiki_df = wikipedia_summary[['Title','Imdb Id','Plot Outline']].copy()

# create imdb_id that is of type string
# NOTE(review): ids that were parsed as numbers have lost their leading zeros, while the ids
# embedded in the annotation filenames are zero-padded to 7 digits (e.g. '..._0118531_anno.txt');
# padding here lets those movies match in the merge on imdb_id — confirm against the raw csv
wiki_df['imdb_id_str'] = wiki_df['Imdb Id'].astype(str).str.split('.').str[0].str.zfill(7)

# rename columns
wiki_df = wiki_df.rename(columns={'Title': 'title',
                                  'imdb_id_str': 'imdb_id',
                                  'Plot Outline': 'plot_outline'})

# only keep certain columns
wiki_df = wiki_df[['title', 'imdb_id', 'plot_outline']]

# only keep movies that have plot outlines
wiki_df = wiki_df[wiki_df['plot_outline'].notna()]

# display
display(wiki_df.head())
print(len(wiki_df))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  wiki_df['imdb_id_str'] = wiki_df['Imdb Id'].astype(str).str.split('.').str[0]


Unnamed: 0,title,imdb_id,plot_outline
0,Spider-Man 2,316654,Peter Parker is an unhappy man: after two year...
3,Shining The Light,9106126,Shining The Light will have a panel of six you...
4,Smoke Gets in Your Eyes,614236,A woman who was assaulted and left blind leave...
5,Solstice,473267,Six months after the suicide of her twin siste...
6,Something Wicked This Way Comes,86336,"In Green Town, Illinois, the twelve year-old b..."


2498


#### BERT annotations

In [None]:
# BERT
bert_df = pd.DataFrame({'filename': all_files_bert})

# filenames are split on '_': first piece -> title, second piece -> imdb id
bert_name_parts = bert_df['filename'].str.split('_')

bert_df = bert_df.assign(
    title=bert_name_parts.str[0],
    imdb_id=bert_name_parts.str[1],
    # full path to the annotated script
    bert_data=bert_annotations_file_path + bert_df['filename'],
)

# display
display(bert_df.head())
print(len(bert_df))

Unnamed: 0,filename,title,imdb_id,bert_data
0,One Eight Seven_0118531_anno.txt,One Eight Seven,118531,/content/drive/MyDrive/W266_Movie_Data/BERT_an...
1,Mystic River_0327056_anno.txt,Mystic River,327056,/content/drive/MyDrive/W266_Movie_Data/BERT_an...
2,Moon_1182345_anno.txt,Moon,1182345,/content/drive/MyDrive/W266_Movie_Data/BERT_an...
3,Mr Mark Fenton_1282592_anno.txt,Mr Mark Fenton,1282592,/content/drive/MyDrive/W266_Movie_Data/BERT_an...
4,Mr Destiny_0100201_anno.txt,Mr Destiny,100201,/content/drive/MyDrive/W266_Movie_Data/BERT_an...


1998


#### Raw text lemmas

In [None]:
# Lemma
lemma_df = pd.DataFrame({'filename': all_files_lemmas})

# filenames are split on '_': first piece -> title, second piece -> imdb id
lemma_name_parts = lemma_df['filename'].str.split('_')

lemma_df = lemma_df.assign(
    title=lemma_name_parts.str[0],
    imdb_id=lemma_name_parts.str[1],
    # full path to the lemmatized script
    lemma_data=raw_text_lemmas_file_path + lemma_df['filename'],
)

# display
display(lemma_df.head())
print(len(lemma_df))

Unnamed: 0,filename,title,imdb_id,lemma_data
0,10 Cloverfield Lane_1179933_lemmas.txt,10 Cloverfield Lane,1179933,/content/drive/MyDrive/W266_Movie_Data/raw_tex...
1,10 Things I Hate About You_0147800_lemmas.txt,10 Things I Hate About You,147800,/content/drive/MyDrive/W266_Movie_Data/raw_tex...
2,101 Days of 101 Dalmatians_0249328_lemmas.txt,101 Days of 101 Dalmatians,249328,/content/drive/MyDrive/W266_Movie_Data/raw_tex...
3,12 Angry Men_0118528_lemmas.txt,12 Angry Men,118528,/content/drive/MyDrive/W266_Movie_Data/raw_tex...
4,12 Monkeys_0114746_lemmas.txt,12 Monkeys,114746,/content/drive/MyDrive/W266_Movie_Data/raw_tex...


2941


### Combine DataFrames
Combination is done based on imdb_id


In [None]:
# outer-join bert and lemma on imdb_id, give the suffixed columns readable names,
# then keep the title (taken from the bert side), the id and both file paths
merged_column_names = {'filename_x': 'bert_filename',
                       'filename_y': 'lemma_filename',
                       'title_x': 'title'}

data_df = (
    bert_df
    .merge(lemma_df, how='outer', on='imdb_id')
    .rename(columns=merged_column_names)
    .loc[:, ['title', 'imdb_id', 'bert_data', 'lemma_data']]
)

# display
display(data_df.head())
print(len(data_df))

Unnamed: 0,title,imdb_id,bert_data,lemma_data
0,One Eight Seven,118531,/content/drive/MyDrive/W266_Movie_Data/BERT_an...,/content/drive/MyDrive/W266_Movie_Data/raw_tex...
1,Mystic River,327056,/content/drive/MyDrive/W266_Movie_Data/BERT_an...,/content/drive/MyDrive/W266_Movie_Data/raw_tex...
2,Moon,1182345,/content/drive/MyDrive/W266_Movie_Data/BERT_an...,/content/drive/MyDrive/W266_Movie_Data/raw_tex...
3,Mr Mark Fenton,1282592,/content/drive/MyDrive/W266_Movie_Data/BERT_an...,/content/drive/MyDrive/W266_Movie_Data/raw_tex...
4,Mr Destiny,100201,/content/drive/MyDrive/W266_Movie_Data/BERT_an...,/content/drive/MyDrive/W266_Movie_Data/raw_tex...


2941


In [None]:
# bert + lemma (data_df) joined with the wikipedia dataframe, as one chain:
# outer merge on imdb_id -> drop rows with any NaN -> keep the first row per imdb_id
# -> keep the data_df title as 'title' -> select/reorder columns -> fresh 0..n-1 index
final_df = (
    pd.merge(data_df, wiki_df, how='outer', on='imdb_id')
    .dropna(how='any')
    .drop_duplicates(subset='imdb_id', keep='first')
    .rename(columns={'title_x': 'title'})
    .loc[:, ['title', 'imdb_id', 'bert_data', 'lemma_data', 'plot_outline']]
    .reset_index(drop=True)
)

# display
display(final_df.head())
print(len(final_df))

Unnamed: 0,title,imdb_id,bert_data,lemma_data,plot_outline
0,Moon,1182345,/content/drive/MyDrive/W266_Movie_Data/BERT_an...,/content/drive/MyDrive/W266_Movie_Data/raw_tex...,Sam Bell has a three year contract to work for...
1,Only Lovers Left Alive,1714915,/content/drive/MyDrive/W266_Movie_Data/BERT_an...,/content/drive/MyDrive/W266_Movie_Data/raw_tex...,"Adam (Tom Hiddleston), an underground musician..."
2,Mulholland Dr,1619856,/content/drive/MyDrive/W266_Movie_Data/BERT_an...,/content/drive/MyDrive/W266_Movie_Data/raw_tex...,Yearning to spread her wings and make a name f...
3,Novitiate,4513316,/content/drive/MyDrive/W266_Movie_Data/BERT_an...,/content/drive/MyDrive/W266_Movie_Data/raw_tex...,"1964. Cathleen Harris, in her late teens, has ..."
4,Monsters University,1453405,/content/drive/MyDrive/W266_Movie_Data/BERT_an...,/content/drive/MyDrive/W266_Movie_Data/raw_tex...,"Mike Wazowski (Billy Crystal) and James P. ""Su..."


565


There are 565 examples; keep a random sample of 500 of them, drawn with random state 266 (for DATASCI 266 🙂).

In [None]:
# keep only 500 samples
samples_to_keep = 500
pct_samples_to_keep = samples_to_keep / len(final_df)

# assign final_df to df for ease of use
# sample by count instead of by a derived fraction: pandas turns a fraction back into
# round(frac * len), so the rows drawn for a given random_state are the same, without
# relying on float rounding; min() keeps the cell working when fewer rows are available
df = final_df.sample(n=min(samples_to_keep, len(final_df)), random_state=266)

# display
display(df.head())
print(len(df))

Unnamed: 0,title,imdb_id,bert_data,lemma_data,plot_outline
341,Buried,1462758,/content/drive/MyDrive/W266_Movie_Data/BERT_an...,/content/drive/MyDrive/W266_Movie_Data/raw_tex...,"Waking groggy in pitch darkness, Paul Conroy, ..."
452,I Smile Back,3640682,/content/drive/MyDrive/W266_Movie_Data/BERT_an...,/content/drive/MyDrive/W266_Movie_Data/raw_tex...,Laney Brooks does bad things. Married with kid...
119,The Edge of Seventeen,1878870,/content/drive/MyDrive/W266_Movie_Data/BERT_an...,/content/drive/MyDrive/W266_Movie_Data/raw_tex...,"Everyone knows that growing up is hard, and li..."
131,The Company Men,1172991,/content/drive/MyDrive/W266_Movie_Data/BERT_an...,/content/drive/MyDrive/W266_Movie_Data/raw_tex...,When the GTX Corporation must cut jobs to impr...
247,Upgrade,6499752,/content/drive/MyDrive/W266_Movie_Data/BERT_an...,/content/drive/MyDrive/W266_Movie_Data/raw_tex...,"Grey's a stay-at-home mechanic, whose wife is ..."


500


In [None]:
# double check for duplicates (vc = value counts):
# print only the values that occur more than once; an empty Series means no duplicates
title_vc = df['title'].value_counts()
imdb_id_vc = df['imdb_id'].value_counts()

print(title_vc.loc[title_vc.gt(1)])
print(imdb_id_vc.loc[imdb_id_vc.gt(1)])

Series([], Name: title, dtype: int64)
Series([], Name: imdb_id, dtype: int64)


In [None]:
# the scripts are read only for the trimmed data to save on resources and time

# dialog of every script, read from the file path stored in 'bert_data'
df['bert_dialog'] = df['bert_data'].map(concatenate_dialog)

# text of every script, read from the same file path
df['bert_text'] = df['bert_data'].map(concatenate_text)

In [None]:
# train / val / test split
#    70 / 10  / 20

# despite the names these are row positions (cut points), not percentages
train_pct = int(0.7 * len(df))
val_pct = int(0.8 * len(df))

# first shuffle all of the samples at a random state
# then cut at the two positions above; positional slicing yields the same three pieces
# as np.split(...) did, without relying on np.split accepting a DataFrame
shuffled_df = df.sample(frac=1, random_state=266)
train_df = shuffled_df.iloc[:train_pct]
val_df = shuffled_df.iloc[train_pct:val_pct]
test_df = shuffled_df.iloc[val_pct:]

print(len(train_df))
print(len(val_df))
print(len(test_df))

350
50
100


In [None]:
# check dataframes: show the first rows of train, val and test, in that order
for split_df in (train_df, val_df, test_df):
    display(split_df.head())

Unnamed: 0,title,imdb_id,bert_data,lemma_data,plot_outline,bert_dialog,bert_text
221,Three Billboards Outside Ebbing Missouri,5027774,/content/drive/MyDrive/W266_Movie_Data/BERT_an...,/content/drive/MyDrive/W266_Movie_Data/raw_tex...,"THREE BILLBOARDS OUTSIDE EBBING, MISSOURI is a...","by\n Martin McDonagh\n You Red Welby?\n Yes, ...","MILDRED HAYES, a woman in her early 50's, dri..."
346,Candle to Water,2387411,/content/drive/MyDrive/W266_Movie_Data/BERT_an...,/content/drive/MyDrive/W266_Movie_Data/raw_tex...,To present five different contemporary stories...,\n Written by\n Nick Green\n Shooting draft (...,"The car is stuck in rush-hour traffic, which ..."
277,1917,8579674,/content/drive/MyDrive/W266_Movie_Data/BERT_an...,/content/drive/MyDrive/W266_Movie_Data/raw_tex...,"April 1917, the Western Front. Two British sol...",1917\n Written by\n Sam Mendes\n &\n Krysty W...,The following script takes place in real time...
420,Friends with Benefits,1632708,/content/drive/MyDrive/W266_Movie_Data/BERT_an...,/content/drive/MyDrive/W266_Movie_Data/raw_tex...,Jamie Rellis (Mila Kunis) is a New York City h...,No. Not even close.\n I know. I'll be there.\...,He almost steps on a dog sleeping on the floo...
315,Anonymous,1521197,/content/drive/MyDrive/W266_Movie_Data/BERT_an...,/content/drive/MyDrive/W266_Movie_Data/raw_tex...,"Edward De Vere, Earl of Oxford, is presented a...","\n Written by\n John Orloff\n up, etc...\n th...",TITLES BEGIN over the SOUNDS of city traffic....


Unnamed: 0,title,imdb_id,bert_data,lemma_data,plot_outline,bert_dialog,bert_text
6,Mistress America,2872462,/content/drive/MyDrive/W266_Movie_Data/BERT_an...,/content/drive/MyDrive/W266_Movie_Data/raw_tex...,"Tracy, a lonely college freshman in New York, ...",Written by\n Noah Baumbach and Greta Gerwig\n...,much fun to agree with her.\n A dark room. We...
428,Fargo,2802850,/content/drive/MyDrive/W266_Movie_Data/BERT_an...,/content/drive/MyDrive/W266_Movie_Data/raw_tex...,"The all new ""true crime"" case of Fargo's new c...",\n a screenplay by\n Ethan Coen\n and\n Joel ...,The following text fades in over black This i...
432,Gangs of Wasseypur,1954470,/content/drive/MyDrive/W266_Movie_Data/BERT_an...,/content/drive/MyDrive/W266_Movie_Data/raw_tex...,Shahid Khan is exiled after impersonating the ...,"Ha se?\n wer erftrst Wat eiiteat!\n Shia, SN ...",Sardar is at the door with young Definite.\n ...
379,Cloud Atlas,1371111,/content/drive/MyDrive/W266_Movie_Data/BERT_an...,/content/drive/MyDrive/W266_Movie_Data/raw_tex...,Everything is connected: an 1849 diary of an o...,A Film By\n A. Wachowski\n L. Wachowski\n T. ...,"On motes like meteors; dancing, streaking par..."
240,Wall Street Money Never Sleeps,1027718,/content/drive/MyDrive/W266_Movie_Data/BERT_an...,/content/drive/MyDrive/W266_Movie_Data/raw_tex...,As the global economy teeters on the brink of ...,"\n \n Written by\n Allan Loeb\n ""When busines...",Onto which is written in white letters...\n T...


Unnamed: 0,title,imdb_id,bert_data,lemma_data,plot_outline,bert_dialog,bert_text
271,21 Jump Street,1232829,/content/drive/MyDrive/W266_Movie_Data/BERT_an...,/content/drive/MyDrive/W266_Movie_Data/raw_tex...,"In 2005, Schmidt and Jenko are high school stu...","By Oren Uziel\n 4/23/13\n Previously, on 21 J...",Long-haired JENKO watches Slim-Shady SCHMIDT ...
490,If Beale Street Could Talk,7125860,/content/drive/MyDrive/W266_Movie_Data/BERT_an...,/content/drive/MyDrive/W266_Movie_Data/raw_tex...,African-American teen sweethearts Fonny and Ti...,Written by\n Barry Jenkins\n Adapted from the...,IF BEALE STREET COULD TALK\n +.+.ON a young m...
476,J Edgar,1616195,/content/drive/MyDrive/W266_Movie_Data/BERT_an...,/content/drive/MyDrive/W266_Movie_Data/raw_tex...,Biopic of J. Edgar Hoover (Leonardo DiCaprio) ...,Screenplay by\n Dustin Lance Black\n 09-29-20...,"CLOSE ON STRAW HAT, his HALF SMOKED CIGAR and..."
279,A Simple Favor,7040874,/content/drive/MyDrive/W266_Movie_Data/BERT_an...,/content/drive/MyDrive/W266_Movie_Data/raw_tex...,"Stephanie, a widowed mother, who also runs a c...","Written by\n Jessica Sharzer\n June 29th, 201...","Revisions by Jessica Sharzer, Peter Craig and..."
15,Molly s Game,4209788,/content/drive/MyDrive/W266_Movie_Data/BERT_an...,/content/drive/MyDrive/W266_Movie_Data/raw_tex...,"Molly Bloom, a beautiful young Olympic-class s...",screenplay by\n Aaron Sorkin\n Based on the b...,But we don’t know what we’re looking at yet. ...


In [None]:
# Export the final dataframes as CSV files under /content.
# The mapping lists the complete frame first (all 500 samples, before the
# train/val/test split), followed by the train, validation and test splits;
# files are written in that order.
export_targets = {
    '/content/complete_wiki_bert_lemma.csv': df,
    '/content/train_wiki_bert_lemma.csv': train_df,
    '/content/val_wiki_bert_lemma.csv': val_df,
    '/content/test_wiki_bert_lemma.csv': test_df,
}

for export_path, export_frame in export_targets.items():
    export_frame.to_csv(export_path)

In [None]:
# Draw a 30-row subset of the training split; the fixed random_state (266)
# makes the draw repeatable.
subset_train_df = train_df.sample(
    n=30,
    random_state=266,
    axis='index',
)

In [None]:
# Count missing cells in the subset: per-column totals first, then their sum
subset_train_df.isnull().sum().sum()

0

In [None]:
# Write the sampled training subset to a CSV file under /content
subset_train_df.to_csv(path_or_buf='/content/subset_train_df.csv')
# files.download('/content/subset_train_df.csv')

# More Exploratory Data Analysis
Movie scripts contain far more tokens than our models and computational resources can handle, so we must be selective about which tokens are used. Thankfully, the BERT annotation data splits each script into 4 main elements: *dialog, text, scene_heading, and speaker_heading*.

<br>

The intuition behind the code below is to compare the ROUGE scores obtained when concatenating only the dialog versus only the text from movie scripts.

The steps are broken down further:

1.   Concatenate script element (dialog or text)
2.   Run TextRank algorithm to score sentences (extract only top X sentences)
3.   Run abstractive summary model
4.   Compute ROUGE (Recall-Oriented Understudy for Gisting Evaluation) score



Concatenated bert_dialog

In [None]:
# Movie 0, dialog only: rank the sentences, then score them against the plot outline
ranked_sentences = run_textrank_model(train_df['bert_dialog'].iloc[0])
calculate_rouge_scores(ranked_sentences, train_df['plot_outline'].iloc[0])

1240


{'rouge1': 0.08264462809917356,
 'rouge2': 0.0,
 'rougeL': 0.03856749311294766,
 'rougeLsum': 0.06978879706152433}

In [None]:
# Movie 1, dialog only: rank the sentences, then score them against the plot outline
ranked_sentences = run_textrank_model(train_df['bert_dialog'].iloc[1])
calculate_rouge_scores(ranked_sentences, train_df['plot_outline'].iloc[1])

1327


{'rouge1': 0.11272141706924314,
 'rouge2': 0.012924071082390952,
 'rougeL': 0.0644122383252818,
 'rougeLsum': 0.10305958132045087}

In [None]:
# Movie 2, dialog only: rank the sentences, then score them against the plot
# outline. The reference must be 'plot_outline' (not 'title') so that this
# score is comparable with the other bert_dialog / bert_text examples.
ranked_sentences = run_textrank_model(train_df.iloc[2]['bert_dialog'])
calculate_rouge_scores(ranked_sentences, train_df.iloc[2]['plot_outline'])

1249


{'rouge1': 0.20979020979020976,
 'rouge2': 0.02112676056338028,
 'rougeL': 0.10489510489510488,
 'rougeLsum': 0.13986013986013984}

In [None]:
# Export one worked example (movie 2 of the training split) as a one-row CSV.
# NOTE(review): `ranked_sentences` is whatever the most recently executed
# TextRank cell left behind - confirm it belongs to this movie.
example_movie = train_df.iloc[2]
example_record = {
    'title': example_movie['title'],
    'ranked_sentences': ranked_sentences,
    'plot_outline': example_movie['plot_outline'],
}
example_df = pd.DataFrame(data=example_record, index=[0])
example_df.to_csv('/content/1917_top_50_ranked.csv')

Concatenated bert_text

In [None]:
# Movie 0, text only: rank the sentences, then score them against the plot outline
ranked_sentences = run_textrank_model(train_df['bert_text'].iloc[0])
calculate_rouge_scores(ranked_sentences, train_df['plot_outline'].iloc[0])

539


{'rouge1': 0.05534105534105534,
 'rouge2': 0.007731958762886597,
 'rougeL': 0.03732303732303732,
 'rougeLsum': 0.0501930501930502}

In [None]:
# Movie 1, text only: rank the sentences, then score them against the plot outline
ranked_sentences = run_textrank_model(train_df['bert_text'].iloc[1])
calculate_rouge_scores(ranked_sentences, train_df['plot_outline'].iloc[1])

1345


{'rouge1': 0.07547169811320754,
 'rouge2': 0.007566204287515764,
 'rougeL': 0.05534591194968553,
 'rougeLsum': 0.06792452830188679}

In [None]:
# Movie 2, text only: rank the sentences, then score them against the plot outline
ranked_sentences = run_textrank_model(train_df['bert_text'].iloc[2])
calculate_rouge_scores(ranked_sentences, train_df['plot_outline'].iloc[2])

1757


{'rouge1': 0.055315471045808126,
 'rouge2': 0.0069264069264069255,
 'rougeL': 0.04148660328435609,
 'rougeLsum': 0.05185825410544512}

As the examples above show, using bert_dialog generally yields higher ROUGE scores (i.e. better summarizations) than using bert_text.

# Baseline Models (Raw text lemmas)
All baseline models are run on the same 30 movie scripts, sampled from the training split with random state 266.

## Pegasus Short

In [None]:
# Pegasus "short" baseline on raw lemma text.
# For every movie in the sampled subset: read its lemma file, summarize it,
# and score the summary against the reference plot outline. Both result
# dictionaries are keyed by movie title.
pegasus_short_lemma_rouge_dict = {}
pegasus_short_lemma_summary_dict = {}

# Iterate over however many rows the subset holds instead of a hard-coded 30,
# so the cell neither raises IndexError nor skips rows if the sample size changes.
for i in range(len(subset_train_df)):

    # Look the row up once; 'lemma_data' holds the path to the lemma text file.
    movie = subset_train_df.iloc[i]
    raw_lemma = movie['lemma_data']

    # Explicit encoding so the read does not depend on the platform default
    # (assumes the lemma files were written as UTF-8 - TODO confirm).
    with open(raw_lemma, 'r', encoding='utf-8') as file:
        # NOTE(review): newlines are dropped without inserting a space, which
        # fuses the last word of a line with the first word of the next one
        # unless lines already end in whitespace - confirm this is intended.
        text_data = file.read().replace('\n', '')

    pegasus_short_summary = run_pegasus_short_model(text_data)
    pegasus_short_rouge_scores = calculate_rouge_scores(pegasus_short_summary, movie['plot_outline'])

    # add rouge scores and summary to a dictionary
    # (a repeated title would overwrite the earlier entry)
    title = movie['title']
    pegasus_short_lemma_summary_dict[title] = pegasus_short_summary
    pegasus_short_lemma_rouge_dict[title] = pegasus_short_rouge_scores

print(pegasus_short_lemma_rouge_dict)
print(pegasus_short_lemma_summary_dict)

All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('A video of a police chase in the Republic of Ireland in which a car ploughed '
 'into a patrol car, killing one officer and injuring three others, has been '
 'released by the Irish National Police (INPS) and the Garda Sochna (Irish '
 "police) on the eve of the Republic's independence day.... and it's all "
 'captured on a helmet-mounted camera, with the help of a thermal imaging '
 'camera and a thermal camera system.')


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Batman v Superman: Dawn of Justice director: Zack Snyder starring: Ben '
 'Affleck, Henry Cavill, Chris Evans, Gal Gadot, Ray Stevenson, Scoot McNairy, '
 'Jeremy Irons, JK Simmons, Rosario Dawson, Benicio del Toro, Michael Keaton, '
 'Ben Kingsley, Holly Hunter, Michael Sheen, and director: Frank Miller '
 'starring: Henry Cavill as Batman, Ben Affleck as Bruce Wayne, Gal Affleck as '
 "Batman's alter-ego, Robin, Chris Pine as Wonder Woman, Amy Adams as Lois "
 "Lane, Michael Shannon as John Wayne's wife, Talia Shire as Robin's "
 'ex-girlfriend, Rachel McAdams as Wonder')


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("Thomas Anderson's film adaptation of the novel by Thomas pynchon is set in "
 '1970s Los Angeles and tells the story of a young man who moves into a young '
 "woman's apartment and falls in love with her, despite the fact that the "
 'young woman has a secret she keeps from her family and the rest of the '
 "world.'' film is directed by Thomas Anderson from a screenplay by Anderson "
 'and is produced by Anderson, his wife, Wes Anderson, and his son, Owen.')


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Comic book writer Robert Mark kamen reveals the secret of the day catale '
 "catale - a young girl's notebook - on a black screen in a favela in Rio de "
 'Janeiro, Brazil, on the eve of the release of his new novel colombiana, '
 "which tells the story of a young woman's struggle to raise a cat in a world "
 'where cats are not allowed to live, in the first of a series of new works by '
 'the writer, which will be published in the UK in October.')


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("mordecai richler's novel 'The Sense of an Ending' is a darkly comic account "
 "of a young man's relationship with his wife, Miriam, in the early 1970s, in "
 "a small town in south-east Scotland.' The following is an extract from the "
 "novel, which was published in the UK in 1983.'' read full story at "
 'www.guardian.co.uk/books/mordecai-richler-s-novel-the-sense-of-an- Ending')


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('A terrifying new play by Jared Bush and Rich Moore about a young bunny who '
 "runs into the jungle to try to escape a jaguar who's about to leap out of "
 'the forest and attack him, in a zootopia where fear and trickery are the '
 "order of the day - a play that's both terrifying and terrifying at the same "
 "time - at a time when the world's most dangerous animals are on the brink of "
 'extinction - a world where the only way to stop them is to stop us.')


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('A selection of some of the best horror stories from the past 50 years, '
 "compiled by the American Library Association's Visual Journalism Section.  "
 'Copyright (c) The Vancouver Sun E-mail this Article Print this Article '
 'online at: '
 'www.the-sun.com/article/us-horror-stories-by-the-american-library-association-visual- '
 'Journalism-Section-Video-Journalism-Writing-and-Editing-on-a-daily- basis')


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('A poem by Kelly fremon Craig, published in the January 2011 issue of The New '
 'York Times, in which she reflects on her life as a single mother and the '
 'impact litter has had on her family over the past 100 years, including the '
 'death of her father, who was killed in a plane crash in 1965... and then '
 "there's the last thing that makes me want to say it is that I'm sorry but I "
 "don't know what to say to you.")


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('A power camera on a bus in the Iranian capital, Tehran, has captured a '
 'stunning image of a woman wearing a traditional Arabian headdress as she '
 'sits on the floor of a third world bus in a city that has been dubbed "the '
 'world\'s most dangerous place" by the United Nations , according to a report '
 "by the UN's refugee agency (UNHCR) and the World Health Organization (WHO) "
 'in February 2013 - the first time a power camera has been used in the '
 'country.')


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('celesse and jeste forever - the love story of a high school football player '
 'and his best friend - is a love story that will live on in the hearts of '
 'their fans for years to come, and will forever be a part of their lives as '
 'they grow up and leave the world they call home , but they will never forget '
 'the moment they met and fell in love - the moment when they first kissed and '
 'fell head over heels for each other - their love story is a story of '
 'friendship, love, and love.')


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('In our series of letters from African journalists, Jason reitman looks at '
 'some of the more unusual ways that African journalists have covered the news '
 "in the past year - and how they've helped shape our understanding of what's "
 'going on in the world around us. ryan bnackingham stand at a spotlight '
 'reveal ryan a backpack and backpack down down beside he . . - common '
 'pre-flight instruction a spotlight reveals a spotlight stand at the podium .')


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("As India goes to the polls to elect a new parliament, the BBC's Geeta Pandey "
 "takes a look at some of the key stories that will shape the country's "
 "political landscape over the next five years.... and then there's the story "
 "of a politician who's been on the road with his supporter for a week and a "
 'half and has come back with the same message - he wants you to vote for him '
 "and he's going to take you on a journey with him.")


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('A selection of the most striking images from the 70th anniversary of the '
 'D-Day landings in Normandy, France, which led to the liberation of France '
 'from Nazi occupation and the end of World War II in Europe and the Americas, '
 "as captured by Associated Press photographers and film crews.'  Copyright "
 '(c) The Vancouver Sun E-mail this Article Print this Article Share this '
 'Article Mirai Nagasawa, a student at the University of British Columbia, '
 'takes a look back at some of the more striking images of the Normandy '
 'landings.')


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Alice in Wonderland by Lewis Carroll 9/9/08 ( script White shoot London ) '
 '10/28/08 revise page ) fade in sil - cont hou night - cont - sil hou sil '
 'night - hou day - cont ( cont  hounight ) charle kingsley have a new venture '
 'to he friend , include lord Charles , and he lose a sense of the venture '
 'being impossible to believe it can be possible - charle kingley')


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('All photographs by Jamie Silver and jaffa Silver, courtesy of the '
 'International Union for the Conservation of Nature (IUCN) and the Zoological '
 'Society of London (ZSL) in cooperation with the African Wildlife Foundation '
 '(AWF) in the Democratic Republic of Congo (DR Congo) and by jaffa and amanda '
 "Silver, both from the BBC's Wildlife Photographer of the Year (WPY) "
 'programme. ., edited by JamieSilver and  amandaSilver.')


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('" needy , needy, needy - needy " she says, " I\'m sorry, but I don\'t have '
 'any money to buy you any presents " needy - she says , "I\'m so sorry, I\'ve '
 'got no money to send you any gifts - needy - I have no time to write you any '
 'more letters " needy " I see a little girl in a hospital bed, a little boy '
 'in a prison cell, a girl in prison, a boy in prison - needy .')


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("The BBC's Russia correspondent John Solomon looks back at his time reporting "
 'on the conflict in eastern Ukraine, which has claimed the lives of more than '
 '1,000 people, many of them Russian soldiers, since the start of the year.  '
 'Subscribe to the BBC News Channel: http://bit.ly/subscribe-to-the- BBC News '
 'website: www.bbc.co.uk/news Facebook: https://www.facebook.com/bbcnews '
 'Twitter: @bbcnews')


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("Carberry and Glenn Patterson's new album, Good Vibrations, will be released "
 'on 20 September 2011 via Capitol Records.., and will be available on iTunes, '
 'Amazon and all other digital retailers from the same date., as well as being '
 'available on the Carberry website, and on iTunes and other online '
 'retailers., the album will also be available in the UK on the BBC Music '
 'website and on the iTunes store., by Glenn Patterson and Carberry.')


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("Sophie's Song is a song about money and how much money you can make in a "
 'day, written and performed by Sophie Turner, a singer-songwriter from '
 "London, who was inspired to write the song after a trip to London's Hyde "
 'Park, where she saw a statue of George Bernard Shaw and met a man who gave '
 'her a dollar and told her to "give him money", as well as a few words of '
 'advice on how to make money in London.')


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('This is the story of a father and his son who are separated by a million '
 'years and find themselves at the centre of a war between two tribes in the '
 'Sahara desert , one of them is a man and the other is a plant , the two are '
 'forced to live side-by-side in the desert and have to fight each other in a '
 'war of wills and wills , a war that will last for millions of years and will '
 'change the course of history.')


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("A mother's minivan burst into flames as she tried to stop her teenage son "
 "from driving dangerously at high speed, causing the vehicle's glass to "
 'shatter and send a plume of smoke and dust into the air.  Prev of 2 Next: A '
 'man slumps against a blood-stained minivan to reveal the identity of a '
 'teenage boy who was killed in a car crash in Los Angeles, California, on '
 'Tuesday, April 9, 2014.  Next of 2')


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("A look back at some of the more memorable moments from this year's Baftas, "
 "which took place at London's Royal Albert Hall on Saturday, 7 July.'''s "
 'theme song is by the band Belle and Sebastian, who were nominated for four '
 "awards, including best new artist and best song for their track, 'I'm Not "
 "There', from their album 'The Belle & Sebastian', which was released in "
 "2012.''.")


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('tweedurn my factte a porch and elegant pottery hold onto a desk, suck the '
 'secretariat, hold on to a chair, toss a page into a wall, revision of a '
 '1960s paperboy, throw a penny into a newspaper, a lift into a house, a dog '
 'in a morning newspaper, walk down a street, a house in a day, a day in a '
 'week, a week in a month, a month in a year, a year in a decade, a decade in '
 'a century.')


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("A selection of images from the BBC's coverage of the 70th anniversary of the "
 'Battle of the Somme, which claimed the lives of more than 3,600 people, '
 'including more than 1,000 British soldiers, and more than 2,000 French and '
 'German soldiers.... and a few more from the front line, all captured on '
 'camera by BBC News correspondents during the conflict, including footage of '
 'Tommy Dkirk, one of the last British soldiers to survive the battle and the '
 'only British survivor.')


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('The film is based on the novel of the same name by William giraldi and is '
 "directed by macon Blair and stars Gerald the bear, who plays the bear's son, "
 "and the film's other cast members, including the voices of Patrick Stewart, "
 'Stephen Fry, Michael Sheen, and Jason Schwartzman, as well as a cameo '
 'appearance by the bear himself, the grizzly bear cub cub, the polar bear '
 'cub, and a grizzly bear bear calf.')


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("Bridget Jones's baby by helen field final shoot script 8 January 2016 London "
 "studios universal film rights 1 Ext , all rights go to the film's director, "
 "dan daniel daniel, Emma Thompson, Bridget's co-stars, cast, crew, extras, "
 'photographers, cinematographer, costume designer, make up artist, hair and '
 'make-up artist, makeup artist, hairstylist, wardrobe designer, hair stylist, '
 'make-over, hair styling, makeup, costume design, hair design,make-up design, '
 'wardrobe design, makeup design, styling, hair Styling, makeup Design, '
 'Styling, hair, makeup Styling,')


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Dirty girl, some girl, rock mustang, nerd, and a dream of republican - the '
 'story of a high school girl from a small town in Oklahoma who falls in love '
 'with a rock mustang and embarks on a road trip that will change her life - '
 'the first of a series of books by the author of the best-selling novel by '
 'the same name, which was turned into a film by Clint Eastwood and directed '
 'by Sam Goldwyn, who also co-wrote the screenplay.')


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Watch the video for the song " shall be the light of the world" by the band '
 'The Flaming Lips on their official website: www.the Flaming Lips Facebook '
 'page: '
 'https://www.facebook.com/pages/The-Flaming-Lips-on-LLC-LLC/3311936574574?ref=ts '
 "- by the Flaming Lips - with permission from the band's record label, "
 'Interscope Records - on their website: http://bit.ly/2u9w5sr')


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("Jane the Virgin cast member ryan hansen explains what it's like to play the "
 'title role in the new drama about a group of misfit high school students who '
 "team up to solve a mystery in the final episodes of the series' first "
 "season. smh - i'm not a fan of the show, but i'll give it a shot - i love "
 "ryan - he's a great guy - i think he'll do a great job - i want to work with "
 "him again - i don't know what else to say - i just want to see him on the "
 'show - i hope he')


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Pacific lexicon by Travis beacham preface : translate a supplementary guide '
 'to pertinent term jargon and terminology for sailors, airmen, Marines, '
 'soldiers, sailors, and others who serve, live, work, or train in the Pacific '
 'Ocean and its surrounding waters.) by quasi pacific lexicon byTravis beacham '
 'preamble : Translate a supplementary Guide to Relevant Terms and Terminology '
 'for Sailors, Airmen, Marine, Soldiers, sailors and others Who Serve, Live, '
 'Work, or Train in thePacific Ocean and Its Surrounding Waters.')
{'The Guard': {'rouge1': 0.19672131147540983, 'rouge2': 0.016666666666666663, 'rougeL': 0.11475409836065574, 'rougeLsum': 0.11475409836065574}, 'Batman Year One': {'rouge1': 0.08653846153846154, 'rouge2': 0.009708737864077669, 'rougeL': 0.04807692307692308, 'rougeLsum': 0.04807692307692308}, 'Inherent Vice': {'rouge1': 0.2962962962962963, 'rouge2': 0.07518796992481203, 'rougeL': 0.14814814814814814, 'rougeLsum': 0.14814814814814814}, 'Colombiana': {'ro

In [None]:
# Combine the per-title ROUGE scores and generated summaries, then preview
# the first five rows.
pegasus_short_lemma_results = combine_rouge_and_summary_results(
    pegasus_short_lemma_rouge_dict,
    pegasus_short_lemma_summary_dict,
    'pegasus_short_lemma',
)
pegasus_short_lemma_results.head(n=5)

Unnamed: 0,rouge1,rouge2,rougeL,rougeLsum,pegasus_short_lemma_summary
The Guard,0.196721,0.016667,0.114754,0.114754,A video of a police chase in the Republic of I...
Batman Year One,0.086538,0.009709,0.048077,0.048077,Batman v Superman: Dawn of Justice director: Z...
Inherent Vice,0.296296,0.075188,0.148148,0.148148,Thomas Anderson's film adaptation of the novel...
Colombiana,0.22069,0.027972,0.151724,0.151724,Comic book writer Robert Mark kamen reveals th...
Barney s Version,0.133333,0.0,0.116667,0.116667,mordecai richler's novel 'The Sense of an Endi...


In [None]:
# Write the combined short-model results table to a CSV file.
pegasus_short_lemma_csv_path = '/content/pegasus_short_lemma_results.csv'
pegasus_short_lemma_results.to_csv(pegasus_short_lemma_csv_path)
# Optional (Colab): uncomment to download the CSV to the local machine.
# files.download(pegasus_short_lemma_csv_path)

In [None]:
# Mean of each ROUGE column over all rows; the free-text summary column is
# dropped first so that only the score columns are averaged.
pegasus_short_lemma_scores_only = pegasus_short_lemma_results.drop(
    columns=['pegasus_short_lemma_summary']
)
pegasus_short_lemma_scores_only.mean(axis=0)

rouge1       0.194040
rouge2       0.020054
rougeL       0.118927
rougeLsum    0.118927
dtype: float64

## Pegasus Long

In [None]:
# Summarize the lemmatized text of the first 30 rows of subset_train_df with the
# Pegasus long model and score every summary against the row's plot outline.
# Both dictionaries are keyed by the row's title.
pegasus_long_lemma_rouge_dict = {}
pegasus_long_lemma_summary_dict = {}

# head(30) selects the same rows as range(30) but does not raise an IndexError
# when the subset holds fewer than 30 rows; each row is looked up only once.
# NOTE(review): the cell output shows a model-initialisation message for every
# iteration, so run_pegasus_long_model presumably reloads the model on each
# call - confirm and consider loading it once.
for _, row in subset_train_df.head(30).iterrows():

    # 'lemma_data' is used as the path of the text file, not as the text itself
    raw_lemma = row['lemma_data']
    # explicit encoding so the read does not depend on the platform default
    with open(raw_lemma, 'r', encoding='utf-8') as file:
        # NOTE(review): newlines are removed without inserting a space, which may
        # fuse the last word of a line with the first word of the next one -
        # confirm against the lemma files before changing (affects all scores).
        text_data = file.read().replace('\n', '')

    pegasus_long_summary = run_pegasus_long_model(text_data)
    pegasus_long_rouge_scores = calculate_rouge_scores(pegasus_long_summary, row['plot_outline'])

    # add rouge scores and summary to a dictionary
    # (a repeated title overwrites the earlier entry)
    title = row['title']
    pegasus_long_lemma_summary_dict[title] = pegasus_long_summary
    pegasus_long_lemma_rouge_dict[title] = pegasus_long_rouge_scores

print(pegasus_long_lemma_rouge_dict)
print(pegasus_long_lemma_summary_dict)

All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Final shoot draft contains all revision January 2010 reprisal film / element '
 'picture 21 mespil Road Dublin 4 Ireland .<n> dawn helicopter shot -- a red '
 'car speed through the barren landscape ., veer wildly , just as the sun be '
 'rise , glint of light break the darkness . int. red car - dawn five young '
 'men be pass a whiskey bottle around , ext . country road - dawn a garda '
 'police car be park in the road .')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('batman year one screenplay by frank miller fade in .<n> ext . gotham CITY , '
 'night lightning rip across a jet-black sky ..<n>Thunder crash . lightning '
 'cast gotham City in stark relief . siren wail . car alarm scream . lighting '
 'silhouettes a menace police helicopter ., roaring downward like a monster '
 'insect . Lightning ... spark? shower. wild . reveal : int. dark area - night '
 "a sweat , torment sleeper '' bruce wayne .")


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('This script is the confidential and proprietary property of Warner bros '
 '.<n>Picture and no portion of it may be perform , distribute , reproduce , '
 'use , quote or publish without prior write permission ..<n>final shoot '
 'script August 7 , 2013 Warner boulevard warner bros picture inc.<n>All '
 "rights reserved over black a sweet , young woman 's voice narrate , "
 'v.o.<n>She come along the alley and up the back stair the way she always use '
 'to .')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('10 year old girl turns page of a notebook to reveal a lara croft comic '
 '.<n>The comic was written by Robert Mark kamen and luc besson in August 2009 '
 'on a black screen bogota ., colombia 1997 close on a page of the lara Croft '
 'comic book be copy , with uncanny precision , down to the last detail , by a '
 'ten year old hand clutching a pencil voice cataleya !<n>The notebook '
 'contains a rare orchid plant .')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("Michael Konyves's version of the novel by mordecai richler white .<n>In the "
 'novel, a man and his wife have an affair after a night of heavy drinking '
 '..<n>The story is told from the point of view of the man, who is trying to '
 'decide what to do with a nude photo of the woman he is having an affair '
 "with.<n>It is told through the prism of a man's life in the 1980s in New "
 'York City.')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Amateur stage production .<n>Young judy blood , blood, blood !<n>Reams of '
 'red mo ché entrail ooze from the bunny . and when that run out -- projectile '
 'ketchup ..<n>Reveal : this be animal ., phil johnston and Jared Bush , Jim '
 'reardon , josie trinidad , Byron Howard , Rich Moore , Jared Bush and '
 'Jennifer Lee in black .')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('scary storues to tell in the dark written by Kevin & Dan hageman .<n>Story '
 'by guillermo Del toro base on scary story to tell In the Dark by Alvin '
 'schwartz illustration by Stephen gammell 11.2.17 draft " some town have a '
 'curse"<n> disk jockey who sound like he be battle throat cancer, cut through '
 'the doleful monotony of americana -- disk jockey ( v.0o)<n>Mother place a '
 'jack-o-lantern on house .')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('nadine rostami ( 16 ) skinny , self - conscious posture , a face that have '
 'not quite grow into she nose .<n>She clutches a large duffle bag and talk '
 'into she cell phone as she gazes into the muck of the norfolk pond.<n>The '
 'pond has not drain in a hundred years.<n>Her favorite present is the Canada '
 "sweatshirt you get her from Canada that 's her favorite because the inside "
 'be still fuzzy after a billion wash.')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Middle Eastern leg , traditional Arabian dress to the floor .<n> scarlett '
 'durang, mid 20 , a adventurous british archaeologist.<n>The kind of cat that '
 'get kill by curiosity her off - the-chart intellect and multiple post - grad '
 'degree be barely conceal by she strike good look and punk rock inclination '
 '.. judge from the cloth that edge frame , she be ob.<n>She betwixt a man and '
 'a cat, a woman and a child .')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('11 1 montage over the open credit to sunny levine\'s " love 1 rhino "<n>A '
 'progression of image of celeste and jesse , age 18 to 30 .<n>Close - up of '
 'they hand cross , make " C " and " J " shape ..<n>They go to college '
 'together, study together, drink together and be still best friend . junior '
 'year , celeste with saleem , she hot , black militant boyfriend .')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Jason reitman and sheldon Turner show you how to pack your life .<n>From a '
 'backpack to a couch to a two-story house, this guide will help you along the '
 'way . Filmed in front of a studio audience at the Edinburgh Airshow in '
 'Scotland, this video was shot using a Canon 5D Mark II camera ., Canon’s '
 'flagship camera. The Canon 5d Mark II is the world’s most advanced digital '
 'SLR camera. It features an advanced Canon digital SLR lens, a Canon digital '
 'camera management system, and an integrated Canon software suite.')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('A politician ( 55) in a white shirt and pants who look more like a local '
 'hood .<n>Travels on top of a Jeep which have be design to look like a '
 'mythological chariot.<n>They ride slowly in front of the chariot shouting '
 'slogan ..<n> chorus humara neta kaisa ho ?<n>mangal netam jaisa ho, !<n>Mat '
 'do mujhe vote ! Mat do ! his supporter be silent .')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('The alphabet of rage .<n>until we cut to black, and hold there on black , '
 "title : base on the actual event then -- a man 's hand enter frame.<n>2 shut "
 'the alarm, and now -- hand in a mirror . perfect a tie , and - now - 3 '
 'the.<n>Mark felt - the man who bring down the white house by Peter landesman '
 'over black : title  :<n>Washington , D.C. , 1972 1 now a rhythmic , '
 'accelerate anthology -- footage , still -- President Richard Nixon at '
 're-election whistle -stop.<n>as Pennsylvania Ave roil with protest -- '
 'scrimmage line of National Guard and Police.<n>Washington monument carpete '
 'in tent -- placard of jungle trench full of american army dead.<n>lapd squad '
 'car pour smoke and flame -- accelerate now to a torrent of campaign bunt and '
 'bumpersticker .')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("Alice by Linda woolverton based on Alice 's adventure in wonderland through "
 "the look Glass by Lewis Carroll .<n>The story follows nine-year-old Alice's "
 'adventures as she tries to find her way home after a visit to her aunt and '
 "uncle's home in the bleak Victorian era ..<n>An all-star cast of famous "
 'faces including Cate Blanchett, Anne-Marie Duff, Ian McKellen, Helena Bonham '
 'Carter, Matthew Macfayden,Sally Hawkins, Martin Freeman, and Colin Firth .')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("Rick jaffa's latest book, Rise of the Planet of the ape, is published by "
 'Zondervan .<n>Story follows the life of a young chimp, Alpha, and her quest '
 'to mate with a male chimpanzee, alpha, in the west African jungle ..<n>Rick '
 'jaffa is the author of several books on chimpanzees, including The '
 "Chimpanzee: A Scientific and Geographic Look at the World's Most Adapted "
 'Chimps .')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("jennifer' body written by Diablo cody 9/20/2007 int .<n> leech lake women's "
 'correctional hospital - day anita " needy " lesnicki , 17 ..<n>I think I get '
 'more letter than Santa Claus , Zac efron and Dr. phil combine, says jennifer '
 '., as she sits on she hospital bed in pajamas.<n>We see a pile of unopen '
 'mail scattered casually on the floor.<n>there be letter , package , even '
 'creepy little gift and totem send by admire " fan .')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("A cb radio dangles uselessly from a military truck's cab .<n>A few word in "
 'Russian can be hear through the static voice over cb ( in Russian )<n> day '
 'the camera track along the front of the truck where we find the body of '
 'another dead soldier befor.<n> a human hand lie limp and bleed on the '
 "passenger seat, a soldier 's face is clearly dead, his eye and mouth be "
 "open, he brain splatters against the inside window of the cab, it 's a "
 'gruesome site .')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('hooley family home - day caption : belfast .<n>Colin carberry and Glenn '
 "Patterson shoot script 12 August 2011 ext ..<n>Child terri 's hand pick up ' "
 "I saw the Light 's - hank Williams in he trademark rhinestone suit - and "
 'place it on the turntable a needle be set down on the vinyl.<n>A tomato '
 'burst against the window, obliterate the poster.<n>Stone land in the garden '
 "along with arrow from a kid 's bow .")


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('France and Sophie run through the park together .<n>They 3 dump what little '
 "money they earn into another musician 's guitar case ..<n>France make sure "
 'to keep a dollar bill . they run into the subway ., France cook scramble egg '
 'and stir fry.<n>Sophie 6 enter frame and get . tompkin Square Park, New '
 'York, U.S.<n>In this video, two 27-year-old women sing and play a shitty '
 'guitar in front of a statue .')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('A meteorite hit Africa and we see plant life and animal affect by vibranium '
 '.<n>A warrior shaman receive a vision from the panther goddess bast who lead '
 'him to the heart shape herb, a plant that grant he super human strength , '
 'speed , and instinct.<n> a visual representation of the five tribe emerge as '
 'hand from the sand animation, and we sees they unite , but then break apart '
 'as conflic.<n>Animated by Ryan coogler and Joe Robert Cole .')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('A male teenager slumps against the blood-stain window of a speed minivan '
 '.<n>A distinctly female voice be scream in recognition of this horror.<n> '
 "pull out to reveal that this voice belong to the minivan 's driver, the late "
 "teen 's mother.<n>Two more dead boy in the back seat of the vehicle.<n>I be "
 '-- the mother be cut off mid-sentence by exploding glass , as she windshield '
 'rupture on impact with another vehicle .')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Tom bradby base on he novel .<n>12-year-old collette mcveigh be absorb in '
 "make a necklace with a bucket of bead ..<n> JUNE 1973 collette 's father , "
 'gerry senior , put he head around the door.<n>He have interrupt a call and '
 'have he hand over the receiver ., gery senior get we some fag love . you '
 "mother 's run out . he disappear again .")


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Mike Rich original February 3, 2009 page 1 of 113 fade in : ext '
 '.<n>establishing suburban neighborhood — day a manicure , upper-middle - '
 "class neighborhood in the late 1960's.<n>Day penny chenery tweedy , 40 's , "
 'intelligent , elegant , beautiful, walk down the staircase of she precisely '
 'arrange home.<n>She check she watch and call out -- penny Breakfast in ten '
 'minute !<n>She open the front door and lift the morning newspaper close on a '
 'newspaper --the STAR --with the headline : November 12 , 1969 Nixon vow end '
 "to War it 's now lie beside a plate at a perfectly set table .")


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Six young men race towards a fence twenty yard away - one by one five be '
 'shot down - the survivor climb the fence - gunfire burst thro .<n>The Tommy '
 'with the hose carefully lift each side ... title 4 for a MIRACLE . blamblam '
 '!- Tommy , grab he trousers - all six race away from we , towards the fence '
 '20 yard away, one by two five are shot down, the survivor climbs the fence, '
 'gunfire bursts thro')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Chaos production / addictive picture bonneville film this appear on black '
 'screen : o unteachably after evil , but uttering truth .<n>How do people '
 'even survive out here ?<n>and why?<n> ext . behind the slone cabin - day not '
 'so much a backyard as a expanse between the distant treeline and a roughly - '
 'hewn log cabin ..<n>A soft rhythmic wump wumpwump lead we to ... a boy ( 7 )')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("Final shoot script for Bridget Jones's Baby by helen field and Dan mazer and "
 'Emma Thompson .<n>Bridget is older than when we last see her, slim and '
 'elegantly to be exact , slender and elegantly dress ., she take a deep '
 'breath and collect herself ..<n> omitted 3 int church day and 4 BRIDGET jone '
 "enter the church. she 'solder than whenwe last see she , forty three to be "
 'precise .')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Clarke ( 15 , a soft -nature , overweight nerd with a bruise on he face ) '
 'watch as the mustang climax .<n>Clarke ( v.O. ) -.. that be , the best girl '
 "... the car door fly open . a stocky boy emerge put on he letterman 's "
 'jacket ..<n>Abe sylvia wga registered first draft April 14 , 2005 norman , '
 'Oklahoma  1989 over black .')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("'Seth' gets out of his car in the parking lot of a restaurant and goes to "
 'the pick-up counter .<n>He waits for the cashier to look up, and then drop '
 'he tip in the ja.<n>Video was taken at a restaurant in Los Angeles, '
 "California, U.S.<n>Seth's name is Seth and he is a bartender at a bar called "
 'Third Man Tavern. He lives with his girlfriend, Hailey Hill, and their two '
 'children, aged two and four. He has a son, Seth Jr., who is three years old.')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('ryan hansen solves crime on television .<n>Ryan hansen is played by rawson '
 'Marshall thurber episode # 102 February lst, 2017.<n>Read the full episode '
 "on steal chinese internet and that 's cool too.<n>Follow Ryan hansen on "
 'Twitter @ryanhansen and on Facebook @ryanHansen . GRAPHIC CONTENT MAY OFFEND '


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Pacific rim .<n> lexicon ( a supplementary guide to pertinent term and '
 'jargon ... ).<n>Travis beacham is a marine engineer and author of marine '
 'technology books.<n>His company, Travis Beacham & Co., conducts marine '
 'research and development projects in the U.S. and overseas.<n>Travis '
 'Beacham’s company also conducts marine exploration and testing operations in '
 'the Pacific Ocean and the Indian Ocean.<n>He is the author of the book '
 'Pacific Rim: The Official Encyclopedia of the United States of America, '
 'published by Simon & Schuster.')
{'The Guard': {'rouge1': 0.1016949152542373, 'rouge2': 0.017241379310344827, 'rougeL': 0.1016949152542373, 'rougeLsum': 0.1016949152542373}, 'Batman Year One': {'rouge1': 0.13186813186813187, 'rouge2': 0.022222222222222223, 'rougeL': 0.054945054945054944, 'rougeLsum': 0.054945054945054944}, 'Inherent Vice': {'rouge1': 0.15748031496062992, 'rouge2': 0.0, 'rougeL': 0.11023622047244094, 'rougeLsum': 0.11023622047244094}, 'Colombiana

In [None]:
# Combine the per-title ROUGE scores and the generated summaries into a single
# results table, then preview its first rows.
pegasus_long_lemma_results = combine_rouge_and_summary_results(
    pegasus_long_lemma_rouge_dict,
    pegasus_long_lemma_summary_dict,
    'pegasus_long_lemma',
)
pegasus_long_lemma_results.head()

Unnamed: 0,rouge1,rouge2,rougeL,rougeLsum,pegasus_long_lemma_summary
The Guard,0.101695,0.017241,0.101695,0.101695,Final shoot draft contains all revision Januar...
Batman Year One,0.131868,0.022222,0.054945,0.054945,batman year one screenplay by frank miller fad...
Inherent Vice,0.15748,0.0,0.110236,0.110236,This script is the confidential and proprietar...
Colombiana,0.164179,0.0,0.119403,0.119403,10 year old girl turns page of a notebook to r...
Barney s Version,0.19697,0.015385,0.106061,0.106061,Michael Konyves's version of the novel by mord...


In [None]:
# Write the combined long-model results table to a CSV file.
pegasus_long_lemma_csv_path = '/content/pegasus_long_lemma_results.csv'
pegasus_long_lemma_results.to_csv(pegasus_long_lemma_csv_path)
# Optional (Colab): uncomment to download the CSV to the local machine.
# files.download(pegasus_long_lemma_csv_path)

In [None]:
# Mean of each ROUGE column over all rows; the free-text summary column is
# dropped first so that only the score columns are averaged.
pegasus_long_lemma_scores_only = pegasus_long_lemma_results.drop(
    columns=['pegasus_long_lemma_summary']
)
pegasus_long_lemma_scores_only.mean(axis=0)

rouge1       0.173448
rouge2       0.012663
rougeL       0.105938
rougeLsum    0.105938
dtype: float64

## Pegasus Large

In [None]:
# Summarize the lemmatized text of the first 30 rows of subset_train_df with the
# Pegasus large model and score every summary against the row's plot outline.
# Both dictionaries are keyed by the row's title.
pegasus_large_lemma_rouge_dict = {}
pegasus_large_lemma_summary_dict = {}

# head(30) selects the same rows as range(30) but does not raise an IndexError
# when the subset holds fewer than 30 rows; each row is looked up only once.
# NOTE(review): the cell output shows a model-initialisation message for every
# iteration, so run_pegasus_large_model presumably reloads the model on each
# call - confirm and consider loading it once.
for _, row in subset_train_df.head(30).iterrows():

    # 'lemma_data' is used as the path of the text file, not as the text itself
    raw_lemma = row['lemma_data']
    # explicit encoding so the read does not depend on the platform default
    with open(raw_lemma, 'r', encoding='utf-8') as file:
        # NOTE(review): newlines are removed without inserting a space, which may
        # fuse the last word of a line with the first word of the next one -
        # confirm against the lemma files before changing (affects all scores).
        text_data = file.read().replace('\n', '')

    pegasus_large_summary = run_pegasus_large_model(text_data)
    pegasus_large_rouge_scores = calculate_rouge_scores(pegasus_large_summary, row['plot_outline'])

    # add rouge scores and summary to a dictionary
    # (a repeated title overwrites the earlier entry)
    title = row['title']
    pegasus_large_lemma_summary_dict[title] = pegasus_large_summary
    pegasus_large_lemma_rouge_dict[title] = pegasus_large_rouge_scores

print(pegasus_large_lemma_rouge_dict)
print(pegasus_large_lemma_summary_dict)

All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('connemara - dawn the red car zoom along at breakneck speed -- ext . country '
 'road - dawn eventually shoot past boyle in he police car -- int . garda car '
 '- dawn boyle barely react -- suddenly there come the sound of screeching '
 'brake , and the boom of a high - impact car crash -- boyle unhurriedly start '
 'the car . bridge - sunrise the Red car have slam stra t n tnn.')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("Close batman year one screenplay by frank miller fade in . bruce wayne's "
 'apartment - night awoken from he nightmare by the sound of he own scream '
 'silhouettes , disorient , bruce go to the sink and splashes cold wate ? '
 "bruce's bedroom - night woken from his nightmare by his own scream , his "
 'disoriental , he go to his sink and splash cold , . .close batman year 1 '
 'screenplay by Frank Miller fade .')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("'' 2013 4000 Warner boulevard warner bros . back then it be always sandal , "
 'bottom half of a flower print bikini , fade country Joe & the Fish T - shirt '
 '. tonight , she be all in flatland gear , hair a lot short than he remember '
 ", look just like she swore she have never look ... fade in : int . doc 's "
 'apartment ( gordita beach ) - dusk ( 1970 ) doc sportello sit half awake on '
 'he couch .')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('colombiana written by Robert Mark kamen &amp ; luc besson August 2009 on a '
 'black screen bogota , colombia 1997 close on a page of a lara croft comic '
 'book be copy , with uncanny precision , down to the last detail , by a ten '
 'year old hand clutching a pencil . her mother , take time off from pack , '
 'come over to cat , and turn the page of the notebook to reveal the lara '
 'Croft comic and it')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('den - barney and miriam apartment - night a tumbler of scotch . barney then '
 'just ask she what she want I to do with this nude photo I have of she . come '
 'to think of it , you might actually want they if only to see what Miriam '
 'look like in she prime . den -barney and michelle apartment - morning the '
 "bottle of macallan 's now empty . the cigar burn to the nub .")


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('fade in on : a jungle - night a bunny nervously walk through the dark , '
 'forebode forest , frighten by every shadow and move leaf . the timpani '
 'crescendos . a jaguar leap out of the shadow , attack the bunny , who scream '
 '-- CUT to : inside a barn - a jungle ( set ) - night the action continue -- '
 'as imagine by a amateur stage production . young judy blood ,blood , blood !')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('mill valley - establishing - dusk settle on a fork of the yamhill River in '
 'the Pacific Northwest rest the small railway city of mill valley . series of '
 'open shot of a town in decline : on MAIN STREET outside mill valley pawn '
 'shop , a old timer lower the american flag . ) dig yourself out of the grave '
 ", mill Valley , ' cause this could be we last Halloween ... on house a "
 'mother place a jack-o-lantern on')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('nadine I just call to tell you I be chuck everything you ever give I into '
 'the norfolk pond . yes , that disgusting scummy fester cesspool they have '
 'not drain in a hundred year and that have like 8 cheeto bag and a dead '
 'squirrel float in it right this very moment . nadine rostami ( 16 ) skinny , '
 'self - conscious posture , a face that have not quite grow into she nose , '
 "stand on the pond 's bridge stare into the muck .")


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('iranian bus - afternoon camera power up , jostle around until it settles on '
 '-- the floor of a third world bus . the camera rise , turn around revealing '
 'its operator , a woman wear a hajib ( a traditional Arabian headdress ) . '
 'she slump low in she seat for privacy , look around cautiously , and '
 'carefully lower she veil revealing -- scarlett durang , mid 20 , an '
 'adventurous british archaeologist .')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('A1 polaroid of high school moment : A1 celeste be a chronic overachiever and '
 'jesse be sweet , goofy and funny . junior year , celeste with saleem , she '
 'hot , black militant boyfriend . a moment later , jesse pose reluctantly '
 'with the couple , hold up a " Black power " fist , weakly .c1 super 8 '
 'footage : c1 senior . c1 Super 8 footage')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("ryan ( cont 'd ) imagine for a second that you be carry a backpack ... I "
 'want you to feel the strap on you shoulder ... now , start add the larger '
 'stuff . that backpack should be get pretty heavy at this point - go bigger . '
 'stuff it all in ... you car , get it in there ... you home , whether you '
 'have a studio apartment or a two story house , , I want ..')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('a politician ( 55 ) in a white shirt and pants who look more like a local '
 'hood , travel on top of a Jeep which have be design to look like a '
 'mythological chariot . a couple of he supporter climb on the Jeep and '
 'garland he . politician ( in mock anger ) mangal netam aapke beech vote '
 'maangne nahi aaya hai . Mat do mujhe vote ! politician ( cont ’d ) woh log '
 'mujhen thekedar kehte hai ?')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('MARK felt - the man who bring down the white house by Peter landesman over '
 'black : title : WASHINGTON , D.C. , 1972 1 now a rhythmic , accelerate '
 'anthology -- footage , still -- President Richard Nixon at re-election '
 'whistle -stop - as Pennsylvania Ave roil with protest -- scrimmage line of '
 'National Guard and Police -- Washington monument carpete in tent -- flame '
 'spitting out of Pentagon window -- placard of jungle trench full of american '
 'army dead -- lapd squad car pour smoke and flame -- accelerate now to a '
 'torrent of campaign bunt and bumpersticker and the signage and news feed of '
 'palestinian Liberation Organization -- Black Liberation Army -- klan -- '
 'Black September -- Red Army -- irish Republican Army -- weather Underground '
 '.')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('london - 1855 - night 1 warm light shine from the study of the gracious '
 'kingsley home . 2 charle kingsley have just describe he new venture to he '
 'friend , include lord ascot . a colleague this venture be impossible . his '
 'nine year - old daughter alice stand at the door in she nightgown , clearly '
 'frighten . charle kingley ( cont ’d ) the nightmare again ? she nod . he '
 'take she hand an')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('they belong to a female chimpanzee : bright eye . grove of tree -- day she '
 'sit in a tree with alpha , she mate - large and muscular , a prominent white '
 'birthmark across he shoulder , like a shoot STAR . canopy - day Alpha sit up '
 ", sense something . Bright eye want to stay by Alpha 's side , but he bare "
 "he toot This is the first time I've seen a chimpanzee with a bright eye.")


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('leech lake women\' correctional hospital - day anita " needy " lesnicki , 17 '
 ', sit on she hospital bed in pajamas . as she stare out the window , she '
 'wind color yarn around a pair of popsicle stick to create a " god \'s eye . '
 '" needy v.o. every day , I get letter . raymundo rec time in five minute , '
 'needy . needy grassy -ass , raymundo .')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('macgruber by will forte , John Solomon & jorma taccone super over black : '
 'dzhugdzhur mountain , eastern siberia voice over cb ( in Russian ) ( static '
 ') wombat , this be eagle nest . military truck -- cab -- day close up : a cb '
 "radio dangle uselessly from it 's base . voice over the cb ( cont ’d ) , in "
 'russian , wombat ...')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('on a window a election poster have be tape : vote hooley for a genuine '
 'alternative on the grass sit a portable record player . boy 1 wear a tatty '
 "sheriff 's hat , boy 2 have ' war paint 's on he face . in the shrub child "
 'terri crouch , hold on to he head : h The two boy do the throw and shoot be '
 'under ten . the two girl do the shoot and shoot .')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('morning 2 we find france and Sophie , both 27 , set up a little performance '
 'space in front of a statue . when people walk by , they both stop and yell : '
 'france and sophie give we you money !!! france ( alone now ) give we -- ( '
 'laugh ) come on ! they 3 dump what little money they earn into another '
 "musician 's guitar case . france make sure to keep a dollar bill .")


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('father million of year ago , a meteorite make of vibranium , the strongest '
 'substance in the universe strike the continent of Africa affect the plant '
 'life around it . the meteorite hit Africa and we see plant life and animal '
 'affect by vibranium. the tribe live in constant war with each other until a '
 'warrior shaman receive a vision from the panther goddess bast who lead he to '
 'the heart shape herb , an plant that grant he super human strength , speed , '
 'and instinct .')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('minivan - day open hard on the passenger seat of a speed minivan , where a '
 'male teenager be slump against he blood-stain window . pull out to reveal '
 "that this voice belong to the minivan 's driver , the late teen 's mother , "
 "who 's presumably racing to a hospital . pulled out even more to reveal two "
 'more dead boy in the back seat of the vehicle . I be -- the mother be cut '
 'off mid-sentence by exploding glass , as she windshield rupture on impact '
 'with another vehicle , ext .')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('hassociates.co.uk shadow dancer by Tom bradby base on he novel . Lucas , '
 'Alexander whitley , 14 Vernon Street , London . twelve-year-old collette '
 'mcveigh be absorb in make a necklace with a bucket of bead . load up , load '
 "up the rubber bullet ... punch up ; JUNE 1973 collette 's father , gerry "
 'senior , put he head around the door .')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('close on a newspaper --the denver STAR --with the headline : november 12 , '
 "1969 Nixon vow end to War it 's now lie beside a plate at a perfectly set "
 "table ; jack tweedy ( penny 's marry name be tweedy ) , mid - 40' , sit down "
 "The morning newspaper is the best way to start the day , it's the only way "
 'to get up early , and it is the easiest way to stay up late .')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('dunkirk by Christopher nolan D U N k I R k black screen : water slap hollow '
 'metal , metal knock creaking wood ... Super title : " D UN k ir k " fade in '
 ': Paper . blamblamblam !- Tommy jolt , grab he trousers - all six race away '
 'from we , towards a fence twenty yard away - one by one five be shot down - '
 'the survivor climb the fence - gunfire burst thro')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('15 a24 / vision Chaos production / addictive picture bonneville film this '
 'appear on black screen : o unteachably after evil , but uttering truth . the '
 'alaskan frontier - day sunlight , as bright and sharp as a blade , slice '
 'down on this moonscape of snow and ice . in the distance sit god-size hill , '
 'hazy and dream-like . and spread before they , abutte a black bristle of '
 'tree ...')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('1 1a 1 aa omitted 1 ext . shazzer spot jude , now a pillar of marry '
 'respectability , she husband gile and they tiny baby . omitted 3 int . 4 '
 "BRIDGET jone enter the church . she 's older than when we last see she , "
 'forty three to be exact , slim and elegantly dress . the church be pack with '
 'people we might recognise . a few literary and tv celebrity amongst they .')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('7 / dirty girl by abe sylvia wga registered first draft April 14 , 2005 '
 "norman , Oklahoma  1989 over black : clarke ( v.0 , every gixl be bear ' "
 'with a reputation . three average  looking girl pile into a car giggle . and '
 'some girl ... Clarke ( 15 , a soft -nature , overweight nerd with a bruise '
 'on he face ) watch as the mustang climax .')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('eth pay and be about to drop a dollar in the tip jar , but notice the '
 'cashier be not look . he wait for the cashier to look up , and then drop he '
 'tip in the ja Copyright  2018 by craig robinson , all rights reserved , '
 'craig.robinson@gmail.com , www.craigrobinson.com and '
 'www.youtube.com/craigrobinston , All rights reserved.')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('runyon canyon - day ryan hansen , shirtless and lightly perspire ( never '
 'sweat ) , huff and puff he way up this iconic " hike " trail in the '
 "Hollywood hill , all the while talk directly into he iphone 's periscope app "
 '. YouTube Red in da hiz-ouse ! ryan ( cont would ) ytr guy say there be a " '
 'less than normal " viewership loss from this crazy viral twerk grandma video '
 'that they link we to .')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('brain-to-brain interface ( see pon or drift ) * conn-pod . ( slang ) a joint '
 'vision share by jaeger crew member while directly link via the pon ; trigger '
 'by subconscious stimulus , a generally involuntary but seldom debilite '
 'phenomenon + drivesuit . The jaeger / pilot interface suit design to monitor '
 'vital sign and to translate nerve signal to piloting input * ghost-drift. ( '
 'a kind of quasi-telepathic vision share ( by party with a pre-exist pon '
 'connection ) without the aid of a hard neural interface ; commonly trigger '
 'by stress , physical proximity , or coincide rem sleep pattern + headspace .')
{'The Guard': {'rouge1': 0.10909090909090909, 'rouge2': 0.0, 'rougeL': 0.07272727272727274, 'rougeLsum': 0.07272727272727274}, 'Batman Year One': {'rouge1': 0.1925133689839572, 'rouge2': 0.02162162162162162, 'rougeL': 0.10695187165775401, 'rougeLsum': 0.10695187165775401}, 'Inherent Vice': {'rouge1': 0.0847457627118644, 'rouge2': 0.017241379310344824, 'rougeL': 0.

In [None]:
# Combine the per-title ROUGE-score dict and the per-title summary dict
# into a single results table, then preview its first rows.
pegasus_large_lemma_results = combine_rouge_and_summary_results(
    pegasus_large_lemma_rouge_dict,
    pegasus_large_lemma_summary_dict,
    'pegasus_large_lemma',
)
pegasus_large_lemma_results.head()

Unnamed: 0,rouge1,rouge2,rougeL,rougeLsum,pegasus_large_lemma_summary
The Guard,0.109091,0.0,0.072727,0.072727,connemara - dawn the red car zoom along at bre...
Batman Year One,0.192513,0.021622,0.106952,0.106952,Close batman year one screenplay by frank mill...
Inherent Vice,0.084746,0.017241,0.050847,0.050847,'' 2013 4000 Warner boulevard warner bros . ba...
Colombiana,0.183206,0.0,0.122137,0.122137,colombiana written by Robert Mark kamen &amp ;...
Barney s Version,0.184874,0.0,0.10084,0.10084,den - barney and miriam apartment - night a tu...


In [None]:
# Persist the Pegasus Large (lemma) results table as a CSV under /content.
pegasus_large_lemma_results.to_csv(path_or_buf='/content/pegasus_large_lemma_results.csv')
# Optional: uncomment to also download the CSV through the browser.
# files.download('/content/pegasus_large_lemma_results.csv')

In [None]:
# Mean of each ROUGE metric across all scored titles; the free-text summary
# column is dropped first so only the numeric score columns are averaged.
(
    pegasus_large_lemma_results
    .drop('pegasus_large_lemma_summary', axis='columns')
    .mean(axis='index')
)

rouge1       0.162353
rouge2       0.011145
rougeL       0.101918
rougeLsum    0.101918
dtype: float64

## LongT5

In [None]:
# Run LongT5 over the first 30 rows of `subset_train_df`, summarizing the text
# stored in each row's lemma file and scoring the summary against the row's
# plot outline. Both result dicts are keyed by movie title.
longt5_lemma_summary_dict = {}
longt5_lemma_rouge_dict = {}

for i in range(30):
    script_row = subset_train_df.iloc[i]
    title = script_row['title']

    # 'lemma_data' is a path to a text file; read it and remove every newline
    # so the model receives the script as one continuous string
    raw_lemma = script_row['lemma_data']
    with open(raw_lemma, 'r') as file:
        text_data = ''.join(file.read().split('\n'))

    longt5_summary = run_longt5_model(text_data)
    longt5_rouge_scores = calculate_rouge_scores(longt5_summary, script_row['plot_outline'])

    # record this title's summary and its ROUGE scores
    longt5_lemma_summary_dict[title] = longt5_summary
    longt5_lemma_rouge_dict[title] = longt5_rouge_scores

print(longt5_lemma_rouge_dict)
print(longt5_lemma_summary_dict)

("John Michaelael Mcdonagh's final shoot draft is a short scene set in the "
 'Irish countryside at dawn. A red car speeds past a police station and then '
 "crashes into a guard's car. Boyle reacts violently to the sound of a crash "
 'and jumps out of the car. Then, as the car speeds away, it lands on a cliff. '
 'This is one of the most famous scenes in the play.')
('"Batman Year One" is a flashback to the year of batman, Frank Miller\'s '
 'first play. It opens with a thunder crash and lightning. A police helicopter '
 'roars down like a giant insect. Int . Night a sweaty, tormented sleeper '
 'Bruce Wayne wakes up from his nightmare and screams in terror. He goes into '
 'the sink and gets splashed cold.')
('Paul Thomas Anderson\'s "Inheritance" is a screenplay based on the novel of '
 'Thomas Pynchon. It was written by Thomas Pynchon, and it is the property of '
 'Warner Bros. The story begins with a young woman walking up the alley in '
 'front of the apartment where she always 

In [None]:
# Combine the per-title ROUGE-score dict and the per-title summary dict
# into a single results table, then preview its first rows.
longt5_lemma_results = combine_rouge_and_summary_results(
    longt5_lemma_rouge_dict,
    longt5_lemma_summary_dict,
    'longt5_lemma',
)
longt5_lemma_results.head()

Unnamed: 0,rouge1,rouge2,rougeL,rougeLsum,longt5_lemma_summary
The Guard,0.218487,0.034188,0.168067,0.168067,John Michaelael Mcdonagh's final shoot draft i...
Batman Year One,0.202247,0.011364,0.11236,0.11236,"""Batman Year One"" is a flashback to the year o..."
Inherent Vice,0.258993,0.043796,0.115108,0.115108,"Paul Thomas Anderson's ""Inheritance"" is a scre..."
Colombiana,0.242424,0.015385,0.166667,0.166667,This is a very short comic about a 10 year old...
Barney s Version,0.23622,0.0,0.15748,0.15748,"The next morning, barney calls his wife and as..."


In [None]:
# Persist the LongT5 (lemma) results table as a CSV under /content.
longt5_lemma_results.to_csv(path_or_buf='/content/longt5_lemma_results.csv')
# Optional: uncomment to also download the CSV through the browser.
# files.download('/content/longt5_lemma_results.csv')

In [None]:
# Mean of each ROUGE metric across all scored titles; the free-text summary
# column is dropped first so only the numeric score columns are averaged.
(
    longt5_lemma_results
    .drop('longt5_lemma_summary', axis='columns')
    .mean(axis='index')
)

rouge1       0.222060
rouge2       0.018561
rougeL       0.129684
rougeLsum    0.129684
dtype: float64

# Baseline Models (BERT - Dialog)
All baseline models are run on the same sample of 30 movie scripts, selected with random seed 266.

## Pegasus Short

---


The pretrained *'google/pegasus-xsum'* model has a maximum of 512 tokens.

In [None]:
# Run the short Pegasus model over the first 30 rows of `subset_train_df`,
# summarizing each row's 'bert_dialog' text and scoring the summary against the
# row's plot outline. Both result dicts are keyed by movie title.
# NOTE(review): the checkpoint-initialization messages repeated in this cell's
# output suggest the model is re-created on every call - confirm in
# run_pegasus_short_model.
pegasus_short_summary_dict = {}
pegasus_short_rouge_dict = {}

for i in range(30):
    script_row = subset_train_df.iloc[i]
    title = script_row['title']

    pegasus_short_summary = run_pegasus_short_model(script_row['bert_dialog'])
    pegasus_short_rouge_scores = calculate_rouge_scores(pegasus_short_summary, script_row['plot_outline'])

    # record this title's summary and its ROUGE scores
    pegasus_short_summary_dict[title] = pegasus_short_summary
    pegasus_short_rouge_dict[title] = pegasus_short_rouge_scores

print(pegasus_short_rouge_dict)
print(pegasus_short_summary_dict)

All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('All photographs  Element Pictures / Reprisal Films, courtesy of the Irish '
 'Film Board and the National Film Board of Ireland, with permission from the '
 'copyright holder, Mirtn  Mln.  The film is released in cinemas in the '
 'Republic of Ireland on 26 January, and on DVD and Blu-ray on 1 February '
 '2010.  Subscribe to the BBC News website: www.bbc.co.uk/news  Like us on '
 'Facebook: https://www.facebook.com/bbc News Follow us on Twitter: @BBCNews')


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("The following is an extract from Frank Miller's novel, The Man in the High "
 'Castle, in which the author reflects on a year in the life of his son, '
 'Spencer, who was killed in a car crash in New York City on Christmas Day, '
 "2014... maybe I'll kick you out of the house. Maybe you'll look fine, but "
 "I'm just not quite ready for that kind of work..maybe I'd like to take a "
 'break.')


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('The screenplay by Paul Thomas Anderson, based on the novel by Thomas '
 'Pynchon, starring Brad Pitt, Leonardo DiCaprio, Jonah Hill, Jessica '
 'Chastain, and directed by Anderson himself, who also co-wrote the film with '
 'Anderson.... and the film is out in cinemas on Friday, 8 August.... but you '
 'can read the full script here: http://bit.ly/Wyvqsg The film is released in '
 'cinemas across the United States on 8 August, 2013.')


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Cat in the Hat is a comic book series written and illustrated by Robert Mark '
 'Kamen and Luc Besson and tells the story of Fabio Cataleya, a young cat who '
 'is forced to study to become a super hero. Written by Mark Kamen &Luc '
 'Besson, illustrated by Croft, and published by DC Comics, this is the first '
 "in a new series of comic books based on the popular children's TV series Cat "
 'in The Hat, which was broadcast on BBC One in the UK.')


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("Watch the trailer for Barney's Version, Michael Konyves' adaptation of "
 "Mordecai Richler's novel of the same name, in which Barney (Justin Theroux) "
 'tries to get in touch with his estranged wife after the publication of nude '
 'photos of her in the New York Times. Barney wants to speak to his wife, but '
 "she doesn't want to talk to him, so he calls his father-in-law to find out "
 "what's going on.")


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('All photographs courtesy of Jared Bush and Rich Moore, courtesy of Rich '
 'Moore and Jared Bush, and by Byron Howard, Jim Reardon, Trinidad Josie, Phil '
 'Johnston, and Jennifer Lee Fearreachery. Written by Jared Bush astronaut '
 'Phil Johnston Story by ByronHoward,Rich Moore,Jared Bush,Jim Reardon, '
 'Josie,Phil Johnston,and Jennifer Lee fearreachery Treachery, bloodlust, '
 'predators, hunting, cops, police officers, cops on the beat, police on the '
 'street, cops in the woods, police in the city, cops out in the country, cops '
 'patrolling the streets, cops at the zoo, police at the')


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Mill Valley is a horror story set in the fictional town of Mill Valley, '
 'California, where a witch has taken over the town and is trying to break the '
 'curse of the cursed town by turning it into a haunted house for '
 "trick-or-treating victims to watch live TV coverage of the witch's evil "
 'reign of terror on Halloween night in the late 1950s and early 1960s, '
 "according to the film's director, Guillermo Toro, who also co-wrote the "
 'script with Kevin Schwartz.')


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('This voicemail is from best friend, Nadine Hamidi, to her ex-boyfriend, '
 'Craig, who she says "cut her off" and "wasn\'t very good" at talking to her: '
 "I'm calling to tell you I've called off my friendship with you, you've cut "
 'me off and I was not very good with you at all, I think I have cut you off '
 "and you weren't good with me at all.")


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('A Texas man has posted a letter he received from the government of Iraq, in '
 "which he is threatened with death if he doesn't leave a network of caves the "
 'government has ordered destroyed in order to make way for a new pipeline to '
 'carry crude oil from the Gulf of Mexico to the Gulf Coast of the US Here is '
 'the full text of the letter, which was posted online by John Erick Dowdle '
 'and has since been shared by thousands of people on social media.')


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("Here's a round-up of some of the most memorable lines from the new James "
 'Bond film, Skyfall, as told by its star, Jesse James, in a letter he wrote '
 "to the makers of the new Bond phone, the Black Widow. ...I'm Love Jess, turn "
 "it down, seriously!? (back to the phone) Sorry, Jesse, I've got a phone en "
 'point towards New York Times, so no noise please.')


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('In our series of letters from African-American journalists, film-maker and '
 'columnist Jason Reitman shares his top five tips for making the most of your '
 'African- American heritage, as well as some of his own personal tips for '
 'getting the most out of your life. That backpack should be getting bigger - '
 'Go Bigger - Go Now, start adding the weight as it adds up - I want you to '
 'beat it - Start with all the little things in your life - Start adding the '
 'heavier things - Start going bigger - Start getting the stuff you have to go '
 'bigger.')


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Mangal Netam Zindabad (in mock anger) aur Mujika Neta Kaisa hoisa ho?tra '
 'Mangal ke shaadi mein miliye aur miliyenge aur apni puri mein aap aadmi mein '
 "vote aur shadi mein puri aur vote mein poll.''', apna aapna aadmi aap hun "
 'shuddhi mein election.')


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('In our series of letters from African-American journalists, film-maker and '
 'columnist Peter Landes looks back at some of the most memorable lines from '
 "some of his own works, including this one about the FBI's former "
 'second-in-command, Mark Felt, who is now at the centre of a scandal over the '
 'Watergate break-in and has died at the age of 79, after suffering a heart '
 'attack at his home in Connecticut on Thursday. (Which turns out to be a '
 'crime.)')


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("Alice in Wonderland by Lewis Carroll, based on Alice's Adventures in "
 'Wonderland. Alice by Linda Woolverton, Based on Alice"s Adventures In '
 'Wonderland, Looking Through The Glass by LewisCarroll, Charles Charles have '
 'finally lost your senses, this venture is impossible. That kind of thinking '
 'could ruin you... The nightmare again... What’s there’s a strange rabbit, a '
 "bird and a bird’s smile... Well 3 there, there's a bend.")


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("Here's the full text of Rick's letter to Amanda Silver, the author of the "
 "study that led to the discovery of a new treatment for Alzheimer's disease, "
 "called Nanotherapy - here's what she had to say about it, in her own words, "
 'in a letter published in the journal Neuron, on 10 March 2011 and reprinted '
 'here on 11 March. Bright Eyes, an orangutan, living in the wild, is being '
 'treated with a new form of Nanotherapy, a therapy that uses tiny particles '
 'to "repair" damaged parts of the brain.')


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("Here's a letter Diablo Cody received from a fan, in which she describes her "
 'life as "like a soap opera." Here are some of the things she says in the '
 "letter, which you can read in full below: I don't know what you're talking "
 'about, Needy, but here are some things I say in my letters to Needy: 1. A '
 "lot of people ask if I'm sorry I did it, but I did not do it.")


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Watch the trailer for the new film, MacGruber, starring Will Forte, John '
 'Solomon, Jorma Taccone, and Will Forte.MacGruber will be released in UK '
 'cinemas on 5 May, and in the US on 16 May.Subscribe to the BBC News YouTube '
 'channel for more news and analysis: http://www.bbc.co.uk/news Facebook: '
 'www.facebook.com/bbcnews Twitter: @bbcnews Instagram: @BBCNews')


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Script’s Colin Carberry and Glenn Patterson have shared their views on the '
 'current political situation in Jamaica on their Facebook page, in the wake '
 'of last week’s shooting at the band’s concert in Babylon, which left one '
 'person dead and several others injured, including the band members, who were '
 'all taken to hospital with minor injuries and released the next day... Read '
 'the full post here.<n> (distinctly frantic) I’m going to lose my mind.')


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Check out the full song from Greta Gerwig’s new film, The Lady in the Van, '
 'directed by Noah Baumbach and written by the Oscar-winning actress herself: '
 'Here’s the full track from the film, which opens in US cinemas on Friday, 26 '
 'March.., here’s how it’s performed by the cast and crew: GretaGerwig - The '
 'Lady In The Van - Director:Noah Baumbach - Screenplay: Noah Baumbacher')


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("Marvel's 'Black Panther' tells the story of T'Challa, the first black king "
 'of Wakanda, a technologically advanced and culturally diverse nation on the '
 'continent of Africa.., starring Chris Evans, Michael B. Jordan, Lupita '
 "Nyong'o, Danai Gurira, Chadwick Boseman, Sebastian Stan, Letitia Wright, "
 'Michael Rooker, Samuel L. Jackson, and director Ryan Coogler, who also '
 "directed 'Fruitvale Station' and 'Looper'.")


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('In this week’s episode of The Call of Duty: Black Ops II, Black Ops III and '
 'Black Ops IV dispatchers are called in to deal with the aftermath of a '
 'terrorist attack on a bus full of passengers in New York City, which turns '
 'out to be an act of domestic terrorism by one of the passengers, a man named '
 'Michael Lichtman, who is a member of the al-Qaeda affiliate known as the '
 'Islamic State of Iraq and the Levant (Isis).')


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Silence, up up up the rubber bullets, load some fags love, Dad wants to get '
 "to some playground, one of a playground's, a wee playground, we need all "
 "platforms, an open bag, please, Danny, give me control, I'm looking at a "
 "bag, look at me, look me, I’ve been shot, I don't know what happened, I "
 "asked you to go to a playground, you didn't, you asked me to go...")


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("Here's a selection of some of the more memorable lines from the BBC News "
 "website over the past five years, as written by the BBC's film and TV "
 "critic, Roger Moore: [Editor's note: I'm writing a screenplay for a TV "
 "series, but I've never written a screenplay before.] [I've written two "
 'screenplays in the past two years, both of which have been published in MP3s '
 'and Revisions.')


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('All photographs courtesy of the Royal Navy and Royal Marines, via Getty '
 'Images and AFP, subject to copyright laws and subject to change at any time '
 'without prior notice, except where otherwise noted, with permission from the '
 'copyright owner.)  Copyright (c) The Vancouver Sun E-mail this Article Print '
 'this Article Share this Article: Sicily, Italy, France, Germany, Belgium, '
 'the Netherlands, the UK, USA, Canada, Australia, New Zealand, South Africa')


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("I've been meaning to write about this film for a long time, but it's been so "
 "long since I did so, so I'm going to give it a go now - just in case you "
 'missed it, or if you want to know more about it, here\'s the blurb: "Set in '
 'a small town in Northern Ireland in the 1980s, the film tells the story of a '
 'young mother (H. Hopkins) whose life is turned upside down when her teenage '
 'son dies in a car accident."')


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('The funeral of Daniel Cleaver, the British model and actor who died on a '
 'flight to Australia, has been held in his home town of Ipswich, in Suffolk, '
 'and the BBC has been given exclusive footage of the service, including the '
 "eulogy given by Daniel's brother, Helen Fielding, who plays Bridget Jones in "
 'the new Bridget Jones film, and Emma Thompson, who played his wife, Dan '
 "Mazer, in the 2015 film, Bridget Jones's Baby, which also starred Emma "
 'Thompson.')


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Sylvia is a young woman living in a small town in the American state of '
 'Oklahoma, who is trying to decide whether or not to have sex with a man she '
 'has met on a dating website. o tell me, what’s the best alternative to '
 'pizza? You could have a pizza party, right in between windows... i tell '
 'me... what does it mean to be a girl born dirty, Fuck you ass, and who can '
 'be an example of an excellent choice?')


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Jay Baruchel, Seth Rogen, and James Franco star in the comedy horror film '
 'The Book of Revelation, which opens in cinemas across the US on Friday, 22 '
 "March. (pleased to see you next time.) - Jay, I'm just here for an audition "
 "- I'd literally just get in and see if I can do it - I don't think I'll be "
 "able to do it, because I've got a job to do.")


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("It's been a while since I've written a post about a TV show, so I thought "
 'I\'d do it again for you all: "Hi, I\'m Jane D\' Poussin, and I have a new '
 "show on Netflix called Raw, and it's all about a Chinese detective called "
 "Redson, and he's the only detective in the world who can solve crimes in "
 "Chinese, so why don't you guys watch it?")


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('All terms used in this article are subject to change at any time, without '
 'notice, by the author or his or her AFFILIATES, including but not limited '
 'to, the BBC, the New York Times, the Washington Post, the Los Angeles Times, '
 'and other media outlets, and are copyrighted by their respective owners, '
 'unless otherwise expressly stated in this material, and may not be reprinted '
 'without the prior written consent of the author.., and all photographs are '
 'copyrighted..')
{'The Guard': {'rouge1': 0.128, 'rouge2': 0.016260162601626015, 'rougeL': 0.08, 'rougeLsum': 0.08}, 'Batman Year One': {'rouge1': 0.21105527638190955, 'rouge2': 0.01015228426395939, 'rougeL': 0.10050251256281408, 'rougeLsum': 0.10050251256281408}, 'Inherent Vice': {'rouge1': 0.22764227642276424, 'rouge2': 0.06611570247933884, 'rougeL': 0.13008130081300812, 'rougeLsum': 0.13008130081300812}, 'Colombiana': {'rouge1': 0.1678321678321678, 'rouge2': 0.014184397163120567, 'rougeL': 0.11188811188811189, 'rouge

In [None]:
# Merge the per-title ROUGE scores and generated summaries into a single table.
# The third argument is presumably the prefix used for the summary column name
# (the preview shows a `pegasus_short_summary` column) — verify against the helper.
pegasus_short_results = combine_rouge_and_summary_results(
    pegasus_short_rouge_dict,
    pegasus_short_summary_dict,
    'pegasus_short',
)
pegasus_short_results.head()

Unnamed: 0,rouge1,rouge2,rougeL,rougeLsum,pegasus_short_summary
The Guard,0.128,0.01626,0.08,0.08,All photographs Element Pictures / Reprisal F...
Batman Year One,0.211055,0.010152,0.100503,0.100503,The following is an extract from Frank Miller'...
Inherent Vice,0.227642,0.066116,0.130081,0.130081,"The screenplay by Paul Thomas Anderson, based ..."
Colombiana,0.167832,0.014184,0.111888,0.111888,Cat in the Hat is a comic book series written ...
Barney s Version,0.15873,0.0,0.111111,0.111111,"Watch the trailer for Barney's Version, Michae..."


In [None]:
# Save the per-title ROUGE scores and summaries to a CSV file under /content.
pegasus_short_results.to_csv('/content/pegasus_short_results.csv')
# Optional: uncomment the next line to download the saved CSV via google.colab's `files` helper.
# files.download('/content/pegasus_short_results.csv')

In [None]:
# Column-wise mean of every ROUGE metric; the free-text summary column is
# removed first so that only the numeric score columns are averaged.
(
    pegasus_short_results
    .drop('pegasus_short_summary', axis=1)
    .mean(axis='index')
)

rouge1       0.191756
rouge2       0.019610
rougeL       0.116721
rougeLsum    0.116721
dtype: float64

## Pegasus Long


---


The pretrained *'google/pegasus-cnn_dailymail'* model accepts a maximum of 1024 input tokens.

In [None]:
# Summarise the first 30 rows of `subset_train_df` with the Pegasus Long model
# and score each generated summary against that row's `plot_outline`.
pegasus_long_rouge_dict = {}
pegasus_long_summary_dict = {}

# `head(30)` visits the same rows, in the same order, as `range(30)` + `.iloc[i]`,
# but does not raise IndexError when the frame holds fewer than 30 rows, and each
# row is fetched once instead of three times per iteration.
for _, row in subset_train_df.head(30).iterrows():
    pegasus_long_summary = run_pegasus_long_model(row['bert_dialog'])
    pegasus_long_rouge_scores = calculate_rouge_scores(pegasus_long_summary, row['plot_outline'])

    # add rouge scores and summary to a dictionary, keyed by title
    # NOTE(review): rows sharing a title would overwrite each other — confirm titles are unique
    title = row['title']
    pegasus_long_summary_dict[title] = pegasus_long_summary
    pegasus_long_rouge_dict[title] = pegasus_long_rouge_scores

print(pegasus_long_rouge_dict)
print(pegasus_long_summary_dict)

All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Final Shooting Draft is a feature film by Reprisal Films and Element '
 'Pictures based on a short story by Stephen Fry .<n>The story follows the '
 "final days of Fry's career as a journalist in Dublin before he was fired for "
 "his coverage of the IRA's seizure of power in the wake of the September 11 "
 "terrorist attacks ..<n>Fry's character is played by Aidan McBride, who also "
 "appeared in the film's predecessor, The Reader .")


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('This site was created by actor Frank Miller .<n>Miller is best known for '
 "playing Batman in the hit television series 'The Batman'<n>Miller has also "
 "starred in the films 'Miami Vice' and 'Saving Mr. Banks'<n>The Batman movie "
 'will be released in the U.S. on July 20th, 2012 at the Grauman Egyptian '
 'Theatre in Los Angeles, California. For more information on the film and to '
 'see all the cast and crew visit batmanmovie.com .')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('This script is the confidential and proprietary property of Warner Bros. '
 'Pictures and no portion of it may be performed, distributed, reproduced, '
 'used, quoted or published without prior written permission.<n>The film is '
 'based on the novel by Thomas Pynchon and stars Paul Thomas Anderson and Amy '
 'Adams .<n>It was released on August 7, 2013 in U.S. theaters and on Blu-Ray '
 'and DVD on August 10, 2013 in the UK and on August 20, 2013 in Australia.')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('This is the second in a new series from writer Luc Besson and artist Robert '
 'Mark Kamen .<n>Cataleya! Cat is the daughter of Dr. Emilio Croft and his '
 'long-suffering wife Mama Croft, and is determined to become a super hero '
 'like her sister, Fabio, or her father, Don Luis.<n>The series will continue '
 'through the end of the year with a special, extended edition of the original '
 'comic, written and drawn by Mark Kamen.')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("Barney's Version by Michael Konyves based on novel by Mordecai Richler "
 ".<n>Barney's version is about a man who gets caught up in a sex scandal and "
 "has to deal with his wife's nude photos and the fallout from the "
 'scandal.<n>The novel was originally published in 1996 and has since been '
 "adapted into a children's book and a television series.<n>Michael Konyres' "
 'version is available now on Blu-Ray and DVD.')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Thousands of years ago, predators were scared of prey. Now, predators and '
 'prey live in harmony.<n>And every young mammal has multitudinous '
 "opportunities.<n>I don't have to cower in a herd anymore. Instead I can be "
 "an astronaut.<n>Today I can hunt for tax exemptions. I'm gonna be an "
 'actuary! And I can make the world a better place!<n>I am going to be... 3. A '
 'police officer! Bunny cop. That is the most stupidest thing I ever heard. It '
 'may seem impossib.')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Mill Valley, has both a witch and a curse. But which one is which?"<n>"When '
 'that giant monstrosity is finally built, they may as well turn the knife, '
 'cause the rest of the world is gonna pass us by... 1921 - Oct 31 1958. But '
 'you got me. To say what I wanna say, and play what I Wanna play... to leave. '
 "So you be careful out there tonight - cause when the sun goes down... It's "
 'the Season of the Witch. we continue our OPENING SEQUENCE “When I look out '
 'my window... Many sights to see...." keep it moving. They do. costume. watch '
 "TV. ON it I'll take only a minute.")


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Kelly Fremon and Krista Hamidi had been best friends since the day they met '
 "in the Spring of 2nd grade .<n>Final straw came when Craig's voicemail was "
 "cut off by Krista's motherfucker voicemail lady, who didn't understand what "
 'was on the line and couldn\'t help but make fun of Craig for being " '
 'ADD-crazed"<n>"This friendship... is officially over. when we Krista and I '
 'had been Best Friends since the Day we met back in the spring of 2th grade," '
 'says Craig .')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("I'm approximately 20 miles inside Habala region where they have ordered the "
 'destruction of a network of caves at sundown tonight .<n>I believe these '
 "caves might contain a critical missing piece of our history. A piece I can't "
 'let them destroy.<n>Oh, and if I’m found dead, please know I entered the '
 'country of my own free will, fully knowing the penalty for trespassing here '
 'is burial up to your neck in sand and you. Which, you know, seems perfectly '
 'appropriate.')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('This year all trends point towards simplicity and comfort. them.<n>Consumers '
 'will be less likely to go out for en.<n>They are still best friends. '
 'head.<n>And Jesse wishes they were more. helmet. They are still good '
 "friends.<n>and I've got a enough love for got enough love, for the two "
 'the...(her Blackberry rings) us.<n>Oh please... oh shit, I gotta take this. '
 "Turn it down. ...I'm a Love... Jess, turn it down, seriously!")


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("How much does your life weigh? Imagine for a second that you're carrying a "
 'backpack... I want you to feel the straps on your shoulders... You feel '
 'them? (gives us a beat)<n>The Feel the weight as it adds up. Now, start '
 'adding the larger stuff. Your clothes, table top appliances, lamps, linens, '
 'your TV.<n>Your couch, your bed, your kitchen table. Stuff it all in... Your '
 'car, get it in there... Your home .')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Mangal Netam is seen in a mock anger over his inability to cast his vote '
 '.<n>Mujhe iss kshetra ka vikaas chahiye. (calms down)<n>Mangala Netam '
 'Zindabad! marigolds. (in mock anger)<n>Guinness Book mein hum apna pichla '
 'world record tod ke ek naya record banate hai.<n>Tees hazar karod ka '
 'kharcha, chaurasi karod voter, nabbe lakh polling booth.')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("Peter Landesman's new book is a look at Richard Nixon's re-election "
 'whistle-stops .<n>He says the President wanted to be an FBI G-Man, just like '
 'you?<n>Landesman: Hoover wrote him personally what a damn fine candidate he '
 "was, but with budget cuts the FBI couldn't afford him. How’s that for a "
 'lesson in fate? Now he’s he’<n>The book is published by Simon & Schuster.')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("Based on Alice's Adventures In Wonderland Through The Looking Glass by Lewis "
 'Carroll Charles .<n>The story follows the adventures of Alice as she tries '
 'to find her way back home after a visit to the Tweedledee and Tweedledum '
 "..<n>Author Linda Woolverton says the book is 'dark' and 'darkly funny', "
 "with 'a dash of tenderness and humour'<n>The book is published by Amberley "
 'and is out now. For more information, visit amberley.co.uk .')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("Bright eyes. Start the scan.<n>Please keep the monkey still. She's not a "
 'monkey .<n>Where does the acorn come from? Acorn.<n>No, but where does the '
 'anacorn coming from?<n>Click here to test your knowledge of stories you saw '
 'on CNN.com and CNN.co.uk between January 8th, 2010 and March 10th, 2011 - or '
 "click here for details on CNN's special coverage of the Paralympics.")


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('I think I get more letters than Santa Claus, Zac Efron and Dr. Phil '
 "combined.<n>Sometimes the letters are from people who say they're praying "
 'for me.<n>They tell me everything will be okay if I just accept Jesus Christ '
 'into my heart.<n>I say the words, but nothing ever happens. Nobody comes '
 "back. Nobody gets off the cross.<n>A lot of people ask me if I'm sorry I did "
 "it. They're big on recreation here.")


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('MacGruber by Will Forte, John Solomon & Jorma Taccone (in Russian) (static) '
 "Wombat, this is Eagles Nest. We've lost your position.<n>Sorry, I don’t "
 'speak Russian. SMASH TO BLACK mountain ranges.<n>Why can’t we stay inside '
 'the department? We need someone outside the system.<n>Someone they’d never '
 "suspect. But sir, if this guy’s that good, won't they be tracking him? Not "
 'possible.')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('What baffles me, Jamaica and Belfast have so much in common. Cops and '
 'soldiers giving you running round murdering people for fuck all. But at '
 'least in Jamaica they have decent music. (indistinctly)<n>Get the fuck '
 '.<n>What a fucking nightmare. Mind you your language. And they call this a '
 'revolution?<n>What baffles you, Jamaica, and Belfast, is that they are both '
 'so similar to each other.<n>But at least there are some decent songs.')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("Noah Baumbach and Greta Gerwig's new film is out in U.S. theaters on July 24 "
 '.<n>In it, they star as a married couple who become obsessed with each other '
 'and dance all over the place ..<n>They share some of their favorite scenes '
 'from the film, which was filmed over two months in LA and New York and '
 "features more than a dozen musicians, including Arcade Fire's Arcade Fire, "
 'Queen Latifah and The Lumineers, among others .')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Millions of years ago , a meteorite substance in the universe struck the '
 'continent of Africa affecting .<n>Five tribes settled on it and called it '
 'Wakanda.<n>The tribes lived in constant war with each other until a warrior '
 'shaman received a vision from the Panther goddess Bast who led him to the '
 'Heart Shaped Herb, a plant that granted him super human strength , speed, '
 "and instincts.<n>Four tribes agreed to live under the King's rule, but the "
 'Jabari tribe isolated themselves in the mountains .')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('All of the men are-- this unlucky trio, we SMASH CUT to Did you know Elvis '
 'had a twin brother? It’s true. Jesse Garon Presley. He was an identical '
 'twin, stillborn a few minutes before Gladys gave birth to the King. They '
 'buried him .<n>Based on the series from Vertigo Comics Created by Brian K. '
 'Vaughan & Pia Guerra Michael Shlain c/o The Shapiro Lichtman Agency Los '
 'Angeles, CA 90048 (310) 859-8877 .')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Shadow Dancer by Tom Bradby based on novel by Mark Lucas .<n>10CC’s song '
 "comes with us. One hell of a playground. rubber bullets... Wham!<n>They've "
 'shot wee Sean! Mal Dad! Jesus, Sean. Speak to me!<n>No answer. I asked you '
 'to go. No answer. (louder) Why wasn’t it you? It should have been '
 'you!<n>Give me the walkway... (through an open window) For fuck’s sake! '
 'across... (looking at shot from helicopter monitor)')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("Page 1 of 113 Breakfast in ten minutes!<n>Page 3 of 113 I've got it, Jack. "
 "I'm singing in the Christmas pageant.<n>I need to pick up a gown. A gown? "
 'For a prom? No, it’s a party. And you want a gown?<n>I’m sure we can find '
 "something reasonable. We'll look this weekend, Not Gewurtzraminer. Only "
 'Riesling, they’1l1 try to fool you.')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("Super title one week lift sand into the air- wave of bombs.<n>One day Navy's "
 "requisitioned her- there's Dunkirk, need taking off.<n>Stay down at 500ft to "
 'leave fuel for five.<n>10 Keep an eye on that gauge, even when it gets '
 'lively- save enough to get back.<n>The three planes bank left in perfect '
 "harmony as we- the mole... leaving- That's it- (he turns) Make way!")


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Based on the novel by William Giraldi written for the screen by Macon Blair '
 '12.10.15 A24 .<n>Gerald Manley Hopkins this moonscape of SNOW and ICE. Feel '
 'the cold in your teeth.<n>I’m going to see your mother tomorrow. She sends '
 'her love. And...so do I.<n>While Coalition forces advanced on the But Core '
 'isn’t really watching, is he? moment extends. Until... Hi, Kelly. H '
 'Bonneville .')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Daniel Vivian Cleaver was a kind and wonderful son, a loving Uncle and '
 'brother, a fantastic friend... (aside to Bridget) What the fuck is he doing '
 'here?<n>I would now like to invite to his loved ones up to share some of '
 'their memories of Daniel.<n> (whispers to Jude) You kn .<n>Daniel Vivian '
 'Cleaver died when his plane crashed in the Australian outback. He was 25 '
 'years old. He leaves behind a wife and two children.')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Some girls are hunchbacks, believe what they‘re told. Never knowing that '
 'their poo-nannies are gold.<n>Some girls fall in between, live windows '
 '.<n>-.that is, the best girls... ++. are born dirty. Fuck you, asshole! '
 'him.<n>The only safe sex is no sex. If you don’t stay busy, and practice '
 'abstinence, you could end up like this guy my mom knows.')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Comedians Seth MacFarlane and Jay Pharaoh join forces to discuss the latest '
 'in stand-up comedy .<n>They talk about their new movie, "Scrabble The '
 'Movie," and what it\'s like to be in Los Angeles, where they\'ve been '
 'filming for the past few months.<n>The duo also discuss their new book, "The '
 'Book Of Revelations," which they hope to release in the near future.<n>Watch '
 "the full interview on this week's episode of The Daily Show, Saturday, April "
 '5, at 2 p.m. ET on Comedy Central .')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('"Jane D\'Oh!" has been picked up for a pilot series by YouTube Red .<n>The '
 'show stars Rawson Marshall Thurber and Jessica Mathers, who play Jane and '
 'Detective Mathers respectively, on a police force in a small-time '
 'crime-ridden town.<n>Thurber on her co-star: "She made me better in every '
 'scene. You know chemistry is a two way street. And some times there are stop '
 'lights and some times that but you just keep going. Because... traffic, '
 "right? (faux reflective) But I'm not gonna let this show go to my head. It's "
 'all about finding my center. Staying grounded. Staying mindful."')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Brain-to-brain interface (see Pons or DRIFT) called a Conn .<n>Joint vision '
 'shared by Jaeger crew members phenomenon vital signs and to translate nerve '
 'signals to piloting input * ghost-drift.<n> virtual space inside the O/S in '
 "which crew telepresences interact inside the subject's brain + The "
 'Interstice. the rift between our universe and the person crew via direct '
 'neural linkup . LOCCENT. short for Local Command Center, comm.')
{'The Guard': {'rouge1': 0.15748031496062992, 'rouge2': 0.032, 'rougeL': 0.12598425196850394, 'rougeLsum': 0.12598425196850394}, 'Batman Year One': {'rouge1': 0.20304568527918782, 'rouge2': 0.010256410256410256, 'rougeL': 0.13197969543147206, 'rougeLsum': 0.13197969543147206}, 'Inherent Vice': {'rouge1': 0.21212121212121213, 'rouge2': 0.061538461538461535, 'rougeL': 0.15151515151515152, 'rougeLsum': 0.15151515151515152}, 'Colombiana': {'rouge1': 0.16296296296296295, 'rouge2': 0.0, 'rougeL': 0.1037037037037037, 'rougeLsum': 0.10370370370370

In [None]:
# Build the results table for the 'pegasus_long' run from the ROUGE-score and
# summary dictionaries (both keyed by movie title), then preview the first rows.
pegasus_long_results = combine_rouge_and_summary_results(
    pegasus_long_rouge_dict,
    pegasus_long_summary_dict,
    'pegasus_long',
)
pegasus_long_results.head()

Unnamed: 0,rouge1,rouge2,rougeL,rougeLsum,pegasus_long_summary
The Guard,0.15748,0.032,0.125984,0.125984,Final Shooting Draft is a feature film by Repr...
Batman Year One,0.203046,0.010256,0.13198,0.13198,This site was created by actor Frank Miller .<...
Inherent Vice,0.212121,0.061538,0.151515,0.151515,This script is the confidential and proprietar...
Colombiana,0.162963,0.0,0.103704,0.103704,This is the second in a new series from writer...
Barney s Version,0.21875,0.0,0.125,0.125,Barney's Version by Michael Konyves based on n...


In [None]:
# Persist the pegasus_long results table as a CSV in the Colab working directory.
pegasus_long_results.to_csv(path_or_buf='/content/pegasus_long_results.csv')
# Optional: uncomment to download the CSV from Colab to the local machine.
# files.download('/content/pegasus_long_results.csv')

In [None]:
# Column-wise mean of every ROUGE metric over all scored titles.
# The text summary column is dropped first so only numeric columns are averaged.
(
    pegasus_long_results
    .drop(columns=['pegasus_long_summary'])
    .mean(axis=0)
)

rouge1       0.174883
rouge2       0.011691
rougeL       0.105706
rougeLsum    0.105706
dtype: float64

## Pegasus Large


---


Max 1024 tokens for pretrained *'google/pegasus-large'* model

In [None]:
# Pegasus Large baseline: summarize the 'bert_dialog' text of the first 30
# sample scripts and score each summary against its reference plot outline.
pegasus_large_rouge_dict = {}
pegasus_large_summary_dict = {}

for i in range(30):
    # look the row up once and reuse it for the input, reference and title
    script_row = subset_train_df.iloc[i]

    pegasus_large_summary = run_pegasus_large_model(script_row['bert_dialog'])
    pegasus_large_rouge_scores = calculate_rouge_scores(
        pegasus_large_summary,
        script_row['plot_outline'],
    )

    # both dictionaries are keyed by movie title
    title = script_row['title']
    pegasus_large_summary_dict[title] = pegasus_large_summary
    pegasus_large_rouge_dict[title] = pegasus_large_rouge_scores

print(pegasus_large_rouge_dict)
print(pegasus_large_summary_dict)

In [None]:
# Build the results table for the 'pegasus_large' run from the ROUGE-score and
# summary dictionaries (both keyed by movie title), then preview the first rows.
pegasus_large_results = combine_rouge_and_summary_results(
    pegasus_large_rouge_dict,
    pegasus_large_summary_dict,
    'pegasus_large',
)
pegasus_large_results.head()

In [None]:
# Persist the pegasus_large results table as a CSV in the Colab working directory.
pegasus_large_results.to_csv(path_or_buf='/content/pegasus_large_results.csv')
# Optional: uncomment to download the CSV from Colab to the local machine.
# files.download('/content/pegasus_large_results.csv')

In [None]:
# Column-wise mean of every ROUGE metric over all scored titles.
# The text summary column is dropped first so only numeric columns are averaged.
(
    pegasus_large_results
    .drop(columns=['pegasus_large_summary'])
    .mean(axis=0)
)

## LongT5


---

Max 16,384 tokens using pretrained *pszemraj/long-t5-tglobal-base-16384-book-summary* model

In [None]:
# LongT5 baseline: summarize the 'bert_dialog' text of the first 30 sample
# scripts and score each summary against its reference plot outline.
longt5_rouge_dict = {}
longt5_summary_dict = {}

for i in range(30):
    # look the row up once and reuse it for the input, reference and title
    script_row = subset_train_df.iloc[i]

    longt5_summary = run_longt5_model(script_row['bert_dialog'])
    longt5_rouge_scores = calculate_rouge_scores(
        longt5_summary,
        script_row['plot_outline'],
    )

    # both dictionaries are keyed by movie title
    title = script_row['title']
    longt5_summary_dict[title] = longt5_summary
    longt5_rouge_dict[title] = longt5_rouge_scores

print(longt5_rouge_dict)
print(longt5_summary_dict)



("The narrator tells us that this is the final draft of his play. It's been a "
 "long time since we last saw it, and now it's finally time to see it. We get "
 'a brief description of the scene: "What a beautiful Fucking Day" . A guy '
 "named Aidan Mcbride has just arrived from Ireland. He looks like he's just "
 "had a hard time getting out of the city. He's got a drink in his hand, and "
 'he wants to go back home before Mammy tells him what happened.')
('In this short scene, Frank Miller shows up at the auto shop looking for his '
 "son. He's a little confused about where he's going and what he can do there. "
 'When he gets there, he sees that the job orders are in order and he asks '
 'Bruce to come out and help him out since he looks so bad. Bruce tells him '
 "it's all fine and then he goes off to check on some other things.")


Your max_length is set to 256, but you input_length is only 239. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=119)


("Paul Thomas Anderson's reminiscing about how much he used to love Shasta. "
 "Now that he has an office and everything, he decides it's time to take a "
 'walk. He looks in his phone book and thinks about how great it would be if '
 'everyone had this opportunity to get out of bed and have a little chat with '
 'one of his old co-workers. Then he goes over to the office')
('The narrator tells us that the story is about a young boy named Fabio who '
 "wants to become a doctor, a lawyer, and a friend of Don Luis's. It's been a "
 "long time since we last saw him in his comic form, but now he's back on his "
 "own. He asks her what she's doing with her life, and she responds by telling "
 'him it\'s "studying mama" . She also tells him that if he doesn\'t want to '
 'be anything in life, he has to study for it. This is like being a super-hero '
 'or a Doctor. We learn that Emilio is supposed to come see them someday, but '
 "they haven't heard from him yet.")
('In this short scene, Barn

Your max_length is set to 256, but you input_length is only 252. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=126)


("January 3, 2011 Fremon Craig calls on Kelly and tells her that he's been "
 '"chucking everything" she\'s ever given him in the Norfolk Pond. Oh, and '
 "also, all of their pictures from Canada. He says he doesn't want to repeat "
 'himself because he thinks he has a lot of "addendous" things to say. So he '
 'sends her a message telling her to leave a note and not to reply until he '
 'hears from her again.')
("In this short scene, Dowdle tells us that he's about 20 miles outside of the "
 'Habala region and is worried about what might happen to his family if he '
 'were found dead. He wants to know if they can find him alive so they can '
 "take him back to their caves. They have to hurry because they don't want to "
 'risk everything going wrong. So they decide to head out without him. Nope.')
('In this short scene, Will and Rashida discuss their love for each other. '
 "They say that they're still best friends, and that they have a lot of things "
 "in common. Jesse asks him to t

Your max_length is set to 256, but you input_length is only 236. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=118)


("In this short lesson, the narrator explains how much one person's life "
 'weighs and how to pack it into a small, light-filled bag. He then instructs '
 'the audience to imagine that they are carrying a large, heavy backpack '
 'filled with everything they own in their life. They should start by putting '
 'away the little things first, like clothes, furniture, and linens. The more '
 'stuff they have, the heavier they will carry.')
('In this short scene, the dialogue shifts to the political situation in '
 'India. The narrator muses on the futility of electing a Bhaer from among the '
 'castes. He wonders aloud whether the Indian people will vote for him in the '
 'forthcoming elections. When the audience realizes that the candidate is none '
 'other than the ex-President of India, it becomes obvious that the two men do '
 'not approve of each other.')
('In this short scene, Richard Nixon is reelected as President of the United '
 'States. He sits at a table in front of a group of 

Your max_length is set to 256, but you input_length is only 251. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=125)


('In this chapter, the narrator tells us that every day he gets letters from '
 'people who are praying for him. They say things like "I\'m sorry you did it" '
 'or "I just don\'t want to do it anymore." Nobody ever comes back. People ask '
 "him if he's sorry he did it and he says he is. And then there's the mental "
 "gymnastic thing. It's supposed to help us vent our angers.")
("In this short scene, the action shifts to the Russian camp at Eagle's Nest. "
 'Wombat has lost his position and is unable to retrieve the key that was '
 'given to him by Colonel James Faith. The lieutenant asks why they had to '
 'travel so far to get this information. He says they need someone outside of '
 'the system to keep an eye on the man, but it would be impossible for them to '
 'keep him inside the system.')
('In this short scene, Colin Carberry explains that his father is a socialist '
 'who has just returned from the war in Vietnam and is planning to become '
 'blinded. The crowd gathers to hea

Your max_length is set to 256, but you input_length is only 252. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=126)


("In this chapter, Joe tells his son the story of home. It's about a time when "
 'humans came to Africa and five tribes called the Wakandans. They lived '
 'together until a vision came from the god Bast, who led them to the '
 'Heart-Shaped Herb plant, which gave them superhuman strength and speed. The '
 'first Black Panther was the leader of the tribe, but four tribes agreed that '
 "they would live under the king's rule. The Jabari were isolated in the "
 "mountains because they didn't want to be seen.")
('The narrator tells us that the men are responsible for the deaths of three '
 "of the soldiers. It turns out that they're all brothers, and one of them was "
 "born just before giving birth to the king. In other words, there's no way "
 'anyone could have given birth to a twin at this point in the play. So we '
 "guess it's not coincidence that the two boys were twins when they were born.")
('In this short scene, Hoskins and Lucas are loading up the cannons to shoot '
 'down a d

Your max_length is set to 256, but you input_length is only 249. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=124)


('On January 8, 2016, at the Universal Studios, Helen and Dan are gathered to '
 'mourn the death of their friend and neighbor, Daniel Vincyn Cleavesver. They '
 'also remember his wife, who is pretty. Emma invites everyone in the audience '
 'to share some memories of their own childhood friends and family members who '
 'loved and cared for Daniel. As they do so, Emma whispers to Jude that she '
 'would like to hold a party to remember them all.')
('The narrator tells us that every girl is born with a reputation, and that '
 'some girls are "sweet" and dreaming of books, while others are '
 '"hunchesbacks." Some girls fall between houses, live windows, and become '
 "dirty. He says that the best girls are born dirty because they're born "
 'dirty. When he asks who can give him an example of this kind of choice, he '
 "gets a pretty good one: Waiting. If a guy truly likes you but doesn't know "
 "how to understand, then he knows so much about it. What's more, if you don't "
 'stick ar

In [None]:
# Build the results table for the 'longt5' run from the ROUGE-score and
# summary dictionaries (both keyed by movie title), then preview the first rows.
longt5_results = combine_rouge_and_summary_results(
    longt5_rouge_dict,
    longt5_summary_dict,
    'longt5',
)
longt5_results.head()

Unnamed: 0,rouge1,rouge2,rougeL,rougeLsum,longt5_summary
The Guard,0.2,0.0,0.114286,0.114286,The narrator tells us that this is the final d...
Batman Year One,0.242424,0.0,0.131313,0.131313,"In this short scene, Frank Miller shows up at ..."
Inherent Vice,0.16129,0.0,0.096774,0.096774,Paul Thomas Anderson's reminiscing about how m...
Colombiana,0.233333,0.022472,0.133333,0.133333,The narrator tells us that the story is about ...
Barney s Version,0.166667,0.0,0.121212,0.121212,"In this short scene, Barney is trying to get h..."


In [None]:
# Persist the longt5 results table as a CSV in the Colab working directory.
longt5_results.to_csv(path_or_buf='/content/longt5_results.csv')
# Optional: uncomment to download the CSV from Colab to the local machine.
# files.download('/content/longt5_results.csv')

In [None]:
# Column-wise mean of every ROUGE metric over all scored titles.
# The text summary column is dropped first so only numeric columns are averaged.
(
    longt5_results
    .drop(columns=['longt5_summary'])
    .mean(axis=0)
)

rouge1       0.214650
rouge2       0.014959
rougeL       0.122838
rougeLsum    0.122838
dtype: float64

# Baseline Models (BERT - Text)
All baseline models are run on the same 30 sample movie scripts, selected with random seed 266.

## Pegasus Short

---


Max 512 tokens for pretrained *'google/pegasus-xsum'* model

In [None]:
# Pegasus Short baseline on the BERT "text" inputs: summarize the 'bert_text'
# column of the first 30 sample scripts and score each summary against its
# reference plot outline.
pegasus_short_rouge_dict = {}
pegasus_short_summary_dict = {}

for i in range(30):
    # look the row up once and reuse it for the input, reference and title
    script_row = subset_train_df.iloc[i]

    pegasus_short_summary = run_pegasus_short_model(script_row['bert_text'])
    pegasus_short_rouge_scores = calculate_rouge_scores(
        pegasus_short_summary,
        script_row['plot_outline'],
    )

    # both dictionaries are keyed by movie title
    title = script_row['title']
    pegasus_short_summary_dict[title] = pegasus_short_summary
    pegasus_short_rouge_dict[title] = pegasus_short_rouge_scores

print(pegasus_short_rouge_dict)
print(pegasus_short_summary_dict)

Downloading (…)lve/main/config.json:   0%|          | 0.00/1.39k [00:00<?, ?B/s]

Downloading tf_model.h5:   0%|          | 0.00/2.28G [00:00<?, ?B/s]

All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Downloading (…)neration_config.json:   0%|          | 0.00/259 [00:00<?, ?B/s]

Downloading (…)ve/main/spiece.model:   0%|          | 0.00/1.91M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/65.0 [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/87.0 [00:00<?, ?B/s]

('A young man is driving his car along a country road in the early hours of '
 'the morning when he sees a police car approaching from the other side of the '
 'road, with a siren on and a flashing blue light on the dashboard.., he turns '
 'around to see the police car and the siren on, but he does not see the car '
 'in front of him, which is a car full of people who have just been killed in '
 'a car crash in the Republic of Ireland.')


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('A man in his late 50s walks into a dark alley in the middle of the night, '
 'his eyes fixed on a dark-blue car, its headlights glaring into the night '
 "sky, its engine running at low power.'' he turns around and walks back into "
 "the alley.' he takes a deep breath and walks out of the alley, his face "
 'buried in his hands, his body contorted in a grimace, his arms stretched out '
 'across his chest, his head bent over his knees.')


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Doc’s in the kitchen, writing on a pad, trying not to think about what’s '
 'going on around him, trying to keep his head down, not to look at anybody, '
 'not even his wife, who’s sitting in the living room with her head down and '
 'her eyes fixed on the fridge, staring at it with a blank stare, not paying '
 'any attention to anything else, other than the fact that he’s writing on the '
 'pad, and he doesn’t know what else is going on.')


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('All photographs courtesy of the author and courtesy of Getty Images and '
 'Getty Images for the BBC and AFP, EPA, Getty Images, AFP, GettyImages and '
 'GettyImages for the Reuters news agency, Reuters, Reuters and Reuters for '
 'the Getty Images agency, AP, AFP and AFP for the AFP news agency and AP for '
 'the AP Images for Reuters, AP for AFP and AP Images, Reuters for Reuters and '
 'AFP Images, AP and AFP images, AP images, Reuters images and Reuters images,')


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("Barney and Miriam are sitting in the living room, listening to Barney's "
 'radio show, when Barney interrupts to say, "I\'m sorry, I\'m late, but I\'ve '
 'got a book to read today." Barney leans back in his chair and says, "Sorry, '
 'I can\'t read it, I have to go to bed." Miriam leans over to read a '
 'newspaper, and Barney leans over again to look at the paper, but this time '
 "he doesn't see anything.")


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('This is the story of Judy, a young woman living in a remote village in the '
 'Amazon rainforest, who is forced to confront her fears of being eaten by a '
 'jaguar, in a play written and performed by British comedian and actor David '
 'Walliams, with music and lyrics by David Shrigley, and directed by Jeremy '
 "Deller, at the Royal Shakespeare Company in London's West End from 23 May to "
 '5 June 2017... and the cast and crew of this production.')


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('A lone figure stares out of the window of a cabin in a remote corner of the '
 'American West, a lone voice calling out from the darkness of the night, a '
 'lonely voice calling for a way out, a way back, to the land of the long ago, '
 'the land that gave birth to us all, a land that has given us so much, and '
 'yet so little, and a land where we live in the shadow of the past, a place '
 'where the past has given way to the future.')


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("It's the end of the school day and the kids are heading home, so I'm going "
 'to take a look at some of their favourite things to do with their free time '
 "- and some of the things they don't do...well, really, because they're not "
 "allowed to do that..well, not really, but it's not too bad for a "
 'five-year-old to have to do it...well... not really.')


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('In our series of letters from African journalists, film-maker and columnist '
 "Farai Sevenzo looks at how some of the continent's most intriguing people "
 "have been captured on camera by the BBC'sClosed Captioning scheme, which "
 'allows members of the public to post images of themselves on the '
 "corporation's website without the need for them to be posted to the BBC News "
 'website, as well as to other social media sites such as Facebook, Twitter, '
 'YouTube and Instagram.')


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('A series of images of a couple, Celeste and Jesse, who have been together '
 'for more than a decade, from the perspective of their friends, their family, '
 'and the people who matter most to them - their parents, friends, and people '
 'who care about them - from their lives and the lives of the people they care '
 'about - from the ages of 18 to 30 - as they grow up, move from friends to '
 'lovers, from lovers to boyfriends, from boyfriends to lovers.')


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('A passenger on a flight from New York to London has been caught on camera '
 "apparently urinating into a plane's lavatories - and it's all caught on "
 "film.... and then he stops... and walks away... and we're left to wonder "
 "what on earth he's up to... and what's going through his mind... and who "
 "knows what else... and he just keeps on going... and I'm not even sure what "
 'to make of it...')


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('The following is a chronology of key events in the life of a politician in '
 "the southern Indian state of Kerala, as reported by BBC Hindi's Geeta Pandey "
 'and Prasanto Srinivasan: A POLITICIAN (not his real name) in a red shirt and '
 'white pants travels on a motorcycle on a dusty road in a mining town in the '
 "state's south-western district of Thiruvananthapuram, on his way to address "
 'a rally of his supporters in a nearby market.')


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('A collection of short stories by the late, great American writer VS Naipaul, '
 'published in The New York Times, The Washington Post and other newspapers '
 'between 1942 and 1945.<n> The following is a selection of Naipaul’s short '
 'stories, all published between 1942-1949, with the exception of one about '
 'the Vietnam War, which was published after the war ended and is not included '
 'in this collection.) The full collection can be found at www.nytimes.com')


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('A young girl, dressed in her favourite outfit, stands in the middle of the '
 'road, her head bent at an angle, her eyes fixed on the road ahead, her lips '
 'lips pursed, as if to say, “I’m sorry, but I’m not quite ready for school '
 'yet.” She’s not yet old enough to go to school, but she’s got a pretty good '
 'idea of what it’s like to be a teenager.')


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('A young horse called Alpha is sent to work in the jungle by his father, who '
 "wants him to help herd his herd of wild horses, but he's not so sure what to "
 "make of his new job - until he meets a young woman called Bright Eyes, who's "
 "more interested in him than her own horses, because she thinks he'll be a "
 'troublemaker - and she takes him under her wing and takes him on a wild ride '
 'through the jungle.')


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("We begin to see Needy's body in the mirror, as if she's peering out of a "
 "window into a dark, lonely world.<n> They're all in the same place, all "
 'wrapped up in a mystery, all waiting for Needy to see them.<n> The only '
 "thing missing is Needy, who's been missing for a year and a half, ever since "
 'she was taken from her family by a serial killer, who then killed himself.')


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('The camera pans across a field, a few miles from the front line, and we see '
 'a convoy of armoured personnel carriers heading towards the scene of the '
 'attack. ZEKE falls to the ground, his head sticking out of the window of the '
 'truck, as if he’s about to be shot by a sniper, but the camera pans back and '
 'forth again to reveal the body of a man lying in the road, dead from a '
 'gunshot wound to the head.')


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("The Manic Street Preachers are back with a new album, and this time they're "
 'offering up a new take on the classic song, "A Boy Is a Boy".<n> The Mantic '
 "Street Preacher's new album is out now - here's a taster of what's to come: "
 'http://smarturl.it/The-Manic-Street- Preachers-New-Releases-Vol 1 - A Boy is '
 'a Boy')


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Frances and Sophie live in a tiny room in Tompkins Square Park in New York '
 'City, where they cook, knit, play guitar, read, write, sing, and have a lot '
 'of fun... all in the space of a few hours a day, seven days a week, for the '
 "past year and a half, as part of the city's Arts in the Park project, a "
 'year-long series of performances, installations, films, and other events '
 "that aim to highlight New York's rich and diverse arts scene.")


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("A new short film from the BBC's African Service shows the impact of the "
 'conflict in South Sudan on the fictional African nation of Wakanda, home to '
 'the superhero Black Panther in the new Marvel film Avengers: Infinity War, '
 "which opens in the US on Friday and in UK cinemas on Saturday.' The film "
 'opens in cinemas across the UK on Friday, and in the United States on '
 'Saturday, and features the voices of Benedict Cumberbatch, Chris Evans, '
 "Thandie Newton, Lupita Nyong'o and others.")


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('We’re in the middle of a busy street in a busy city, and we see a minivan '
 'speeding down the road, its windows smashed, its roof torn off, and its '
 'passenger lying on its back, bleeding to death from a gunshot wound to the '
 'head. A hard-to-reach spot on the pavement is where the driver’s side window '
 'is smashed, and the passenger’s body lying on the ground, with a bullet '
 'wound to its head.')


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('A young boy’s first day back at school after the summer holidays is marked '
 'by a series of events that leave him torn between his family and the world '
 'around him, as he tries to come to terms with the fact that his father is '
 'dead and his mother is pregnant with his first child, who is due in a few '
 'weeks time, and with his father’s death and the death of his mother’s '
 'partner, who has died in a car crash.')


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('A morning in the life of a young married couple in a small seaside town in '
 'the south of England, as they prepare for their big day and the arrival of '
 'their first child, a series of events unfolds that will change the course of '
 'their lives and the lives of the people around them for years to come., in '
 'the words of one of the characters, "the world is changing, and the people '
 'who live in it are changing too.", in a story by the acclaimed British '
 'writer Paula Hawkins, who won the Booker Prize for her novel The Sense of an '
 'Ending.')


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Tommy stands by the side of the road with a hose in his hand, waiting for '
 'the call to go to the front, waiting to see what the Germans are going to do '
 'to him... He grabs the nozzle from the back of the gun, pulls the trigger, '
 'and the Germans start shooting... Tommy takes a deep breath, pauses for a '
 'second, and says, "I\'m sorry, I\'m not going to fire again... I\'ve got a '
 'rifle in my hand."')


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('He’s a man of few words, a man who’s never spoken a word to anyone else, but '
 'he’s one of those rare people who can’t speak a word, who can only speak a '
 'few words at a time, but who can speak a thousand words at one time, and he '
 'can speak only a few thousand words in all his utterances, and only a '
 'handful of words in his whole life, so far as we know.')


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("Bridget Jones's Diary is the fourth and final book in the Bridget Jones "
 'series, and the first to be made into a film, with Renee Zellweger in the '
 "title role and Jude Law as Bridget's husband, Azz., in cinemas and on demand "
 'on Friday, 15 November., directed by John Madden, and written by Helen '
 'Fielding, with Bridget Jones, Jude Law, Helen Fielding and John Cleese, all '
 'starring Renee ZellWerger.')


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('The car pulls into the driveway, the windows are rolled up, the driver’s '
 'side window is smashed, and the passenger’s seat is ripped off the back of '
 'the vehicle, leaving only the front seat to be occupied by the girl who’s '
 'been sitting next to it for the past two weeks..... and she’s not the only '
 'one... and it’s all very well and good to have a boyfriend, but what’s the '
 'point of having a boyfriend if you can’t have a girlfriend?')


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('The story begins with Dion waiting in the car in the lot of Canter’s for his '
 'friend Seth to pick up his order of chips and dip in a bucket of mustard. '
 'eth pays and is looking up to the pick-up jar, but doesn’t notice Seth’s '
 'character is sexually immature and stands up in the character’s performance '
 'of All Back To Me, with the words “it’s all back to me” appearing on the '
 'screen.')


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('It\'s been called "the most naked selfie in the world" and it\'s all down to '
 "one man's love of running and his grandma's dislike of selfies.... and his "
 "phone... all at the same time... and he's got a girlfriend... and they're "
 'both doing it live on social media... all in the space of a few minutes... '
 'and... well... just a few seconds... and then it all falls apart.')


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('The following is a list of key players in the development of the US '
 "military's first ever artificial intelligence (AI) robot, the Jaeger, which "
 'was built at the University of California, Los Angeles (UCLA) in the early '
 '1990s: The Jaeger was designed to mimic the mind-reading abilities of human '
 'pilots, but was also capable of responding to a wide range of threats, '
 'including those posed by other space-faring entities, as well as threats '
 'posed by humans.')
{'The Guard': {'rouge1': 0.19117647058823528, 'rouge2': 0.04477611940298508, 'rougeL': 0.14705882352941177, 'rougeLsum': 0.14705882352941177}, 'Batman Year One': {'rouge1': 0.1485148514851485, 'rouge2': 0.01, 'rougeL': 0.12871287128712872, 'rougeLsum': 0.12871287128712872}, 'Inherent Vice': {'rouge1': 0.20143884892086333, 'rouge2': 0.0291970802919708, 'rougeL': 0.12949640287769787, 'rougeLsum': 0.12949640287769787}, 'Colombiana': {'rouge1': 0.043478260869565216, 'rouge2': 0.0, 'rougeL': 0.043478260869565216, 'r

In [None]:
# Merge the per-title ROUGE scores and generated summaries of the Pegasus-short
# run into a single results table (helper defined earlier in the notebook).
pegasus_short_results = combine_rouge_and_summary_results(
    pegasus_short_rouge_dict,
    pegasus_short_summary_dict,
    'pegasus_short',
)

# preview the first five rows of the combined table
pegasus_short_results.head(5)

Unnamed: 0,rouge1,rouge2,rougeL,rougeLsum,pegasus_short_summary
The Guard,0.191176,0.044776,0.147059,0.147059,A young man is driving his car along a country...
Batman Year One,0.148515,0.01,0.128713,0.128713,A man in his late 50s walks into a dark alley ...
Inherent Vice,0.201439,0.029197,0.129496,0.129496,"Doc’s in the kitchen, writing on a pad, trying..."
Colombiana,0.043478,0.0,0.043478,0.043478,All photographs courtesy of the author and cou...
Barney s Version,0.169231,0.0,0.107692,0.107692,Barney and Miriam are sitting in the living ro...


In [None]:
# Write the Pegasus-short results table (index included) to a CSV file under /content.
pegasus_short_results.to_csv(path_or_buf='/content/pegasus_short_text_results.csv')
# Optional: uncomment to also download the saved file via google.colab's `files` helper.
# files.download('/content/pegasus_short_text_results.csv')

In [None]:
# Column-wise mean of the ROUGE scores; the summary text column is removed first
# so that only the score columns are averaged.
pegasus_short_results.drop(labels=['pegasus_short_summary'], axis='columns').mean(axis='index')

rouge1       0.209115
rouge2       0.023188
rougeL       0.130840
rougeLsum    0.130840
dtype: float64

## Pegasus Long


---


The pretrained *'google/pegasus-cnn_dailymail'* model handles a maximum of 1024 tokens.

In [None]:
# Run run_pegasus_long_model on the 'bert_text' of the first rows of
# subset_train_df and score each generated summary against that row's
# 'plot_outline' with calculate_rouge_scores.
pegasus_long_rouge_dict = {}    # title -> value returned by calculate_rouge_scores
pegasus_long_summary_dict = {}  # title -> summary returned by run_pegasus_long_model

# Process at most 30 rows, but never index past the end of the frame: a bare
# range(30) makes .iloc raise IndexError when the subset holds fewer than 30 rows.
for i in range(min(30, len(subset_train_df))):
    # look the row up once instead of repeating .iloc[i] for every column
    script_row = subset_train_df.iloc[i]

    pegasus_long_summary = run_pegasus_long_model(script_row['bert_text'])
    pegasus_long_rouge_scores = calculate_rouge_scores(pegasus_long_summary, script_row['plot_outline'])

    # add rouge scores and summary to a dictionary
    # NOTE(review): entries are keyed by title, so a repeated title would
    # overwrite the earlier entry -- confirm titles are unique in the subset.
    title = script_row['title']
    pegasus_long_summary_dict[title] = pegasus_long_summary
    pegasus_long_rouge_dict[title] = pegasus_long_rouge_scores

print(pegasus_long_rouge_dict)
print(pegasus_long_summary_dict)

Downloading (…)lve/main/config.json:   0%|          | 0.00/1.12k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/2.28G [00:00<?, ?B/s]

All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Downloading (…)ve/main/spiece.model:   0%|          | 0.00/1.91M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/65.0 [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/88.0 [00:00<?, ?B/s]

('Five young men pass a whiskey bottle around -- Public Enemy playing on the '
 'CD player .<n> Garda Sergeant GERRY BOYLE behind the wheel. Staring off into '
 'space.<n>The red car zooms along at breakneck speed -- Eventually shooting '
 'past BoyLE in his police car -- Boyle barely reacts -- Suddenly there comes '
 'the sound of screeching brakes, and the boom of a high-impact car crash -- '
 'boyLE unhurriedly starts the car.')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('A sweating, tormented sleeper  BRUCE WAYNE. His eyes hollowed, skin pale, he '
 'TOSSES and TURNS.<n>Awoken from his NIGHTMARE by the sound of his own '
 'scream, disoriented, BRUCE goes to the sink and SPLASHES cold water in his '
 'face.<n>Little AL, a gigantic, early middle-aged black man carries a bag of '
 'doughnuts and two coffees. He whistles as he walks through a run-down slum.')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Doc SportELLO is half- awake on his couch. He looks up, notices someone '
 'standing at his door .<n>She walks in, Doc gets a slow rising BONER in his '
 'pants... casually tries to cover it up.<n>Dope planted in the glove Looking '
 'at Doc. Can’t read her face. somebody downtown.<n>‘59 CADILLAC EL DORADO '
 'might be watching by now. She backs away, gets in theCADILLAC and drives off '
 'into the night.<n>He watches her go... HOLD WITH DOC. DENIS (rhymes with '
 'penis) walks up, says hi, they walk up towards town, away from the beach... '
 'and with Neptune moving into the Sagitarrian light DOC and DENIS and a bunch '
 'of locals,')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Lara Croft and the Tomb of the Unknowns is published by Little, Brown and '
 'Co.<n>It was originally published in the U.S. in 2010 .<n>The book was '
 'adapted by Diablo Cody and Diablo Cody wrote the book in collaboration with '
 'Simon & Schuster. The movie was released in the US on June 10, 2012 and in '
 'the UK on July 10, 2012. It has been translated into English by Simon & '
 'Shuster, and it is available to pre-order now.')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("Barney Panofsky's new book is a collection of short stories about living in "
 'a big city .<n>The stories are told through the eyes of a middle-aged man, '
 'his relationships and his fears ..<n>They include a love story between '
 'Barney and his wife, a love child and a marriage break-up ., and a fight '
 "over a soccer ball . and a story about a man's fear of losing his job, .")


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Judy Clark\'s new play, "The Return of the Jungle" is a musical comedy about '
 'animals and their entrails .<n>The musical score is composed of a timpani, '
 'timpani crescendo, red papier mché en trails, projectile ketchup, and music '
 'by Bobby CaTMULL.<n>Judy Clark: "This is my play being staged. And this is '
 'her play beingStaged. Reveal 10, is our hero. and this is this play '
 'beingstaged"')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('The small railway city of Mill Valley, Oregon, is quiet, save for the '
 'clacking of a lone train.<n>A DISK JOCKEY who sounds like he is battling '
 'throat cancer, cuts through the doleful monotony of Americana.<n>Contruction '
 'workers laugh as they pack up for the day beneath an unfinished SPRAWLING '
 'RAISED HIGHWAY.<n>SULLEN MAN with the name OREN embroidered on his '
 'coveralls, drifts away from the pack to load his tools in his pick-up.')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Little Krista and Nadine roller-skate down the sidewalk, holding each '
 "other's hands .<n>Nadine has KEN's pants off and is making him Krista's "
 'SKIPPER doll. Krista smiles.<n>Covered in head-to-toe protective pads, '
 'Nadine and Krista nervously roller-skating down the street, holdingeach '
 "other'S hands.<n>The book will be published in the U.S. on October 1st, by "
 'Simon & Schuster.')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Middle Eastern legs, traditional Arabian dresses to the floor, homemade '
 'bags, animals everywhere .<n>A chaotic scene with men, women, children all '
 'shouting in Farsi.<n> SCARLETT DURANG, mid 20s, an adventurous British '
 'archaeologist.<n>Her off-the-charts intellect and multiple post-grad degrees '
 'are barely concealed by her striking good looks and punk rock.<n> Judging '
 'from the cloth that edges frame, she is obviously shooting with a small '
 'camera attached to her hajib.')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Celeste is a chronic overachiever and Jesse is sweet, goofy and '
 "funny.<n>They are best friends but it's Close-up of their hands crossed, "
 'making "C" and "J" shapes.<n>On an engraved necklace, carved into a tree, '
 'written on a wet beach, and on their wedding cake.<n>Moving into their '
 'house, dancing, reading side by side.<n>Senior year, Jesse draws "C AND J '
 'FOREVER" in a pristine, snowy forest with a stick; he and Celeste laugh. A '
 'moment later, they kiss deeply.')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("RyanINGHAM is the star of the new series of ITV's 'This Morning'<n>In his "
 "new role he takes on the role of 'Grim Reaper' and goes on a mission to find "
 'the real Grim Reaper .<n>The Grim Reaper is played by Sam Claflin and is '
 'joined by co-stars Miranda Kerr and Savannah Smiles ..<n>Watch the full '
 'series on ITV1 on Sunday, November 9 at 8.30am and 11.30am. Click here for '
 'more information .')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('A Politician in a white shirt and pants who looks more like a local hood, '
 'travels on top of a Jeep which has been designed to look like a mythological '
 'chariot .<n>His 40 supporters are on 15 motorcycles bearing bandannas and '
 'flags in his party color, yellow.<n>They ride slowly in front of the chariot '
 'shouting slogans.<n>A couple of his supporters climb on the Jeep and garland '
 'him. He is already laden with a dozen garlands- all yellow .')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('A rhythmic, accelerating anthology -- footage, stills -- Pennsylvania Ave '
 'roils with protest .<n>Accelerating now to a torrent of campaign bunting and '
 'bumperstickers and the signage and news feeds of Palestinian The alphabet of '
 'rage.<n>We’re 5 in an immaculate kitchen, circa early-70’s, and now to-- AN '
 'ANTISEPTIC living room. Legions of obsessively aligned 6 glass animals. '
 'Pictures of two KIDS Airman’s Uniform; a young pretty woman.')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('ALICE KINGSLEY is on the verge of womanhood, beautiful, but slightly '
 'off-kilter. She sees the world with different eyes than other people her age '
 '.<n>She re-ties her wais as her mother fusses with her wild mane of blonde '
 'hair.<n>Now nine, she is tired and grumpy as she is forced to deal with her '
 "father's new venture and the pressures of being a young woman . ALICE's "
 'father Charles Kingsley has just described his new venture to his friends, '
 'including LORD ASCOT. He stops. He takes her hand and turns to her.<n>He '
 "sits on her bed, listening with utmost seriousness. I'm falling down a dark "
 'hole, then I a waistcoat, a smiling cat... caterpillar. Hmm. He feels her '
 'forehead as if for a fever. She smiles and leans against him. He pinches '
 'her. She screams and pinches him back.<n>A horse-drawn carriage careens down '
 'the road at a full gallop.')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Alpha sits up, sensing something. BIRDS take sudden flight. He stands on the '
 'but he bares his teeth and sends her off. Then he drops, landing firmly on '
 'the clearing floor.<n>All around him, POACHERS pursue terrified apes, nets '
 'swinging.<n>MALE CHIMPANZEES jump up and down, SHOUTING AND SCREECH.')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('AnITA "NEEDY" LESNICKI, 17, sits on her hospital bed in pajamas. She\'s a '
 'plain-faced girl with a haunted expression.<n>There are letters, packages, '
 'even creepy little gifts and totems sent by admiring "fans"<n>Needy glances '
 'sadly at a photo of a boy on her dresser. She touches the frame wistfully, '
 'her eyes full of remorse.<n>We see a morbid "hall of fame" of creepy photos '
 "sent in from guys. They line the mirror, grinning at us. There's even a DICK "
 'SHOT tucked in there.')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('The series follows two Russian soldiers as they fight an enemy .<n>One of '
 'the soldiers is killed by a submachine gun, the other by a drill bit on the '
 'roof of the truck he is riding in ..<n>The series ends with a bang as the '
 'two soldiers face off in a gun battle ., with one soldier dying in the '
 'middle of the battle and the other dying later in the hospital, while the '
 'other soldier is still alive .')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Two boys throw and shoot arrows at each other in a garden .<n>The music is '
 'distorted, becoming the garden becomes a war zone ..<n>Child TERRI pops up, '
 'his eye widens as the arrow speeds towards it ., as the two boys try to '
 "shoot an arrow into a large shrub , which explodes in the child's face . and "
 'CHILD TERRI is left with no choice but to jump out of the shrub and run for '
 'cover .')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Frances and Sophie, both 27, set up a little performance space in front of a '
 'statue .<n>Sophie sings and plays a shitty guitar badly. Frances makes sure '
 'to keep a They run into the subway.<n>Frances cooks, Scrambled eggs and stir '
 'fry. Sophie 6 enters frame and gets plates out of the cupboard.<n>She reads '
 'a sentence out loud from “Sincerity and Authenticity” by Lionel '
 'Trilling.<n>We see them talking outside the windows.')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('A meteorite hits Africa and we see plant life and animals affected by '
 'vibranium, the strongest the plant life around it .<n>A visual '
 'representation of the five tribes emerges as hands from the sand animation, '
 'andwe see them unite and then break apart as conflict arises.<n>We see the '
 'protective barrier rise around the Wakandan city as we pull back on the '
 'Earth as it spins, now zooming in on Towering apartment buildings loom over '
 'the horizon.<n>Kids play pickup basketball on a milk carton hoop when '
 'mysterious lights approach from the sky .')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('The late teen’s mother is racing to a Pull out to reveal two more dead BOYS '
 'in the back seats of the vehicle .<n>The mother is cut off mid-sentence by '
 'EXPLODING GLASS, as her windshield ruptures on impact with another '
 'vehicle.<n>Outside, the street is LITTERED with crashed cars, most of which '
 'contain the CORPSES of recently deceased male drivers.<n>A few small FIRES '
 'dot the landscape. T.')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Hilarious account of growing up in a city torn apart by civil strife '
 '.<n>Parents gossip in the doorways; every day life in the city is chaotic '
 'and dangerous.<n>Family laugh, cry and play together in an attempt to escape '
 'the chaos around them.<n>An unlikely bond is formed between two siblings who '
 'learn to love each other and live together in the chaos of daily '
 'life.<n>Hilarious and heartwarming, this is a tale of family, friends and '
 'the joy of living in the middle of the city of London.')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Penny Tweedy lives in an upper-middle-class neighborhood in the late 1960s '
 '.<n>She and her husband, Jack Tweedy, make breakfast every morning and mix '
 "pancakes with the help of her sister, Sarah Tweedy and her sister's "
 "boyfriend, KATE Tweedy.<n>After six minutes, Jack doesn't look up from the "
 'paper. Six minutes.<n>Sarah hurries in and sits on one side of Jack; her '
 'sister KATE, 16, bounces in too, both girls kissing.')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Six young, filthy Tommys raise their heads along a deserted street .<n>One '
 'crouches to check a coiled garden hose.<n>He wads the leaflets up, crouches, '
 'drops his trousers.<n>The Tommy with the hose carefully lifts each '
 'side...<n>He gets a tiny DRIBBLE of water which he licks from the nozzle- '
 'BLAMBLAMBLAM!<n>The survivor CLIMBS the fence- Gunfire BURSTS through the '
 'fence, ten feet away .')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('In the distance sit god-sized hills, hazy and dream-like.<n>Just a handful '
 'of buildings, looking like toy blocks dropped onto a clean white quilt.<n>A '
 'BOY (7), kneeling to pack a small mound of snow. Wump, wump,wump. He looks '
 'swollen in his pillowed coat, working a well-used ARMY MAN into the mound. '
 'His name is BAILEY. His nose running, his breath clouding before his face, '
 'he’s He looks up. Sees something. Freezes. A WOLF. Just emerged from the '
 'trees. Motionless. Yellow eyes fixed on Bailey, her own breath steaming. '
 'She’s huge.')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Based on the characters and story created by Helen Fielding .<n>Shazzer '
 'spots JUDE, now a pillar of married respectability, her husband GILES and '
 'their tiny baby. They take the seats beside them.<n>Bridget spots Jude and '
 'Shazzer and heads towards them. She’s older than when we last saw her, forty '
 'three to be exact, slim and elegantly dressed. She takes a deep breath and '
 'collects herself.<n>A random guy, John, takes his place next to Bridget. A '
 'hush descends and the servic')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Clarke (15, a soft-natured, overweight nerd with a bruise on his face) '
 'watches as the Mustang climaxes.<n>Stocky Boy emerges putting on his '
 'letterman’s jacket. A curly-headed figure, DANIELLE, sits up in the '
 'backseat.<n>A NICE GIRL raises her hand. Stocky Boy passes a folded note '
 'labeled “Danielle”.<n>The note makes its way across the classroom, reaches '
 'its A REDHEADED .<n>Girl raises her hands. Mr. Potter points to her.')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('eth pays and is about to drop a dollar in the tip jar, but notices the '
 'cashier isn’t looking.<n>Jay turns to see Seth, and has a slight “busted” 1 '
 'his face.<n>Seth looks kind of hurt. his eyes. Jay doesn’t notice. a weed '
 'scone.<n>Set! character with bullets. Jay stands up and raises hi in '
 'triumph. Jay starts singing to the tune of in, Dion’s “It’s All Coming Back '
 'To Me” as he perf sexually graphic and immature')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Hilarious video shows Ryan walking up Runyon, taking selfies and shooting '
 'videos as they "hike"<n>Behind Ryan, a VAMPY GIRL in crazy skimpy booty '
 'shorts, holds her phone down below her ass then brings it up to her face '
 'where she smiles and gives a peace sign .<n>Ryan, sporting a very strange '
 "18TH Century ARMY OFFICER'S UNIFORM, lands on the living room floor ..<n>At "
 'the top of Runyon he pans the hazy skyline, landing lying on the Living Room '
 'floor.')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Anteverse. the older, parent universe on the other side of the * '
 'conn-pod.<n>The main campus of the Cosdec Defence College leftover. (slang) '
 "the surviving crewmate of a Jaeger operator Midway.<n>HQ for COSDEC's "
 'ultra-secret Science Division uninhabitable from radiological and biological '
 'contaminants of The Precursors. the unofficial Sci-Division code name for '
 'the hypothetical entities .<n>an ultra- secret research arm of V-50 '
 'Jumphawk.')
{'The Guard': {'rouge1': 0.17094017094017094, 'rouge2': 0.034782608695652174, 'rougeL': 0.10256410256410256, 'rougeLsum': 0.10256410256410256}, 'Batman Year One': {'rouge1': 0.18181818181818182, 'rouge2': 0.02162162162162162, 'rougeL': 0.1283422459893048, 'rougeLsum': 0.1283422459893048}, 'Inherent Vice': {'rouge1': 0.21686746987951808, 'rouge2': 0.024390243902439022, 'rougeL': 0.12048192771084337, 'rougeLsum': 0.12048192771084337}, 'Colombiana': {'rouge1': 0.08633093525179857, 'rouge2': 0.0, 'rougeL': 0.05755395683453237, 'ro

In [None]:
# Combine the per-title ROUGE scores and the generated summaries into a single
# table (one row per title), then preview the first few rows.
pegasus_long_results = combine_rouge_and_summary_results(
    pegasus_long_rouge_dict,
    pegasus_long_summary_dict,
    'pegasus_long',
)
pegasus_long_results.head()

Unnamed: 0,rouge1,rouge2,rougeL,rougeLsum,pegasus_long_summary
The Guard,0.17094,0.034783,0.102564,0.102564,Five young men pass a whiskey bottle around --...
Batman Year One,0.181818,0.021622,0.128342,0.128342,"A sweating, tormented sleeper BRUCE WAYNE. Hi..."
Inherent Vice,0.216867,0.02439,0.120482,0.120482,Doc SportELLO is half- awake on his couch. He ...
Colombiana,0.086331,0.0,0.057554,0.057554,Lara Croft and the Tomb of the Unknowns is pub...
Barney s Version,0.260163,0.033058,0.178862,0.178862,Barney Panofsky's new book is a collection of ...


In [None]:
# Persist the combined ROUGE/summary table as a CSV file on the Colab VM.
pegasus_long_csv_path = '/content/pegasus_long_text_results.csv'
pegasus_long_results.to_csv(pegasus_long_csv_path)
# Optional: uncomment to also download the CSV through the browser.
# files.download('/content/pegasus_long_text_results.csv')

In [None]:
# Column-wise mean of the ROUGE metrics; the text summary column is dropped
# first so that only the score columns are averaged.
pegasus_long_rouge_only = pegasus_long_results.drop(columns=['pegasus_long_summary'])
pegasus_long_rouge_only.mean(axis=0)

rouge1       0.203990
rouge2       0.017171
rougeL       0.127039
rougeLsum    0.127039
dtype: float64

## Pegasus Large


---


The pretrained *'google/pegasus-large'* model accepts a maximum of 1024 tokens.

In [None]:
# Run the Pegasus Large summarizer over the first rows of `subset_train_df` and
# collect, per title, the generated summary and the ROUGE scores returned by
# `calculate_rouge_scores(summary, plot_outline)`.
num_examples = 30  # upper bound on rows processed; fewer are used if the frame is shorter

pegasus_large_rouge_dict = {}
pegasus_large_summary_dict = {}

# head() clamps to the rows that actually exist, so a frame shorter than
# `num_examples` is handled without an IndexError, and each row is fetched once.
for _row_label, row in subset_train_df.head(num_examples).iterrows():
    pegasus_large_summary = run_pegasus_large_model(row['bert_text'])
    pegasus_large_rouge_scores = calculate_rouge_scores(pegasus_large_summary, row['plot_outline'])

    # add rouge scores and summary to a dictionary
    # NOTE(review): results are keyed by title, so rows sharing a title would
    # overwrite each other — confirm titles are unique in subset_train_df.
    title = row['title']
    pegasus_large_summary_dict[title] = pegasus_large_summary
    pegasus_large_rouge_dict[title] = pegasus_large_rouge_scores

print(pegasus_large_rouge_dict)
print(pegasus_large_summary_dict)

Downloading (…)lve/main/config.json:   0%|          | 0.00/3.09k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/2.28G [00:00<?, ?B/s]

All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Downloading (…)ve/main/spiece.model:   0%|          | 0.00/1.91M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/65.0 [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/88.0 [00:00<?, ?B/s]

('A Garda police car is parked in the road. The red car zooms along at '
 'breakneck speed -- Eventually shooting past BOYLE in his police car -- '
 'BOYLES barely reacts -- Suddenly there comes the sound of screeching brakes. '
 'A YOUNG MAN has been thrown clear, the OTHER FOUR crushed inside the car. '
 'Gets out beside the YOUNG MAN, who is covered in blood. BOYLLE approaches '
 'the concertinaed car, CLOSE on him as he looks inside.')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Awoken from his NIGHTMARE by the sound of his own scream, disoriented, BRUCE '
 'goes to the sink and SPLASHES cold water in his face. LITTLE AL, a gigantic, '
 'early middle-aged black man carries a bag of doughnuts and two coffees. He '
 'stops and inserts a key at a grimy storefront LITTLE AL hits the lights '
 "revealing a meticulously clean LITTLE AL continues his search in the It's a "
 'dreary, dirty, isolated place.')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('He looks up, notices someone standing at his door She walks in, Doc gets a '
 'slow rising BONER in his pants... casually tries to cover it up. She gets a '
 'pencil, writes a number down, he watches He walks her down to her car, ‘59 '
 'CADILLAC EL DORADO might be watching by now. DENIS (rhymes with penis) walks '
 'up, says hi, they walk up towards town, away from the beach... and with '
 'Neptune moving at last rising into the Sagitarrian light DOC and DENIS and a '
 'bunch of locals,')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('ON A BLACK SCREEN CLOSE ON A PAGE OF A LARA CROFT COMIC BOOK Being copied, '
 'with uncanny precision, down to the last detail, by a ten year old hand '
 'clutching a pencil. CATALEYA RESTREPO, CAT for short, a contained, very '
 'centered 10 year old, pretty, but with a toughness, sitting at the dining '
 'room table, in a well kept, but modest home, with windows that look out over '
 'the steep hillside favela, attends to her notebook.')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("Barney hangs up, puffs on his cigar. The bottle of Macallan's now empty. "
 'Barney stares back Justin runs off. Now dressed, Barney reads the paper as '
 'he finishes his bagel and lox. Drops the paper on the table. Barney looks at '
 "his watch -- shit, he's late. Barney turns back inside. Barney is late to "
 "the party, but he'll be back in a few minutes. Barney's watch is late, too.")


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('A JAGUAR leaps out of the shadows, attacks the bunny, who screams-- The '
 'action continues-- as imagined by an amateur stage Reams of red papier mché '
 'entrails ooze from the bunny. TWO BOXES drop down, labeled VICIOUS PREDATOR '
 'and MEEK PREY. The MEEK PRESY box lands on Judy. Her entrail s get stuck '
 'outside the box. A YOUNG SHEEP wearing a white muumuu and a cardboard '
 'rainbow on his head does an improvisational dance across the stage.')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('TO TELL IN THE DARK Settled on a fork of the Yamhill River in the Pacific '
 'Northwest rests the small railway city of MILL VALLEY. SERIES OF OPENING '
 'SHOTS of a TOWN IN DECLINE Outside MILL VALLEY PAWN SHOP, an OLD TIMER '
 'lowers the AMERICAN FLAG. CONSTRUCTION WORKERS laugh as they pack up for the '
 'day beneath an unfinished SPRAWLING RAISED HIGHWAY. A SULLEN MAN with the '
 'name OREN embroidered on his coveralls, drifts away from the pack to load '
 'his tools in his pick-up.')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('She hangs up and unzips the bag, ready to dump its contents, Crouched under '
 'the PLASTIC SLIDE, we find LITTLE NADINE (6), heavy brows and the faintest '
 'hint of a mustache even at this age, and LITTLE KRISTA (6), blonde, chubby, '
 "glasses an inch thick, playing Barbies together. Nadine has KEN's hump off "
 "and is making him Krista's SKIPPER doll. Nadine beams, making Ken hump even "
 'faster.')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Camera powers up, jostles around until it settles on -- The floor of a third '
 'world bus. This bus could really The camera rises, turns around revealing '
 'its operator, a WOMAN wearing a hajib (a traditional Arabian headdress). She '
 'slumps low in her seat for privacy, looks around cautiously, and carefully '
 'lowers her veil revealing -- SCARLETT DURANG, mid 20s, an adventurous '
 'British archaeologist. Her off-the-charts intellect and multiple post-grad '
 'degrees are barely concealed by her striking good looks and punk rock '
 'Judging from the cloth that edges frame, she is obviously shooting with a '
 'small camera attached to her hajiab.')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("Jesse watches enviously from the sidelines, holding Mike's They go to "
 'college together, study together, drink together. Senior year, Jesse draws '
 '"C AND J FOREVER" in a pristine, snowy forest with a stick; he and Celeste '
 'laugh. A moment later, Jesse and Celeste kiss passionately, holding up a "C" '
 'and "J" sign in the middle of the forest. A minute later, they kiss deeply. '
 'A moments later, Celeste and Jesse kiss deeply, holding a sign that says "C '
 '& J FOREVER."')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Ryan pauses to let us consider this. Ryan takes a beat to let the weight '
 'sink in. A FEMALE FLIGHT ATTENDANT is looking directly at us. Turn to see '
 'RYAN looking back. The flight attendant raises her hand to reveal a CAN OF '
 'SODA. Ryan took a beat, then returns to his work. We hear people around us '
 'chuckling. We see a series of REAL PEOPLE react to the news of being '
 'authenticity. The series is REAL PEOPLE.')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('A POLITICIAN (55) in a white shirt and pants who looks more like a local '
 'hood, travels on top of a Jeep which has been designed to look like a '
 'mythological chariot. He is already laden with a dozen garlands- all yellow '
 'His supporters gesture at everyone to listen to him speak. As soon as the '
 'driver realizes it, everyone in the car panics as he tries to frantically '
 'turn the car. As the driver tries to take a sharp U turn, eight uniforme')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('5.15 a.m. Perfecting a tie, and now-- 3 THE HANDS lay a dress on a bed, '
 'across the feet of a woman 4 HOT WATER BEING POURED into a cup of Folger’s '
 'instant. We’re 5 in an immaculate kitchen, circa early-70’s,and now to-- AN '
 'ANTISEPTIC LIVING ROOM. Pictures of two KIDS Airman’s Uniform; a young '
 'pretty woman. Furniture shrink- wra')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('CHARLES KINGSLEY has just described his new venture to his friends, '
 'including LORD ASCOT. His nine year-old daughter ALICE stands at the door in '
 'her nightgown, clearly frightened. Blue caterpillar. She screams and pinches '
 'him back. She sees the world with different eyes than other people her age. '
 'Presently, she’s tired and grumpy as her mother fusses with her wild mane of '
 'blonde hair. She re-ties her wais')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('They belong to a FEMALE CHIMPANZEE She sits in a tree with ALPHA, her mate - '
 'large and muscular, a PROMINENT BIRTH WHITE BIRTHMARK ACROSS HIS SHOULDER, '
 'LIKE A SHOOTING STAR. Around them, under the TREE CANOPY, a COMMUNITY OF '
 'CHIMPSANZEES naps, eats, plays. Alpha sits up, sensing something. All around '
 "him, POACHERS pursue terrified apes, nets swinging. They're working towards "
 'the GROVE, visible in the distance.')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('There are letters, packages, even creepy little gifts and totems sent by '
 'admiring "fans." RAYMUNDO, a counselor raps on the door and sticks his head '
 'in cautiously. As she slips off her pajamas, we can see a series of puffy, '
 'slash-like SCARS on her body. perverted Chester with a hard-on We see a '
 'morbid "hall of fame" of creepy photos sent in from guys. There\'s even a '
 'DICK SHOT tucked in there.')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('CLOSE UP - An industrial drill bit boring into metal. On the roof we find '
 'ZEKE PLESHETTE, a wild-eyed bald man with a pony tail. He wears aviator '
 'goggles and drills away at the roof while listening to headphones. The drill '
 'bit goes through. The man falls, dead. He lets off a few loud cracks of gun '
 'fire. Hoss looks up to see five black SUVs approaching. He raises the gun '
 'and aims it at a SOLDIER running away in the distance.')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('A tomato bursts against the window, obliterating the CHILD TERRI dives for '
 'cover behind a large shrub. An egg follows the tomato; stones land in the '
 'garden along with arrows from a kid’s bow. In the shrub CHILD TERRI '
 'crouches, holding on to his head BOY 2 prepares to unleash a final arrow. '
 'The shrub seems to vibrate with rage. CHILD TERRI pops up. His eye widens as '
 'the arrow speeds towards it. The music is distorted, becoming the')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Frances When people walk by, they both stop and yell CUT TO Frances and '
 'Sophie run through the park together. Frances makes sure to keep a They run '
 'into the subway. Frances is reading and Sophie is knitting. Frances smoke in '
 'two separate adjacent 9 windows. We see them talking outside the windows. '
 'They both lean outside. Frances leans outside and Sophie leans inside. '
 'Frances lean outside, and Sophie lean inside. She lean outside and she lean '
 'inside, and they both lean out.')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('made of vibranium, the strongest the plant life around it. The meteorite '
 'hits Africa and we see plant life and animals affected by vibranium. '
 'Bashenga rises above the conflict and eats the Heart Shaped Herb, proceeding '
 'to unite We see the Jabari striding off towards the isolated mountain We see '
 "images of war and slavery just outside Wakanda ' s secretive border . The "
 'protective barrier rise around the Wakandan city , as we pull back on the '
 'Earth as it spins, now zooming in on Towering apartment buildings loom over '
 'the horizon .')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Open hard on the passenger seat of a speeding minivan, where a MALE TEENAGER '
 'is slumped against his blood-stained window. He’s dead, and a distinctly '
 'female voice is screaming in Pull out to reveal that this voice belongs to '
 'the minivan’s driver, the late teen’s MOTHER, who’s presumably racing to a '
 'Pull out even more to reveal two more DEAD BOYS in the back seats of the '
 'vehicle. Cut outside as a SPEEDING FIRE TRUCK runs a red light and easily '
 'KNOCKS ASIDE the minivan that dared cross its path.')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('COLLETTE continues with her fun a moment, then stands and walks through to '
 'the... ..-where her seven-year-old brother SEAN is eating a bowl of cereal. '
 'SEAN emerges into the street, leaving the door open so that He instinctively '
 'turns left for the shop, but then checks himself. There’s a huge riot at the '
 'other end of the street. Parents gossip in the doorways; every day life in a '
 'city torn apart by civil strife.')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("It’s now lying beside a plate at a perfectly set table; JACK TWEEDY (Penny's "
 "married name is Tweedy), mid-40's, sits down and tucks a napkin above the "
 'knot of his necktie to cover his white shirt. They are With the expertise of '
 "long practice she slides the omelet into Jack's plate, places orange slices "
 'beside it, and begins mixing pancakes. SARAH is 18; she hurries in and sits '
 'on one side of Jack; her sister KATE, 16, in too, both girls kissing')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('TRAPPED AT DUNKIRK, THEY AWAIT THEIR He wads the leaflets up, crouches, '
 'drops his trousers... the Tommy with the hose carefully lifts each side... '
 'HOPING FOR DELIVERANCE. He gets a tiny DRIBBLE of water which he licks from '
 'the nozzle- BLAMBLAMBLAM!- Tommy JOLTS, grabs his trousers- all six RACE '
 'away from us, towards a FENCE twenty yards away- one by one FIVE are SHOT '
 'DOWN- the survivor CLIMBS the fence- Gunfire BURSTS through the fence, TEN '
 'FEET AWAY- Tommy tries to RELOAD his rifle- fingers STRUGGLING with the '
 'Gunfire SPLINTERS the fencing, FIVE FEET AWAY - Tommy THRUSTS h')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Just a handful of buildings, looking like toy blocks dropped onto a clean '
 'white quilt. Not so much a backyard as an expanse between the distant '
 'TREELINE and a roughly-hewn LOG CABIN, smoking chimney and mossy walls, a '
 'soft rhythmic wump wump wiump leading us to... A BOY (7), kneeling to pack a '
 'small mound of snow. He looks swollen in his pillowed coat, working a '
 'well-used ARMY MAN into the mound.')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('SHAZZER, early forties, and her folk singer husband FERGUS, scurry down the '
 'aisle. Shazzer spots JUDE, now a pillar of married respectability, her '
 'husband GILES and their tiny baby. She takes a deep breath and collects '
 'herself. A random guy, JOHN, takes his place next to Bridget. She’s older '
 'than when we last saw her, forty three to be exact, slim and elegantly '
 'dressed. A hush descends and the servic')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('A HOMELY GIRL makes her ‘way across the front lawn with a THREE '
 'AVERAGELOOKING GIRLS pile into a car giggling. _ dust then, THREE LARGE BOYS '
 'tackle Clarke and begin to beat before a blackboard with the word, '
 '“Choices”, scrawled across A NICE GIRL raises her hand. THE NOTE makes its '
 'way across the classroom, reaches its A REDHEADED GIRL raises your hand. A '
 'CLAMMY BOY this time. Mister Potter points.')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('eth pays and is about to drop a dollar in the tip jar, but notices the '
 'cashier isn’t looking. He waits for the cashier to look up, and then drops '
 'his tip in the jar. Jay turns to see Seth, and has a slight “busted” 1 his '
 'face. Jay and Seth are laughing while playing Call of Duty. Jay stands up '
 'and raises hi in triumph. Jay starts singing to the tune of in, Dion’s “It’s '
 'All Coming Back To Me” as he perf sexually graphic and immature')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Ryan pans his camera to show a literal PARADE of SHIRTLESS MEN and '
 'HALF-NAKED WOMEN with insane bodies all walking up Runyon, taking selfies '
 'and shooting videos as they "hike". Behind Ryan, a VAMPY GIRL in crazy '
 'skimpy booty shorts, holds her phone down below her ass then brings it up to '
 'her face where she smiles and gives a peace sign while holding a package of '
 '"Flat Tummy Tea" she\'s hawking.')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('highly classified research facility and surveillance station; HQ for '
 "COSDEC's ultra-secret Science Division uninhabitable from radiological and "
 'biological contaminants of The Precursors. the unofficial Sci-Division code '
 'name for the hypothetical entities on the other side of the Interstice '
 'Science Division/Sci-Division. an ultra- secret research arm of V-50 '
 "Jumphawk. the Jaeger's detachable cockpit module; sometimes while directly "
 'linked via the Pons; triggered by subconscious stimuli, a generally '
 'involuntary but seldom debilitating + drivesuit.')
{'The Guard': {'rouge1': 0.1487603305785124, 'rouge2': 0.016806722689075633, 'rougeL': 0.08264462809917354, 'rougeLsum': 0.08264462809917354}, 'Batman Year One': {'rouge1': 0.19689119170984457, 'rouge2': 0.020942408376963352, 'rougeL': 0.10362694300518135, 'rougeLsum': 0.10362694300518135}, 'Inherent Vice': {'rouge1': 0.21428571428571427, 'rouge2': 0.0, 'rougeL': 0.09999999999999999, 'rougeLsum': 0.09999999999999

In [None]:
# Combine the per-title ROUGE scores and generated summaries into a single
# results table, then preview its first five rows.
pegasus_large_results = combine_rouge_and_summary_results(
    pegasus_large_rouge_dict,
    pegasus_large_summary_dict,
    'pegasus_large',
)
pegasus_large_results.head(n=5)

Unnamed: 0,rouge1,rouge2,rougeL,rougeLsum,pegasus_large_summary
The Guard,0.14876,0.016807,0.082645,0.082645,A Garda police car is parked in the road. The ...
Batman Year One,0.196891,0.020942,0.103627,0.103627,Awoken from his NIGHTMARE by the sound of his ...
Inherent Vice,0.214286,0.0,0.1,0.1,"He looks up, notices someone standing at his d..."
Colombiana,0.192593,0.015038,0.162963,0.162963,ON A BLACK SCREEN CLOSE ON A PAGE OF A LARA CR...
Barney s Version,0.211382,0.033058,0.146341,0.146341,"Barney hangs up, puffs on his cigar. The bottl..."


In [None]:
# save results to a csv
# The path is defined once so the optional download below always targets the
# file that was actually written (the two paths previously disagreed).
pegasus_large_results_csv = '/content/pegasus_large_text_results.csv'
pegasus_large_results.to_csv(pegasus_large_results_csv)
# Uncomment to download the CSV from the Colab runtime:
# files.download(pegasus_large_results_csv)

In [None]:
# average rouge scores: drop the text column, then take the mean of each
# remaining (ROUGE) column across all rows
pegasus_large_results.drop(labels='pegasus_large_summary', axis='columns').mean(axis='index')

rouge1       0.201616
rouge2       0.017361
rougeL       0.124374
rougeLsum    0.124374
dtype: float64

## LongT5


---

Supports inputs of up to 16,384 tokens, using the pretrained *pszemraj/long-t5-tglobal-base-16384-book-summary* model.

In [None]:
# Trial run of the LongT5 pipeline on the first row only.
longt5_text_rouge_dict, longt5_text_summary_dict = {}, {}

for i in range(1):
    # results are keyed by the row's `title`
    title = subset_train_df.iloc[i]['title']

    # summarise the row's `bert_text`, then score the summary against the
    # row's `plot_outline`
    longt5_summary = run_longt5_model(subset_train_df.iloc[i]['bert_text'])
    longt5_rouge_scores = calculate_rouge_scores(
        longt5_summary,
        subset_train_df.iloc[i]['plot_outline'],
    )

    # add rouge scores and summary to a dictionary
    longt5_text_summary_dict[title] = longt5_summary
    longt5_text_rouge_dict[title] = longt5_rouge_scores

print(longt5_text_rouge_dict)
print(longt5_text_summary_dict)

In [None]:
# Summarise up to the first 30 rows of subset_train_df with LongT5 and score
# each summary with ROUGE. Both dictionaries are keyed by the row's `title`.
longt5_text_rouge_dict = {}
longt5_text_summary_dict = {}

# Clamp the row count so a subset with fewer than 30 rows is processed in
# full instead of raising an IndexError part-way through the loop.
for i in range(min(30, len(subset_train_df))):
    row = subset_train_df.iloc[i]  # look the row up once per iteration

    longt5_summary = run_longt5_model(row['bert_text'])
    longt5_rouge_scores = calculate_rouge_scores(longt5_summary, row['plot_outline'])

    # add rouge scores and summary to a dictionary
    title = row['title']
    longt5_text_summary_dict[title] = longt5_summary
    longt5_text_rouge_dict[title] = longt5_rouge_scores

print(longt5_text_rouge_dict)
print(longt5_text_summary_dict)



('A red car speeds past a barren landscape as the sun rises. Five young men '
 'pass a bottle of whiskey around and watch a police car drive by. Boyle is '
 'behind the wheel, staring out into space. When the red car shoots past his '
 "police car, Boyle doesn't react. The other cars in the road start to crash, "
 'and one man is killed while the other four are crushed inside the vehicle. '
 'Boys drives up beside the dead man, who has been tossed from the car. He '
 'grabs a finger on his throat and dies. After Boyle returns, he closes the '
 'door on the blackened car so that he can look inside.')
('In this short scene, the action shifts to a dark and desolate place. A '
 'menacing "police" is hovering over a sleeping Bruce Wayne. He awakens from '
 'his nightmarish sleep and goes to the washroom to throw cold water on his '
 'face. The sound of an incomprehensible horror fills him with a scream. When '
 'he wakes up, he looks for a clean place to stay. It is a filthy, run-down '
 'b

Your max_length is set to 256, but you input_length is only 242. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=121)


('In this short scene, a third-world bus passes through the streets of Middle '
 'Eastern cities. A woman wearing a "hajib" -- a traditional Arab headdress -- '
 'sits on the floor and stares at the view. Scarlett Durang, an archeologist '
 'from the United States, is watching with a tiny camcorder attached to her '
 'Hajib. He whispers to it not scared. The border between India and the Middle '
 'East is clearly marked.')
('In this short scene, Jesse and Celeste grow to be best friends. Their hands '
 'are crossed in a "C" shape and their lips are kissed passionately. Jesse is '
 "jealous of the couple's college activities and flirts with them at the same "
 'time. The two finally get married on a beautiful wedding cake. They spend '
 'their honeymoon moving into their home and enjoying themselves as they '
 'dance, read, and discuss their past together.')
('A Spotlight reveals that we see ryan standing next to a pile of stuff. He '
 'unzipps a backpack and puts it beside him. People 

Your max_length is set to 256, but you input_length is only 250. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=125)


('The narrator describes a Politician who travels on a jeep. He is wearing '
 'white shirt, pants, and a yellow hat. His supporters ride in front of him '
 'shouting "Yeah, we support you." They cheer him when he doesn\'t give up. '
 'When they do, he tells them to get out of the car. Everyone panics because '
 'there is a tree blocking the road.')
('The narrator shifts gears and muses about the recent political uprisings in '
 'the U.S., which have taken place over the past several months. He notes that '
 'there are still important people in the country who are marching or planning '
 'to march, such as the National Guard and police. A lock on the wall keeps an '
 "alarm going, and when it is time to break it, a man's hand comes into the "
 'frame. It is then that we find ourselves in a kitchen with a bunch of '
 "furniture shrinking away from us. There are pictures of two kids airman's "
 'uniform and a pretty woman.')
('In this short scene, Charles Kingley speaks with an ardor outs

Your max_length is set to 256, but you input_length is only 252. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=126)


('The two women set up a performance space outside the park. Frances plays the '
 'shitty guitar and Sophie sings. When people come by, they stop and shout '
 '"Cutter to Frances" and Sophie runs through the park with her. They make '
 'sure to keep one of their cases so that they can go into the subway '
 'together. After dinner, Frances reads aloud a sentence from Sincerity and '
 'Accurity by Philippe Lionel Trolling. She smokes in two separate windows.')


Your max_length is set to 256, but you input_length is only 244. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=122)


('In this short scene, a meteorite strikes Africa and changes the landscape. '
 'The five tribes of Africa unite and eventually break apart as war breaks '
 "out. A man named N'jobu shoots at a gun and goes over to the border. As the "
 'light from the sky approaches, kids play pick-up basketball in the '
 'background. An African guy named Nejobu sorting through firearms goes over '
 'toward the edge of the earth.')
('The action shifts to a small, speeding van that is being driven by a young '
 'man who has just died. His mother is screaming in the front of the van and '
 'two other boys are slumped on the passenger seats. Outside, there are many '
 'crashes taking place. Most of the cars contain the corpses of recently '
 'deceased men. Some of the drivers are also killed. There are some minor '
 'flaws in the landscape as well.')
("The narrator tells us that this is the first time we've heard of a "
 "twelve-year old girl named Collett. She's making a bracelet out of beads and "
 "li

In [None]:
# Combine the per-title LongT5 ROUGE scores and summaries into a single
# results table, then preview its first five rows.
longt5_text_results = combine_rouge_and_summary_results(
    longt5_text_rouge_dict,
    longt5_text_summary_dict,
    'longt5',
)
longt5_text_results.head(n=5)

Unnamed: 0,rouge1,rouge2,rougeL,rougeLsum,longt5_summary
The Guard,0.198758,0.025157,0.124224,0.124224,A red car speeds past a barren landscape as th...
Batman Year One,0.282828,0.020408,0.141414,0.141414,"In this short scene, the action shifts to a da..."
Inherent Vice,0.188976,0.016,0.094488,0.094488,A sweet young woman speaks to Doctor Salerio. ...
Colombiana,0.201439,0.0,0.129496,0.129496,"On a black cover of a comic book, being copied..."
Barney s Version,0.259542,0.031008,0.167939,0.167939,Barney sits down at the table and smokes a cig...


In [None]:
# write the LongT5 results table to a CSV file in the Colab workspace
longt5_text_results.to_csv(path_or_buf='/content/longt5_text_results.csv')
# optional: uncomment to download the file from the Colab runtime
# files.download('/content/longt5_text_results.csv')

In [None]:
# average rouge scores: drop the text column, then take the mean of each
# remaining (ROUGE) column across all rows
longt5_text_results.drop(labels='longt5_summary', axis='columns').mean(axis='index')

rouge1       0.228796
rouge2       0.017371
rougeL       0.131008
rougeLsum    0.131008
dtype: float64

# Custom Models (BERT - Dialog)

## Two Stage Short Model
This model first produces an extractive summary using TextRank, which is then passed to the *pegasus-xsum* model to generate an abstractive summary.

In [None]:
# Empty accumulators handed to the two-stage model.
rouge_short_dict, summary_short_dict = {}, {}

# NOTE(review): the 0 and 30 arguments look like start/end row positions in
# subset_train_df — confirm against execute_two_stage_short_model's definition.
rouge_short_scores, summaries_short = execute_two_stage_short_model(
    0,
    30,
    rouge_short_dict,
    summary_short_dict,
    subset_train_df,
)

print(rouge_short_scores)
print(summaries_short)

1701


Downloading (…)lve/main/config.json:   0%|          | 0.00/1.39k [00:00<?, ?B/s]

Downloading tf_model.h5:   0%|          | 0.00/2.28G [00:00<?, ?B/s]

All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Downloading (…)neration_config.json:   0%|          | 0.00/259 [00:00<?, ?B/s]

Downloading (…)ve/main/spiece.model:   0%|          | 0.00/1.91M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/65.0 [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/87.0 [00:00<?, ?B/s]

('You know nothing about Sergeant Aidan Boyle, you don’t know anything about '
 'him, you didn’t do anything wrong, you weren’t anything to do with anything, '
 'you’ve never done anything like that, you haven’t done anything wrong at '
 'all, you never did anything to hurt anybody, you did nothing to do anything '
 'to harm anyone, you hadn’t ever done anything to anybody, yeah, I’ve heard '
 'nothing about it, I haven’ta heard anything about it.')


Downloading builder script:   0%|          | 0.00/6.27k [00:00<?, ?B/s]

1480


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("I don't know how long I'm going to be here, but I've got to get something "
 "out of this nut, I'll get it out of him, or he's going to put a bullet in my "
 "head, or I're going to kill me, or we're both going to die, or you're just "
 "going to break into my house and take my stuff, or maybe we'll meet up in "
 'the middle of the night, or something.')
1807


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("I'm writing to you because I've been looking into the disappearance of "
 'Shasta Stogery, who was last seen with Mickey givin away at Disneyland in '
 "Anaheim, California, and I think I might have found the answer to why she's "
 'gone, because I saw her on my ticket and I thought she was the one who '
 "introduced me to the snitch on the other side of the ticket, Burke, who's "
 'been working as a snitch for the LAPD.')
1467


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("It's been a long time since I've been here, but now I'm here again, and I "
 "want to tell you about something that has never happened before, and it's "
 "called a lock-up, but I don't know what it is, but it is a place where you "
 'can lay down and rest for a little while, and if anything comes back, you '
 "can go back and lock it up again, so that when you get home, you won't have "
 'to worry about anything.')
1736


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("It's been a long time since I've had a chance to talk to you, Barney, but "
 "I'm here to tell you that I want you to know that I am truly in love with "
 "you, and I want to say it's the best thing that's ever happened to me in my "
 "life - and I just can't get enough of you, you're just such a wonderful "
 "person, I just don't want to stop talking to you.")
1702


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("It's been a long time since I've written a song, but I thought I'd do it "
 "again for you, because I know how much you love me, and I know you'll always "
 "be my friend, and you can always come back to me when you're ready, but "
 "first I want to tell you a little bit about myself... I'm not the nicest "
 "person in the world, but when I see you, I know I'll be there for you...")
1173


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Y’know, I don’t know what to do with the story, I’ve never heard it before, '
 'so I’m not really sure what to make of it, but I’ll tell you what I know, '
 'it’s pretty much everyone’s got something to say about it, and everyone '
 'knows it was her dad who did it, so yeah, I guess I’d have to say it was '
 'him, but no one really believed him.')
1458


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('I feel like people don\'t know a lot about me because they\'re like, "Oh, '
 'she\'s got her nails cut off in the movie," and I\'m like, yeah, that\'s '
 "what it's all about, but I've got a whole lot more to me than that, you "
 "know... I mean, there's a lot more than that to me, and I want people to "
 'know about it, because I want them to see it.')
1366


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('I’m going to go and see you in the caves, but I don’t know how to get there, '
 "I don't know what to do, I can't see what you're up to, I'm not going to "
 "tell you, I won't tell you anything, so I'll leave it to you to tell me what "
 'you want to see, I’ll tell you what I want to hear, and I won’t tell anyone '
 'else.')
2077


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Can somebody tell me how to get things back the way they used to be...oh God '
 "give me a reason, I'm Ooohh ohhh Yeah,z Skillz made me do it and get the "
 'maid of honor to go along with the journal and keep the journal along with '
 "one of the sweet dad's maids of honour and they're going to be serious and "
 "we're just going to talk well, I just got something for a sweet dad.")
1597


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("I'm not even going to answer that... No, I can't even think of him the way "
 "that way, right, here you go...Don't know where you found the time...Sure, "
 "here, here You're going to find a lot of money than you know... More money "
 'than they put on Quite what a hard-on these guys put on quite what they do '
 "with their name on stuff... You guys don't grow up - You just need to pee on "
 'everything')
1322


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Aapke puri mein hota hai, aur puri puri aur hota mein puri, aap puri hota, '
 'aapka puri nahin, abhi puri pehle maras, apni puri maras , apka puri '
 'chahiye, puri choli puri... aap aap nahin... aapke aap mein aap... aye puri!')
1833


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('It’s been a long time since I’ve had anything to say to you, I don’t know '
 'what to say, I’m not even sure what to do, you’ve been so good to me, I '
 'never thought I’d ever have to say anything to you again, I didn’t even '
 'think I would have to do anything to anyone else, I think I was so good at '
 "what I did that I didn't even have to worry about anything.")
1062


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("Alice's Adventures in Wonderland is published by Little, Brown and Company "
 'in the United States, and is available to read online at www.little, brown '
 'and company.co.uk or by calling 0844  800-273-3217 800-273-3217 800-273-3217 '
 '(toll-free in the US) or visit '
 'www.facebook.com/pages/Alice-Adventures-in-Wonderland-UK-LLC-LLC/posts/27667531 '
 '(67531) subject to our credit rating.')
1009


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("I've been looking for you for a long time, but I can't seem to find you - so "
 "I'm going to take a look at the place where you were born and see if I can "
 "find you.I'm not going to tell you where to go, I'll tell you what to do - "
 "let me know if you've got anything to say - I won't tell you if it's true or "
 'not.')
1643


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Here is the full version of the song, written and performed by The Weeknd, '
 'which can be found on his debut album, GIRLS, which was released in May '
 "2014...and on 888-492-0 888-492-0's new album, HOME, which is out now on ATO "
 "Records, as well as on the band's own website, www. 888-492-0.com/home, "
 'where you can find out more about the album, including the tracklisting, '
 'lyrics and video.')
1440


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Here’s the full text of Vicki Gunvalson’s letter to Kyle Richards, in which '
 'she tells him to rip her dick off and shove it in her wrist, on the latest '
 'episode of The Real Housewives of Orange County.... and it’s all in the '
 'first line of the letter, which you can read below: Okay, Vicki, I’m going '
 'to rip your dickoff, and I’ll shove it into your wrist.')
1115


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('It’s Paul McCartney’s birthday today, and he’s in the middle of a press '
 'conference, and a journalist asks him what he’d do if he were in charge of a '
 'homeless charity, and his response is ‘I’d give them a load of dope!’ (hands '
 'off the journalist’s shoulders; himself again) And then he says (as Lennon) '
 '‘I know exactly what the people there need... I’m thinking brilliant.’')
1587


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('It’s been a long time since I’ve done a song like this, but I’m going to do '
 'it again because it’s the only thing that’s going to keep me going for the '
 'rest of the year... so here it is, the last song I’ll do before I go on my '
 'summer holidays... and I hope you all have a wonderful Christmas and a happy '
 'new year!... and if you want to know more about me, check out my blog...')
1531


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('This is the full text of the song "Killmonger" by The Killmongers, which was '
 'released on the remix album "Rebirth of the King", which is also available '
 "on the album's official website: https://rebirthoftheking.bandcamp.com/ "
 "Listen to the full song on The King's website: www.thekings.com Listen to "
 "more of the album on the band's Facebook and Twitter pages:")
1417


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('It’s been a long, long time since I’ve been here, but I’m glad to be here '
 'again, and I’d like to take this opportunity to wish you all a happy '
 'birthday and welcome you back to the world of medicine and science and '
 'everything else that’s going on in the world today, and thank you for being '
 'with me, I hope you’re having a good time, and thanks again for being here, '
 'I look forward to seeing you again, Yorick.')
1578


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('It’s time for you to go back to your mother and tell her that you’ve been '
 'taken away and that you want to go home, but you can’t because it’s too '
 'dangerous to do so, and you’re too old to do it, so you’ll have to go '
 'somewhere else and you won’t be able to see your son again, because he’s '
 'gone, and he’ll be gone forever, and it’ll never be the same again.')
1446


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("FastJack, come on, let’s go, let's see what we can do, we've got a lot of "
 "horses in our barn, we can't afford to keep them any more, so we're going to "
 'take them out and see if we can find a few men willing to ride them and '
 "share them with us, and we'll do it, and I'm going to spend 200 grand for a "
 'share in a horse that should be in my barn now, and maybe all we need to do '
 'is find one man willing to do this, somebody the others would feel safe in a '
 'barn')
510


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('All photographs courtesy of BBC News, courtesy of the Royal Navy and Royal '
 'Air Force, and permission to use them on social media, subject to copyright '
 "and other restrictions.'' - Sir David Hempleman-Adams, former editor of the "
 'BBC News website, on the 70th anniversary of the Battle of the Somme, which '
 'led to the end of World War Two, and the liberation of the French port city '
 "of Dunkirk.' - Dame Shirley Bassey, former BBC Radio 4 presenter and wartime "
 'correspondent, on what it was like to be part of the rescue operation at '
 'Dunkirk.')
1083


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('She’s good at what she does but she’s not good at hunting, so we need to get '
 'her out of the way and see what we can do for her, because she might run out '
 'of goo and we might have to call it back and tell her to run again, but we '
 'can’t do that because we’ve got to get him out of its way first, so let’s '
 'see if we can find her first, and then we can tell her what to do.')
1672


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("This is the full text of Bridget Jones's letter to her boyfriend, Giles, "
 "from the day she found out she was pregnant - here's the full extract: I’m "
 'sorry, I’ve got to tell you, you’re the worst thing that’s ever happened to '
 "me in my life, and I’ll never forgive you for that, because you're the most "
 "miserable person I've ever met, and you'll never see me again.")
1471


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("There was a circus in town, and it was a big one, but it wasn't as big as I "
 "thought it would be, cause I didn't know what it would look like, but I knew "
 "it wouldn't be the same again, because it had a big top down, and they "
 'pulled the big top up, and there was a clown in the middle of it, and I told '
 'her to learn how to hide her feelings out loud, and if you almost had it '
 'all!')
1478


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("Jay-Z's full statement on the fracas that broke out between him and Kanye "
 'West at the MTV Video Music Awards, in which the rapper said he was "not '
 'going back to there" and "didn\'t get it", after the two men clashed on '
 'stage during the performance of his song "Stronger": I don\'t want to go '
 "back to that place, I'm not going to get it, I’m not gonna get it!")
594


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('So, here\'s what Baz had to say about Ugly\'s "boring" Broadway audition on '
 "last night's episode of America's Got Talent: I'm just so bored with this "
 "whole thing, I can't believe it's all over, I don't know what's going to "
 "happen next, but I can tell you this is just not going to work, I've got "
 'nothing else to say, so here it is...')
1415


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("I'm not sure what it is, but I've been trying to get it to communicate with "
 "me for a long time, and it's just not working... I don't know what's wrong "
 "with it, but it doesn't seem to be working at the moment, so I'll just have "
 "to wait and see what happens... I can't tell you what it looks like, because "
 'I can’t hear anything... I’ve never heard anything like that...')
{'The Guard': {'rouge1': 0.07751937984496124, 'rouge2': 0.0, 'rougeL': 0.07751937984496124, 'rougeLsum': 0.07751937984496124}, 'Batman Year One': {'rouge1': 0.16831683168316833, 'rouge2': 0.01, 'rougeL': 0.10891089108910892, 'rougeLsum': 0.10891089108910892}, 'Inherent Vice': {'rouge1': 0.13636363636363638, 'rouge2': 0.0, 'rougeL': 0.10606060606060606, 'rougeLsum': 0.10606060606060606}, 'Colombiana': {'rouge1': 0.163265306122449, 'rouge2': 0.0, 'rougeL': 0.10884353741496598, 'rougeLsum': 0.10884353741496598}, 'Barney s Version': {'rouge1': 0.13533834586466165, 'rouge2': 0.015267175572519083, 'rougeL': 0.0

In [None]:
# Merge the ROUGE scores and the generated summaries of the two-stage short
# model into a single results table.
# NOTE(review): the third argument presumably prefixes the summary column
# name (the resulting column is `two_stage_short_summary`) — confirm against
# the helper's definition.
two_stage_short_results = combine_rouge_and_summary_results(
    rouge_short_dict,
    summary_short_dict,
    'two_stage_short',
)
# Preview the first five rows.
two_stage_short_results.head(5)

Unnamed: 0,rouge1,rouge2,rougeL,rougeLsum,two_stage_short_summary
The Guard,0.077519,0.0,0.077519,0.077519,"You know nothing about Sergeant Aidan Boyle, y..."
Batman Year One,0.168317,0.01,0.108911,0.108911,"I don't know how long I'm going to be here, bu..."
Inherent Vice,0.136364,0.0,0.106061,0.106061,I'm writing to you because I've been looking i...
Colombiana,0.163265,0.0,0.108844,0.108844,"It's been a long time since I've been here, bu..."
Barney s Version,0.135338,0.015267,0.090226,0.090226,It's been a long time since I've had a chance ...


In [None]:
# Write the two-stage short results table to a CSV file under /content.
two_stage_short_results.to_csv(path_or_buf='/content/two_stage_short_results.csv')
# Optional: uncomment the next line to also download the CSV via the browser.
# files.download('/content/two_stage_short_results.csv')

In [None]:
# Column-wise mean of every ROUGE metric across all rows; the free-text
# summary column is dropped first because it cannot be averaged.
(
    two_stage_short_results
    .drop(columns='two_stage_short_summary')
    .mean(axis='index')
)

rouge1       0.151861
rouge2       0.009955
rougeL       0.094948
rougeLsum    0.094948
dtype: float64

## Two Stage Long Model
This model first builds an extractive summary with TextRank, then passes that summary to the *pegasus-cnn_dailymail* model to generate an abstractive summary.

In [None]:
# Fresh, independent containers handed to the pipeline for this run's ROUGE
# scores and generated summaries.
rouge_long_dict, summary_long_dict = {}, {}

# NOTE(review): 0 and 30 look like a start/end range over subset_train_df —
# confirm against the definition of execute_two_stage_long_model.
rouge_long_scores, summaries_long = execute_two_stage_long_model(
    0,
    30,
    rouge_long_dict,
    summary_long_dict,
    subset_train_df,
)

# Print both returned objects, one per line.
print(rouge_long_scores, summaries_long, sep='\n')

1701


Downloading (…)lve/main/config.json:   0%|          | 0.00/1.12k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/2.28G [00:00<?, ?B/s]

All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Downloading (…)ve/main/spiece.model:   0%|          | 0.00/1.91M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/65.0 [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/88.0 [00:00<?, ?B/s]

('You know nothing about me.(getting worked up)<n>Well why don’t you put in '
 'your report that Sergeant Boyle went out of his fucking way to do you boys a '
 'fucking favour and he got really fucking annoyed when you started asking him '
 'stupid fucking questions about a few missing fucking guns, trying to fucking '
 'catch him out as if he’s just some kind of fucking gobshite!<n>Do you think '
 'he might have met someone here who did something bad to him?')
1480


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("I'm going to take a little something back from those bastards.I don't know "
 'how much longer I can go without your help.<n>I could put in a couple days, '
 'help you out down here, if you want a break.You let her get away?<n>I have '
 "this bad feeling that the only way I'll see you again is if I continue my "
 'life of crime.<n>They tried to make it look like burglars but I think it was '
 "Flass and his crew looking for my file.You and me, we're going have.")
1807


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Shasta has disappeared, too, just like her boyfriend Mickey... do you think '
 "there could be a connection?<n>He's been working as a snitch for the LAPD, "
 'and I also saw him on this outfit called Vigilant California... and you '
 "don't look surprised enough, Shasta, he's meant to be dead...<n>Then I guess "
 "that one's on my ticket because it was me who introduced him to Burke "
 'Stodger and Burke who set him up with the Viggies...')
1467


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("It's really late, and I'm really really tired .<n>I think we got her. I "
 "think we had a good time.<n>Let's see if anything comes back. Take her in "
 "the back, lock her up, Let her sleep it off, we'll book her tomorrow.I want "
 "you to think hard... Was there anything else?It'sreally late,and I've been "
 'tired all day.<n>Now I know some of you are thinking whoever is doing this '
 'makes less work for mother.I think our got her .')
1736


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("Getting to race in New York was considered hitting the big time .<n>I'm just "
 "saying that we'll never really know what happened.<n>Boogie, for the first "
 'time in my life I am truly, seriously, Oh Barney, so am I.<n>"No one will '
 'think anything les" is what I\'m trying to say to Barney. "Boogie" is the '
 'first thing I say to you. "You\'re coming for dinner this Friday, right?" "I '
 'love you but I really do have to go"')
1702


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('When I was a kid, I thought Zootopia was this perfect place where everyone '
 'got along and anyone could be anything...(getting emotional)<n>And soon '
 'enough those dreams die and our no choice but to go back home with that '
 "cute, fuzzy wuzzy little tail between her legs to become-- you're from "
 'Bunnyburrow, is that what you said?-- so how bout a carrot farmer?<n>I want '
 'you to remember this moment-- the next time you think you will ever be '
 'anything more than just a stu .')
1173


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("The story says she wasn't alone Every time he turned on his high (breaks "
 'down to Ramén) There was someone else- Just like he said- But I didn’t '
 'believe him- No one believed him- Because it was her- It was Sarah-- I hated '
 'him so much, I wanted to kill myself.<n>The third caller who’s called in to '
 'say something property.<n>I think she just wanted to be like the other kids, '
 'but knew she could never be like them.To tell them you’ve got nothing to do '
 'with this.<n>She doesn’t like to talk about it, but everyone pretty much '
 'knows it is her dad that did it- Not on like no one really buys it.')
1458


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("Erwin's new thing is to tell people really obvious shit like they don't know "
 'it .<n>"Hey fuckers, You know how much we give a shit about prom?"<n>"You '
 'told him about the tupperware?!Hey, um, come!"<n>"Oh, be quiet... (then) '
 'Wait, you mean that in, like, a good way, right?<n>I mean, what if you get '
 'to know me and you\'re all like, "oh wow, Erwin is even funnier than Will '
 'Ferrell in that one movie."')
1366


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('bend, they see kind of underground party we all know probably happens '
 'somewhere, but would never get invited to ourselves.<n>We’re We need to go '
 'down another hundred.They might know a way out.<n>I’m going to find La Taupe '
 "because these caves are dangerous and you need to know where you're going!I "
 'have yet to see her do anything.You think because you see something in a '
 'book you know it?!So there’s no wa')
2077


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Skillz kinda made me do it and from the get go, she was way into...what are '
 'you looking at?Let me get you something for that.decides to keep the one '
 "journal with the sweet passages for You're going to be a really good "
 "dad.Things are going really well.But I'm sure the universe is looking out "
 'for me and when the time is right, the right thing will come along, you '
 'know?Sorry, I know this is serious talky time but would you look at the '
 "fucking seat on that girl?<n>I think I really like her.I really like you.I'm "
 'so sorry but I am the maid of honor and I am supposed to be at the wedding '
 'real far away, would you mind if I just got in front of you?')
1597


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("We're coming in on three weeks to go for Julie's wedding and there's "
 "something we could really use your help on .<n>Natalie's not even going to "
 "answer that... No, I can't even think of him that way...Don't know where you "
 'found the time... Oh right, here you go.I know you want to be there for '
 "her... Well here it is.<n>You guys don't grow up - You just need to pee on "
 'everything .')
1322


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Inhe bataiye inka neta dilli jaega.Bhains gobar karne ki tankha nahi '
 'leti.Woh dekh bhag gaya... usko roko!<n>Kyonki gaon wale aadivaasi kai baras '
 'pehle mar chuke hain.Sabko uska aa man-na hoga.Aap kahe to bandook yahi '
 'chhod jaye?Ghante bhar mein pahuch jayenge.Do dhai baje.Awwaal to hogi .')
1833


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("Mr.(Felt looks furious) I'm not going to tell anyone who you are.You don't "
 'know anything.You know you were.<n>What is there to know?What did you '
 'know?We know who he is.I know. I know that.Do they know?And you want me to '
 'light a fire (as Felt says nothing--)?<n>Now I see why they didn’t give you '
 'the job. You know what that will mean for me.If you know what I mean.<n>‘cuz '
 'whenever I get lucky enough to get someone over there to actually talk to '
 'me, they know .<n>what I’m going to ask before I ask it.I don’t think you '
 'have anything to worry about .')
1062


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('The Hatter would not have given himself up for just any Alice .<n>I was '
 'Hatter to the Queen at the time.<n>The Hatter, the Dormouse, the White '
 'Rabbit, the March Hare, I had hoped to have a champion by now.<n>There is a '
 "place called Wonderland. Alice At Last!We're looking for the girl called "
 'Alice.50 51 (to Alice)<n>I have a mother named Helen and a sister named '
 "Margaret. Alice by Linda Woolverton based on Alice's Adventur .")
1009


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('A seven-year-old chimpanzee went nuts - and all of a sudden, there you were '
 "on raised since birth.<n>If you got lost in these woods I'd never find "
 "you.Sure, that would be good.Hope you're feeling better - let me know if you "
 "need anything.<n>I just wanted to let you know that I've seen Caesar.Mind if "
 "I take a look?You could at least try to look excited.<n>You don't understand "
 '- this chimp is smart - h.')
1643


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("I think she remembered two buttons. And the stars will remind you, we'll "
 'meet again..."<n>No one would even know who they were if they hadn\'t been '
 'playing here that night.<n>I get presents from starfuckers who saw my '
 'picture in the paper and want to marry me or something.I tell it like it '
 'is.Mrs. Dove.<n>The only thing I would change about this song is that it '
 'makes me want to cry.')
1440


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("Cunth and Vicki go for a throat rip .<n>Cunth says he'll rip the dick off "
 "and shove it in Vicki's mouth.<n>Vicki says she's a good woman.<n>The two go "
 "on a date.<n>It's all over on Real Housewives of Orange County ., tonight at "
1115


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('The Only Good Wog’s a Dead Wog is out now on Capitol .<n>The Only Good One’s '
 'A Dead One is out on Capitol. You can pre-order it now.<n>It’s out on '
 'Tuesday, September 9. It’s available on Capitol’s website at: '
 'www.capitol.com/the-only-good-wog-s-a-dead-one, and you can download it for '
 'free via iTunes.')
1587


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('This is Frances...yes, “Frances for tonight”...ha thanks!...I just wanted to '
 'double check that we’re still on for me bringing my guys in a little * '
 'early...okay!<n>I don’t really have time to talk, I’m doing the job of three '
 'people here, but I can see you after rehearsal.(lying) Yeah, I was, but you '
 'know, I needed a break and I thought it would be so funny to come back to '
 'the college for the summer.Remember because when I do the Christmas show '
 "I’1l be doing like ten shows a week... Oh right right.<n>I'm sorry I was so "
 'drunk last night, but thank you for being nice to me which I know you were '
 "even though I don't remember it.Oh, let me, I'm going to go to the bathroom, "
 'just one second.Oh yeah, thanks, I forget I make my own work sometimes, but '
 'thanks.')
1531


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("Sir , I'm going to have to ask you to leave .<n>Look, uh, your highness, the "
 "new King is a -- You cannot talk.<n>But I'll make sure we're even.yea go "
 'ahead and burn all that.<n>You want to see us become just like the people '
 "you hate so much?But I can get you one if you'd like.<n>If you don't believe "
 'me you ask your friend what his suit is made of ... what his c')
1417


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("We're on our way to find her. We still owe a debt to society, and without "
 "someone like him, there might not be another one.<n>It's going to take more "
 'than the apocalypse for me to start taking orders from my patients.We just '
 'take up position here .<n>If you hand Yorick over to another country, you '
 'people will never know another peaceful day. Well, it’s a good thing I found '
 'you when I did.')
1578


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('See, Collette, it’s like this; only two men knew the time and place.I need '
 'to get home.It’1ll never end like that.<n>You have the right to remain '
 'silent, but anything you do say may be taken down and later used as evidence '
 'against you.<n>49, He wanted to know why.<n>For Christ’s sake-- (on his feet '
 'also) You went back to your mother’s place, took your son into your arms and '
 'cried until it was time to leave.')
1446


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('It’s like every other all-or- nothing, multi-million-dollar gamble we '
 'housewives make every day.<n>Jack, I consider the children every day of my '
 'life-- I just mean, do what you want with your own money.<n>Every one of '
 'those horses was holding back, I mean really holding to th .<n>Few days at '
 'home, get some rest, things’1l be better.<n>Come on Red...Come on, Red... '
 'COME ON!You sure you want to send hima mile next time out?')
510


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("Heinkel, 11 @'clock, lining up to drop her load on that minesweeper "
 '.<n>Fighter- ME 109, from the South.Angels two, over Right, '
 "Highlanders.yacht around to head back- Watch for a parachute!<n>No, we're "
 'going to England!(four bearers per stretcher, one at each corner) one day '
 "Navy's requisitioned her- there's Dunkirk, need taking off.<n>One stretcher "
 "takes the Rear Admiral.Seeing home doesn't.")
1083


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('She wanted a witness to her life.No, he’s passed out but I don’t know for '
 'how much longer so get your people out here pronto .<n>She’s good at it.Get '
 'you to come without any mess.I can see you need to let the wolf out a '
 'little.We’ll get her, I promise you that.We see anything...we’1ll turn back, '
 'call it in.You think I could have stopped her?Can you think of anyone she '
 'might run to?Look, if there’s even a chance...if we can find her, it might '
 'be as goo .')
1672


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Giles tried the other night and apart from the fact that when he pulled off '
 'the duvet I looked like I still had it on .<n>Bridget, I’m not going to '
 'pretend this isn’t a sho.<n>Honestly... ‘think the pain away’... absolute '
 "nonsense, you’re squeezing out another human, I'd love to see them ‘think it "
 "away’.<n>You don’t even need a man, Bridget. You're squeezing us out of each "
 'other .')
1471


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Baby saw that when they pulled the big top down/ They left behind her dreams '
 'among the litter... And the different kind of love she thought she had '
 "found/ There was some glitter/ But baby can't be broken, cause you see, she "
 'had the finest teacher that was me, I told her... (singing) Don’t exy out '
 'loud, just keep it inside and learn how to hide your feelings/ Fly high and '
 'proud, and if you should fall, remember you almost had it all!<n>Baby cried '
 'the day the circus came to town/ Cause she didn‘t want with some clown .')
1478


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("I'm not gonnna let yo Shit!I'm gonna go take a piss.We don't have to go in "
 'there!<n>Oh, is this the new gam “The Blame Danny Game?” W Danny don’t play '
 'that.<n>All I know is I’m not going back out there.How the fuck should I '
 "know where they went?<n>We'll kick your ass out!I don’T want to se with you "
 'stupid eally fucking doing this?<n>A frightened Jay ai see shit.Don’t let me '
 'die!!')
594


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('This whole thing was just a walk & talk one-er.Could be your "thing", plus '
 "you know, mouth props.<n>I'm pretty much like the Zero Dark Thirty of "
 'auditioning -- this case Osama would be the audition, not like a real person '
 "-- and get out.<n>Technically maybe, but Baz's was like wa .<n>Cuz nothing "
 "is keeping me from landing this one. I know what it's like to love something "
 "that doesn't love you back .")
1415


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('There was nothing there that could put out a signal like that.Whatever it '
 'is, it has to be triggered on site.<n>Run my math by Sci-Division.I need to '
 'get this damn thing taken out.<n>You think I want to?You’ll feel it every '
 "time you hear about another attack and you won’t know why...Oh, he wasn't "
 "going to let us down without my signal.<n>I've never even heard of back-to- "
 'back simulations without someone Talk to me!')
{'The Guard': {'rouge1': 0.14925373134328357, 'rouge2': 0.0, 'rougeL': 0.08955223880597014, 'rougeLsum': 0.08955223880597014}, 'Batman Year One': {'rouge1': 0.1643835616438356, 'rouge2': 0.0, 'rougeL': 0.0821917808219178, 'rougeLsum': 0.0821917808219178}, 'Inherent Vice': {'rouge1': 0.1804511278195489, 'rouge2': 0.015267175572519083, 'rougeL': 0.10526315789473684, 'rougeLsum': 0.10526315789473684}, 'Colombiana': {'rouge1': 0.2, 'rouge2': 0.0, 'rougeL': 0.12, 'rougeLsum': 0.12}, 'Barney s Version': {'rouge1': 0.05882352941176471, 'rouge2': 0.0, 'rougeL

In [None]:
# Join the per-film ROUGE scores and the generated summaries into one table,
# tagging the summary column with the 'two_stage_long' label.
two_stage_long_results = combine_rouge_and_summary_results(
    rouge_long_dict,
    summary_long_dict,
    'two_stage_long',
)
# Preview the first rows as a sanity check.
two_stage_long_results.head()

Unnamed: 0,rouge1,rouge2,rougeL,rougeLsum,two_stage_long_summary
The Guard,0.149254,0.0,0.089552,0.089552,You know nothing about me.(getting worked up)<...
Batman Year One,0.164384,0.0,0.082192,0.082192,I'm going to take a little something back from...
Inherent Vice,0.180451,0.015267,0.105263,0.105263,"Shasta has disappeared, too, just like her boy..."
Colombiana,0.2,0.0,0.12,0.12,"It's really late, and I'm really really tired ..."
Barney s Version,0.058824,0.0,0.058824,0.058824,Getting to race in New York was considered hit...


In [None]:
# Write the combined two-stage (long) results table to the Colab workspace.
two_stage_long_results.to_csv(
    path_or_buf='/content/two_stage_long_results.csv',
)
# Optional: uncomment to download the CSV to the local machine.
# files.download('/content/two_stage_long_results.csv')

In [None]:
# Column-wise mean of every ROUGE metric; the free-text summary column is
# dropped first so that only the score columns are averaged.
(
    two_stage_long_results
    .drop(columns='two_stage_long_summary')
    .mean(axis='index')
)

rouge1       0.150243
rouge2       0.003830
rougeL       0.088418
rougeLsum    0.088418
dtype: float64

## Two Stage Large Model
This model first builds an extractive summary with TextRank and then feeds that extract to the *pegasus-large* model to generate the final abstractive summary.

In [None]:
# Start from empty accumulators for the pegasus-large run.
rouge_large_dict, summary_large_dict = {}, {}

# Run the two-stage pipeline on subset_train_df.
# NOTE(review): 0 and 30 are presumably the start/end row positions of the
# films to process -- confirm against execute_two_stage_large_model.
rouge_large_scores, summaries_large = execute_two_stage_large_model(
    0,
    30,
    rouge_large_dict,
    summary_large_dict,
    subset_train_df,
)

# Emit the scores and the summaries, one object per line.
print(rouge_large_scores, summaries_large, sep='\n')

1701


Downloading (…)lve/main/config.json:   0%|          | 0.00/3.09k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/2.28G [00:00<?, ?B/s]

All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Downloading (…)ve/main/spiece.model:   0%|          | 0.00/1.91M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/65.0 [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/88.0 [00:00<?, ?B/s]

('You know nothing about me.(getting worked up) Well why don’t you put in your '
 'report that Sergeant Boyle went out of his fucking way to do you boys a '
 'fucking favour and he got really fucking annoyed when you started asking him '
 'stupid fucking questions about a few missing fucking guns, trying to fucking '
 'catch him out as if he’s just some kind of fucking gobshite!I’m not sure if '
 'you ever get over something like that.I just didn’t realise...')
1480


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("Sooner or later, we'll get lucky and someone will put a bullet in this "
 'nut.Can I get you something?Get out of my way.My 10 (at the door) You know, '
 "maybe there are no real men.I'm going to take a little something back from "
 "those bastards.I don't know how much longer I can go without your help.You "
 'need time to sort stuff out.Maybe if I had kids like the other guys, or had '
 "trouble making ends meet it would be different.He's got to keep moving, like "
 "a shark, You can't stay stuck in one place, Bruce It just ain't HEALTHY.I "
 'think I know a way to find out.You know I COULD put in a couple days, help '
 "you out down here, if you want a break.You let her get away?Looks like it's "
 'going to be a good night, boys.Bring them back whenever you like.I have this '
 "bad feeling that the only way I'm ever going to see you again is if I "
 'continue my life of crime.They tried to make it look like burglars but I '
 'think it was Flass and his crew looking for my file.You

All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Said Shasta was the only one that ever made any sense around there -- she '
 "was nervous about Mickey givin' away all his money -- which I guess caused "
 "some problems because it made him think that's all she was worried about was "
 'her meal ticket -- which i guess was really crazy cause she was in love '
 "Shasta and I lived for a short while together and I can't say for sure how "
 'deep it went.coy Well, thanks for looking into that, you know, I just '
 "wondered the way people do.Relax -- don't be so sensitive -- all we know at "
 "this point is that she's disappeared now, too, just like her boyfriend "
 "Mickey... Then I guess that one's on my ticket because it was me who "
 'introduced him to Burke Stodger and Burke who set him up with the Viggies...')
1467


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("You know what time it is?We'll get all this stuff, then go over to this "
 'little place I know down on Maxwell street for some of these hot dogs the '
 'Polish people make here.I know a lot of guys would think this is perfect.And '
 'since until now, there has never been a clue to go on, why go on?Do I look '
 'like I could make something like that up?But when someone is good to you and '
 "you are not good to them back, then bad things happen.I don't know if it "
 'helped with the growing, but I was well rested for whatever came after.Well, '
 "'very tired', I'm sure you want to lay down and rest your little head?Now I "
 'know some of you are thinking whoever is doing this makes less work for '
 'mother.I think we got her.If you do, you would never have let me get '
 "involved in the first place.Let's see if anything comes back.Take her in the "
 "back, lock her up, Let her sleep it off, we'll book her tomorrow.I want you "
 'to think hard...')
1736


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("Oh, I know - have you read, uh, what's it called... give me a second-- "
 "Barney, hold on.I'm just saying that we'll never really know what "
 'happened.Getting to race in New York was considered hitting the big time - '
 'getting a bite of that "Big Apple" and I\'m only going to keep talking '
 "because I''m afraid if I stop there's going to be pause, or a break where "
 'you\'ll say "it\'s getting late" or, "I should probably get going" and i\'m '
 "not ready for that to happen, I don''t want it to happen.(re Tell me if this "
 'is still good.Listen, man, are you sure you want to go through with this?I '
 "want one thing from you.I don't have to tell you where things like this lead "
 "to - it starts with a turd, next thing we're wearing wonderful, oh and "
 "you're coming for dinner this Friday, right?I love you but I really do have "
 'to go.Boogie, for the first time in my life I am truly, seriously, Oh '
 'Barney, so am I.I just saying, no one will think anything')
1702


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('and you can walk away knowing you were right all along, I really am just a '
 'dumb bunny.And soon enough those dreams die and our no choice but to go back '
 'home with that cute, fuzzy wuzzy little tail between her legs to become-- '
 "you're from Bunnyburrow, is that what you said?-- so how bout a carrot "
 'farmer... That sound about right?Of course-- I could let you off with a '
 'this nice dad and his son a... what was it?Um, they thought it would be '
 "better if a predator such as myself wasn't the first face you that you see "
 'when you walk into the ZPD.I want you to remember this moment-- the next '
 'time you think you will ever be anything more than just a stu')
1173


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("Even me... (then) But the story says she wasn't alone Every time he turned "
 'on his high (breaks down to Ramén) There was someone else- Just like he '
 'said- But I didn’t believe him- No one believed him- Because it was her- It '
 'was Sarah-- I hated him so much, I wanted to kill myself.And then told '
 'everyone that she was the monster.” I really don’t know what to make of it, '
 'but this is the third caller who’s called in to say something property.I '
 'think she just wanted to be like the other kids, but knew she could never be '
 'like them.To tell them you’ve got nothing to do with this.As for the rest, I '
 'guess we’ll never know.She doesn’t like to talk about it., but everyone '
 "pretty much knows it wasn’t her dad that did it- He didn't kill her- Not on "
 'like no one really buys it.‘What kind of father would kid back at home.’ '
 'Y’know what he said?they both know there may never be one again.Don’t you '
 'want to know how the story ends?...Don’s you want your sto

All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('(then) Wait, you mean that in, like, a good way, right?I mean, what if you '
 'get to know me and you\'re all like, "oh wow, Erwin\'s even funnier than '
 'Will Ferrell in that one movie."Thanks a lot for the help there, my uh\'re '
 'like-- (imitates, eyes her nails) Wow, I should really get my cuticles '
 'cut... You told him about the tupperware?!Hey, um, come')
1366


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('bend, they see kind of underground party we all know probably happens '
 'somewhere, but would never get invited to ourselves.And I never thought I’d '
 'fall for the kind of guy who runs the second things get complicated.There '
 'must be another way to get there.Like it’s time to get to work.I don’t care '
 'who you think you are, but the next time you take a chance with my people, '
 'I’1l hurt you.I’m sure the skinhead party would love to see you again '
 'without me to save you.(to La Taupe) How do you know we’ll all be able to '
 'get back up there?Oh, and if I’m found dead, please know I entered the '
 'country of my own free will, fully knowing the penalty for trespassing here '
 'is burial up to your neck in sand and you.We’re We need to go down another '
 "hundred.They might know a way out.I'm going to find La Taupe because these "
 "caves are dangerous and you need to know where you're going!I have yet to "
 'see her do anything.You think because you see something in a boo

All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Can somebody tell me how to get things back the way they used to be...oh God '
 "give me a reason, I'm Ooohh ohhh.Yeah, Skillz kinda made me do it and from "
 'the get go, she was way into...what are you looking at?Let me get you '
 'something for that.decides to keep the one journal with the sweet passages '
 "for You're going to be a really good dad.Things are going really well.But "
 "I've got a lot of work to do.")
1597


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Oh right, here you go.I know you want to be there for her... Well here it '
 "is.I think you're going to find a lot of Oh, I'm sure it's going be really "
 "helpful.I mean, I don't know if... More money than they know These guy put "
 'on a quite a what to do with... party... No, I get it.You just need to pee '
 'on everything.Sure, you can set up an iChat...')
1322


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Inhe bataiye inka neta dilli jaega.Bhains gobar karne ki tankha nahi '
 'leti.Woh dekh bhag gaya... usko roko!Wahan bahi hogi, yahan nahi '
 'bahegi.Pehle kyon nahi bataya mahashay?Kyonki gaon wale aadivaasi kai baras '
 'pehle mar chuke hain.Sabko uska aadesh man-na hoga.Aap kahe to bandook yahi '
 'chhod jaye?Ghante bhar mein pahuch jayenge.Do dhai baje.Awwaal to hogi '
 'nahi.Shuru mein paanch sau rupaye dene hote hai.Jahan bachha khush rahe apne '
 'liye to wahi Bhopal hai.Kal aap thhe duggi magar dilwale.Aisa inhone '
 'kaha?Laal salaad!Laal salad!Lal salaade!Tendu patta laane jaa rahe hai.Kaun '
 'gawahi dega?Phir nahi hogi.Phir hai.Isse bhi ch')
1833


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Until there’s just one thing left.We don’t even know what this is.You open '
 'those scabs, there’’s a lot of things underneath.But I want to know what '
 'I’ve done.‘cuz whenever I get lucky enough to get someone over there to '
 "actually talk to me, they know What I’m going to ask before I ask it.I don't "
 "think you have anything to worry about, Mr.(Felt looks furious) I'm not "
 'going to tell anyone who you are.You don’’t know anything.You know you '
 'were.You knew how he is.What is there to know?What did you know?We know who '
 'he is,I know.I know that.Do they know?!Do them know what--?And you want me '
 'to light a fire (as Felt says nothing--) Now I see why they didn’t give you '
 'the job.You have what that will mean for me.If you know what i mean.+ % I '
 'know what you mean.What do you think they know?and ‘I’m not go to tell you’, '
 "both and neither.He didn't say it was a secret or anything.It’s")
1062


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('The Hatter would not have given himself up for just any Alice.I was Hatter '
 'to the Queen at the time.The Hatter, the Dormouse, the White Rabbit, the '
 'March Hare, I had hoped to have a champion by now.There is a place called '
 "Wonderland.Alice At Last!We're looking for the girl called Alice.50 51 (to "
 "Alice) You'll be my new favorite.37 38 38 her court, including the white "
 'Rabbit, March hare, Cheshire who stand together to protect the children.Take '
 'it to the White Queen!We "ll go to the white Queen together.Would your name '
 'be “Alice” by any chance?One!I should be the one.And that one too!Only '
 "one.You've brought the wrong Alice.Today is Griblig day in the time of the "
 'Red Queen.Who will step forth to be champion for the White Queens?Four, '
 'Alice!I believe your name has slipped the Queen’s mind.My father said he '
 'sometimes believed in six impossible things before breakfast.I have a mother '
 'named Helen and a sister named Margaret.')
1009


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("If you give me your number I'll let you know from time to time how things "
 "are going - Let's grab a drink.I'll give you an M&amp;M if you can get don't "
 'do it the way you did last time.You think you know where to find this '
 "monkey?If I can reason with him, I know I can put an end to this.He's got to "
 'be put down like the others.You know it was a funny thing, but I was '
 'watching the news and they started talking about a seven-year- old '
 'chimpanzee that went nuts - and all of a sudden, there you were on raised '
 'since birth.Good to know.Remember - stay where I can see you - if you got '
 "lost in these woods I'd never find you.Sure, that would be good.Hope you're "
 'feeling better - let me know if you need anything.(loud enough for Will) No '
 "place like home, huh monkey boy?I've got something that's going to make you "
 "better.I just wanted to let ya know that I've seen Caesar.Mind if I take a "
 "look?You could at least try to look excited.You don's understand 

All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("5.No one would even know who they were if they hadn't been playing here that "
 'night.Anyway, this singer guy is like 22, so he could get locked up in the '
 'state pen if he actually dated her.And I just know Colin would never go out '
 'with someone like that, and I just need confirmation.I think you forgot, '
 'like, two buttons.By that I mean he was poking her I know this must be hard '
 "for you to hear.Well-- yeah.(to audience) I said, it's so good to be "
 'here!Hey, I think they need two groupies.Occasionally I get presents from '
 'starfuckers who saw my picture in the paper and want to marry me or '
 'something.I tell it like it is.Mrs.')
1440


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('(frantically) Yes yes, great, yeah, sure!You know, you’re a good guy.I think '
 "I actually got him, but you're doing great.And if I’m not mistaken -- which "
 'I never am -- that briefcase-carrying (into wrist mic) Okay Vicki, it’s go '
 'time.I mean, don’t get me wrong, she’s a good woman.You know me.I know.I '
 'knew it.I was gonna go for a throat rip, but I know you’ve not been into '
 'them.I do wanna get a throat tear in here somehow -- think that’s gonna be '
 'you, small fry.I don’ta know what happened back there.But I’s going to have '
 'to say something to you that I')
1115


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('And then he says... (as Lennon) ‘I know exactly what the people there need.’ '
 '(hands off the journalist’s shoulders; himself again) I’m thinking, '
 'brilliant, a load of free records... dope!We were going to call it ‘The Only '
 'Good Wog’s a Dead Wog‘, then we thought ‘The only Good Taig’, but then we '
 'said ‘TheOnly Good One’s A Dead One’ covered pretty much everything.Yes, '
 'Paul has told us the offer... (listens a moment) Well, tell you the truth '
 'we’d be hoping for a bit more... others who nod in encouragement ) A lot '
 'more Kids got from EMI... 61.‘Well I won’t do that, and I can’tdo this, and '
 'we tell you something we hate all this...’ ‘Every time I see you, makes me '
 'realise, the pressure’s on, every single day...’ 123 124 125 126 127 '
 "68.(through a fixed smile) I'd have to speak to them at work about taking "
 'time off if you want me to cover at the shop.No, really, take your time.Why '
 'do you think I never let you get started?This is')
1587


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Yes, hi, this is Frances...yes, “Frances for tonight”...ha thanks!...I just '
 'wanted to double check that we’re still on for me bringing my guys in a '
 'little * early...okay!I don’t really have time to talk, I’m doing the job of '
 'three people here, but I can see you after rehearsal.(lying) Yeah, I was, '
 'but you know, I needed a break and I thought it would be so funny to come '
 'back to the college for the summer.Remember because when I do the Christmas '
 'show I’11 make more because I’1l be doing like ten shows a week...')
1531


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("Oh, you think they're for you?If Killmonger gets control of it, who do you "
 'think he will come for next?Go, go , come on!Now, tell me about this one '
 ".That ' ll make my day .We have to go now !Go!- Go!Where'd he go ?Go go go "
 "go !, We're not going to make it!We'll go .You want to see us become just "
 "like the people you hate so much?But I can get you one if you'd like .Hello "
 'Sophia, good to see you .(rolls her eyes) Yea I bet the elders loved that '
 ".If you don't believe me you ask your friend what his suit is made of ...")
1417


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('I thought you weren’t coming over for another-- You talk too much.We-- '
 'Perhaps, but all it would take is our enemies finding a single boy for them '
 'to resurrect their entire armies.47 You know, I actually stayed there one '
 'night.I guess he thought naming his kid after an obscure Shakespeare '
 "character would help him get tenure.We'll figure out the rest of this 32 "
 'Everything’s going to be all right, kid.of the sky here needs another ten to '
 'finish gassing up, so I figured I might as well teach you a little '
 'self-defense.55 Exactly who is that thing supposed to be helping?If you hand '
 'Yorick over to another country, you people will never know another peaceful '
 'day.Well, it’s a good thing I found you when I did.We’re on our way to find '
 'her.We still owe a debt to society, and without someone like him, there '
 'might not be another one.Last man or not, it is going to take more than the '
 'apocalypse for me to start taking orders from my patients.We just 

All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('see who can get there first.And one day, if you want it, we can give you a '
 'life far away from this.(with cold, hard eyes) See, Collette, it’s like '
 'this; only two men knew the time and place.I need to get home.It’1ll never '
 'end like that.You have the right to remain silent, but anything you do say '
 'may be taken down and later used as evidence against you.We all want to go '
 'home. It’s not going to end like-- How do you know?It was too dangerous to '
 'call in because the whole city was Memorise the number and the timeand place '
 'of the first meeting, then burn it.It was time to go.And there was no one '
 'else around, you say?He may not come at you right away.I know he fucked you '
 'so roughly that-- the table back) For Christ’s sake-- (on his feet also) You '
 'went back to your mother’s place, took your son into your arms and cried '
 'until it was time for to leave.But no one ever used to care about us.You had '
 'two weeks to convince us you mean what you say.49

All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Maybe all we need to do is find one man willing to do this, somebody the '
 "others would feel safe in You're asking me to spend 200 grand for a share in "
 'a horse that should be in my barn right now.It’s like every other all-or- '
 'nothing, multi-million-dollar gamble we housewives make every day.So how '
 'many times were you called little lady today?I would’ve put him up today, '
 'but he had another ride.If I ever even hear your name again I’1l see you go '
 'to prison....seeing nothing but trouble on the rail, seeing zero Fast first '
 'quarter in 23 and two... the half mile...')
510


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Dad, Spitfires!Spitfires, George.Flew Hurricanes.bomber and its fighter '
 'escort- He got him, he got him!!Greatest plane ever built.Did it help you '
 'against the dive bombers?Super title one week lift sand into the air- wave '
 "of bombs.he DIVES between the German planes... that's a fighter- ME 109, "
 'from the South.Angels two, over Right, Highlanders.yacht around to head '
 "back- Watch for a parachute!No, we're going to England!(four bearers per "
 "stretcher, one at each corner) one day Navy's requisitioned her- there's "
 "Dunkirk, need taking off.Heinkel, 11 @'clock, lining up to drop her load on "
 "that minesweeper - Fighters?Colonel, you're not going to have to decide how "
 'many more wounded to evacuate...')
1083


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('He’1ll see what has happened when he comes home, what we’ve done, what no '
 'one was able to stop.First light they’1l turn that whole But if we can’t get '
 'ahead of him, more people are gonna die.scared he might as well be inside of '
 "a dream-- (re Is that magazine full?We'll get out of its way.I have never "
 'seen anything like that today.It was already done by then but I think she '
 'wanted a witness to her life.No, he’s passed out but I don’t know for how '
 'much longer so get your people out here pronto and I want that special '
 'consideration--huh?It means they’ve been around a long time and know how to '
 'hunt better than we do.Maybe I could have been but...she’s good at it.Get '
 'you to come without any mess.I can see you need to let the wolf out a '
 'little.We’ll get her, I promise you that.We see anything...we’1 will turn '
 'back, call it in.You think I could had stopped her?Can you think of anyone '
 'she might run to?Look, if there’s even a chance...')
1672


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('one of those lovely new Minis.Stay calm, whole So which of the times with me '
 'and my wonderful man do you think we might have got pregnant on?Right, yes, '
 'put like that... Bridget, they want to get a photo with you and me and the '
 'baby.You don’t even need a man, Bridget.... ‘think the pain away’... '
 "absolute nonsense, you’re squeezing out another human, I'd love to see them "
 '‘think it away’.Bridget, I’m not going to pretend this isn’t a sho')
1471


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Baby saw that when they pulled the big top down/ They left behind her dreams '
 'among the litter... And the different kind of love she thought she had '
 "found/ There was some glitter/ But baby can't be broken, cause you see, she "
 'had the finest teacher that was me, I told her... (singing) Don’t exy out '
 'loud, just keep it inside and learn how to hide your feelings/ Fly high and '
 'proud, and if you should fall, remember you almost had it all!Don’t Cry out '
 'loud/ Just keep it in and learn what to do with it/ Fly High and proud., And '
 'if you shouldn’t fall remember that you almost made it / Don ’t cry out loud '
 '/ Just keep your feelings inside and get your feelings out!itittt '
 '109.(singing as best she can, but still rather badly) Baby cried the day the '
 'circus came to town/ Cause she didn‘t want with some clown/ While she danced '
 'without a net upon the wire/ I know a lot about her ‘cause you see , baby is '
 'an awful lot lik')
1478


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("You know... We'll kick your ass out!I don’t want to se with you stupid eally "
 'fucking doing this?That Yeah... A frightened Jay ai see shit.Don’t let me '
 'die!!That’s, like, one of the thing was gonna do while I’m here.He could be '
 'the sociopa letting some psycho i And thanks t Dan ow!Lets go!Let go of '
 "me!ay’ for argument's sake that to fuck up everyone's shit")
594


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('See how we just went in like a big circle but nothing really happened-- That '
 'just did.This whole thing was just a walk & talk one-er.Could be your '
 '"thing", plus you know, mouth props.Make it look like another '
 "murder-suicide?Call me, I can get you work tomorrow.You're like the first "
 "half of A Beautiful Mind.It's like a way to make boring plot stuff less "
 'boring for the audience.(beat) Can I just ask you one thing?Well I guess I '
 'mean it literally.There was no way we were going to let those two steal the '
 'show tonight.If I get it, it could like totally change my career.No, I mean '
 'that in a good way.Cuz nothing is keeping me from landing this one.I know '
 "what it's cool to love something that doesn't love you back.I'm pretty much "
 'like the Zero Dark Thirty of auditioning -- this case Osama would be the '
 'audition, not like a real person -- and get out.Ugly way to go.Im gonna take '
 'a look around back. I have nothing else to say about this.Technicall

All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('There was nothing there that could put out a signal like that.Whatever it '
 "is, it has to be triggered on site -- If I'm going to have someone riding a "
 "bomb into the Interstice, I'd like to be certain it will work... Run my math "
 'by Sci-Division.I need to get this damn thing taken out.They were broken '
 "when I got them -- I didn't even think it was possible, but by god, they are "
 'You cannot make them to do this...')
{'The Guard': {'rouge1': 0.14925373134328357, 'rouge2': 0.0, 'rougeL': 0.08955223880597014, 'rougeLsum': 0.08955223880597014}, 'Batman Year One': {'rouge1': 0.19161676646706585, 'rouge2': 0.0, 'rougeL': 0.08982035928143711, 'rougeLsum': 0.08982035928143711}, 'Inherent Vice': {'rouge1': 0.17435897435897435, 'rouge2': 0.010362694300518135, 'rougeL': 0.09230769230769231, 'rougeLsum': 0.09230769230769231}, 'Colombiana': {'rouge1': 0.15748031496062995, 'rouge2': 0.0, 'rougeL': 0.08661417322834646, 'rougeLsum': 0.08661417322834646}, 'Barney s Version': {'rouge1': 

In [None]:
# Join the per-title ROUGE scores with the generated summaries into one table.
# The 'two_stage_large' tag appears to become the prefix of the summary column
# (the preview shows `two_stage_large_summary`) -- confirm against the helper.
two_stage_large_results = combine_rouge_and_summary_results(
    rouge_large_dict,
    summary_large_dict,
    'two_stage_large',
)
two_stage_large_results.head()

Unnamed: 0,rouge1,rouge2,rougeL,rougeLsum,two_stage_large_summary
The Guard,0.149254,0.0,0.089552,0.089552,You know nothing about me.(getting worked up) ...
Batman Year One,0.191617,0.0,0.08982,0.08982,"Sooner or later, we'll get lucky and someone w..."
Inherent Vice,0.174359,0.010363,0.092308,0.092308,Said Shasta was the only one that ever made an...
Colombiana,0.15748,0.0,0.086614,0.086614,You know what time it is?We'll get all this st...
Barney s Version,0.104839,0.0,0.064516,0.064516,"Oh, I know - have you read, uh, what's it call..."


In [None]:
# Persist the combined scores/summaries table as a CSV file under /content.
# The DataFrame index is written as the first column (pandas default).
two_stage_large_results.to_csv(path_or_buf='/content/two_stage_large_results.csv')
# Optional: uncomment to also download the file through the browser.
# files.download('/content/two_stage_large_results.csv')

In [None]:
# Column-wise mean of every ROUGE metric across all rows of the results table.
# The free-text summary column is dropped first so only the score columns
# take part in the average.
(
    two_stage_large_results
    .drop(columns=['two_stage_large_summary'])
    .mean(axis=0)
)

rouge1       0.157633
rouge2       0.004987
rougeL       0.087577
rougeLsum    0.087577
dtype: float64

## Two Stage Long T5 Model
This model works in two stages: TextRank first produces an extractive summary, which is then passed to the *LongT5* model to generate the final abstractive summary.

In [None]:
# Fresh, independent accumulators passed to the experiment runner below.
rouge_t5_dict, summary_t5_dict = {}, {}

# Run the two-stage pipeline on `subset_train_df`.
# The leading 0 and 30 presumably select the range of rows to process -- TODO
# confirm against the definition of execute_two_stage_t5_model.
# NOTE(review): the recorded output of this cell logs
# "TFPegasusForConditionalGeneration" being initialised, although this section
# is described as using LongT5 -- verify which model the helper actually loads.
rouge_t5_scores, summaries_t5 = execute_two_stage_t5_model(
    0,
    30,
    rouge_t5_dict,
    summary_t5_dict,
    subset_train_df,
)

# Dump both result objects, one per line.
print(rouge_t5_scores, summaries_t5, sep='\n')

1701


Downloading (…)lve/main/config.json:   0%|          | 0.00/3.09k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/2.28G [00:00<?, ?B/s]

All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Downloading (…)ve/main/spiece.model:   0%|          | 0.00/1.91M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/65.0 [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/88.0 [00:00<?, ?B/s]

('You know nothing about me.(getting worked up) Well why don’t you put in your '
 'report that Sergeant Boyle went out of his fucking way to do you boys a '
 'fucking favour and he got really fucking annoyed when you started asking him '
 'stupid fucking questions about a few missing fucking guns, trying to fucking '
 'catch him out as if he’s just some kind of fucking gobshite!I’m not sure if '
 'you ever get over something like that.I just didn’t realise...')


Downloading builder script:   0%|          | 0.00/6.27k [00:00<?, ?B/s]

1480


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("Sooner or later, we'll get lucky and someone will put a bullet in this "
 'nut.Can I get you something?Get out of my way.My 10 (at the door) You know, '
 "maybe there are no real men.I'm going to take a little something back from "
 "those bastards.I don't know how much longer I can go without your help.You "
 'need time to sort stuff out.Maybe if I had kids like the other guys, or had '
 "trouble making ends meet it would be different.He's got to keep moving, like "
 "a shark, You can't stay stuck in one place, Bruce It just ain't HEALTHY.I "
 'think I know a way to find out.You know I COULD put in a couple days, help '
 "you out down here, if you want a break.You let her get away?Looks like it's "
 'going to be a good night, boys.Bring them back whenever you like.I have this '
 "bad feeling that the only way I'm ever going to see you again is if I "
 'continue my life of crime.They tried to make it look like burglars but I '
 'think it was Flass and his crew looking for my file.You

All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Said Shasta was the only one that ever made any sense around there -- she '
 "was nervous about Mickey givin' away all his money -- which I guess caused "
 "some problems because it made him think that's all she was worried about was "
 'her meal ticket -- which i guess was really crazy cause she was in love '
 "Shasta and I lived for a short while together and I can't say for sure how "
 'deep it went.coy Well, thanks for looking into that, you know, I just '
 "wondered the way people do.Relax -- don't be so sensitive -- all we know at "
 "this point is that she's disappeared now, too, just like her boyfriend "
 "Mickey... Then I guess that one's on my ticket because it was me who "
 'introduced him to Burke Stodger and Burke who set him up with the Viggies...')
1467


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("You know what time it is?We'll get all this stuff, then go over to this "
 'little place I know down on Maxwell street for some of these hot dogs the '
 'Polish people make here.I know a lot of guys would think this is perfect.And '
 'since until now, there has never been a clue to go on, why go on?Do I look '
 'like I could make something like that up?But when someone is good to you and '
 "you are not good to them back, then bad things happen.I don't know if it "
 'helped with the growing, but I was well rested for whatever came after.Well, '
 "'very tired', I'm sure you want to lay down and rest your little head?Now I "
 'know some of you are thinking whoever is doing this makes less work for '
 'mother.I think we got her.If you do, you would never have let me get '
 "involved in the first place.Let's see if anything comes back.Take her in the "
 "back, lock her up, Let her sleep it off, we'll book her tomorrow.I want you "
 'to think hard...')
1736


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("Oh, I know - have you read, uh, what's it called... give me a second-- "
 "Barney, hold on.I'm just saying that we'll never really know what "
 'happened.Getting to race in New York was considered hitting the big time - '
 'getting a bite of that "Big Apple" and I\'m only going to keep talking '
 "because I''m afraid if I stop there's going to be pause, or a break where "
 'you\'ll say "it\'s getting late" or, "I should probably get going" and i\'m '
 "not ready for that to happen, I don''t want it to happen.(re Tell me if this "
 'is still good.Listen, man, are you sure you want to go through with this?I '
 "want one thing from you.I don't have to tell you where things like this lead "
 "to - it starts with a turd, next thing we're wearing wonderful, oh and "
 "you're coming for dinner this Friday, right?I love you but I really do have "
 'to go.Boogie, for the first time in my life I am truly, seriously, Oh '
 'Barney, so am I.I just saying, no one will think anything')
1702


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('and you can walk away knowing you were right all along, I really am just a '
 'dumb bunny.And soon enough those dreams die and our no choice but to go back '
 'home with that cute, fuzzy wuzzy little tail between her legs to become-- '
 "you're from Bunnyburrow, is that what you said?-- so how bout a carrot "
 'farmer... That sound about right?Of course-- I could let you off with a '
 'this nice dad and his son a... what was it?Um, they thought it would be '
 "better if a predator such as myself wasn't the first face you that you see "
 'when you walk into the ZPD.I want you to remember this moment-- the next '
 'time you think you will ever be anything more than just a stu')
1173


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("Even me... (then) But the story says she wasn't alone Every time he turned "
 'on his high (breaks down to Ramén) There was someone else- Just like he '
 'said- But I didn’t believe him- No one believed him- Because it was her- It '
 'was Sarah-- I hated him so much, I wanted to kill myself.And then told '
 'everyone that she was the monster.” I really don’t know what to make of it, '
 'but this is the third caller who’s called in to say something property.I '
 'think she just wanted to be like the other kids, but knew she could never be '
 'like them.To tell them you’ve got nothing to do with this.As for the rest, I '
 'guess we’ll never know.She doesn’t like to talk about it., but everyone '
 "pretty much knows it wasn’t her dad that did it- He didn't kill her- Not on "
 'like no one really buys it.‘What kind of father would kid back at home.’ '
 'Y’know what he said?they both know there may never be one again.Don’t you '
 'want to know how the story ends?...Don’s you want your sto

All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('(then) Wait, you mean that in, like, a good way, right?I mean, what if you '
 'get to know me and you\'re all like, "oh wow, Erwin\'s even funnier than '
 'Will Ferrell in that one movie."Thanks a lot for the help there, my uh\'re '
 'like-- (imitates, eyes her nails) Wow, I should really get my cuticles '
 'cut... You told him about the tupperware?!Hey, um, come')
1366


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('bend, they see kind of underground party we all know probably happens '
 'somewhere, but would never get invited to ourselves.And I never thought I’d '
 'fall for the kind of guy who runs the second things get complicated.There '
 'must be another way to get there.Like it’s time to get to work.I don’t care '
 'who you think you are, but the next time you take a chance with my people, '
 'I’1l hurt you.I’m sure the skinhead party would love to see you again '
 'without me to save you.(to La Taupe) How do you know we’ll all be able to '
 'get back up there?Oh, and if I’m found dead, please know I entered the '
 'country of my own free will, fully knowing the penalty for trespassing here '
 'is burial up to your neck in sand and you.We’re We need to go down another '
 "hundred.They might know a way out.I'm going to find La Taupe because these "
 "caves are dangerous and you need to know where you're going!I have yet to "
 'see her do anything.You think because you see something in a boo

All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Can somebody tell me how to get things back the way they used to be...oh God '
 "give me a reason, I'm Ooohh ohhh.Yeah, Skillz kinda made me do it and from "
 'the get go, she was way into...what are you looking at?Let me get you '
 'something for that.decides to keep the one journal with the sweet passages '
 "for You're going to be a really good dad.Things are going really well.But "
 "I've got a lot of work to do.")
1597


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Oh right, here you go.I know you want to be there for her... Well here it '
 "is.I think you're going to find a lot of Oh, I'm sure it's going be really "
 "helpful.I mean, I don't know if... More money than they know These guy put "
 'on a quite a what to do with... party... No, I get it.You just need to pee '
 'on everything.Sure, you can set up an iChat...')
1322


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Inhe bataiye inka neta dilli jaega.Bhains gobar karne ki tankha nahi '
 'leti.Woh dekh bhag gaya... usko roko!Wahan bahi hogi, yahan nahi '
 'bahegi.Pehle kyon nahi bataya mahashay?Kyonki gaon wale aadivaasi kai baras '
 'pehle mar chuke hain.Sabko uska aadesh man-na hoga.Aap kahe to bandook yahi '
 'chhod jaye?Ghante bhar mein pahuch jayenge.Do dhai baje.Awwaal to hogi '
 'nahi.Shuru mein paanch sau rupaye dene hote hai.Jahan bachha khush rahe apne '
 'liye to wahi Bhopal hai.Kal aap thhe duggi magar dilwale.Aisa inhone '
 'kaha?Laal salaad!Laal salad!Lal salaade!Tendu patta laane jaa rahe hai.Kaun '
 'gawahi dega?Phir nahi hogi.Phir hai.Isse bhi ch')
1833


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Until there’s just one thing left.We don’t even know what this is.You open '
 'those scabs, there’’s a lot of things underneath.But I want to know what '
 'I’ve done.‘cuz whenever I get lucky enough to get someone over there to '
 "actually talk to me, they know What I’m going to ask before I ask it.I don't "
 "think you have anything to worry about, Mr.(Felt looks furious) I'm not "
 'going to tell anyone who you are.You don’’t know anything.You know you '
 'were.You knew how he is.What is there to know?What did you know?We know who '
 'he is,I know.I know that.Do they know?!Do them know what--?And you want me '
 'to light a fire (as Felt says nothing--) Now I see why they didn’t give you '
 'the job.You have what that will mean for me.If you know what i mean.+ % I '
 'know what you mean.What do you think they know?and ‘I’m not go to tell you’, '
 "both and neither.He didn't say it was a secret or anything.It’s")
1062


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('The Hatter would not have given himself up for just any Alice.I was Hatter '
 'to the Queen at the time.The Hatter, the Dormouse, the White Rabbit, the '
 'March Hare, I had hoped to have a champion by now.There is a place called '
 "Wonderland.Alice At Last!We're looking for the girl called Alice.50 51 (to "
 "Alice) You'll be my new favorite.37 38 38 her court, including the white "
 'Rabbit, March hare, Cheshire who stand together to protect the children.Take '
 'it to the White Queen!We "ll go to the white Queen together.Would your name '
 'be “Alice” by any chance?One!I should be the one.And that one too!Only '
 "one.You've brought the wrong Alice.Today is Griblig day in the time of the "
 'Red Queen.Who will step forth to be champion for the White Queens?Four, '
 'Alice!I believe your name has slipped the Queen’s mind.My father said he '
 'sometimes believed in six impossible things before breakfast.I have a mother '
 'named Helen and a sister named Margaret.')
1009


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("If you give me your number I'll let you know from time to time how things "
 "are going - Let's grab a drink.I'll give you an M&amp;M if you can get don't "
 'do it the way you did last time.You think you know where to find this '
 "monkey?If I can reason with him, I know I can put an end to this.He's got to "
 'be put down like the others.You know it was a funny thing, but I was '
 'watching the news and they started talking about a seven-year- old '
 'chimpanzee that went nuts - and all of a sudden, there you were on raised '
 'since birth.Good to know.Remember - stay where I can see you - if you got '
 "lost in these woods I'd never find you.Sure, that would be good.Hope you're "
 'feeling better - let me know if you need anything.(loud enough for Will) No '
 "place like home, huh monkey boy?I've got something that's going to make you "
 "better.I just wanted to let ya know that I've seen Caesar.Mind if I take a "
 "look?You could at least try to look excited.You don's understand 

All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("5.No one would even know who they were if they hadn't been playing here that "
 'night.Anyway, this singer guy is like 22, so he could get locked up in the '
 'state pen if he actually dated her.And I just know Colin would never go out '
 'with someone like that, and I just need confirmation.I think you forgot, '
 'like, two buttons.By that I mean he was poking her I know this must be hard '
 "for you to hear.Well-- yeah.(to audience) I said, it's so good to be "
 'here!Hey, I think they need two groupies.Occasionally I get presents from '
 'starfuckers who saw my picture in the paper and want to marry me or '
 'something.I tell it like it is.Mrs.')
1440


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('(frantically) Yes yes, great, yeah, sure!You know, you’re a good guy.I think '
 "I actually got him, but you're doing great.And if I’m not mistaken -- which "
 'I never am -- that briefcase-carrying (into wrist mic) Okay Vicki, it’s go '
 'time.I mean, don’t get me wrong, she’s a good woman.You know me.I know.I '
 'knew it.I was gonna go for a throat rip, but I know you’ve not been into '
 'them.I do wanna get a throat tear in here somehow -- think that’s gonna be '
 'you, small fry.I don’ta know what happened back there.But I’s going to have '
 'to say something to you that I')
1115


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('And then he says... (as Lennon) ‘I know exactly what the people there need.’ '
 '(hands off the journalist’s shoulders; himself again) I’m thinking, '
 'brilliant, a load of free records... dope!We were going to call it ‘The Only '
 'Good Wog’s a Dead Wog‘, then we thought ‘The only Good Taig’, but then we '
 'said ‘TheOnly Good One’s A Dead One’ covered pretty much everything.Yes, '
 'Paul has told us the offer... (listens a moment) Well, tell you the truth '
 'we’d be hoping for a bit more... others who nod in encouragement ) A lot '
 'more Kids got from EMI... 61.‘Well I won’t do that, and I can’tdo this, and '
 'we tell you something we hate all this...’ ‘Every time I see you, makes me '
 'realise, the pressure’s on, every single day...’ 123 124 125 126 127 '
 "68.(through a fixed smile) I'd have to speak to them at work about taking "
 'time off if you want me to cover at the shop.No, really, take your time.Why '
 'do you think I never let you get started?This is')
1587


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Yes, hi, this is Frances...yes, “Frances for tonight”...ha thanks!...I just '
 'wanted to double check that we’re still on for me bringing my guys in a '
 'little * early...okay!I don’t really have time to talk, I’m doing the job of '
 'three people here, but I can see you after rehearsal.(lying) Yeah, I was, '
 'but you know, I needed a break and I thought it would be so funny to come '
 'back to the college for the summer.Remember because when I do the Christmas '
 'show I’11 make more because I’1l be doing like ten shows a week...')
1531


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("Oh, you think they're for you?If Killmonger gets control of it, who do you "
 'think he will come for next?Go, go , come on!Now, tell me about this one '
 ".That ' ll make my day .We have to go now !Go!- Go!Where'd he go ?Go go go "
 "go !, We're not going to make it!We'll go .You want to see us become just "
 "like the people you hate so much?But I can get you one if you'd like .Hello "
 'Sophia, good to see you .(rolls her eyes) Yea I bet the elders loved that '
 ".If you don't believe me you ask your friend what his suit is made of ...")
1417


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('I thought you weren’t coming over for another-- You talk too much.We-- '
 'Perhaps, but all it would take is our enemies finding a single boy for them '
 'to resurrect their entire armies.47 You know, I actually stayed there one '
 'night.I guess he thought naming his kid after an obscure Shakespeare '
 "character would help him get tenure.We'll figure out the rest of this 32 "
 'Everything’s going to be all right, kid.of the sky here needs another ten to '
 'finish gassing up, so I figured I might as well teach you a little '
 'self-defense.55 Exactly who is that thing supposed to be helping?If you hand '
 'Yorick over to another country, you people will never know another peaceful '
 'day.Well, it’s a good thing I found you when I did.We’re on our way to find '
 'her.We still owe a debt to society, and without someone like him, there '
 'might not be another one.Last man or not, it is going to take more than the '
 'apocalypse for me to start taking orders from my patients.We just 

All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('see who can get there first.And one day, if you want it, we can give you a '
 'life far away from this.(with cold, hard eyes) See, Collette, it’s like '
 'this; only two men knew the time and place.I need to get home.It’1ll never '
 'end like that.You have the right to remain silent, but anything you do say '
 'may be taken down and later used as evidence against you.We all want to go '
 'home. It’s not going to end like-- How do you know?It was too dangerous to '
 'call in because the whole city was Memorise the number and the timeand place '
 'of the first meeting, then burn it.It was time to go.And there was no one '
 'else around, you say?He may not come at you right away.I know he fucked you '
 'so roughly that-- the table back) For Christ’s sake-- (on his feet also) You '
 'went back to your mother’s place, took your son into your arms and cried '
 'until it was time for to leave.But no one ever used to care about us.You had '
 'two weeks to convince us you mean what you say.49

All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Maybe all we need to do is find one man willing to do this, somebody the '
 "others would feel safe in You're asking me to spend 200 grand for a share in "
 'a horse that should be in my barn right now.It’s like every other all-or- '
 'nothing, multi-million-dollar gamble we housewives make every day.So how '
 'many times were you called little lady today?I would’ve put him up today, '
 'but he had another ride.If I ever even hear your name again I’1l see you go '
 'to prison....seeing nothing but trouble on the rail, seeing zero Fast first '
 'quarter in 23 and two... the half mile...')
510


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Dad, Spitfires!Spitfires, George.Flew Hurricanes.bomber and its fighter '
 'escort- He got him, he got him!!Greatest plane ever built.Did it help you '
 'against the dive bombers?Super title one week lift sand into the air- wave '
 "of bombs.he DIVES between the German planes... that's a fighter- ME 109, "
 'from the South.Angels two, over Right, Highlanders.yacht around to head '
 "back- Watch for a parachute!No, we're going to England!(four bearers per "
 "stretcher, one at each corner) one day Navy's requisitioned her- there's "
 "Dunkirk, need taking off.Heinkel, 11 @'clock, lining up to drop her load on "
 "that minesweeper - Fighters?Colonel, you're not going to have to decide how "
 'many more wounded to evacuate...')
1083


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('He’1ll see what has happened when he comes home, what we’ve done, what no '
 'one was able to stop.First light they’1l turn that whole But if we can’t get '
 'ahead of him, more people are gonna die.scared he might as well be inside of '
 "a dream-- (re Is that magazine full?We'll get out of its way.I have never "
 'seen anything like that today.It was already done by then but I think she '
 'wanted a witness to her life.No, he’s passed out but I don’t know for how '
 'much longer so get your people out here pronto and I want that special '
 'consideration--huh?It means they’ve been around a long time and know how to '
 'hunt better than we do.Maybe I could have been but...she’s good at it.Get '
 'you to come without any mess.I can see you need to let the wolf out a '
 'little.We’ll get her, I promise you that.We see anything...we’1 will turn '
 'back, call it in.You think I could had stopped her?Can you think of anyone '
 'she might run to?Look, if there’s even a chance...')
1672


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('one of those lovely new Minis.Stay calm, whole So which of the times with me '
 'and my wonderful man do you think we might have got pregnant on?Right, yes, '
 'put like that... Bridget, they want to get a photo with you and me and the '
 'baby.You don’t even need a man, Bridget.... ‘think the pain away’... '
 "absolute nonsense, you’re squeezing out another human, I'd love to see them "
 '‘think it away’.Bridget, I’m not going to pretend this isn’t a sho')
1471


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Baby saw that when they pulled the big top down/ They left behind her dreams '
 'among the litter... And the different kind of love she thought she had '
 "found/ There was some glitter/ But baby can't be broken, cause you see, she "
 'had the finest teacher that was me, I told her... (singing) Don’t exy out '
 'loud, just keep it inside and learn how to hide your feelings/ Fly high and '
 'proud, and if you should fall, remember you almost had it all!Don’t Cry out '
 'loud/ Just keep it in and learn what to do with it/ Fly High and proud., And '
 'if you shouldn’t fall remember that you almost made it / Don ’t cry out loud '
 '/ Just keep your feelings inside and get your feelings out!itittt '
 '109.(singing as best she can, but still rather badly) Baby cried the day the '
 'circus came to town/ Cause she didn‘t want with some clown/ While she danced '
 'without a net upon the wire/ I know a lot about her ‘cause you see , baby is '
 'an awful lot lik')
1478


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("You know... We'll kick your ass out!I don’t want to se with you stupid eally "
 'fucking doing this?That Yeah... A frightened Jay ai see shit.Don’t let me '
 'die!!That’s, like, one of the thing was gonna do while I’m here.He could be '
 'the sociopa letting some psycho i And thanks t Dan ow!Lets go!Let go of '
 "me!ay’ for argument's sake that to fuck up everyone's shit")
594


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('See how we just went in like a big circle but nothing really happened-- That '
 'just did.This whole thing was just a walk & talk one-er.Could be your '
 '"thing", plus you know, mouth props.Make it look like another '
 "murder-suicide?Call me, I can get you work tomorrow.You're like the first "
 "half of A Beautiful Mind.It's like a way to make boring plot stuff less "
 'boring for the audience.(beat) Can I just ask you one thing?Well I guess I '
 'mean it literally.There was no way we were going to let those two steal the '
 'show tonight.If I get it, it could like totally change my career.No, I mean '
 'that in a good way.Cuz nothing is keeping me from landing this one.I know '
 "what it's cool to love something that doesn't love you back.I'm pretty much "
 'like the Zero Dark Thirty of auditioning -- this case Osama would be the '
 'audition, not like a real person -- and get out.Ugly way to go.Im gonna take '
 'a look around back. I have nothing else to say about this.Technicall

All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('There was nothing there that could put out a signal like that.Whatever it '
 "is, it has to be triggered on site -- If I'm going to have someone riding a "
 "bomb into the Interstice, I'd like to be certain it will work... Run my math "
 'by Sci-Division.I need to get this damn thing taken out.They were broken '
 "when I got them -- I didn't even think it was possible, but by god, they are "
 'You cannot make them to do this...')
{'The Guard': {'rouge1': 0.14925373134328357, 'rouge2': 0.0, 'rougeL': 0.08955223880597014, 'rougeLsum': 0.08955223880597014}, 'Batman Year One': {'rouge1': 0.19161676646706585, 'rouge2': 0.0, 'rougeL': 0.08982035928143711, 'rougeLsum': 0.08982035928143711}, 'Inherent Vice': {'rouge1': 0.17435897435897435, 'rouge2': 0.010362694300518135, 'rougeL': 0.09230769230769231, 'rougeLsum': 0.09230769230769231}, 'Colombiana': {'rouge1': 0.15748031496062995, 'rouge2': 0.0, 'rougeL': 0.08661417322834646, 'rougeLsum': 0.08661417322834646}, 'Barney s Version': {'rouge1': 

In [None]:
# Build one table from the ROUGE scores and the generated summaries of the
# two-stage T5 run. The 'two_stage_t5' label appears to become the prefix of
# the summary column (shown as `two_stage_t5_summary` in the preview).
two_stage_t5_results = combine_rouge_and_summary_results(
    rouge_t5_dict,
    summary_t5_dict,
    'two_stage_t5',
)

# Preview the first five rows only; the full table is written to CSV later.
two_stage_t5_results.head(n=5)

Unnamed: 0,rouge1,rouge2,rougeL,rougeLsum,two_stage_t5_summary
The Guard,0.149254,0.0,0.089552,0.089552,You know nothing about me.(getting worked up) ...
Batman Year One,0.191617,0.0,0.08982,0.08982,"Sooner or later, we'll get lucky and someone w..."
Inherent Vice,0.174359,0.010363,0.092308,0.092308,Said Shasta was the only one that ever made an...
Colombiana,0.15748,0.0,0.086614,0.086614,You know what time it is?We'll get all this st...
Barney s Version,0.104839,0.0,0.064516,0.064516,"Oh, I know - have you read, uh, what's it call..."


In [None]:
# Persist the two-stage T5 results table as a CSV file.
two_stage_t5_results.to_csv(path_or_buf='/content/two_stage_t5_results.csv')

# Optional: uncomment to download the saved file through the browser.
# files.download('/content/two_stage_t5_results.csv')

In [None]:
# Column-wise mean of the ROUGE metrics; the free-text summary column is
# removed first so that only the score columns are averaged.
(
    two_stage_t5_results
    .drop('two_stage_t5_summary', axis='columns')
    .mean(axis='index')
)

rouge1       0.157633
rouge2       0.004987
rougeL       0.087577
rougeLsum    0.087577
dtype: float64

# Custom Models (BERT - Text)

## Two Stage Short Model
This model first builds an extractive summary with TextRank and then passes that summary to the *pegasus-xsum* model to generate the final abstractive summary.

In [None]:
# Fresh accumulators handed to the helper for this run.
rouge_short_dict, summary_short_dict = {}, {}

# Run the TextRank -> pegasus-xsum pipeline on `subset_train_df`.
# NOTE(review): 0 and 30 are presumably the start/end row positions to
# process -- confirm against the definition of the helper.
rouge_short_scores, summaries_short = execute_two_stage_text_short_model(
    0,
    30,
    rouge_short_dict,
    summary_short_dict,
    subset_train_df,
)

# Emit the scores and then the summaries, one per line.
print(rouge_short_scores, summaries_short, sep='\n')

846


Downloading (…)lve/main/config.json:   0%|          | 0.00/1.39k [00:00<?, ?B/s]

Downloading tf_model.h5:   0%|          | 0.00/2.28G [00:00<?, ?B/s]

All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Downloading (…)neration_config.json:   0%|          | 0.00/259 [00:00<?, ?B/s]

Downloading (…)ve/main/spiece.model:   0%|          | 0.00/1.91M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/65.0 [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/87.0 [00:00<?, ?B/s]

('Three men are sitting in a car, one with a gun in his hand, the other with a '
 'big smile on his face, the third with a blank look in his eye, as if to say, '
 '"I don\'t care, I\'m just going to go home and have a good night\'s sleep, '
 'but I want to know what\'s going to happen to you when I get home."<n>The '
 'three men look at each other with blank eyes.')
1112


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("The cops are on their way, the SWAT team are on the way, and the dude who's "
 "been on the run for a year is finally about to be caught. He's got a gun in "
 "his hand and he's going to take it out on the cops, but they're not going to "
 'let him take it without a fight, so he grabs the gun out of their hands and '
 'shoots them in the head, then takes off running.')
374


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("Bigfoot's visit to Doc's office has been captured on camera by a member of "
 'staff at the hospital where the Doc is a patient, and the footage has been '
 "released to the BBC's Newsround programme... and it's pretty funny... and "
 "not just because of the giant hairy creature's appearance... and the fact "
 "that he's wearing a giant t-shirt... and Doc has a big smile on his face... "
 "and there's a handshake between Doc and Bigfoot...")
1524


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("Cat's eyes light up as she looks out of the window and sees a man with a gun "
 "in the middle of the street, a man who looks like he's about to start a war, "
 "and a woman with a knife in her hand, a woman who looks as if she's going to "
 "stab a man in the back, and another woman with her arm around a man's neck, "
 'all in the space of a few seconds, all at once.')
1007


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("Barney and Miriam's last night together before she moves into her new home - "
 "here's a look back at their final night together in the Big Brother house, "
 'as they bid a fond farewell to each other and bid farewell to the world of '
 "Big Brother - all in the video below: Barney's eyes light up when he sees "
 "Miriam - she's the most beautiful woman he's ever met - they've known each "
 "other for years, they're so in love, they can't believe it's all come to an "
 'end.')
849


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Hopps and Nick are on their way home from the polar bear enclosure when '
 "Hopps's eyes light up and she starts to look for a way out of the cage, but "
 "she can't see the door, so Nick takes her to the back of the train and Hopps "
 'runs to the front, where she sees two polar bears on the tracks, one of '
 "which is Grand, who is Nick's polar bear friend, and the other is Big Kid.")
1416


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Ramén and Sarah were sitting on the steps of their new house when the wind '
 'suddenly picked up and they heard a loud bang and saw a huge cloud of dust '
 'swirling around their house and Sarah’s head tilted to one side and Ramén’s '
 'face turned to the other side of the room as if to say, “I’m sorry, but I’m '
 'not sure what happened to you.” Ramén turned to Sarah and whispered in her '
 'ear, “You’re not the only one!”')
635


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Watch the video for Nadine\'s new single, "Something", featuring '
 "Radiohead's Thom Yorke, on the BBC's Sound of 2016 programme on Wednesday, "
 '10 March.., at 19:30 BST on BBC One in the UK, and on the iPlayer later in '
 "the year, for the song's remix by Thom YorkE., directed by Mark Ronson and "
 'featuring guest vocals from Joanna Gruesome, who plays the role of Nadine in '
 'the film.')
953


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('George stands on the edge of a gutter and the Raver starts to attack him '
 'with a huge sword which he grabs from his hand and starts to stab him in the '
 'back with the sword, but George doesn’t let him get close to him because '
 'he’s got a flashlight in his hand, and he uses it to shoot out a huge light '
 'into the darkness, and then he turns his flashlight on himself and shoots '
 'out another huge light, and this time it’s into a huge hole in the ground.')
842


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("Celeste and Jesse's first night out together as a married couple in Las "
 "Vegas, USA, and it's not long before they're having a bit of a meltdown, "
 'with Celeste screaming at Jesse, "I don\'t want to be married to you, I want '
 'to live with you!" and Jesse screaming back, "No, no, no!" and Celeste '
 'screaming, "You\'re not married to me, you\'re married to Jesse!"')
899


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Ryan and Natalie are sitting in the middle of the elevator, looking at each '
 'other with a mixture of indifference and a mixture Of Course Is This The Way '
 "It Is, as if they're meant to be, but it doesn't turn out to be the way they "
 "want to see each other's faces, and it's not the way Ryan wants to see "
 'Natalie\'s face, so he turns to look at her and says, "I\'m sorry, I\'m not '
 'going to see you again."')
844


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("Newton's father Lakhma and mother Krishna sit in a long queue outside the "
 'police station, waiting for Atma Singh, the officer in charge of the mobile '
 'phone control room, to come out and take control of the unit from the '
 "control room's control room on the other side of the booth., as they wait "
 'for Newton to return home, Lakhma\'s voice softens and he says, " Newton, I '
 'want to tell you that I want you to shoot you."')
1189


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('They’re sitting at a table, talking on the phone, and Felt leans over and '
 'pulls out a folder from his pocket and pulls it out of Joan’s hand, pulls '
 'out her eyes, and pulls them out of his hand, pulling them out from under '
 'his arm, pulling out her gaze, pulling her eyes out of the folder and '
 'pulling her gaze out of him, pulling him out of her hands, pulling his hands '
 'out of theirs and pulling his arms out of them.')
1028


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Alice and the Bandersnatch sit at the table, waiting for the WHACK to come '
 'out of the stall he’s been in for the last few hours..., and then they look '
 'through the window and see a small door, a door with a bump on the side, and '
 'a small peephole in the door, and the frog starts to squirm, its claws '
 'clawing at the edge of the peephole, then it starts to claw again, and this '
 'time it’s a WHACK.')
1549


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Watch the full match between John Cena and Dolph Lundgren in the main event '
 "of WWE's Night Of Champions on the WWE Network on Sunday, 8 June at 19:55 "
 'BST..... and remember to tune in to WWE Monday Night Raw on the same night '
 'at 10:35 BST on the BBC.. and follow us on social media for all the latest '
 'wrestling news, rumours, previews and reaction... plus all the best '
 'interviews with the stars of the show..')
1149


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("Jennifer is sitting on the floor with her hands on her hips as if she's "
 'trying to hold her own against the weight of the world around her, as if '
 "it's just another day in the life of a young woman with no control over her "
 "own body, no idea what's going to happen next, and no idea how long it will "
 'take her to recover from the trauma of being hit by a car, or by a train, or '
 'even by a dog.')
732


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('MacGruber and Piper are sitting at a desk, watching the news, when they hear '
 'noises coming from the side of the building, they run to the back of the '
 'room to see if they can find the man they’re looking for, only to find he’s '
 'not there, so they run back into the room andPiper takes off her shirt to '
 'reveal a diamond ring on her knee, but it’s too late because the man is '
 'already dead.')
1041


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('They’re sitting in the bar, watching TV, listening to the news, when a man '
 'walks in with a big smile on his face and a big voice in his head.... and '
 'they’re not the only ones... it’s the entire cast of the new TV series, The '
 'Man in the High Castle, which hits our screens this weekend, and here’s a '
 'look back at some of the cast’s favourite moments... and some of their most '
 'memorable ones...')
637


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Two girls are sitting on the floor, one is holding a phone in one hand and '
 'the other has a picture of a dog on her phone in the other hand, they both '
 "look at each other and laugh.., the girl holding the phone says, “I don't "
 "know what you're talking about, but I'm going to put it on the table and "
 'I\'ll take it home with me when I get home.”, the other girl says, "I\'m not '
 'going to do that, I\'ve got to go to bed."')
969


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("Black Panther's first official trailer has been released, and it's a pretty "
 'good look at what we can expect from the new film in the Spider-Man series, '
 'which will be released in the UK on 26 May and in the US on 2 June... and '
 'the rest of the cast... including Chadwick Boseman as Black Panther, Lupita '
 "Nyong'o as Nakia, Jamie Foxx as T'Challa, Paul Rudd as Doctor Strange, "
 'Scarlett Johansson as Black Widow, Mark Ruffalo as Ruffalo, Chris Evans as '
 'Captain America, Chris Hemsworth as Thor, Jeremy Renner as Renner, Zoe '
 'Saldana as Nebula,')
776


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("Yorick's eyes light up when he sees Beth's face as she leans back in her "
 "seat to take a look at the world around her, a world she's never been to, "
 "and one she'd rather not be in, but she can't help but notice that it's not "
 'quite what she expected it to be, and then she takes a deep breath and leans '
 'back into her seat, arms crossed, to look around her.')
1434


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('In the early hours of the morning we are sitting at the kitchen table in the '
 'basement of MAC’s house in the middle of the city, drinking beer and talking '
 'about what’s happened and what we’re going to do about it, and we hear a '
 'huge roar from the next room... We see MAC running towards the room... we '
 'hear MAC shout... we see MAC run towards the back of the room and MAC runs '
 'towards the front of the house...')
843


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('The race is under way, the crowd is cheering, the jockeys are jockeying, the '
 'horses are galloping, and the camera pans to a young girl, Penny, sitting on '
 'the backstretch, gazing out at the crowd, as the race unfolds... and then we '
 'see her eyes widen, as if to say, "This is it, I\'m done." The camera pans '
 'again to a man, Turcotte, on the other side of the track, his eyes fixed on '
 'his horse, Red, as it makes its way to the finish line.')
549


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("A selection of images from the BBC's coverage of the Battle of the Somme, "
 'which took place on 6 July 1916, and which remains one of the most iconic '
 "images of the conflict.... and it's not just the men on board the Spitfire "
 'who are affected... the pictures also show the aftermath of the battle... '
 'and the men who lost their lives in it... and how they came to be on the '
 'front line of the war... and what happened next...')
1378


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Hesium’s eyes light up as he sees Chee-Hesium on the other side of the door, '
 'his body in flames, his head on the edge of the frame.<n> ...and then he’s '
 'on the verge of death, his chest heaving with the weight of his body, his '
 'hands shaking with the heat of the fire... and then he falls to the ground, '
 'his face contorted into a grimace, his eyes fixed on the burning body.')
1178


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Bridget Jones: The Edge of Reason is out in cinemas on 15 January 2017... '
 "and here's what you need to know to watch it: the film opens in cinemas "
 'across the UK on Monday, 15 January, at 00:30 GMT, and on TV on BBC One, BBC '
 'Two, BBC Three, BBC Four, BBC Five, BBC Music, BBC Sport, BBC Radio 4 and '
 'BBC Radio 5 live, as well as on demand on the BBC iPlayer.')
634


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Danielle Clarke leans over to kiss Good Looking Man on the cheek, but he '
 "doesn't seem to want to kiss her, so she leans back to kiss him again, and "
 'he leans back again, but this time with his hand on her shoulder, and they '
 'both look at each other with a blank stare, and Danielle Clarke looks at him '
 'again with a smile on her face, and Good looking Man leans back with a kiss '
 'on his lips, and the two look at one another, and then Good looking man '
 'leans back and kisses Danielle Clarke on her cheek, and she looks at Good '
 'Looking man again, with a look on her')
622


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Jay and Craig are sitting in the living room with a cup of tea when they are '
 "interrupted by a knock at the door from a man dressed in black who says he's "
 "from the police and wants to talk to them about a case they've been involved "
 'in in the past few days.., they run to the kitchen where they are joined by '
 'Seth, who is dressed in a black suit and white shirt, who runs up to them '
 'with a knife in his hand.')
251


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("It's the final episode of The Only Way Is Essex and it's all about the final "
 'three contestants - Ryan, Parker and Mathers - as they prepare to say their '
 'final goodbyes to the show and move on to bigger and better things in their '
 "lives - and we've got all the details in this week's edition of What's Up "
 'With That Girl?, which airs on Channel 4 on Sunday night at 19:30 BST.')
1717


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("They're sitting on the edge of the bed, arms interlocked, palms facing each "
 'other, eyes fixed on the floor, fingers hovering over the edges of the '
 'frame, fingers pointing at the ceiling, arms hovering above the bed as if '
 "they're waiting for the wind to blow them away, hands hovering over their "
 "eyes as if to warn them of what's about to happen, palms hovering above "
{'The Guard': {'rouge1': 0.1705426356589147, 'rouge2': 0.015748031496062992, 'rougeL': 0.10852713178294573, 'rougeLsum': 0.10852713178294573}, 'Batman Year One': {'rouge1': 0.30845771144278605, 'rouge2': 0.020100502512562814, 'rougeL': 0.15920398009950248, 'rougeLsum': 0.15920398009950248}, 'Inherent Vice': {'rouge1': 0.16417910447761197, 'rouge2': 0.0, 'rougeL': 0.1044776119402985, 'rougeLsum': 0.1044776119402985}, 'Colombiana': {'rouge1': 0.23943661971830985, 'rouge2': 0.0, 'rougeL': 0.15492957746478872, 'rougeLsum': 0.15492957746478872}, 'Barney s Version': {'rouge1': 0.19444444444444445, 'rouge2': 0.0,

In [None]:
# Merge the ROUGE scores and the generated summaries into a single table.
# The third argument is a label for this model; the preview below shows a
# `two_stage_short_summary` column, so it is presumably used as a column prefix.
two_stage_short_results = combine_rouge_and_summary_results(
    rouge_short_dict,
    summary_short_dict,
    'two_stage_short',
)
# Preview the first five rows.
two_stage_short_results.head(5)

Unnamed: 0,rouge1,rouge2,rougeL,rougeLsum,two_stage_short_summary
The Guard,0.170543,0.015748,0.108527,0.108527,"Three men are sitting in a car, one with a gun..."
Batman Year One,0.308458,0.020101,0.159204,0.159204,"The cops are on their way, the SWAT team are o..."
Inherent Vice,0.164179,0.0,0.104478,0.104478,Bigfoot's visit to Doc's office has been captu...
Colombiana,0.239437,0.0,0.15493,0.15493,Cat's eyes light up as she looks out of the wi...
Barney s Version,0.194444,0.0,0.125,0.125,Barney and Miriam's last night together before...


In [None]:
# Write the two-stage short-model scores and summaries out as a CSV under /content.
two_stage_short_results.to_csv(path_or_buf='/content/two_stage_text_short_results.csv')
# Optional: uncomment to also download the file through the browser.
# files.download('/content/two_stage_text_short_results.csv')

In [None]:
# Column-wise mean of the ROUGE metrics; the free-text summary column is
# removed first so only the score columns are averaged.
two_stage_short_results.drop(labels=['two_stage_short_summary'], axis='columns').mean(axis='index')

rouge1       0.218762
rouge2       0.019177
rougeL       0.135875
rougeLsum    0.135875
dtype: float64

## Two Stage Long Model
This model first builds an extractive summary with TextRank and then passes it to the *pegasus-cnn_dailymail* model to generate the final abstractive summary.

In [None]:
# Accumulators handed to the helper.
# NOTE(review): presumably filled in place by the helper, mirroring the
# short-model cell -- confirm against the helper's definition.
rouge_long_dict = dict()
summary_long_dict = dict()

# Run the TextRank -> pegasus-cnn_dailymail pipeline.
# NOTE(review): 0 and 30 look like the start/end positions of the rows of
# subset_train_df to process -- confirm against the helper's signature.
rouge_long_scores, summaries_long = execute_two_stage_text_long_model(
    0,
    30,
    rouge_long_dict,
    summary_long_dict,
    subset_train_df,
)

# Show the ROUGE scores followed by the generated summaries, one per line.
print(rouge_long_scores, summaries_long, sep='\n')

846


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('BOYLE leans back into the pillows and sighs, a big grin on his face.<n>Two '
 'little girls are looking on. STANTON, who is still seated.They look blankly '
 "at one another.<n>O'LEARY is sitting in an armchair, a gun in one hand, "
 'pointed at BOYle, a whiskey in the other.<n>As CORNELL walks off, he makes a '
 'sinister turn and looks back at STANTON and MOODY, giving them pause.')


Downloading builder script: 0.00B [00:00, ?B/s]

1112


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('While FLASS looks at the cuffs FLASS, angry, throws them back and SLAMS the '
 'CLOSET .<n>BRUCE CRASHES into the rat-infested GDON hearing something, takes '
 'a look out the window where BRUCE was hanging moments ago.<n>Outside the '
 'window BRUCE is slipping.Once she is close enough to the guards, she whips '
 'off her  the first one across the face with the buckle, knocking his  her '
 "long nails across the second's face.")
374


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Bigfoot leads Doc down a hall and into a room with a Bigfoot opens it up '
 '.<n>Bigfoot hurls bananas at Doc, changing hisMOLECULES as he sees something '
 "over Doc's shoulder.<n>Doc meets Sloane Wolfmann... sign under construction "
 'that partially reads, "Made Possible Doc sees a bunch of kids cleaning the '
 'place... etc... was endowing looney bins wit.<n> Doc meets ADRIAN... smiling '
 'and kissing babies with ADRIAN walks over.Arrival and greeting, handshakes '
 'outside... Doc walking through with DR. THREEPLY and staff.')
1524


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Cat and the MONITOR wake up is at seven.<n>Cat hesitates for a moment, and '
 'then removes one hand from underneath the table, her fist closed tight.<n>As '
 'he goes south, kissing as he goes, the CAMERA holds on her face, and for the '
 'camera to hold on her neck .<n>The gate begins to open and the dogs are up '
 'hurling themselves toward the opening gate eyes flashing and barking loud '
 'enough scare the dead back to life fangs bared, ready to kill.')
1007


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Barney and IZZY dance on the dancefloor .<n>Miriam is shocked when Barney '
 "drops into her seat across from Miriam, startling her Barney doesn't look so "
 'good.<n>After pouring his drink he pauses for a moment to stare at one of '
 'the many personal photos hanging behind the bar.<n>When he walks away WE '
 'REMAIN ON THE PHOTO he was looking at In Black & White - Barney and his '
 'unc.<n>The band playing an upbeat number for a jam-packed dancefloor Barney '
 'slips out from the crowd and races to the bar .')
849


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Nick and Hopps find a door that looks like it belongs on a refrigerator '
 '.<n>Nick ushers Hopps to the door, but opens it to find two big polar '
 'bears.<n>Suddenly, a BIG KID turns off the lights.Polar bear fur... Rat Pack '
 'music... Nick tries to put everything back the way he found it.<n>A SAVAGE '
 'TIGER emerges behind the glass of a nearby cage, startling Nick and sending '
 'him running to Hopps.<n>Hopps looks up to see... Another train coming toward '
 'them... on the same track!')
1416


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Ramén sees a broken rocking chair next to cleft toys strewn on the ground, '
 'and with her words, they take on new meaning.<n>Young Sarah is reading to a '
 'child who turns his head to look straight at him, breaking the fourth '
 'wall.<n>The front door to the house kicks open to a wind-ravaged SHERIFF '
 'MILNER!Inside, they share a hopeful glance, but the floor numbers .<n>With '
 'Sarah just twenty feet behind him, Auggie turns another corner and finds THE '
 'ELEVATOR!')
635


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Krista is folding laundry on her bed when she hears something .<n>It almost '
 "sounds like Radiohead's Karma Police, except She moves to the open window, "
 "looks down to see she's being APERSIAN MAN (45)<n>He's standing below her "
 'window with a guitar, singing Karma Police in a very thick accent, about 10x '
 'more up-tempo, and with lots of little middle-eastern vocal trills.<n>We go '
 'to an ultra-wide shot of Nadine as a tiny little dot, the only person eating '
 'by herself in She immediately grabs .')
953


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('George touches the back of his hand to his mouth and sees the Papillon makes '
 'a crazy sign with his hands.<n>George fights like mad to keep the Raver at '
 'bay, and even gets back to his feet as ANOTHER RAVER RIPS A BITE OUT of '
 'GEORGE’S NECK, shooting blood into the air.<n>Young Man struggles to get to '
 'the surface of the water to The Young Man pushes against the bones, but '
 'George’s weight on top of them makes it impossible .')
842


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Celeste gets a bear hug and it looks like she may have broken her back '
 '.<n>It is intercut with a montage of Super 8 footage of Celeste and Jesse in '
 'the past reluctantly cutting a head off a fish to cook it, Celeste is '
 "repulsed but laughing.<n>Celeste can't stop herself from constantly looking "
 'over at the studio to see if Yogurt Girl is still in there.<n>A beat later, '
 'Celeste tears out of the bar, walking quickly after Jesse.')
899


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Natalie accidentally makes eye contact with one guy, then quickly shifts her '
 'gaze forward .<n>A beautiful choreographed ballet of a bag handle '
 'collapsing, shoes coming off, a laptop going in a separate tray, wallet and '
 'watch sliding into a shoe, a boarding card Sliding into a back pocket... '
 'both hands always moving, Ryan enters and presents his ADMIRALS CLUB '
 'CARD.<n>Ryan looks around for a while before quickly turning his gaze back '
 'and moving through the security screen .')
844


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Atma Singh looks like a miffed school boy sitting in one .<n>Priya is '
 'looking for something in the bushes outside the She breaks a stem and comes '
 'out, holding it.<n>Another constable comes in front of them and ‘shoots’ him '
 'with his finger (shaped like a gun)<n>Atma Singh takes out his cell phone '
 'and directs the camera side towards the boy and clicks a picture.<n>Newton '
 'enters the booth to see sits deep in thought with a worried expression on '
 'his face.')
1189


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('He stops her, “I want to see you.” She opens to him, brings him inside.At '
 'the end of something.Smith waves her to keep going.We hear only Eyes make a '
 'subtle shift, no more than the dilation of pupils.She turns him slow, '
 'sensual, and we think maybe with love and Now he turns to look at her.come '
 'out and unwrap everything.<n>Without going to him.Turns right into it.A day '
 'like any other.And goes to the sink.And by the sheer force of his love Joan '
 'relents and collapses into him, just like that.We HOLD, stay with '
 'him.Flattens the list on WE MOVE IN ON THE LIST Felt picks up the phone.Then '
 'they move from the folder to Gray’s eyes and HOLD.Bates starts to '
 'leave.<n>Gray exits, joining the flow of federal employees on lunch .')
1028


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Lady Long-Ears rushes in and whispers into the Queen’s ear.He leans in for a '
 'kiss just as Lady Long Ears passes.something with wicked claws.<n>Wind '
 'knocked out, she gasps for air, then she sits up, rubbing the bump on her '
 'forehead.Long Ear .<n>She opens the little door and bends down to look '
 'through to the other side.<n>Lady Long Ears rushes in, whispers in the '
 "Queen's ear .")
1549


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("Will's making sandwiches, his eye on Caesar through the WE FIND WALTER just "
 'sitting down on the piano bench .<n>Caesar gets right up against the bars '
 "Rocket's cage, takes Rocket’s face in his hands and looks into his eyes "
 'Reeling Caesar tries to make sense of this.Guard #2 turns to another screen '
 '- where DONNIE IS VISIBLE IN SILENT AGONY, PUSHED UP AGAINST THE IMAGES.')
1149


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('In the moonlight, we see Needy still has a infected- looking BITEMARK near '
 'her shoulder .<n>The bile has hundred of tiny SPINES sticking out of it, '
 'like little porcupine needles, and it appears to move by itself, oozing '
 'across the linoleum, creeping into the Needy ..<n>Needy glances around the '
 'room and sees students fighting back tears.As she stares out the window, she '
 'winds colored yarn around a pair of Popsicle sticks to create a Out a single '
 'window .')
732


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('The noises get GUARD POV - looking left and right around the deserted rail '
 'yard, finally the camera pans up to find MacGruber on the roof of a small '
 'building .<n>MacGruber dances around completely naked (except for his boots) '
 'with a carrot in his ass.<n>We see two hands grip the side of the grave, we '
 'see two eyes dart around the room for a way out.<n>Piper and Mac Gruber '
 'shoot like crazy as they race for the door.')
1041


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('RUDI look at one another and laugh.<n>TERRI at moments in this scene is '
 'picking things up off the floor, the counter RUDI and DAVE look like they '
 'agree.<n>The others laugh, pat his back.The music is distorted .<n>DAVE, '
 'RUTH, MARILYN, TERRI sit in a line at the bar watching a tiny black and '
 'white TV on which the UNDERTONES play ‘Get Over You’, wearing their usual '
 'skinner jeans.TERRI and the CARTOON PUNKS are capering around the living '
 'room while Rudi play on TV and TERRI hollers along.')
637


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Frances gets up does something quickly and sits back.<n>Frances has the '
 'stove going with a couple of items.<n>She sees a Younger Girl crying.Sophie '
 'falls asleep first and Frances quietly puts the computer away.She is really, '
 'really dancing her Frances is one of the last ones to get her stuff out.of '
 'the bed and put one foot on the Frances.<n>He finds the photo he was looking '
 'for and hands the phone to Frances across the table.')
969


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("Klaue smiles, dropping his gun and puts his hands up -- as KLAUE's "
 "PROSTHETIC LEFT ARMsuddenly splits open .<n>T'Challa snatches up a ROLLING "
 'CHANGECART TABLE just as Klaue fires the blaster at him.<n>Zuri turns back '
 "and T'Challa lifts his hand, showing his zuri turns away "
 'uncomfortable.<n>James walks over and looks out of a peep hole, then turns '
 'back with a confused expression .')
776


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Yorick looks around to make sure that no one is watching, before he reaches '
 "for one of the fallen Amazon's REVERS .<n>She pulls out her own .357 MAGNUM "
 'and looks around, finally noticing a CRATE that’s been recently opened, one '
 'large enough for two people to hide inside of.<n>The woman turns on the '
 'flashlight to get a better look at the young man she just struck.The two '
 'look int.')
1434


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('GERRY’s son LIAM has been driving a remote controlled car around the floor '
 "and at this moment he’s so excited he knocks over the table GERRY's glass of "
 'beer has been standing on .<n>Another sweeps his hand along a kitchen '
 'cabinet, tipping everything onto the floor.He tears arou.<n>MA shakes her '
 'Gerry’s daughter LIAM and he says his goodbyes. She is about to go to bed. '
 'He says he will be back in a few hours.')
843


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('The horse that comes around that turn in the lead is the horse likely to win '
 '.<n>Penny watches breathlessly, and the crowd around her, as SECRETARIAT '
 'makes his way to the finish line.<n>A little girl--Penny, at 6--standing at '
 'the rail of a racetrack, beside her tall, handsome father--in the prime of '
 'his life.<n>The bugle call to post sounds.....and Red hears it get out to '
 'her.')
549


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('The Stretcher Bearer NODS, too breathless to speak, then follows his '
 'colleagues back down off the ship- An EXPLOSION hits the water nearby- '
 'Everyone hits the deck as shells IMPACT the water Tommy is PULLED UP SHORT '
 'as Gibson STUMBLES- A 109 STRAFES the length of the mole with gunfire- '
 'soldiers hit the deck, several are HIT... Peter and George RUSH things off '
 'the boat- then start loading the orange life preservers Mr.Dawson looks up '
 'from his charts to see NAVAL OFFICERS AND CREW coming along the harbor .')
1378


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Cheeon laughs, a high nasal Marium nods in defeat, shows his palms, backs '
 'away...<n>As he turns back into the road, Cheeon calls to him...<n>Marium '
 'doesn’t turn back, keeps walking stiffly away, his jaw tight as Cheeon SLAMS '
 'the door behind him.<n>Core is flat on the ground, looking out from '
 'underneath it, shaking, awestruck-- He sees DEAD AND WOUNDED COPS splayed '
 'before CHEEON’S House with MUZZLE FLASHES flaming out of the ATTIC '
 'WINDOW.<n>Cheon’s House with CHEEKON’S .')
1178


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Bridget and Jack are together on the sofa watching Mark, distracted, at the '
 'boarding gate for KHARTOUM .<n>A good looking man clocks her and takes her '
 'in as she walks - she smiles to herself once he has passed.<n>She stops '
 'behind a flower display and looks back at Mark patiently watching a film '
 'with five there, cows with opposable thumbs, We find Jude fast asleep in her '
 'hotel room, with the baby next to her.<n>Bridget strides into the studio, '
 'ready for the day.studio, where Miranda is waiting .')
634


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('The Good Looking Man spies Clarke hovering on the stairs behind Danielle '
 'waits for his response .<n>Good Looking Man draws only a blank ..<n>Joseph '
 "pulls over to the side of the road and smacks Clarke's face, grabs him by "
 'the shirt, and pulls him into the front military academy is looking like a A '
 'knowing look from his mother, tells Clarke that agreeing with her will free '
 'him.<n> Danielle looks at Ray, rolls her eyes and heads to her room .')
622


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Jay opens the bathroom door to find Michael Cera with one girl blowing him '
 'and another eating his ass.His eyes go wide, his jaw drops, and he screams '
 'like a little girl.<n>Seth, Jay and Franco reach the gars getting cornered '
 'by the large, over to the guys and sees“ smiles, then looks back at 1 speed '
 'towards his doom.<n>Jay looks to knife from the kitchen, and throws it laps, '
 'sending the guys CRASHING back into the house.<n>They look up and see Seth '
 "and Jay take off running, as they smash into thing ING BACK TO FRANCO'S!")
251


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Mathers tries to buck him up... love aftermath scenes.clearly replaced the '
 'fake poison in Ryan turns to Priya .<n>Behind Ryan, a VAMPY GIRL in crazy '
 'skimpy booty shorts, holds her phone down below her ass then brings it up to '
 'her face where she smiles and gives a peace sign.<n>Jared, sporting a cast '
 'on his broken arm, shoves Ryan and Mathers DEEPER BACK STAGE, away from '
 'everyone, as Parker joins his side smiling smugly.')
1717


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Flick packs a suitcase, her eyes still red, her hands still shaking '
 'slightly.<n>She hears a deafening ROAR and turns to see another memory '
 'playing out in the water just off the Pier -- Two vast figures clashing in '
 "the surf washes over Flick's face as she watches.<n>Without turning on the "
 'lights she drags one out into the middle of the floor and strains to pry '
 'open the lid with her bare hands -- she doesn’t know what kind of time she '
 'has.')
{'The Guard': {'rouge1': 0.16528925619834708, 'rouge2': 0.016806722689075633, 'rougeL': 0.09917355371900827, 'rougeLsum': 0.09917355371900827}, 'Batman Year One': {'rouge1': 0.16494845360824742, 'rouge2': 0.010416666666666668, 'rougeL': 0.1134020618556701, 'rougeLsum': 0.1134020618556701}, 'Inherent Vice': {'rouge1': 0.10294117647058824, 'rouge2': 0.0, 'rougeL': 0.07352941176470588, 'rougeLsum': 0.07352941176470588}, 'Colombiana': {'rouge1': 0.18055555555555552, 'rouge2': 0.0, 'rougeL': 0.125, 'rougeLsum': 0.125}, 'Barney s 

In [None]:
# Merge the per-movie ROUGE scores and the generated summaries from the long
# two-stage run into a single table, then preview its first rows.
# NOTE(review): the 'two_stage_long' tag presumably becomes the prefix of the
# summary column (`two_stage_long_summary` in the preview) — confirm in the helper.
two_stage_long_results = combine_rouge_and_summary_results(
    rouge_long_dict,
    summary_long_dict,
    'two_stage_long',
)
two_stage_long_results.head()

Unnamed: 0,rouge1,rouge2,rougeL,rougeLsum,two_stage_long_summary
The Guard,0.165289,0.016807,0.099174,0.099174,"BOYLE leans back into the pillows and sighs, a..."
Batman Year One,0.164948,0.010417,0.113402,0.113402,"While FLASS looks at the cuffs FLASS, angry, t..."
Inherent Vice,0.102941,0.0,0.073529,0.073529,Bigfoot leads Doc down a hall and into a room ...
Colombiana,0.180556,0.0,0.125,0.125,Cat and the MONITOR wake up is at seven.<n>Cat...
Barney s Version,0.193103,0.0,0.110345,0.110345,Barney and IZZY dance on the dancefloor .<n>Mi...


In [None]:
# Persist the combined ROUGE/summary table as a CSV in the Colab working directory.
two_stage_long_results.to_csv(
    path_or_buf='/content/two_stage_text_long_results.csv',
)
# Optional (disabled): download the saved file to the local machine.
# files.download('/content/two_stage_text_long_results.csv')

In [None]:
# Mean of every ROUGE metric across all rows; the free-text summary column is
# removed first so that only the score columns are averaged.
(
    two_stage_long_results
    .drop('two_stage_long_summary', axis='columns')
    .mean(axis='index')
)

rouge1       0.207879
rouge2       0.012080
rougeL       0.116712
rougeLsum    0.116712
dtype: float64

## Two Stage Large Model
This model first builds an extractive summary with TextRank and then feeds it to the *pegasus-large* model to generate an abstractive summary.

In [None]:
# Fresh accumulators for this run: ROUGE scores and generated summaries.
rouge_large_dict, summary_large_dict = {}, {}

# Run the TextRank + pegasus-large pipeline on `subset_train_df`.
# NOTE(review): 0 and 30 look like the start/end row bounds of the subset to
# process — confirm against execute_two_stage_text_large_model's signature.
rouge_large_scores, summaries_large = execute_two_stage_text_large_model(
    0,
    30,
    rouge_large_dict,
    summary_large_dict,
    subset_train_df,
)

# Dump both result objects, one per line.
print(rouge_large_scores, summaries_large, sep='\n')

846


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('OVERHEAD SHOT -- BOYLE leans back into the pillows and sighs, a big grin on '
 'his face.Then looks back at BOYLLE.Gets out, looks around -- TWO LITTLE '
 'GIRLS are looking on.EVERETT watches her go, then looks at STANTON, who is '
 'still seated.They look blankly at one another.They are blankly up at ONE '
 'another.Then at the BOYLES, who are standing nearby, playing with the big '
 "toe of another corpse, a blank look on their face.He turns around -- O'LEARY "
 'is sitting in an armchair, a gun in one hand, pointed at BOYLE, a whiskey in '
 'the other.BOYLE enters and looks around, grinning -- A big map.EVEREET looks '
 'on, impressed, as the MAN turns towards the strand and swims in.As CORNELL '
 'walks off, he makes a sinister turn and looks backat STANTON and MOODY, '
 'giving them pause.He and SHEEHY look at one other, until SHEEhy becomes '
 'aware of the song that is playing --  BOY LE shoots a glance at AOIFE.Then '
 'he hears a sound and looks up -- ’BOYLE’s')
1112


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('While FLASS looks at the cuffs, FLASS, angry, throws them back and SLAMS the '
 'CLOSET DOOR  Just at the moment BRUCE CRASHES into the rat-infested GORDON '
 'hearing something, takes a look out the window where BRUCE was hanging '
 'moments ago.Upstairs, the SWAT TEAM, guns poised, peeks over the lip of the '
 'broken ceiling just in time to get a face full of  Outside the BATHROOM '
 'door, THE BAT-MAN covers his ears until the debris settles.But he COMES UP '
 'SHOOTING DOUBLE HEAD SHOT  blood sprays, one down  TWO CHEST SHOTS  a second '
 'cop staggers back, stricken and ONE IN THE HIP  third cop spins, but manages '
 'to keep his feet, until  BLAM!He hangs four stories up by his fingers as '
 'inside  Then, confronted with the sight of his DEAD PARTNER  Out the window '
 'BRUCE is slipping.Once she is close enough to the guards, she whips off her  '
 'the first one across the face with the buckle, knocking his  her long nails '
 "across the second's face.After a moment BAVEN st

All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('DOC gives him a PHOTO OF COY like The Last Supper, Coy as Jesus grabbing '
 'food from BIGFOOT looks at Doc, then the picture He motions down a back '
 'corridor... Bigfoot leads Doc down a hall and into a room with a Bigfoot it '
 'opens up He puts the FROZEN BANANAS in the PNEUMATIC TUBE DISPENSER, sending '
 "chocolate frozen bananas hurling Bigfoot's FACE TURNS AND THE MOLECULES "
 "COMPLETELY CHANGE AS HE SEES SOMETHING OVER DOC'S SHOULDER...")
1524


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('She flattens against the wall, holding onto the ON THE BATHROOM One of the '
 'gunman comes to the window, cannot fit through, looks out, does not see '
 'anybody across, then hears something beneath him, looks down, sees the tip '
 'of a shoe below.The gate to the garden begins to open, and like a shot the '
 'dogs are up, hurling themselves toward the opening gate, eyes flashing, '
 'barking loud enough to scare the dead back to life, fangs bared, ready to '
 'kill.Some do homework on their beds, others do their hair, dance, And '
 'suddenly, at the sound of a door opening, they all stop and look in the same '
 'direction at Cat and the MONITOR Wake up is at seven.Cat hesitates for a '
 'moment, and then removes one hand from underneath the table, her fist closed '
 'tight.A small smile breaks across his face as he looks in his rear view '
 "mirror and sees Cat running and firing, losing ground.Let's him kiss her "
 'lips, her neck, and as he goes south, kissing as she goes, the CA

All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Off Barney, completely shell shocked we -- PRE-LAP - A KNOCKING at the door '
 'The door opens revealing KATE, her eyes filled with tears she throws her '
 "arms around Barney and won't let go.Barney looks around the big, empty "
 'house, going out of his mind, not knowing what to do with himself.Even more '
 'gorgeous than the first time he laid eyes on her Barney drops into his seat '
 "across from Miriam, startling her Barney doesn't look so good.The BAND "
 'playing an upbeat number for a jam-packed dancefloor Barney slips out from '
 'the crowd and races to the bar -- CU - A BLACK & WHITE TV hidden behind the '
 'bar showing the Barney downs another shot, then looks back to the dancefloor '
 'where he sees IZZY slow dancing with The Second Mrs.')
849


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Hopps wipes snow off a back bumper They open the big door, which looks like '
 'it belongs on a refrigerator.As Hopps rolls her eyes and goes back to '
 'collecting clues, Nick lowers the back partition and his eyes go wide.Nick '
 'ushers Hopps to the door, but opens it to find two big The polar bears grab '
 'Nick and Hopps by their throats... ...and yank them off screen.She looks '
 'around to see animals coming back together.Suddenly, a BIG KID turns off the '
 'lights.Polar bear fur... Rat Pack music...')
1416


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('and spot the parked CORVAIR MONZA.KLAXONS sound off-- IN FIRST FLOOR HALLWAY '
 'Dream’ in the Bellows Book, quickly skipping to the end.A five story fall '
 'onto a sidewalk if they don’t make the jump.As Ramén takes in the dreary '
 'room, Ramén sees a broken rocking chair next to cleft toys strewn on the '
 'ground, and with her words, they take on new meaning.A CHILD who Young Sarah '
 'is reading to turns his head to look straight at him, breaking the fourth '
 'wall.But then he sees something horrific -- The front door to the house '
 'kicks open to a wind-ravaged SHERIFF MILNER!Inside, they share a hopeful '
 'glance, but the floor numbers With Sarah just twenty feet behind him, Auggie '
 'turns another corner and finds THE ELEVATOR!Ramén double chec')
635


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('She leaves and Nadine drops the smile, then moves to her window, watching '
 'the two of them walk down the driveway hand in hand, Darian sweetly helping '
 'Krista into the passenger Something about it makes her feel like absolute '
 'shit.Just then, Nadine comes up, squeezes herself between Krista and Darian, '
 'puts an arm around each, big smile.Krista is folding laundry on her bed when '
 "she hears something It almost sounds like Radiohead's Karma Police, except "
 "She moves to the open window, looks down to see she's being A PERSIAN MAN "
 '(45) -- big mustache, long pony tail, iridescent shirt unbuttoned to reveal '
 "copious chest hair and He's standing below her window with a guitar, singing "
 'Karma Police in a very thick accent, about 10x more up-tempo, and with lots '
 'of little middle-eastern vocal trills,Krista pulls away, They just look at '
 'each other for a second, freaked out.We go to an ultra-wide shot of Nadine '
 'as a tiny little dot, the only person eating

All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('George touches the back of his hand to his mouth and sees the Papillon makes '
 'a crazy sign with his hands.George fights like mad to keep the Raver at bay, '
 'and even gets back to his feet as ANOTHER RAVER RIPS A HUGE BITE OUT OF '
 'GEORGE’S NECK, shooting blood into the air.One of them slowly turns his '
 'head, LOOKS RIGHT AT ZED AND GEORGE.He shines a light into Underneath the '
 'bones is a gutter full of water, in which a YOUNG MAN, struggles to get to '
 'the surface of the water to The Young Man pushes against the bones, but '
 'George’s weight on top of them makes it impossible for the Young Man to '
 'TheYoung Man seems to keep pace with George as he desperately tries to get '
 'out of the way.One turns his Head to look at them.Not one to take criticism '
 'She turns and goes back to the fire with the others.George is first to get '
 'his hands on a flashlight which he flips on and looks for the source of the '
 'music.The camera lies right behind Zed and George just ca

All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('But her face is still wet with Jesse gives Celeste a big, long hug.Celeste '
 'and Jesse giggle at their stupid inside joke for a little too long.Lucky has '
 'Celeste in a bear hug and it looks like she may have broken her back.Jesse '
 "is mid-laugh and looks at his phone to see Celeste is calling.Bon Iver's "
 '"Skinny Love" plays as Celeste walks slowly amongst the celebrity '
 'impersonators, tourists and drunks on It is intercut with a montage of Super '
 '8 footage of Celeste and Jesse in the past reluctantly cutting a head off a '
 'fish to cook it, Celeste is repulsed but laughing.Celiste and Paul walk down '
 "stairs into an incredibly cool- looking speak easy.CELESTE'S LAWYER, male, "
 '40, speaks Celeste looks up, makes eye contact with Jesse and smiles Celeste '
 "is still signing.Celesteste can't stop herself from constantly looking over "
 'at the studio to see if Yogurt Girl is still in there.A beat later, Celeste '
 "tears out of the bar, walking quickly after Jesse.

All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('almost as if looking through binoculars.Ryan turns, thinks of something, '
 "then turns back.RYAN MOVING THROUGH THE SECURITY SCREENING It's a beautiful "
 'choreographed ballet of a bag handle collapsing, shoes coming off, a laptop '
 'going in a separate tray, wallet and watch sliding into a shoe, a boarding '
 'card sliding into back pocket... both hands always moving, Ryan enters and '
 'presents his ADMIRALS CLUB CARD.Natalie accidentally makes eye contact with '
 'one guy, then quickly shifts her gaze forward.Ryan looks around for a')
844


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Atma Singh looks like a miffed school boy sitting in one Another beep is '
 'heard.The old head man who was in the queue, walks towards Priya and shows '
 'her his ID.Atma Singh sits on a ledge and listens The two children become '
 "quiet upon hearing Newton's voice.Priya is looking for something in the "
 'bushes outside the She breaks a stem and comes out, holding it.Newton looks '
 'at his father with anger as if asking Two cycle rickshaws- on one, sits '
 "Newton, and on the other, Whenever Newton''s rickshaw goes slightly ahead, "
 'his father tells his rider to buck up.Another constable comes in front of '
 'them and ‘shoots’ him with his finger (shaped like a gun).Atmam Singh takes '
 'out his cell phone and directs the camera side towards the boy and clicks a '
 'picture.Newton enters the booth to see sits deep in thought with a worried '
 'expression on his face.As they walk back, Krishna and Lakhma try to reason '
 'with Newton in their own way.He goes and holds the contro

All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('He stops her, “I want to see you.” She opens to him, brings him inside.At '
 'the end of something.Smith waves her to keep going.We hear only Eyes make a '
 'subtle shift, no more than the dilation of pupils.She turns him slow, '
 'sensual, and we think maybe with love and Now he turns to look at her.come '
 'out and unwrap everything.Moving with a kind of imperious grace.Without '
 'going to him.Turns right into it.A day like any other.And goes to the '
 'sink.And by the sheer force of his love Joan relents and collapses into him, '
 'just like that.We HOLD, stay with him.Flattens the list on WE MOVE IN ON THE '
 'LIST Felt picks up the phone.Then they move from the folder to Gray’s eyes '
 'and HOLD.Bates starts to leave.Gray exits, joining the flow of federal '
 'employees on lunch A REFLECTION IN A WINDOW.And without a word or permission '
 'he just wraps her and the baby in his arms, justlike that.with force and '
 'direction.’ And makes some stabbing motion with his hand.Then h

All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Once inside, they smile, gasp and flatter.She carries it out of the stall, '
 'glancing back at the Bandersnatch before going out.He staggers back, '
 'gasping.His voice goes hoarse with emotion as he begins the tale.She smiles '
 'and goes off.He sits, amused if nothing else.Frustrated, Alice looks out the '
 'window and mutters.She wipes a telltale bit of jam from the side of its '
 'mouth.She laughs at herself and goes back to the table.She hears a loud '
 'WHACK, a small cry of pain, then cheers and laughter.At the end, she whirls '
 'to leer into the face of one terrified frog.Lady Long-Ears rushes in and '
 'whispers into the Queen’s ear.He leans in for a kiss just as Lady Long Ears '
 'passes.something with wicked claws.His face starts to shimmer as he whispers '
 'in her ear.She jerks and drags himself back from the edge of hysteria.They '
 'laugh.She opens the little door and bends down to look through to the other '
 'side.Wind knocked out, she gasps for air, then she sits 

All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('As Will pulls away Caesar strains for a last glimpse, twisting around to '
 'look out the back window.AND SOMETHING ELSE We look through cage bars at '
 'Will and Franklin, who stand far away, across the room.At the height of his '
 'backward motion he catches another Caesar pumps his legs, eager to get a '
 "better look... Will's making sandwiches, his eye on Caesar through the WE "
 'FIND WALTER just sitting down on the piano bench.HE LOOKS UP TO SEE '
 'CAESAR...')
1149


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Needy reaches up and distractedly pulls back the neckline In the moonlight, '
 'we see Needy still has a infected- looking BITEMARK near her shoulder.Needy '
 'rolls her eyes, turns away, and continues limping into the gym.The bile has '
 'hundred of tiny SPINES sticking out of it, like little porcupine needles, '
 'and it appears to move by itself, oozing across the linoleum, creeping into '
 'the Needy grabs Jennifer blindly and claps a hand over her Jennifer falls to '
 'her knees, her screams turning to jagged, insane laughter. Needy scrapes the '
 'sandwich out of the skillet and takes a She tuns on the kitchen radio and is '
 'surprised to hear a Suddenly, she pauses, a look of UNEASE and even FEAR '
 'crossing her face.Needly glances around the room and sees students fighting '
 'back tears.As she stares out the window, she winds colored yarn around a '
 'pair of Popsicle sticks to create a Out a single window, we See an imposing '
 'nine-foot Next to Needy, we saw a pile of u

All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('The noises get GUARD POV - looking left and right around the deserted rail '
 'yard, finally the camera pans up to find MacGruber on the roof of a small '
 'building, dancing around completely naked (except for his boots) with a '
 'carrot in his ass.Piper shuts the door then turns back to see MacGrueber '
 'with his pants off, braced for a fucking against the desk.MacGruber turns '
 'around and starts MacGrubers turns back around, gets down on one knee and '
 'presents to Vicki a hastily assembled copper wire ring with a ball bearing '
 'diamond.Moments later, MacGrubert runs back in, grabs his shorn hair '
 'fragments and takes off again.Macgruber twirls Piper around like a fancy '
 'dance move, clutching him against his chest.Vicki’s eyes dart around the '
 'room for a way out.Piper and MacGruBER shoot like crazy as they race for the '
 'door.Mac Gruber searches his pockets, but instead of a ticket, presents to '
 'the man his hand, middle finger sticking out.From above the grave,

All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('He hugs each one in turn, unaware quite how little the boys DAVE arrives at '
 'TERRI’s shoulder just in time to hear... RUDI look at one another and '
 'laugh.TERRI at moments in this scene is picking things up off the floor, the '
 'counter RUDI and DAVE look like they agree.TERRELIC plunges his hand in, '
 'shakes the eye dry and pops it back in just as BRIAN and RONNIE of RUDI '
 'approach.TER RI and RUC MAN finally look at the stage where RUDI are ripping '
 'into ‘Cops’.The solo continues On one side of the glass RUDI sit exhausted, '
 'but elated.DAVE, RUTH, MARILYN, TERRI sit in a line at the bar watching a '
 'tiny black and white TV on which the UNDERTONES play ‘Get Over You’, wearing '
 'their usual skinner jeans.TerRI and the CARTOON PUNKS are capering around '
 'the living room while Rudi play on TV and TERRI hollers along.TERPRI stands '
 'behind DAVY SMYTH at the desk, watching RUDI.DIVE and the other RUDI members '
 'are in He runs out of fingers They leave.The others

All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Frances gets up does something quickly and sits back Frances has the stove '
 'going with a couple of items.(Found Frances comes down an escalator, sees '
 'her parents waiting for her, holding a dog.How NOT THE ONE IN THE LIVING '
 'ROOM THE Frances is in a T-Shirts that says “ASK ME!” Another girl has on '
 'the same T-Shirt.A very annoyed looking girl is waiting for him there '
 'Frances tiptoes around the other girl and into her room.On the walk back to '
 'her room, she sees a Younger Girl crying.Sophie falls asleep first and '
 'Frances quietly puts the computer away.She is really, really dancing her '
 'Frances is one of the last ones to get her stuff out.of the bed and put one '
 'foot on the Frances nods.Frances comes over and sits next to him, puts her '
 'hand on his back.FranceS makes a move out of the bed.Then decides the girl '
 'needs someone Frances sits and the Younger Girl cries.He finds the photo he '
 "was looking for and hands the phone to Frances across the tabl

All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("Klaue smiles, dropping his gun and puts his hands up -- as KLAUE'S "
 "PROSTHETIC LEFT ARMsuddenly splits open, revealing T'Challa snatches up a "
 'ROLLING CHANGECART TABLE just as Klaue fires the blaster at him .Zuri turns '
 "back and T'CChalla lifts his hand, showing his zuri turns away "
 'uncomfortable.James walks over and looks out of a peep hole, then turns back '
 'with a confused expression .Klaue spins toward the commotion as ...')
776


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Track over to his face, which we can now see is wearing the familiar GAS The '
 'gasmask-clad Yorick looks around to make sure that no one is watching, '
 "before he reaches for one of the fallen Amazon's REVOLVERS.She pulls out her "
 'own .357 MAGNUM and looks around, finally noticing a CRATE that’s been '
 "recently opened, one large enough for two people to hide inside of.She's "
 'rolling a PAWN PIECE back and forth across her As a baby CRIES somewhere in '
 'the plane’s cabin, a pensive Beth is looking out her window at the world far '
 'below.Smash cut to a few hours later for this image of a hissing Yorick, as '
 'his eyes suddenly SPRING OPEN.Just then, a SCREECHING is heard, and the two '
 'soldiers pivot to see Ampersand at them from another column in the Before '
 'this bizarre sight has time to sink in with the two soldier, 355 takes '
 'advantage of the distraction to SHOOT the women dead.The woman turns on the '
 'flashlight to get a better look at the young man she ju

All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('In the side street where MAC is standing -- barely fifty yards behind him -- '
 'a couple of guys in balaclavas are throwing home-made pipe bombs over the '
 'top of the cops and into the crowd.We hear the roar of the chamber... Hacks, '
 'officials...all stunned, paralysed...GERRY is slowly walking towards the '
 'front.... MAC jumps a desk, slides across a wide central table strewn with '
 'press releases and bursts through the door... .+.as GERRY raises his gun '
 'arm.He looks down at the water -- there is indeed a boat waiting on the far '
 'side -- then turns to face his pursuer.We see a man spinning around and '
 'moving away.He looked at the receiver and puts it slowly back onto the We '
 'hear a cry in the dark.MA shakes her GERRY’s son LIAM has been driving a '
 'remote controlled car around the floor and at this moment he’s so excited he '
 "knocks over the table GERRY's glass of beer has been standing on.Another "
 'sweeps his hand along a kitchen cabinet, tipping everyth

All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Then SECRETARIAT--majestic, rippling, fluid, both beautiful and '
 'ferocious--appears around the turn and flies onto the backstretch, Turcotte '
 'on his saddle, steady as a rock.We see WHAT SHE SEES IN HER MEMORY --A '
 'little girl--Penny, at 6--standing at the rail of a racetrack, beside her '
 'tall, handsome father--CHRISTOPHER CHENERY--in the prime of his life, as the '
 'crowd thunders --A pack of racehorses rounding a turn and heading into the '
 'home stretch, hooves pounding, muscles rippling, colors flashing, the horses '
 'thundering toward the wire.She pulls the camera down...and takes a step '
 'backwards toward her .the bugle call to post sounds...')
549


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('The Stretcher Bearer NODS, too breathless to speak, then follows his '
 'colleagues BACK DOWN OFF THE SHIP- An EXPLOSION hits the water nearby- '
 'Everyone hits the deck as shells IMPACT the water Tommy is PULLED UP SHORT '
 'as Gibson STUMBLES- A 109 STRAFES the length of the mole with gunfire- '
 'soldiers hit the deck, several are HIT... Peter and George RUSH things off '
 'the boat- then start loading the orange life preservers Mr.Dawson looks up '
 'from his charts to see NAVAL OFFICERS AND CREW coming along the harbor, '
 'Peter follows his gaze- Peter bursts into the cabin, stacking life vests- '
 'The pile of life vests on the dock SHRINKS...')
1378


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('As he turns back into the road, Cheeon calls to him... Marium doesn’t turn '
 'back, keeps walking stiffly away, his jaw tight as Cheeon SLAMS the door '
 'behind him.He whips back to panic-frozen Arnie-- BEHIND ANOTHER TRUCK, Core '
 'is flat on the ground, looking out from underneath it, shaking, awestruck-- '
 'He sees DEAD AND WOUNDED COPS splayed before CHEEON’S HOUSE with MUZZLE '
 'FLASHES flaming out of the ATTIC WINDOW.Core pulls him down to some kind of '
 "safety behind some ROCKS, throwing his own body of Marium's, trying to press "
 'his gloves But Marium’s already dead, his eyes fixed and startled.A deep '
 'breath for courage and he eases it open but wait-- LOOKING OUT ON MARIUM '
 'THROUGH THE DOOR WINDW as he freezes, terrified eyes locked on something '
 'dead ahead.He coughs softly, rolls away from it... CLOSE ON CORE as he '
 'stares straight ahead at something else, something in n')
1178


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('This time Bridget pauses a moment too long.Bridget and Jack are together on '
 'the sofa watching Mark, distracted, at the boarding gate for KHARTOUM.As '
 'they walk away Bridget turns around and exchanges a smile works in the '
 'Starbucks in Balham.A good looking man clocks her and takes her in as she '
 'walks - she smiles to herself once he has passed.She stops behind a flower '
 'display and looks back at Mark patiently watching a film with five there, '
 'cows with opposable thumbs, We find Jude fast asleep in her hotel room, with '
 'the baby next to her.Bridgemt strides into the studio, ready for the '
 'day.studio, where Miranda is waiting Behind Bridget, we see Miranda, live in '
 'the studio , waiting We see the Hard News countdown clock on the screens and '
 'then Bridget holds up her phone and says “Good Evening Miranda”.Dad fights '
 'his way to the stage, phone in hand.Bridger steps back and looks down to her '
 'bump.bridget turns round to find somebody to take a pho

All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Danielle plants a kiss on Greg, Greg goes along with the kiss, but is '
 "embarrassed in front of Danielle, having retrieved Greg's gum from his "
 'mouth, blows a Clarke, slowly walks away, then turns back.She seemed '
 'Danielle looks at Ray, rolls her eyes and heads to her room.Joseph pulls '
 "over to the side of the road and smacks Clarke's face, grabs him by the "
 'shirt, and pulls him into the front military academy is looking like a A '
 'knowing look from his mother, tells Clarke that agreeing with her will free '
 'him.')
622


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Jay opens the bathroom door to find Michael Cera with one girl blowing him '
 'and another eating his ass.His eyes go wide, his jaw drops, and he screams '
 'like a little girl.Seth, Jay and Franco reach the gars getting cornered by '
 'the large, over to the guys and sees“ smiles, then looks back at 1 speed '
 'towards his doom.Jay looks to knife from the kitchen, and throws it laps, '
 'sending the guys CRASHING back into the house.They look up and see Seth and '
 "Jay take off running, as they smash into thing ING BACK TO FRANCO'S!Jay "
 'flicks the lighter again, and this time, the FLAME LIGHTS and we see WHAT '
 'APPEARS TO BE A MONSTER standing right beside them.The guys slowly WALK into '
 'the living room and wriggling around on the floor, his eyes roli Jay and '
 'Craig tip toe through the house, their weapons held high.They looked away '
 'away from trying the same thing.Thick black s Jayand Craig hold hands as '
 'they slowly make their way through the smoke.Jonah slams in

All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('light smile lines coming in here and Mathers and Ryan talk with a '
 'scarf-wearing ALFONSO DIAPHANO (60s) who flutters about the space getting '
 'things ready for the big show tonight.Behind Ryan, a VAMPY GIRL in crazy '
 'skimpy booty shorts, holds her phone down below her ass then brings it up to '
 'her face where she smiles and gives a peace sign while holding a package of '
 '"Flat Tummy Tea" she\'s hawking.Reveal Jared, sporting a cast on his broken '
 "arm, shoves Ryan and Mather's DEEPER BACK STAGE, away from everyone, as "
 'Parker joins his side smiling smugly.Then we hear the cry... Parker spins '
 'just in time to take a crisp right cross to the jaw from Mathers -- '
 'CLUNK.Suddenly, Ryan starts jerk his body all over the')
1717


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('She hears a deafening ROAR and turns to see another memory playing out in '
 'the water just off the Pier -- Two vast figures clashing in the surf washes '
 "over Flick's face as she watches.The shattered windows come back together as "
 'the shock wave lifts Young Mako off the floor, and the roar retreats, going '
 "back into Trespasser's Dread.Without turning on the lights she drags one out "
 'into the middle of the floor and strains to pry open the lid with her bare '
 'hands -- she doesn’t know what kind of time she has.Flick packs a suitcase, '
 'her eyes still red, her hands still shaking slightly.BACK TO PRESENT The '
 'NOISE of the memory echoes as Raleigh pulls himself up to sit on the edge of '
 'the bed, his face in his hands.They both chuckle awkwardly as they stare at '
 'one another across the hall, a thought on the tip of their tongues, '
 "Simultaneously, they realize there's no point in trying to phrase it, and "
 'they let themselves collide in the middle hallway, 

In [None]:
# combine the ROUGE scores and the generated summaries into one results table
# NOTE(review): the third argument looks like the prefix used for the summary
# column name (`two_stage_large_summary` in the output) -- confirm in the helper
two_stage_large_results = combine_rouge_and_summary_results(
    rouge_large_dict,
    summary_large_dict,
    'two_stage_large',
)
# preview the first five rows
two_stage_large_results.head(5)

Unnamed: 0,rouge1,rouge2,rougeL,rougeLsum,two_stage_large_summary
The Guard,0.133929,0.009009,0.098214,0.098214,OVERHEAD SHOT -- BOYLE leans back into the pil...
Batman Year One,0.200669,0.013468,0.113712,0.113712,"While FLASS looks at the cuffs, FLASS, angry, ..."
Inherent Vice,0.139535,0.0,0.124031,0.124031,DOC gives him a PHOTO OF COY like The Last Sup...
Colombiana,0.170732,0.0,0.105691,0.105691,"She flattens against the wall, holding onto th..."
Barney s Version,0.141304,0.010989,0.097826,0.097826,"Off Barney, completely shell shocked we -- PRE..."


In [None]:
# save results to a csv
# The output path is named once so that the save and the optional download
# below always refer to the same file (the commented-out download previously
# pointed at a different filename than the one actually written).
two_stage_large_csv_path = '/content/two_stage_text_large_results.csv'
two_stage_large_results.to_csv(two_stage_large_csv_path)
# files.download(two_stage_large_csv_path)

In [None]:
# average rouge scores
# the free-text summary column is removed first so that only the ROUGE
# columns (rouge1, rouge2, rougeL, rougeLsum) are averaged, one mean per column
(
    two_stage_large_results
    .drop(columns=['two_stage_large_summary'])
    .mean(axis='index')
)

rouge1       0.202298
rouge2       0.012198
rougeL       0.120102
rougeLsum    0.120102
dtype: float64

The code below exists because the model run above timed out before its results were saved to a `.csv` file.

In [None]:
# Hard-coded ROUGE results for the two-stage "large" model: one entry per film title,
# each holding the four metrics 'rouge1', 'rouge2', 'rougeL' and 'rougeLsum'.
# NOTE(review): presumably pasted from an earlier run so the expensive model cell does
# not have to be re-executed -- confirm provenance before relying on these numbers.
rouge_large_dict = {'The Guard': {'rouge1': 0.13392857142857142, 'rouge2': 0.009009009009009009, 'rougeL': 0.09821428571428573, 'rougeLsum': 0.09821428571428573}, 'Batman Year One': {'rouge1': 0.20066889632107024, 'rouge2': 0.01346801346801347, 'rougeL': 0.11371237458193979, 'rougeLsum': 0.11371237458193979}, 'Inherent Vice': {'rouge1': 0.13953488372093026, 'rouge2': 0.0, 'rougeL': 0.12403100775193798, 'rougeLsum': 0.12403100775193798}, 'Colombiana': {'rouge1': 0.17073170731707316, 'rouge2': 0.0, 'rougeL': 0.1056910569105691, 'rougeLsum': 0.1056910569105691}, 'Barney s Version': {'rouge1': 0.14130434782608697, 'rouge2': 0.010989010989010988, 'rougeL': 0.09782608695652174, 'rougeLsum': 0.09782608695652174}, 'Zootopia': {'rouge1': 0.22754491017964074, 'rouge2': 0.012121212121212121, 'rougeL': 0.1437125748502994, 'rougeLsum': 0.1437125748502994}, 'Scary Stories to Tell in the Dark': {'rouge1': 0.21686746987951805, 'rouge2': 0.032388663967611336, 'rougeL': 0.12048192771084337, 'rougeLsum': 0.12048192771084337}, 'The Edge of Seventeen': {'rouge1': 0.15624999999999997, 'rouge2': 0.0, 'rougeL': 0.09374999999999999, 'rougeLsum': 0.09374999999999999}, 'As Above So Below': {'rouge1': 0.17843866171003717, 'rouge2': 0.022471910112359546, 'rougeL': 0.13382899628252787, 'rougeLsum': 0.13382899628252787}, 'Celeste Jesse Forever': {'rouge1': 0.22053231939163498, 'rouge2': 0.030651340996168584, 'rougeL': 0.12927756653992395, 'rougeLsum': 0.12927756653992395}, 'Up in the Air': {'rouge1': 0.14062500000000003, 'rouge2': 0.0, 'rougeL': 0.078125, 'rougeLsum': 0.078125}, 'Newton': {'rouge1': 0.28484848484848485, 'rouge2': 0.03048780487804878, 'rougeL': 0.1333333333333333, 'rougeLsum': 0.1333333333333333}, 'Mark Felt The Man Who Brought Down the White House': {'rouge1': 0.24647887323943662, 'rouge2': 0.021201413427561842, 'rougeL': 0.11267605633802817, 'rougeLsum': 0.11267605633802817}, 'Alice in Wonderland': {'rouge1': 0.2760942760942761, 'rouge2': 0.006779661016949153, 'rougeL': 
0.1548821548821549, 'rougeLsum': 0.1548821548821549}, 'Rise of the Planet of the Apes': {'rouge1': 0.227027027027027, 'rouge2': 0.010928961748633882, 'rougeL': 0.12972972972972974, 'rougeLsum': 0.12972972972972974}, 'Jennifer s Body': {'rouge1': 0.1698841698841699, 'rouge2': 0.0, 'rougeL': 0.09266409266409266, 'rougeLsum': 0.09266409266409266}, 'MacGruber': {'rouge1': 0.22641509433962265, 'rouge2': 0.0076045627376425855, 'rougeL': 0.13584905660377358, 'rougeLsum': 0.13584905660377358}, 'Good Vibrations': {'rouge1': 0.1755485893416928, 'rouge2': 0.006309148264984227, 'rougeL': 0.10031347962382445, 'rougeLsum': 0.10031347962382445}, 'Frances Ha': {'rouge1': 0.21052631578947367, 'rouge2': 0.022727272727272728, 'rougeL': 0.12030075187969924, 'rougeLsum': 0.12030075187969924}, 'Black Panther': {'rouge1': 0.1951219512195122, 'rouge2': 0.012345679012345678, 'rougeL': 0.10975609756097561, 'rougeLsum': 0.10975609756097561}, 'Y The Last Man': {'rouge1': 0.13675213675213677, 'rouge2': 0.008620689655172414, 'rougeL': 0.10256410256410257, 'rougeLsum': 0.10256410256410257}, 'Shadow Dancer': {'rouge1': 0.19047619047619047, 'rouge2': 0.02214022140221402, 'rougeL': 0.14652014652014653, 'rougeLsum': 0.14652014652014653}, 'Secretariat': {'rouge1': 0.16149068322981366, 'rouge2': 0.0, 'rougeL': 0.11180124223602485, 'rougeLsum': 0.11180124223602485}, 'Dunkirk': {'rouge1': 0.20853080568720378, 'rouge2': 0.009569377990430622, 'rougeL': 0.14218009478672985, 'rougeLsum': 0.14218009478672985}, 'Hold the Dark': {'rouge1': 0.2406015037593985, 'rouge2': 0.01515151515151515, 'rougeL': 0.11278195488721805, 'rougeLsum': 0.11278195488721805}, 'Bridget Jones s Baby': {'rouge1': 0.19455252918287935, 'rouge2': 0.00784313725490196, 'rougeL': 0.10116731517509728, 'rougeLsum': 0.10116731517509728}, 'Dirty Girl': {'rouge1': 0.27672955974842767, 'rouge2': 0.012738853503184712, 'rougeL': 0.13836477987421383, 'rougeLsum': 0.13836477987421383}, 'This Is the End': {'rouge1': 0.254416961130742, 'rouge2': 
0.021352313167259784, 'rougeL': 0.1625441696113074, 'rougeLsum': 0.1625441696113074}, 'Ryan Hansen Solves Crimes on Television': {'rouge1': 0.2272727272727273, 'rouge2': 0.0, 'rougeL': 0.11818181818181818, 'rougeLsum': 0.11818181818181818}, 'Pacific Rim': {'rouge1': 0.23974763406940064, 'rouge2': 0.019047619047619053, 'rougeL': 0.13880126182965302, 'rougeLsum': 0.13880126182965302}}
# Echo the dict as the cell's last expression so the cached scores appear in the cell output.
rouge_large_dict

{'The Guard': {'rouge1': 0.13392857142857142,
  'rouge2': 0.009009009009009009,
  'rougeL': 0.09821428571428573,
  'rougeLsum': 0.09821428571428573},
 'Batman Year One': {'rouge1': 0.20066889632107024,
  'rouge2': 0.01346801346801347,
  'rougeL': 0.11371237458193979,
  'rougeLsum': 0.11371237458193979},
 'Inherent Vice': {'rouge1': 0.13953488372093026,
  'rouge2': 0.0,
  'rougeL': 0.12403100775193798,
  'rougeLsum': 0.12403100775193798},
 'Colombiana': {'rouge1': 0.17073170731707316,
  'rouge2': 0.0,
  'rougeL': 0.1056910569105691,
  'rougeLsum': 0.1056910569105691},
 'Barney s Version': {'rouge1': 0.14130434782608697,
  'rouge2': 0.010989010989010988,
  'rougeL': 0.09782608695652174,
  'rougeLsum': 0.09782608695652174},
 'Zootopia': {'rouge1': 0.22754491017964074,
  'rouge2': 0.012121212121212121,
  'rougeL': 0.1437125748502994,
  'rougeLsum': 0.1437125748502994},
 'Scary Stories to Tell in the Dark': {'rouge1': 0.21686746987951805,
  'rouge2': 0.032388663967611336,
  'rougeL': 0.1204

In [None]:
# Hard-coded generated summaries for the two-stage "large" model, keyed by film title.
# The text is stored verbatim (including the model's spelling and spacing artefacts).
# NOTE(review): presumably pasted from an earlier run so the expensive model cell does
# not have to be re-executed -- confirm provenance; do not "tidy" the strings.
summary_large_dict = {'The Guard': "OVERHEAD SHOT -- BOYLE leans back into the pillows and sighs, a big grin on his face.Then looks back at BOYLLE.Gets out, looks around -- TWO LITTLE GIRLS are looking on.EVERETT watches her go, then looks at STANTON, who is still seated.They look blankly at one another.They are blankly up at ONE another.Then at the BOYLES, who are standing nearby, playing with the big toe of another corpse, a blank look on their face.He turns around -- O'LEARY is sitting in an armchair, a gun in one hand, pointed at BOYLE, a whiskey in the other.BOYLE enters and looks around, grinning -- A big map.EVEREET looks on, impressed, as the MAN turns towards the strand and swims in.As CORNELL walks off, he makes a sinister turn and looks backat STANTON and MOODY, giving them pause.He and SHEEHY look at one other, until SHEEhy becomes aware of the song that is playing --  BOY LE shoots a glance at AOIFE.Then he hears a sound and looks up -- ’BOYLE’s", 'Batman Year One': "While FLASS looks at the cuffs, FLASS, angry, throws them back and SLAMS the CLOSET DOOR  Just at the moment BRUCE CRASHES into the rat-infested GORDON hearing something, takes a look out the window where BRUCE was hanging moments ago.Upstairs, the SWAT TEAM, guns poised, peeks over the lip of the broken ceiling just in time to get a face full of  Outside the BATHROOM door, THE BAT-MAN covers his ears until the debris settles.But he COMES UP SHOOTING DOUBLE HEAD SHOT  blood sprays, one down  TWO CHEST SHOTS  a second cop staggers back, stricken and ONE IN THE HIP  third cop spins, but manages to keep his feet, until  BLAM!He hangs four stories up by his fingers as inside  Then, confronted with the sight of his DEAD PARTNER  Out the window BRUCE is slipping.Once she is close enough to the guards, she whips off her  the first one across the face with the buckle, knocking his  her long nails across the second's face.After a moment BAVEN stands up and they bo", 'Inherent Vice': "DOC gives him 
a PHOTO OF COY like The Last Supper, Coy as Jesus grabbing food from BIGFOOT looks at Doc, then the picture He motions down a back corridor... Bigfoot leads Doc down a hall and into a room with a Bigfoot it opens up He puts the FROZEN BANANAS in the PNEUMATIC TUBE DISPENSER, sending chocolate frozen bananas hurling Bigfoot's FACE TURNS AND THE MOLECULES COMPLETELY CHANGE AS HE SEES SOMETHING OVER DOC'S SHOULDER...", 'Colombiana': "She flattens against the wall, holding onto the ON THE BATHROOM One of the gunman comes to the window, cannot fit through, looks out, does not see anybody across, then hears something beneath him, looks down, sees the tip of a shoe below.The gate to the garden begins to open, and like a shot the dogs are up, hurling themselves toward the opening gate, eyes flashing, barking loud enough to scare the dead back to life, fangs bared, ready to kill.Some do homework on their beds, others do their hair, dance, And suddenly, at the sound of a door opening, they all stop and look in the same direction at Cat and the MONITOR Wake up is at seven.Cat hesitates for a moment, and then removes one hand from underneath the table, her fist closed tight.A small smile breaks across his face as he looks in his rear view mirror and sees Cat running and firing, losing ground.Let's him kiss her lips, her neck, and as he goes south, kissing as she goes, the CAMERA holds on her face, and for the", 'Barney s Version': "Off Barney, completely shell shocked we -- PRE-LAP - A KNOCKING at the door The door opens revealing KATE, her eyes filled with tears she throws her arms around Barney and won't let go.Barney looks around the big, empty house, going out of his mind, not knowing what to do with himself.Even more gorgeous than the first time he laid eyes on her Barney drops into his seat across from Miriam, startling her Barney doesn't look so good.The BAND playing an upbeat number for a jam-packed dancefloor Barney slips out from the crowd and races to the bar -- CU 
- A BLACK & WHITE TV hidden behind the bar showing the Barney downs another shot, then looks back to the dancefloor where he sees IZZY slow dancing with The Second Mrs.", 'Zootopia': 'Hopps wipes snow off a back bumper They open the big door, which looks like it belongs on a refrigerator.As Hopps rolls her eyes and goes back to collecting clues, Nick lowers the back partition and his eyes go wide.Nick ushers Hopps to the door, but opens it to find two big The polar bears grab Nick and Hopps by their throats... ...and yank them off screen.She looks around to see animals coming back together.Suddenly, a BIG KID turns off the lights.Polar bear fur... Rat Pack music...', 'Scary Stories to Tell in the Dark': 'and spot the parked CORVAIR MONZA.KLAXONS sound off-- IN FIRST FLOOR HALLWAY Stella and Ramén witness RED WARNING LIGHTS, causing every Ramén skims ‘The Dream’ in the Bellows Book, quickly skipping to the end.A five story fall onto a sidewalk if they don’t make the jump.As Ramén takes in the dreary room, Ramén sees a broken rocking chair next to cleft toys strewn on the ground, and with her words, they take on new meaning.A CHILD who Young Sarah is reading to turns his head to look straight at him, breaking the fourth wall.But then he sees something horrific -- The front door to the house kicks open to a wind-ravaged SHERIFF MILNER!Inside, they share a hopeful glance, but the floor numbers With Sarah just twenty feet behind him, Auggie turns another corner and finds THE ELEVATOR!Ramén double chec', 'The Edge of Seventeen': "She leaves and Nadine drops the smile, then moves to her window, watching the two of them walk down the driveway hand in hand, Darian sweetly helping Krista into the passenger Something about it makes her feel like absolute shit.Just then, Nadine comes up, squeezes herself between Krista and Darian, puts an arm around each, big smile.Krista is folding laundry on her bed when she hears something It almost sounds like Radiohead's Karma Police, 
except She moves to the open window, looks down to see she's being A PERSIAN MAN (45) -- big mustache, long pony tail, iridescent shirt unbuttoned to reveal copious chest hair and He's standing below her window with a guitar, singing Karma Police in a very thick accent, about 10x more up-tempo, and with lots of little middle-eastern vocal trills,Krista pulls away, They just look at each other for a second, freaked out.We go to an ultra-wide shot of Nadine as a tiny little dot, the only person eating by herself in She immediately grabs", 'As Above So Below': 'George touches the back of his hand to his mouth and sees the Papillon makes a crazy sign with his hands.George fights like mad to keep the Raver at bay, and even gets back to his feet as ANOTHER RAVER RIPS A HUGE BITE OUT OF GEORGE’S NECK, shooting blood into the air.One of them slowly turns his head, LOOKS RIGHT AT ZED AND GEORGE.He shines a light into Underneath the bones is a gutter full of water, in which a YOUNG MAN, struggles to get to the surface of the water to The Young Man pushes against the bones, but George’s weight on top of them makes it impossible for the Young Man to TheYoung Man seems to keep pace with George as he desperately tries to get out of the way.One turns his Head to look at them.Not one to take criticism She turns and goes back to the fire with the others.George is first to get his hands on a flashlight which he flips on and looks for the source of the music.The camera lies right behind Zed and George just catching a piece of the passing Figure', 'Celeste Jesse Forever': 'But her face is still wet with Jesse gives Celeste a big, long hug.Celeste and Jesse giggle at their stupid inside joke for a little too long.Lucky has Celeste in a bear hug and it looks like she may have broken her back.Jesse is mid-laugh and looks at his phone to see Celeste is calling.Bon Iver\'s "Skinny Love" plays as Celeste walks slowly amongst the celebrity impersonators, tourists and drunks on It is intercut 
with a montage of Super 8 footage of Celeste and Jesse in the past reluctantly cutting a head off a fish to cook it, Celeste is repulsed but laughing.Celiste and Paul walk down stairs into an incredibly cool- looking speak easy.CELESTE\'S LAWYER, male, 40, speaks Celeste looks up, makes eye contact with Jesse and smiles Celeste is still signing.Celesteste can\'t stop herself from constantly looking over at the studio to see if Yogurt Girl is still in there.A beat later, Celeste tears out of the bar, walking quickly after Jesse.Ceste gives Beth\'s mom, CAROL, 60, ver', 'Up in the Air': "almost as if looking through binoculars.Ryan turns, thinks of something, then turns back.RYAN MOVING THROUGH THE SECURITY SCREENING It's a beautiful choreographed ballet of a bag handle collapsing, shoes coming off, a laptop going in a separate tray, wallet and watch sliding into a shoe, a boarding card sliding into back pocket... both hands always moving, Ryan enters and presents his ADMIRALS CLUB CARD.Natalie accidentally makes eye contact with one guy, then quickly shifts her gaze forward.Ryan looks around for a", 'Newton': "Atma Singh looks like a miffed school boy sitting in one Another beep is heard.The old head man who was in the queue, walks towards Priya and shows her his ID.Atma Singh sits on a ledge and listens The two children become quiet upon hearing Newton's voice.Priya is looking for something in the bushes outside the She breaks a stem and comes out, holding it.Newton looks at his father with anger as if asking Two cycle rickshaws- on one, sits Newton, and on the other, Whenever Newton''s rickshaw goes slightly ahead, his father tells his rider to buck up.Another constable comes in front of them and ‘shoots’ him with his finger (shaped like a gun).Atmam Singh takes out his cell phone and directs the camera side towards the boy and clicks a picture.Newton enters the booth to see sits deep in thought with a worried expression on his face.As they walk back, Krishna and 
Lakhma try to reason with Newton in their own way.He goes and holds the control unit, which has a button which needs to", 'Mark Felt The Man Who Brought Down the White House': 'He stops her, “I want to see you.” She opens to him, brings him inside.At the end of something.Smith waves her to keep going.We hear only Eyes make a subtle shift, no more than the dilation of pupils.She turns him slow, sensual, and we think maybe with love and Now he turns to look at her.come out and unwrap everything.Moving with a kind of imperious grace.Without going to him.Turns right into it.A day like any other.And goes to the sink.And by the sheer force of his love Joan relents and collapses into him, just like that.We HOLD, stay with him.Flattens the list on WE MOVE IN ON THE LIST Felt picks up the phone.Then they move from the folder to Gray’s eyes and HOLD.Bates starts to leave.Gray exits, joining the flow of federal employees on lunch A REFLECTION IN A WINDOW.And without a word or permission he just wraps her and the baby in his arms, justlike that.with force and direction.’ And makes some stabbing motion with his hand.Then he hangs Felt slows up to and stops ou', 'Alice in Wonderland': 'Once inside, they smile, gasp and flatter.She carries it out of the stall, glancing back at the Bandersnatch before going out.He staggers back, gasping.His voice goes hoarse with emotion as he begins the tale.She smiles and goes off.He sits, amused if nothing else.Frustrated, Alice looks out the window and mutters.She wipes a telltale bit of jam from the side of its mouth.She laughs at herself and goes back to the table.She hears a loud WHACK, a small cry of pain, then cheers and laughter.At the end, she whirls to leer into the face of one terrified frog.Lady Long-Ears rushes in and whispers into the Queen’s ear.He leans in for a kiss just as Lady Long Ears passes.something with wicked claws.His face starts to shimmer as he whispers in her ear.She jerks and drags himself back from the edge of 
hysteria.They laugh.She opens the little door and bends down to look through to the other side.Wind knocked out, she gasps for air, then she sits up, rubbing the bump on her forehead.Long Ear', 'Rise of the Planet of the Apes': "As Will pulls away Caesar strains for a last glimpse, twisting around to look out the back window.AND SOMETHING ELSE We look through cage bars at Will and Franklin, who stand far away, across the room.At the height of his backward motion he catches another Caesar pumps his legs, eager to get a better look... Will's making sandwiches, his eye on Caesar through the WE FIND WALTER just sitting down on the piano bench.HE LOOKS UP TO SEE CAESAR...", 'Jennifer s Body': 'Needy reaches up and distractedly pulls back the neckline In the moonlight, we see Needy still has a infected- looking BITEMARK near her shoulder.Needy rolls her eyes, turns away, and continues limping into the gym.The bile has hundred of tiny SPINES sticking out of it, like little porcupine needles, and it appears to move by itself, oozing across the linoleum, creeping into the Needy grabs Jennifer blindly and claps a hand over her Jennifer falls to her knees, her screams turning to jagged, insane laughter. 
Needy scrapes the sandwich out of the skillet and takes a She tuns on the kitchen radio and is surprised to hear a Suddenly, she pauses, a look of UNEASE and even FEAR crossing her face.Needly glances around the room and sees students fighting back tears.As she stares out the window, she winds colored yarn around a pair of Popsicle sticks to create a Out a single window, we See an imposing nine-foot Next to Needy, we saw a pile of unopened mail scattered casually on t', 'MacGruber': 'The noises get GUARD POV - looking left and right around the deserted rail yard, finally the camera pans up to find MacGruber on the roof of a small building, dancing around completely naked (except for his boots) with a carrot in his ass.Piper shuts the door then turns back to see MacGrueber with his pants off, braced for a fucking against the desk.MacGruber turns around and starts MacGrubers turns back around, gets down on one knee and presents to Vicki a hastily assembled copper wire ring with a ball bearing diamond.Moments later, MacGrubert runs back in, grabs his shorn hair fragments and takes off again.Macgruber twirls Piper around like a fancy dance move, clutching him against his chest.Vicki’s eyes dart around the room for a way out.Piper and MacGruBER shoot like crazy as they race for the door.Mac Gruber searches his pockets, but instead of a ticket, presents to the man his hand, middle finger sticking out.From above the grave, we see two hands grip the side of the', 'Good Vibrations': 'He hugs each one in turn, unaware quite how little the boys DAVE arrives at TERRI’s shoulder just in time to hear... 
RUDI look at one another and laugh.TERRI at moments in this scene is picking things up off the floor, the counter RUDI and DAVE look like they agree.TERRELIC plunges his hand in, shakes the eye dry and pops it back in just as BRIAN and RONNIE of RUDI approach.TER RI and RUC MAN finally look at the stage where RUDI are ripping into ‘Cops’.The solo continues On one side of the glass RUDI sit exhausted, but elated.DAVE, RUTH, MARILYN, TERRI sit in a line at the bar watching a tiny black and white TV on which the UNDERTONES play ‘Get Over You’, wearing their usual skinner jeans.TerRI and the CARTOON PUNKS are capering around the living room while Rudi play on TV and TERRI hollers along.TERPRI stands behind DAVY SMYTH at the desk, watching RUDI.DIVE and the other RUDI members are in He runs out of fingers They leave.The others laugh, pat his back.The music is distorted', 'Frances Ha': "Frances gets up does something quickly and sits back Frances has the stove going with a couple of items.(Found Frances comes down an escalator, sees her parents waiting for her, holding a dog.How NOT THE ONE IN THE LIVING ROOM THE Frances is in a T-Shirts that says “ASK ME!” Another girl has on the same T-Shirt.A very annoyed looking girl is waiting for him there Frances tiptoes around the other girl and into her room.On the walk back to her room, she sees a Younger Girl crying.Sophie falls asleep first and Frances quietly puts the computer away.She is really, really dancing her Frances is one of the last ones to get her stuff out.of the bed and put one foot on the Frances nods.Frances comes over and sits next to him, puts her hand on his back.FranceS makes a move out of the bed.Then decides the girl needs someone Frances sits and the Younger Girl cries.He finds the photo he was looking for and hands the phone to Frances across the table.France's glances at the hand, and in", 'Black Panther': "Klaue smiles, dropping his gun and puts his hands up -- as KLAUE'S PROSTHETIC LEFT ARMsuddenly 
splits open, revealing T'Challa snatches up a ROLLING CHANGECART TABLE just as Klaue fires the blaster at him .Zuri turns back and T'CChalla lifts his hand, showing his zuri turns away uncomfortable.James walks over and looks out of a peep hole, then turns back with a confused expression .Klaue spins toward the commotion as ...", 'Y The Last Man': "Track over to his face, which we can now see is wearing the familiar GAS The gasmask-clad Yorick looks around to make sure that no one is watching, before he reaches for one of the fallen Amazon's REVOLVERS.She pulls out her own .357 MAGNUM and looks around, finally noticing a CRATE that’s been recently opened, one large enough for two people to hide inside of.She's rolling a PAWN PIECE back and forth across her As a baby CRIES somewhere in the plane’s cabin, a pensive Beth is looking out her window at the world far below.Smash cut to a few hours later for this image of a hissing Yorick, as his eyes suddenly SPRING OPEN.Just then, a SCREECHING is heard, and the two soldiers pivot to see Ampersand at them from another column in the Before this bizarre sight has time to sink in with the two soldier, 355 takes advantage of the distraction to SHOOT the women dead.The woman turns on the flashlight to get a better look at the young man she just struck.The two look int", 'Shadow Dancer': "In the side street where MAC is standing -- barely fifty yards behind him -- a couple of guys in balaclavas are throwing home-made pipe bombs over the top of the cops and into the crowd.We hear the roar of the chamber... Hacks, officials...all stunned, paralysed...GERRY is slowly walking towards the front.... MAC jumps a desk, slides across a wide central table strewn with press releases and bursts through the door... 
.+.as GERRY raises his gun arm.He looks down at the water -- there is indeed a boat waiting on the far side -- then turns to face his pursuer.We see a man spinning around and moving away.He looked at the receiver and puts it slowly back onto the We hear a cry in the dark.MA shakes her GERRY’s son LIAM has been driving a remote controlled car around the floor and at this moment he’s so excited he knocks over the table GERRY's glass of beer has been standing on.Another sweeps his hand along a kitchen cabinet, tipping everything onto the floor.He tears arou", 'Secretariat': 'Then SECRETARIAT--majestic, rippling, fluid, both beautiful and ferocious--appears around the turn and flies onto the backstretch, Turcotte on his saddle, steady as a rock.We see WHAT SHE SEES IN HER MEMORY --A little girl--Penny, at 6--standing at the rail of a racetrack, beside her tall, handsome father--CHRISTOPHER CHENERY--in the prime of his life, as the crowd thunders --A pack of racehorses rounding a turn and heading into the home stretch, hooves pounding, muscles rippling, colors flashing, the horses thundering toward the wire.She pulls the camera down...and takes a step backwards toward her .the bugle call to post sounds...', 'Dunkirk': 'The Stretcher Bearer NODS, too breathless to speak, then follows his colleagues BACK DOWN OFF THE SHIP- An EXPLOSION hits the water nearby- Everyone hits the deck as shells IMPACT the water Tommy is PULLED UP SHORT as Gibson STUMBLES- A 109 STRAFES the length of the mole with gunfire- soldiers hit the deck, several are HIT... Peter and George RUSH things off the boat- then start loading the orange life preservers Mr.Dawson looks up from his charts to see NAVAL OFFICERS AND CREW coming along the harbor, Peter follows his gaze- Peter bursts into the cabin, stacking life vests- The pile of life vests on the dock SHRINKS...', 'Hold the Dark': "As he turns back into the road, Cheeon calls to him... 
Marium doesn’t turn back, keeps walking stiffly away, his jaw tight as Cheeon SLAMS the door behind him.He whips back to panic-frozen Arnie-- BEHIND ANOTHER TRUCK, Core is flat on the ground, looking out from underneath it, shaking, awestruck-- He sees DEAD AND WOUNDED COPS splayed before CHEEON’S HOUSE with MUZZLE FLASHES flaming out of the ATTIC WINDOW.Core pulls him down to some kind of safety behind some ROCKS, throwing his own body of Marium's, trying to press his gloves But Marium’s already dead, his eyes fixed and startled.A deep breath for courage and he eases it open but wait-- LOOKING OUT ON MARIUM THROUGH THE DOOR WINDW as he freezes, terrified eyes locked on something dead ahead.He coughs softly, rolls away from it... CLOSE ON CORE as he stares straight ahead at something else, something in n", 'Bridget Jones s Baby': 'This time Bridget pauses a moment too long.Bridget and Jack are together on the sofa watching Mark, distracted, at the boarding gate for KHARTOUM.As they walk away Bridget turns around and exchanges a smile works in the Starbucks in Balham.A good looking man clocks her and takes her in as she walks - she smiles to herself once he has passed.She stops behind a flower display and looks back at Mark patiently watching a film with five there, cows with opposable thumbs, We find Jude fast asleep in her hotel room, with the baby next to her.Bridgemt strides into the studio, ready for the day.studio, where Miranda is waiting Behind Bridget, we see Miranda, live in the studio , waiting We see the Hard News countdown clock on the screens and then Bridget holds up her phone and says “Good Evening Miranda”.Dad fights his way to the stage, phone in hand.Bridger steps back and looks down to her bump.bridget turns round to find somebody to take a photo of them.Both men are at the door, look', 'Dirty Girl': "Danielle plants a kiss on Greg, Greg goes along with the kiss, but is embarrassed in front of Danielle, having retrieved Greg's gum from his mouth, 
blows a Clarke, slowly walks away, then turns back.She seemed Danielle looks at Ray, rolls her eyes and heads to her room.Joseph pulls over to the side of the road and smacks Clarke's face, grabs him by the shirt, and pulls him into the front military academy is looking like a A knowing look from his mother, tells Clarke that agreeing with her will free him.", 'This Is the End': "Jay opens the bathroom door to find Michael Cera with one girl blowing him and another eating his ass.His eyes go wide, his jaw drops, and he screams like a little girl.Seth, Jay and Franco reach the gars getting cornered by the large, over to the guys and sees“ smiles, then looks back at 1 speed towards his doom.Jay looks to knife from the kitchen, and throws it laps, sending the guys CRASHING back into the house.They look up and see Seth and Jay take off running, as they smash into thing ING BACK TO FRANCO'S!Jay flicks the lighter again, and this time, the FLAME LIGHTS and we see WHAT APPEARS TO BE A MONSTER standing right beside them.The guys slowly WALK into the living room and wriggling around on the floor, his eyes roli Jay and Craig tip toe through the house, their weapons held high.They looked away away from trying the same thing.Thick black s Jayand Craig hold hands as they slowly make their way through the smoke.Jonah slams into the painting, hits the floor", 'Ryan Hansen Solves Crimes on Television': 'light smile lines coming in here and Mathers and Ryan talk with a scarf-wearing ALFONSO DIAPHANO (60s) who flutters about the space getting things ready for the big show tonight.Behind Ryan, a VAMPY GIRL in crazy skimpy booty shorts, holds her phone down below her ass then brings it up to her face where she smiles and gives a peace sign while holding a package of "Flat Tummy Tea" she\'s hawking.Reveal Jared, sporting a cast on his broken arm, shoves Ryan and Mather\'s DEEPER BACK STAGE, away from everyone, as Parker joins his side smiling smugly.Then we hear the cry... 
Parker spins just in time to take a crisp right cross to the jaw from Mathers -- CLUNK.Suddenly, Ryan starts jerk his body all over the', 'Pacific Rim': "She hears a deafening ROAR and turns to see another memory playing out in the water just off the Pier -- Two vast figures clashing in the surf washes over Flick's face as she watches.The shattered windows come back together as the shock wave lifts Young Mako off the floor, and the roar retreats, going back into Trespasser's Dread.Without turning on the lights she drags one out into the middle of the floor and strains to pry open the lid with her bare hands -- she doesn’t know what kind of time she has.Flick packs a suitcase, her eyes still red, her hands still shaking slightly.BACK TO PRESENT The NOISE of the memory echoes as Raleigh pulls himself up to sit on the edge of the bed, his face in his hands.They both chuckle awkwardly as they stare at one another across the hall, a thought on the tip of their tongues, Simultaneously, they realize there's no point in trying to phrase it, and they let themselves collide in the middle hallway, kissing hungrily.An AIR R"}
# Echo the dict as the cell's last expression so the cached summaries appear in the cell output.
# NOTE(review): this renders every full summary; consider previewing only a few
# entries to keep the saved notebook small.
summary_large_dict

{'The Guard': "OVERHEAD SHOT -- BOYLE leans back into the pillows and sighs, a big grin on his face.Then looks back at BOYLLE.Gets out, looks around -- TWO LITTLE GIRLS are looking on.EVERETT watches her go, then looks at STANTON, who is still seated.They look blankly at one another.They are blankly up at ONE another.Then at the BOYLES, who are standing nearby, playing with the big toe of another corpse, a blank look on their face.He turns around -- O'LEARY is sitting in an armchair, a gun in one hand, pointed at BOYLE, a whiskey in the other.BOYLE enters and looks around, grinning -- A big map.EVEREET looks on, impressed, as the MAN turns towards the strand and swims in.As CORNELL walks off, he makes a sinister turn and looks backat STANTON and MOODY, giving them pause.He and SHEEHY look at one other, until SHEEhy becomes aware of the song that is playing --  BOY LE shoots a glance at AOIFE.Then he hears a sound and looks up -- ’BOYLE’s",
 'Batman Year One': "While FLASS looks at the 

In [None]:
# Merge the cached per-film ROUGE scores and generated summaries into one table.
# NOTE(review): the label 'two_stage_large' appears to become the prefix of the
# summary column (the preview shows `two_stage_large_summary`) -- confirm in the helper.
two_stage_large_results = combine_rouge_and_summary_results(
    rouge_large_dict,
    summary_large_dict,
    'two_stage_large',
)
# Preview the first rows only; the full table holds one row per film.
two_stage_large_results.head()

Unnamed: 0,rouge1,rouge2,rougeL,rougeLsum,two_stage_large_summary
The Guard,0.133929,0.009009,0.098214,0.098214,OVERHEAD SHOT -- BOYLE leans back into the pil...
Batman Year One,0.200669,0.013468,0.113712,0.113712,"While FLASS looks at the cuffs, FLASS, angry, ..."
Inherent Vice,0.139535,0.0,0.124031,0.124031,DOC gives him a PHOTO OF COY like The Last Sup...
Colombiana,0.170732,0.0,0.105691,0.105691,"She flattens against the wall, holding onto th..."
Barney s Version,0.141304,0.010989,0.097826,0.097826,"Off Barney, completely shell shocked we -- PRE..."


In [None]:
# Save the combined two-stage (large) results table as a CSV under /content.
two_stage_large_results.to_csv('/content/two_stage_text_large_results.csv')
# Uncomment to download the file written above; the path must match the to_csv
# call (the previous comment pointed at a file name that is never written).
# files.download('/content/two_stage_text_large_results.csv')

## Two Stage Long T5 Model
This model first produces an extractive summary with TextRank and then feeds that extract to the *LongT5* model to generate the final abstractive summary.

In [None]:
# Two-stage (TextRank -> LongT5) run. Both accumulators start empty and are
# handed to the helper together with `subset_train_df`.
# NOTE(review): 0 and 30 are presumably the start/end row positions to process --
# confirm against the helper's definition.
# NOTE(review): the recorded output of this cell logs TFPegasusForConditionalGeneration
# being initialised, and its first summary is identical to the Pegasus-large one;
# verify that the helper really loads LongT5.
rouge_t5_dict, summary_t5_dict = {}, {}

rouge_t5_scores, summaries_t5 = execute_two_stage_text_t5_model(
    0,
    30,
    rouge_t5_dict,
    summary_t5_dict,
    subset_train_df,
)

# Scores first, then summaries, each on its own line.
print(rouge_t5_scores, summaries_t5, sep='\n')

846


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('OVERHEAD SHOT -- BOYLE leans back into the pillows and sighs, a big grin on '
 'his face.Then looks back at BOYLLE.Gets out, looks around -- TWO LITTLE '
 'GIRLS are looking on.EVERETT watches her go, then looks at STANTON, who is '
 'still seated.They look blankly at one another.They are blankly up at ONE '
 'another.Then at the BOYLES, who are standing nearby, playing with the big '
 "toe of another corpse, a blank look on their face.He turns around -- O'LEARY "
 'is sitting in an armchair, a gun in one hand, pointed at BOYLE, a whiskey in '
 'the other.BOYLE enters and looks around, grinning -- A big map.EVEREET looks '
 'on, impressed, as the MAN turns towards the strand and swims in.As CORNELL '
 'walks off, he makes a sinister turn and looks backat STANTON and MOODY, '
 'giving them pause.He and SHEEHY look at one other, until SHEEhy becomes '
 'aware of the song that is playing --  BOY LE shoots a glance at AOIFE.Then '
 'he hears a sound and looks up -- ’BOYLE’s')
1112


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('While FLASS looks at the cuffs, FLASS, angry, throws them back and SLAMS the '
 'CLOSET DOOR  Just at the moment BRUCE CRASHES into the rat-infested GORDON '
 'hearing something, takes a look out the window where BRUCE was hanging '
 'moments ago.Upstairs, the SWAT TEAM, guns poised, peeks over the lip of the '
 'broken ceiling just in time to get a face full of  Outside the BATHROOM '
 'door, THE BAT-MAN covers his ears until the debris settles.But he COMES UP '
 'SHOOTING DOUBLE HEAD SHOT  blood sprays, one down  TWO CHEST SHOTS  a second '
 'cop staggers back, stricken and ONE IN THE HIP  third cop spins, but manages '
 'to keep his feet, until  BLAM!He hangs four stories up by his fingers as '
 'inside  Then, confronted with the sight of his DEAD PARTNER  Out the window '
 'BRUCE is slipping.Once she is close enough to the guards, she whips off her  '
 'the first one across the face with the buckle, knocking his  her long nails '
 "across the second's face.After a moment BAVEN st

All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('DOC gives him a PHOTO OF COY like The Last Supper, Coy as Jesus grabbing '
 'food from BIGFOOT looks at Doc, then the picture He motions down a back '
 'corridor... Bigfoot leads Doc down a hall and into a room with a Bigfoot it '
 'opens up He puts the FROZEN BANANAS in the PNEUMATIC TUBE DISPENSER, sending '
 "chocolate frozen bananas hurling Bigfoot's FACE TURNS AND THE MOLECULES "
 "COMPLETELY CHANGE AS HE SEES SOMETHING OVER DOC'S SHOULDER...")
1524


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('She flattens against the wall, holding onto the ON THE BATHROOM One of the '
 'gunman comes to the window, cannot fit through, looks out, does not see '
 'anybody across, then hears something beneath him, looks down, sees the tip '
 'of a shoe below.The gate to the garden begins to open, and like a shot the '
 'dogs are up, hurling themselves toward the opening gate, eyes flashing, '
 'barking loud enough to scare the dead back to life, fangs bared, ready to '
 'kill.Some do homework on their beds, others do their hair, dance, And '
 'suddenly, at the sound of a door opening, they all stop and look in the same '
 'direction at Cat and the MONITOR Wake up is at seven.Cat hesitates for a '
 'moment, and then removes one hand from underneath the table, her fist closed '
 'tight.A small smile breaks across his face as he looks in his rear view '
 "mirror and sees Cat running and firing, losing ground.Let's him kiss her "
 'lips, her neck, and as he goes south, kissing as she goes, the CA

All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Off Barney, completely shell shocked we -- PRE-LAP - A KNOCKING at the door '
 'The door opens revealing KATE, her eyes filled with tears she throws her '
 "arms around Barney and won't let go.Barney looks around the big, empty "
 'house, going out of his mind, not knowing what to do with himself.Even more '
 'gorgeous than the first time he laid eyes on her Barney drops into his seat '
 "across from Miriam, startling her Barney doesn't look so good.The BAND "
 'playing an upbeat number for a jam-packed dancefloor Barney slips out from '
 'the crowd and races to the bar -- CU - A BLACK & WHITE TV hidden behind the '
 'bar showing the Barney downs another shot, then looks back to the dancefloor '
 'where he sees IZZY slow dancing with The Second Mrs.')
849


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Hopps wipes snow off a back bumper They open the big door, which looks like '
 'it belongs on a refrigerator.As Hopps rolls her eyes and goes back to '
 'collecting clues, Nick lowers the back partition and his eyes go wide.Nick '
 'ushers Hopps to the door, but opens it to find two big The polar bears grab '
 'Nick and Hopps by their throats... ...and yank them off screen.She looks '
 'around to see animals coming back together.Suddenly, a BIG KID turns off the '
 'lights.Polar bear fur... Rat Pack music...')
1416


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('and spot the parked CORVAIR MONZA.KLAXONS sound off-- IN FIRST FLOOR HALLWAY '
 'Dream’ in the Bellows Book, quickly skipping to the end.A five story fall '
 'onto a sidewalk if they don’t make the jump.As Ramén takes in the dreary '
 'room, Ramén sees a broken rocking chair next to cleft toys strewn on the '
 'ground, and with her words, they take on new meaning.A CHILD who Young Sarah '
 'is reading to turns his head to look straight at him, breaking the fourth '
 'wall.But then he sees something horrific -- The front door to the house '
 'kicks open to a wind-ravaged SHERIFF MILNER!Inside, they share a hopeful '
 'glance, but the floor numbers With Sarah just twenty feet behind him, Auggie '
 'turns another corner and finds THE ELEVATOR!Ramén double chec')
635


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('She leaves and Nadine drops the smile, then moves to her window, watching '
 'the two of them walk down the driveway hand in hand, Darian sweetly helping '
 'Krista into the passenger Something about it makes her feel like absolute '
 'shit.Just then, Nadine comes up, squeezes herself between Krista and Darian, '
 'puts an arm around each, big smile.Krista is folding laundry on her bed when '
 "she hears something It almost sounds like Radiohead's Karma Police, except "
 "She moves to the open window, looks down to see she's being A PERSIAN MAN "
 '(45) -- big mustache, long pony tail, iridescent shirt unbuttoned to reveal '
 "copious chest hair and He's standing below her window with a guitar, singing "
 'Karma Police in a very thick accent, about 10x more up-tempo, and with lots '
 'of little middle-eastern vocal trills,Krista pulls away, They just look at '
 'each other for a second, freaked out.We go to an ultra-wide shot of Nadine '
 'as a tiny little dot, the only person eating

All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('George touches the back of his hand to his mouth and sees the Papillon makes '
 'a crazy sign with his hands.George fights like mad to keep the Raver at bay, '
 'and even gets back to his feet as ANOTHER RAVER RIPS A HUGE BITE OUT OF '
 'GEORGE’S NECK, shooting blood into the air.One of them slowly turns his '
 'head, LOOKS RIGHT AT ZED AND GEORGE.He shines a light into Underneath the '
 'bones is a gutter full of water, in which a YOUNG MAN, struggles to get to '
 'the surface of the water to The Young Man pushes against the bones, but '
 'George’s weight on top of them makes it impossible for the Young Man to '
 'TheYoung Man seems to keep pace with George as he desperately tries to get '
 'out of the way.One turns his Head to look at them.Not one to take criticism '
 'She turns and goes back to the fire with the others.George is first to get '
 'his hands on a flashlight which he flips on and looks for the source of the '
 'music.The camera lies right behind Zed and George just ca

All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('But her face is still wet with Jesse gives Celeste a big, long hug.Celeste '
 'and Jesse giggle at their stupid inside joke for a little too long.Lucky has '
 'Celeste in a bear hug and it looks like she may have broken her back.Jesse '
 "is mid-laugh and looks at his phone to see Celeste is calling.Bon Iver's "
 '"Skinny Love" plays as Celeste walks slowly amongst the celebrity '
 'impersonators, tourists and drunks on It is intercut with a montage of Super '
 '8 footage of Celeste and Jesse in the past reluctantly cutting a head off a '
 'fish to cook it, Celeste is repulsed but laughing.Celiste and Paul walk down '
 "stairs into an incredibly cool- looking speak easy.CELESTE'S LAWYER, male, "
 '40, speaks Celeste looks up, makes eye contact with Jesse and smiles Celeste '
 "is still signing.Celesteste can't stop herself from constantly looking over "
 'at the studio to see if Yogurt Girl is still in there.A beat later, Celeste '
 "tears out of the bar, walking quickly after Jesse.

All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('almost as if looking through binoculars.Ryan turns, thinks of something, '
 "then turns back.RYAN MOVING THROUGH THE SECURITY SCREENING It's a beautiful "
 'choreographed ballet of a bag handle collapsing, shoes coming off, a laptop '
 'going in a separate tray, wallet and watch sliding into a shoe, a boarding '
 'card sliding into back pocket... both hands always moving, Ryan enters and '
 'presents his ADMIRALS CLUB CARD.Natalie accidentally makes eye contact with '
 'one guy, then quickly shifts her gaze forward.Ryan looks around for a')
844


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Atma Singh looks like a miffed school boy sitting in one Another beep is '
 'heard.The old head man who was in the queue, walks towards Priya and shows '
 'her his ID.Atma Singh sits on a ledge and listens The two children become '
 "quiet upon hearing Newton's voice.Priya is looking for something in the "
 'bushes outside the She breaks a stem and comes out, holding it.Newton looks '
 'at his father with anger as if asking Two cycle rickshaws- on one, sits '
 "Newton, and on the other, Whenever Newton''s rickshaw goes slightly ahead, "
 'his father tells his rider to buck up.Another constable comes in front of '
 'them and ‘shoots’ him with his finger (shaped like a gun).Atmam Singh takes '
 'out his cell phone and directs the camera side towards the boy and clicks a '
 'picture.Newton enters the booth to see sits deep in thought with a worried '
 'expression on his face.As they walk back, Krishna and Lakhma try to reason '
 'with Newton in their own way.He goes and holds the contro

All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('He stops her, “I want to see you.” She opens to him, brings him inside.At '
 'the end of something.Smith waves her to keep going.We hear only Eyes make a '
 'subtle shift, no more than the dilation of pupils.She turns him slow, '
 'sensual, and we think maybe with love and Now he turns to look at her.come '
 'out and unwrap everything.Moving with a kind of imperious grace.Without '
 'going to him.Turns right into it.A day like any other.And goes to the '
 'sink.And by the sheer force of his love Joan relents and collapses into him, '
 'just like that.We HOLD, stay with him.Flattens the list on WE MOVE IN ON THE '
 'LIST Felt picks up the phone.Then they move from the folder to Gray’s eyes '
 'and HOLD.Bates starts to leave.Gray exits, joining the flow of federal '
 'employees on lunch A REFLECTION IN A WINDOW.And without a word or permission '
 'he just wraps her and the baby in his arms, justlike that.with force and '
 'direction.’ And makes some stabbing motion with his hand.Then h

All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Once inside, they smile, gasp and flatter.She carries it out of the stall, '
 'glancing back at the Bandersnatch before going out.He staggers back, '
 'gasping.His voice goes hoarse with emotion as he begins the tale.She smiles '
 'and goes off.He sits, amused if nothing else.Frustrated, Alice looks out the '
 'window and mutters.She wipes a telltale bit of jam from the side of its '
 'mouth.She laughs at herself and goes back to the table.She hears a loud '
 'WHACK, a small cry of pain, then cheers and laughter.At the end, she whirls '
 'to leer into the face of one terrified frog.Lady Long-Ears rushes in and '
 'whispers into the Queen’s ear.He leans in for a kiss just as Lady Long Ears '
 'passes.something with wicked claws.His face starts to shimmer as he whispers '
 'in her ear.She jerks and drags himself back from the edge of hysteria.They '
 'laugh.She opens the little door and bends down to look through to the other '
 'side.Wind knocked out, she gasps for air, then she sits 

All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('As Will pulls away Caesar strains for a last glimpse, twisting around to '
 'look out the back window.AND SOMETHING ELSE We look through cage bars at '
 'Will and Franklin, who stand far away, across the room.At the height of his '
 'backward motion he catches another Caesar pumps his legs, eager to get a '
 "better look... Will's making sandwiches, his eye on Caesar through the WE "
 'FIND WALTER just sitting down on the piano bench.HE LOOKS UP TO SEE '
 'CAESAR...')
1149


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Needy reaches up and distractedly pulls back the neckline In the moonlight, '
 'we see Needy still has a infected- looking BITEMARK near her shoulder.Needy '
 'rolls her eyes, turns away, and continues limping into the gym.The bile has '
 'hundred of tiny SPINES sticking out of it, like little porcupine needles, '
 'and it appears to move by itself, oozing across the linoleum, creeping into '
 'the Needy grabs Jennifer blindly and claps a hand over her Jennifer falls to '
 'her knees, her screams turning to jagged, insane laughter. Needy scrapes the '
 'sandwich out of the skillet and takes a She tuns on the kitchen radio and is '
 'surprised to hear a Suddenly, she pauses, a look of UNEASE and even FEAR '
 'crossing her face.Needly glances around the room and sees students fighting '
 'back tears.As she stares out the window, she winds colored yarn around a '
 'pair of Popsicle sticks to create a Out a single window, we See an imposing '
 'nine-foot Next to Needy, we saw a pile of u

All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('The noises get GUARD POV - looking left and right around the deserted rail '
 'yard, finally the camera pans up to find MacGruber on the roof of a small '
 'building, dancing around completely naked (except for his boots) with a '
 'carrot in his ass.Piper shuts the door then turns back to see MacGrueber '
 'with his pants off, braced for a fucking against the desk.MacGruber turns '
 'around and starts MacGrubers turns back around, gets down on one knee and '
 'presents to Vicki a hastily assembled copper wire ring with a ball bearing '
 'diamond.Moments later, MacGrubert runs back in, grabs his shorn hair '
 'fragments and takes off again.Macgruber twirls Piper around like a fancy '
 'dance move, clutching him against his chest.Vicki’s eyes dart around the '
 'room for a way out.Piper and MacGruBER shoot like crazy as they race for the '
 'door.Mac Gruber searches his pockets, but instead of a ticket, presents to '
 'the man his hand, middle finger sticking out.From above the grave,

All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('He hugs each one in turn, unaware quite how little the boys DAVE arrives at '
 'TERRI’s shoulder just in time to hear... RUDI look at one another and '
 'laugh.TERRI at moments in this scene is picking things up off the floor, the '
 'counter RUDI and DAVE look like they agree.TERRELIC plunges his hand in, '
 'shakes the eye dry and pops it back in just as BRIAN and RONNIE of RUDI '
 'approach.TER RI and RUC MAN finally look at the stage where RUDI are ripping '
 'into ‘Cops’.The solo continues On one side of the glass RUDI sit exhausted, '
 'but elated.DAVE, RUTH, MARILYN, TERRI sit in a line at the bar watching a '
 'tiny black and white TV on which the UNDERTONES play ‘Get Over You’, wearing '
 'their usual skinner jeans.TerRI and the CARTOON PUNKS are capering around '
 'the living room while Rudi play on TV and TERRI hollers along.TERPRI stands '
 'behind DAVY SMYTH at the desk, watching RUDI.DIVE and the other RUDI members '
 'are in He runs out of fingers They leave.The others

All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Frances gets up does something quickly and sits back Frances has the stove '
 'going with a couple of items.(Found Frances comes down an escalator, sees '
 'her parents waiting for her, holding a dog.How NOT THE ONE IN THE LIVING '
 'ROOM THE Frances is in a T-Shirts that says “ASK ME!” Another girl has on '
 'the same T-Shirt.A very annoyed looking girl is waiting for him there '
 'Frances tiptoes around the other girl and into her room.On the walk back to '
 'her room, she sees a Younger Girl crying.Sophie falls asleep first and '
 'Frances quietly puts the computer away.She is really, really dancing her '
 'Frances is one of the last ones to get her stuff out.of the bed and put one '
 'foot on the Frances nods.Frances comes over and sits next to him, puts her '
 'hand on his back.FranceS makes a move out of the bed.Then decides the girl '
 'needs someone Frances sits and the Younger Girl cries.He finds the photo he '
 "was looking for and hands the phone to Frances across the tabl

All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("Klaue smiles, dropping his gun and puts his hands up -- as KLAUE'S "
 "PROSTHETIC LEFT ARMsuddenly splits open, revealing T'Challa snatches up a "
 'ROLLING CHANGECART TABLE just as Klaue fires the blaster at him .Zuri turns '
 "back and T'CChalla lifts his hand, showing his zuri turns away "
 'uncomfortable.James walks over and looks out of a peep hole, then turns back '
 'with a confused expression .Klaue spins toward the commotion as ...')
776


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Track over to his face, which we can now see is wearing the familiar GAS The '
 'gasmask-clad Yorick looks around to make sure that no one is watching, '
 "before he reaches for one of the fallen Amazon's REVOLVERS.She pulls out her "
 'own .357 MAGNUM and looks around, finally noticing a CRATE that’s been '
 "recently opened, one large enough for two people to hide inside of.She's "
 'rolling a PAWN PIECE back and forth across her As a baby CRIES somewhere in '
 'the plane’s cabin, a pensive Beth is looking out her window at the world far '
 'below.Smash cut to a few hours later for this image of a hissing Yorick, as '
 'his eyes suddenly SPRING OPEN.Just then, a SCREECHING is heard, and the two '
 'soldiers pivot to see Ampersand at them from another column in the Before '
 'this bizarre sight has time to sink in with the two soldier, 355 takes '
 'advantage of the distraction to SHOOT the women dead.The woman turns on the '
 'flashlight to get a better look at the young man she ju

All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('In the side street where MAC is standing -- barely fifty yards behind him -- '
 'a couple of guys in balaclavas are throwing home-made pipe bombs over the '
 'top of the cops and into the crowd.We hear the roar of the chamber... Hacks, '
 'officials...all stunned, paralysed...GERRY is slowly walking towards the '
 'front.... MAC jumps a desk, slides across a wide central table strewn with '
 'press releases and bursts through the door... .+.as GERRY raises his gun '
 'arm.He looks down at the water -- there is indeed a boat waiting on the far '
 'side -- then turns to face his pursuer.We see a man spinning around and '
 'moving away.He looked at the receiver and puts it slowly back onto the We '
 'hear a cry in the dark.MA shakes her GERRY’s son LIAM has been driving a '
 'remote controlled car around the floor and at this moment he’s so excited he '
 "knocks over the table GERRY's glass of beer has been standing on.Another "
 'sweeps his hand along a kitchen cabinet, tipping everyth

All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Then SECRETARIAT--majestic, rippling, fluid, both beautiful and '
 'ferocious--appears around the turn and flies onto the backstretch, Turcotte '
 'on his saddle, steady as a rock.We see WHAT SHE SEES IN HER MEMORY --A '
 'little girl--Penny, at 6--standing at the rail of a racetrack, beside her '
 'tall, handsome father--CHRISTOPHER CHENERY--in the prime of his life, as the '
 'crowd thunders --A pack of racehorses rounding a turn and heading into the '
 'home stretch, hooves pounding, muscles rippling, colors flashing, the horses '
 'thundering toward the wire.She pulls the camera down...and takes a step '
 'backwards toward her .the bugle call to post sounds...')
549


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('The Stretcher Bearer NODS, too breathless to speak, then follows his '
 'colleagues BACK DOWN OFF THE SHIP- An EXPLOSION hits the water nearby- '
 'Everyone hits the deck as shells IMPACT the water Tommy is PULLED UP SHORT '
 'as Gibson STUMBLES- A 109 STRAFES the length of the mole with gunfire- '
 'soldiers hit the deck, several are HIT... Peter and George RUSH things off '
 'the boat- then start loading the orange life preservers Mr.Dawson looks up '
 'from his charts to see NAVAL OFFICERS AND CREW coming along the harbor, '
 'Peter follows his gaze- Peter bursts into the cabin, stacking life vests- '
 'The pile of life vests on the dock SHRINKS...')
1378


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('As he turns back into the road, Cheeon calls to him... Marium doesn’t turn '
 'back, keeps walking stiffly away, his jaw tight as Cheeon SLAMS the door '
 'behind him.He whips back to panic-frozen Arnie-- BEHIND ANOTHER TRUCK, Core '
 'is flat on the ground, looking out from underneath it, shaking, awestruck-- '
 'He sees DEAD AND WOUNDED COPS splayed before CHEEON’S HOUSE with MUZZLE '
 'FLASHES flaming out of the ATTIC WINDOW.Core pulls him down to some kind of '
 "safety behind some ROCKS, throwing his own body of Marium's, trying to press "
 'his gloves But Marium’s already dead, his eyes fixed and startled.A deep '
 'breath for courage and he eases it open but wait-- LOOKING OUT ON MARIUM '
 'THROUGH THE DOOR WINDW as he freezes, terrified eyes locked on something '
 'dead ahead.He coughs softly, rolls away from it... CLOSE ON CORE as he '
 'stares straight ahead at something else, something in n')
1178


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('This time Bridget pauses a moment too long.Bridget and Jack are together on '
 'the sofa watching Mark, distracted, at the boarding gate for KHARTOUM.As '
 'they walk away Bridget turns around and exchanges a smile works in the '
 'Starbucks in Balham.A good looking man clocks her and takes her in as she '
 'walks - she smiles to herself once he has passed.She stops behind a flower '
 'display and looks back at Mark patiently watching a film with five there, '
 'cows with opposable thumbs, We find Jude fast asleep in her hotel room, with '
 'the baby next to her.Bridgemt strides into the studio, ready for the '
 'day.studio, where Miranda is waiting Behind Bridget, we see Miranda, live in '
 'the studio , waiting We see the Hard News countdown clock on the screens and '
 'then Bridget holds up her phone and says “Good Evening Miranda”.Dad fights '
 'his way to the stage, phone in hand.Bridger steps back and looks down to her '
 'bump.bridget turns round to find somebody to take a pho

All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
# Build the two-stage T5 results frame from the ROUGE and summary dicts,
# then preview its first rows.
two_stage_t5_results = combine_rouge_and_summary_results(
    rouge_t5_dict,
    summary_t5_dict,
    'two_stage_t5',
)
two_stage_t5_results.head()

In [None]:
# write the two-stage T5 results to a csv file under /content
two_stage_t5_results.to_csv(path_or_buf='/content/two_stage_text_t5_results.csv')
# uncomment to also download the file through the browser:
# files.download('/content/two_stage_text_t5_results.csv')

In [None]:
# column-wise mean of the results once the summary text column is removed
(
    two_stage_t5_results
    .drop('two_stage_t5_summary', axis=1)
    .mean()
)

# TextRank Sentences (BERT Dialog)

In [None]:
# Run TextRank over the dialog of (up to) the first 30 scripts in
# subset_train_df and collect the result per movie title.
ranked_sentences_dialog_dict = {}

# cap the loop at the frame length so a shorter subset cannot raise IndexError
for i in range(min(30, len(subset_train_df))):
    # look the row up once instead of once per column
    row = subset_train_df.iloc[i]
    ranked_sentences = run_textrank_model(row['bert_dialog'])

    # store the ranked sentences under the movie title
    title = row['title']
    ranked_sentences_dialog_dict[title] = ranked_sentences

ranked_sentences_dialog_dict

1701
1480
1807
1467
1736
1702
1173
1458
1366
2077
1597
1322
1833
1062
1009
1643
1440
1115
1587
1531
1417
1578
1446
510
1083
1672
1471
1478
594
1415


{'The Guard': "You know nothing about me.(getting worked up)\n Well why don’t you put in your report\n that Sergeant Boyle went out of his\n fucking way to do you boys a fucking\n favour and he got really fucking\n annoyed when you started asking him\n stupid fucking questions about a few\n missing fucking guns, trying to\n fucking catch him out as if he’s just\n some kind of fucking gobshite!I’m not sure if you ever get over\n something like that.I just didn’t realise...\n Do you think he might have met\n someone here who did something bad\n to him?Pretending to be sad is what\n they pay me for, I always think.But I cannot think of anybody who\n would have wanted to do something\n bad to Aidan.Are you going to continue to make\n or are you going to fucking tell\n me something?Sure don’t I know well.We all know that.you know.You know.You know him?You know him?You know for what.Now do I know him?I know.I know.I know.I know.I know.87\n 87\n I know.I know you weren’t.Sure I know well.You 

In [None]:
# one row per movie: the title and its TextRank output for the dialog
textrank_dialog_df = pd.DataFrame(
    list(ranked_sentences_dialog_dict.items()),
    columns=['title', 'ranked_sentences'],
)
textrank_dialog_df.head()

Unnamed: 0,title,ranked_sentences
0,The Guard,You know nothing about me.(getting worked up)\...
1,Batman Year One,"Sooner or\n later, we'll get lucky and someone..."
2,Inherent Vice,Said Shasta was the only one that\n ever made ...
3,Colombiana,You know what time it\n is?We'll get all this ...
4,Barney s Version,"There's so much I want\n to talk about... Oh, ..."


In [None]:
# write the TextRank dialog frame to a csv file under /content
textrank_dialog_df.to_csv(path_or_buf='/content/textrank_dialog.csv')
# uncomment to also download the file through the browser:
# files.download('/content/textrank_dialog.csv')

# TextRank Sentences (BERT Text)

In [None]:
# Run TextRank over the scene text of (up to) the first 30 scripts in
# subset_train_df and collect the result per movie title.
ranked_sentences_text_dict = {}

# cap the loop at the frame length so a shorter subset cannot raise IndexError
for i in range(min(30, len(subset_train_df))):
    # look the row up once instead of once per column
    row = subset_train_df.iloc[i]
    ranked_sentences = run_textrank_model(row['bert_text'])

    # store the ranked sentences under the movie title
    title = row['title']
    ranked_sentences_text_dict[title] = ranked_sentences

ranked_sentences_text_dict

846
1112
374
1524
1007
849
1416
635
953
842
899
844
1189
1028
1549
1149
732
1041
637
969
776
1434
843
549
1378
1178
634
622
251
1717


{'The Guard': "OVERHEAD SHOT -- BOYLE leans back into the pillows and\n sighs, a big grin on his face.Then looks back at\n BOYLE.Gets out, looks around --\n TWO LITTLE GIRLS are looking on.EVERETT watches her go, then looks\n at STANTON, who is still seated.They look blankly at one another.They look blankly at one another.Then at\n BOYLE, who is standing nearby, playing with the big toe\n of another corpse, a blank look on his face.He turns around --\n O'LEARY is sitting in an armchair, a gun in one hand,\n pointed at BOYLE, a whiskey in the other.BOYLE enters and looks around, grinning --\n A big map.EVERETT looks on, impressed, as the MAN turns towards\n the strand and swims in.As CORNELL walks off, he makes a sinister turn and looks\n back at STANTON and MOODY, giving them pause.He and SHEEHY look at one another, until\n SHEEHY becomes aware of the song that is playing --\n BOYLE shoots a glance at AOIFE.Then he hears a sound and looks up --\n BOYLE’s POV -- a car is coasting down t

In [None]:
# one row per movie: the title and its TextRank output for the scene text
textrank_text_df = pd.DataFrame(
    list(ranked_sentences_text_dict.items()),
    columns=['title', 'ranked_sentences'],
)
textrank_text_df.head()

Unnamed: 0,title,ranked_sentences
0,The Guard,OVERHEAD SHOT -- BOYLE leans back into the pil...
1,Batman Year One,"While FLASS looks at the cuffs,\n FLASS, angry..."
2,Inherent Vice,DOC gives him a PHOTO OF COY like The Last Sup...
3,Colombiana,"She flattens against the wall, holding onto th..."
4,Barney s Version,"Off Barney, completely shell shocked we --\n P..."


In [None]:
# write the TextRank text frame to a csv file under /content
textrank_text_df.to_csv(path_or_buf='/content/textrank_text.csv')
# uncomment to also download the file through the browser:
# files.download('/content/textrank_text.csv')

# Avatar -- Summary Comparison

### Movie script

In [None]:
# Build a one-row frame for Avatar holding the script file paths, the reference
# plot outline and the concatenated BERT dialog / text.

# get the movie info for Avatar
# (.copy() gives an independent frame, so the column assignment below no longer
# triggers SettingWithCopyWarning)
avatar_info = data_df.loc[data_df['title'] == 'Avatar'].copy()

# drop leading zeros for imdb_id
avatar_info['imdb_id'] = avatar_info['imdb_id'].str.lstrip('0')

# merge with wiki reference
# A left join keeps only the Avatar rows. With how='outer' every wiki_df row was
# pulled in as well, and head(1) only returned Avatar as long as the left rows
# happened to come first, which is not guaranteed when outer joins sort by key.
avatar_df = pd.merge(avatar_info, wiki_df, how='left', on='imdb_id')

# rename columns
avatar_df = avatar_df.rename(columns={'title_x': 'title'})

# only keep the first row (copied so new columns can be added safely)
avatar = avatar_df.head(1).copy()

# get bert dialog
avatar['bert_dialog'] = avatar['bert_data'].apply(concatenate_dialog)

# get bert text
avatar['bert_text'] = avatar['bert_data'].apply(concatenate_text)

# reorder columns
avatar = avatar[['title', 'imdb_id', 'bert_data', 'lemma_data', 'plot_outline', 'bert_dialog', 'bert_text']]

avatar

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  avatar_info['imdb_id'] = avatar_info['imdb_id'].str.lstrip('0')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  avatar['bert_dialog'] = avatar.apply(lambda x: concatenate_dialog(x['bert_data']), axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  avatar['bert_text'] = avatar.apply(lambda x: conca

Unnamed: 0,title,imdb_id,bert_data,lemma_data,plot_outline,bert_dialog,bert_text
0,Avatar,499549,/content/drive/MyDrive/W266_Movie_Data/BERT_an...,/content/drive/MyDrive/W266_Movie_Data/raw_tex...,"When his brother is killed in a robbery, parap...",\n \n \n \n \n \n \n Written by\n \n James Ca...,"THE SOUND OF DRUMS, from a great distance, gr..."


In [None]:
# accumulators for the Avatar comparison: generated summaries in one dict,
# their rouge scores in the other
avatar_summary_dict, avatar_rouge_dict = {}, {}

## Lemmas

Pegasus Short

In [None]:
# Lemma file -> run_pegasus_short_model, scored against the plot outline.
raw_lemma = avatar.loc[0, 'lemma_data']

# read the whole lemma file and remove every newline character
with open(raw_lemma, mode='r') as lemma_file:
    text_data = ''.join(lemma_file.read().split('\n'))

summary = run_pegasus_short_model(text_data)
rouge_scores = calculate_rouge_scores(summary, avatar['plot_outline'])

# record the summary and its rouge scores under the same key
avatar_summary_dict.update(lemma_pegasus_short=summary)
avatar_rouge_dict.update(lemma_pegasus_short=rouge_scores)

print(summary, rouge_scores, sep='\n')

All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("Jake's voice is so strong it's hard to hear him , but I can see it in his "
 "eyes and his face and his eyes can't see me , so I can hear him in my head "
 'and my head can see him in his body and his body can see me in my body and '
 'my body can hear me in his mind and my mind can hear us in our body and our '
 "mind and we can hear each other's voices.")
Jake's voice is so strong it's hard to hear him , but I can see it in his eyes and his face and his eyes can't see me , so I can hear him in my head and my head can see him in his body and his body can see me in my body and my body can hear me in his mind and my mind can hear us in our body and our mind and we can hear each other's voices.
{'rouge1': 0.14225941422594143, 'rouge2': 0.0, 'rougeL': 0.10041841004184099, 'rougeLsum': 0.10041841004184099}


Pegasus Long

In [None]:
# Lemma file -> run_pegasus_long_model, scored against the plot outline.
raw_lemma = avatar.loc[0, 'lemma_data']

# read the whole lemma file and remove every newline character
with open(raw_lemma, mode='r') as lemma_file:
    text_data = ''.join(lemma_file.read().split('\n'))

summary = run_pegasus_long_model(text_data)
rouge_scores = calculate_rouge_scores(summary, avatar['plot_outline'])

# record the summary and its rouge scores under the same key
avatar_summary_dict.update(lemma_pegasus_long=summary)
avatar_rouge_dict.update(lemma_pegasus_long=rouge_scores)

print(summary, rouge_scores, sep='\n')

Downloading (…)lve/main/config.json:   0%|          | 0.00/1.12k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/2.28G [00:00<?, ?B/s]

All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Downloading (…)ve/main/spiece.model:   0%|          | 0.00/1.91M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/65.0 [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/88.0 [00:00<?, ?B/s]

('Combat vet Jake sully has been injured on duty in Iraq and Afghanistan '
 '.<n>He now lives in a wheelchair in a VA hospital in NYC and struggles to '
 'cope with his injuries ..<n>The show follows Jake as he tries to come to '
 'terms with his disability and the challenges he faces in getting back on his '
 "feet . Hilarious and touching performances from the show's cast and crew, "
 "including Jake's best friend, Kate who is also a vet .")
Combat vet Jake sully has been injured on duty in Iraq and Afghanistan .<n>He now lives in a wheelchair in a VA hospital in NYC and struggles to cope with his injuries ..<n>The show follows Jake as he tries to come to terms with his disability and the challenges he faces in getting back on his feet . Hilarious and touching performances from the show's cast and crew, including Jake's best friend, Kate who is also a vet .
{'rouge1': 0.2638297872340426, 'rouge2': 0.034334763948497854, 'rougeL': 0.15319148936170213, 'rougeLsum': 0.15319148936170213}


Pegasus Large

In [None]:
# Lemma file -> run_pegasus_large_model, scored against the plot outline.
raw_lemma = avatar.loc[0, 'lemma_data']

# read the whole lemma file and remove every newline character
with open(raw_lemma, mode='r') as lemma_file:
    text_data = ''.join(lemma_file.read().split('\n'))

summary = run_pegasus_large_model(text_data)
rouge_scores = calculate_rouge_scores(summary, avatar['plot_outline'])

# record the summary and its rouge scores under the same key
avatar_summary_dict.update(lemma_pegasus_large=summary)
avatar_rouge_dict.update(lemma_pegasus_large=rouge_scores)

print(summary, rouge_scores, sep='\n')

Downloading (…)lve/main/config.json:   0%|          | 0.00/3.09k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/2.28G [00:00<?, ?B/s]

All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Downloading (…)ve/main/spiece.model:   0%|          | 0.00/1.91M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/65.0 [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/88.0 [00:00<?, ?B/s]

('we be very low over the forest now , glide fast , the drum build to a peak '
 '-- voice ( v.o. CITY - night a screech of brake as a vehicle wipe frame , '
 'revealing -- jake sully , a scar and scruffy combat vet , sit in a beat up '
 'carbon - fiber wheelchair . Jake stare upward at the level of the city . ) '
 'they can fix a spinal , if you have get the money .')
we be very low over the forest now , glide fast , the drum build to a peak -- voice ( v.o. CITY - night a screech of brake as a vehicle wipe frame , revealing -- jake sully , a scar and scruffy combat vet , sit in a beat up carbon - fiber wheelchair . Jake stare upward at the level of the city . ) they can fix a spinal , if you have get the money .
{'rouge1': 0.16289592760180996, 'rouge2': 0.027397260273972605, 'rougeL': 0.09954751131221719, 'rougeLsum': 0.09954751131221719}


LongT5

In [None]:
# Lemma file -> run_longt5_model, scored against the plot outline.
raw_lemma = avatar.loc[0, 'lemma_data']

# read the whole lemma file and remove every newline character
with open(raw_lemma, mode='r') as lemma_file:
    text_data = ''.join(lemma_file.read().split('\n'))

summary = run_longt5_model(text_data)
rouge_scores = calculate_rouge_scores(summary, avatar['plot_outline'])

# record the summary and its rouge scores under the same key
avatar_summary_dict.update(lemma_longt5=summary)
avatar_rouge_dict.update(lemma_longt5=rouge_scores)

print(summary, rouge_scores, sep='\n')

Downloading (…)lve/main/config.json:   0%|          | 0.00/1.12k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/990M [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/2.36k [00:00<?, ?B/s]

Downloading spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/2.20k [00:00<?, ?B/s]



('The narrator tells us that this is one of the most famous lines in the play. '
 "It's about a drum roll, which sounds like it's coming from somewhere high up "
 "in the sky. You can hear it when you're lying down in the hospital and you "
 'wake up with a huge blow through your heart. This is kind of like watching a '
 "horror movie on repeat. In other words, there's a lot of scary stuff going "
 'on here.')
The narrator tells us that this is one of the most famous lines in the play. It's about a drum roll, which sounds like it's coming from somewhere high up in the sky. You can hear it when you're lying down in the hospital and you wake up with a huge blow through your heart. This is kind of like watching a horror movie on repeat. In other words, there's a lot of scary stuff going on here.
{'rouge1': 0.19491525423728814, 'rouge2': 0.008547008547008546, 'rougeL': 0.11864406779661017, 'rougeLsum': 0.11864406779661017}


## BERT - Dialog

Pegasus Short

In [None]:
# BERT dialog -> run_pegasus_short_model, scored against the plot outline.
summary = run_pegasus_short_model(avatar.loc[0, 'bert_dialog'])
rouge_scores = calculate_rouge_scores(summary, avatar['plot_outline'])

# record the summary and its rouge scores under the same key
avatar_summary_dict.update(dialog_pegasus_short=summary)
avatar_rouge_dict.update(dialog_pegasus_short=rouge_scores)

print(summary, rouge_scores, sep='\n')

All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("James Cameron's latest film, The Lone Ranger, opens in US cinemas on Friday, "
 "8 March and in UK cinemas on Monday, 9 April..... and here's a song from the "
 'film, written and directed by James Cameron, featuring the cast of '
 "characters from the movie, including Forest Whitaker, Lupita Nyong'o, and "
 "Forest Whitaker's wife, Jennifer Lawrence.. and the film's soundtrack, which "
 'includes music from the Oscar-winning director James Cameron..')
James Cameron's latest film, The Lone Ranger, opens in US cinemas on Friday, 8 March and in UK cinemas on Monday, 9 April..... and here's a song from the film, written and directed by James Cameron, featuring the cast of characters from the movie, including Forest Whitaker, Lupita Nyong'o, and Forest Whitaker's wife, Jennifer Lawrence.. and the film's soundtrack, which includes music from the Oscar-winning director James Cameron..
{'rouge1': 0.14035087719298245, 'rouge2': 0.0, 'rougeL': 0.11403508771929824, 'rougeLsum': 0.1140350877192

Pegasus Long

In [None]:
# BERT dialog -> run_pegasus_long_model, scored against the plot outline.
summary = run_pegasus_long_model(avatar.loc[0, 'bert_dialog'])
rouge_scores = calculate_rouge_scores(summary, avatar['plot_outline'])

# record the summary and its rouge scores under the same key
avatar_summary_dict.update(dialog_pegasus_long=summary)
avatar_rouge_dict.update(dialog_pegasus_long=rouge_scores)

print(summary, rouge_scores, sep='\n')

All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


("James Cameron's new song 'I Don't Want Your pity' is out now on iTunes "
 ".<n>It's a response to President Obama's speech at last week's G-8 summit in "
 'L.A.<n>The song was written while Cameron was lying in the VA during the '
 "middle of his life ..<n>Cameron: 'They can fix a spinal, if you've got the "
 'money. But not on vet benefits, not in this economy. isolated souls. '
 "century, is making a comeback'")
James Cameron's new song 'I Don't Want Your pity' is out now on iTunes .<n>It's a response to President Obama's speech at last week's G-8 summit in L.A.<n>The song was written while Cameron was lying in the VA during the middle of his life ..<n>Cameron: 'They can fix a spinal, if you've got the money. But not on vet benefits, not in this economy. isolated souls. century, is making a comeback'
{'rouge1': 0.16033755274261602, 'rouge2': 0.0, 'rougeL': 0.10970464135021098, 'rougeLsum': 0.10970464135021098}


Pegasus Large

In [None]:
# BERT dialog -> run_pegasus_large_model, scored against the plot outline.
summary = run_pegasus_large_model(avatar.loc[0, 'bert_dialog'])
rouge_scores = calculate_rouge_scores(summary, avatar['plot_outline'])

# record the summary and its rouge scores under the same key
avatar_summary_dict.update(dialog_pegasus_large=summary)
avatar_rouge_dict.update(dialog_pegasus_large=rouge_scores)

print(summary, rouge_scores, sep='\n')

All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Sooner or later though, you always have to wake up... pain beyond his years. '
 'All I ever wanted in my sorry-ass life was a single thing worth fighting '
 "for. I don't want your pain. I know the world's a cold ass bitch. You want a "
 "fair deal, you're on the wrong chair. And nobody does a damn thing. I told "
 'myself I could pass any test a man can pass. To be hammered on the anvil of '
 'life.')
Sooner or later though, you always have to wake up... pain beyond his years. All I ever wanted in my sorry-ass life was a single thing worth fighting for. I don't want your pain. I know the world's a cold ass bitch. You want a fair deal, you're on the wrong chair. And nobody does a damn thing. I told myself I could pass any test a man can pass. To be hammered on the anvil of life.
{'rouge1': 0.13559322033898308, 'rouge2': 0.008547008547008546, 'rougeL': 0.10169491525423728, 'rougeLsum': 0.10169491525423728}


LongT5

In [None]:
# BERT dialog -> run_longt5_model, scored against the plot outline.
summary = run_longt5_model(avatar.loc[0, 'bert_dialog'])
rouge_scores = calculate_rouge_scores(summary, avatar['plot_outline'])

# record the summary and its rouge scores under the same key
avatar_summary_dict.update(dialog_longt5=summary)
avatar_rouge_dict.update(dialog_longt5=rouge_scores)

print(summary, rouge_scores, sep='\n')

Your max_length is set to 256, but you input_length is only 248. You might consider decreasing max_length manually, e.g. summarizer('...', max_length=124)


('In this chapter, the narrator explains how he came to be in the middle of '
 'his life when he was lying in the hospital. He dreamed of flying and '
 "becoming a soldier. But now that he's out of the hospital, he realizes what "
 "it's like to be an isolated soul. He decides to become a Marine so he can "
 'deal with the "anvil of life" .')
In this chapter, the narrator explains how he came to be in the middle of his life when he was lying in the hospital. He dreamed of flying and becoming a soldier. But now that he's out of the hospital, he realizes what it's like to be an isolated soul. He decides to become a Marine so he can deal with the "anvil of life" .
{'rouge1': 0.2533936651583711, 'rouge2': 0.027397260273972605, 'rougeL': 0.15384615384615385, 'rougeLsum': 0.15384615384615385}


## BERT - Text

Pegasus Short

In [None]:
# BERT text -> run_pegasus_short_model, scored against the plot outline.
summary = run_pegasus_short_model(avatar.loc[0, 'bert_text'])
rouge_scores = calculate_rouge_scores(summary, avatar['plot_outline'])

# record the summary and its rouge scores under the same key
avatar_summary_dict.update(text_pegasus_short=summary)
avatar_rouge_dict.update(text_pegasus_short=rouge_scores)

print(summary, rouge_scores, sep='\n')

All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Jake is a young man in his early 20s who has just been released from prison '
 'and is about to embark on a new life in a small town in the American state '
 'of New Mexico, where he will live out his childhood dream of becoming a '
 'police officer, working alongside his father, who has been killed in the '
 'line of duty, and his mother, who is pregnant with his first child, and who '
 'is struggling to make ends meet as a single mother in a town with a high '
 'crime rate.')
Jake is a young man in his early 20s who has just been released from prison and is about to embark on a new life in a small town in the American state of New Mexico, where he will live out his childhood dream of becoming a police officer, working alongside his father, who has been killed in the line of duty, and his mother, who is pregnant with his first child, and who is struggling to make ends meet as a single mother in a town with a high crime rate.
{'rouge1': 0.2439024390243902, 'rouge2': 0.032786885245901

Pegasus Long

In [None]:
# BERT text -> run_pegasus_long_model, scored against the plot outline.
summary = run_pegasus_long_model(avatar.loc[0, 'bert_text'])
rouge_scores = calculate_rouge_scores(summary, avatar['plot_outline'])

# record the summary and its rouge scores under the same key
avatar_summary_dict.update(text_pegasus_long=summary)
avatar_rouge_dict.update(text_pegasus_long=rouge_scores)

print(summary, rouge_scores, sep='\n')

All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('A scarred and scruffy combat vet, sitting in a beat up carbon-fiber '
 "wheelchair .<n>Jake's eyes are hardened by the wisdom and wariness of one "
 'who has endured Jake stares upward at the levels of the city.<n>The room is '
 'a tiny CCLE, prison cell meets 747 bathroom.<n>Narrow cot, wall-screen drums '
 'droning away in the B.G. -- The Bengal tiger, extinct for over over.<n>Most '
 'people wear KSKS to protect them from the toxic air.')
A scarred and scruffy combat vet, sitting in a beat up carbon-fiber wheelchair .<n>Jake's eyes are hardened by the wisdom and wariness of one who has endured Jake stares upward at the levels of the city.<n>The room is a tiny CCLE, prison cell meets 747 bathroom.<n>Narrow cot, wall-screen drums droning away in the B.G. -- The Bengal tiger, extinct for over over.<n>Most people wear KSKS to protect them from the toxic air.
{'rouge1': 0.20338983050847456, 'rouge2': 0.017094017094017092, 'rougeL': 0.1271186440677966, 'rougeLsum': 0.1271186440677966}


Pegasus Large

In [None]:
# BERT text -> run_pegasus_large_model, scored against the plot outline.
summary = run_pegasus_large_model(avatar.loc[0, 'bert_text'])
rouge_scores = calculate_rouge_scores(summary, avatar['plot_outline'])

# record the summary and its rouge scores under the same key
avatar_summary_dict.update(text_pegasus_large=summary)
avatar_rouge_dict.update(text_pegasus_large=rouge_scores)

print(summary, rouge_scores, sep='\n')

All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('hospital, with a big hole blown through We are very low over the forest now, '
 'gliding fast, the drums to a PEAK -- A SCREECH OF BRAKES as a vehicle WIPES '
 'FRAME, revealing -- JAKE SULLY, a scarred and scruffy combat vet, sitting in '
 'a beat up carbon-fiber wheelchair. At 22, his eyes are hardened by the '
 'wisdom and wariness of one who has endured Jake stares upward at the levels '
 'of the city.')
hospital, with a big hole blown through We are very low over the forest now, gliding fast, the drums to a PEAK -- A SCREECH OF BRAKES as a vehicle WIPES FRAME, revealing -- JAKE SULLY, a scarred and scruffy combat vet, sitting in a beat up carbon-fiber wheelchair. At 22, his eyes are hardened by the wisdom and wariness of one who has endured Jake stares upward at the levels of the city.
{'rouge1': 0.18502202643171806, 'rouge2': 0.026666666666666665, 'rougeL': 0.11453744493392068, 'rougeLsum': 0.11453744493392068}


LongT5

In [None]:
# BERT text -> run_longt5_model, scored against the plot outline.
summary = run_longt5_model(avatar.loc[0, 'bert_text'])
rouge_scores = calculate_rouge_scores(summary, avatar['plot_outline'])

# record the summary and its rouge scores under the same key
avatar_summary_dict.update(text_longt5=summary)
avatar_rouge_dict.update(text_longt5=rouge_scores)

print(summary, rouge_scores, sep='\n')

('The sound of drums is heard from a distance. We are flying through mist and '
 'over the forest below. There is a hospital with a hole blown in, and we are '
 'glided by the drums building to a peak. A screech of brakes as a vechicle '
 'whines frammed; Jake Sully, a Scarred and Scuffy Combat Vet, sits in a '
 'beating up carbon fiber wheelchair. At 22 he eyes hardened by his wisdom '
 'andwariness of one whom has endured such a thing. Maglev Trails Whoosh '
 'overhead on raised tracks against a garish advertisementizing')
The sound of drums is heard from a distance. We are flying through mist and over the forest below. There is a hospital with a hole blown in, and we are glided by the drums building to a peak. A screech of brakes as a vechicle whines frammed; Jake Sully, a Scarred and Scuffy Combat Vet, sits in a beating up carbon fiber wheelchair. At 22 he eyes hardened by his wisdom andwariness of one whom has endured such a thing. Maglev Trails Whoosh overhead on raised tracks ag

## Two Stage BERT - Dialog

Short

In [None]:
# Two-stage (TextRank -> Pegasus short) summary of the Avatar dialog.
#
# Judging by the recorded output, execute_two_stage_short_model fills the two
# dicts it is handed (keyed by movie title) and returns those same dicts.
# Handing it the Avatar accumulators and then storing the returned dicts back
# into them nested each dict inside itself (the `{...}` in the printed output)
# and left a stray 'Avatar' key. Scratch dicts avoid both problems.
two_stage_rouge, two_stage_summary = execute_two_stage_short_model(0, 1, {}, {}, avatar)

# keep only Avatar's own entry: the summary string and its rouge score dict
avatar_title = avatar['title'].iloc[0]
summary = two_stage_summary[avatar_title]
rouge_scores = two_stage_rouge[avatar_title]

# add rouge scores and summary to a dictionary
avatar_summary_dict['two_stage_dialog_pegasus_short'] = summary
avatar_rouge_dict['two_stage_dialog_pegasus_short'] = rouge_scores

print(summary)
print(rouge_scores)

1459


Downloading (…)lve/main/config.json:   0%|          | 0.00/1.39k [00:00<?, ?B/s]

Downloading tf_model.h5:   0%|          | 0.00/2.28G [00:00<?, ?B/s]

All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Downloading (…)neration_config.json:   0%|          | 0.00/259 [00:00<?, ?B/s]

Downloading (…)ve/main/spiece.model:   0%|          | 0.00/1.91M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/65.0 [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/87.0 [00:00<?, ?B/s]

('The Great People have sent a message that they can take whatever they want, '
 "and want one can stop them, but one can't stop them because they only come "
 'and make problems, so one day I could bring this message, and you would '
 'never be one of them, because one day you could become one of me, and I '
 'would never come back, and never come again, because the Sky has sent this '
 'message to you, so I have to go all the way to become one.')


Downloading builder script:   0%|          | 0.00/6.27k [00:00<?, ?B/s]

{'Avatar': "The Great People have sent a message that they can take whatever they want, and want one can stop them, but one can't stop them because they only come and make problems, so one day I could bring this message, and you would never be one of them, because one day you could become one of me, and I would never come back, and never come again, because the Sky has sent this message to you, so I have to go all the way to become one.", 'two_stage_dialog_pegasus_short': {...}}
{'Avatar': {'rouge1': 0.13223140495867772, 'rouge2': 0.0, 'rougeL': 0.08264462809917356, 'rougeLsum': 0.08264462809917356}, 'two_stage_dialog_pegasus_short': {...}}


Long

In [None]:
# Two-stage (TextRank -> Pegasus long) summary of the Avatar dialog.
#
# Judging by the recorded output, execute_two_stage_long_model fills the two
# dicts it is handed (keyed by movie title) and returns those same dicts.
# Handing it the Avatar accumulators and then storing the returned dicts back
# into them nested each dict inside itself and left a stray 'Avatar' key.
# Scratch dicts avoid both problems.
two_stage_rouge, two_stage_summary = execute_two_stage_long_model(0, 1, {}, {}, avatar)

# keep only Avatar's own entry: the summary string and its rouge score dict
avatar_title = avatar['title'].iloc[0]
summary = two_stage_summary[avatar_title]
rouge_scores = two_stage_rouge[avatar_title]

# add rouge scores and summary to a dictionary
avatar_summary_dict['two_stage_dialog_pegasus_long'] = summary
avatar_rouge_dict['two_stage_dialog_pegasus_long'] = rouge_scores

print(summary)
print(rouge_scores)

1459


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Goddammit, Jake, you can never be one of them!<n>The Sky People have sent a '
 'message that they can take whatever they want, and no one can stop '
 'them.<n>The Great Mother may choose to save all that she is -- -- in this '
 "body.It's time to come in.<n>Nothing like an old-school safety brief to put "
 'your mind at ease.<n>And you may choose a woman.Neytiri, you know m')
{'Avatar': "Goddammit, Jake, you can never be one of them!<n>The Sky People have sent a message that they can take whatever they want, and no one can stop them.<n>The Great Mother may choose to save all that she is -- -- in this body.It's time to come in.<n>Nothing like an old-school safety brief to put your mind at ease.<n>And you may choose a woman.Neytiri, you know m", 'two_stage_dialog_pegasus_short': {...}, 'lemma_pegasus_short': "Jake's voice is so strong it's hard to hear him , but I can see it in his eyes and his face and his eyes can't see me , so I can hear him in my head and my head can see him in his

Large

In [None]:
# Run the two-stage dialog pipeline with the "large" Pegasus model.
rouge_scores, summary = execute_two_stage_large_model(0, 1, avatar_rouge_dict, avatar_summary_dict, avatar)

# NOTE(review): the helper presumably hands back the shared dictionaries it was
# given, with this run's result stored under the film title 'Avatar' (confirm
# against its definition). Storing those objects as-is would nest each
# dictionary inside itself (printed as {...}), so unwrap this run's entry
# first. The guards leave a plain summary string / plain score dictionary
# untouched.
if isinstance(summary, dict) and 'Avatar' in summary:
    summary = summary['Avatar']
if isinstance(rouge_scores, dict) and 'Avatar' in rouge_scores:
    rouge_scores = rouge_scores['Avatar']

# add rouge scores and summary to a dictionary
avatar_summary_dict['two_stage_dialog_pegasus_large'] = summary
avatar_rouge_dict['two_stage_dialog_pegasus_large'] = rouge_scores

print(summary)
print(rouge_scores)

1459


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('He was the one who wanted to get shot light years out 5.You only come and '
 'make problems.So one day I could bring this message, and you would believe '
 'it.I have to go all the way -- become one of Goddammit, Jake, you can never '
 'be one of them!All I ever wanted in my sorry-ass life was a single thing '
 'worth fighting for.Where ever you are, come back to me now.The Sky People '
 'have sent a message that they can take whatever they want, and no one can '
 "stop them.Get me what I need, I'll see you get your legs back when you "
 'rotate home.Never come back!The Great Mother may choose to save all that she '
 "is -- -- in this body.It's time to come in.Nothing like an old-school safety "
 "brief to put your mind at ease.Since a deal can't be made -- it gets real "
 "simple.So you just figured you'd come out here to the most hostile "
 'environment known to man, with no training of any kind, and see how it '
 'went?You may make your (she looks away) And you may choose a woman.

LongT5

In [None]:
# Run the two-stage dialog pipeline with the LongT5 model.
rouge_scores, summary = execute_two_stage_t5_model(0, 1, avatar_rouge_dict, avatar_summary_dict, avatar)

# The recorded output of this cell shows the helper handing back the shared
# dictionaries themselves, with this run's result stored under the film title
# 'Avatar'. Storing those objects as-is nests each dictionary inside itself
# (printed as {...}), so unwrap this run's entry first. The guards leave a
# plain summary string / plain score dictionary untouched.
if isinstance(summary, dict) and 'Avatar' in summary:
    summary = summary['Avatar']
if isinstance(rouge_scores, dict) and 'Avatar' in rouge_scores:
    rouge_scores = rouge_scores['Avatar']

# add rouge scores and summary to a dictionary
avatar_summary_dict['two_stage_dialog_longt5'] = summary
avatar_rouge_dict['two_stage_dialog_longt5'] = rouge_scores

print(summary)
print(rouge_scores)

1459




('The narrator tells us that Neytiri is the only person who has ever wanted to '
 "be shot out of the sky. He's been waiting for this message for years, and "
 "now it's time for him to come back. No one can stop the sky people from "
 "taking whatever they want. They're going to save everything in her body--and "
 "she's going to take care of herself. That's right, there's nothing like an "
 'old school brief to put you at ease.')
{'Avatar': "The narrator tells us that Neytiri is the only person who has ever wanted to be shot out of the sky. He's been waiting for this message for years, and now it's time for him to come back. No one can stop the sky people from taking whatever they want. They're going to save everything in her body--and she's going to take care of herself. That's right, there's nothing like an old school brief to put you at ease.", 'two_stage_dialog_pegasus_short': {...}, 'lemma_pegasus_short': "Jake's voice is so strong it's hard to hear him , but I can see it in his

## Two Stage BERT - Text

Short

In [None]:
# Run the two-stage text pipeline with the "short" Pegasus model.
rouge_scores, summary = execute_two_stage_text_short_model(0, 1, avatar_rouge_dict, avatar_summary_dict, avatar)

# The recorded output of this cell shows the helper handing back the shared
# dictionaries themselves, with this run's result stored under the film title
# 'Avatar'. Storing those objects as-is nests each dictionary inside itself
# (printed as {...}), so unwrap this run's entry first. The guards leave a
# plain summary string / plain score dictionary untouched.
if isinstance(summary, dict) and 'Avatar' in summary:
    summary = summary['Avatar']
if isinstance(rouge_scores, dict) and 'Avatar' in rouge_scores:
    rouge_scores = rouge_scores['Avatar']

# add rouge scores and summary to a dictionary
avatar_summary_dict['two_stage_text_pegasus_short'] = summary
avatar_rouge_dict['two_stage_text_pegasus_short'] = rouge_scores

print(summary)
print(rouge_scores)

2083


All model checkpoint layers were used when initializing TFPegasusForConditionalGeneration.

Some layers of TFPegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['final_logits_bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('All photographs courtesy of Getty Images, EPA, Getty Images and Reuters, '
 'subject of copyright laws and subject to change at any time without prior '
 'notice.,  Copyright (c) The Vancouver Sun E-mail this Article to a friend or '
 '(250 words or more) Postmedia News (Vancouver, British Columbia, Canada) '
 'Twitter: www.twitter.com/thesun Vancouver Sun (British Columbia) Instagram: '
 'www.99thesun.tumblr.com (Vancouver)')
{'Avatar': 'All photographs courtesy of Getty Images, EPA, Getty Images and Reuters, subject of copyright laws and subject to change at any time without prior notice.,  Copyright (c) The Vancouver Sun E-mail this Article to a friend or (250 words or more) Postmedia News (Vancouver, British Columbia, Canada) Twitter: www.twitter.com/thesun Vancouver Sun (British Columbia) Instagram: www.99thesun.tumblr.com (Vancouver)', 'two_stage_dialog_pegasus_short': {...}, 'lemma_pegasus_short': "Jake's voice is so strong it's hard to hear him , but I can see it in his eyes 

Long

In [None]:
# Run the two-stage text pipeline with the "long" Pegasus model.
rouge_scores, summary = execute_two_stage_text_long_model(0, 1, avatar_rouge_dict, avatar_summary_dict, avatar)

# The recorded output of this cell shows the helper handing back the shared
# dictionaries themselves, with this run's result stored under the film title
# 'Avatar'. Storing those objects as-is nests each dictionary inside itself
# (printed as {...}), so unwrap this run's entry first. The guards leave a
# plain summary string / plain score dictionary untouched.
if isinstance(summary, dict) and 'Avatar' in summary:
    summary = summary['Avatar']
if isinstance(rouge_scores, dict) and 'Avatar' in rouge_scores:
    rouge_scores = rouge_scores['Avatar']

# add rouge scores and summary to a dictionary
avatar_summary_dict['two_stage_text_pegasus_long'] = summary
avatar_rouge_dict['two_stage_text_pegasus_long'] = rouge_scores

print(summary)
print(rouge_scores)

2083


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('Jake runs in a blur, dodging between trunks as a glistening black tornado '
 "shreds the forest behind him .<n>The thanator's jaws SHUT SNAP inches behind "
 'him as he flies out into open space and -- JAKE SPLASHIVES down into a '
 'swiftly moving river ..<n>SNARLING, a wounded wolf attacks Jake, and he '
 'KICKS it away, It SPINS and leaps back onto him, and Jake barely catches its '
 'throat in time to keep the SNAPPING JAWS away from his face .')
{'Avatar': "Jake runs in a blur, dodging between trunks as a glistening black tornado shreds the forest behind him .<n>The thanator's jaws SHUT SNAP inches behind him as he flies out into open space and -- JAKE SPLASHIVES down into a swiftly moving river ..<n>SNARLING, a wounded wolf attacks Jake, and he KICKS it away, It SPINS and leaps back onto him, and Jake barely catches its throat in time to keep the SNAPPING JAWS away from his face .", 'two_stage_dialog_pegasus_short': {...}, 'lemma_pegasus_short': "Jake's voice is so strong it's 

Large

In [None]:
# Run the two-stage text pipeline with the "large" Pegasus model.
rouge_scores, summary = execute_two_stage_text_large_model(0, 1, avatar_rouge_dict, avatar_summary_dict, avatar)

# NOTE(review): the helper presumably hands back the shared dictionaries it was
# given, with this run's result stored under the film title 'Avatar' (confirm
# against its definition). Storing those objects as-is would nest each
# dictionary inside itself (printed as {...}), so unwrap this run's entry
# first. The guards leave a plain summary string / plain score dictionary
# untouched.
if isinstance(summary, dict) and 'Avatar' in summary:
    summary = summary['Avatar']
if isinstance(rouge_scores, dict) and 'Avatar' in rouge_scores:
    rouge_scores = rouge_scores['Avatar']

# add rouge scores and summary to a dictionary
avatar_summary_dict['two_stage_text_pegasus_large'] = summary
avatar_rouge_dict['two_stage_text_pegasus_large'] = rouge_scores

print(summary)
print(rouge_scores)

2083


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('JAKE RUNS in a blur, dodging between trunks as a glistening black tornado '
 'shreds the forest behind him and -- He sees WATER ahead and DIVES OUTWARD '
 "with all his might -- The thanator's jaws SHUT inches behind him as he flies "
 'out into open space and -- JAKE SPLASHES down into a swiftly moving river.He '
 'just lies there, blinking -- then shouts jauntily to no one in particular -- '
 'CAMERA PULLS BACK high and wide, as Jake lies spread-eagled amongst the '
 'trash, getting drenched.SNARLING, a wounded wolf attacks Jake, and he KICKS '
 'it away, It SPINS and leaps back onto him, and Jake barely catches its '
 'throat in time to keep the SNAPPING JAWS away from his face.TIME CUT -- '
 'Grace is scanning through images and Jake stops her on one -- a 3D aerial '
 'shot of the strange arched She moves the virtual camera, and we seem to fly '
 'around the Well of Souls, catching only a glimpse of the interior.CAMERA '
 'SCREAMS down on him as the shadow grows larger -- WE RUSH

LongT5

In [None]:
# Run the two-stage text pipeline with the LongT5 model.
rouge_scores, summary = execute_two_stage_text_t5_model(0, 1, avatar_rouge_dict, avatar_summary_dict, avatar)

# The recorded output of this cell shows the helper handing back the shared
# dictionaries themselves, with this run's result stored under the film title
# 'Avatar'. Storing those objects as-is nests each dictionary inside itself
# (printed as {...}), so unwrap this run's entry first. The guards leave a
# plain summary string / plain score dictionary untouched.
if isinstance(summary, dict) and 'Avatar' in summary:
    summary = summary['Avatar']
if isinstance(rouge_scores, dict) and 'Avatar' in rouge_scores:
    rouge_scores = rouge_scores['Avatar']

# add rouge scores and summary to a dictionary
avatar_summary_dict['two_stage_text_longt5'] = summary
avatar_rouge_dict['two_stage_text_longt5'] = rouge_scores

print(summary)
print(rouge_scores)

('The sound of drums is heard from a distance. We are flying through mist and '
 'over the forest below. There is a hospital with a hole blown in, and we are '
 'glided by the drums building to a peak. A screech of brakes as a vechicle '
 'whines frammed; Jake Sully, a Scarred and Scuffy Combat Vet, sits in a '
 'beating up carbon fiber wheelchair. At 22 he eyes hardened by his wisdom '
 'andwariness of one whom has endured such a thing. Maglev Trails Whoosh '
 'overhead on raised tracks against a garish advertisementizing')


All PyTorch model weights were used when initializing TFPegasusForConditionalGeneration.

Some weights or buffers of the TF 2.0 model TFPegasusForConditionalGeneration were not initialized from the PyTorch model and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


('There is a hospital with a hole blown in, and we are glided by the drums '
 'building to a peak. A screech of brakes as a vechicle whines frammed; Jake '
 'Sully, a Scarred and Scuffy Combat Vet, sits in a beating up carbon fiber '
 'wheelchair. At 22 he eyes hardened by his wisdom andwariness of one whom has '
 'endured such a thing. Maglev Trails Whoosh overhead on raised tracks against '
 'a garish advertisementizing')
{'Avatar': 'There is a hospital with a hole blown in, and we are glided by the drums building to a peak. A screech of brakes as a vechicle whines frammed; Jake Sully, a Scarred and Scuffy Combat Vet, sits in a beating up carbon fiber wheelchair. At 22 he eyes hardened by his wisdom andwariness of one whom has endured such a thing. Maglev Trails Whoosh overhead on raised tracks against a garish advertisementizing', 'two_stage_dialog_pegasus_short': {...}, 'lemma_pegasus_short': "Jake's voice is so strong it's hard to hear him , but I can see it in his eyes and his fa

## Display Results

In [None]:
# dump the collected summaries, then the collected ROUGE scores, one per line
print(avatar_summary_dict, avatar_rouge_dict, sep='\n')

{'Avatar': 'There is a hospital with a hole blown in, and we are glided by the drums building to a peak. A screech of brakes as a vechicle whines frammed; Jake Sully, a Scarred and Scuffy Combat Vet, sits in a beating up carbon fiber wheelchair. At 22 he eyes hardened by his wisdom andwariness of one whom has endured such a thing. Maglev Trails Whoosh overhead on raised tracks against a garish advertisementizing', 'two_stage_dialog_pegasus_short': {...}, 'lemma_pegasus_short': "Jake's voice is so strong it's hard to hear him , but I can see it in his eyes and his face and his eyes can't see me , so I can hear him in my head and my head can see him in his body and his body can see me in my body and my body can hear me in his mind and my mind can hear us in our body and our mind and we can hear each other's voices.", 'lemma_pegasus_long': "Combat vet Jake sully has been injured on duty in Iraq and Afghanistan .<n>He now lives in a wheelchair in a VA hospital in NYC and struggles to cope 

In [None]:
# one row per dictionary entry: the key goes to 'title', the value to 'summary'
avatar_summary_df = pd.DataFrame(
    data=list(avatar_summary_dict.items()),
    columns=['title', 'summary'],
)
avatar_summary_df.head()

Unnamed: 0,title,summary
0,Avatar,"There is a hospital with a hole blown in, and ..."
1,two_stage_dialog_pegasus_short,{'Avatar': 'There is a hospital with a hole bl...
2,lemma_pegasus_short,Jake's voice is so strong it's hard to hear hi...
3,lemma_pegasus_long,Combat vet Jake sully has been injured on duty...
4,lemma_pegasus_large,"we be very low over the forest now , glide fas..."


In [None]:
# one row per dictionary entry: the key goes to 'title', the value to 'rouge'
avatar_rouge_df = pd.DataFrame(
    data=list(avatar_rouge_dict.items()),
    columns=['title', 'rouge'],
)
avatar_rouge_df.head()

Unnamed: 0,title,rouge
0,Avatar,"{'rouge1': 0.17621145374449337, 'rouge2': 0.01..."
1,two_stage_dialog_pegasus_short,"{'Avatar': {'rouge1': 0.17621145374449337, 'ro..."
2,lemma_pegasus_short,"{'rouge1': 0.14225941422594143, 'rouge2': 0.0,..."
3,lemma_pegasus_long,"{'rouge1': 0.2638297872340426, 'rouge2': 0.034..."
4,lemma_pegasus_large,"{'rouge1': 0.16289592760180996, 'rouge2': 0.02..."


In [None]:
# write the summary table to a CSV file in the Colab workspace
avatar_summary_df.to_csv(path_or_buf='/content/avatar_summary.csv')
# optional: uncomment to download the file through the browser
# files.download('/content/avatar_summary.csv')

In [None]:
# write the ROUGE table to a CSV file in the Colab workspace
avatar_rouge_df.to_csv(path_or_buf='/content/avatar_rouge.csv')
# optional: uncomment to download the file through the browser
# files.download('/content/avatar_rouge.csv')