# Imports

In [1]:
!pip install torch
!pip install transformers
!pip install sentencepiece
!pip install bert-extractive-summarizer
!pip install rouge-score nltk git+https://github.com/google-research/bleurt.git

Collecting transformers
  Downloading transformers-4.18.0-py3-none-any.whl (4.0 MB)
[K     |████████████████████████████████| 4.0 MB 12.4 MB/s 
Collecting sacremoses
  Downloading sacremoses-0.0.49-py3-none-any.whl (895 kB)
[K     |████████████████████████████████| 895 kB 44.3 MB/s 
Collecting pyyaml>=5.1
  Downloading PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (596 kB)
[K     |████████████████████████████████| 596 kB 59.5 MB/s 
Collecting huggingface-hub<1.0,>=0.1.0
  Downloading huggingface_hub-0.5.1-py3-none-any.whl (77 kB)
[K     |████████████████████████████████| 77 kB 6.1 MB/s 
[?25hCollecting tokenizers!=0.11.3,<0.13,>=0.11.1
  Downloading tokenizers-0.11.6-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (6.5 MB)
[K     |████████████████████████████████| 6.5 MB 38.6 MB/s 
Installing collected packages: pyyaml, tokenizers, sacremoses, huggingface-hub, transformers
  Attempting uninstall: pyyaml
    F

In [2]:
from absl import logging
import tensorflow as tf
import tensorflow_hub as hub
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import re
from pathlib import Path
import nltk
from ast import literal_eval

import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, PegasusForConditionalGeneration, PegasusTokenizer, logging
from summarizer import Summarizer #bert-extractive-summarizer

logging.set_verbosity_error()

In [3]:
print(tf.__version__)

2.8.0


In [4]:
!python --version

Python 3.7.13


# Universal Sentence Encoder (USE)

In [5]:
module_url = "https://tfhub.dev/google/universal-sentence-encoder/4" #@param ["https://tfhub.dev/google/universal-sentence-encoder/4", "https://tfhub.dev/google/universal-sentence-encoder-large/5"]
model = hub.load(module_url)
print ("module %s loaded" % module_url)
def embed(input):
  return model(input)
  
word = "Elephant"
sentence = "I am a sentence for which I would like to get its embedding."
paragraph = (
    "Universal Sentence Encoder embeddings also support short paragraphs. "
    "There is no hard limit on how long the paragraph is. Roughly, the longer "
    "the more 'diluted' the embedding will be.")
messages = [word, sentence, paragraph]

message_embeddings = embed(messages)

for i, message_embedding in enumerate(np.array(message_embeddings).tolist()):
  print("Message: {}".format(messages[i]))
  print("Embedding size: {}".format(len(message_embedding)))
  message_embedding_snippet = ", ".join(
      (str(x) for x in message_embedding[:3]))
  print("Embedding: [{}, ...]\n".format(message_embedding_snippet))

module https://tfhub.dev/google/universal-sentence-encoder/4 loaded
Message: Elephant
Embedding size: 512
Embedding: [0.008344466798007488, 0.00048083445290103555, 0.06595246493816376, ...]

Message: I am a sentence for which I would like to get its embedding.
Embedding size: 512
Embedding: [0.050808604806661606, -0.01652432233095169, 0.015737831592559814, ...]

Message: Universal Sentence Encoder embeddings also support short paragraphs. There is no hard limit on how long the paragraph is. Roughly, the longer the more 'diluted' the embedding will be.
Embedding size: 512
Embedding: [-0.028332673013210297, -0.05586216226220131, -0.012941500172019005, ...]



# Explanation
Cool, so we can get embeddings for a string of arbitrary length 😎

Let's get embeddings for each sentence in our [scripts](https://github.com/ppapalampidi/TRIPOD) and just like in [Papalampidi et al](https://arxiv.org/pdf/2004.12727.pdf) represent a scene by the mean of its sentence representations and measure scene similarity $e_i{_j}$ using cosine similarity


In [6]:
%%bash
git clone https://github.com/ppapalampidi/TRIPOD.git
cd TRIPOD
python screenplays_scene_segmentation.py

The Mummy (1999 film)
The Kids Are All Right (film)
17 Again (film)
Drive (2011 film)
Funny People
One Eight Seven
What Women Want
Saw (film)
Superman (1978 film)
We Own the Night (film)
The Searchers (film)
Bridesmaids (2011 film)
Sherlock Holmes (2009 film)
Arbitrage (film)
The Apartment
10 Things I Hate About You
Bonnie and Clyde (film)
The Dark Knight (film)
The Ugly Truth
Ring (film)
Young Frankenstein
The Breakfast Club
Reservoir Dogs
Indiana Jones and the Kingdom of the Crystal Skull
Total Recall (1990 film)
Top Gun
The Time Machine (1960 film)
Pirates of the Caribbean: The Curse of the Black Pearl
While She Was Out
Gran Torino
Minority Report (film)
The Sixth Sense
Juno (film)
Pride & Prejudice (2005 film)
Star Wars Episode I: The Phantom Menace
Panic Room
Seven (film)
The Back-up Plan
Slumdog Millionaire
Youve Got Mail
Vertigo (film)
Meet Joe Black
30 Minutes or Less
My Girl (film)
Men in Black (film)
Titanic (1997 film)
The Truman Show
Kalifornia
Jaws (film)
Sleepless in Seat

Cloning into 'TRIPOD'...


# NLTK Sentence Splitter

In [7]:
nltk.download('punkt')
def sentence(text):
   return nltk.sent_tokenize(text) 

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


# Turn into Dataframe

In [8]:
def padding(array,x,y):
  '''
  appends zeros to end of array to create array of desired shape
  '''
  a = x - array.shape[0]
  b = y - array.shape[1]
  return np.pad(array, pad_width=((0,a),(0,b)))

In [9]:
rows = []
screenplays = os.listdir('/content/TRIPOD/Segmented_screenplays/')
for sp in screenplays:
  scenes_raw = Path('/content/TRIPOD/Segmented_screenplays/'+sp).read_text().split('========================================')[:-1]
  for sr in scenes_raw:
    row = {}
    row['scene'] = sr.split('====================')[2].strip()
    row['number'] = int(sr.split('====================')[1])
    row['screenplay'] = sp.split('_segmented')[0]
    rows.append(row)
scenes = pd.DataFrame(rows)

scenes['movie'] = [i[0] for i in scenes['screenplay'].str.split('_script')]
scenes_as_embeddings = [np.array(embed(sentence(scene))).mean(axis=0) for scene in scenes['scene'].iloc]
scenes['embedding'] = scenes_as_embeddings
scenes

Unnamed: 0,scene,number,screenplay,movie,embedding
0,EXT. ROAD -- TEXAS/MEXICO BORDER -- NIGHT\n\nA...,0,Men in Black (film)_script,Men in Black (film),"[-0.0063585145, 0.012928951, 0.019146854, -0.0..."
1,INT. VAN -- TEXAS/MEXICO BORDER -- NIGHT\n\n\n...,1,Men in Black (film)_script,Men in Black (film),"[0.004618446, 0.022468233, 0.01473625, -0.0033..."
2,EXT. ROAD -- TEXAS/MEXICO BORDER -- NIGHT\n\n\...,2,Men in Black (film)_script,Men in Black (film),"[-0.008483303, 0.0077187833, 0.0056559527, -0...."
3,EXT. DESERT CLEARING - NIGHT\n\n\n\nKay and De...,3,Men in Black (film)_script,Men in Black (film),"[-0.016636118, 0.03233801, -0.004568748, -0.00..."
4,EXT. ROAD -- TEXAS/MEXICO BORDER -- NIGHT\n\n\...,4,Men in Black (film)_script,Men in Black (film),"[0.018280465, -0.017645584, 0.017716356, -0.02..."
...,...,...,...,...,...
13607,INT. POGUE HOME - DAY\n\n\n\nWe're in the mids...,109,Angel Eyes (film)_script,Angel Eyes (film),"[-0.0069848457, 0.027921446, 0.012036826, -0.0..."
13608,"INT. KITCHEN - DAY\n\n\n\nSharon enters, waits...",110,Angel Eyes (film)_script,Angel Eyes (film),"[-0.004442618, 0.009855239, 0.017026015, 0.001..."
13609,INT. LIVING ROOM - DAY\n\n\n\nSharon comes out...,111,Angel Eyes (film)_script,Angel Eyes (film),"[-0.0022966485, 0.00900619, 0.008827509, -0.00..."
13610,EXT. POGUE HOUSE - DAY\n\n\n\nSharon exits ...,112,Angel Eyes (film)_script,Angel Eyes (film),"[-0.022781255, 0.018216148, 0.005998155, -0.00..."


In [10]:
gold = pd.read_csv('/content/TRIPOD/Synopses_and_annotations/TRIPOD_screenplays_test.csv')
full_gold_rows = []
for movie in gold.iloc:
  tp1=literal_eval(movie['tp1'])
  tp2=literal_eval(movie['tp2'])
  tp3=literal_eval(movie['tp3'])
  tp4=literal_eval(movie['tp4'])
  tp5=literal_eval(movie['tp5'])
  for i in tp1:
    for j in tp2:
      for k in tp3:
        for l in tp4:
          for m in tp5:
            row = {}
            row['movie'] = movie['movie_name']
            row['tp1'] = i
            row['tp2'] = j
            row['tp3'] = k
            row['tp4'] = l
            row['tp5'] = m
            full_gold_rows.append(row)
full_gold = pd.DataFrame(full_gold_rows)
gold_synopses = pd.read_csv('/content/TRIPOD/Synopses_and_annotations/TRIPOD_synopses_test.csv')
full_gold = pd.merge(full_gold,gold_synopses[['movie_name','synopsis_raw']],left_on='movie',right_on='movie_name').drop(columns=['movie_name'])
full_gold

Unnamed: 0,movie,tp1,tp2,tp3,tp4,tp5,synopsis_raw
0,The Back-up Plan,9,40,82,106,131,Zoe (Jennifer Lopez) has given up on finding t...
1,The Back-up Plan,9,40,82,107,131,Zoe (Jennifer Lopez) has given up on finding t...
2,The Back-up Plan,9,40,82,111,131,Zoe (Jennifer Lopez) has given up on finding t...
3,The Back-up Plan,9,41,82,106,131,Zoe (Jennifer Lopez) has given up on finding t...
4,The Back-up Plan,9,41,82,107,131,Zoe (Jennifer Lopez) has given up on finding t...
...,...,...,...,...,...,...,...
198,The Last Temptation of Christ (film),21,49,65,74,77,The film begins with a man whispering in despa...
199,The Last Temptation of Christ (film),21,50,64,74,76,The film begins with a man whispering in despa...
200,The Last Temptation of Christ (film),21,50,64,74,77,The film begins with a man whispering in despa...
201,The Last Temptation of Christ (film),21,50,65,74,76,The film begins with a man whispering in despa...


In [11]:
rrows=[]
for row in full_gold.iloc:
  rrow={}
  rrow['movie'] = row['movie']
  
  # X for each movie tpid is the embeddings for all scenes 
  rrow['X'] = padding(np.stack(scenes[scenes['movie']==row['movie']]['embedding']),256,512) # embeddings for all scenes padded with zeros to have uniform length of 256 scene embeddings
  rrow['Y'] = np.array(row[['tp1','tp2','tp3','tp4','tp5']])
  rrow['screenplay'] = np.stack(scenes[scenes['movie']==row['movie']]['scene']) # string representation of screenplays as opposed to embedding representaiton

# ============================
# ENCODING (ONE HOT OR NOT?)
# ============================

  # y for each movie tpid is [prob of scene being tp1, ... , prob of scene being tp5]
  y = np.zeros(shape=[256,])

  # Get index from gold df and apply it to the array 
  y[row['tp1']] = np.array([1])
  # y[row['tp1']] = np.array([1,0,0,0,0])
  # y[row['tp2']] = np.array([0,1,0,0,0])
  # y[row['tp3']] = np.array([0,0,1,0,0])
  # y[row['tp4']] = np.array([0,0,0,1,0])
  # y[row['tp5']] = np.array([0,0,0,0,1])
  rrow['y']=y
  rrows.append(rrow)
train_df = pd.DataFrame(rrows)
train_df
train_df = train_df.sample(frac=1,random_state=25)

In [12]:
#oversampling
os = 256
for i in range(len(train_df)):
  x_tp = np.array(train_df.iloc[i]['X'][train_df.iloc[i]['y']==1])
  x_fill = np.tile(x_tp,(os,1))
  y_fill = np.ones(os)

  train_df.iloc[i]['X'] = np.concatenate((train_df.iloc[i]['X'], x_fill))
  train_df.iloc[i]['y'] = np.concatenate((train_df.iloc[i]['y'], y_fill))

In [13]:
train_df.iloc[0]['X'].shape

(512, 512)

In [14]:
# Define train and test data

X_train = np.stack([i for i in train_df['X'][:-43]])
y_train = np.stack([i for i in train_df['y'][:-43]])

X_test = np.stack([i for i in train_df['X'][-43:]])
y_test = np.stack([i for i in train_df['y'][-43:]])



In [15]:
X_train.shape

(160, 512, 512)

In [16]:
y_train.shape

(160, 512)

In [17]:
X_test.shape

(43, 512, 512)

In [18]:
y_test.shape
y_test[0]

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0.

# Tensorflow

In [19]:
# Import the libraries

from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, MultiHeadAttention, LayerNormalization
from keras.activations import softmax

import tensorflow as tf

# Create our custom Softmax:
def sm(x):
    return softmax(x,axis=1)

# Creating the layers
input_layer = Input(shape=(512,512)) # number of scenes in movie
context_layer = MultiHeadAttention(num_heads=2, key_dim=512,)(input_layer,input_layer,input_layer,) 
norm_layer = LayerNormalization()(context_layer)

# tpid_layer = MultiHeadAttention(num_heads=2, key_dim=512,)(norm_layer,norm_layer) 
# norm_layer_2 = LayerNormalization()(tpid_layer)
# ff_layer_2 = Dense(256, activation='relu')(norm_layer_2)
ff_layer_2 = Dense(256, activation='relu')(norm_layer)

output_layer = Dense(1, activation=sm)(ff_layer_2)


# Create the model
model_context = Model(inputs=input_layer, outputs=output_layer,)

# Defining the optimiser and loss function
# s = Semantic_loss_functions()
model_context.compile(optimizer='adam',
              loss='binary_crossentropy',
              #loss = focal_loss(gamma=0.001),
              #loss=BinaryFocalLoss(gamma=2),
              #loss=s.weighted_cross_entropyloss,
              metrics = 'accuracy')

print(model_context.summary())

# To treat every instance of class 1 as 50 instances of class 0


# Training the model
model_context.fit(X_train, y_train, epochs=10, batch_size=4)#, class_weight=class_dict)

# Evaluating the model
print()
results = model_context.evaluate(X_test,y_test, batch_size=4)
print("test loss, test acc:", results)

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 512, 512)]   0           []                               
                                                                                                  
 multi_head_attention (MultiHea  (None, 512, 512)    2100736     ['input_1[0][0]',                
 dAttention)                                                      'input_1[0][0]',                
                                                                  'input_1[0][0]']                
                                                                                                  
 layer_normalization (LayerNorm  (None, 512, 512)    1024        ['multi_head_attention[0][0]']   
 alization)                                                                                   

In [20]:
print('pred','act','correct')
count = 0
for i in range(len(X_test)):
  x_ = X_test[i:i+1]
  y_ = y_test[i:i+1]

  pred = np.argmax(model_context.predict(x_)[0])
  act = np.argmax(y_)
  if pred == act:
    count+=1
  print(pred,act,count)

print('acc: ',count/len(X_test))

pred act correct
12 12 1
16 16 2
21 21 3
16 16 4
12 12 5
31 31 6
16 16 7
30 30 8
36 36 9
12 12 10
44 44 11
12 12 12
3 3 13
20 20 14
7 7 15
12 12 16
17 18 16
17 18 16
23 23 17
12 12 18
9 9 19
11 11 20
17 18 20
16 16 21
22 22 22
22 22 23
16 16 24
9 9 25
45 45 26
11 11 27
44 44 28
16 16 29
21 21 30
12 12 31
12 12 32
30 30 33
45 45 34
12 12 35
11 11 36
36 36 37
12 12 38
36 36 39
11 11 40
acc:  0.9302325581395349


In [21]:
# adding explicit tp_scenes column for string reps of tp scenes
all_tp_scenes = []
for row in train_df.iloc:
  tp_scenes = []
  for scene_index in row['Y']:
    tp_scenes.append(row['screenplay'][scene_index])
  all_tp_scenes.append(np.stack(tp_scenes))
train_df['tp_scenes'] = all_tp_scenes
train_df

Unnamed: 0,movie,X,Y,screenplay,y,tp_scenes
49,Panic Room,"[[0.010312125, -0.021104999, 0.004685092, 0.02...","[20, 56, 135, 148, 159]",[EXT. MANHATTAN - DAY 1\n\n \n\n The ...,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",[INT. MASTER BEDROOM - NIGHT 20\n\n \n\n ...
103,Total Recall (1990 film),"[[-0.005835405, -0.0034244575, -0.0052854903, ...","[16, 55, 72, 112, 151]",[1 EXT. MARS RED ROCK DESERT (DREAM SEQ.#1) - ...,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",[22 INT. REKALL - MEMORY STUDIO - DUSK 17 ...
139,Die Hard,"[[-0.01005826, 0.027367715, 0.002491984, -0.00...","[12, 26, 100, 114, 116]",[2 INT. 747 - PASSENGERS - SAME ...,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",[24 INT. BUILDING LOBBY - SAME TIME ...
58,Arbitrage (film),"[[-0.008415646, -0.0059916712, 0.0056184717, -...","[35, 62, 88, 105, 109]",[INT. ROBERT'S MANSION - DAY 1\n\n ...,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",[INT. JULIE'S LOFT - BEDROOM 35\n\n ...
117,Die Hard,"[[-0.01005826, 0.027367715, 0.002491984, -0.00...","[11, 26, 100, 115, 116]",[2 INT. 747 - PASSENGERS - SAME ...,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",[21 INT. ELLIS' OFFICE - NIGHT ...
...,...,...,...,...,...,...
118,Die Hard,"[[-0.01005826, 0.027367715, 0.002491984, -0.00...","[11, 26, 100, 115, 117]",[2 INT. 747 - PASSENGERS - SAME ...,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",[21 INT. ELLIS' OFFICE - NIGHT ...
61,Arbitrage (film),"[[-0.008415646, -0.0059916712, 0.0056184717, -...","[36, 57, 88, 105, 109]",[INT. ROBERT'S MANSION - DAY 1\n\n ...,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",[EXT. HIGHWAY - DEAD OF NIGHT 36\n\n ...
143,Die Hard,"[[-0.01005826, 0.027367715, 0.002491984, -0.00...","[12, 28, 99, 114, 116]",[2 INT. 747 - PASSENGERS - SAME ...,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",[24 INT. BUILDING LOBBY - SAME TIME ...
62,Arbitrage (film),"[[-0.008415646, -0.0059916712, 0.0056184717, -...","[36, 62, 67, 105, 109]",[INT. ROBERT'S MANSION - DAY 1\n\n ...,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",[EXT. HIGHWAY - DEAD OF NIGHT 36\n\n ...


In [22]:
rrows = []
for row in train_df.iloc:
  for scene in row['tp_scenes']:
    rrow = {}
    rrow['movie'] = row['movie']
    rrow['scene'] = scene
    rrows.append(rrow)
screenplay_dataset = pd.DataFrame(rrows)
screenplay_dataset = pd.merge(screenplay_dataset, full_gold[['movie','synopsis_raw']],on='movie').drop_duplicates()
screenplay_dataset.columns=['movie','scene','summary']
screenplay_dataset

Unnamed: 0,movie,scene,summary
0,Panic Room,INT. MASTER BEDROOM - NIGHT 20\n\n \n\n ...,Recently divorced Meg Altman (Foster) and her ...
12,Panic Room,INT. THIRD FLOOR HALLWAY - NIGHT 55\n\n \...,Recently divorced Meg Altman (Foster) and her ...
24,Panic Room,INT. PANIC ROOM - NIGHT 132\n\n \n\n ...,Recently divorced Meg Altman (Foster) and her ...
36,Panic Room,INT. PANIC ROOM - NIGHT\n\n \n\n A HA...,Recently divorced Meg Altman (Foster) and her ...
48,Panic Room,INT. PANIC ROOM - NIGHT\n\n \n\n The ...,Recently divorced Meg Altman (Foster) and her ...
...,...,...,...
24005,The Breakfast Club,6. INT. LIBRARY - DAY 6\n\n\n\n There are ...,"On March 24, 1984, five students — ""criminal"" ..."
24007,The Breakfast Club,20. INT. HALLWAY - DAY 21\n\n\n\n Vernon w...,"On March 24, 1984, five students — ""criminal"" ..."
24009,The Breakfast Club,30. INT. LIBRARY - DAY 32\n\n\n\n They are...,"On March 24, 1984, five students — ""criminal"" ..."
24013,The Breakfast Club,38. INT. HALLWAY - DAY 40\n\n\n\n The five...,"On March 24, 1984, five students — ""criminal"" ..."


# Metrics for Huggingface Transformer Summarizations

In [23]:
def abs_sum(text, architecture, min_length=10, max_length=512, length_penalty=1, repetition_penalty=1):
  """
  Abstractive summarization of text using input Huggingface architecture 
  Token sequences with length > 512 are retokenized after extractive summarization
  See https://huggingface.co/models for available models
  """
  text = text.strip()
  device = "cuda" if torch.cuda.is_available() else "cpu"
  model = AutoModelForSeq2SeqLM.from_pretrained(architecture).to(device)
  tokenizer = AutoTokenizer.from_pretrained(architecture)
  if 't5' in architecture:
    text = f"summarize: {text}"
  tokens_input = tokenizer.encode(text, return_tensors='pt', truncation=False).to(device)
  if 't5' in architecture:
    text = text.split('summarize: ')[1]
  while tokens_input.shape[1] > 512:
    ratio=512/tokens_input.shape[1]
    print('Token sequence length > 512')
    print(f'Performing extractive summarization and retokenization with ratio={ratio}')
    text = ext_sum(text, ratio=ratio).strip()
    if 't5' in architecture:
      text = f"summarize: {text}"
    tokens_input = tokenizer.encode(text, return_tensors='pt', truncation=False).to(device)
    if 't5' in architecture:
      text = text.split('summarize: ')[1]
  summary_ids = model.generate(tokens_input, min_length=min_length, max_length=max_length, length_penalty=length_penalty, repetition_penalty=repetition_penalty)
  summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
  return summary

def ext_sum(text, ratio=None, num_sentences=None):
  """
  Uses Huggingface BERT to do extractive summarization
  See https://github.com/dmmiller612/bert-extractive-summarizer for more info
  """
  text = text.strip()
  model = Summarizer()
  if (ratio is None) and (num_sentences is None):
    num_sentences = model.calculate_optimal_k(text)
    summary = model(text, num_sentences=num_sentences)
  else:
    summary = model(text, ratio=ratio, num_sentences=num_sentences) # Specified with Ratio
  return summary


In [26]:
train_df

Unnamed: 0,movie,X,Y,screenplay,y,tp_scenes
49,Panic Room,"[[0.010312125, -0.021104999, 0.004685092, 0.02...","[20, 56, 135, 148, 159]",[EXT. MANHATTAN - DAY 1\n\n \n\n The ...,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",[INT. MASTER BEDROOM - NIGHT 20\n\n \n\n ...
103,Total Recall (1990 film),"[[-0.005835405, -0.0034244575, -0.0052854903, ...","[16, 55, 72, 112, 151]",[1 EXT. MARS RED ROCK DESERT (DREAM SEQ.#1) - ...,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",[22 INT. REKALL - MEMORY STUDIO - DUSK 17 ...
139,Die Hard,"[[-0.01005826, 0.027367715, 0.002491984, -0.00...","[12, 26, 100, 114, 116]",[2 INT. 747 - PASSENGERS - SAME ...,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",[24 INT. BUILDING LOBBY - SAME TIME ...
58,Arbitrage (film),"[[-0.008415646, -0.0059916712, 0.0056184717, -...","[35, 62, 88, 105, 109]",[INT. ROBERT'S MANSION - DAY 1\n\n ...,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",[INT. JULIE'S LOFT - BEDROOM 35\n\n ...
117,Die Hard,"[[-0.01005826, 0.027367715, 0.002491984, -0.00...","[11, 26, 100, 115, 116]",[2 INT. 747 - PASSENGERS - SAME ...,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",[21 INT. ELLIS' OFFICE - NIGHT ...
...,...,...,...,...,...,...
118,Die Hard,"[[-0.01005826, 0.027367715, 0.002491984, -0.00...","[11, 26, 100, 115, 117]",[2 INT. 747 - PASSENGERS - SAME ...,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",[21 INT. ELLIS' OFFICE - NIGHT ...
61,Arbitrage (film),"[[-0.008415646, -0.0059916712, 0.0056184717, -...","[36, 57, 88, 105, 109]",[INT. ROBERT'S MANSION - DAY 1\n\n ...,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",[EXT. HIGHWAY - DEAD OF NIGHT 36\n\n ...
143,Die Hard,"[[-0.01005826, 0.027367715, 0.002491984, -0.00...","[12, 28, 99, 114, 116]",[2 INT. 747 - PASSENGERS - SAME ...,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",[24 INT. BUILDING LOBBY - SAME TIME ...
62,Arbitrage (film),"[[-0.008415646, -0.0059916712, 0.0056184717, -...","[36, 62, 67, 105, 109]",[INT. ROBERT'S MANSION - DAY 1\n\n ...,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",[EXT. HIGHWAY - DEAD OF NIGHT 36\n\n ...


In [29]:
rrows = []
for row in train_df.iloc:
  summaries_t5=[]
  summaries_pegasus=[]
  summaries_ext=[]
  for scene in row['tp_scenes']:
    summaries_t5.append(abs_sum(scene,'t5-large'))
    summaries_pegasus.append(abs_sum(scene,'google/pegasus-xsum'))
    summaries_ext.append(ext_sum(scene))
  rrow = {}
  rrow['movie'] = row['movie']
  rrow['summary_t5'] = ''.join(summaries_t5)
  rrow['summary_pegasus'] = ''.join(summaries_pegasus)
  rrow['summary_ext'] = ''.join(summaries_ext)
  rrows.append(rrow)
summaries_df = pd.DataFrame(rrows)
summaries_df
# summaries_df = pd.merge(summaries_df, full_gold[['movie','synopsis_raw']],on='movie').drop_duplicates()
# summaries_df.columns=['movie','scene','summary']
# summaries_df

Downloading:   0%|          | 0.00/1.17k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/2.75G [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/773k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.32M [00:01<?, ?B/s]

Downloading:   0%|          | 0.00/1.36k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/2.12G [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/87.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.82M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/3.36M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/65.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/571 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.25G [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/226k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Token sequence length > 512
Performing extractive summarization and retokenization with ratio=0.6530612244897959
Token sequence length > 512
Performing extractive summarization and retokenization with ratio=0.7630402384500745
Token sequence length > 512
Performing extractive summarization and retokenization with ratio=0.579841449603624
Token sequence length > 512
Performing extractive summarization and retokenization with ratio=0.6854082998661312
Token sequence length > 512
Performing extractive summarization and retokenization with ratio=0.2007843137254902
Token sequence length > 512
Performing extractive summarization and retokenization with ratio=0.22928795342588446
Token sequence length > 512
Performing extractive summarization and retokenization with ratio=0.9846153846153847
Token sequence length > 512
Performing extractive summarization and retokenization with ratio=0.33442194644023515
Token sequence length > 512
Performing extractive summarization and retokenization with ratio=0

KeyboardInterrupt: ignored

In [30]:
summaries_df = pd.DataFrame(rrows)
summaries_df

Unnamed: 0,movie,summary_t5,summary_pegasus,summary_ext
0,Panic Room,master bedroom alarm system goes off at night....,"In the early hours of Sunday morning, a burgla...",MASTER BEDROOM - NIGHT 20\n\n \n\n In ...
1,Total Recall (1990 film),a group of spies are sent to a memory studio t...,Dennis Quaid's Dr. Lull is about to implant Qu...,REKALL - MEMORY STUDIO - DUSK 17 ...
2,Die Hard,"theo, a well-dressed businessman, is killed by...",Karl Emory is sitting at a desk in the lobby o...,BUILDING LOBBY - SAME TIME ...
3,Arbitrage (film),a woman is adamant that she can't be with her ...,"A woman walks into Julie'sLOFT, the art galler...",JULIE'S LOFT - BEDROOM 35\n\n ...
4,Die Hard,ELLIS' office - night 21 TILT UP FROM McClane'...,McClane leans back in his chair and leans back...,ELLIS' OFFICE - NIGHT ...
...,...,...,...,...
61,Die Hard,ELLIS' office - night 21 TILT UP FROM McClane'...,McClane leans back in his chair and leans back...,ELLIS' OFFICE - NIGHT ...
62,Die Hard,"theo, a well-dressed businessman, is killed by...",Karl Emory is sitting at a desk in the lobby o...,BUILDING LOBBY - SAME TIME ...
63,Juno (film),Juno arrives at a dingin' elk drogue on the ma...,The final episode of DRUGSTORE - Day 4 will be...,DRUGSTORE - DAY 4\n\n\n\n Finall...
64,The Back-up Plan,Stan and Zoe are in a taxi in the rain. they b...,This week's episode of Zoe and Stan's Happily ...,TAXI - CONTINUOUS 11\n\n\n\nZoe and Stan stand...


In [42]:
final_output = pd.merge(summaries_df,full_gold[['movie','synopsis_raw']].drop_duplicates(),how='inner',on='movie')
final_output

Unnamed: 0,movie,summary_t5,summary_pegasus,summary_ext,synopsis_raw
0,Panic Room,master bedroom alarm system goes off at night....,"In the early hours of Sunday morning, a burgla...",MASTER BEDROOM - NIGHT 20\n\n \n\n In ...,Recently divorced Meg Altman (Foster) and her ...
1,Panic Room,"meg sets the digital clock, puts it next to th...",In our series of letters from African-American...,MASTER BEDROOM - NIGHT 17\n\n \n\n Meg...,Recently divorced Meg Altman (Foster) and her ...
2,Panic Room,"meg sets the digital clock, puts it next to th...",In our series of letters from African-American...,MASTER BEDROOM - NIGHT 17\n\n \n\n Meg...,Recently divorced Meg Altman (Foster) and her ...
3,Panic Room,"meg sets the digital clock, puts it next to th...",In our series of letters from African-American...,MASTER BEDROOM - NIGHT 17\n\n \n\n Meg...,Recently divorced Meg Altman (Foster) and her ...
4,Total Recall (1990 film),a group of spies are sent to a memory studio t...,Dennis Quaid's Dr. Lull is about to implant Qu...,REKALL - MEMORY STUDIO - DUSK 17 ...,"In 2084, Douglas Quaid is an Earthbound constr..."
...,...,...,...,...,...
61,The Back-up Plan,Stan and Zoe are in a taxi in the rain. they b...,This week's episode of Zoe and Stan's Happily ...,TAXI - CONTINUOUS 11\n\n\n\nZoe and Stan stand...,Zoe (Jennifer Lopez) has given up on finding t...
62,One Eight Seven,"'that mess over there, that's Eskander's, that...",In our series of letters from African journali...,BUNGALOW #86 - THROUGH WINDOW PANE ...,Trevor Garfield is an African American high sc...
63,One Eight Seven,a class of students at a san francisco high sc...,In our series of letters from African-American...,"BUNGALOW ""84"" - MORNING ...",Trevor Garfield is an African American high sc...
64,One Eight Seven,"'that mess over there, that's Eskander's, that...",In our series of letters from African journali...,BUNGALOW #86 - THROUGH WINDOW PANE ...,Trevor Garfield is an African American high sc...


In [82]:

from bleurt import score

references = "This is a test."
candidates = "This is a test."

scorer_bleurt = score.BleurtScorer()
scorer_bleurt.score(references=[references], candidates=[candidates])


INFO:tensorflow:No checkpoint specified, defaulting to BLEURT-tiny.


INFO:tensorflow:No checkpoint specified, defaulting to BLEURT-tiny.


INFO:tensorflow:Reading checkpoint /usr/local/lib/python3.7/dist-packages/bleurt/test_checkpoint.


INFO:tensorflow:Reading checkpoint /usr/local/lib/python3.7/dist-packages/bleurt/test_checkpoint.


INFO:tensorflow:Config file found, reading.


INFO:tensorflow:Config file found, reading.


INFO:tensorflow:Will load checkpoint dbleurt_tiny


INFO:tensorflow:Will load checkpoint dbleurt_tiny


INFO:tensorflow:Loads full paths and checks that files exists.


INFO:tensorflow:Loads full paths and checks that files exists.


INFO:tensorflow:... name:dbleurt_tiny


INFO:tensorflow:... name:dbleurt_tiny


INFO:tensorflow:... vocab_file:vocab.txt


INFO:tensorflow:... vocab_file:vocab.txt


INFO:tensorflow:... bert_config_file:bert_config.json


INFO:tensorflow:... bert_config_file:bert_config.json


INFO:tensorflow:... do_lower_case:True


INFO:tensorflow:... do_lower_case:True


INFO:tensorflow:... max_seq_length:512


INFO:tensorflow:... max_seq_length:512


INFO:tensorflow:Creating BLEURT scorer.


INFO:tensorflow:Creating BLEURT scorer.


INFO:tensorflow:Creating WordPiece tokenizer.


INFO:tensorflow:Creating WordPiece tokenizer.


INFO:tensorflow:WordPiece tokenizer instantiated.


INFO:tensorflow:WordPiece tokenizer instantiated.


INFO:tensorflow:Creating Eager Mode predictor.


INFO:tensorflow:Creating Eager Mode predictor.


INFO:tensorflow:Loading model.


INFO:tensorflow:Loading model.


INFO:tensorflow:BLEURT initialized.


INFO:tensorflow:BLEURT initialized.


[0.92884361743927]

In [87]:
from rouge_score import rouge_scorer

scorer_rouge = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)
scorer_rouge.score('The quick brown fox jumps over the lazy dog',
                      'The quick brown dog jumps on the log.')


{'rouge1': Score(precision=0.75, recall=0.6666666666666666, fmeasure=0.7058823529411765),
 'rouge2': Score(precision=0.2857142857142857, recall=0.25, fmeasure=0.26666666666666666),
 'rougeL': Score(precision=0.625, recall=0.5555555555555556, fmeasure=0.5882352941176471)}

In [71]:
final_output

Unnamed: 0,movie,summary_t5,summary_pegasus,summary_ext,synopsis_raw
0,Panic Room,master bedroom alarm system goes off at night....,"In the early hours of Sunday morning, a burgla...",MASTER BEDROOM - NIGHT 20\n\n \n\n In ...,Recently divorced Meg Altman (Foster) and her ...
1,Panic Room,"meg sets the digital clock, puts it next to th...",In our series of letters from African-American...,MASTER BEDROOM - NIGHT 17\n\n \n\n Meg...,Recently divorced Meg Altman (Foster) and her ...
2,Panic Room,"meg sets the digital clock, puts it next to th...",In our series of letters from African-American...,MASTER BEDROOM - NIGHT 17\n\n \n\n Meg...,Recently divorced Meg Altman (Foster) and her ...
3,Panic Room,"meg sets the digital clock, puts it next to th...",In our series of letters from African-American...,MASTER BEDROOM - NIGHT 17\n\n \n\n Meg...,Recently divorced Meg Altman (Foster) and her ...
4,Total Recall (1990 film),a group of spies are sent to a memory studio t...,Dennis Quaid's Dr. Lull is about to implant Qu...,REKALL - MEMORY STUDIO - DUSK 17 ...,"In 2084, Douglas Quaid is an Earthbound constr..."
...,...,...,...,...,...
61,The Back-up Plan,Stan and Zoe are in a taxi in the rain. they b...,This week's episode of Zoe and Stan's Happily ...,TAXI - CONTINUOUS 11\n\n\n\nZoe and Stan stand...,Zoe (Jennifer Lopez) has given up on finding t...
62,One Eight Seven,"'that mess over there, that's Eskander's, that...",In our series of letters from African journali...,BUNGALOW #86 - THROUGH WINDOW PANE ...,Trevor Garfield is an African American high sc...
63,One Eight Seven,a class of students at a san francisco high sc...,In our series of letters from African-American...,"BUNGALOW ""84"" - MORNING ...",Trevor Garfield is an African American high sc...
64,One Eight Seven,"'that mess over there, that's Eskander's, that...",In our series of letters from African journali...,BUNGALOW #86 - THROUGH WINDOW PANE ...,Trevor Garfield is an African American high sc...


In [108]:
rouge_1_t5 = []
rouge_2_t5 = []
rouge_L_t5 = []
rouge_1_pegasus = []
rouge_2_pegasus = []
rouge_L_pegasus = []
rouge_1_ext = []
rouge_2_ext = []
rouge_L_ext = []
bleurt_t5 = []
bleurt_pegasus = []
bleurt_ext = []

for row in final_output.iloc:
  rouge_1_t5.append(scorer_rouge.score(row['synopsis_raw'],row['summary_t5'])['rouge1'][2])
  rouge_2_t5.append(scorer_rouge.score(row['synopsis_raw'],row['summary_t5'])['rouge2'][2])
  rouge_L_t5.append(scorer_rouge.score(row['synopsis_raw'],row['summary_t5'])['rougeL'][2])

  rouge_1_pegasus.append(scorer_rouge.score(row['synopsis_raw'],row['summary_pegasus'])['rouge1'][2])
  rouge_2_pegasus.append(scorer_rouge.score(row['synopsis_raw'],row['summary_pegasus'])['rouge2'][2])
  rouge_L_pegasus.append(scorer_rouge.score(row['synopsis_raw'],row['summary_pegasus'])['rougeL'][2])

  rouge_1_ext.append(scorer_rouge.score(row['synopsis_raw'],row['summary_ext'])['rouge1'][2])
  rouge_2_ext.append(scorer_rouge.score(row['synopsis_raw'],row['summary_ext'])['rouge2'][2])
  rouge_L_ext.append(scorer_rouge.score(row['synopsis_raw'],row['summary_ext'])['rougeL'][2])

  bleurt_t5.append(scorer_bleurt.score(references=[row['synopsis_raw']], candidates=[row['summary_t5']])[0])
  bleurt_pegasus.append(scorer_bleurt.score(references=[row['synopsis_raw']], candidates=[row['summary_pegasus']])[0])
  bleurt_ext.append(scorer_bleurt.score(references=[row['synopsis_raw']], candidates=[row['summary_ext']])[0])



In [109]:
metrics=final_output.copy()
metrics['rouge_1_t5'] = rouge_1_t5
metrics['rouge_2_t5'] = rouge_2_t5
metrics['rouge_L_t5'] = rouge_L_t5
metrics['rouge_1_pegasus'] = rouge_1_pegasus
metrics['rouge_2_pegasus'] = rouge_2_pegasus
metrics['rouge_L_pegasus'] = rouge_L_pegasus
metrics['rouge_1_ext'] = rouge_1_ext
metrics['rouge_2_ext'] = rouge_2_ext
metrics['rouge_L_ext'] = rouge_L_ext
metrics['bleurt_t5'] = bleurt_t5
metrics['bleurt_pegasus'] = bleurt_pegasus
metrics['bleurt_ext'] = bleurt_ext
metrics

Unnamed: 0,movie,summary_t5,summary_pegasus,summary_ext,synopsis_raw,rouge_1_t5,rouge_2_t5,rouge_L_t5,rouge_1_pegasus,rouge_2_pegasus,rouge_L_pegasus,rouge_1_ext,rouge_2_ext,rouge_L_ext,bleurt_t5,bleurt_pegasus,bleurt_ext
0,Panic Room,master bedroom alarm system goes off at night....,"In the early hours of Sunday morning, a burgla...",MASTER BEDROOM - NIGHT 20\n\n \n\n In ...,Recently divorced Meg Altman (Foster) and her ...,0.172811,0.043880,0.092166,0.154882,0.029246,0.092031,0.167991,0.047782,0.095346,-0.933142,-0.876263,-0.881980
1,Panic Room,"meg sets the digital clock, puts it next to th...",In our series of letters from African-American...,MASTER BEDROOM - NIGHT 17\n\n \n\n Meg...,Recently divorced Meg Altman (Foster) and her ...,0.189402,0.045198,0.103720,0.151448,0.026786,0.089087,0.189636,0.048619,0.108049,-0.897555,-0.890559,-0.875141
2,Panic Room,"meg sets the digital clock, puts it next to th...",In our series of letters from African-American...,MASTER BEDROOM - NIGHT 17\n\n \n\n Meg...,Recently divorced Meg Altman (Foster) and her ...,0.197088,0.038159,0.103024,0.146667,0.024499,0.084444,0.233820,0.056485,0.129436,-0.952075,-0.930230,-0.828041
3,Panic Room,"meg sets the digital clock, puts it next to th...",In our series of letters from African-American...,MASTER BEDROOM - NIGHT 17\n\n \n\n Meg...,Recently divorced Meg Altman (Foster) and her ...,0.209713,0.046460,0.112583,0.164229,0.033822,0.096738,0.203724,0.063666,0.113910,-0.883870,-0.878543,-0.878560
4,Total Recall (1990 film),a group of spies are sent to a memory studio t...,Dennis Quaid's Dr. Lull is about to implant Qu...,REKALL - MEMORY STUDIO - DUSK 17 ...,"In 2084, Douglas Quaid is an Earthbound constr...",0.207664,0.039653,0.126082,0.190968,0.041397,0.110968,0.239726,0.032037,0.123288,-0.932142,-0.906040,-0.798273
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
61,The Back-up Plan,Stan and Zoe are in a taxi in the rain. they b...,This week's episode of Zoe and Stan's Happily ...,TAXI - CONTINUOUS 11\n\n\n\nZoe and Stan stand...,Zoe (Jennifer Lopez) has given up on finding t...,0.260297,0.059504,0.135091,0.180917,0.029814,0.089219,0.217252,0.044872,0.130990,-0.947262,-1.177882,-0.974002
62,One Eight Seven,"'that mess over there, that's Eskander's, that...",In our series of letters from African journali...,BUNGALOW #86 - THROUGH WINDOW PANE ...,Trevor Garfield is an African American high sc...,0.213723,0.013529,0.110236,0.211020,0.037603,0.107855,0.190055,0.024363,0.101657,-0.938867,-0.947457,-0.985896
63,One Eight Seven,a class of students at a san francisco high sc...,In our series of letters from African-American...,"BUNGALOW ""84"" - MORNING ...",Trevor Garfield is an African American high sc...,0.195506,0.018018,0.112360,0.203940,0.039489,0.106605,0.202151,0.023707,0.101075,-0.899842,-0.936688,-0.976273
64,One Eight Seven,"'that mess over there, that's Eskander's, that...",In our series of letters from African journali...,BUNGALOW #86 - THROUGH WINDOW PANE ...,Trevor Garfield is an African American high sc...,0.183721,0.009324,0.102326,0.199052,0.038005,0.109005,0.187638,0.026549,0.097130,-0.931949,-1.002067,-0.971265


In [111]:
metrics.mean().to_dict()

  """Entry point for launching an IPython kernel.


{'bleurt_ext': -0.8500507537162665,
 'bleurt_pegasus': -1.0020897785822551,
 'bleurt_t5': -0.9323210996208768,
 'rouge_1_ext': 0.2289325042909262,
 'rouge_1_pegasus': 0.17559941927050327,
 'rouge_1_t5': 0.19856580863325443,
 'rouge_2_ext': 0.03542967506685264,
 'rouge_2_pegasus': 0.031428891613878185,
 'rouge_2_t5': 0.030731845951137062,
 'rouge_L_ext': 0.11587112109286131,
 'rouge_L_pegasus': 0.10214568187543352,
 'rouge_L_t5': 0.11066859186228799}

In [114]:
pd.DataFrame([
 {'metric':'Rouge 1','ext':0.2289325042909262,'pegasus':0.17559941927050327,'t5':0.19856580863325443},
 {'metric':'Rouge 2','ext':0.03542967506685264,'pegasus':0.031428891613878185,'t5':0.030731845951137062},
 {'metric':'Rouge L','ext':0.11587112109286131,'pegasus':0.10214568187543352,'t5':0.11066859186228799},
 {'metric':'Bleurt','ext':-0.8500507537162665,'pegasus':-1.0020897785822551,'t5':-0.9323210996208768}
]).set_index('metric')

Unnamed: 0_level_0,ext,pegasus,t5
metric,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Rouge 1,0.228933,0.175599,0.198566
Rouge 2,0.03543,0.031429,0.030732
Rouge L,0.115871,0.102146,0.110669
Bleurt,-0.850051,-1.00209,-0.932321
