In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive/ so that the
# notebook can read files stored on Drive.
from google.colab import drive
drive.mount('/content/drive/')

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


In [None]:
# Install the libraries this notebook imports further down.
# NOTE(review): both installs are unpinned, so a rerun may pull different
# releases than the ones this run used (the version cell below reported
# transformers 4.6.1) — consider pinning once known-good versions are confirmed.
!pip install transformers
!pip install simpletransformers



In [None]:
import numpy as np
import pandas as pd
import os, json, gc, re, random
from tqdm.notebook import tqdm
# from tqdm import tqdm
from sklearn.model_selection import train_test_split

import matplotlib.pyplot as plt
%matplotlib inline
import plotly.express as px
import seaborn as sns

import warnings
# Suppress every Python warning for the rest of the session. Note that this is
# a blanket filter: deprecation and runtime warnings are hidden as well.
warnings.filterwarnings("ignore")

import logging
# Root logger at INFO so that INFO-level progress messages are displayed ...
logging.basicConfig(level=logging.INFO)
# ... while the "transformers" logger is restricted to WARNING and above.
transformers_logger = logging.getLogger("transformers")
transformers_logger.setLevel(logging.WARNING)

In [None]:
import torch, transformers, tokenizers
# Display the installed versions as a tuple to record the environment of this run.
torch.__version__, transformers.__version__, tokenizers.__version__

('1.8.1+cu101', '4.6.1', '0.10.3')

In [None]:
cd drive/My Drive/Colab Notebooks/experiments

/content/drive/My Drive/Colab Notebooks/experiments


In [None]:
# Load the dataset CSV. The path is relative to the working directory selected
# by the `cd` cell above; later cells use its `sentence` and `verb` columns.
data_file = pd.read_csv("data/trofi.csv")

In [None]:
# Give the frame the `input_text` / `target_text` columns used for seq2seq
# training in this notebook:
#   input_text  <- the sentence (the original `sentence` column, renamed)
#   target_text <- the verb the model should generate for that sentence
# `verb` is selected with single brackets so a Series (not a one-column
# DataFrame) is assigned, and both steps are done in one chain so the cell
# yields the same frame every time it is run.
data_file = (
    data_file
    .rename(columns={'sentence': 'input_text'})
    .assign(target_text=lambda df: df['verb'])
)

In [None]:
# Keep only the two columns used for training and evaluation. The name
# `papers` is historical; each row here is a sentence paired with a verb.
papers = data_file[["input_text", "target_text"]]

In [None]:
# Preview the first rows to sanity-check the input/target pairing.
papers.head()

Unnamed: 0,input_text,target_text
0,An Energy Department spokesman says the sulfur...,absorb
1,The yellow beta carotene pigment absorbs blue ...,absorb
2,"This time , the ground absorbed the shock wave...",absorb
3,'' Vitamins could be passed right out of the b...,absorb
4,"As Eliot wrote : '' In a warm haze , the sultr...",absorb


In [None]:
%%time

from simpletransformers.seq2seq import Seq2SeqModel

eval_df = papers.sample(frac=0.1, random_state=42)
train_df = papers.drop(eval_df.index)

model_args = {
    "reprocess_input_data": True,
    "overwrite_output_dir": True,
    "save_model_every_epoch": False,
    "save_eval_checkpoints": False,
    "max_seq_length": 512,
    "train_batch_size": 6,
    "num_train_epochs": 3,
}

# Create a Bart-base model
model = Seq2SeqModel(encoder_decoder_type="bart",
                    encoder_decoder_name="facebook/bart-base",
                    args=model_args)

CPU times: user 1.84 s, sys: 376 ms, total: 2.21 s
Wall time: 4.77 s


In [None]:
%%time

# Fine-tune the model on the training split. According to the run log, the
# trained model is saved into outputs/ when training completes.
model.train_model(train_df)

# Evaluate on the held-out split. The printed result is a dict containing the
# evaluation loss under 'eval_loss'.
result = model.eval_model(eval_df)
print(result)

INFO:simpletransformers.seq2seq.seq2seq_utils: Creating features from dataset file at cache_dir/


  0%|          | 0/3363 [00:00<?, ?it/s]

INFO:simpletransformers.seq2seq.seq2seq_model: Training started


Epoch:   0%|          | 0/3 [00:00<?, ?it/s]

Running Epoch 0 of 3:   0%|          | 0/561 [00:00<?, ?it/s]

Running Epoch 1 of 3:   0%|          | 0/561 [00:00<?, ?it/s]

Running Epoch 2 of 3:   0%|          | 0/561 [00:00<?, ?it/s]

INFO:simpletransformers.seq2seq.seq2seq_model:Saving model into outputs/
INFO:simpletransformers.seq2seq.seq2seq_model: Training of facebook/bart-base model complete. Saved to outputs/.
INFO:simpletransformers.seq2seq.seq2seq_utils: Creating features from dataset file at cache_dir/


  0%|          | 0/374 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/47 [00:00<?, ?it/s]

INFO:simpletransformers.seq2seq.seq2seq_model:{'eval_loss': 0.24183904897273578}


{'eval_loss': 0.24183904897273578}
CPU times: user 7min 9s, sys: 3.83 s, total: 7min 13s
Wall time: 6min 45s


In [None]:
# Spot-check the fine-tuned model on held-out examples.
#
# Rows are drawn WITHOUT replacement and with a fixed seed, so no sentence is
# printed twice and a rerun shows the same examples (drawing random indices
# with `random.randint` repeated several sentences and changed on every run).
# All sentences are passed to a single `model.predict` call so generation is
# batched instead of being launched once per sentence.
N_EXAMPLES = 65

spot_check_df = eval_df.sample(n=min(N_EXAMPLES, len(eval_df)), random_state=42)
sentences = spot_check_df['input_text'].tolist()
true_titles = spot_check_df['target_text'].tolist()

# Predict with trained BART model (skipped when there is nothing to predict)
predicted_titles = model.predict(sentences) if sentences else []

n_exact = 0
for abstract, true_title, predicted_title in zip(sentences, true_titles, predicted_titles):
    # The "Title"/"Abstract" labels are kept unchanged; in this notebook they
    # stand for the target verb and the input sentence respectively.
    print(f'True Title: {true_title}\n')
    print(f'Predicted Title: {predicted_title}\n')
    print(f'Abstract: {abstract}\n\n\n')

    n_exact += str(predicted_title).strip() == str(true_title).strip()

# Summarise the spot check so the result does not have to be counted by eye.
print(f'Exact matches: {n_exact}/{len(sentences)}')

Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: miss

Predicted Title: miss

Abstract: Virginia Belden , who sells vacuum cleaners and sewing machines at the Towne East store , however , frets that some shoppers might miss the thrill of getting a bargain 





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: fill

Predicted Title: fill

Abstract: To fill these growing gaps , Congress increasingly has turned to employers to expand coverage to even more employees and non - employees 





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: destroy

Predicted Title: destroy

Abstract: An example : Last fall , Joseph Biden 's campaign was destroyed after a leaked videotape showed similarities between his speech and one given by British Labor Party leader Neil Kinnock 





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: flood

Predicted Title: flood

Abstract: Mr. Davis says he has been flooded with requests for berths on future flights since announcing the October launch 





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: strike

Predicted Title: strike

Abstract: In short , a completely effective SDI would give the Soviets what they do not now have : the ability to strike the U.S. without fear of retaliation 





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: fill

Predicted Title: fill

Abstract: And if occupancy ran low , the administrator would just let the doctors know and the beds would fill up 





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: examine

Predicted Title: examine

Abstract: Meanwhile , law - enforcement officials said that investigators are examining the fierce competition that developed between military contractors over a $ 168.9 million contract to build a prototype surveillance blimp for the Navy 





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: ride

Predicted Title: ride

Abstract: The pumps can be reprogrammed by telephone to alter dosages , and he says that one of his young patients recently resumed riding a bicycle 





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: roll

Predicted Title: roll

Abstract: The good times for the Rev. Jesse Jackson may roll on -- and neither his rivals for the Democratic presidential nomination nor other party heavyweights have the temerity to try to break up the celebration 





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: dissolve

Predicted Title: dissolve

Abstract: '' Of course , the bondholders will have to ask : Is dissolving the company better than reorganization ? '





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: pump

Predicted Title: pump

Abstract: Under the pact , Ashland will treat the contaminated soil , and it will pump and treat ground water to meet drinking - water standards , among other things 





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: target

Predicted Title: target

Abstract: Wells Rich will handle corporate advertising and '' market image and solutions advertising , '' targeted mainly at other businesses 





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: destroy

Predicted Title: destroy

Abstract: Old clothes are often destroyed and new ones donned ; indoctrination is given in social mores and religious beliefs 





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: eat

Predicted Title: assault

Abstract: His brother Woody , who 's worth $ 100 million , is a grotesque 250-pound alcoholic moron who spends his time watching Arnold Schwarzenegger movies , eating peanuts , and floating stark naked on a rubber raft in his indoor swimming pool listening to favorites from '' My Fair Lady . '' -LRB- He also has raped and assaulted a showgirl . -RRB-/-R





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: stick

Predicted Title: stick

Abstract: She 's been able to stick to this policy , largely because of the nature of her music 





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: lend

Predicted Title: lend

Abstract: The rest would go to more than 30 international banks that lent the National Bank money before it was seized and closed by Brunei in November 1986 





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: eat

Predicted Title: eat

Abstract: '' We encourage our people as much as we can to fly on United Airlines and eat Kellogg cereals , '' says Jack Ryan , executive vice president of Leo Burnett Co . , which also handles advertising for GM 's Oldsmobile division 





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: drag

Predicted Title: drag

Abstract: They never say excuse me , '' says Jacqueline Yun , a crop - haired Korean - American , as she drags on a cigarette 





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: pump

Predicted Title: pump

Abstract: Among the hundreds of pages of affidavits and depositions filed by the SEC with a federal court in New York earlier this week are sworn statements by six of Mr. Wang 's colleagues in Morgan Stanley 's mergers and acquisitions department indicating that he was intimately familiar with much of their work , and sometimes may have pumped them for details 





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: attack

Predicted Title: attack

Abstract: Here in Masaya , supporters of a group known as the Mothers of Political Prisoners were brutally attacked by turbas in a well - documented incident in March 





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: miss

Predicted Title: miss

Abstract: '' We have n't dug down to see '' what is missing , says Mr. Nelson 





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: stumble

Predicted Title: stumble

Abstract: But there were stumbling blocks from the start 





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: kick

Predicted Title: kick

Abstract: The article cited by many traders for kicking off Monday 's rally appeared in the Sunday Times of London and was written by Goldman Sachs 's London - based chief international economist , David Morrison , an influential exchange - rate forecaster 





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: pump

Predicted Title: pump

Abstract: They also became a major element in the more - than - doubling of the Nikkei average in the two years to last October because of the huge amount of money they pumped into the market 





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: wither

Predicted Title: wither

Abstract: But that bid withered after the Oct. 19 stock market crash 





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: pump

Predicted Title: pump

Abstract: They also became a major element in the more - than - doubling of the Nikkei average in the two years to last October because of the huge amount of money they pumped into the market 





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: sleep

Predicted Title: sleep

Abstract: '' I have long suspected that more people are sleeping apart because of snoring than are sleeping together for all other reasons combined . '





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: escape

Predicted Title: escape

Abstract: And so long as one - third of the black population is mired in poverty , blacks will not hear of dismantling programs that enabled so many within their ranks to escape poverty in the first place 





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: kick

Predicted Title: kick

Abstract: Mr. Reagan 's speech was designed to kick off a triumphant final year in office , but a new poll suggests that his standing in public opinion has n't recovered from the dive it took after the disclosure of the Iran - Contra affair 





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: escape

Predicted Title: escape

Abstract: Mr. Tennyson said it 's unfair that foreign cigarette manufacturers who advertise through U.S. magazines will escape the ban 





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: stick

Predicted Title: stick

Abstract: Since 1977 , fund manager Peter Lynch has stuck to the same basic strategy : buy any attractive stock , no matter what size or sector , or what the market is doing 





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: roll

Predicted Title: roll

Abstract: When the baby drops a rattle , Mr. Prentnieks scoops it up and puts it in the baby 's fist -- still listening to Mr. Jackson 's speech with his camera rolling 





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: fill

Predicted Title: fill

Abstract: Mr. Cossa 's grain bin may be filling up , but just last month another 100 peasants -- looking for food and security -- moved into the village to escape the fighting in the countryside 





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: target

Predicted Title: target

Abstract: It is targeted at space testing of SDI technologies 





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: stick

Predicted Title: stick

Abstract: She 's been able to stick to this policy , largely because of the nature of her music 





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: drink

Predicted Title: attack

Abstract: Coke is even directly attacking coffee 's breakfast appeal with radio ads urging people to drink something cold to come '' alive in the morning . '





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: kill

Predicted Title: kill

Abstract: It concluded that some 9 % of the population , or 1.5 million people , probably has been killed 





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: miss

Predicted Title: miss

Abstract: There 's that damn ball of the Renthals that Laurence insists I not miss , and the Todesco wedding , and the final gala for the ballet where I ' m the chairperson 





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: target

Predicted Title: target

Abstract: Most of the banks specifically have targeted marketing campaigns at profitable small companies . -LRB- Separately , Dai - Ichi Kangyo said it will name Ichiro Nakamura , now a vice president , as chairman , succeeding Tetsuya Fujimori 





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: attack

Predicted Title: attack

Abstract: Labor unions , too , attack the underground economy 





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: drink

Predicted Title: drink

Abstract: Among those who drink alcoholic beverages world - wide , 8 % consume daily amounts that are damaging to their health and to society 





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: fill

Predicted Title: fill

Abstract: He now says that specialty retailing fills the bill , but he made a number of profitable forays in the meantime 





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: kick

Predicted Title: kick

Abstract: Most - Remarkable Injury -- Angels outfielder Chili Davis dislocated a toe when he kicked a chair after popping out in a June 26 home game against Milwaukee 





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: escape

Predicted Title: escape

Abstract: Limited grew so quickly and was such a hit on Wall Street for so long '' that some of us thought it could escape the current fashion retail slump , but that is n't the case . '





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: fix

Predicted Title: fix

Abstract: We begged his mother for months to let us take him into Nha Trang and have it fixed 





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: kill

Predicted Title: kill

Abstract: The military said three soldiers also were killed when about 300 rebels attempted to overrun a 30-man government outpost 





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: drink

Predicted Title: drink

Abstract: In seats of supple leather , Belgian students drink wine and listen to personal stereos 





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: wither

Predicted Title: wither

Abstract: The debate for the rematch was so withering that Nissan Motor Co. used a news clip of Mr. Tucker 's criticisms in its bid to keep the UAW out of its Tennessee plant 





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: destroy

Predicted Title: destroy

Abstract: An example : Last fall , Joseph Biden 's campaign was destroyed after a leaked videotape showed similarities between his speech and one given by British Labor Party leader Neil Kinnock 





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: attack

Predicted Title: attack

Abstract: Iran said its jets bombarded an Iraqi military base and oil refinery in Baghdad , and Iraq said its forces attacked 10 Iranian towns 





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: flow

Predicted Title: flow

Abstract: Mr. Marino said the the mechanism calls for debt service to be met from property and sales tax revenue that flows through the state comptroller 's office before being diverted to city coffers 





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: assault

Predicted Title: assault

Abstract: Mr. Tyson has denied that he tried to kill himself and that he assaulted his wife 





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: stick

Predicted Title: stick

Abstract: Pressure mounted on Pakistan to sign an accord aimed at securing the withdrawal of about 115 , 000 Soviet troops from neighboring Afghanistan , but Islamabad appeared to be sticking to its demand for a new government in Kabul to oversee the pullout 





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: evaporate

Predicted Title: evaporate

Abstract: If dry weather cuts normal production by just three bushels an acre , the nation 's soybean stockpiles could evaporate by next year , says Richard Loewy , senior grain and oilseed analyst at Prudential - Bache Securities Inc 





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: attack

Predicted Title: attack

Abstract: The only thing certain is that a Korean merchant wholly dependent on the good will of his customers would be crazy to attack a black grandmother in a virtually all - black neighborhood -- and Mr. Chung , who cleaned fish for three years to scrape together the money to buy his market , seems anything but crazy 





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: stick

Predicted Title: miss

Abstract: MISSED CONNECTIONS : If you 're bound for Denver but get stuck in Detroit for the night because of a missed connection , do n't expect the airline to help 





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: ride

Predicted Title: ride

Abstract: Lots of people ride bicycles , joggers are out in fair numbers , and the last time I passed a tennis complex , all the courts were full 





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: sleep

Predicted Title: sleep

Abstract: Her daughter demanded that she keep silent , telling her , '' You wo n't have a home to sleep in if you say anything about this . '





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: flourish

Predicted Title: flourish

Abstract: The clash of goals is rare in postwar Japan , where consensus has enabled banks and industry to flourish 





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: strike

Predicted Title: strike

Abstract: Like tiny beacons at night , alligators ' eyes reflect a brilliant red when struck by a beam of light 





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: target

Predicted Title: target

Abstract: In a letter sent yesterday to the German media , the terrorist group said Mr. Tietmeyer , who was unharmed in the attack , has been targeted because of his central role in sustaining the world 's '' imperialist '' financial system 





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: roll

Predicted Title: roll

Abstract: Charity officials say they need trustees who are willing to roll up their sleeves and work for the organization and who will back up their commitment with donations 





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: plant

Predicted Title: plant

Abstract: Few things give him more satisfaction now , it seems , than the hundreds of roses , fruit trees and berry bushes the 57-year - old Mr. Lance and a grandson have planted together 





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: evaporate

Predicted Title: evaporate

Abstract: The low pressure causes liquid in the arm to evaporate and to be pulled into the pipe 





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: knock

Predicted Title: knock

Abstract: Seagram must decide by Feb. 9 whether to come up with a higher offer that would knock Grand Met out of the bidding war and frustrate its expansion strategy 



