In [None]:
from google.colab import drive

# Attach Google Drive to the Colab filesystem; later cells change into a folder below it.
MOUNT_POINT = '/content/drive/'
drive.mount(MOUNT_POINT)

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


In [None]:
!pip install transformers
!pip install simpletransformers



In [None]:
import numpy as np
import pandas as pd
import os, json, gc, re, random
from tqdm.notebook import tqdm
# from tqdm import tqdm
from sklearn.model_selection import train_test_split

import matplotlib.pyplot as plt
%matplotlib inline
import plotly.express as px
import seaborn as sns

# Hide every Python warning so library chatter does not clutter cell output.
import warnings
warnings.simplefilter("ignore")

# Configure logging at INFO overall, but cap the "transformers" logger at WARNING
# so only its warnings and errors reach the notebook.
import logging
logging.basicConfig(level=logging.INFO)
transformers_logger = logging.getLogger("transformers")
transformers_logger.setLevel(logging.WARNING)

In [None]:
import torch
import transformers
import tokenizers

# Record the library versions this notebook ran with (displayed as the cell's output).
tuple(lib.__version__ for lib in (torch, transformers, tokenizers))

('1.8.1+cu101', '4.6.1', '0.10.3')

In [None]:
cd drive/My Drive/Colab Notebooks/experiments

/content/drive/My Drive/Colab Notebooks/experiments


In [None]:
# Load the dataset CSV; the path is relative to the current working directory.
DATA_PATH = "data/moh-x.csv"
data_file = pd.read_csv(DATA_PATH)

In [None]:
# Map the dataset columns onto the names used by the training cells:
#   input_text  <- the full sentence
#   target_text <- "<arg1> <verb>", joined by a single space
# (Alternative target: the verb on its own, i.e. data_file['verb'].)
data_file = (
    data_file
    .rename(columns={'sentence': 'input_text'})
    .assign(target_text=lambda frame: frame[['arg1', 'verb']].agg(' '.join, axis=1))
)

In [None]:
# Keep only the two text columns used downstream and preview the first rows.
model_columns = ["input_text", "target_text"]
papers = data_file.loc[:, model_columns]
papers.head()

Unnamed: 0,input_text,target_text
0,He absorbed the knowledge or beliefs of his t...,knowledge absorb
1,He absorbed the costs for the accident .,cost absorb
2,The sales tax is absorbed into the state inco...,tax absorb
3,The immigrants were quickly absorbed into soc...,immigrant absorb
4,Her interest in butterflies absorbs her compl...,interest absorb


In [None]:
%%time

from simpletransformers.seq2seq import Seq2SeqModel

eval_df = papers.sample(frac=0.1, random_state=42)
train_df = papers.drop(eval_df.index)

model_args = {
    "reprocess_input_data": True,
    "overwrite_output_dir": True,
    "save_model_every_epoch": False,
    "save_eval_checkpoints": False,
    "max_seq_length": 512,
    "train_batch_size": 6,
    "num_train_epochs": 3,
}

# Create a Bart-base model
model = Seq2SeqModel(encoder_decoder_type="bart",
                    encoder_decoder_name="facebook/bart-base",
                    args=model_args)

CPU times: user 1.65 s, sys: 321 ms, total: 1.97 s
Wall time: 4.53 s


In [None]:
%%time

# Train the model
model.train_model(train_df)

# Evaluate the model
result = model.eval_model(eval_df)
print(result)

INFO:simpletransformers.seq2seq.seq2seq_utils: Creating features from dataset file at cache_dir/


  0%|          | 0/582 [00:00<?, ?it/s]

INFO:simpletransformers.seq2seq.seq2seq_model: Training started


Epoch:   0%|          | 0/3 [00:00<?, ?it/s]

Running Epoch 0 of 3:   0%|          | 0/97 [00:00<?, ?it/s]

Running Epoch 1 of 3:   0%|          | 0/97 [00:00<?, ?it/s]

Running Epoch 2 of 3:   0%|          | 0/97 [00:00<?, ?it/s]

INFO:simpletransformers.seq2seq.seq2seq_model:Saving model into outputs/
INFO:simpletransformers.seq2seq.seq2seq_model: Training of facebook/bart-base model complete. Saved to outputs/.
INFO:simpletransformers.seq2seq.seq2seq_utils: Creating features from dataset file at cache_dir/


  0%|          | 0/65 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/9 [00:00<?, ?it/s]

INFO:simpletransformers.seq2seq.seq2seq_model:{'eval_loss': 1.9344410631391737}


{'eval_loss': 1.9344410631391737}
CPU times: user 1min 15s, sys: 1.31 s, total: 1min 16s
Wall time: 1min 15s


In [None]:
# Print the model's prediction next to the gold target for every held-out sentence.
# Each evaluation row is visited exactly once and in a fixed order; the earlier
# version drew 65 random indices with replacement, so some rows were printed
# several times while others were never shown, and the output changed on every run.
eval_sentences = eval_df['input_text'].tolist()
eval_targets = eval_df['target_text'].tolist()

# Predict with trained BART model -- one batched call instead of one call per row.
predicted_titles = model.predict(eval_sentences) if eval_sentences else []

for abstract, true_title, predicted_title in zip(eval_sentences, eval_targets, predicted_titles):
    print(f'True Title: {true_title}\n')
    print(f'Predicted Title: {predicted_title}\n')
    print(f'Abstract: {abstract}\n\n\n')

Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: tax absorb

Predicted Title: tax absorb

Abstract:  The sales tax is absorbed into the state income tax .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: result accommodate

Predicted Title: result accommodate

Abstract:  The scientists had to accommodate the new results with the existing theories .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: excuse design

Predicted Title: c excuse

Abstract:  She designed a good excuse for not attending classes that day .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: tax absorb

Predicted Title: tax absorb

Abstract:  The sales tax is absorbed into the state income tax .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: way claw

Predicted Title: mount claw

Abstract:  They clawed their way to the top of the mountain .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: dictionary computerize

Predicted Title: f disction

Abstract:  Please use a computerized disctionary .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: state tax

Predicted Title: alcohol alcohol tax

Abstract:  The State taxes alcohol heavily .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: pig wallow

Predicted Title: p wallow

Abstract:  pigs were wallowing in the mud .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: language incline

Predicted Title: language incline

Abstract:  Their language inclines us to believe them .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: building level

Predicted Title: building levele

Abstract:  The building was levelled .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: intern rotate

Predicted Title: intern rotate

Abstract:  Interns have to rotate for a few months .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: husband poison

Predicted Title: husband poison

Abstract:  She poisoned her husband .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: dress mold

Predicted Title: woman mold

Abstract:  The dress molds her beautiful figure .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: soup revive

Predicted Title: food revive

Abstract:  A hot soup will revive me .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: color harmonize

Predicted Title: color harmonize

Abstract:  The colors do n't harmonize .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: chance ruin

Predicted Title: behavior ruin

Abstract:  This behavior will ruin your chances of winning the election .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: mushroom poison

Predicted Title: p poison

Abstract:  This mushrooms can poison .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: vienna besiege

Predicted Title: kiege

Abstract:  The Turks besieged Vienna .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: money pour

Predicted Title: money pour

Abstract:  We poured money into the education of our children .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: dress mold

Predicted Title: woman mold

Abstract:  The dress molds her beautiful figure .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: soup revive

Predicted Title: food revive

Abstract:  A hot soup will revive me .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: mother swell

Predicted Title: child swelling

Abstract:  The mother was swelling with importance when she spoke of her son .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: husband poison

Predicted Title: husband poison

Abstract:  She poisoned her husband .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: path climb

Predicted Title: path climb

Abstract:  The path climbed all the way to the top of the hill .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: story swallow

Predicted Title: story swallow

Abstract:  Am I supposed to swallow that story ?





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: society infect

Predicted Title: soc infect

Abstract:  society was infected by racism .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: market clog

Predicted Title: market clog

Abstract:  The market is being clogged by these operations .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: love buy

Predicted Title: love buy

Abstract:  She wanted to buy his love with her dedication to him and his work .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: crop rotate

Predicted Title: crop rotate

Abstract:  We rotate the crops so as to maximize the use of the soil .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: chance ruin

Predicted Title: behavior ruin

Abstract:  This behavior will ruin your chances of winning the election .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: intern rotate

Predicted Title: intern rotate

Abstract:  Interns have to rotate for a few months .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: ground sow

Predicted Title: ground sow

Abstract:  sow the ground with sunflower seeds .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: candidate sift

Predicted Title: cand sift

Abstract:  They sifted through the job candidates .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: mind poison

Predicted Title: mind poison

Abstract:  poison someone 's mind .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: skin crawl

Predicted Title: skin crawl

Abstract:  My skin crawled -- I was terrified .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: state tax

Predicted Title: alcohol alcohol tax

Abstract:  The State taxes alcohol heavily .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: airplane land

Predicted Title: plane land

Abstract:  the pilot managed to land the airplane safely .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: hair brush

Predicted Title: hair brush

Abstract:  Johnson brushed the hairs from his jacket .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: curtain rip

Predicted Title:  curtain curtain rip

Abstract:  The curtain ripped from top to bottom .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: result accommodate

Predicted Title: result accommodate

Abstract:  The scientists had to accommodate the new results with the existing theories .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: market clog

Predicted Title: market clog

Abstract:  The market is being clogged by these operations .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: candidate sift

Predicted Title: cand sift

Abstract:  They sifted through the job candidates .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: mother swell

Predicted Title: child swelling

Abstract:  The mother was swelling with importance when she spoke of her son .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: hair brush

Predicted Title: hair brush

Abstract:  Johnson brushed the hairs from his jacket .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: child fill

Predicted Title: child fill

Abstract:  fill the child with pride .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: lungs invade

Predicted Title: cancerancer invade

Abstract:  The cancer had invaded her lungs .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: result accommodate

Predicted Title: result accommodate

Abstract:  The scientists had to accommodate the new results with the existing theories .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: unrest erupt

Predicted Title: governmentrest erupt

Abstract:  Unrest erupted in the country .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: sale climb

Predicted Title: sold climb

Abstract:  Sales were climbing after prices were lowered .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: society infect

Predicted Title: soc infect

Abstract:  society was infected by racism .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: path climb

Predicted Title: path climb

Abstract:  The path climbed all the way to the top of the hill .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: road steamroller

Predicted Title: road steamroller

Abstract:  steamroller the road .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: man march

Predicted Title: p march

Abstract:  He marched into the classroom and announced the exam .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: crop rotate

Predicted Title: crop rotate

Abstract:  We rotate the crops so as to maximize the use of the soil .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: sale climb

Predicted Title: sold climb

Abstract:  Sales were climbing after prices were lowered .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: man march

Predicted Title: p march

Abstract:  He marched into the classroom and announced the exam .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: love buy

Predicted Title: love buy

Abstract:  She wanted to buy his love with her dedication to him and his work .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: teacher nod

Predicted Title: student nod

Abstract:  The teacher nodded when the student gave the right answer .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: garbage dump

Predicted Title: bag dump

Abstract:  The truck dumped the garbage in the street .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: approval nod

Predicted Title: appro nod

Abstract:  He nodded his approval .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: approval nod

Predicted Title: appro nod

Abstract:  He nodded his approval .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: question pose

Predicted Title: question pose

Abstract:  This poses an interesting question .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: essence capture

Predicted Title: person capture

Abstract:  capture the essence of Spring   .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: tax absorb

Predicted Title: tax absorb

Abstract:  The sales tax is absorbed into the state income tax .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: unrest erupt

Predicted Title: governmentrest erupt

Abstract:  Unrest erupted in the country .



