In [None]:
# Mount Google Drive inside the Colab VM at /content/drive/ so the notebook
# can read the data files that later cells open with relative paths.
from google.colab import drive
drive.mount('/content/drive/')

Mounted at /content/drive/


In [None]:
!pip install transformers
!pip install simpletransformers



In [None]:
import numpy as np
import pandas as pd
import os, json, gc, re, random
from tqdm.notebook import tqdm
# from tqdm import tqdm
from sklearn.model_selection import train_test_split

import matplotlib.pyplot as plt
%matplotlib inline
import plotly.express as px
import seaborn as sns

import warnings
import logging

# Suppress every Python warning for the remainder of the session.
warnings.filterwarnings("ignore")

# Keep the "transformers" logger at WARNING so it stays quiet ...
transformers_logger = logging.getLogger("transformers")
transformers_logger.setLevel(logging.WARNING)

# ... while the root logger (and therefore everything else) reports at INFO.
logging.basicConfig(level=logging.INFO)

In [None]:
import torch, transformers, tokenizers

# Display the installed versions (torch, transformers, tokenizers) as a tuple
# so the environment behind the recorded outputs is documented.
tuple(lib.__version__ for lib in (torch, transformers, tokenizers))

('1.9.0+cu102', '4.8.1', '0.10.3')

In [None]:
cd drive/My Drive/Colab Notebooks/experiments

/content/drive/My Drive/Colab Notebooks/experiments


In [None]:
# Load the MOH-X CSV into a DataFrame. The path is relative, so it depends on
# the working directory chosen by the preceding `cd` cell.
data_file = pd.read_csv("data/moh-x.csv")

In [None]:
# Build the two columns the seq2seq model expects:
#   input_text  <- the 'sentence' column (renamed)
#   target_text <- "<arg1> <verb>" joined with a single space
# This used to read from `data`, a name that no visible cell defines, so the
# cell only worked with leftover kernel state and raised NameError on a fresh
# run. It now operates on `data_file`, the frame loaded in the previous cell.
# Re-running is safe: renaming an already-renamed column is a no-op and
# 'arg1' / 'verb' are left in place.
data_file = data_file.rename(columns={'sentence': 'input_text'})
data_file['target_text'] = data_file[['arg1', 'verb']].agg(' '.join, axis=1)
# Alternative target (verb only):
#data_file['target_text'] = data_file['verb']

In [None]:
# Keep just the model's input/target pair and preview the first rows.
seq2seq_columns = ["input_text", "target_text"]
papers = data_file.loc[:, seq2seq_columns]
papers.head()

Unnamed: 0,input_text,target_text
0,He absorbed the knowledge or beliefs of his t...,knowledge absorb
1,He absorbed the costs for the accident .,cost absorb
2,The sales tax is absorbed into the state inco...,tax absorb
3,The immigrants were quickly absorbed into soc...,immigrant absorb
4,Her interest in butterflies absorbs her compl...,interest absorb


In [None]:
# Split the (input_text, target_text) pairs into train / validation / test.
# The former header comments ("Load test data - TroFi", a commented-out
# subsetting of an undefined `data`) described things this cell never did and
# were removed, as were two redundant `import pandas as pd` lines (pandas is
# already imported as `pd` in the imports cell).
from sklearn.model_selection import train_test_split as tts  # alias kept: `tts` is used below

# Hold out 10% of the rows as the test split; random_state fixes the shuffle.
train_df, test_data = tts(data_file[["input_text", "target_text"]], random_state=42, test_size=0.1)
# Carve a validation split out of the remainder. An integer test_size is an
# absolute row count, so `val` gets exactly as many rows as `test_data`.
train_df, val = tts(train_df, random_state=42, test_size=test_data.shape[0])

In [None]:
%%time

from simpletransformers.seq2seq import Seq2SeqModel

# Evaluation set: same column preparation as the training frame
# ('sentence' -> input_text, "<arg1> <verb>" -> target_text).
eval_df = (
    pd.read_csv("stockholm/bert_code/mohx_bert_subs/mohx_tp.csv")
    .rename(columns={'sentence': 'input_text'})
)
eval_df['target_text'] = eval_df[['arg1', 'verb']].agg(' '.join, axis=1)

model_args = {
    "reprocess_input_data": True,     # rebuild features instead of reusing the cache
    "overwrite_output_dir": True,     # allow re-running into the same output directory
    "save_model_every_epoch": False,
    "save_eval_checkpoints": False,
    "max_seq_length": 512,
    "train_batch_size": 6,
    "num_train_epochs": 3,
    # Fixed seed so that fine-tuning is repeatable between runs; previously no
    # seed was set and every run could give different weights/predictions.
    "manual_seed": 42,
}

# Create a Bart-base model
model = Seq2SeqModel(encoder_decoder_type="bart",
                    encoder_decoder_name="facebook/bart-base",
                    args=model_args)

CPU times: user 1.79 s, sys: 446 ms, total: 2.23 s
Wall time: 4.41 s


In [None]:
%%time

# Train the model
# Fine-tunes on `train_df` only; the `val` split created earlier is not
# passed to this call.
model.train_model(train_df)

# Evaluate the model
# Scores the fine-tuned model on `eval_df`; in the recorded run the returned
# dict contains a single 'eval_loss' entry.
result = model.eval_model(eval_df)
print(result)

INFO:simpletransformers.seq2seq.seq2seq_utils: Creating features from dataset file at cache_dir/


  0%|          | 0/517 [00:00<?, ?it/s]

INFO:simpletransformers.seq2seq.seq2seq_model: Training started


Epoch:   0%|          | 0/3 [00:00<?, ?it/s]

Running Epoch 0 of 3:   0%|          | 0/87 [00:00<?, ?it/s]

Running Epoch 1 of 3:   0%|          | 0/87 [00:00<?, ?it/s]

Running Epoch 2 of 3:   0%|          | 0/87 [00:00<?, ?it/s]

INFO:simpletransformers.seq2seq.seq2seq_model:Saving model into outputs/
INFO:simpletransformers.seq2seq.seq2seq_model: Training of facebook/bart-base model complete. Saved to outputs/.
INFO:simpletransformers.seq2seq.seq2seq_utils: Creating features from dataset file at cache_dir/


  0%|          | 0/35 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/5 [00:00<?, ?it/s]

INFO:simpletransformers.seq2seq.seq2seq_model:{'eval_loss': 2.2303949117660524}


{'eval_loss': 2.2303949117660524}
CPU times: user 2min 26s, sys: 15.6 s, total: 2min 42s
Wall time: 2min 50s


In [None]:
# Print true vs. predicted targets for a random sample of evaluation rows.
#
# Previously this drew 65 indices *with* replacement (random.randint) from an
# evaluation frame that is smaller than 65 rows in the recorded run, so the
# same sentences were printed over and over, and it called model.predict once
# per row. Now each row is shown at most once (sample without replacement,
# capped at the size of eval_df) and all sentences are predicted in one batch.
n_examples = min(65, len(eval_df))
sample_rows = eval_df.iloc[random.sample(range(len(eval_df)), n_examples)]
sample_inputs = sample_rows['input_text'].tolist()

# Predict with trained BART model (skip the call when there is nothing to predict)
predicted_titles = model.predict(sample_inputs) if sample_inputs else []

for abstract, true_title, predicted_title in zip(
        sample_inputs, sample_rows['target_text'], predicted_titles):
    print(f'True Title: {true_title}\n')
    print(f'Predicted Title: {predicted_title}\n')
    print(f'Abstract: {abstract}\n\n\n')

Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: essence capture

Predicted Title: person capture

Abstract:  capture the essence of Spring   .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: money pour

Predicted Title: book pour

Abstract:  We poured money into the education of our children .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: story swallow

Predicted Title: story swallow

Abstract:  Am I supposed to swallow that story ?





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: tax absorb

Predicted Title: p absorb

Abstract:  The sales tax is absorbed into the state income tax .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: dress mold

Predicted Title: woman dress mold

Abstract:  The dress molds her beautiful figure .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: nickname fasten

Predicted Title: word fasten

Abstract:  They fastened various nicknames to each other .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: crop rotate

Predicted Title: c rotate

Abstract:  We rotate the crops so as to maximize the use of the soil .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: content condense

Predicted Title: book condense

Abstract:  condense the contents of a book into a summary .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: language spread

Predicted Title: word invade

Abstract:  The invaders spread their language all over the country .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: style revive

Predicted Title: p revive

Abstract:  He revived this style of opera .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: question pose

Predicted Title: fe pose

Abstract:  This poses an interesting question .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: style revive

Predicted Title: p revive

Abstract:  He revived this style of opera .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: sale climb

Predicted Title: market climb

Abstract:  Sales were climbing after prices were lowered .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: language spread

Predicted Title: word invade

Abstract:  The invaders spread their language all over the country .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: dress mold

Predicted Title: woman dress mold

Abstract:  The dress molds her beautiful figure .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: nickname fasten

Predicted Title: word fasten

Abstract:  They fastened various nicknames to each other .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: style revive

Predicted Title: p revive

Abstract:  He revived this style of opera .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: essence capture

Predicted Title: person capture

Abstract:  capture the essence of Spring   .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: child fill

Predicted Title: person fill

Abstract:  fill the child with pride .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: question pose

Predicted Title: fe pose

Abstract:  This poses an interesting question .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: present shower

Predicted Title: car showered

Abstract:  He showered her with presents .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: market clog

Predicted Title: market clog

Abstract:  The market is being clogged by these operations .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: crop rotate

Predicted Title: c rotate

Abstract:  We rotate the crops so as to maximize the use of the soil .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: moon glare

Predicted Title: p glare

Abstract:  The moon glared back at itself from the lake 's surface .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: question pose

Predicted Title: fe pose

Abstract:  This poses an interesting question .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: nickname fasten

Predicted Title: word fasten

Abstract:  They fastened various nicknames to each other .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: society infect

Predicted Title: p infect

Abstract:  society was infected by racism .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: nickname fasten

Predicted Title: word fasten

Abstract:  They fastened various nicknames to each other .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: nickname fasten

Predicted Title: word fasten

Abstract:  They fastened various nicknames to each other .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: tax absorb

Predicted Title: p absorb

Abstract:  The sales tax is absorbed into the state income tax .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: sun glare

Predicted Title: star glare

Abstract:  The sun glared down on us .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: mother swell

Predicted Title: fe swelling

Abstract:  The mother was swelling with importance when she spoke of her son .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: candidate sift

Predicted Title: person sift

Abstract:  They sifted through the job candidates .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: listener bolt

Predicted Title: fe bolt

Abstract:  The listeners bolted when he discussed his strange ideas .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: story swallow

Predicted Title: story swallow

Abstract:  Am I supposed to swallow that story ?





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: question pose

Predicted Title: fe pose

Abstract:  This poses an interesting question .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: sun glare

Predicted Title: star glare

Abstract:  The sun glared down on us .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: story swallow

Predicted Title: story swallow

Abstract:  Am I supposed to swallow that story ?





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: candidate sift

Predicted Title: person sift

Abstract:  They sifted through the job candidates .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: sun glare

Predicted Title: star glare

Abstract:  The sun glared down on us .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: problem besiege

Predicted Title: fe besieged

Abstract:  She was besieged by so many problems that she got discouraged .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: problem besiege

Predicted Title: fe besieged

Abstract:  She was besieged by so many problems that she got discouraged .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: listener bolt

Predicted Title: fe bolt

Abstract:  The listeners bolted when he discussed his strange ideas .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: mind poison

Predicted Title: mind poison

Abstract:  poison someone 's mind .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: unrest erupt

Predicted Title: ferest erupt

Abstract:  Unrest erupted in the country .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: crop rotate

Predicted Title: c rotate

Abstract:  We rotate the crops so as to maximize the use of the soil .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: path climb

Predicted Title: car climb

Abstract:  The path climbed all the way to the top of the hill .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: present shower

Predicted Title: car showered

Abstract:  He showered her with presents .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: story swallow

Predicted Title: story swallow

Abstract:  Am I supposed to swallow that story ?





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: tax absorb

Predicted Title: p absorb

Abstract:  The sales tax is absorbed into the state income tax .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: child fill

Predicted Title: person fill

Abstract:  fill the child with pride .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: sale climb

Predicted Title: market climb

Abstract:  Sales were climbing after prices were lowered .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: unrest erupt

Predicted Title: ferest erupt

Abstract:  Unrest erupted in the country .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: crop rotate

Predicted Title: c rotate

Abstract:  We rotate the crops so as to maximize the use of the soil .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: result accommodate

Predicted Title: p accommodate

Abstract:  The scientists had to accommodate the new results with the existing theories .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: mother swell

Predicted Title: fe swelling

Abstract:  The mother was swelling with importance when she spoke of her son .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: candidate sift

Predicted Title: person sift

Abstract:  They sifted through the job candidates .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: dress mold

Predicted Title: woman dress mold

Abstract:  The dress molds her beautiful figure .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: unrest erupt

Predicted Title: ferest erupt

Abstract:  Unrest erupted in the country .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: excuse design

Predicted Title: optim excuse

Abstract:  She designed a good excuse for not attending classes that day .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: language spread

Predicted Title: word invade

Abstract:  The invaders spread their language all over the country .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: present shower

Predicted Title: car showered

Abstract:  He showered her with presents .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: market clog

Predicted Title: market clog

Abstract:  The market is being clogged by these operations .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: nickname fasten

Predicted Title: word fasten

Abstract:  They fastened various nicknames to each other .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: unrest erupt

Predicted Title: ferest erupt

Abstract:  Unrest erupted in the country .



