In [1]:
from google.colab import drive
drive.mount('/content/drive/')

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


In [2]:
!pip install transformers
!pip install simpletransformers



In [3]:
import numpy as np
import pandas as pd
import os, json, gc, re, random
from tqdm.notebook import tqdm
# from tqdm import tqdm
from sklearn.model_selection import train_test_split

import matplotlib.pyplot as plt
%matplotlib inline
import plotly.express as px
import seaborn as sns

import warnings
warnings.filterwarnings("ignore")

import logging
logging.basicConfig(level=logging.INFO)
transformers_logger = logging.getLogger("transformers")
transformers_logger.setLevel(logging.WARNING)

In [4]:
import torch, transformers, tokenizers
torch.__version__, transformers.__version__, tokenizers.__version__

('1.9.0+cu102', '4.8.1', '0.10.3')

In [5]:
cd drive/My Drive/Colab Notebooks/experiments

/content/drive/My Drive/Colab Notebooks/experiments


In [6]:
data_file = pd.read_csv("data/moh-x.csv")

In [7]:
data_file = data_file.rename(columns={'sentence': 'input_text'}) #abstract - sentence
data_file['target_text'] = data_file[['arg1', 'verb']].agg(' '.join, axis=1) #title - metaphorical words
#data_file['target_text'] = data_file['verb'] #title - metaphorical words

In [8]:
papers = data_file[["input_text", "target_text"]]
papers.head()

Unnamed: 0,input_text,target_text
0,He absorbed the knowledge or beliefs of his t...,knowledge absorb
1,He absorbed the costs for the accident .,cost absorb
2,The sales tax is absorbed into the state inco...,tax absorb
3,The immigrants were quickly absorbed into soc...,immigrant absorb
4,Her interest in butterflies absorbs her compl...,interest absorb


In [9]:
# Load test data - TroFi
import pandas as pd
# Use a subset for quick experiments
#data = data[:10000]

from sklearn.model_selection import train_test_split as tts
import pandas as pd

# Split to train, val and test
train_df, test_data = tts(data_file[["input_text", "target_text"]], random_state=42, test_size=0.1)
train_df, val = tts(train_df, random_state=42, test_size=test_data.shape[0])

In [10]:
%%time

from simpletransformers.seq2seq import Seq2SeqModel

eval_df = pd.read_csv("stockholm/xlm_code/mohx_xlm_subs/mohx_tp.csv")
eval_df = eval_df.rename(columns={'sentence': 'input_text'}) #abstract - sentence
eval_df['target_text'] = eval_df[['arg1', 'verb']].agg(' '.join, axis=1) #title - metaphorical words

model_args = {
    "reprocess_input_data": True,
    "overwrite_output_dir": True,
    "save_model_every_epoch": False,
    "save_eval_checkpoints": False,
    "max_seq_length": 512,
    "train_batch_size": 6,
    "num_train_epochs": 3,
}

# Create a Bart-base model
model = Seq2SeqModel(encoder_decoder_type="bart",
                    encoder_decoder_name="facebook/bart-base",
                    args=model_args)

CPU times: user 4.04 s, sys: 1 s, total: 5.04 s
Wall time: 5.52 s


In [11]:
%%time

# Train the model
model.train_model(train_df)

# Evaluate the model
result = model.eval_model(eval_df)
print(result)

INFO:simpletransformers.seq2seq.seq2seq_utils: Creating features from dataset file at cache_dir/


  0%|          | 0/517 [00:00<?, ?it/s]

INFO:simpletransformers.seq2seq.seq2seq_model: Training started


Epoch:   0%|          | 0/3 [00:00<?, ?it/s]

Running Epoch 0 of 3:   0%|          | 0/87 [00:00<?, ?it/s]

Running Epoch 1 of 3:   0%|          | 0/87 [00:00<?, ?it/s]

Running Epoch 2 of 3:   0%|          | 0/87 [00:00<?, ?it/s]

INFO:simpletransformers.seq2seq.seq2seq_model:Saving model into outputs/
INFO:simpletransformers.seq2seq.seq2seq_model: Training of facebook/bart-base model complete. Saved to outputs/.
INFO:simpletransformers.seq2seq.seq2seq_utils: Creating features from dataset file at cache_dir/


  0%|          | 0/31 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/4 [00:00<?, ?it/s]

INFO:simpletransformers.seq2seq.seq2seq_model:{'eval_loss': 2.200378566980362}


{'eval_loss': 2.200378566980362}
CPU times: user 2min 28s, sys: 16.7 s, total: 2min 45s
Wall time: 2min 51s


In [12]:
for _ in range(65):

    random_idx = random.randint(0, len(eval_df)-1)

    abstract = eval_df.iloc[random_idx]['input_text']
    true_title = eval_df.iloc[random_idx]['target_text']

    # Predict with trained BART model
    predicted_title = model.predict([abstract])[0]

    print(f'True Title: {true_title}\n')
    print(f'Predicted Title: {predicted_title}\n')
    print(f'Abstract: {abstract}\n\n\n')

Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: excuse design

Predicted Title: book excuse

Abstract:  She designed a good excuse for not attending classes that day .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: mother swell

Predicted Title: fe swelling

Abstract:  The mother was swelling with importance when she spoke of her son .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: market clog

Predicted Title: market clog

Abstract:  The market is being clogged by these operations .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: dress mold

Predicted Title: book mold

Abstract:  The dress molds her beautiful figure .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: language incline

Predicted Title: word incline

Abstract:  Their language inclines us to believe them .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: essence capture

Predicted Title: book capture

Abstract:  capture the essence of Spring   .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: excuse design

Predicted Title: book excuse

Abstract:  She designed a good excuse for not attending classes that day .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: society infect

Predicted Title: book infect

Abstract:  society was infected by racism .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: question pose

Predicted Title: fe pose

Abstract:  This poses an interesting question .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: question pose

Predicted Title: fe pose

Abstract:  This poses an interesting question .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: problem besiege

Predicted Title: fe besieged

Abstract:  She was besieged by so many problems that she got discouraged .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: listener bolt

Predicted Title: febolt

Abstract:  The listeners bolted when he discussed his strange ideas .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: soup revive

Predicted Title: book revive

Abstract:  A hot soup will revive me .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: chance ruin

Predicted Title: book ruin

Abstract:  This behavior will ruin your chances of winning the election .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: essence capture

Predicted Title: book capture

Abstract:  capture the essence of Spring   .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: question pose

Predicted Title: fe pose

Abstract:  This poses an interesting question .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: language spread

Predicted Title: word spread

Abstract:  The invaders spread their language all over the country .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: skin crawl

Predicted Title: book crawl

Abstract:  My skin crawled -- I was terrified .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: nickname fasten

Predicted Title: word fasten

Abstract:  They fastened various nicknames to each other .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: lungs invade

Predicted Title: car invade

Abstract:  The cancer had invaded her lungs .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: excuse design

Predicted Title: book excuse

Abstract:  She designed a good excuse for not attending classes that day .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: language incline

Predicted Title: word incline

Abstract:  Their language inclines us to believe them .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: intern rotate

Predicted Title: book rotate

Abstract:  Interns have to rotate for a few months .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: language spread

Predicted Title: word spread

Abstract:  The invaders spread their language all over the country .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: candidate sift

Predicted Title: book sift

Abstract:  They sifted through the job candidates .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: chance ruin

Predicted Title: book ruin

Abstract:  This behavior will ruin your chances of winning the election .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: excuse design

Predicted Title: book excuse

Abstract:  She designed a good excuse for not attending classes that day .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: problem besiege

Predicted Title: fe besieged

Abstract:  She was besieged by so many problems that she got discouraged .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: story swallow

Predicted Title: book swallow

Abstract:  Am I supposed to swallow that story ?





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: intern rotate

Predicted Title: book rotate

Abstract:  Interns have to rotate for a few months .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: sale climb

Predicted Title: fe climb

Abstract:  Sales were climbing after prices were lowered .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: mind poison

Predicted Title: car poison

Abstract:  poison someone 's mind .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: intern rotate

Predicted Title: book rotate

Abstract:  Interns have to rotate for a few months .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: candidate sift

Predicted Title: book sift

Abstract:  They sifted through the job candidates .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: dress mold

Predicted Title: book mold

Abstract:  The dress molds her beautiful figure .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: listener bolt

Predicted Title: febolt

Abstract:  The listeners bolted when he discussed his strange ideas .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: society infect

Predicted Title: book infect

Abstract:  society was infected by racism .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: nickname fasten

Predicted Title: word fasten

Abstract:  They fastened various nicknames to each other .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: mother swell

Predicted Title: fe swelling

Abstract:  The mother was swelling with importance when she spoke of her son .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: listener bolt

Predicted Title: febolt

Abstract:  The listeners bolted when he discussed his strange ideas .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: candidate sift

Predicted Title: book sift

Abstract:  They sifted through the job candidates .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: result accommodate

Predicted Title: work accommodate

Abstract:  The scientists had to accommodate the new results with the existing theories .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: story swallow

Predicted Title: book swallow

Abstract:  Am I supposed to swallow that story ?





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: problem besiege

Predicted Title: fe besieged

Abstract:  She was besieged by so many problems that she got discouraged .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: language spread

Predicted Title: word spread

Abstract:  The invaders spread their language all over the country .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: market clog

Predicted Title: market clog

Abstract:  The market is being clogged by these operations .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: skin crawl

Predicted Title: book crawl

Abstract:  My skin crawled -- I was terrified .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: story swallow

Predicted Title: book swallow

Abstract:  Am I supposed to swallow that story ?





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: style revive

Predicted Title: book revive

Abstract:  He revived this style of opera .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: sale climb

Predicted Title: fe climb

Abstract:  Sales were climbing after prices were lowered .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: unrest erupt

Predicted Title: carrest erupt

Abstract:  Unrest erupted in the country .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: lungs invade

Predicted Title: car invade

Abstract:  The cancer had invaded her lungs .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: result accommodate

Predicted Title: work accommodate

Abstract:  The scientists had to accommodate the new results with the existing theories .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: language incline

Predicted Title: word incline

Abstract:  Their language inclines us to believe them .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: sale climb

Predicted Title: fe climb

Abstract:  Sales were climbing after prices were lowered .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: question pose

Predicted Title: fe pose

Abstract:  This poses an interesting question .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: essence capture

Predicted Title: book capture

Abstract:  capture the essence of Spring   .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: society infect

Predicted Title: book infect

Abstract:  society was infected by racism .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: chance ruin

Predicted Title: book ruin

Abstract:  This behavior will ruin your chances of winning the election .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: society infect

Predicted Title: book infect

Abstract:  society was infected by racism .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: excuse design

Predicted Title: book excuse

Abstract:  She designed a good excuse for not attending classes that day .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: language incline

Predicted Title: word incline

Abstract:  Their language inclines us to believe them .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: nickname fasten

Predicted Title: word fasten

Abstract:  They fastened various nicknames to each other .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: skin crawl

Predicted Title: book crawl

Abstract:  My skin crawled -- I was terrified .





Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

True Title: lungs invade

Predicted Title: car invade

Abstract:  The cancer had invaded her lungs .



