Run the following command to clone the repository in the current Colab environment.

In [1]:
# Clone the project repository into a temporary NLU-SpanSA folder.
!git clone https://github.com/jcorsetti/NLU-SpanSA.git
# Move everything matched by the `*` glob into the current working directory
# (shell globs normally skip dot-files such as .git, so those stay behind).
!mv NLU-SpanSA/* .
# Remove what is left of the temporary clone folder.
!rm -r NLU-SpanSA

Mounted at /content/drive


Install the requirements and check the assigned GPU. Note that BERT-Large with bs=32 (the default config) needs at least 16 GB of GPU memory.

In [None]:

# Create the output folder; -p keeps the cell re-runnable when the folder already exists.
!mkdir -p experiments
# %pip (instead of !pip) installs into the environment of the running kernel.
%pip install -r requirements.txt
%pip install --upgrade google-cloud-storage
# Show the GPU assigned to this session and its available memory.
!nvidia-smi

Run the cell below to download the pretrained BERT-Large weights from https://huggingface.co/bert-large-uncased/blob/main/pytorch_model.bin. The script will place the file in the bert_models/bert-large-uncased folder. This may take some time due to the size of the downloaded files.

In [None]:
# `git lfs clone` is deprecated: once Git LFS is set up, a plain `git clone` fetches the LFS files.
!git lfs install
!git clone https://huggingface.co/bert-large-uncased
# Make sure the destination exists before moving the weights into it.
!mkdir -p bert_models/bert-large-uncased
!mv bert-large-uncased/pytorch_model.bin bert_models/bert-large-uncased/
# -f avoids confirmation prompts on the write-protected objects inside the clone's .git folder.
!rm -rf bert-large-uncased

Run the cell below to perform a single training run. The settings can be customized in the config.ini file. A default training run with 3 epochs and bs=32 takes about 4 minutes. When running an experiment, config.ini is copied into the experiment folder to make it reproducible. In addition, a predictions.json file with the predicted spans and classes is produced and saved in the experiment folder.

In [1]:
import os
import json
import torch
import random
import subprocess
from os.path import join
from configparser import ConfigParser
from utils import bert_load_state_dict, init_seed, get_optimizer, span_bound2position
from evaluation import evaluate, evaluate_spanonly, evaluate_classonly
from jointBert import JointBert
from bert.modeling import BertConfig
from dataset import get_dataloader
from losses import compute_class_loss, compute_span_loss

# NOTE(review): CUDA_LAUNCH_BLOCKING=1 forces synchronous CUDA kernel launches; handy for
# debugging device-side errors, but it slows training -- consider removing for real runs.
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"

DEV='cuda'
#DICT = {'OTHER': 0, 'T-NEU': 1, 'T-POS': 2, 'T-NEG': 3, 'CONF' : 4}
# Polarity tag -> class index, passed to the data loader and to evaluate().
DICT = {'T-NEU': 0, 'T-POS': 1, 'T-NEG': 2}
config = ConfigParser()
# ConfigParser.read() silently skips files it cannot open, so fail early with a clear message.
if not config.read('config.ini'):
    raise FileNotFoundError('config.ini not found in the current working directory')

seed = int(config['data']['seed'])
init_seed(seed)

# Experiment folder: experiments/<exp_name>. The config is copied next to the results so
# that the run can be reproduced and re-evaluated later.
exp_root_dir = join('experiments', config['data']['exp_name'])
os.makedirs(exp_root_dir, exist_ok=True)
# Argument list instead of a formatted shell string: robust to experiment names containing
# spaces or shell metacharacters; check=True surfaces a failed copy instead of ignoring it.
subprocess.run(['cp', 'config.ini', join(exp_root_dir, 'config.ini')], check=True)

# Start from an empty log file on every run.
log_file = join(exp_root_dir,'log.txt')
if os.path.exists(log_file):
    print('Warning: overwriting previous logfile {}'.format(log_file))
    os.remove(log_file)

# Build the model from the BERT config and initialise it from the pretrained weights.
bert_model = join(config['arch']['root'],config['arch']['bert'])
bert_config = BertConfig.from_json_file(join(bert_model,'bert_config.json'))

model = JointBert(bert_config, config['arch']['span_head'], config['arch']['polarity_head'])
# Weights are loaded on CPU first and the whole model is moved to the device afterwards.
model = bert_load_state_dict(model, torch.load(join(bert_model, 'pytorch_model.bin'), map_location='cpu'))
model = model.to(DEV)
print("Loading model from pretrained checkpoint : {}".format(join(bert_model, 'pytorch_model.bin')))

# Hyper-parameters read from config.ini ('max_sequence_lenght' is the key as spelled in the config).
BS, N_EPOCHS = int(config['training']['batch_size']), int(config['training']['epochs'])
MAX_SEQ_LEN = int(config['arch']['max_sequence_lenght'])
M1,M2 = float(config['training']['span_loss_w']), float(config['training']['class_loss_w'])
filter_empty_train = config.getboolean('data','filter_empty_train')
filter_empty_valid = config.getboolean('data','filter_empty_valid')

train_dataloader, train_feats, num_train_samples = get_dataloader(config['data']['train_file'], DICT, filter_empty_train, config)
test_dataloader, test_feats, num_test_samples = get_dataloader(config['data']['valid_file'], DICT, filter_empty_valid, config)

print('Train samples: ', num_train_samples)
print('Test samples: ', num_test_samples)

# calculating save intervals like in original paper
num_steps = int(num_train_samples / BS * N_EPOCHS)
# Clamp to 1: with very few steps the integer division yields 0, and the training loop's
# `global_step % save_cp_interval` would then raise ZeroDivisionError.
save_cp_interval = max(1, int(num_steps/ (5*N_EPOCHS)))
start_save_steps = int(num_steps * 0.5)

optimizer = get_optimizer(model, config['training'], num_steps)

print('Num steps: ', num_steps)

global_step = 0

# One format string shared by the console and the log file.
LOSS_LOG_FMT = "Step: {:4d}, Loss: {:.4f} Span loss: {:.4f} Class loss: {:.4f}"
# Guard against a zero interval (possible with very few training steps), which would make
# the modulo below raise ZeroDivisionError.
log_interval = max(1, save_cp_interval)

for epoch in range(N_EPOCHS):

    # Running sums are reset at every epoch start (and after each evaluation, see below).
    running_loss, count = 0., 0
    running_span, running_class = 0.,0.

    for step, batch in enumerate(train_dataloader):

        batch = tuple(t.to(DEV) for t in batch)
        input_ids, input_mask, segment_ids, start_span, end_span, polarity, polarity_mask, example_indices = batch

        # produce spans from list of index to mask format, used for loss
        start_positions = span_bound2position(start_span, polarity_mask, MAX_SEQ_LEN)
        end_positions = span_bound2position(end_span, polarity_mask, MAX_SEQ_LEN)
        # Call the module itself rather than .forward() so that nn.Module hooks are honoured.
        start_logits, end_logits, pol_logits = model(input_ids, input_mask, segment_ids, start_span, end_span)

        # calculate losses
        span_loss = compute_span_loss(start_logits, end_logits, start_positions, end_positions)
        class_loss = compute_class_loss(pol_logits, polarity, polarity_mask)

        # Weighted sum of the two objectives (weights come from config.ini).
        cur_loss = M1*span_loss + M2*class_loss

        # backward pass
        cur_loss.backward()
        running_loss += cur_loss.item()
        running_span += M1*span_loss.item()
        running_class += M2*class_loss.item()

        optimizer.step()
        model.zero_grad()
        global_step += 1
        count += 1

        # Every `log_interval` steps report the mean losses accumulated since the last reset.
        if global_step % log_interval == 0:
            loss_line = LOSS_LOG_FMT.format(global_step, running_loss / count, running_span / count, running_class / count)
            print(loss_line)
            with open(log_file,'a') as f:
                print(loss_line, file=f)

            # In the second half of training, also evaluate on the validation data and
            # overwrite the "last" checkpoint (no best-model selection is performed).
            if global_step > start_save_steps:
                model.eval()

                p,r,f1,common,retrieved,relevant, _ = evaluate(model, test_dataloader, test_feats, DICT, config, DEV)
                toprint = 'General metrics --> P: {:2.2f} R: {:2.2f} F1: {:2.2f} Common: {:4d} Retrieved: {:4d} Relevant: {:4d}'.format(p*100,r*100,f1*100,int(common),int(retrieved),int(relevant)) 
                print(toprint)
                with open(log_file,'a') as f:
                    print(toprint,file=f)

                # Restart the running averages and go back to training mode.
                running_loss, running_span, running_class, count = 0., 0., 0., 0
                model.train()

                torch.save({
                    'model': model.state_dict(),
                    'optimizer': optimizer.state_dict(),
                    'step': global_step
                }, join(exp_root_dir, 'checkpoint_last.pth'))


# run final evaluation, make descriptive strings of metrics to be saved
# The training loop leaves the model in train() mode after its last checkpoint, so switch
# to inference mode explicitly before computing the final metrics.
model.eval()

p,r,f1,common,retrieved,relevant,preds = evaluate(model, test_dataloader, test_feats, DICT, config, DEV)
toprint_all = 'General metrics --> P: {:2.2f} R: {:2.2f} F1: {:2.2f} Common: {:4d} Retrieved: {:4d} Relevant: {:4d}'.format(p*100,r*100,f1*100,int(common),int(retrieved),int(relevant)) 
p,r,f1, = evaluate_spanonly(model, test_dataloader, test_feats, config, DEV)
toprint_span = 'Span metrics --> P: {:2.2f} R: {:2.2f} F1: {:2.2f}'.format(p*100,r*100,f1*100) 
accuracy, correct, total = evaluate_classonly(model, test_dataloader, test_feats, config, DEV)
toprint_class = 'Class metrics --> Accuracy: {:2.2f} Correct: {:4d} Total: {:4d}'.format(accuracy*100, int(correct), int(total))

# Report the three summaries on screen and append them to the experiment log.
final_lines = (toprint_all, toprint_span, toprint_class)
for line in final_lines:
    print(line)

with open(log_file,'a') as f:
    for line in final_lines:
        print(line,file=f)

# Persist the predictions returned by evaluate() next to the other experiment artefacts.
with open(join(exp_root_dir, 'predictions.json'),'w') as f:
    json.dump(preds, f)





Loading model from pretrained checkpoint : bert_models/bert-large-uncased/pytorch_model.bin
Filtering sentences with no aspects.


  all_input_ids = torch.tensor([f['subtokens_id'] for f in feat_dataset], dtype=torch.long)


Filtering sentences with no aspects.
Train samples:  1458
Test samples:  411
Num steps:  136


	add_(Number alpha, Tensor other)
Consider using one of the following signatures instead:
	add_(Tensor other, *, Number alpha) (Triggered internally at  ../torch/csrc/utils/python_arg_parser.cpp:1174.)
  next_m.mul_(beta1).add_(1 - beta1, grad)


Step:    9, Loss: 5.1825 Span loss: 3.7695 Class loss: 1.4130
Step:   18, Loss: 3.8114 Span loss: 2.5959 Class loss: 1.2155
Step:   27, Loss: 3.1238 Span loss: 2.0329 Class loss: 1.0909
Step:   36, Loss: 2.7265 Span loss: 1.7278 Class loss: 0.9987
Step:   45, Loss: 2.4801 Span loss: 1.5529 Class loss: 0.9272
Step:   54, Loss: 1.0782 Span loss: 0.5961 Class loss: 0.4821
Step:   63, Loss: 1.1243 Span loss: 0.6200 Class loss: 0.5043
Step:   72, Loss: 1.0918 Span loss: 0.6318 Class loss: 0.4600
General metrics --> P: 64.60 R: 57.57 F1: 60.88 Common:  365 Retrieved:  565 Relevant:  634
Step:   81, Loss: 1.0746 Span loss: 0.6259 Class loss: 0.4487
General metrics --> P: 66.32 R: 60.57 F1: 63.31 Common:  384 Retrieved:  579 Relevant:  634
Step:   90, Loss: 1.0864 Span loss: 0.6821 Class loss: 0.4043
General metrics --> P: 64.13 R: 61.20 F1: 62.63 Common:  388 Retrieved:  605 Relevant:  634
Step:   99, Loss: 0.7535 Span loss: 0.5054 Class loss: 0.2481
General metrics --> P: 63.42 R: 58.52 F1: 

The following cell was used to produce the metrics reported in the report. Independent experiments are run with different random seeds, and the final metrics are averaged; the report used 5 repetitions, while `NUM_REPETITIONS` in the cell below is set to 3 and can be changed. Note that the checkpoints and predictions are not saved in this case, and the evaluation is performed only at the end to save time.

In [1]:
import os
import json
import torch
import random
import subprocess
import numpy as np
from os.path import join
from configparser import ConfigParser
from utils import bert_load_state_dict, init_seed, get_optimizer, span_bound2position
from evaluation import evaluate, evaluate_spanonly, evaluate_classonly
from jointBert import JointBert
from bert.modeling import BertConfig
from dataset import get_dataloader
from losses import compute_class_loss, compute_span_loss

# NOTE(review): CUDA_LAUNCH_BLOCKING=1 forces synchronous CUDA kernel launches; handy for
# debugging device-side errors, but it slows training -- consider removing for real runs.
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"

DEV='cuda'
#DICT = {'OTHER': 0, 'T-NEU': 1, 'T-POS': 2, 'T-NEG': 3, 'CONF' : 4}
# Polarity tag -> class index, passed to the data loader and to evaluate().
DICT = {'T-NEU': 0, 'T-POS': 1, 'T-NEG': 2}
config = ConfigParser()
# ConfigParser.read() silently skips files it cannot open, so fail early with a clear message.
if not config.read('config.ini'):
    raise FileNotFoundError('config.ini not found in the current working directory')

# Experiment folder: experiments/<exp_name>, with a copy of the config used for the run.
exp_root_dir = join('experiments', config['data']['exp_name'])
os.makedirs(exp_root_dir, exist_ok=True)
# Argument list instead of a formatted shell string: robust to experiment names containing
# spaces or shell metacharacters; check=True surfaces a failed copy instead of ignoring it.
subprocess.run(['cp', 'config.ini', join(exp_root_dir, 'config.ini')], check=True)

# Start from an empty log file on every run.
log_file = join(exp_root_dir,'log.txt')
if os.path.exists(log_file):
    print('Warning: overwriting previous logfile {}'.format(log_file))
    os.remove(log_file)

# Hyper-parameters read from config.ini ('max_sequence_lenght' is the key as spelled in the config).
BS, N_EPOCHS = int(config['training']['batch_size']), int(config['training']['epochs'])
MAX_SEQ_LEN = int(config['arch']['max_sequence_lenght'])
M1,M2 = float(config['training']['span_loss_w']), float(config['training']['class_loss_w'])
filter_empty_train = config.getboolean('data','filter_empty_train')
filter_empty_valid = config.getboolean('data','filter_empty_valid')

# The data loaders are built once and shared by all repetitions.
train_dataloader, train_feats, num_train_samples = get_dataloader(config['data']['train_file'], DICT, filter_empty_train, config)
test_dataloader, test_feats, num_test_samples = get_dataloader(config['data']['valid_file'], DICT, filter_empty_valid, config)

print('Train samples: ', num_train_samples)
print('Test samples: ', num_test_samples)

# calculating save intervals like in original paper
num_steps = int(num_train_samples / BS * N_EPOCHS)
# Clamp to 1: with very few steps the integer division yields 0, and the training loop's
# `global_step % save_cp_interval` would then raise ZeroDivisionError.
save_cp_interval = max(1, int(num_steps/ (5*N_EPOCHS)))
start_save_steps = int(num_steps * 0.5)

NUM_REPETITIONS = 3 # this can be changed for stabler results

# One list per metric; each repetition appends one value (in percent).
all_metrics = {
    'p_all': [],
    'r_all': [],
    'f1_all': [],
    'p_span': [],
    'r_span': [],
    'f1_span': [],
    'acc_class': []
}

# One format string shared by the console and the log file.
LOSS_LOG_FMT = "Step: {:4d}, Loss: {:.4f} Span loss: {:.4f} Class loss: {:.4f}"
# Guard against a zero interval (possible with very few training steps), which would make
# the modulo in the loop raise ZeroDivisionError.
log_interval = max(1, save_cp_interval)

for repeat in range(NUM_REPETITIONS):

    # A fresh random seed per repetition.
    seed = int(10000*random.random())
    print(seed)
    init_seed(seed)
    # Keep the seed in the log file too, so that a repetition can be reproduced later.
    with open(log_file,'a') as f:
        print('Repetition {} seed: {}'.format(repeat+1, seed), file=f)

    print('Repetition {} of {}'.format(repeat+1, NUM_REPETITIONS))
    bert_model = join(config['arch']['root'],config['arch']['bert'])
    bert_config = BertConfig.from_json_file(join(bert_model,'bert_config.json'))

    # Every repetition restarts from the pretrained weights with a new optimizer.
    model = JointBert(bert_config, config['arch']['span_head'], config['arch']['polarity_head'])
    model = bert_load_state_dict(model, torch.load(join(bert_model, 'pytorch_model.bin'), map_location='cpu'))
    model = model.to(DEV)
    print("Loading model from pretrained checkpoint : {}".format(join(bert_model, 'pytorch_model.bin')))

    optimizer = get_optimizer(model, config['training'], num_steps)

    global_step = 0

    for epoch in range(N_EPOCHS):

        # Running sums are reset at every epoch start (and, in the second half, after each report).
        running_loss, count = 0., 0
        running_span, running_class = 0.,0.

        for step, batch in enumerate(train_dataloader):

            batch = tuple(t.to(DEV) for t in batch)
            input_ids, input_mask, segment_ids, start_span, end_span, polarity, polarity_mask, example_indices = batch

            # produce spans from list of index to mask format, used for loss
            start_positions = span_bound2position(start_span, polarity_mask, MAX_SEQ_LEN)
            end_positions = span_bound2position(end_span, polarity_mask, MAX_SEQ_LEN)
            # Call the module itself rather than .forward() so that nn.Module hooks are honoured.
            start_logits, end_logits, pol_logits = model(input_ids, input_mask, segment_ids, start_span, end_span)

            span_loss = compute_span_loss(start_logits, end_logits, start_positions, end_positions)
            class_loss = compute_class_loss(pol_logits, polarity, polarity_mask)

            # Weighted sum of the two objectives (weights come from config.ini).
            cur_loss = M1*span_loss + M2*class_loss

            cur_loss.backward()
            running_loss += cur_loss.item()
            running_span += M1*span_loss.item()
            running_class += M2*class_loss.item()

            optimizer.step()
            model.zero_grad()
            global_step += 1
            count += 1

            # Every `log_interval` steps report the mean losses accumulated since the last reset.
            if global_step % log_interval == 0:
                loss_line = LOSS_LOG_FMT.format(global_step, running_loss / count, running_span / count, running_class / count)
                print(loss_line)
                with open(log_file,'a') as f:
                    print(loss_line, file=f)

                # In the second half of training the running averages restart after each
                # report (no intermediate evaluation or checkpoint is made in this cell).
                if global_step > start_save_steps:
                    running_loss, running_span, running_class, count = 0., 0., 0., 0

    # Evaluate once per repetition, in inference mode: the model has been in training
    # mode throughout the loop above.
    model.eval()
    p,r,f1,_,_,_,_ = evaluate(model, test_dataloader, test_feats, DICT, config, DEV)
    p_span, r_span, f1_span = evaluate_spanonly(model, test_dataloader, test_feats, config, DEV)
    acc, _, _ = evaluate_classonly(model, test_dataloader, test_feats, config, DEV)
    all_metrics['p_all'].append(p*100)
    all_metrics['r_all'].append(r*100)
    all_metrics['f1_all'].append(f1*100)
    all_metrics['p_span'].append(p_span*100)
    all_metrics['r_span'].append(r_span*100)
    all_metrics['f1_span'].append(f1_span*100)
    all_metrics['acc_class'].append(acc*100)

print('Final metrics')
updated_metrics = {}
# Open the log once for the whole summary instead of once per metric.
with open(log_file,'a') as f:
    for k, v in all_metrics.items():
        # Cast to built-in floats so json.dump below also works if the evaluation helpers
        # returned numpy or tensor scalars (assumption: values are scalar-like).
        values = [float(x) for x in v]
        array = np.asarray(values)
        # np.std defaults to the population standard deviation (ddof=0).
        mean, std = float(np.mean(array)), float(np.std(array))
        updated_metrics[k] = values
        updated_metrics[k + '_mean'] = mean
        updated_metrics[k + '_std'] = std
        summary_line = '{} : {:.2f} (+- {:.3f})'.format(k, mean, std)
        print(summary_line)
        print(summary_line, file=f)

# Raw per-repetition values plus mean/std for every metric.
with open(join(exp_root_dir,'final_metrics.json'), 'w') as f:
    json.dump(updated_metrics, f)

    





Filtering sentences with no aspects.


  all_input_ids = torch.tensor([f['subtokens_id'] for f in feat_dataset], dtype=torch.long)


Filtering sentences with no aspects.
Train samples:  1458
Test samples:  411
3626
Repetition 1 of 3
Loading model from pretrained checkpoint : bert_models/bert-large-uncased/pytorch_model.bin


	add_(Number alpha, Tensor other)
Consider using one of the following signatures instead:
	add_(Tensor other, *, Number alpha) (Triggered internally at  ../torch/csrc/utils/python_arg_parser.cpp:1174.)
  next_m.mul_(beta1).add_(1 - beta1, grad)


Step:    9, Loss: 5.2931 Span loss: 3.9359 Class loss: 1.3572
Step:   18, Loss: 3.8560 Span loss: 2.6709 Class loss: 1.1851
Step:   27, Loss: 3.1200 Span loss: 2.0673 Class loss: 1.0527
Step:   36, Loss: 2.7151 Span loss: 1.7436 Class loss: 0.9716
Step:   45, Loss: 2.4418 Span loss: 1.5402 Class loss: 0.9016
Step:   54, Loss: 1.0059 Span loss: 0.5787 Class loss: 0.4272
Step:   63, Loss: 0.9868 Span loss: 0.5553 Class loss: 0.4315
Step:   72, Loss: 0.9857 Span loss: 0.5700 Class loss: 0.4157
Step:   81, Loss: 0.9740 Span loss: 0.5520 Class loss: 0.4220
Step:   90, Loss: 0.9512 Span loss: 0.5613 Class loss: 0.3899
Step:   99, Loss: 0.6042 Span loss: 0.4113 Class loss: 0.1929
Step:  108, Loss: 0.7689 Span loss: 0.4947 Class loss: 0.2741
Step:  117, Loss: 0.6322 Span loss: 0.4278 Class loss: 0.2044
Step:  126, Loss: 0.6700 Span loss: 0.4657 Class loss: 0.2043
Step:  135, Loss: 0.6039 Span loss: 0.4419 Class loss: 0.1620
5882
Repetition 2 of 3
Loading model from pretrained checkpoint : bert

Running the following cell evaluates a given experiment. It is enough to set the variable EXP to the path of the desired experiment. The config used will be the one with which the experiment was performed, i.e. the copy stored in the experiment folder.

In [2]:
import os
import random
import subprocess
from configparser import ConfigParser
from os.path import join

import torch

# Explicit imports: every name used below is imported here, so the cell also runs on a
# fresh kernel instead of relying on a star import or on names left over from other cells.
from bert.modeling import BertConfig
from dataset import get_dataloader
from evaluation import evaluate, evaluate_spanonly, evaluate_classonly
from jointBert import JointBert
from utils import init_seed

# Experiment folder to evaluate and the checkpoint file inside it.
EXP = 'experiments/repro-3classes'
CP = 'checkpoint_last.pth'

DEV='cuda'
#DICT = {'OTHER': 0, 'T-NEU': 1, 'T-POS': 2, 'T-NEG': 3, 'CONF' : 4}
# Polarity tag -> class index, passed to the data loader and to evaluate().
DICT = {'T-NEU': 0, 'T-POS': 1, 'T-NEG': 2}

# Use the config copy stored with the experiment, not the current config.ini.
config = ConfigParser()
# ConfigParser.read() silently skips files it cannot open, so fail early with a clear message.
if not config.read(join(EXP, 'config.ini')):
    raise FileNotFoundError('config.ini not found in {}'.format(EXP))

seed = int(config['data']['seed'])
init_seed(seed)

bert_model = join(config['arch']['root'],config['arch']['bert'])
bert_config = BertConfig.from_json_file(join(bert_model,'bert_config.json'))
filter_empty_valid = config.getboolean('data','filter_empty_valid')

# Head arguments in the same order as in the training cells (span head first, then
# polarity head); the previous swapped order would build a different architecture
# whenever the two config entries differ.
model = JointBert(bert_config, config['arch']['span_head'], config['arch']['polarity_head'])
# Load on CPU first (as done for the pretrained weights), then move the model to the device.
checkpoint = torch.load(join(EXP,CP), map_location='cpu')
model.load_state_dict(checkpoint['model'])
model = model.to(DEV)
# A freshly constructed module is in training mode; switch to inference mode for evaluation.
model.eval()

print("Loading model from pretrained checkpoint : {}".format(join(EXP,CP)))

MAX_SEQ_LEN = int(config['arch']['max_sequence_lenght'])
test_dataloader, test_feats, num_test_samples = get_dataloader(config['data']['valid_file'], DICT, filter_empty_valid, config)

print('Test samples: ', num_test_samples)

# run final evaluation, make descriptive strings of metrics to be saved
p,r,f1,common,retrieved,relevant,preds = evaluate(model, test_dataloader, test_feats, DICT, config, DEV)
toprint_all = 'General metrics --> P: {:2.2f} R: {:2.2f} F1: {:2.2f} Common: {:4d} Retrieved: {:4d} Relevant: {:4d}'.format(p*100,r*100,f1*100,int(common),int(retrieved),int(relevant)) 
p,r,f1, = evaluate_spanonly(model, test_dataloader, test_feats, config, DEV)
toprint_span = 'Span metrics --> P: {:2.2f} R: {:2.2f} F1: {:2.2f}'.format(p*100,r*100,f1*100) 
accuracy, correct, total = evaluate_classonly(model, test_dataloader, test_feats, config, DEV)
toprint_class = 'Class metrics --> Accuracy: {:2.2f} Correct: {:4d} Total: {:4d}'.format(accuracy*100, int(correct), int(total))

print(toprint_all)
print(toprint_span)
print(toprint_class)







Loading model from pretrained checkpoint : experiments/repro-3classes/checkpoint_last.pth
Filtering sentences with no aspects.
Test samples:  411
General metrics --> P: 63.23 R: 59.94 F1: 61.54 Common:  380 Retrieved:  601 Relevant:  634
Span metrics --> P: 81.03 R: 76.81 F1: 78.87
Class metrics --> Accuracy: 78.23 Correct:  496 Total:  634


Run the following to compute the dataset statistics as reported in the report.

In [2]:

# Invoke stats.py twice, passing 'train' and then 'test' as its only argument.
!python stats.py 'train'
!python stats.py 'test'


Num sentences: 3045
  of which empty: 1587
  avg length in tokens: 16.6
Num aspects: 2302
  neutral: 455
  negative: 860
  positive: 987
  avg per nonempty sentence: 1.6
Num sentences: 800
  of which empty: 389
  avg length in tokens: 14.6
Num aspects: 634
  neutral: 165
  negative: 130
  positive: 339
  avg per nonempty sentence: 1.5
