Based on [this tutorial](https://github.com/huggingface/notebooks/blob/master/transformers_doc/training.ipynb)

In [None]:
import pandas as pd

from utils import load_dataset
# Load the CMU-MOSI "TimestampedWords" data through the project-local helper.
# NOTE(review): later cells index the result by "train", "val" and "test" and
# call `.map(...)` on it, so it is presumably a DatasetDict-like object —
# confirm in utils.load_dataset.
mosi = load_dataset(["CMU_MOSI_TimestampedWords"])

In [4]:
# Peek at the first record of the test split to see which fields each example carries.
next(iter(mosi['test']))

{'labels': 0, 'text': 'oh my gosh bad movie'}

In [2]:
import transformers

# One checkpoint name feeds both the tokenizer and the classifier so the two
# always match. Alternative to experiment with: "bert-base-uncased"
# (optionally followed by `.cuda()` on the model to place it on the GPU up front).
MODEL_CHECKPOINT = "bert-base-cased"

tokenizer = transformers.AutoTokenizer.from_pretrained(MODEL_CHECKPOINT)

# num_labels=2 requests a two-way sequence-classification head.
model = transformers.AutoModelForSequenceClassification.from_pretrained(
    MODEL_CHECKPOINT,
    num_labels=2,
)

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at b

In [3]:
def tokenize_function(examples):
    """Tokenize the ``text`` field of a batch of examples.

    Sequences are truncated and padded with ``padding="max_length"`` so that
    every encoded row has the same length.
    """
    batch_texts = examples["text"]
    return tokenizer(batch_texts, truncation=True, padding="max_length")


# batched=True hands the tokenizer whole batches of rows instead of one row at a time.
tokenized_mosi = mosi.map(tokenize_function, batched=True)

  0%|          | 0/2 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

In [4]:
from transformers import Trainer, TrainingArguments

# Checkpoints are written under ./test_trainer; apart from the per-device
# training batch size, every setting keeps its library default.
training_args = TrainingArguments(
    output_dir="test_trainer",
    per_device_train_batch_size=2,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_mosi["train"],
    eval_dataset=tokenized_mosi["val"],
)

In [5]:
# Fine-tune the classifier on the training split; the returned TrainOutput
# (global step, training loss, runtime metrics) is displayed as the cell result.
trainer.train()

The following columns in the training set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text.
***** Running training *****
  Num examples = 1283
  Num Epochs = 3
  Instantaneous batch size per device = 2
  Total train batch size (w. parallel, distributed & accumulation) = 4
  Gradient Accumulation steps = 1
  Total optimization steps = 963


Step,Training Loss


Saving model checkpoint to test_trainer/checkpoint-500
Configuration saved in test_trainer/checkpoint-500/config.json
Model weights saved in test_trainer/checkpoint-500/pytorch_model.bin


Training completed. Do not forget to share your model on huggingface.co/models =)




TrainOutput(global_step=963, training_loss=0.39194457627531154, metrics={'train_runtime': 179.2098, 'train_samples_per_second': 21.478, 'train_steps_per_second': 5.374, 'total_flos': 1012714452080640.0, 'train_loss': 0.39194457627531154, 'epoch': 3.0})

In [6]:
import numpy as np
from datasets import load_metric

# NOTE(review): `load_metric` is deprecated in newer `datasets` releases in
# favour of the separate `evaluate` package — confirm against the installed version.
metric = load_metric("accuracy")


def compute_metrics(eval_pred):
    """Score evaluation outputs with the accuracy metric.

    ``eval_pred`` unpacks into ``(logits, labels)``. The predicted class of
    each row is the arg-max over the last axis of the logits, and the result
    is whatever ``metric.compute`` returns for those predictions.
    """
    scores, gold_labels = eval_pred
    predicted_labels = np.argmax(scores, axis=-1)
    return metric.compute(references=gold_labels, predictions=predicted_labels)

In [7]:
# Re-wrap the already fine-tuned model in a Trainer that reports accuracy,
# then score the validation split.
trainer = Trainer(
    compute_metrics=compute_metrics,
    model=model,
    args=training_args,
    train_dataset=tokenized_mosi["train"],
    eval_dataset=tokenized_mosi["val"],
)

trainer.evaluate()

The following columns in the evaluation set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text.
***** Running Evaluation *****
  Num examples = 229
  Batch size = 16


{'eval_loss': 0.7910729050636292,
 'eval_accuracy': 0.851528384279476,
 'eval_runtime': 2.5951,
 'eval_samples_per_second': 88.244,
 'eval_steps_per_second': 5.78}

In [8]:
# Run the fine-tuned model over the held-out test split. The result bundles the
# per-class scores (`.predictions`), the gold labels (`.label_ids`) and the
# aggregate metrics (`.metrics`, including test_accuracy). Take an arg-max over
# the per-class scores to obtain hard class predictions.
predictions = trainer.predict(test_dataset=tokenized_mosi["test"])

predictions.metrics

The following columns in the test set  don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: text.
***** Running Prediction *****
  Num examples = 686
  Batch size = 16


{'test_loss': 1.1321409940719604,
 'test_accuracy': 0.7959183673469388,
 'test_runtime': 7.397,
 'test_samples_per_second': 92.74,
 'test_steps_per_second': 5.813}

In [19]:
# One row per test example: gold label next to the arg-max class prediction.
test_eval_df = pd.DataFrame(
    dict(
        y_true=predictions.label_ids,
        y_pred=predictions.predictions.argmax(axis=1),
    )
)

In [26]:
# Per-example correctness mask (True where prediction matches the gold label); first five rows.
test_eval_df["y_pred"].eq(test_eval_df["y_true"]).head()

0    False
1     True
2    False
3     True
4     True
dtype: bool

In [40]:
import shap
import torch

# Use GPU 0 when CUDA is available and fall back to CPU (-1) otherwise, so the
# cell no longer fails on hosts without a GPU. With a GPU present this is
# exactly the previous `device=0` behaviour.
pipeline_device = 0 if torch.cuda.is_available() else -1

# return_all_scores=True makes the pipeline emit a score for every label
# (LABEL_0 and LABEL_1); later cells index the SHAP values by label name.
pred = transformers.pipeline(
    "sentiment-analysis",
    model=model,
    tokenizer=tokenizer,
    device=pipeline_device,
    return_all_scores=True,
)

# The tokenizer is passed as the masker so SHAP perturbs the text token by token.
explainer = shap.Explainer(pred, tokenizer)

In [59]:
# Explain the first four test sentences.
val = mosi['test']['text'][0:4]
shap_values = explainer(val)
# To explain a single sentence, wrap it in a list instead:
# shap_values = explainer([val])

# Print the LABEL_1 score next to the gold label. `predictions.label_ids`
# comes from the same test split, so positions line up with `val`.
for label_scores, gold_label in zip(pred(val), predictions.label_ids):
    positive_score = label_scores[1]["score"]
    print(f'y_pred: {positive_score}, y_true: {gold_label}')

You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset


y_pred: 0.9970405697822571, y_true: 0
y_pred: 0.001342606614343822, y_true: 0
y_pred: 0.9993368983268738, y_true: 0
y_pred: 0.9986466765403748, y_true: 1


In [60]:
import shap.plots

# Select the attributions toward LABEL_1 for every explained sentence and
# render them as highlighted text.
label_1_attributions = shap_values[:, :, "LABEL_1"]
shap.plots.text(label_1_attributions)

Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray


In [79]:
# Probe the classifier with small wording changes to see how individual words
# shift the predicted label scores.
probe_sentences = (
    "oh my god bad movie",
    "god bad movie",
    "oh my gosh good movie",
    "really bad movie",
    "really good movie",
)
for sentence in probe_sentences:
    print(pred(sentence))

[[{'label': 'LABEL_0', 'score': 0.00038953256444074214}, {'label': 'LABEL_1', 'score': 0.9996104836463928}]]
[[{'label': 'LABEL_0', 'score': 0.8375083208084106}, {'label': 'LABEL_1', 'score': 0.16249167919158936}]]
[[{'label': 'LABEL_0', 'score': 0.0003483361506368965}, {'label': 'LABEL_1', 'score': 0.9996516704559326}]]
[[{'label': 'LABEL_0', 'score': 0.9986573457717896}, {'label': 'LABEL_1', 'score': 0.001342606614343822}]]
[[{'label': 'LABEL_0', 'score': 0.0007318808929994702}, {'label': 'LABEL_1', 'score': 0.999268114566803}]]


You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset


In [96]:
# Other probe sentences kept for experimentation:
#   "gosh bad movie", "god bad movie", "bad movie", "sh movie", "oh my god bad movie"
test_str = "i didnt attend the funeral but i sent a nice letter saying i approved of it"

print(pred(test_str))

# A single sentence is passed to the explainer as a one-element list.
shap_values2 = explainer([test_str])
label_1_attributions2 = shap_values2[:, :, "LABEL_1"]
shap.plots.text(label_1_attributions2)

You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset


[[{'label': 'LABEL_0', 'score': 0.0004191303451079875}, {'label': 'LABEL_1', 'score': 0.9995809197425842}]]
