# Evaluate the model

In [3]:
import logging
import os
logging.basicConfig(level=logging.ERROR)

import pandas as pd
import torch
from tqdm import tqdm

from src.util import seed_everything, update_avg
from src.models.t5 import T5Model
from src.config import SEED, INTERIM_DATA_DIR
from src.metrics import calculate_all
from src.data.dataset import create_loaders

2023-11-06 07:20:04.058561: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [4]:
# If the interim data directory cannot be found from the current working
# directory, step up one level (e.g. when the kernel was started from a
# sub-folder of the project rather than from its root).
interim_dir_missing = not INTERIM_DATA_DIR.exists()
if interim_dir_missing:
    os.chdir(os.pardir)

In [5]:
# Fix the random state before any data loading or model work.
# NOTE(review): presumably seeds every RNG the project relies on -- confirm
# against src.util.seed_everything.
seed_everything(SEED)

In [6]:
# Batch size passed to create_loaders when the data loaders are built.
BATCH_SIZE = 8

In [7]:
# Read the interim CSV once, then let the project helper build the three
# loaders from it using the configured batch size.
swapped_df = pd.read_csv(INTERIM_DATA_DIR / "swapped.csv")
train_loader, val_loader, test_loader = create_loaders(
    swapped_df,
    batch_size=BATCH_SIZE,
)

In [8]:
# Build the project's T5 wrapper with its default arguments; the evaluation
# loop calls model.predict on each batch of inputs.
# NOTE(review): which checkpoint/weights the default constructor loads is not
# visible from this notebook -- confirm in src.models.t5.
model = T5Model()

In [None]:
# Same call as in the import cell. logging.basicConfig does nothing when the
# root logger already has handlers, so this line only has an effect if the
# earlier call did not install one.
logging.basicConfig(level=logging.ERROR)

In [9]:
# Evaluate on the test split: collect every prediction and keep running
# averages of the three metrics returned by calculate_all.
all_predictions = []
avg_non_tox = 0
avg_bert = 0
avg_bleu = 0

# Evaluation needs no gradients. A previous run of this cell died with
# "CUDA out of memory" after a handful of batches, so autograd bookkeeping is
# switched off for everything executed inside the loop (prediction and metric
# computation alike). This also keeps the running averages from holding on to
# computation graphs if the metrics come back as tensors.
# NOTE(review): whether model.predict / calculate_all already disable
# gradients internally is not visible here -- if the OOM persists, lower
# BATCH_SIZE or move the metric models off the GPU.
with torch.no_grad():
    for i, batch in (pbar := tqdm(enumerate(test_loader), total=len(test_loader))):
        inputs, target = batch
        test_pred = model.predict(inputs)
        all_predictions.extend(test_pred)

        # Per-batch metrics, folded into the running averages; `i` is the
        # zero-based batch index that update_avg receives as its third argument.
        non_tox, bert, bleu = calculate_all(inputs, test_pred, target)
        avg_non_tox = update_avg(avg_non_tox, non_tox, i)
        avg_bert = update_avg(avg_bert, bert, i)
        avg_bleu = update_avg(avg_bleu, bleu, i)

        # Show the current averages in the progress bar.
        pbar.set_description(f"{avg_non_tox=:.3f} {avg_bert=:.3f} {avg_bleu=:.3f}")

  0%|          | 0/28888 [00:00<?, ?it/s]Some weights of the model checkpoint at SkolkovoInstitute/roberta_toxicity_classifier_v1 were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
avg_non_tox=0.800 avg_bert=0.856 avg_bleu=0.408:   0%|          | 5/28888 [00:13<22:18:37,  2.78s/it]


OutOfMemoryError: CUDA out of memory. Tried to allocate 16.00 MiB (GPU 0; 3.81 GiB total capacity; 2.49 GiB already allocated; 13.12 MiB free; 2.96 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

# Visualize the predictions

In [None]:
import matplotlib.pyplot as plt

from src.visualization.make_wordcloud import make_wordcloud

In [None]:
# Word cloud of the predictions gathered in the evaluation loop, drawn on an
# explicit figure/axes pair with the axis frame hidden.
predicted_texts = pd.Series(all_predictions)
fig, ax = plt.subplots()
ax.set_title("Translated text")
ax.axis("off")
ax.imshow(make_wordcloud(predicted_texts))
plt.show()