<a href="https://colab.research.google.com/github/duahauby/character-classifier-cnn-chars74k/blob/master/Subjectivity_cls.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!gdown --id 1QVt2PFsLkeq7Y153i4OY9_haHsMx3zwx
!gdown --id 1aUPi1SowAscj6liNRZ1rW8YcOrs2b5hl
!gdown --id 1ouIeE9LTZxPaT-SJxQEmNpRmtNvOe5bG
!unzip lm_pretrained2.zip
# !rm lm_pretrained2.zip
!ls

Downloading...
From: https://drive.google.com/uc?id=1QVt2PFsLkeq7Y153i4OY9_haHsMx3zwx
To: /content/intent_smt.csv
100% 182k/182k [00:00<00:00, 47.4MB/s]
Access denied with the following error:

 	Cannot retrieve the public link of the file. You may need to change
	the permission to 'Anyone with the link', or have had many accesses. 

You may still be able to access the file from the browser:

	 https://drive.google.com/uc?id=1aUPi1SowAscj6liNRZ1rW8YcOrs2b5hl 

Downloading...
From: https://drive.google.com/uc?id=1ouIeE9LTZxPaT-SJxQEmNpRmtNvOe5bG
To: /content/subjectivity_train.csv
100% 1.14M/1.14M [00:00<00:00, 98.2MB/s]
Archive:  lm_pretrained2.zip
  inflating: config.json             
  inflating: merges.txt              
  inflating: pytorch_model.bin       
  inflating: vocab.json              
config.json	lm_pretrained2.zip  pytorch_model.bin  subjectivity_train.csv
intent_smt.csv	merges.txt	    sample_data        vocab.json


In [None]:
!pip install transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
import nltk
nltk.download('punkt')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [None]:
import numpy as np
import torch
import random
import os
import json
import string
from unicodedata import normalize
from nltk.tokenize import word_tokenize
import re
import pandas as pd


def seed_all(seed_value):
    np.random.seed(seed_value)  # cpu vars
    torch.manual_seed(seed_value)  # cpu  vars
    random.seed(seed_value)
    os.environ["PYTHONHASHSEED"] = str(seed_value)

    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed_value)
        torch.cuda.manual_seed_all(seed_value)  # gpu vars
        torch.backends.cudnn.deterministic = True  # needed
        torch.backends.cudnn.benchmark = False


def count_parameters(model) -> int:
    return sum(p.numel() for p in model.parameters() if p.requires_grad is True)


def clean_text(text):
    try:
      text = normalize('NFC', text)
      text = text.lower()
      text = " ".join([w for w in word_tokenize(text) if w not in string.punctuation])

      text = re.sub('\s+', ' ', text)
    except:
      text = ""
    return text



def read_data_from_csv(data_path, sorting=False):
    data = pd.read_csv(data_path, sep=',')
    labels = data["label"]
    samples = [clean_text(text) for text in data["sample"]]

    if sorting:
      sort_idx = np.array([len(i.split()) for i in samples]).argsort()
      labels = list(np.array(labels, dtype=object)[sort_idx])
      samples = list(np.array(samples, dtype=object)[sort_idx])
    assert len(labels) == len(samples)
    return samples, labels


In [None]:
import numpy as np
import torch
import itertools
from nltk.tokenize import word_tokenize


class IntentDataset(torch.utils.data.Dataset):
    def __init__(self, samples, tokenizer, max_subword_length=128):
        self.samples = samples
        self.tokenizer = tokenizer
        self.max_subword_length = max_subword_length

    def __getitem__(self, idx):
        label, text = self.samples[idx]
        sub_word_ids = self.tokenizer.encode(text)
        return sub_word_ids, int(label)

    def __len__(self):
        return len(self.samples)

def collate_fn_padd(batch):
    text_idx, labels = zip(*batch)
    max_length = min(max([len(idx) for idx in text_idx]), 128)
    padded_input_ids = np.ones((len(text_idx), max_length), dtype=np.long)

    for i, idx in enumerate(text_idx):
        padded_input_ids[i, :len(idx)] = idx[:128]

    input_mask = np.ones(padded_input_ids.shape)
    input_mask[padded_input_ids == 1] = 0

    item = {}
    item['input_ids'] = torch.tensor(padded_input_ids)
    item['input_mask'] = torch.tensor(input_mask)

    item['labels'] = torch.tensor(labels, dtype=torch.long)

    return item

In [None]:
import torch
from tqdm.auto import tqdm
import numpy as np
from sklearn.metrics import f1_score

def train_fn(
    dataloader, model, criterion, optimizer, scheduler, device="cuda", accu_step=1
):
    model.train()
    total_loss = 0

    pbar = tqdm(dataloader, total=len(dataloader))
    for i, (batch) in enumerate(pbar):
        input_ids = batch["input_ids"].to(device)
        input_mask = batch["input_mask"].to(device)
        labels = batch["labels"].to(device)

        logits = model(input_ids, input_mask)

        # Loss calculate
        loss = criterion(logits, labels)

        # Loss backward
        loss.backward()
        if (i + 1) % accu_step == 0:
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()
            scheduler.step()
            model.zero_grad()
            optimizer.zero_grad()

        total_loss += loss.item()

    total_loss /= len(dataloader)

    return total_loss


def validation_fn(dataloader, model, criterion, device="cuda"):
    model.eval()
    total_loss = 0
    pres, golds = [], []

    with torch.no_grad():
        pbar = tqdm(dataloader, total=len(dataloader))
        for i, (batch) in enumerate(pbar):
            input_ids = batch["input_ids"].to(device)
            input_mask = batch["input_mask"].to(device)
            labels = batch["labels"].to(device)

            logits = model(input_ids, input_mask)

            # Loss calculate
            loss = criterion(logits, labels)

            total_loss += loss.item()

            # Evaluate
            outputs = np.argmax(logits.detach().cpu().numpy(), axis=-1)
            labels = labels.detach().cpu().numpy()
            pres.extend(outputs)
            golds.extend(labels)

        f1 = f1_score(golds, pres, average='micro')

        print("F1 score: ", f1)

        total_loss /= len(dataloader)

        return total_loss, f1


In [None]:
import torch
from transformers import RobertaModel
from transformers import AutoModel, AutoConfig
import torch.nn as nn
from transformers import RobertaConfig


class IntentModel(nn.Module):
    def __init__(self, model_name, num_classes, device='cuda'):
        super().__init__()
        self.model_name = model_name
        self.device = device
        self.num_classes = num_classes
        self.config = RobertaConfig.from_pretrained(
            self.model_name, from_tf=False, output_hidden_states=True
        )
        # self.config.num_hidden_layers = 12
        self.roberta = RobertaModel.from_pretrained(self.model_name, config=self.config)


        # for n, param in self.roberta.named_parameters():
        #     # if "embeddings" in n:
        #         param.requires_grad = False

        # Intent head
        self.activation = nn.Tanh()
        self.hidden_layer = nn.Linear(
            self.config.hidden_size * 1, self.config.hidden_size
        )
        self.dropout = nn.Dropout(0.2)
        self.classifier = nn.Linear(
            self.config.hidden_size, self.num_classes
        )

    def forward(
        self,
        input_ids=None,
        attention_mask=None,
    ):
        bert_outputs = self.roberta(
            input_ids,
            attention_mask,
        )

        # text_features = self.mean_pooling(bert_outputs[0], attention_mask)
        text_features = bert_outputs[0][:, 0, :]

        hidden = self.hidden_layer(text_features)
        hidden = self.activation(hidden)
        dropout = self.dropout(hidden)
        logits = self.classifier(dropout)  # [batch, sent_len, n_labels]

        return logits

    @staticmethod
    def mean_pooling(token_embeddings, attention_mask):
        input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
        sum_embeddings = torch.sum(token_embeddings * input_mask_expanded, 1)
        sum_mask = torch.clamp(input_mask_expanded.sum(1), min=1e-9)
        return sum_embeddings / sum_mask


In [None]:
model_name = "./"
training_file = "./subjectivity_train.csv"
batch_size = 64
lr = 3e-5
n_epochs = 15
seed = 96
accu_step = 1

In [None]:
import gc
import argparse
import torch
from sklearn.model_selection import train_test_split, StratifiedKFold
from torch.utils.data import DataLoader, SequentialSampler
from transformers import RobertaTokenizer
from transformers import get_linear_schedule_with_warmup
from transformers import AdamW
import torch.nn as nn
from sklearn.utils.class_weight import compute_class_weight


seed_all(seed_value=seed)
device = "cuda" if torch.cuda.is_available() else "cpu"

training_samples, training_labels = read_data_from_csv(training_file)

cls_labels = list(set(training_labels))

label2id = {cls_labels[i]: i for i in range(len(cls_labels))}
id2label = {i: cls_labels[i] for i in range(len(cls_labels))}

training_labels = [label2id[i] for i in training_labels]

# Load data from file
weight_loss = compute_class_weight(class_weight="balanced", classes=range(len(cls_labels)), y=training_labels)


_, valid_samples, _, valid_labels = train_test_split(training_samples, training_labels,
                                                                                  test_size=0.1, random_state=2024,
                                                                                  shuffle=True)


training_samples = list(zip(training_labels, training_samples))
valid_samples = list(zip(valid_labels, valid_samples))

print('Number of training samples: ', len(training_samples))
print('Number of validation samples: ', len(valid_samples))

tokenizer = RobertaTokenizer.from_pretrained(model_name)

train_dataset = IntentDataset(
    training_samples, tokenizer=tokenizer
)
train_loader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    collate_fn=collate_fn_padd,
    num_workers=1
)

valid_dataset = IntentDataset(
    valid_samples,
    tokenizer=tokenizer
)
valid_sampler = SequentialSampler(valid_dataset)
valid_loader = DataLoader(
    valid_dataset,
    batch_size=batch_size,
    shuffle=False,
    sampler=valid_sampler,
    collate_fn=collate_fn_padd,
    num_workers=1
)

model = IntentModel(model_name, num_classes=len(cls_labels), device=device)
print('The number of parameters of the model: ', count_parameters(model))
model.to(device)

param_optimizer = list(model.named_parameters())
no_decay = ["LayerNorm.bias", "LayerNorm.weight"]
optimizer_grouped_parameters = [
    {
        "params": [
            p for n, p in param_optimizer if (not any(nd in n for nd in no_decay))
        ],
        "weight_decay": 0.01,
    },
    {
        "params": [
            p for n, p in param_optimizer if (any(nd in n for nd in no_decay))
        ],
        "weight_decay": 0.0,
    },
]

optimizer = AdamW(optimizer_grouped_parameters, lr=lr, correct_bias=False)

total_steps = len(train_loader) * n_epochs
scheduler = get_linear_schedule_with_warmup(
    optimizer, num_warmup_steps=int(len(train_loader) * 0.08), num_training_steps=total_steps
)

criterion = nn.CrossEntropyLoss(weight=torch.tensor(weight_loss, dtype=torch.float32).to(device))
# criterion = nn.CrossEntropyLoss()

max_score = -1
for epoch in range(n_epochs):
    gc.collect()
    print("Training on epoch", epoch + 1)

    total_loss = train_fn(
        dataloader=train_loader,
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        device=device,
        scheduler=scheduler,
        accu_step=accu_step
    )
    print('Training loss: ', total_loss)

    total_loss, f1 = validation_fn(
        valid_loader, model, criterion, device
    )
    if max_score < f1:
        max_score = f1
        model.eval()
        torch.save( {
              "weights": model.state_dict(),
              "label2id": label2id,
              "id2label": id2label
            },
            'model.pth')

        print("###############################")
    print('Validation loss', total_loss)
    print('*'*100)


Number of training samples:  6386
Number of validation samples:  639


Some weights of the model checkpoint at ./ were not used when initializing RobertaModel: ['lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.bias']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaModel were not initialized from the model checkpoint at ./ and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


The number of parameters of the model:  9920002
Training on epoch 1


  0%|          | 0/100 [00:00<?, ?it/s]

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  padded_input_ids = np.ones((len(text_idx), max_length), dtype=np.long)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  padded_input_ids = np.ones((len(text_idx), max_length), dtype=np.long)


Training loss:  0.07439113075612112


  0%|          | 0/10 [00:00<?, ?it/s]

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  padded_input_ids = np.ones((len(text_idx), max_length), dtype=np.long)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  padded_input_ids = np.ones((len(text_idx), max_length), dtype=np.long)


F1 score:  0.9937402190923318
###############################
Validation loss 0.009482236913754605
****************************************************************************************************
Training on epoch 2


  0%|          | 0/100 [00:00<?, ?it/s]

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  padded_input_ids = np.ones((len(text_idx), max_length), dtype=np.long)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  padded_input_ids = np.ones((len(text_idx), max_length), dtype=np.long)


Training loss:  0.008042750076274387


  0%|          | 0/10 [00:00<?, ?it/s]

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  padded_input_ids = np.ones((len(text_idx), max_length), dtype=np.long)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  padded_input_ids = np.ones((len(text_idx), max_length), dtype=np.long)


F1 score:  1.0
###############################
Validation loss 0.0005074566201074049
****************************************************************************************************
Training on epoch 3


  0%|          | 0/100 [00:00<?, ?it/s]

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  padded_input_ids = np.ones((len(text_idx), max_length), dtype=np.long)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  padded_input_ids = np.ones((len(text_idx), max_length), dtype=np.long)


Training loss:  0.0009803165077755694


  0%|          | 0/10 [00:00<?, ?it/s]

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  padded_input_ids = np.ones((len(text_idx), max_length), dtype=np.long)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  padded_input_ids = np.ones((len(text_idx), max_length), dtype=np.long)


F1 score:  1.0
Validation loss 8.267984230769798e-05
****************************************************************************************************
Training on epoch 4


  0%|          | 0/100 [00:00<?, ?it/s]

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  padded_input_ids = np.ones((len(text_idx), max_length), dtype=np.long)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  padded_input_ids = np.ones((len(text_idx), max_length), dtype=np.long)


Training loss:  0.0011330452201582374


  0%|          | 0/10 [00:00<?, ?it/s]

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  padded_input_ids = np.ones((len(text_idx), max_length), dtype=np.long)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  padded_input_ids = np.ones((len(text_idx), max_length), dtype=np.long)


F1 score:  1.0
Validation loss 4.107184213353321e-05
****************************************************************************************************
Training on epoch 5


  0%|          | 0/100 [00:00<?, ?it/s]

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  padded_input_ids = np.ones((len(text_idx), max_length), dtype=np.long)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  padded_input_ids = np.ones((len(text_idx), max_length), dtype=np.long)


Training loss:  0.0016400491140666417


  0%|          | 0/10 [00:00<?, ?it/s]

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  padded_input_ids = np.ones((len(text_idx), max_length), dtype=np.long)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  padded_input_ids = np.ones((len(text_idx), max_length), dtype=np.long)


F1 score:  1.0
Validation loss 3.675423304230208e-05
****************************************************************************************************
Training on epoch 6


  0%|          | 0/100 [00:00<?, ?it/s]

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  padded_input_ids = np.ones((len(text_idx), max_length), dtype=np.long)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  padded_input_ids = np.ones((len(text_idx), max_length), dtype=np.long)


Training loss:  0.00046317813232235497


  0%|          | 0/10 [00:00<?, ?it/s]

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  padded_input_ids = np.ones((len(text_idx), max_length), dtype=np.long)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  padded_input_ids = np.ones((len(text_idx), max_length), dtype=np.long)


F1 score:  1.0
Validation loss 2.480523417034419e-05
****************************************************************************************************
Training on epoch 7


  0%|          | 0/100 [00:00<?, ?it/s]

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  padded_input_ids = np.ones((len(text_idx), max_length), dtype=np.long)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  padded_input_ids = np.ones((len(text_idx), max_length), dtype=np.long)


Training loss:  0.0009259976274006476


  0%|          | 0/10 [00:00<?, ?it/s]

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  padded_input_ids = np.ones((len(text_idx), max_length), dtype=np.long)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  padded_input_ids = np.ones((len(text_idx), max_length), dtype=np.long)


F1 score:  1.0
Validation loss 2.2892653032613452e-05
****************************************************************************************************
Training on epoch 8


  0%|          | 0/100 [00:00<?, ?it/s]

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  padded_input_ids = np.ones((len(text_idx), max_length), dtype=np.long)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  padded_input_ids = np.ones((len(text_idx), max_length), dtype=np.long)


Training loss:  0.0005352149652935623


  0%|          | 0/10 [00:00<?, ?it/s]

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  padded_input_ids = np.ones((len(text_idx), max_length), dtype=np.long)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  padded_input_ids = np.ones((len(text_idx), max_length), dtype=np.long)


F1 score:  1.0
Validation loss 1.946370812220266e-05
****************************************************************************************************
Training on epoch 9


  0%|          | 0/100 [00:00<?, ?it/s]

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  padded_input_ids = np.ones((len(text_idx), max_length), dtype=np.long)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  padded_input_ids = np.ones((len(text_idx), max_length), dtype=np.long)


Training loss:  0.0012720650396840939


  0%|          | 0/10 [00:00<?, ?it/s]

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  padded_input_ids = np.ones((len(text_idx), max_length), dtype=np.long)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  padded_input_ids = np.ones((len(text_idx), max_length), dtype=np.long)


F1 score:  1.0
Validation loss 2.3380675702355803e-05
****************************************************************************************************
Training on epoch 10


  0%|          | 0/100 [00:00<?, ?it/s]

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  padded_input_ids = np.ones((len(text_idx), max_length), dtype=np.long)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  padded_input_ids = np.ones((len(text_idx), max_length), dtype=np.long)


Training loss:  0.00029157491651858434


  0%|          | 0/10 [00:00<?, ?it/s]

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  padded_input_ids = np.ones((len(text_idx), max_length), dtype=np.long)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  padded_input_ids = np.ones((len(text_idx), max_length), dtype=np.long)


F1 score:  1.0
Validation loss 2.0704849521280267e-05
****************************************************************************************************
Training on epoch 11


  0%|          | 0/100 [00:00<?, ?it/s]

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  padded_input_ids = np.ones((len(text_idx), max_length), dtype=np.long)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  padded_input_ids = np.ones((len(text_idx), max_length), dtype=np.long)


Training loss:  0.0005058940467824869


  0%|          | 0/10 [00:00<?, ?it/s]

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  padded_input_ids = np.ones((len(text_idx), max_length), dtype=np.long)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  padded_input_ids = np.ones((len(text_idx), max_length), dtype=np.long)


F1 score:  1.0
Validation loss 2.0570729247992858e-05
****************************************************************************************************
Training on epoch 12


  0%|          | 0/100 [00:00<?, ?it/s]

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  padded_input_ids = np.ones((len(text_idx), max_length), dtype=np.long)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  padded_input_ids = np.ones((len(text_idx), max_length), dtype=np.long)


Training loss:  0.0003692653533653356


  0%|          | 0/10 [00:00<?, ?it/s]

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  padded_input_ids = np.ones((len(text_idx), max_length), dtype=np.long)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  padded_input_ids = np.ones((len(text_idx), max_length), dtype=np.long)


F1 score:  1.0
Validation loss 1.8884270593844123e-05
****************************************************************************************************
Training on epoch 13


  0%|          | 0/100 [00:00<?, ?it/s]

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  padded_input_ids = np.ones((len(text_idx), max_length), dtype=np.long)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  padded_input_ids = np.ones((len(text_idx), max_length), dtype=np.long)


Training loss:  0.0004304691803372407


  0%|          | 0/10 [00:00<?, ?it/s]

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  padded_input_ids = np.ones((len(text_idx), max_length), dtype=np.long)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  padded_input_ids = np.ones((len(text_idx), max_length), dtype=np.long)


F1 score:  1.0
Validation loss 1.8148680101148785e-05
****************************************************************************************************
Training on epoch 14


  0%|          | 0/100 [00:00<?, ?it/s]

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  padded_input_ids = np.ones((len(text_idx), max_length), dtype=np.long)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  padded_input_ids = np.ones((len(text_idx), max_length), dtype=np.long)


Training loss:  0.00031263726747056354


  0%|          | 0/10 [00:00<?, ?it/s]

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  padded_input_ids = np.ones((len(text_idx), max_length), dtype=np.long)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  padded_input_ids = np.ones((len(text_idx), max_length), dtype=np.long)


F1 score:  1.0
Validation loss 1.7438353188481416e-05
****************************************************************************************************
Training on epoch 15


  0%|          | 0/100 [00:00<?, ?it/s]

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  padded_input_ids = np.ones((len(text_idx), max_length), dtype=np.long)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  padded_input_ids = np.ones((len(text_idx), max_length), dtype=np.long)


Training loss:  0.00019238107806813788


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  padded_input_ids = np.ones((len(text_idx), max_length), dtype=np.long)


  0%|          | 0/10 [00:00<?, ?it/s]

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  padded_input_ids = np.ones((len(text_idx), max_length), dtype=np.long)


F1 score:  1.0
Validation loss 1.7757161549525337e-05
****************************************************************************************************
