# Sentiment analysis for SST-2 with CANINE

## Install HuggingFace dependencies

In [None]:
# Install the Hugging Face libraries imported by the cells below (-q keeps pip quiet).
!pip install -q datasets
!pip install -q transformers

[K     |████████████████████████████████| 325 kB 14.0 MB/s 
[K     |████████████████████████████████| 1.1 MB 60.5 MB/s 
[K     |████████████████████████████████| 67 kB 6.5 MB/s 
[K     |████████████████████████████████| 212 kB 37.8 MB/s 
[K     |████████████████████████████████| 136 kB 53.5 MB/s 
[K     |████████████████████████████████| 127 kB 72.2 MB/s 
[K     |████████████████████████████████| 94 kB 3.5 MB/s 
[K     |████████████████████████████████| 271 kB 53.1 MB/s 
[K     |████████████████████████████████| 144 kB 55.6 MB/s 
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
datascience 0.10.6 requires folium==0.2.1, but you have folium 0.8.3 which is incompatible.[0m
[K     |████████████████████████████████| 3.8 MB 16.3 MB/s 
[K     |████████████████████████████████| 895 kB 64.0 MB/s 
[K     |████████████████████████████████| 6.5 MB 63.6 MB

## Create functions to download and preprocess data

In [None]:
import os

import numpy as np

import torch
import torch.nn as nn
from torch.utils.data import DataLoader

from datasets import load_dataset
from transformers import CanineTokenizer, DataCollatorWithPadding

# Shared CANINE tokenizer: loaded once here and reused by encode() and by the
# padding collator built in generate_dataset().
tokenizer = CanineTokenizer.from_pretrained("google/canine-s")

def encode(samples):
    """Tokenize the ``sentence`` column of a batch with the module-level tokenizer.

    Truncation is enabled and padding is disabled, so each example keeps its
    own length at this stage.

    Args:
        samples: mapping with a ``'sentence'`` entry holding the texts to encode.

    Returns:
        Whatever the tokenizer returns for that batch of sentences.
    """
    sentences = samples['sentence']
    encoded = tokenizer(sentences, truncation=True, padding=False)
    return encoded

def quantify(samples):
    """Binarize the ``label`` column around a 0.5 threshold.

    Labels less than or equal to 0.5 become 0 and labels strictly greater
    than 0.5 become 1 (``np.digitize`` with a single right-closed bin edge).

    Args:
        samples: mapping with a ``'label'`` entry of numeric values.

    Returns:
        dict with a single ``'label'`` key holding the integer bin indices.
    """
    threshold = np.array([.5])
    bin_index = np.digitize(samples['label'], threshold, right=True)
    return {'label': bin_index}

# Replacement alphabet for injected typos (lowercase ASCII letters only).
letters = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
           'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
def typo(samples, typo_percent=0):
    """Inject random character substitutions into ``samples['sentence']``.

    Every character is independently selected with probability
    ``typo_percent``; selected alphabetic characters are replaced by a letter
    drawn uniformly from ``letters`` (which may be the same letter, and is
    always lowercase). Non-alphabetic characters are never changed, so
    sentence length is preserved.

    Randomness comes from NumPy's global RNG (``np.random``); seed it
    beforehand for repeatable typos.

    Args:
        samples: mapping with a ``'sentence'`` entry holding a mutable list of
            strings. The list is modified in place.
        typo_percent: per-character substitution probability in ``[0, 1]``.

    Returns:
        The same ``samples`` object, with its sentences updated.
    """
    sentences = samples['sentence']
    for i, sample in enumerate(sentences):
        # Split the sentence once and join once, instead of re-splitting and
        # re-joining the whole string for every replaced character.
        chars = list(sample)
        changed = False
        for j, x in enumerate(chars):
            # Draw first, then test isalpha(): keeps the RNG call order of the
            # original implementation so seeded runs give the same typos.
            if np.random.random() < typo_percent and x.isalpha():
                chars[j] = np.random.choice(letters)
                changed = True
        if changed:
            sentences[i] = ''.join(chars)
    return samples

def generate_dataset(bz=16, typo_percent=0):
    """Load GLUE SST-2, optionally add typos, tokenize it and build DataLoaders.

    The original validation split is reused as the test split, and 5% of the
    original training split (fixed seed 0) is held out as the new validation
    split. When ``typo_percent > 0`` the ``typo`` function is mapped over every
    split before tokenization.

    Args:
        bz: batch size, used both for the ``map`` calls and for the DataLoaders.
        typo_percent: per-character typo probability forwarded to ``typo``.

    Returns:
        ``(dataset, loader)`` where ``dataset`` is the processed dataset dict
        and ``loader`` maps each split name to its DataLoader (only ``'train'``
        is shuffled).
    """
    splits = load_dataset('glue', 'sst2')

    # Re-arrange the splits: validation -> test, then carve a new validation
    # set out of train.
    splits['test'] = splits['validation']
    held_out = splits['train'].train_test_split(0.05, seed=0)
    splits['train'] = held_out['train']
    splits['validation'] = held_out['test']

    if typo_percent > 0:
        splits = splits.map(
            typo,
            batched=True,
            batch_size=bz,
            fn_kwargs={'typo_percent': typo_percent},
        )
    splits = splits.map(encode, batched=True, batch_size=bz)

    model_columns = ['input_ids', 'token_type_ids', 'attention_mask', 'label']
    splits.set_format(type='torch', columns=model_columns)

    # One DataLoader per split; padding is done per batch by the collator.
    loader = {
        name: DataLoader(
            splits[name],
            batch_size=bz,
            shuffle=(name == 'train'),
            collate_fn=DataCollatorWithPadding(tokenizer),
        )
        for name in splits
    }

    return splits, loader

Downloading:   0%|          | 0.00/657 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/892 [00:00<?, ?B/s]

Using unk_token, but it is not set yet.
Using unk_token, but it is not set yet.
Using unk_token, but it is not set yet.
Using unk_token, but it is not set yet.
Using unk_token, but it is not set yet.
Using unk_token, but it is not set yet.
Using unk_token, but it is not set yet.
Using unk_token, but it is not set yet.


## Create train and eval functions

In [None]:
from tqdm import tqdm
import copy

def train_canine(net, device, optimizer, criterion, lr, lr_scheduler, train_loader, val_loader, epochs, unfreeze=False, unfreeze_ep=5, typo_percent=None):
    """Fine-tune ``net`` and keep the weights with the lowest validation loss.

    The model is validated five times per epoch and once more at the end of
    every epoch. Whenever the validation loss improves, a deep copy of the
    model is kept; the best copy's ``state_dict`` is saved under ``models/``
    when training finishes.

    Args:
        net: model called as ``net(input_ids=..., attention_mask=...,
            token_type_ids=..., labels=...)`` whose output exposes ``.loss``
            and ``.logits``.
        device: device the batches are moved to.
        optimizer: optimizer stepped once per batch.
        criterion: not used for training (the model output supplies the loss);
            only forwarded to ``evaluate_loss``.
        lr: learning rate, used only to name the checkpoint file.
        lr_scheduler: scheduler stepped once per batch.
        train_loader: iterable of dict batches with keys ``'input_ids'``,
            ``'attention_mask'``, ``'token_type_ids'`` and ``'labels'``.
        val_loader: loader passed to ``evaluate_loss``.
        epochs: number of passes over ``train_loader``.
        unfreeze: when true, every parameter gets ``requires_grad = True``
            once the epoch with 0-based index ``unfreeze_ep`` has finished.
        unfreeze_ep: 0-based epoch index that triggers the unfreeze.
        typo_percent: noise level written into the checkpoint filename. When
            omitted, a notebook-level ``typo_percent`` global is used if one
            exists (the behaviour older callers relied on).

    Returns:
        ``(train_losses, val_losses, net_copy)``: per-epoch mean training
        loss, per-epoch end-of-epoch validation loss, and the best model copy
        (or ``net`` itself if no validation ever improved on infinity).
    """
    if typo_percent is None:
        # Backward compatibility: the filename used to be built from a global.
        typo_percent = globals().get('typo_percent')

    best_loss = float('inf')  # np.Inf no longer exists in NumPy 2.0
    best_ep = 1
    net_copy = None
    loss = None  # defined up front so the final `del loss` is always valid
    train_losses = []
    val_losses = []
    # Validate five times per epoch; clamp to 1 so loaders with fewer than
    # five batches do not trigger a modulo-by-zero below.
    log_interval = max(1, len(train_loader)//5)

    def _keep_if_best(val_loss, ep):
        # Snapshot the model when the validation loss reaches a new minimum.
        nonlocal best_loss, best_ep, net_copy
        if val_loss < best_loss:
            print(f"Best validation loss improved from {best_loss:.3f} to {val_loss:.3f}")
            print()
            net_copy = copy.deepcopy(net)  # save a copy of the model
            best_loss = val_loss
            best_ep = ep + 1

    for ep in range(epochs):
        with tqdm(total=len(train_loader), unit_scale=True, postfix={'Loss':0.0, 'Accuracy':0.0, 'lr':lr_scheduler.get_last_lr()[0]},
                        desc="Epoch : %i/%i" % (ep+1, epochs)) as pbar:
            net.train()
            total_loss = 0.0
            correct = 0
            nb_samples = 0
            for it, batch in enumerate(train_loader):
                # Move the whole batch to the target device
                batch = {k: v.to(device) for k, v in batch.items()}
                seq, attn_masks, token_type_ids, labels = \
                    batch['input_ids'], batch['attention_mask'], batch['token_type_ids'], batch['labels']
                labels = labels.to(torch.int64)

                # Clear gradients
                optimizer.zero_grad()

                # Forward pass; the model computes the loss from `labels`
                output = net(input_ids=seq, attention_mask=attn_masks,
                             token_type_ids=token_type_ids, labels=labels)

                # Backpropagate and update the weights
                loss = output.loss
                loss.backward()
                optimizer.step()

                total_loss += loss.item()

                predicted = output.logits.argmax(axis=1)
                correct += predicted.eq(labels).sum().item()
                nb_samples += len(predicted)

                pbar.set_postfix({'Loss':total_loss/(it+1), 'Accuracy':100*correct/nb_samples, 'lr':lr_scheduler.get_last_lr()[0]})
                pbar.update(1)

                lr_scheduler.step()

                if it % log_interval == log_interval-1:
                    val_loss, val_acc = evaluate_loss(net, device, val_loader, criterion)  # Compute validation loss
                    print(f"Val Loss : {val_loss:.3f}. Val Accuracy {val_acc:.2f}%")
                    _keep_if_best(val_loss, ep)
                    # evaluate_loss() leaves the model in eval mode; switch back,
                    # otherwise the rest of the epoch trains with dropout disabled.
                    net.train()

        val_loss, val_acc = evaluate_loss(net, device, val_loader, criterion)  # Compute validation loss
        print()
        print(f"Epoch {ep+1} complete! Val Loss : {val_loss:.3f}. Val Accuracy {val_acc:.2f}%")

        train_losses.append(total_loss/len(train_loader))
        val_losses.append(val_loss)

        _keep_if_best(val_loss, ep)

        if unfreeze and ep == unfreeze_ep:
            for param in net.parameters():
                param.requires_grad = True

    if net_copy is None:
        # No validation loss ever beat infinity (e.g. zero epochs or NaN
        # losses): fall back to the model as it currently is.
        net_copy = net

    # Saving the model
    os.makedirs('models', exist_ok=True)
    path_to_model=f'models/CANINE_lr_{lr}_val_loss_{round(best_loss, 5)}_ep_{best_ep}_typo_{typo_percent}.pt'
    torch.save(net_copy.state_dict(), path_to_model)
    print(f"The model has been saved in {path_to_model}")

    # Drop the last loss tensor before asking CUDA to release cached memory
    del loss
    torch.cuda.empty_cache()

    return train_losses, val_losses, net_copy

In [None]:
def evaluate_loss(net, device, dataloader, criterion):
    """Compute the mean batch loss and the accuracy of ``net`` over ``dataloader``.

    The model is put in eval mode and is left in eval mode on return, so a
    caller that continues training must call ``net.train()`` again.

    Args:
        net: model called as ``net(input_ids=..., attention_mask=...,
            token_type_ids=..., labels=...)`` whose output exposes ``.loss``
            and ``.logits``.
        device: device the batches are moved to.
        dataloader: sized iterable of dict batches with keys ``'input_ids'``,
            ``'attention_mask'``, ``'token_type_ids'`` and ``'labels'``.
        criterion: unused (the model output supplies the loss); kept so
            existing callers keep working.

    Returns:
        ``(mean_loss, accuracy)``: the loss averaged over batches and the
        percentage of correctly classified samples among those iterated.
    """
    net.eval()

    total_loss = 0
    correct = 0
    nb_samples = 0

    print('Evaluating...')
    with torch.no_grad():
        for batch in dataloader:
            batch = {k: v.to(device) for k, v in batch.items()}
            labels = batch['labels'].to(torch.int64)
            # Keyword arguments (as in train_canine): positional order of
            # forward() differs between model classes.
            output = net(input_ids=batch['input_ids'],
                         attention_mask=batch['attention_mask'],
                         token_type_ids=batch['token_type_ids'],
                         labels=labels)
            total_loss += output.loss.item()
            predicted = output.logits.argmax(axis=1)
            correct += predicted.eq(labels).sum().item()
            # Count what was actually evaluated instead of trusting
            # len(dataloader.dataset), which is wrong with drop_last/samplers.
            nb_samples += len(predicted)

    return total_loss/len(dataloader), 100*correct/nb_samples

## Train the model with different levels of noise

In [None]:
print("Creation of the models' folder...")
# exist_ok keeps this cell idempotent: re-running the notebook no longer
# reports an error when the folder is already there.
os.makedirs("models", exist_ok=True)

Creation of the models' folder...


In [None]:
# Training hyperparameters shared by every noise-level run below.
# The two commented-out options are kept for reference; no visible cell reads them.
#freeze_canine = False  # if True, freeze the encoder weights and only update the classification layer weights
#maxlen = 2048  # maximum length of the tokenized input pair : if greater than "maxlen", the input is truncated and else if smaller, the input is padded
lr = 2e-5  # learning rate given to AdamW (the scheduler warms up to it, then decays linearly)
epochs = 5  # number of training epochs

In [None]:
from transformers import CanineTokenizer, CanineConfig, CanineForSequenceClassification, get_linear_schedule_with_warmup, DistilBertForSequenceClassification#, AdamW
from torch.optim import AdamW

# Fixed seed so typo injection, classifier initialisation, dropout and batch
# shuffling are repeatable (as far as the underlying libraries allow).
SEED = 0

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# losses[k] / accs[k] hold [train, validation, test] metrics for the k-th typo level.
losses = []
accs = []

for typo_percent in [0, 0.05, 0.1, 0.2, 0.4]:
    # Re-seed for every noise level so each run can be reproduced on its own,
    # independently of how many runs came before it.
    np.random.seed(SEED)
    torch.manual_seed(SEED)

    dataset, loader = generate_dataset(typo_percent=typo_percent)

    configuration = CanineConfig(num_labels=2)
    net = CanineForSequenceClassification.from_pretrained("google/canine-s", config=configuration)

    net.to(device)

    optimizer = AdamW(net.parameters(), lr=lr, weight_decay=1e-2)
    criterion = nn.CrossEntropyLoss()
    # Total number of optimizer steps: one per training batch per epoch
    # (no gradient accumulation is used).
    t_total = len(loader['train']) * epochs
    num_warmup_steps = int(t_total*0.1) # The number of steps for the warmup phase.
    lr_scheduler = get_linear_schedule_with_warmup(optimizer=optimizer, num_warmup_steps=num_warmup_steps, num_training_steps=t_total)

    # `net` is rebound to the best-validation copy returned by train_canine.
    train_losses, val_losses, net = train_canine(net, device, optimizer, criterion, lr, lr_scheduler, loader['train'], loader['validation'], epochs)

    print(f'\n### Typo percentage = {typo_percent} ###')
    losses_aux = []
    accs_aux = []
    for split in ['train', 'validation', 'test']:
        loss, acc = evaluate_loss(net, device, loader[split], criterion)
        losses_aux.append(loss)
        accs_aux.append(acc)
        print()
        print(split)
        print(f'   loss = {loss}\n   acc = {acc}')

    losses.append(losses_aux)
    accs.append(accs_aux)

Downloading builder script:   0%|          | 0.00/7.78k [00:00<?, ?B/s]

Downloading metadata:   0%|          | 0.00/4.47k [00:00<?, ?B/s]

Downloading and preparing dataset glue/sst2 (download: 7.09 MiB, generated: 4.81 MiB, post-processed: Unknown size, total: 11.90 MiB) to /root/.cache/huggingface/datasets/glue/sst2/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad...


Downloading data:   0%|          | 0.00/7.44M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/67349 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/872 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/1821 [00:00<?, ? examples/s]

Dataset glue downloaded and prepared to /root/.cache/huggingface/datasets/glue/sst2/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad. Subsequent calls will reuse this data.


  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/3999 [00:00<?, ?ba/s]

  0%|          | 0/211 [00:00<?, ?ba/s]

  0%|          | 0/55 [00:00<?, ?ba/s]

Downloading:   0%|          | 0.00/504M [00:00<?, ?B/s]

Some weights of CanineForSequenceClassification were not initialized from the model checkpoint at google/canine-c and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch : 1/5:  20%|█▉        | 799/4.00k [01:56<07:20, 7.27it/s, Loss=0.69, Accuracy=53.5, lr=7.98e-6]

Evaluating...


Epoch : 1/5:  20%|██        | 800/4.00k [02:07<3:01:11, 3.40s/it, Loss=0.69, Accuracy=53.5, lr=7.99e-6]

Val Loss : 0.681. Val Accuracy 55.85%
Best validation loss improved from inf to 0.681



Epoch : 1/5:  40%|███▉      | 1.60k/4.00k [04:01<05:57, 6.71it/s, Loss=0.637, Accuracy=60.5, lr=1.6e-5]

Evaluating...


Epoch : 1/5:  40%|███▉      | 1.60k/4.00k [04:12<2:14:23, 3.36s/it, Loss=0.636, Accuracy=60.5, lr=1.6e-5]

Val Loss : 0.513. Val Accuracy 76.31%
Best validation loss improved from 0.681 to 0.513



Epoch : 1/5:  60%|█████▉    | 2.40k/4.00k [06:06<04:01, 6.63it/s, Loss=0.558, Accuracy=67.7, lr=1.96e-5]

Evaluating...


Epoch : 1/5:  60%|█████▉    | 2.40k/4.00k [06:17<1:29:16, 3.35s/it, Loss=0.558, Accuracy=67.7, lr=1.96e-5]

Val Loss : 0.329. Val Accuracy 86.46%
Best validation loss improved from 0.513 to 0.329



Epoch : 1/5:  80%|███████▉  | 3.20k/4.00k [08:12<02:10, 6.14it/s, Loss=0.501, Accuracy=72.4, lr=1.87e-5]

Evaluating...
Val Loss : 0.297. Val Accuracy 88.27%
Best validation loss improved from 0.329 to 0.297



Epoch : 1/5: 100%|█████████▉| 4.00k/4.00k [10:17<00:00, 7.14it/s, Loss=0.455, Accuracy=75.7, lr=1.78e-5]

Evaluating...
Val Loss : 0.250. Val Accuracy 90.29%
Best validation loss improved from 0.297 to 0.250



Epoch : 1/5: 100%|██████████| 4.00k/4.00k [10:28<00:00, 6.36it/s, Loss=0.455, Accuracy=75.7, lr=1.78e-5]


Evaluating...

Epoch 1 complete! Val Loss : 0.253. Val Accuracy 90.11%


Epoch : 2/5:  20%|█▉        | 799/4.00k [01:54<07:46, 6.87it/s, Loss=0.233, Accuracy=91.2, lr=1.69e-5]

Evaluating...


Epoch : 2/5:  20%|██        | 800/4.00k [02:05<3:00:52, 3.39s/it, Loss=0.233, Accuracy=91.2, lr=1.69e-5]

Val Loss : 0.258. Val Accuracy 91.18%


Epoch : 2/5:  40%|███▉      | 1.60k/4.00k [03:59<06:26, 6.21it/s, Loss=0.203, Accuracy=92.4, lr=1.6e-5]

Evaluating...


Epoch : 2/5:  40%|███▉      | 1.60k/4.00k [04:10<2:16:27, 3.41s/it, Loss=0.204, Accuracy=92.4, lr=1.6e-5]

Val Loss : 0.219. Val Accuracy 91.78%
Best validation loss improved from 0.250 to 0.219



Epoch : 2/5:  60%|█████▉    | 2.40k/4.00k [06:04<03:51, 6.91it/s, Loss=0.191, Accuracy=93, lr=1.51e-5]

Evaluating...


Epoch : 2/5:  60%|█████▉    | 2.40k/4.00k [06:15<1:31:05, 3.41s/it, Loss=0.191, Accuracy=93, lr=1.51e-5]

Val Loss : 0.212. Val Accuracy 92.55%
Best validation loss improved from 0.219 to 0.212



Epoch : 2/5:  80%|███████▉  | 3.20k/4.00k [08:09<01:48, 7.40it/s, Loss=0.182, Accuracy=93.4, lr=1.42e-5]

Evaluating...


Epoch : 2/5:  80%|███████▉  | 3.20k/4.00k [08:20<45:27, 3.40s/it, Loss=0.182, Accuracy=93.4, lr=1.42e-5]

Val Loss : 0.209. Val Accuracy 92.76%
Best validation loss improved from 0.212 to 0.209



Epoch : 2/5: 100%|█████████▉| 4.00k/4.00k [10:15<00:00, 7.17it/s, Loss=0.178, Accuracy=93.5, lr=1.33e-5]

Evaluating...


Epoch : 2/5: 100%|█████████▉| 4.00k/4.00k [10:26<00:10, 3.35s/it, Loss=0.178, Accuracy=93.5, lr=1.33e-5]

Val Loss : 0.192. Val Accuracy 93.32%
Best validation loss improved from 0.209 to 0.192



Epoch : 2/5: 100%|██████████| 4.00k/4.00k [10:26<00:00, 6.38it/s, Loss=0.178, Accuracy=93.5, lr=1.33e-5]


Evaluating...

Epoch 2 complete! Val Loss : 0.196. Val Accuracy 92.93%


Epoch : 3/5:  20%|█▉        | 799/4.00k [01:53<07:44, 6.89it/s, Loss=0.126, Accuracy=95.5, lr=1.24e-5]

Evaluating...


Epoch : 3/5:  20%|██        | 800/4.00k [02:04<2:58:36, 3.35s/it, Loss=0.126, Accuracy=95.5, lr=1.24e-5]

Val Loss : 0.216. Val Accuracy 93.14%


Epoch : 3/5:  40%|███▉      | 1.60k/4.00k [03:58<05:41, 7.02it/s, Loss=0.0962, Accuracy=96.8, lr=1.16e-5]

Evaluating...


Epoch : 3/5:  40%|███▉      | 1.60k/4.00k [04:09<2:14:08, 3.35s/it, Loss=0.0961, Accuracy=96.8, lr=1.16e-5]

Val Loss : 0.205. Val Accuracy 93.35%


Epoch : 3/5:  60%|█████▉    | 2.40k/4.00k [06:03<03:53, 6.85it/s, Loss=0.0887, Accuracy=97, lr=1.07e-5]

Evaluating...


Epoch : 3/5:  60%|█████▉    | 2.40k/4.00k [06:14<1:30:27, 3.39s/it, Loss=0.0887, Accuracy=97, lr=1.07e-5]

Val Loss : 0.199. Val Accuracy 93.97%


Epoch : 3/5:  80%|███████▉  | 3.20k/4.00k [08:08<01:44, 7.66it/s, Loss=0.0839, Accuracy=97.2, lr=9.78e-6]

Evaluating...


Epoch : 3/5:  80%|███████▉  | 3.20k/4.00k [08:19<44:55, 3.36s/it, Loss=0.0839, Accuracy=97.2, lr=9.78e-6]

Val Loss : 0.221. Val Accuracy 93.02%


Epoch : 3/5: 100%|█████████▉| 4.00k/4.00k [10:13<00:00, 6.72it/s, Loss=0.083, Accuracy=97.3, lr=8.89e-6]

Evaluating...


Epoch : 3/5: 100%|█████████▉| 4.00k/4.00k [10:24<00:10, 3.42s/it, Loss=0.083, Accuracy=97.3, lr=8.89e-6]

Val Loss : 0.215. Val Accuracy 93.53%


Epoch : 3/5: 100%|██████████| 4.00k/4.00k [10:25<00:00, 6.39it/s, Loss=0.083, Accuracy=97.3, lr=8.89e-6]


Evaluating...

Epoch 3 complete! Val Loss : 0.214. Val Accuracy 93.53%


Epoch : 4/5:  20%|█▉        | 799/4.00k [01:54<07:53, 6.76it/s, Loss=0.0833, Accuracy=97.1, lr=8e-6]

Evaluating...


Epoch : 4/5:  20%|██        | 800/4.00k [02:05<2:58:25, 3.35s/it, Loss=0.0832, Accuracy=97.1, lr=8e-6]

Val Loss : 0.209. Val Accuracy 93.88%


Epoch : 4/5:  40%|███▉      | 1.60k/4.00k [03:59<05:23, 7.43it/s, Loss=0.0567, Accuracy=98.1, lr=7.11e-6]

Evaluating...


Epoch : 4/5:  40%|███▉      | 1.60k/4.00k [04:09<1:57:22, 2.93s/it, Loss=0.0566, Accuracy=98.1, lr=7.11e-6]

Val Loss : 0.216. Val Accuracy 93.97%


Epoch : 4/5:  60%|█████▉    | 2.40k/4.00k [06:03<03:37, 7.37it/s, Loss=0.0486, Accuracy=98.4, lr=6.23e-6]

Evaluating...


Epoch : 4/5:  60%|█████▉    | 2.40k/4.00k [06:14<1:23:20, 3.12s/it, Loss=0.0486, Accuracy=98.4, lr=6.22e-6]

Val Loss : 0.216. Val Accuracy 93.91%


Epoch : 4/5:  80%|███████▉  | 3.20k/4.00k [08:07<01:52, 7.12it/s, Loss=0.0454, Accuracy=98.5, lr=5.34e-6]

Evaluating...


Epoch : 4/5:  80%|███████▉  | 3.20k/4.00k [08:18<44:49, 3.35s/it, Loss=0.0454, Accuracy=98.5, lr=5.34e-6]

Val Loss : 0.198. Val Accuracy 94.15%


Epoch : 4/5: 100%|█████████▉| 4.00k/4.00k [10:10<00:00, 6.98it/s, Loss=0.0427, Accuracy=98.6, lr=4.45e-6]

Evaluating...


Epoch : 4/5: 100%|█████████▉| 4.00k/4.00k [10:21<00:10, 3.35s/it, Loss=0.0426, Accuracy=98.6, lr=4.45e-6]

Val Loss : 0.221. Val Accuracy 93.97%


Epoch : 4/5: 100%|██████████| 4.00k/4.00k [10:21<00:00, 6.43it/s, Loss=0.0426, Accuracy=98.6, lr=4.45e-6]


Evaluating...

Epoch 4 complete! Val Loss : 0.219. Val Accuracy 93.97%


Epoch : 5/5:  20%|█▉        | 799/4.00k [01:54<06:57, 7.66it/s, Loss=0.0549, Accuracy=98.1, lr=3.56e-6]

Evaluating...


Epoch : 5/5:  20%|██        | 800/4.00k [02:05<2:58:32, 3.35s/it, Loss=0.0548, Accuracy=98.1, lr=3.56e-6]

Val Loss : 0.254. Val Accuracy 93.79%


Epoch : 5/5:  40%|███▉      | 1.60k/4.00k [03:57<06:13, 6.42it/s, Loss=0.0366, Accuracy=98.8, lr=2.67e-6]

Evaluating...


Epoch : 5/5:  40%|███▉      | 1.60k/4.00k [04:08<2:14:20, 3.36s/it, Loss=0.0366, Accuracy=98.8, lr=2.67e-6]

Val Loss : 0.242. Val Accuracy 93.76%


Epoch : 5/5:  60%|█████▉    | 2.40k/4.00k [06:01<03:53, 6.85it/s, Loss=0.0295, Accuracy=99, lr=1.78e-6]

Evaluating...


Epoch : 5/5:  60%|█████▉    | 2.40k/4.00k [06:12<1:29:17, 3.35s/it, Loss=0.0295, Accuracy=99, lr=1.78e-6]

Val Loss : 0.244. Val Accuracy 94.06%


Epoch : 5/5:  80%|███████▉  | 3.20k/4.00k [08:05<01:58, 6.77it/s, Loss=0.0255, Accuracy=99.2, lr=8.94e-7]

Evaluating...


Epoch : 5/5:  80%|███████▉  | 3.20k/4.00k [08:16<44:54, 3.36s/it, Loss=0.0255, Accuracy=99.2, lr=8.92e-7]

Val Loss : 0.248. Val Accuracy 94.03%


Epoch : 5/5: 100%|█████████▉| 4.00k/4.00k [10:08<00:00, 7.10it/s, Loss=0.0236, Accuracy=99.2, lr=5.56e-9]

Evaluating...


Epoch : 5/5: 100%|█████████▉| 4.00k/4.00k [10:19<00:10, 3.37s/it, Loss=0.0236, Accuracy=99.2, lr=4.45e-9]

Val Loss : 0.253. Val Accuracy 94.03%


Epoch : 5/5: 100%|██████████| 4.00k/4.00k [10:20<00:00, 6.45it/s, Loss=0.0237, Accuracy=99.2, lr=1.11e-9]


Evaluating...

Epoch 5 complete! Val Loss : 0.253. Val Accuracy 94.03%
The model has been saved in models/CANINE_lr_2e-05_val_loss_0.19168_ep_2_typo_0.pt

### Typo percentage = 0 ###
Evaluating...

train
   loss = 0.07692918937306764
   acc = 97.96033197355464
Evaluating...

validation
   loss = 0.1916758729524522
   acc = 93.31947743467933
Evaluating...

test
   loss = 0.3921936253255064
   acc = 85.6651376146789


Reusing dataset glue (/root/.cache/huggingface/datasets/glue/sst2/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)


  0%|          | 0/3 [00:00<?, ?it/s]

Loading cached split indices for dataset at /root/.cache/huggingface/datasets/glue/sst2/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad/cache-abf39e5d242edc33.arrow and /root/.cache/huggingface/datasets/glue/sst2/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad/cache-b8f06ab981dbf971.arrow


  0%|          | 0/3999 [00:00<?, ?ba/s]

  0%|          | 0/211 [00:00<?, ?ba/s]

  0%|          | 0/55 [00:00<?, ?ba/s]

  0%|          | 0/3999 [00:00<?, ?ba/s]

  0%|          | 0/211 [00:00<?, ?ba/s]

  0%|          | 0/55 [00:00<?, ?ba/s]

Some weights of CanineForSequenceClassification were not initialized from the model checkpoint at google/canine-c and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch : 1/5:  20%|█▉        | 799/4.00k [01:52<07:31, 7.09it/s, Loss=0.69, Accuracy=53.3, lr=7.98e-6]

Evaluating...


Epoch : 1/5:  20%|██        | 800/4.00k [02:03<2:59:41, 3.37s/it, Loss=0.69, Accuracy=53.3, lr=7.99e-6]

Val Loss : 0.684. Val Accuracy 55.17%
Best validation loss improved from inf to 0.684



Epoch : 1/5:  40%|███▉      | 1.60k/4.00k [03:56<04:56, 8.11it/s, Loss=0.658, Accuracy=58.5, lr=1.6e-5]

Evaluating...


Epoch : 1/5:  40%|███▉      | 1.60k/4.00k [04:07<1:47:19, 2.68s/it, Loss=0.658, Accuracy=58.5, lr=1.6e-5]

Val Loss : 0.532. Val Accuracy 72.36%
Best validation loss improved from 0.684 to 0.532



Epoch : 1/5:  60%|█████▉    | 2.40k/4.00k [06:01<04:13, 6.32it/s, Loss=0.601, Accuracy=64.6, lr=1.96e-5]

Evaluating...


Epoch : 1/5:  60%|█████▉    | 2.40k/4.00k [06:12<1:29:57, 3.37s/it, Loss=0.601, Accuracy=64.6, lr=1.96e-5]

Val Loss : 0.439. Val Accuracy 80.14%
Best validation loss improved from 0.532 to 0.439



Epoch : 1/5:  80%|███████▉  | 3.20k/4.00k [08:06<01:42, 7.81it/s, Loss=0.555, Accuracy=68.6, lr=1.87e-5]

Evaluating...


Epoch : 1/5:  80%|███████▉  | 3.20k/4.00k [08:17<43:39, 3.27s/it, Loss=0.555, Accuracy=68.7, lr=1.87e-5]

Val Loss : 0.407. Val Accuracy 80.76%
Best validation loss improved from 0.439 to 0.407



Epoch : 1/5: 100%|█████████▉| 4.00k/4.00k [10:09<00:00, 6.95it/s, Loss=0.52, Accuracy=71.5, lr=1.78e-5]

Evaluating...


Epoch : 1/5: 100%|█████████▉| 4.00k/4.00k [10:20<00:10, 3.36s/it, Loss=0.52, Accuracy=71.5, lr=1.78e-5]

Val Loss : 0.347. Val Accuracy 84.65%
Best validation loss improved from 0.407 to 0.347



Epoch : 1/5: 100%|██████████| 4.00k/4.00k [10:20<00:00, 6.44it/s, Loss=0.52, Accuracy=71.6, lr=1.78e-5]


Evaluating...

Epoch 1 complete! Val Loss : 0.343. Val Accuracy 84.83%
Best validation loss improved from 0.347 to 0.343



Epoch : 2/5:  20%|█▉        | 799/4.00k [01:55<08:08, 6.55it/s, Loss=0.309, Accuracy=87.6, lr=1.69e-5]

Evaluating...


Epoch : 2/5:  20%|██        | 800/4.00k [02:05<2:59:17, 3.36s/it, Loss=0.308, Accuracy=87.6, lr=1.69e-5]

Val Loss : 0.350. Val Accuracy 85.45%


Epoch : 2/5:  40%|███▉      | 1.60k/4.00k [03:58<05:56, 6.73it/s, Loss=0.27, Accuracy=89.3, lr=1.6e-5]

Evaluating...


Epoch : 2/5:  40%|███▉      | 1.60k/4.00k [04:09<2:03:04, 3.08s/it, Loss=0.27, Accuracy=89.3, lr=1.6e-5]

Val Loss : 0.327. Val Accuracy 87.38%
Best validation loss improved from 0.343 to 0.327



Epoch : 2/5:  60%|█████▉    | 2.40k/4.00k [06:03<03:49, 6.98it/s, Loss=0.251, Accuracy=90.1, lr=1.51e-5]

Evaluating...


Epoch : 2/5:  60%|█████▉    | 2.40k/4.00k [06:13<1:29:30, 3.35s/it, Loss=0.251, Accuracy=90.1, lr=1.51e-5]

Val Loss : 0.311. Val Accuracy 87.74%
Best validation loss improved from 0.327 to 0.311



Epoch : 2/5:  80%|███████▉  | 3.20k/4.00k [08:06<01:50, 7.28it/s, Loss=0.243, Accuracy=90.4, lr=1.42e-5]

Evaluating...


Epoch : 2/5:  80%|███████▉  | 3.20k/4.00k [08:17<45:01, 3.37s/it, Loss=0.243, Accuracy=90.4, lr=1.42e-5]

Val Loss : 0.292. Val Accuracy 88.18%
Best validation loss improved from 0.311 to 0.292



Epoch : 2/5: 100%|█████████▉| 4.00k/4.00k [10:10<00:00, 6.37it/s, Loss=0.236, Accuracy=90.7, lr=1.33e-5]

Evaluating...


Epoch : 2/5: 100%|█████████▉| 4.00k/4.00k [10:21<00:10, 3.37s/it, Loss=0.236, Accuracy=90.7, lr=1.33e-5]

Val Loss : 0.277. Val Accuracy 88.42%
Best validation loss improved from 0.292 to 0.277



Epoch : 2/5: 100%|██████████| 4.00k/4.00k [10:21<00:00, 6.43it/s, Loss=0.236, Accuracy=90.7, lr=1.33e-5]


Evaluating...

Epoch 2 complete! Val Loss : 0.278. Val Accuracy 88.78%


Epoch : 3/5:  20%|█▉        | 799/4.00k [01:54<08:00, 6.67it/s, Loss=0.152, Accuracy=94.3, lr=1.24e-5]

Evaluating...


Epoch : 3/5:  20%|██        | 800/4.00k [02:05<3:00:49, 3.39s/it, Loss=0.152, Accuracy=94.3, lr=1.24e-5]

Val Loss : 0.306. Val Accuracy 88.42%


Epoch : 3/5:  40%|███▉      | 1.60k/4.00k [03:59<05:26, 7.34it/s, Loss=0.114, Accuracy=95.9, lr=1.16e-5]

Evaluating...


Epoch : 3/5:  40%|███▉      | 1.60k/4.00k [04:10<2:14:24, 3.36s/it, Loss=0.114, Accuracy=95.9, lr=1.16e-5]

Val Loss : 0.332. Val Accuracy 88.95%


Epoch : 3/5:  60%|█████▉    | 2.40k/4.00k [06:03<03:44, 7.13it/s, Loss=0.101, Accuracy=96.4, lr=1.07e-5]

Evaluating...


Epoch : 3/5:  60%|█████▉    | 2.40k/4.00k [06:14<1:29:36, 3.36s/it, Loss=0.101, Accuracy=96.4, lr=1.07e-5]

Val Loss : 0.325. Val Accuracy 89.31%


Epoch : 3/5:  80%|███████▉  | 3.20k/4.00k [08:09<01:40, 8.01it/s, Loss=0.0955, Accuracy=96.7, lr=9.78e-6]

Evaluating...


Epoch : 3/5:  80%|███████▉  | 3.20k/4.00k [08:20<40:43, 3.05s/it, Loss=0.0955, Accuracy=96.7, lr=9.78e-6]

Val Loss : 0.318. Val Accuracy 89.64%


Epoch : 3/5: 100%|█████████▉| 4.00k/4.00k [10:12<00:00, 7.71it/s, Loss=0.0906, Accuracy=96.9, lr=8.89e-6]

Evaluating...


Epoch : 3/5: 100%|█████████▉| 4.00k/4.00k [10:23<00:09, 3.22s/it, Loss=0.0907, Accuracy=96.9, lr=8.89e-6]

Val Loss : 0.368. Val Accuracy 89.28%


Epoch : 3/5: 100%|██████████| 4.00k/4.00k [10:24<00:00, 6.41it/s, Loss=0.0906, Accuracy=96.9, lr=8.89e-6]


Evaluating...

Epoch 3 complete! Val Loss : 0.366. Val Accuracy 89.37%


Epoch : 4/5:  20%|█▉        | 799/4.00k [01:54<07:26, 7.16it/s, Loss=0.0963, Accuracy=96.5, lr=8e-6]

Evaluating...


Epoch : 4/5:  20%|██        | 800/4.00k [02:05<2:59:07, 3.36s/it, Loss=0.0966, Accuracy=96.5, lr=8e-6]

Val Loss : 0.362. Val Accuracy 89.40%


Epoch : 4/5:  40%|███▉      | 1.60k/4.00k [03:57<05:24, 7.41it/s, Loss=0.06, Accuracy=98, lr=7.11e-6]

Evaluating...


Epoch : 4/5:  40%|███▉      | 1.60k/4.00k [04:07<2:14:01, 3.35s/it, Loss=0.06, Accuracy=98, lr=7.11e-6]

Val Loss : 0.432. Val Accuracy 88.75%


Epoch : 4/5:  60%|█████▉    | 2.40k/4.00k [06:01<03:58, 6.72it/s, Loss=0.048, Accuracy=98.4, lr=6.23e-6]

Evaluating...


Epoch : 4/5:  60%|█████▉    | 2.40k/4.00k [06:12<1:29:59, 3.37s/it, Loss=0.0479, Accuracy=98.4, lr=6.22e-6]

Val Loss : 0.384. Val Accuracy 89.55%


Epoch : 4/5:  80%|███████▉  | 3.20k/4.00k [08:05<01:48, 7.37it/s, Loss=0.0422, Accuracy=98.7, lr=5.34e-6]

Evaluating...


Epoch : 4/5:  80%|███████▉  | 3.20k/4.00k [08:16<44:40, 3.34s/it, Loss=0.0422, Accuracy=98.7, lr=5.34e-6]

Val Loss : 0.385. Val Accuracy 89.31%


Epoch : 4/5: 100%|█████████▉| 4.00k/4.00k [10:09<00:00, 8.03it/s, Loss=0.0379, Accuracy=98.8, lr=4.45e-6]

Evaluating...


Epoch : 4/5: 100%|█████████▉| 4.00k/4.00k [10:20<00:10, 3.35s/it, Loss=0.038, Accuracy=98.8, lr=4.45e-6]

Val Loss : 0.415. Val Accuracy 89.61%


Epoch : 4/5: 100%|██████████| 4.00k/4.00k [10:20<00:00, 6.44it/s, Loss=0.0382, Accuracy=98.8, lr=4.45e-6]


Evaluating...

Epoch 4 complete! Val Loss : 0.412. Val Accuracy 89.52%


Epoch : 5/5:  20%|█▉        | 799/4.00k [01:53<07:46, 6.86it/s, Loss=0.0724, Accuracy=97.5, lr=3.56e-6]

Evaluating...


Epoch : 5/5:  20%|██        | 800/4.00k [02:04<2:59:47, 3.37s/it, Loss=0.0723, Accuracy=97.5, lr=3.56e-6]

Val Loss : 0.382. Val Accuracy 89.90%


Epoch : 5/5:  40%|███▉      | 1.60k/4.00k [03:57<04:54, 8.16it/s, Loss=0.0407, Accuracy=98.7, lr=2.67e-6]

Evaluating...


Epoch : 5/5:  40%|███▉      | 1.60k/4.00k [04:08<2:13:22, 3.33s/it, Loss=0.0406, Accuracy=98.7, lr=2.67e-6]

Val Loss : 0.418. Val Accuracy 90.14%


Epoch : 5/5:  60%|█████▉    | 2.40k/4.00k [06:01<03:46, 7.08it/s, Loss=0.0296, Accuracy=99.1, lr=1.78e-6]

Evaluating...


Epoch : 5/5:  60%|█████▉    | 2.40k/4.00k [06:12<1:24:05, 3.15s/it, Loss=0.0296, Accuracy=99.1, lr=1.78e-6]

Val Loss : 0.440. Val Accuracy 90.26%


Epoch : 5/5:  80%|███████▉  | 3.20k/4.00k [08:05<01:42, 7.86it/s, Loss=0.0238, Accuracy=99.3, lr=8.94e-7]

Evaluating...


Epoch : 5/5:  80%|███████▉  | 3.20k/4.00k [08:15<44:21, 3.32s/it, Loss=0.0238, Accuracy=99.3, lr=8.92e-7]

Val Loss : 0.460. Val Accuracy 90.14%


Epoch : 5/5: 100%|█████████▉| 4.00k/4.00k [10:10<00:00, 6.58it/s, Loss=0.021, Accuracy=99.4, lr=5.56e-9]

Evaluating...


Epoch : 5/5: 100%|█████████▉| 4.00k/4.00k [10:21<00:10, 3.35s/it, Loss=0.021, Accuracy=99.4, lr=4.45e-9]

Val Loss : 0.446. Val Accuracy 90.20%


Epoch : 5/5: 100%|██████████| 4.00k/4.00k [10:21<00:00, 6.43it/s, Loss=0.021, Accuracy=99.4, lr=1.11e-9]


Evaluating...

Epoch 5 complete! Val Loss : 0.446. Val Accuracy 90.20%
The model has been saved in models/CANINE_lr_2e-05_val_loss_0.27685_ep_2_typo_0.05.pt

### Typo percentage = 0.05 ###
Evaluating...

train
   loss = 0.08488949112897815
   acc = 97.89937637736203
Evaluating...

validation
   loss = 0.27685173815425257
   acc = 88.42042755344418
Evaluating...

test
   loss = 0.3999604359269142
   acc = 83.71559633027523


Reusing dataset glue (/root/.cache/huggingface/datasets/glue/sst2/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)


  0%|          | 0/3 [00:00<?, ?it/s]

Loading cached split indices for dataset at /root/.cache/huggingface/datasets/glue/sst2/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad/cache-abf39e5d242edc33.arrow and /root/.cache/huggingface/datasets/glue/sst2/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad/cache-b8f06ab981dbf971.arrow


  0%|          | 0/3999 [00:00<?, ?ba/s]

  0%|          | 0/211 [00:00<?, ?ba/s]

  0%|          | 0/55 [00:00<?, ?ba/s]

  0%|          | 0/3999 [00:00<?, ?ba/s]

  0%|          | 0/211 [00:00<?, ?ba/s]

  0%|          | 0/55 [00:00<?, ?ba/s]

Some weights of CanineForSequenceClassification were not initialized from the model checkpoint at google/canine-c and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch : 1/5:  20%|█▉        | 799/4.00k [01:55<07:57, 6.70it/s, Loss=0.689, Accuracy=54.5, lr=7.98e-6]

Evaluating...


Epoch : 1/5:  20%|██        | 800/4.00k [02:05<2:53:48, 3.26s/it, Loss=0.689, Accuracy=54.5, lr=7.99e-6]

Val Loss : 0.683. Val Accuracy 56.77%
Best validation loss improved from inf to 0.683



Epoch : 1/5:  40%|███▉      | 1.60k/4.00k [04:00<05:11, 7.71it/s, Loss=0.676, Accuracy=57.1, lr=1.6e-5]

Evaluating...


Epoch : 1/5:  40%|███▉      | 1.60k/4.00k [04:11<2:14:42, 3.37s/it, Loss=0.676, Accuracy=57.1, lr=1.6e-5]

Val Loss : 0.627. Val Accuracy 67.70%
Best validation loss improved from 0.683 to 0.627



Epoch : 1/5:  60%|█████▉    | 2.40k/4.00k [06:05<03:30, 7.62it/s, Loss=0.639, Accuracy=61.7, lr=1.96e-5]

Evaluating...


Epoch : 1/5:  60%|█████▉    | 2.40k/4.00k [06:15<1:29:38, 3.36s/it, Loss=0.639, Accuracy=61.7, lr=1.96e-5]

Val Loss : 0.513. Val Accuracy 74.61%
Best validation loss improved from 0.627 to 0.513



Epoch : 1/5:  80%|███████▉  | 3.20k/4.00k [08:09<01:40, 7.96it/s, Loss=0.601, Accuracy=65.5, lr=1.87e-5]

Evaluating...


Epoch : 1/5:  80%|███████▉  | 3.20k/4.00k [08:20<45:01, 3.37s/it, Loss=0.6, Accuracy=65.5, lr=1.87e-5]

Val Loss : 0.449. Val Accuracy 79.16%
Best validation loss improved from 0.513 to 0.449



Epoch : 1/5: 100%|█████████▉| 4.00k/4.00k [10:14<00:00, 7.58it/s, Loss=0.568, Accuracy=68.3, lr=1.78e-5]

Evaluating...


Epoch : 1/5: 100%|█████████▉| 4.00k/4.00k [10:25<00:10, 3.38s/it, Loss=0.568, Accuracy=68.3, lr=1.78e-5]

Val Loss : 0.411. Val Accuracy 81.62%
Best validation loss improved from 0.449 to 0.411



Epoch : 1/5: 100%|██████████| 4.00k/4.00k [10:26<00:00, 6.39it/s, Loss=0.568, Accuracy=68.3, lr=1.78e-5]


Evaluating...

Epoch 1 complete! Val Loss : 0.410. Val Accuracy 81.56%
Best validation loss improved from 0.411 to 0.410



Epoch : 2/5:  20%|█▉        | 799/4.00k [01:55<08:15, 6.45it/s, Loss=0.372, Accuracy=83.6, lr=1.69e-5]

Evaluating...


Epoch : 2/5:  20%|██        | 800/4.00k [02:05<3:00:22, 3.38s/it, Loss=0.372, Accuracy=83.6, lr=1.69e-5]

Val Loss : 0.458. Val Accuracy 80.08%


Epoch : 2/5:  40%|███▉      | 1.60k/4.00k [04:00<05:48, 6.88it/s, Loss=0.334, Accuracy=85.9, lr=1.6e-5]

Evaluating...


Epoch : 2/5:  40%|███▉      | 1.60k/4.00k [04:10<2:15:08, 3.38s/it, Loss=0.334, Accuracy=85.9, lr=1.6e-5]

Val Loss : 0.417. Val Accuracy 83.52%


Epoch : 2/5:  60%|█████▉    | 2.40k/4.00k [06:05<04:03, 6.59it/s, Loss=0.32, Accuracy=86.5, lr=1.51e-5]

Evaluating...


Epoch : 2/5:  60%|█████▉    | 2.40k/4.00k [06:16<1:30:06, 3.38s/it, Loss=0.32, Accuracy=86.5, lr=1.51e-5]

Val Loss : 0.382. Val Accuracy 82.96%
Best validation loss improved from 0.410 to 0.382



Epoch : 2/5:  80%|███████▉  | 3.20k/4.00k [08:09<01:47, 7.44it/s, Loss=0.31, Accuracy=86.9, lr=1.42e-5]

Evaluating...


Epoch : 2/5:  80%|███████▉  | 3.20k/4.00k [08:20<44:58, 3.36s/it, Loss=0.31, Accuracy=86.9, lr=1.42e-5]

Val Loss : 0.359. Val Accuracy 84.35%
Best validation loss improved from 0.382 to 0.359



Epoch : 2/5: 100%|█████████▉| 4.00k/4.00k [10:15<00:00, 7.68it/s, Loss=0.303, Accuracy=87.2, lr=1.33e-5]

Evaluating...


Epoch : 2/5: 100%|█████████▉| 4.00k/4.00k [10:25<00:10, 3.38s/it, Loss=0.303, Accuracy=87.2, lr=1.33e-5]

Val Loss : 0.351. Val Accuracy 84.98%
Best validation loss improved from 0.359 to 0.351



Epoch : 2/5: 100%|██████████| 4.00k/4.00k [10:26<00:00, 6.38it/s, Loss=0.303, Accuracy=87.2, lr=1.33e-5]


Evaluating...

Epoch 2 complete! Val Loss : 0.353. Val Accuracy 85.04%


Epoch : 3/5:  20%|█▉        | 799/4.00k [01:55<08:12, 6.50it/s, Loss=0.19, Accuracy=92.7, lr=1.24e-5]

Evaluating...


Epoch : 3/5:  20%|██        | 800/4.00k [02:06<2:51:46, 3.22s/it, Loss=0.19, Accuracy=92.7, lr=1.24e-5]

Val Loss : 0.468. Val Accuracy 84.56%


Epoch : 3/5:  40%|███▉      | 1.60k/4.00k [04:00<05:55, 6.75it/s, Loss=0.142, Accuracy=94.8, lr=1.16e-5]

Evaluating...


Epoch : 3/5:  40%|███▉      | 1.60k/4.00k [04:11<2:14:24, 3.36s/it, Loss=0.142, Accuracy=94.8, lr=1.16e-5]

Val Loss : 0.485. Val Accuracy 84.00%


Epoch : 3/5:  60%|█████▉    | 2.40k/4.00k [06:05<03:50, 6.94it/s, Loss=0.125, Accuracy=95.4, lr=1.07e-5]

Evaluating...


Epoch : 3/5:  60%|█████▉    | 2.40k/4.00k [06:16<1:30:09, 3.38s/it, Loss=0.125, Accuracy=95.4, lr=1.07e-5]

Val Loss : 0.464. Val Accuracy 84.89%


Epoch : 3/5:  80%|███████▉  | 3.20k/4.00k [08:11<01:55, 6.96it/s, Loss=0.116, Accuracy=95.8, lr=9.78e-6]

Evaluating...


Epoch : 3/5:  80%|███████▉  | 3.20k/4.00k [08:21<45:09, 3.38s/it, Loss=0.116, Accuracy=95.8, lr=9.78e-6]

Val Loss : 0.418. Val Accuracy 85.60%


Epoch : 3/5: 100%|█████████▉| 4.00k/4.00k [10:17<00:00, 6.90it/s, Loss=0.112, Accuracy=96, lr=8.89e-6]

Evaluating...


Epoch : 3/5: 100%|█████████▉| 4.00k/4.00k [10:28<00:10, 3.38s/it, Loss=0.112, Accuracy=96, lr=8.89e-6]

Val Loss : 0.390. Val Accuracy 85.24%


Epoch : 3/5: 100%|██████████| 4.00k/4.00k [10:28<00:00, 6.36it/s, Loss=0.112, Accuracy=96, lr=8.89e-6]


Evaluating...

Epoch 3 complete! Val Loss : 0.396. Val Accuracy 85.30%


Epoch : 4/5:  20%|█▉        | 799/4.00k [01:57<07:22, 7.24it/s, Loss=0.116, Accuracy=95.5, lr=8e-6]

Evaluating...


Epoch : 4/5:  20%|██        | 800/4.00k [02:07<2:59:47, 3.37s/it, Loss=0.115, Accuracy=95.5, lr=8e-6]

Val Loss : 0.507. Val Accuracy 85.30%


Epoch : 4/5:  40%|███▉      | 1.60k/4.00k [04:02<06:03, 6.60it/s, Loss=0.0701, Accuracy=97.4, lr=7.11e-6]

Evaluating...


Epoch : 4/5:  40%|███▉      | 1.60k/4.00k [04:13<2:15:19, 3.38s/it, Loss=0.07, Accuracy=97.4, lr=7.11e-6]

Val Loss : 0.555. Val Accuracy 85.24%


Epoch : 4/5:  60%|█████▉    | 2.40k/4.00k [06:08<03:55, 6.80it/s, Loss=0.0551, Accuracy=98, lr=6.23e-6]

Evaluating...


Epoch : 4/5:  60%|█████▉    | 2.40k/4.00k [06:19<1:30:18, 3.38s/it, Loss=0.0551, Accuracy=98, lr=6.22e-6]

Val Loss : 0.594. Val Accuracy 84.83%


Epoch : 4/5:  80%|███████▉  | 3.20k/4.00k [08:14<01:37, 8.20it/s, Loss=0.0473, Accuracy=98.4, lr=5.34e-6]

Evaluating...


Epoch : 4/5:  80%|███████▉  | 3.20k/4.00k [08:25<44:52, 3.36s/it, Loss=0.0473, Accuracy=98.4, lr=5.34e-6]

Val Loss : 0.575. Val Accuracy 84.98%


Epoch : 4/5: 100%|█████████▉| 4.00k/4.00k [10:19<00:00, 6.85it/s, Loss=0.0428, Accuracy=98.5, lr=4.45e-6]

Evaluating...


Epoch : 4/5: 100%|█████████▉| 4.00k/4.00k [10:30<00:10, 3.37s/it, Loss=0.0428, Accuracy=98.5, lr=4.45e-6]

Val Loss : 0.628. Val Accuracy 84.98%


Epoch : 4/5: 100%|██████████| 4.00k/4.00k [10:31<00:00, 6.34it/s, Loss=0.0428, Accuracy=98.5, lr=4.45e-6]


Evaluating...

Epoch 4 complete! Val Loss : 0.625. Val Accuracy 85.12%


Epoch : 5/5:  20%|█▉        | 799/4.00k [01:55<07:54, 6.74it/s, Loss=0.0855, Accuracy=96.8, lr=3.56e-6]

Evaluating...


Epoch : 5/5:  20%|██        | 800/4.00k [02:05<2:59:28, 3.37s/it, Loss=0.0854, Accuracy=96.9, lr=3.56e-6]

Val Loss : 0.507. Val Accuracy 85.21%


Epoch : 5/5:  40%|███▉      | 1.60k/4.00k [04:01<05:28, 7.30it/s, Loss=0.0475, Accuracy=98.3, lr=2.67e-6]

Evaluating...


Epoch : 5/5:  40%|███▉      | 1.60k/4.00k [04:12<2:13:57, 3.35s/it, Loss=0.0475, Accuracy=98.3, lr=2.67e-6]

Val Loss : 0.632. Val Accuracy 85.30%


Epoch : 5/5:  60%|█████▉    | 2.40k/4.00k [06:05<04:15, 6.26it/s, Loss=0.0343, Accuracy=98.8, lr=1.78e-6]

Evaluating...


Epoch : 5/5:  60%|█████▉    | 2.40k/4.00k [06:16<1:29:47, 3.37s/it, Loss=0.0343, Accuracy=98.8, lr=1.78e-6]

Val Loss : 0.661. Val Accuracy 85.36%


Epoch : 5/5:  80%|███████▉  | 3.20k/4.00k [08:10<01:47, 7.50it/s, Loss=0.0272, Accuracy=99.1, lr=8.94e-7]

Evaluating...


Epoch : 5/5:  80%|███████▉  | 3.20k/4.00k [08:21<44:38, 3.34s/it, Loss=0.0272, Accuracy=99.1, lr=8.92e-7]

Val Loss : 0.694. Val Accuracy 85.51%


Epoch : 5/5: 100%|█████████▉| 4.00k/4.00k [10:15<00:00, 7.35it/s, Loss=0.0232, Accuracy=99.3, lr=5.56e-9]

Evaluating...


Epoch : 5/5: 100%|█████████▉| 4.00k/4.00k [10:25<00:10, 3.34s/it, Loss=0.0232, Accuracy=99.3, lr=4.45e-9]

Val Loss : 0.699. Val Accuracy 85.51%


Epoch : 5/5: 100%|██████████| 4.00k/4.00k [10:26<00:00, 6.38it/s, Loss=0.0232, Accuracy=99.3, lr=1.11e-9]


Evaluating...

Epoch 5 complete! Val Loss : 0.699. Val Accuracy 85.51%
The model has been saved in models/CANINE_lr_2e-05_val_loss_0.35146_ep_2_typo_0.1.pt

### Typo percentage = 0.1 ###
Evaluating...

train
   loss = 0.10715573558490234
   acc = 97.09132398680859
Evaluating...

validation
   loss = 0.3514567346828526
   acc = 84.97624703087887
Evaluating...

test
   loss = 0.5238751846280965
   acc = 78.21100917431193


Reusing dataset glue (/root/.cache/huggingface/datasets/glue/sst2/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)


  0%|          | 0/3 [00:00<?, ?it/s]

Loading cached split indices for dataset at /root/.cache/huggingface/datasets/glue/sst2/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad/cache-abf39e5d242edc33.arrow and /root/.cache/huggingface/datasets/glue/sst2/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad/cache-b8f06ab981dbf971.arrow


  0%|          | 0/3999 [00:00<?, ?ba/s]

  0%|          | 0/211 [00:00<?, ?ba/s]

  0%|          | 0/55 [00:00<?, ?ba/s]

  0%|          | 0/3999 [00:00<?, ?ba/s]

  0%|          | 0/211 [00:00<?, ?ba/s]

  0%|          | 0/55 [00:00<?, ?ba/s]

Some weights of CanineForSequenceClassification were not initialized from the model checkpoint at google/canine-c and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch : 1/5:  20%|█▉        | 799/4.00k [01:54<07:01, 7.60it/s, Loss=0.689, Accuracy=54.6, lr=7.98e-6]

Evaluating...


Epoch : 1/5:  20%|██        | 800/4.00k [02:05<2:58:12, 3.34s/it, Loss=0.689, Accuracy=54.6, lr=7.99e-6]

Val Loss : 0.686. Val Accuracy 55.20%
Best validation loss improved from inf to 0.686



Epoch : 1/5:  40%|███▉      | 1.60k/4.00k [03:59<05:44, 6.97it/s, Loss=0.683, Accuracy=55.8, lr=1.6e-5]

Evaluating...


Epoch : 1/5:  40%|███▉      | 1.60k/4.00k [04:10<2:11:46, 3.29s/it, Loss=0.683, Accuracy=55.8, lr=1.6e-5]

Val Loss : 0.652. Val Accuracy 59.89%
Best validation loss improved from 0.686 to 0.652



Epoch : 1/5:  60%|█████▉    | 2.40k/4.00k [06:03<03:29, 7.65it/s, Loss=0.664, Accuracy=58.7, lr=1.96e-5]

Evaluating...


Epoch : 1/5:  60%|█████▉    | 2.40k/4.00k [06:14<1:28:26, 3.31s/it, Loss=0.664, Accuracy=58.7, lr=1.96e-5]

Val Loss : 0.582. Val Accuracy 68.44%
Best validation loss improved from 0.652 to 0.582



Epoch : 1/5:  80%|███████▉  | 3.20k/4.00k [08:08<01:44, 7.69it/s, Loss=0.642, Accuracy=61.3, lr=1.87e-5]

Evaluating...


Epoch : 1/5:  80%|███████▉  | 3.20k/4.00k [08:19<44:41, 3.34s/it, Loss=0.642, Accuracy=61.3, lr=1.87e-5]

Val Loss : 0.550. Val Accuracy 70.93%
Best validation loss improved from 0.582 to 0.550



Epoch : 1/5: 100%|█████████▉| 4.00k/4.00k [10:12<00:00, 6.70it/s, Loss=0.622, Accuracy=63.5, lr=1.78e-5]

Evaluating...


Epoch : 1/5: 100%|█████████▉| 4.00k/4.00k [10:23<00:10, 3.35s/it, Loss=0.622, Accuracy=63.5, lr=1.78e-5]

Val Loss : 0.519. Val Accuracy 73.28%
Best validation loss improved from 0.550 to 0.519



Epoch : 1/5: 100%|██████████| 4.00k/4.00k [10:24<00:00, 6.41it/s, Loss=0.622, Accuracy=63.5, lr=1.78e-5]


Evaluating...

Epoch 1 complete! Val Loss : 0.519. Val Accuracy 73.16%
Best validation loss improved from 0.519 to 0.519



Epoch : 2/5:  20%|█▉        | 799/4.00k [01:54<07:30, 7.10it/s, Loss=0.476, Accuracy=77.1, lr=1.69e-5]

Evaluating...


Epoch : 2/5:  20%|██        | 800/4.00k [02:05<2:57:40, 3.33s/it, Loss=0.476, Accuracy=77.1, lr=1.69e-5]

Val Loss : 0.530. Val Accuracy 74.44%


Epoch : 2/5:  40%|███▉      | 1.60k/4.00k [03:58<05:46, 6.93it/s, Loss=0.447, Accuracy=79.2, lr=1.6e-5]

Evaluating...


Epoch : 2/5:  40%|███▉      | 1.60k/4.00k [04:09<2:14:39, 3.37s/it, Loss=0.447, Accuracy=79.2, lr=1.6e-5]

Val Loss : 0.513. Val Accuracy 75.53%
Best validation loss improved from 0.519 to 0.513



Epoch : 2/5:  60%|█████▉    | 2.40k/4.00k [06:04<03:44, 7.14it/s, Loss=0.432, Accuracy=80.1, lr=1.51e-5]

Evaluating...


Epoch : 2/5:  60%|█████▉    | 2.40k/4.00k [06:15<1:21:05, 3.04s/it, Loss=0.432, Accuracy=80.1, lr=1.51e-5]

Val Loss : 0.486. Val Accuracy 76.60%
Best validation loss improved from 0.513 to 0.486



Epoch : 2/5:  80%|███████▉  | 3.20k/4.00k [08:09<01:48, 7.38it/s, Loss=0.421, Accuracy=80.7, lr=1.42e-5]

Evaluating...


Epoch : 2/5:  80%|███████▉  | 3.20k/4.00k [08:19<43:59, 3.29s/it, Loss=0.421, Accuracy=80.7, lr=1.42e-5]

Val Loss : 0.528. Val Accuracy 76.57%


Epoch : 2/5: 100%|█████████▉| 4.00k/4.00k [10:12<00:00, 7.00it/s, Loss=0.414, Accuracy=81.1, lr=1.33e-5]

Evaluating...


Epoch : 2/5: 100%|█████████▉| 4.00k/4.00k [10:23<00:10, 3.35s/it, Loss=0.414, Accuracy=81.1, lr=1.33e-5]

Val Loss : 0.473. Val Accuracy 77.88%
Best validation loss improved from 0.486 to 0.473



Epoch : 2/5: 100%|██████████| 4.00k/4.00k [10:23<00:00, 6.41it/s, Loss=0.414, Accuracy=81.1, lr=1.33e-5]


Evaluating...

Epoch 2 complete! Val Loss : 0.470. Val Accuracy 78.12%
Best validation loss improved from 0.473 to 0.470



Epoch : 3/5:  20%|█▉        | 799/4.00k [01:55<09:00, 5.92it/s, Loss=0.26, Accuracy=89.6, lr=1.24e-5]

Evaluating...


Epoch : 3/5:  20%|██        | 800/4.00k [02:06<2:59:50, 3.37s/it, Loss=0.26, Accuracy=89.6, lr=1.24e-5]

Val Loss : 0.562. Val Accuracy 77.85%


Epoch : 3/5:  40%|███▉      | 1.60k/4.00k [04:00<05:24, 7.40it/s, Loss=0.202, Accuracy=92.2, lr=1.16e-5]

Evaluating...


Epoch : 3/5:  40%|███▉      | 1.60k/4.00k [04:11<2:14:14, 3.36s/it, Loss=0.202, Accuracy=92.2, lr=1.16e-5]

Val Loss : 0.602. Val Accuracy 77.76%


Epoch : 3/5:  60%|█████▉    | 2.40k/4.00k [06:04<04:03, 6.57it/s, Loss=0.186, Accuracy=93, lr=1.07e-5]

Evaluating...


Epoch : 3/5:  60%|█████▉    | 2.40k/4.00k [06:15<1:29:47, 3.37s/it, Loss=0.186, Accuracy=93, lr=1.07e-5]

Val Loss : 0.621. Val Accuracy 78.09%


Epoch : 3/5:  80%|███████▉  | 3.20k/4.00k [08:08<01:32, 8.64it/s, Loss=0.177, Accuracy=93.3, lr=9.78e-6]

Evaluating...


Epoch : 3/5:  80%|███████▉  | 3.20k/4.00k [08:19<44:46, 3.35s/it, Loss=0.176, Accuracy=93.3, lr=9.78e-6]

Val Loss : 0.618. Val Accuracy 78.09%


Epoch : 3/5: 100%|█████████▉| 4.00k/4.00k [10:12<00:00, 8.76it/s, Loss=0.171, Accuracy=93.6, lr=8.89e-6]

Evaluating...


Epoch : 3/5: 100%|█████████▉| 4.00k/4.00k [10:23<00:09, 3.32s/it, Loss=0.171, Accuracy=93.6, lr=8.89e-6]

Val Loss : 0.582. Val Accuracy 78.06%


Epoch : 3/5: 100%|██████████| 4.00k/4.00k [10:23<00:00, 6.41it/s, Loss=0.171, Accuracy=93.6, lr=8.89e-6]


Evaluating...

Epoch 3 complete! Val Loss : 0.583. Val Accuracy 78.09%


Epoch : 4/5:  20%|█▉        | 799/4.00k [01:54<08:03, 6.62it/s, Loss=0.166, Accuracy=93.7, lr=8e-6]

Evaluating...


Epoch : 4/5:  20%|██        | 800/4.00k [02:05<2:59:10, 3.36s/it, Loss=0.165, Accuracy=93.7, lr=8e-6]

Val Loss : 0.669. Val Accuracy 78.71%


Epoch : 4/5:  40%|███▉      | 1.60k/4.00k [03:59<06:17, 6.36it/s, Loss=0.101, Accuracy=96.4, lr=7.11e-6]

Evaluating...


Epoch : 4/5:  40%|███▉      | 1.60k/4.00k [04:10<2:15:43, 3.39s/it, Loss=0.101, Accuracy=96.4, lr=7.11e-6]

Val Loss : 0.831. Val Accuracy 78.53%


Epoch : 4/5:  60%|█████▉    | 2.40k/4.00k [06:05<04:07, 6.47it/s, Loss=0.0786, Accuracy=97.2, lr=6.23e-6]

Evaluating...


Epoch : 4/5:  60%|█████▉    | 2.40k/4.00k [06:16<1:27:44, 3.29s/it, Loss=0.0786, Accuracy=97.3, lr=6.22e-6]

Val Loss : 0.829. Val Accuracy 78.62%


Epoch : 4/5:  80%|███████▉  | 3.20k/4.00k [08:09<01:57, 6.85it/s, Loss=0.0683, Accuracy=97.7, lr=5.34e-6]

Evaluating...


Epoch : 4/5:  80%|███████▉  | 3.20k/4.00k [08:20<44:43, 3.35s/it, Loss=0.0683, Accuracy=97.7, lr=5.34e-6]

Val Loss : 0.801. Val Accuracy 78.68%


Epoch : 4/5: 100%|█████████▉| 4.00k/4.00k [10:14<00:00, 7.19it/s, Loss=0.0611, Accuracy=98, lr=4.45e-6]

Evaluating...


Epoch : 4/5: 100%|█████████▉| 4.00k/4.00k [10:25<00:10, 3.36s/it, Loss=0.0611, Accuracy=98, lr=4.45e-6]

Val Loss : 0.795. Val Accuracy 77.85%


Epoch : 4/5: 100%|██████████| 4.00k/4.00k [10:25<00:00, 6.39it/s, Loss=0.0611, Accuracy=98, lr=4.45e-6]


Evaluating...

Epoch 4 complete! Val Loss : 0.794. Val Accuracy 77.97%


Epoch : 5/5:  20%|█▉        | 799/4.00k [01:55<08:01, 6.65it/s, Loss=0.127, Accuracy=95.2, lr=3.56e-6]

Evaluating...


Epoch : 5/5:  20%|██        | 800/4.00k [02:06<2:59:12, 3.36s/it, Loss=0.127, Accuracy=95.2, lr=3.56e-6]

Val Loss : 0.742. Val Accuracy 78.38%


Epoch : 5/5:  40%|███▉      | 1.60k/4.00k [04:00<06:00, 6.66it/s, Loss=0.0701, Accuracy=97.5, lr=2.67e-6]

Evaluating...


Epoch : 5/5:  40%|███▉      | 1.60k/4.00k [04:11<2:15:15, 3.38s/it, Loss=0.0701, Accuracy=97.5, lr=2.67e-6]

Val Loss : 0.942. Val Accuracy 78.38%


Epoch : 5/5:  60%|█████▉    | 2.40k/4.00k [06:04<04:10, 6.40it/s, Loss=0.0506, Accuracy=98.2, lr=1.78e-6]

Evaluating...


Epoch : 5/5:  60%|█████▉    | 2.40k/4.00k [06:15<1:29:01, 3.34s/it, Loss=0.0506, Accuracy=98.2, lr=1.78e-6]

Val Loss : 0.999. Val Accuracy 78.44%


Epoch : 5/5:  80%|███████▉  | 3.20k/4.00k [08:10<01:51, 7.22it/s, Loss=0.0407, Accuracy=98.6, lr=8.94e-7]

Evaluating...


Epoch : 5/5:  80%|███████▉  | 3.20k/4.00k [08:21<44:44, 3.35s/it, Loss=0.0408, Accuracy=98.6, lr=8.92e-7]

Val Loss : 1.010. Val Accuracy 78.71%


Epoch : 5/5: 100%|█████████▉| 4.00k/4.00k [10:13<00:00, 6.86it/s, Loss=0.0349, Accuracy=98.9, lr=5.56e-9]

Evaluating...


Epoch : 5/5: 100%|█████████▉| 4.00k/4.00k [10:24<00:10, 3.36s/it, Loss=0.0349, Accuracy=98.9, lr=4.45e-9]

Val Loss : 1.013. Val Accuracy 78.50%


Epoch : 5/5: 100%|██████████| 4.00k/4.00k [10:24<00:00, 6.40it/s, Loss=0.0348, Accuracy=98.9, lr=1.11e-9]


Evaluating...

Epoch 5 complete! Val Loss : 1.013. Val Accuracy 78.50%
The model has been saved in models/CANINE_lr_2e-05_val_loss_0.47008_ep_2_typo_0.2.pt

### Typo percentage = 0.2 ###
Evaluating...

train
   loss = 0.18673049206639356
   acc = 94.46554445851112
Evaluating...

validation
   loss = 0.47007722054471335
   acc = 78.11757719714964
Evaluating...

test
   loss = 0.5448144376277924
   acc = 75.11467889908256


Reusing dataset glue (/root/.cache/huggingface/datasets/glue/sst2/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)


  0%|          | 0/3 [00:00<?, ?it/s]

Loading cached split indices for dataset at /root/.cache/huggingface/datasets/glue/sst2/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad/cache-abf39e5d242edc33.arrow and /root/.cache/huggingface/datasets/glue/sst2/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad/cache-b8f06ab981dbf971.arrow


  0%|          | 0/3999 [00:00<?, ?ba/s]

  0%|          | 0/211 [00:00<?, ?ba/s]

  0%|          | 0/55 [00:00<?, ?ba/s]

  0%|          | 0/3999 [00:00<?, ?ba/s]

  0%|          | 0/211 [00:00<?, ?ba/s]

  0%|          | 0/55 [00:00<?, ?ba/s]

Some weights of CanineForSequenceClassification were not initialized from the model checkpoint at google/canine-c and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Epoch : 1/5:  20%|█▉        | 799/4.00k [01:55<07:22, 7.23it/s, Loss=0.691, Accuracy=53.6, lr=7.98e-6]

Evaluating...


Epoch : 1/5:  20%|██        | 800/4.00k [02:05<2:57:39, 3.33s/it, Loss=0.691, Accuracy=53.6, lr=7.99e-6]

Val Loss : 0.690. Val Accuracy 55.14%
Best validation loss improved from inf to 0.690



Epoch : 1/5:  40%|███▉      | 1.60k/4.00k [03:59<05:40, 7.06it/s, Loss=0.687, Accuracy=54.8, lr=1.6e-5]

Evaluating...


Epoch : 1/5:  40%|███▉      | 1.60k/4.00k [04:11<2:18:14, 3.46s/it, Loss=0.687, Accuracy=54.8, lr=1.6e-5]

Val Loss : 0.693. Val Accuracy 50.03%


Epoch : 1/5:  60%|█████▉    | 2.40k/4.00k [06:06<04:00, 6.67it/s, Loss=0.683, Accuracy=55.8, lr=1.96e-5]

Evaluating...


Epoch : 1/5:  60%|█████▉    | 2.40k/4.00k [06:17<1:29:57, 3.37s/it, Loss=0.683, Accuracy=55.8, lr=1.96e-5]

Val Loss : 0.668. Val Accuracy 59.06%
Best validation loss improved from 0.690 to 0.668



Epoch : 1/5:  80%|███████▉  | 3.20k/4.00k [08:13<01:59, 6.74it/s, Loss=0.678, Accuracy=56.7, lr=1.87e-5]

Evaluating...


Epoch : 1/5:  80%|███████▉  | 3.20k/4.00k [08:24<45:56, 3.44s/it, Loss=0.678, Accuracy=56.7, lr=1.87e-5]

Val Loss : 0.654. Val Accuracy 60.36%
Best validation loss improved from 0.668 to 0.654



Epoch : 1/5: 100%|█████████▉| 4.00k/4.00k [10:19<00:00, 6.29it/s, Loss=0.673, Accuracy=57.8, lr=1.78e-5]

Evaluating...


Epoch : 1/5: 100%|█████████▉| 4.00k/4.00k [10:30<00:10, 3.34s/it, Loss=0.673, Accuracy=57.8, lr=1.78e-5]

Val Loss : 0.647. Val Accuracy 61.88%
Best validation loss improved from 0.654 to 0.647



Epoch : 1/5: 100%|██████████| 4.00k/4.00k [10:31<00:00, 6.34it/s, Loss=0.673, Accuracy=57.8, lr=1.78e-5]


Evaluating...

Epoch 1 complete! Val Loss : 0.644. Val Accuracy 62.05%
Best validation loss improved from 0.647 to 0.644



Epoch : 2/5:  20%|█▉        | 799/4.00k [01:55<06:52, 7.75it/s, Loss=0.617, Accuracy=65.9, lr=1.69e-5]

Evaluating...


Epoch : 2/5:  20%|██        | 800/4.00k [02:06<2:58:24, 3.35s/it, Loss=0.617, Accuracy=65.9, lr=1.69e-5]

Val Loss : 0.648. Val Accuracy 62.47%


Epoch : 2/5:  40%|███▉      | 1.60k/4.00k [04:02<05:34, 7.18it/s, Loss=0.606, Accuracy=67.4, lr=1.6e-5]

Evaluating...


Epoch : 2/5:  40%|███▉      | 1.60k/4.00k [04:12<2:14:17, 3.36s/it, Loss=0.606, Accuracy=67.4, lr=1.6e-5]

Val Loss : 0.636. Val Accuracy 64.01%
Best validation loss improved from 0.644 to 0.636



Epoch : 2/5:  60%|█████▉    | 2.40k/4.00k [06:08<04:13, 6.32it/s, Loss=0.599, Accuracy=67.9, lr=1.51e-5]

Evaluating...


Epoch : 2/5:  60%|█████▉    | 2.40k/4.00k [06:19<1:30:19, 3.39s/it, Loss=0.599, Accuracy=67.9, lr=1.51e-5]

Val Loss : 0.630. Val Accuracy 64.04%
Best validation loss improved from 0.636 to 0.630



Epoch : 2/5:  80%|███████▉  | 3.20k/4.00k [08:13<01:52, 7.13it/s, Loss=0.593, Accuracy=68.5, lr=1.42e-5]

Evaluating...


Epoch : 2/5:  80%|███████▉  | 3.20k/4.00k [08:23<44:52, 3.36s/it, Loss=0.593, Accuracy=68.5, lr=1.42e-5]

Val Loss : 0.620. Val Accuracy 65.80%
Best validation loss improved from 0.630 to 0.620



Epoch : 2/5: 100%|█████████▉| 4.00k/4.00k [10:20<00:00, 7.36it/s, Loss=0.588, Accuracy=68.9, lr=1.33e-5]

Evaluating...


Epoch : 2/5: 100%|█████████▉| 4.00k/4.00k [10:31<00:10, 3.45s/it, Loss=0.588, Accuracy=68.9, lr=1.33e-5]

Val Loss : 0.609. Val Accuracy 66.51%
Best validation loss improved from 0.620 to 0.609



Epoch : 2/5: 100%|██████████| 4.00k/4.00k [10:31<00:00, 6.33it/s, Loss=0.588, Accuracy=68.9, lr=1.33e-5]


Evaluating...

Epoch 2 complete! Val Loss : 0.608. Val Accuracy 66.36%
Best validation loss improved from 0.609 to 0.608



Epoch : 3/5:  20%|█▉        | 799/4.00k [01:56<06:27, 8.27it/s, Loss=0.443, Accuracy=79.7, lr=1.24e-5]

Evaluating...


Epoch : 3/5:  20%|██        | 800/4.00k [02:07<2:58:19, 3.34s/it, Loss=0.443, Accuracy=79.7, lr=1.24e-5]

Val Loss : 0.714. Val Accuracy 65.71%


Epoch : 3/5:  40%|███▉      | 1.60k/4.00k [04:02<06:20, 6.30it/s, Loss=0.382, Accuracy=83.3, lr=1.16e-5]

Evaluating...


Epoch : 3/5:  40%|███▉      | 1.60k/4.00k [04:12<2:14:39, 3.37s/it, Loss=0.382, Accuracy=83.3, lr=1.16e-5]

Val Loss : 0.820. Val Accuracy 64.13%


Epoch : 3/5:  60%|█████▉    | 2.40k/4.00k [06:06<03:28, 7.67it/s, Loss=0.354, Accuracy=84.8, lr=1.07e-5]

Evaluating...


Epoch : 3/5:  60%|█████▉    | 2.40k/4.00k [06:17<1:29:00, 3.34s/it, Loss=0.354, Accuracy=84.8, lr=1.07e-5]

Val Loss : 0.841. Val Accuracy 66.42%


Epoch : 3/5:  80%|███████▉  | 3.20k/4.00k [08:11<01:48, 7.39it/s, Loss=0.338, Accuracy=85.6, lr=9.78e-6]

Evaluating...


Epoch : 3/5:  80%|███████▉  | 3.20k/4.00k [08:22<44:50, 3.35s/it, Loss=0.338, Accuracy=85.6, lr=9.78e-6]

Val Loss : 0.780. Val Accuracy 66.51%


Epoch : 3/5: 100%|█████████▉| 4.00k/4.00k [10:15<00:00, 7.63it/s, Loss=0.328, Accuracy=86.1, lr=8.89e-6]

Evaluating...


Epoch : 3/5: 100%|█████████▉| 4.00k/4.00k [10:26<00:09, 3.18s/it, Loss=0.328, Accuracy=86.1, lr=8.89e-6]

Val Loss : 0.795. Val Accuracy 66.03%


Epoch : 3/5: 100%|██████████| 4.00k/4.00k [10:27<00:00, 6.38it/s, Loss=0.328, Accuracy=86.1, lr=8.89e-6]


Evaluating...

Epoch 3 complete! Val Loss : 0.798. Val Accuracy 65.91%


Epoch : 4/5:  20%|█▉        | 799/4.00k [01:55<07:59, 6.67it/s, Loss=0.268, Accuracy=89, lr=8e-6]

Evaluating...


Epoch : 4/5:  20%|██        | 800/4.00k [02:06<2:59:20, 3.36s/it, Loss=0.269, Accuracy=89, lr=8e-6]

Val Loss : 0.997. Val Accuracy 66.18%


Epoch : 4/5:  40%|███▉      | 1.60k/4.00k [03:59<05:33, 7.20it/s, Loss=0.166, Accuracy=93.5, lr=7.11e-6]

Evaluating...


Epoch : 4/5:  40%|███▉      | 1.60k/4.00k [04:10<2:13:12, 3.33s/it, Loss=0.166, Accuracy=93.5, lr=7.11e-6]

Val Loss : 1.305. Val Accuracy 65.88%


Epoch : 4/5:  60%|█████▉    | 2.40k/4.00k [06:05<04:00, 6.65it/s, Loss=0.134, Accuracy=94.9, lr=6.23e-6]

Evaluating...


Epoch : 4/5:  60%|█████▉    | 2.40k/4.00k [06:16<1:30:10, 3.38s/it, Loss=0.134, Accuracy=94.9, lr=6.22e-6]

Val Loss : 1.198. Val Accuracy 65.47%


Epoch : 4/5:  80%|███████▉  | 3.20k/4.00k [08:10<01:59, 6.72it/s, Loss=0.118, Accuracy=95.6, lr=5.34e-6]

Evaluating...


Epoch : 4/5:  80%|███████▉  | 3.20k/4.00k [08:21<44:11, 3.31s/it, Loss=0.118, Accuracy=95.6, lr=5.34e-6]

Val Loss : 1.184. Val Accuracy 65.88%


Epoch : 4/5: 100%|█████████▉| 4.00k/4.00k [10:15<00:00, 6.81it/s, Loss=0.108, Accuracy=96.1, lr=4.45e-6]

Evaluating...


Epoch : 4/5: 100%|█████████▉| 4.00k/4.00k [10:26<00:10, 3.37s/it, Loss=0.108, Accuracy=96.1, lr=4.45e-6]

Val Loss : 1.223. Val Accuracy 65.26%


Epoch : 4/5: 100%|██████████| 4.00k/4.00k [10:26<00:00, 6.38it/s, Loss=0.108, Accuracy=96.1, lr=4.45e-6]


Evaluating...

Epoch 4 complete! Val Loss : 1.221. Val Accuracy 65.38%


Epoch : 5/5:  20%|█▉        | 799/4.00k [01:54<08:08, 6.56it/s, Loss=0.21, Accuracy=91.5, lr=3.56e-6]

Evaluating...


Epoch : 5/5:  20%|██        | 800/4.00k [02:05<2:59:42, 3.37s/it, Loss=0.209, Accuracy=91.5, lr=3.56e-6]

Val Loss : 1.065. Val Accuracy 65.50%


Epoch : 5/5:  40%|███▉      | 1.60k/4.00k [03:59<05:45, 6.95it/s, Loss=0.114, Accuracy=95.6, lr=2.67e-6]

Evaluating...


Epoch : 5/5:  40%|███▉      | 1.60k/4.00k [04:10<2:14:35, 3.36s/it, Loss=0.114, Accuracy=95.6, lr=2.67e-6]

Val Loss : 1.512. Val Accuracy 65.56%


Epoch : 5/5:  60%|█████▉    | 2.40k/4.00k [06:04<03:42, 7.21it/s, Loss=0.0799, Accuracy=97, lr=1.78e-6]

Evaluating...


Epoch : 5/5:  60%|█████▉    | 2.40k/4.00k [06:15<1:29:13, 3.34s/it, Loss=0.0799, Accuracy=97, lr=1.78e-6]

Val Loss : 1.645. Val Accuracy 66.09%


Epoch : 5/5:  80%|███████▉  | 3.20k/4.00k [08:10<01:56, 6.90it/s, Loss=0.064, Accuracy=97.6, lr=8.94e-7]

Evaluating...


Epoch : 5/5:  80%|███████▉  | 3.20k/4.00k [08:21<44:59, 3.37s/it, Loss=0.064, Accuracy=97.6, lr=8.92e-7]

Val Loss : 1.606. Val Accuracy 65.50%


Epoch : 5/5: 100%|█████████▉| 4.00k/4.00k [10:15<00:00, 6.58it/s, Loss=0.0549, Accuracy=98, lr=5.56e-9]

Evaluating...


Epoch : 5/5: 100%|█████████▉| 4.00k/4.00k [10:26<00:10, 3.36s/it, Loss=0.0549, Accuracy=98, lr=4.45e-9]

Val Loss : 1.620. Val Accuracy 65.47%


Epoch : 5/5: 100%|██████████| 4.00k/4.00k [10:26<00:00, 6.38it/s, Loss=0.0548, Accuracy=98, lr=1.11e-9]


Evaluating...

Epoch 5 complete! Val Loss : 1.620. Val Accuracy 65.47%
The model has been saved in models/CANINE_lr_2e-05_val_loss_0.60793_ep_2_typo_0.4.pt

### Typo percentage = 0.4 ###
Evaluating...

train
   loss = 0.4159156077427994
   acc = 86.34907238086306
Evaluating...

validation
   loss = 0.6079303105295552
   acc = 66.35985748218528
Evaluating...

test
   loss = 0.594926351850683
   acc = 67.66055045871559


## Print final results

In [None]:
from tabulate import tabulate

# Typo rates swept by the experiments above. Row i of `accs` / `losses` is
# labelled with noises[i]; the remaining columns are printed under the
# Train / Validation / Test headers below.
noises = [0, 0.05, 0.1, 0.2, 0.4]
table_header = ['Noise', 'Train', 'Validation', 'Test']

# Fail loudly on a partial or over-long sweep instead of printing a table
# whose rows are mislabelled or missing their noise value.
assert len(accs) == len(noises), 'expected one accuracy row per noise level'
assert len(losses) == len(noises), 'expected one loss row per noise level'

# Build fresh rows rather than prepending to `accs` / `losses` in place:
# the in-place version stacked one more noise column onto every row each
# time this cell was re-run.
acc_rows = [[noise] + list(row) for noise, row in zip(noises, accs)]
loss_rows = [[noise] + list(row) for noise, row in zip(noises, losses)]

print('Accuracy table')
print(tabulate([table_header] + acc_rows))

print('\n\nLosses table')
print(tabulate([table_header] + loss_rows))

Accuracy table
-----  -----------------  -----------------  -----------------
Noise  Train              Validation         Test
0      97.96033197355464  93.31947743467933  85.6651376146789
0.05   97.89937637736203  88.42042755344418  83.71559633027523
0.1    97.09132398680859  84.97624703087887  78.21100917431193
0.2    94.46554445851112  78.11757719714964  75.11467889908256
0.4    86.34907238086306  66.35985748218528  67.66055045871559
-----  -----------------  -----------------  -----------------


Losses table
-----  -------------------  -------------------  ------------------
Noise  Train                Validation           Test
0      0.07692918937306764  0.1916758729524522   0.3921936253255064
0.05   0.08488949112897815  0.27685173815425257  0.3999604359269142
0.1    0.10715573558490234  0.3514567346828526   0.5238751846280965
0.2    0.18673049206639356  0.47007722054471335  0.5448144376277924
0.4    0.4159156077427994   0.6079303105295552   0.594926351850683
-----  ------------