In [1]:
import math
import pytorch_lightning as pl
import torch
import torch.nn as nn
from torch.nn import Linear
from torch.nn import functional as F
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.loggers import TensorBoardLogger
from sklearn.model_selection import train_test_split
from tokenizers import Tokenizer
from torch.nn.utils.rnn import pad_sequence
import string
import pdb
import matplotlib.pyplot as plt
import numpy as np
from torch.utils.data import DataLoader
from functools import partial

In [2]:
def gen_trg_mask(length, device):
    return torch.triu(
        torch.ones(length, length, device=device) * float("-inf"), diagonal=1
    )

def create_padding_mask(tensor, pad_idx):
    padding_mask = (tensor == pad_idx).transpose(0, 1)

    return padding_mask


def masked_accuracy(y_true: torch.Tensor, y_pred: torch.Tensor, pad_idx):
    mask = y_true != pad_idx
    y_true = torch.masked_select(y_true, mask)
    y_pred = torch.masked_select(y_pred, mask)
    acc = (y_true == y_pred).double().mean()
    return acc


class PositionalEncoding(nn.Module):
    #  https://pytorch.org/tutorials/beginner/transformer_tutorial.html

    def __init__(self, d_model: int, dropout: float = 0.1, max_len: int = 256):
        super().__init__()
        self.dropout = nn.Dropout(p=dropout)

        position = torch.arange(max_len).unsqueeze(1)
        div_term = torch.exp(
            torch.arange(0, d_model, 2) * (-math.log(10000.0) / d_model)
        )
        pe = torch.zeros(max_len, 1, d_model)
        pe[:, 0, 0::2] = torch.sin(position * div_term)
        pe[:, 0, 1::2] = torch.cos(position * div_term)
        self.register_buffer("pe", pe)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        Args:
            x: Tensor, shape [seq_len, batch_size, embedding_dim]
        """

        x = x + self.pe[: x.size(0)]

        return self.dropout(x)


class TokenEmbedding(nn.Module):
    #  https://pytorch.org/tutorials/beginner/translation_transformer.html
    def __init__(self, vocab_size: int, emb_size):
        super(TokenEmbedding, self).__init__()
        self.embedding = nn.Embedding(vocab_size, emb_size)
        self.emb_size = emb_size

    def forward(self, tokens: torch.Tensor):
        return self.embedding(tokens.long()) * math.sqrt(self.emb_size)


class Seq2Seq(pl.LightningModule):
    def __init__(
        self,
        out_vocab_size,
        pad_idx,
        tokenizer,
        channels=256,
        dropout=0.1
    ):
        super().__init__()
        self.pad_idx = pad_idx
        self.dropout = dropout
        self.out_vocab_size = out_vocab_size

        self.embeddings = TokenEmbedding(
            vocab_size=self.out_vocab_size, emb_size=channels
        )

        self.pos_encoder = PositionalEncoding(d_model=channels, dropout=dropout)

        self.transformer = torch.nn.Transformer(
            d_model=channels,
            nhead=4,
            num_encoder_layers=6,
            num_decoder_layers=6,
            dim_feedforward=1024,
            dropout=dropout,
        )

        self.linear = Linear(channels, out_vocab_size)

        self.do = nn.Dropout(p=self.dropout)
        self.tokenizer = tokenizer

    def init_weights(self) -> None:
        init_range = 0.1
        self.embeddings.weight.data.uniform_(-init_range, init_range)
        self.linear.bias.data.zero_()
        self.linear.weight.data.uniform_(-init_range, init_range)

    def encode_src(self, src):
        #pdb.set_trace()
        src = src.permute(1, 0)
        src_pad_mask = create_padding_mask(src, self.pad_idx)
        src = self.embeddings(src)
        src = self.pos_encoder(src)
        src = self.transformer.encoder(src, src_key_padding_mask=src_pad_mask)
        #src = self.pos_encoder(src)
        return src

    def decode_trg(self, trg, memory):
        trg = trg.permute(1, 0)
        out_sequence_len, batch_size = trg.size(0), trg.size(1)
        trg_pad_mask = create_padding_mask(trg, self.pad_idx)
        trg = self.embeddings(trg)
        trg = self.pos_encoder(trg)
        trg_mask = gen_trg_mask(out_sequence_len, trg.device)
        out = self.transformer.decoder(
            tgt=trg, memory=memory, tgt_mask=trg_mask, tgt_key_padding_mask=trg_pad_mask
        )
        out = out.permute(1, 0, 2)
        out = self.linear(out)
        return out

    def forward(self, x):
        #pdb.set_trace()
        src, trg = x
        src = self.encode_src(src)
        out = self.decode_trg(trg=trg, memory=src)
        return out

    def training_step(self, batch, batch_idx):
        return self._step(batch, batch_idx, name="train")

    def on_validation_epoch_start(self):
        self.acc_sum = 0
        self.acc_num = 0
    
    def validation_step(self, batch, batch_idx):
        return self._step(batch, batch_idx, name="valid")

    def on_validation_epoch_end(self):
        avg_acc = (self.acc_sum / self.acc_num).item()
        self.log("val_avg_acc", avg_acc)
        print("Epoch, accuracy:", self.current_epoch, round(avg_acc, 4))
    
    def test_step(self, batch, batch_idx):
        return self._step(batch, batch_idx, name="test")

    def _step(self, batch, batch_idx, name="train"):
        src, trg = batch
        #pdb.set_trace()
        trg_in, trg_out = trg[:, :-1], trg[:, 1:]
        y_hat_orig = self((src, trg_in))
        y_hat = y_hat_orig.view(-1, y_hat_orig.size(2))
        y = trg_out.contiguous().view(-1)
        
        if batch_idx==0 and name=="valid":
            tb_logger = None
            for logger in self.trainer.loggers:
                if isinstance(logger, TensorBoardLogger):
                    tb_logger = logger.experiment
                    break
            
            _, this_pred = torch.max(y_hat_orig, 2)
            for i in range(16):                
                text_truth = self.tokenizer.decode(trg_out[i].cpu().numpy())
                text_corrupted = self.tokenizer.decode(src[i].cpu().numpy())
                text_pred = self.tokenizer.decode(this_pred[i].cpu().numpy())
                
                output = "Corrupted: {}<br>Restored:{}".format(text_corrupted, text_pred)
                tb_logger.add_text(f'Validation #{i}, target: {text_truth}', output, self.global_step)
                
        
        loss = F.cross_entropy(y_hat, y, ignore_index=self.pad_idx)
        _, predicted = torch.max(y_hat, 1)
        acc = masked_accuracy(y, predicted, pad_idx=self.pad_idx)

        #pdb.set_trace()
        
        self.log(f"{name}_loss", loss)
        self.log(f"{name}_acc", acc)
        
        self.acc_sum += acc
        self.acc_num += 1

        return loss

    def configure_optimizers(self):
        return torch.optim.AdamW(self.parameters(), lr=3e-5)

In [3]:
MAX_LEN = 256

def apply_perturbation_to_text(text):
    #There's a chance it'll be perfect
    if np.random.uniform() < 0.1:
        return text
    
    all_characters = string.ascii_letters + ' '
    words = text.split()
    if np.random.uniform() < 0.1 and len(words) > 1:
        #Get rid of word
        idx = np.random.randint(len(words))
        words.pop(idx)                
    
    #Change word endings
    for i in range(len(words)):
        if words[i].endswith("ium") and np.random.uniform() < 0.01:
            words[i] = words[i][:-3] + "ius"
        elif words[i].endswith("ius") and np.random.uniform() < 0.01:
            words[i] = words[i][:-3] + "ium"
        elif words[i].endswith("us") and np.random.uniform() < 0.01:
            words[i] = words[i][:-2] + "i"     
        elif words[i].endswith("nem") and np.random.uniform() < 0.01:
            words[i] = words[i][:-3] + "nus"
    
    curr_str = " ".join(words)
    new_str = ""
    
    for i in range(len(curr_str)):
        if np.random.uniform() < 0.01:
            new_str += np.random.choice(list(all_characters))
        else:
            new_str += curr_str[i]
            
        #There's a chance for a new character to pop up
        if np.random.uniform() < 0.01:
            new_str += np.random.choice(list(all_characters))
            
    return new_str

def generate_batch(data_batch, pad_idx):
    src, trg = [], []
    for (src_item, trg_item) in data_batch:
        src.append(src_item)
        trg.append(trg_item)
    src = pad_sequence(src, padding_value=pad_idx, batch_first=True)
    trg = pad_sequence(trg, padding_value=pad_idx, batch_first=True)
    return src, trg



class Dataset(torch.utils.data.Dataset):
    def __init__(self, samples, hf_tokenizer):
        self.samples = samples
        self.n_samples = len(self.samples)
        self.hf_tokenizer = hf_tokenizer

    def __len__(self):
        return self.n_samples
    
    def __getitem__(self, idx):
        text = self.samples[idx]
        perturbed_text = apply_perturbation_to_text(text)

        x = self.hf_tokenizer.encode(perturbed_text).ids
        y = self.hf_tokenizer.encode(text).ids
        #print(text, len(x), len(y))
        #assert(len(x) < MAX_LEN)
        #assert(len(y) < MAX_LEN)
        x = torch.tensor(x, dtype=torch.long)
        y = torch.tensor(y, dtype=torch.long)

        return x, y


In [4]:
def split_text(text):
    words = text.split()
    i = 0
    lines = []
    while True:
        num_words = np.random.randint(2, 30)
        lines.append(" ".join(words[i:i+num_words]))
        i += num_words
        if i >= len(words):
            break
            
    return lines

with open("decretum.txt") as f:
    data = f.read()
    lines = split_text(data)
    
with open("corpus_thomisticum.txt") as f:
    data = f.read()
    lines += split_text(data)
    
with open("misc_medieval.txt") as f:
    data = f.read()
    lines += split_text(data)        
    
tokenizer = Tokenizer.from_file("latin_tokenizer.json")    
train, val = train_test_split(lines, test_size=0.1, random_state=1337)
train_data = Dataset(samples=train, hf_tokenizer=tokenizer)
val_data = Dataset(samples=val, hf_tokenizer=tokenizer)

print("len(train_data)", len(train_data))
print("len(val_data)", len(val_data))

len(train_data) 704475
len(val_data) 78276


In [5]:
lines[200]

'iusserit sine dubitatione serviendum est? Sicut enim in potestatibus societatis humane maior potestas minori ad obediendum preponitur,'

In [6]:
for l in lines:
    if len(l) < 5: print(l)
    if len(l) > 250: print(l)

civium, testimonia populorum, honoratorum arbitrium, electio clericorum in ordinationibus sacerdotum constituantur. Et infra: Per pacem et quietem sacerdotes, qui futuri sunt, postulentur, teneatur subscriptio clericorum, honoratorum testimonium, ordines conventus
quemlibet ecclesiasticarum rerum amministratorem, ut, verbi gratia, prepositum, yconomum, vicedominum; defensoris nomine advocatum sive castaldum, et iudicem; in subiecto regule archipresbiterum, archidiaconum, canonicum, monachum vel quemlibet mancipatum ecclesiastico offitio.
PRESUMPTIONE, NEC TEMPORIS PRESCRIPTIONE MUTARI POTEST. Licet regulis contineatur antiquis, parrochias unicuique ecclesiae pristina dispositione deputatas nulla posse ratione convelli, ne per consuetudinem pessimam, exempli mali temeritate crescente, universalis
ECCLESIARUM NULLA DEBENT INPROBITATE CONVELLI. Privilegia ecclesiarum, sanctorum canonibus instituta, et venerabilis Nicene fixa decretis, nulla possunt STATUTA PRIORUM SUCCESSORES SERVARE OPOR

In [7]:
def generate_batch(data_batch, pad_idx):
    src, trg = [], []
    for (src_item, trg_item) in data_batch:
        src.append(src_item)
        trg.append(trg_item)
    src = pad_sequence(src, padding_value=pad_idx, batch_first=True)
    trg = pad_sequence(trg, padding_value=pad_idx, batch_first=True)
    return src, trg

In [None]:
train_loader = DataLoader(
    train_data,
    batch_size=128,
    num_workers=4,
    shuffle=True,
    collate_fn=partial(generate_batch, pad_idx=tokenizer.token_to_id("[PAD]")),
)
val_loader = DataLoader(
    val_data,
    batch_size=128,
    num_workers=4,
    shuffle=False,
    collate_fn=partial(generate_batch, pad_idx=tokenizer.token_to_id("[PAD]")),
)

model = Seq2Seq(
    out_vocab_size=tokenizer.get_vocab_size(),
    pad_idx=tokenizer.token_to_id("[PAD]"),
    tokenizer=tokenizer,
    dropout=0.1
)

checkpoint_callback = ModelCheckpoint(
    monitor="valid_acc", mode="max", dirpath="./", filename="checker"
)

logger = TensorBoardLogger(
        save_dir="./",
        name="autocorrect_logs",
    )

trainer = pl.Trainer(
    max_epochs=2000,
    logger=logger,
    callbacks=[checkpoint_callback],
)


trainer.fit(model, train_loader, val_loader)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 4070 Laptop GPU') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name        | Type               | Params
---------------------------------------------------
0 | embeddings  | TokenEmbedding     | 1.3 M 
1 | pos_encoder | PositionalEncoding | 0     
2 | transformer | Transformer        | 11.1 M
3 | linear      | Linear             | 1.3 M 
4 | do          | Dropout            | 0     
-------------------------

Sanity Checking: 0it [00:00, ?it/s]

Epoch, accuracy: 0 0.0002




Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 0 0.2248


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 1 0.284


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 2 0.3423


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 3 0.3792


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 4 0.4085


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 5 0.4342


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 6 0.4563


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 7 0.4744


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 8 0.4903


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 9 0.5051


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 10 0.5206


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 11 0.5335


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 12 0.5445


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 13 0.5517


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 14 0.5635


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 15 0.5708


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 16 0.5798


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 17 0.5857


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 18 0.5939


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 19 0.5994


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 20 0.6053


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 21 0.6112


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 22 0.6205


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 23 0.6257


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 24 0.6319


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 25 0.6375


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 26 0.6446


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 27 0.6505


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 28 0.6588


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 29 0.664


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 30 0.6693


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 31 0.6769


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 32 0.6814


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 33 0.6878


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 34 0.6916


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 35 0.6973


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 36 0.7051


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 37 0.7102


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 38 0.7154


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 39 0.7194


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 40 0.7237


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 41 0.7292


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 42 0.7372


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 43 0.7418


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 44 0.745


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 45 0.7497


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 46 0.7551


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 47 0.759


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 48 0.7634


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 49 0.7681


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 50 0.7735


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 51 0.7779


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 52 0.782


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 53 0.7871


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 54 0.7893


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 55 0.7938


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 56 0.7973


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 57 0.8009


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 58 0.8041


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 59 0.8072


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 60 0.8115


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 61 0.8149


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 62 0.8192


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 63 0.822


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 64 0.8265


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 65 0.8296


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 66 0.8315


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 67 0.8363


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 68 0.8364


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 69 0.8419


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 70 0.8449


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 71 0.8487


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 72 0.8512


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 73 0.8532


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 74 0.8565


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 75 0.8607


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 76 0.8617


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 77 0.8652


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 78 0.866


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 79 0.8699


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 80 0.8724


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 81 0.8755


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 82 0.8784


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 83 0.8803


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 84 0.8824


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 85 0.8832


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 86 0.886


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 87 0.8878


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 88 0.8903


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 89 0.8918


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 90 0.893


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 91 0.8961


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 92 0.8973


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 93 0.8997


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 94 0.9006


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 95 0.9022


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 96 0.9053


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 97 0.9059


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 98 0.9077


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 99 0.9083


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 100 0.9105


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 101 0.9127


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 102 0.9136


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 103 0.9153


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 104 0.9173


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 105 0.9177


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 106 0.9198


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 107 0.9201


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 108 0.9215


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 109 0.923


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 110 0.9254


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 111 0.9263


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 112 0.9277


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 113 0.9281


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 114 0.9293


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 115 0.9299


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 116 0.9312


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 117 0.9327


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 118 0.9337


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 119 0.9353


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 120 0.9352


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 121 0.9359


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 122 0.9371


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 123 0.9382


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 124 0.9396


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 125 0.9408


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 126 0.9417


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 127 0.9416


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 128 0.9428


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 129 0.9439


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 130 0.9447


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 131 0.9448


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 132 0.9463


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 133 0.9464


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 134 0.9479


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 135 0.9471


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 136 0.949


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 137 0.9498


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 138 0.9505


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 139 0.9507


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 140 0.9519


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 141 0.9523


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 142 0.9528


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 143 0.9542


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 144 0.9539


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 145 0.9544


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 146 0.9548


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 147 0.9555


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 148 0.9567


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 149 0.9572


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 150 0.9572


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 151 0.9574


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 152 0.9581


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 153 0.9588


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 154 0.9593


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 155 0.9595


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 156 0.9607


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 157 0.9608


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 158 0.9612


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 159 0.9618


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 160 0.9616


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 161 0.9627


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 162 0.9631


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 163 0.963


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 164 0.9633


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 165 0.9642


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 166 0.9642


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 167 0.9647


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 168 0.9649


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 169 0.9652


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 170 0.9666


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 171 0.9658


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 172 0.9664


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 173 0.9673


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 174 0.9668


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 175 0.9674


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 176 0.9679


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 177 0.9682


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 178 0.9689


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 179 0.9685


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 180 0.9694


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 181 0.9693


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 182 0.9696


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 183 0.9699


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 184 0.9706


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 185 0.971


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 186 0.9707


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 187 0.9713


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 188 0.9714


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 189 0.9713


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 190 0.972


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 191 0.9721


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 192 0.9722


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 193 0.9728


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 194 0.9727


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 195 0.9733


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 196 0.9733


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 197 0.9736


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 198 0.9738


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 199 0.9737


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 200 0.9738


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 201 0.974


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 202 0.9746


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 203 0.9747


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 204 0.9752


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 205 0.9751


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 206 0.9753


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 207 0.9751


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 208 0.9756


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 209 0.9756


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 210 0.9756


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 211 0.976


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 212 0.9762


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 213 0.9763


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 214 0.9767


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 215 0.9768


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 216 0.977


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 217 0.9769


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 218 0.9773


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 219 0.9775


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 220 0.9776


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 221 0.9777


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 222 0.9779


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 223 0.978


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 224 0.9781


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 225 0.978


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 226 0.9782


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 227 0.9786


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 228 0.9789


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 229 0.9788


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 230 0.9786


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 231 0.9789


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 232 0.9792


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 233 0.9791


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 234 0.9794


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 235 0.9795


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 236 0.9799


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 237 0.9798


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 238 0.9796


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 239 0.9797


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 240 0.9802


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 241 0.9802


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 242 0.9801


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 243 0.9803


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 244 0.9804


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 245 0.9806


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 246 0.9806


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 247 0.9805


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 248 0.981


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 249 0.9811


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 250 0.9809


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 251 0.9812


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 252 0.9814


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 253 0.9814


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 254 0.9814


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 255 0.9814


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 256 0.9814


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 257 0.9815


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 258 0.9819


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 259 0.9816


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 260 0.9816


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 261 0.9822


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 262 0.9822


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 263 0.9823


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 264 0.9821


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 265 0.9824


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 266 0.9825


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 267 0.9824


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 268 0.9825


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 269 0.9829


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 270 0.9828


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 271 0.9827


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 272 0.9829


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 273 0.9832


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 274 0.9829


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 275 0.9832


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 276 0.9832


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 277 0.9832


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 278 0.9832


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 279 0.9833


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 280 0.9834


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 281 0.9835


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 282 0.9835


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 283 0.9838


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 284 0.9837


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 285 0.9837


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 286 0.9838


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 287 0.9834


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 288 0.9838


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 289 0.9839


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 290 0.9838


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 291 0.984


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 292 0.9842


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 293 0.984


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 294 0.9845


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 295 0.9844


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 296 0.9843


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 297 0.9845


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 298 0.9844


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 299 0.9845


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 300 0.9846


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 301 0.9846


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 302 0.9846


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 303 0.9845


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 304 0.9848


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 305 0.9846


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 306 0.9845


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 307 0.985


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 308 0.9848


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 309 0.985


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 310 0.9851


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 311 0.9849


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 312 0.9852


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 313 0.9849


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 314 0.985


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 315 0.9853


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 316 0.9852


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 317 0.9852


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 318 0.9854


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 319 0.9854


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 320 0.9855


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 321 0.9854


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 322 0.9855


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 323 0.9855


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 324 0.9858


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 325 0.9858


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 326 0.9855


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 327 0.9855


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 328 0.9857


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 329 0.9858


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 330 0.9857


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 331 0.9858


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 332 0.986


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 333 0.9859


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 334 0.986


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 335 0.9859


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 336 0.986


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 337 0.986


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 338 0.9861


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 339 0.9859


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 340 0.9862


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 341 0.9862


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 342 0.986


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 343 0.9862


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 344 0.9864


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 345 0.9862


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 346 0.9863


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 347 0.9862


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 348 0.9864


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 349 0.9862


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 350 0.9864


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 351 0.9867


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 352 0.9863


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 353 0.9864


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 354 0.9867


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 355 0.9867


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 356 0.9866


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 357 0.9866


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 358 0.9868


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 359 0.9866


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 360 0.9868


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 361 0.9865


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 362 0.9865


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 363 0.9868


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 364 0.9868


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 365 0.987


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 366 0.987


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 367 0.987


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 368 0.987


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 369 0.987


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 370 0.9869


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 371 0.9872


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 372 0.9872


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 373 0.987


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 374 0.987


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 375 0.987


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 376 0.987


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 377 0.9872


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 378 0.9872


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 379 0.9872


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 380 0.9872


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 381 0.9871


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 382 0.9873


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 383 0.9872


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 384 0.9873


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 385 0.9872


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 386 0.9872


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 387 0.9873


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 388 0.9874


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 389 0.9874


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 390 0.9874


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 391 0.9874


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 392 0.9876


Validation: 0it [00:00, ?it/s]

Epoch, accuracy: 393 0.9876
