# Frame to Phoneme Classifier

In [1]:
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [2]:
drivepath_shallow = '/content/gdrive/MyDrive/DL_Group_Project/Dataset/Preprocessed_Data'

In [39]:
drivepath_spec = '/content/gdrive/MyDrive/DL_Group_Project/experiments/specialized_detectors/models'

In [42]:
drivepath_final = '/content/gdrive/MyDrive/DL_Group_Project/Dataset/Preprocessed_Data'

In [4]:
!pip install tqdm



In [5]:
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import os
from tqdm import tqdm
import time
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
import pandas as pd

In [6]:
NUM_EPOCHS = 100
BATCH_SIZE = 64
HIDDEN_SIZE_shallow = 128
HIDDEN_SIZE_spec = 128
MODEL_VERSION = 1
LEARNING_RATE = 0.01
LOGISTIC_THRESHOLD = 0.5
OTHER_PHONEMES_PERCENT = 0.1

In [7]:
cuda = torch.cuda.is_available()
num_workers = 8 if cuda else 0
DEVICE = "cuda" if cuda else "cpu"
print("Cuda = "+str(cuda)+" with num_workers = "+str(num_workers))

Cuda = True with num_workers = 8


In [8]:
class PhonemesDataset(Dataset):
    
    def __init__(self, basepath, mode):
      phoneme_features = np.zeros((1, 40))  # eliminate this row
      phoneme_labels = np.zeros((1))  # eliminate this row

      with os.scandir(basepath) as entries:
        for entry in entries:
          if entry.is_file():
            if "features" in entry.name and mode in entry.name:
              phoneme_tag = entry.name.split("_")[0]

              features_filepath = entry.path
              labels_filepath = f"{basepath}/{phoneme_tag}_{mode}_labels.npy"

              other_phoneme_features = np.load(features_filepath, allow_pickle=True)
              other_phoneme_labels = np.load(labels_filepath, allow_pickle=True)
              
              # stack to phoneme features
              phoneme_features = np.concatenate((phoneme_features, other_phoneme_features))
              phoneme_labels = np.concatenate((phoneme_labels, other_phoneme_labels))
              
      self.X = phoneme_features[1:]
      self.Y = phoneme_labels[1:]

    def __len__(self):
        return len(self.X)
 
    # get a row at an index
    def __getitem__(self, index):
        x = torch.Tensor(self.X[index]).float()
        y = torch.as_tensor(self.Y[index]).long()
    
        return x,y

In [9]:
class SpecializedDataset(Dataset):
    
    def __init__(self, datapath, mode, task_name, phonemes_class_0, phonemes_class_1):
      """
      phonemes_class_0: list of phoneme names for class 0
      phonemes_class_1: list of phoneme names for class 1
      """
      complete_features = np.zeros((1, 40))  # eliminate this row
      complete_labels = np.zeros((1))  # eliminate this row

      # go through all files in datapath, check phoneme if class=0 or class=1
      # assign that label and discard silence frames
      class_0_phonemes_found = []
      class_1_phonemes_found = []
      with os.scandir(datapath) as entries:
        for entry in entries:
          if entry.is_file():
            if "features" in entry.name and mode in entry.name:
              phoneme_tag = entry.name.split("_")[0]

              # find phoneme in class_0 or class_1 list and assign label
              phoneme_class = None
              if phoneme_tag in phonemes_class_0:
                phoneme_class = 0
                class_0_phonemes_found.append(phoneme_tag)
              if phoneme_tag in phonemes_class_1:
                phoneme_class = 1
                class_1_phonemes_found.append(phoneme_tag)

              if phoneme_class is None:
                print(f"phoneme '{phoneme_tag}' not found on class 0 nor class 1 lists; skip") if DEBUG else None
                continue

              print(f"phoneme '{phoneme_tag}' is class: {phoneme_class}") if DEBUG else None

              features_filepath = entry.path
              labels_filepath = f"{datapath}/{phoneme_tag}_{mode}_labels.npy"

              phoneme_features = np.load(features_filepath, allow_pickle=True)
              phoneme_labels = np.load(labels_filepath, allow_pickle=True)
              print(f"{phoneme_tag} total features: {phoneme_features.shape}") if DEBUG else None
              print(f"{phoneme_tag} total labels: {phoneme_labels.shape}") if DEBUG else None

              # find frames where label != 0 (non-silence)
              non_zero_indexes = phoneme_labels.nonzero()
              phoneme_features = phoneme_features[non_zero_indexes]
              phoneme_labels = phoneme_labels[non_zero_indexes]
              print(f"{phoneme_tag} no-silence features: {phoneme_features.shape}") if DEBUG else None
              print(f"{phoneme_tag} no-silence labels: {phoneme_labels.shape}") if DEBUG else None

              phoneme_labels[:] = phoneme_class  # label=class

              # stack to phoneme features
              complete_features = np.concatenate((complete_features, phoneme_features))
              complete_labels = np.concatenate((complete_labels, phoneme_labels))
      
      self.X = complete_features[1:]
      self.Y = complete_labels[1:]
      print(f"[task={task_name}] {self.X.shape} features")
      print(f"[task={task_name}] {self.Y.shape} labels")

      if sorted(class_0_phonemes_found) != sorted(phonemes_class_0):
        raise Exception(f"class 0 phonemes found ({sorted(class_0_phonemes_found)}) != expected phonemes ({sorted(phonemes_class_0)})")
 
      if sorted(class_1_phonemes_found) != sorted(phonemes_class_1):
        raise Exception(f"class 1 phonemes found ({sorted(class_1_phonemes_found)}) != expected phonemes ({sorted(phonemes_class_1)})")
 

    def __len__(self):
        return len(self.X)
 
    # get a row at an index
    def __getitem__(self, index):
        x = torch.Tensor(self.X[index]).float()
        y = torch.as_tensor(self.Y[index]).float()
    
        return x,y

In [10]:
def make_dataloader(dataset, train, batch_size):
  if train:
    shuffle = True
    drop_last = True
  else:
    shuffle = False
    drop_last = False
    
  loader = DataLoader(dataset=dataset, batch_size=batch_size,
                      drop_last=drop_last, shuffle=shuffle,
                      pin_memory=True, num_workers=8)
  
  return loader

In [11]:
class PhonemeShallowDetector(nn.Module):
  
  def __init__(self, hidden_size, activation):
    super(PhonemeShallowDetector, self).__init__()
    
    self.linear_layer = nn.Linear(in_features=40, out_features=hidden_size)
    self.bn_layer = nn.BatchNorm1d(num_features=hidden_size)
    self.activation = activation
    self.output_layer = nn.Linear(in_features=hidden_size, out_features=1)
    self.sigmoid = nn.Sigmoid()
    seq_params = [
      self.linear_layer,
      self.bn_layer,
      self.activation,
      self.output_layer,
      self.sigmoid
    ]

    self.network = nn.Sequential(*seq_params)
    
  def forward(self, x):
    return self.network(x)

In [12]:
class SpecializedShallowDetector(nn.Module):
  
  def __init__(self, hidden_size, activation):
    super(SpecializedShallowDetector, self).__init__()
    
    self.linear_layer = nn.Linear(in_features=40, out_features=hidden_size)
    self.bn_layer = nn.BatchNorm1d(num_features=hidden_size)
    self.activation = activation
    self.output_layer = nn.Linear(in_features=hidden_size, out_features=1)
    self.sigmoid = nn.Sigmoid()
    seq_params = [
      self.linear_layer,
      self.bn_layer,
      self.activation,
      self.output_layer,
      self.sigmoid
    ]

    self.network = nn.Sequential(*seq_params)
    
  def forward(self, x):
    return self.network(x)

In [28]:
class FramePhonemeClassifierModel(nn.Module):
  
  def __init__(self, phoneme_mapper, specialized_mapper):
    super(FramePhonemeClassifierModel, self).__init__()
    
    self.phoneme_mapper = phoneme_mapper
    self.specialized_mapper = specialized_mapper

    # specialized detectors

    specialized_detectors = []
    for spec_idx, spec_type in enumerate(self.specialized_mapper):
      specialized_detector = SpecializedShallowDetector(hidden_size=HIDDEN_SIZE_spec, 
                                            activation=nn.LeakyReLU()).to(DEVICE)
      specialized_detectors.append(specialized_detector)
    
    self.specialized_detectors = nn.ModuleList(specialized_detectors)


    shallow_detectors = []
    # generate the PhonemeShallowDetectors 
    for phoneme_index, phoneme_tag in phoneme_mapper.items():
      shallow_detector = PhonemeShallowDetector(hidden_size=HIDDEN_SIZE_shallow, 
                                                activation=nn.LeakyReLU())
      shallow_detectors.append(shallow_detector)

    self.shallow_detectors = nn.ModuleList(shallow_detectors)

    self.linear_layer = nn.Linear(in_features=len(phoneme_mapper)+len(specialized_mapper), out_features=len(phoneme_mapper)+len(specialized_mapper))

    self.initialize_specialized_detectors()
    self.initialize_shallow_detectors()

  def initialize_shallow_detectors(self):
    # load weights from shallow detectors pre-trained models
    for phoneme_index, phoneme_tag in self.phoneme_mapper.items():
      phoneme_shallow_detector = self.shallow_detectors[phoneme_index]

      phoneme_model_path = f"{drivepath_shallow}/shallow_detectors/model_{phoneme_tag}_{MODEL_VERSION}_99"
      temp = torch.load(phoneme_model_path)
      phoneme_shallow_detector.load_state_dict(temp['model_state_dict'])
  
  def initialize_specialized_detectors(self):
    # load weights from shallow detectors pre-trained models
     for spec_idx, spec_type in enumerate(self.specialized_mapper):
      specialized_detector = self.specialized_detectors[spec_idx]

      spec_model_path = f"{drivepath_spec}/model_{spec_type}_{MODEL_VERSION}_29"
      temp = torch.load(spec_model_path)
      specialized_detector.load_state_dict(temp['model_state_dict'])
  
  def forward(self, x):
    
    # go through specialized
    
    total_outputs = []
    for phoneme_index, phoneme_tag in self.phoneme_mapper.items():
      phoneme_shallow_detector = self.shallow_detectors[phoneme_index]

      # run frame through shallow detector
      output = phoneme_shallow_detector(x)
      total_outputs.append(output.reshape(-1))
    
    for spec_idx, spec_type in enumerate(self.specialized_mapper):
      specialized_detector = self.specialized_detectors[spec_idx]
      output = specialized_detector(x)
      total_outputs.append(output.reshape(-1))

    # convert to torch tensor
    total_outputs = torch.vstack(total_outputs).T
    outputs = self.linear_layer(total_outputs)

    return outputs

In [58]:
class FramePhonemeClassifier():

  def __init__(self, phoneme_mapper, specialized_mapper):

    train_data = PhonemesDataset(basepath=drivepath_shallow, mode="train")
    self.train_loader = make_dataloader(dataset=train_data, train=True, batch_size=BATCH_SIZE)
    print(f"train_data.shape: {train_data.X.shape}")

    dev_data = PhonemesDataset(basepath=drivepath_shallow, mode="dev")
    self.dev_loader = make_dataloader(dataset=dev_data, train=False, batch_size=BATCH_SIZE)
    print(f"dev_data.shape: {dev_data.X.shape}")
    
    self.model = FramePhonemeClassifierModel(phoneme_mapper, specialized_mapper).to(DEVICE)

    self.criterion = nn.CrossEntropyLoss()
    self.optimizer = torch.optim.SGD(self.model.parameters(), lr=LEARNING_RATE, momentum=0.9)
    self.scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(self.optimizer, 'min')

    self.train_loss_per_epoch = []
    self.train_acc_per_epoch = []
    self.dev_loss_per_epoch = []
    self.dev_acc_per_epoch = []
  
  def save_model(self, epoch):
    model_epoch_path = "{}/complete_classifier/model_{}_{}".format(drivepath_final,
                                                                 MODEL_VERSION, 
                                                                 epoch)
    torch.save({
        'model_state_dict': self.model.state_dict(),
        'optimizer_state_dict': self.optimizer.state_dict(),
        'scheduler_state_dict': self.scheduler.state_dict(),
    }, model_epoch_path)
    # print('saved model: {}'.format(model_epoch_path))

  def train(self, epochs):
    # Run training and track with wandb
    total_batches = len(self.train_loader) * epochs
    example_ct = 0  # number of examples seen
    batch_ct = 0

    for epoch in tqdm(range(epochs)):
        train_loss = 0.0
        start_time = time.time()
        total_predictions = 0
        correct_predictions = 0

        true_labels = []
        predictions = []
        for _, (features, targets) in enumerate(self.train_loader):
            batch_loss, outputs = self.train_batch(features, targets)
            train_loss += batch_loss

            example_ct += len(features)
            batch_ct += 1

            # check number of correct predictions
            output_classes = torch.argmax(outputs.log_softmax(1), dim=1).detach().cpu()  # convert to class labels
            total_predictions += len(output_classes)
            correct_predictions += torch.sum(targets == output_classes)

            true_labels += list(targets)
            predictions += list(output_classes)

        end_time = time.time()

        train_loss /= example_ct
        print(f"training loss: {train_loss}; time: {end_time - start_time}s")
        
        if (epoch + 1) % 10 == 0 or epoch == (epochs - 1):
          report = classification_report(true_labels, predictions, output_dict=True)
          df = pd.DataFrame(report).transpose()
          df.to_csv(f"{drivepath_final}/complete_classifier/reports_train_{MODEL_VERSION}_{epoch + 1}.csv", index=False)
          self.save_model(epoch)

        train_acc = (correct_predictions/total_predictions) * 100.0
        print(f"training accuracy: {train_acc}%")

        self.train_loss_per_epoch.append(train_loss)
        self.train_acc_per_epoch.append(train_acc)

        # evaluate model with validation data
        dev_loss, dev_acc = self.evaluate_model(epoch)
        
        self.dev_loss_per_epoch.append(dev_loss)
        self.dev_acc_per_epoch.append(dev_acc)

        # Step with the scheduler
        self.scheduler.step(dev_loss)
      
    # epoch completed, save model
    self.save_model(epoch)

  def train_batch(self, features, targets):
    features, targets = features.to(DEVICE), targets.to(DEVICE)
    targets = targets.reshape(-1, 1)

    self.optimizer.zero_grad()

    # Forward pass ➡
    outputs = self.model(features)
    loss = self.criterion(outputs, targets.reshape(-1))  # compare with target outputs
    # Backward pass ⬅
    loss.backward()
    # Step with optimizer
    self.optimizer.step()

    return loss.item(), outputs

  def evaluate_model(self, epoch):

    with torch.no_grad():
      self.model.eval()

      running_loss = 0.0
      total_predictions = 0.0
      correct_predictions = 0.0

      true_labels = []
      predictions = []

      example_ct = 0
      start_time = time.time()
      for batch_idx, (features, targets) in enumerate(self.dev_loader):
        features = features.to(DEVICE)
        targets = targets.to(DEVICE)
        targets = targets.reshape(-1, 1)

        example_ct += len(features)

        outputs = self.model(features)
        outputs = outputs.to(DEVICE)

        # check number of correct predictions
        output_classes = torch.argmax(outputs.log_softmax(1), dim=1)  # convert to class labels
        total_predictions += len(output_classes)
        correct_predictions += torch.sum(targets.reshape(-1) == output_classes)

        loss = self.criterion(outputs, targets.reshape(-1)).detach()
        running_loss += loss.item()

        true_labels += list(targets.detach().cpu())
        predictions += list(output_classes.detach().cpu())
      
      end_time = time.time()

      running_loss /= example_ct
      print(f"testing loss: {running_loss}; time: {end_time - start_time}s")
      acc = (correct_predictions/total_predictions) * 100.0
      print(f"testing accuracy: {acc}%")

      if (epoch + 1) % 10 == 0:
        report = classification_report(true_labels, predictions, output_dict=True)
        df = pd.DataFrame(report).transpose()
        df.to_csv(f"{drivepath_final}/complete_classifier/reports_dev_{MODEL_VERSION}_{epoch + 1}.csv", index=False)
  
      return running_loss, acc

# Train classifier

In [44]:
%cd /content/gdrive/MyDrive/DL_Group_Project/Dataset/Preprocessed_Data

/content/gdrive/.shortcut-targets-by-id/1qwJK2jyGMl2dPnVFe6JNZvrrG45HoonZ/DL_Group_Project/Dataset/Preprocessed_Data


In [31]:
from utilities import PHONEME_MAPPER
from utilities import SPECIALIZED_TASKS

In [32]:
%cd /

/


In [33]:
print(PHONEME_MAPPER)

{0: 'SIL', 1: 'AE', 2: 'AH', 3: 'AW', 4: 'AY', 5: 'B', 6: 'EH', 7: 'D', 8: 'DH', 9: 'EE', 10: 'FF', 11: 'G', 12: 'HH', 13: 'IH', 14: 'II', 15: 'J', 16: 'K', 17: 'LL', 18: 'MM', 19: 'NN', 20: 'OH', 21: 'OO', 22: 'OW', 23: 'OY', 24: 'P', 25: 'RR', 26: 'SH', 27: 'SS', 28: 'T', 29: 'TH', 30: 'UE', 31: 'UH', 32: 'VV', 33: 'WW', 34: 'YY', 35: 'ZZ', 36: 'CH', 37: 'ER', 38: 'NG'}


In [34]:
print(SPECIALIZED_TASKS)

{'1_vowel_vs_consonant': {0: ['EE', 'IH', 'EH', 'AE', 'UH', 'ER', 'AH', 'AW', 'OO', 'UE'], 1: ['FF', 'HH', 'MM', 'NN', 'NG', 'RR', 'SS', 'SH', 'VV', 'WW', 'YY', 'ZZ']}, '3_highvowel_vs_lowvowel': {0: ['EE', 'IH', 'UE', 'OO'], 1: ['AE', 'AH', 'AW']}, '4_voiced_vs_unvoiced_fricatives': {0: ['DH', 'VV', 'ZZ'], 1: ['FF', 'SS', 'SH', 'TH']}, '5_ss_vs_zz': {0: ['SS'], 1: ['ZZ']}, '6_b_vs_p': {0: ['B'], 1: ['P']}, '7_dh_vs_th': {0: ['DH'], 1: ['TH']}, '8_ww_vs_yy': {0: ['WW'], 1: ['YY']}, '9_ee_vs_aw': {0: ['EE'], 1: ['AW']}, '10_ah_vs_aw': {0: ['AH'], 1: ['AW']}, '11_mm_vs_nn': {0: ['MM'], 1: ['NN']}}


In [59]:
classifier = FramePhonemeClassifier(PHONEME_MAPPER, SPECIALIZED_TASKS)
classifier.train(epochs=NUM_EPOCHS)


  cpuset_checked))


train_data.shape: (53755, 40)
dev_data.shape: (11535, 40)








  0%|          | 0/100 [00:00<?, ?it/s][A[A[A[A[A[A

training loss: 0.02446505931873181; time: 32.54579973220825s
training accuracy: 64.27108001708984%








  1%|          | 1/100 [00:35<57:48, 35.03s/it][A[A[A[A[A[A

testing loss: 0.02475894945598961; time: 2.4723756313323975s
testing accuracy: 65.2102279663086%
training loss: 0.008879737421776989; time: 29.922794818878174s
training accuracy: 75.43020629882812%








  2%|▏         | 2/100 [01:07<55:55, 34.24s/it][A[A[A[A[A[A

testing loss: 0.020255918911927493; time: 2.463099241256714s
testing accuracy: 71.90290069580078%
training loss: 0.005025507067468936; time: 29.736472845077515s
training accuracy: 79.33924102783203%








  3%|▎         | 3/100 [01:39<54:20, 33.62s/it][A[A[A[A[A[A

testing loss: 0.018589723095988938; time: 2.400667190551758s
testing accuracy: 74.35630798339844%
training loss: 0.0034398830318723554; time: 29.859434604644775s
training accuracy: 81.28910827636719%








  4%|▍         | 4/100 [02:11<53:08, 33.22s/it][A[A[A[A[A[A

testing loss: 0.017108033771219323; time: 2.411463975906372s
testing accuracy: 76.23753356933594%
training loss: 0.0025985979305245027; time: 29.806447982788086s
training accuracy: 82.32270812988281%








  5%|▌         | 5/100 [02:44<52:14, 32.99s/it][A[A[A[A[A[A

testing loss: 0.016612770496558772; time: 2.642268657684326s
testing accuracy: 77.5899429321289%
training loss: 0.0020606021559991484; time: 30.197794437408447s
training accuracy: 83.26318359375%








  6%|▌         | 6/100 [03:17<51:36, 32.94s/it][A[A[A[A[A[A

testing loss: 0.015993594731869096; time: 2.6073694229125977s
testing accuracy: 78.18811798095703%
training loss: 0.0017107970242383947; time: 30.239437341690063s
training accuracy: 83.88147735595703%








  7%|▋         | 7/100 [03:49<50:56, 32.87s/it][A[A[A[A[A[A

testing loss: 0.01552355457783572; time: 2.435168981552124s
testing accuracy: 79.4625015258789%
training loss: 0.0014550096469296363; time: 29.8156840801239s
training accuracy: 84.2502212524414%








  8%|▊         | 8/100 [04:22<50:06, 32.68s/it][A[A[A[A[A[A

testing loss: 0.015292251470495108; time: 2.4036953449249268s
testing accuracy: 79.04637908935547%
training loss: 0.0012555770604861223; time: 30.094797134399414s
training accuracy: 84.77354431152344%








  9%|▉         | 9/100 [04:54<49:27, 32.61s/it][A[A[A[A[A[A

testing loss: 0.01495902212566765; time: 2.3522698879241943s
testing accuracy: 79.66189575195312%
training loss: 0.0011121139246126786; time: 29.235230207443237s
training accuracy: 85.06592559814453%


  _warn_prf(average, modifier, msg_start, len(result))






 10%|█         | 10/100 [05:27<49:01, 32.68s/it][A[A[A[A[A[A

testing loss: 0.014874254914398673; time: 2.3803389072418213s
testing accuracy: 79.79193115234375%
training loss: 0.0009978171159712194; time: 29.759512662887573s
training accuracy: 85.23353576660156%








 11%|█         | 11/100 [05:59<48:19, 32.58s/it][A[A[A[A[A[A

testing loss: 0.01478563892696655; time: 2.552961587905884s
testing accuracy: 80.30342102050781%
training loss: 0.0008932535495793311; time: 29.760931730270386s
training accuracy: 85.55199432373047%








 12%|█▏        | 12/100 [06:31<47:36, 32.46s/it][A[A[A[A[A[A

testing loss: 0.01474789635968198; time: 2.4095075130462646s
testing accuracy: 80.49414825439453%
training loss: 0.0008193761249017946; time: 29.705825567245483s
training accuracy: 85.6916732788086%








 13%|█▎        | 13/100 [07:04<46:54, 32.35s/it][A[A[A[A[A[A

testing loss: 0.01438806214190997; time: 2.3728768825531006s
testing accuracy: 80.81491088867188%
training loss: 0.00074963391216644; time: 29.36022138595581s
training accuracy: 85.80341339111328%








 14%|█▍        | 14/100 [07:35<46:06, 32.17s/it][A[A[A[A[A[A

testing loss: 0.014887161102060327; time: 2.378638744354248s
testing accuracy: 79.84394836425781%
training loss: 0.000688634676038094; time: 29.438009023666382s
training accuracy: 86.05854797363281%








 15%|█▌        | 15/100 [08:07<45:23, 32.04s/it][A[A[A[A[A[A

testing loss: 0.014278194381914772; time: 2.2699532508850098s
testing accuracy: 80.8322525024414%
training loss: 0.000639357275723039; time: 29.218945264816284s
training accuracy: 86.22244262695312%








 16%|█▌        | 16/100 [08:39<44:42, 31.94s/it][A[A[A[A[A[A

testing loss: 0.013840847609360392; time: 2.4781038761138916s
testing accuracy: 81.4911117553711%
training loss: 0.0005966572625173252; time: 30.284449815750122s
training accuracy: 86.39749908447266%








 17%|█▋        | 17/100 [09:12<44:35, 32.23s/it][A[A[A[A[A[A

testing loss: 0.014238861329469218; time: 2.61120343208313s
testing accuracy: 80.99696350097656%
training loss: 0.0005586504863264043; time: 29.924036026000977s
training accuracy: 86.47571563720703%








 18%|█▊        | 18/100 [09:44<44:07, 32.29s/it][A[A[A[A[A[A

testing loss: 0.013624944865677746; time: 2.4745190143585205s
testing accuracy: 81.59513854980469%
training loss: 0.0005230043838927603; time: 30.243069887161255s
training accuracy: 86.59862518310547%








 19%|█▉        | 19/100 [10:17<43:48, 32.45s/it][A[A[A[A[A[A

testing loss: 0.013845005314304656; time: 2.5716264247894287s
testing accuracy: 81.28305053710938%
training loss: 0.0004946052843978999; time: 30.407945156097412s
training accuracy: 86.65450286865234%








 20%|██        | 20/100 [10:51<43:58, 32.98s/it][A[A[A[A[A[A

testing loss: 0.013492289700314861; time: 2.568869113922119s
testing accuracy: 81.66449737548828%
training loss: 0.0004652389644205526; time: 30.05748987197876s
training accuracy: 86.92639923095703%








 21%|██        | 21/100 [11:24<43:15, 32.86s/it][A[A[A[A[A[A

testing loss: 0.01388797724678447; time: 2.4940173625946045s
testing accuracy: 81.59513854980469%
training loss: 0.00044297084048017417; time: 30.141093969345093s
training accuracy: 86.87984466552734%








 22%|██▏       | 22/100 [11:56<42:36, 32.78s/it][A[A[A[A[A[A

testing loss: 0.01354775344738983; time: 2.449798107147217s
testing accuracy: 81.9592514038086%
training loss: 0.0004196236834261869; time: 29.778456687927246s
training accuracy: 86.95806121826172%








 23%|██▎       | 23/100 [12:29<41:56, 32.68s/it][A[A[A[A[A[A

testing loss: 0.013741410419984354; time: 2.632241725921631s
testing accuracy: 81.74251556396484%
training loss: 0.0004007380472521683; time: 29.611109256744385s
training accuracy: 87.12008666992188%








 24%|██▍       | 24/100 [13:01<41:07, 32.47s/it][A[A[A[A[A[A

testing loss: 0.013740626920545881; time: 2.36993145942688s
testing accuracy: 81.37841033935547%
training loss: 0.0003821133953853287; time: 30.045774459838867s
training accuracy: 87.09028625488281%








 25%|██▌       | 25/100 [13:33<40:36, 32.48s/it][A[A[A[A[A[A

testing loss: 0.013366759070132685; time: 2.4513661861419678s
testing accuracy: 82.00260162353516%
training loss: 0.00036586814192744054; time: 30.371999740600586s
training accuracy: 87.21505737304688%








 26%|██▌       | 26/100 [14:06<40:12, 32.60s/it][A[A[A[A[A[A

testing loss: 0.013817840287527276; time: 2.4651432037353516s
testing accuracy: 81.04031372070312%
training loss: 0.0003491387385053558; time: 30.331545114517212s
training accuracy: 87.32307434082031%








 27%|██▋       | 27/100 [14:39<39:44, 32.66s/it][A[A[A[A[A[A

testing loss: 0.013455816444556953; time: 2.458246946334839s
testing accuracy: 82.04594421386719%
training loss: 0.0003362629117926866; time: 30.016332626342773s
training accuracy: 87.40316009521484%








 28%|██▊       | 28/100 [15:11<39:08, 32.62s/it][A[A[A[A[A[A

testing loss: 0.013139480404074593; time: 2.501657485961914s
testing accuracy: 82.36670684814453%
training loss: 0.00032252951702834494; time: 30.214332580566406s
training accuracy: 87.38267517089844%








 29%|██▉       | 29/100 [15:44<38:39, 32.67s/it][A[A[A[A[A[A

testing loss: 0.013348825835256697; time: 2.5447027683258057s
testing accuracy: 81.8638916015625%
training loss: 0.0003087114857326206; time: 30.167734146118164s
training accuracy: 87.53910827636719%








 30%|███       | 30/100 [16:18<38:36, 33.10s/it][A[A[A[A[A[A

testing loss: 0.013177321990770234; time: 2.7172110080718994s
testing accuracy: 82.29735565185547%
training loss: 0.0002985550863758916; time: 30.307889938354492s
training accuracy: 87.61360168457031%








 31%|███       | 31/100 [16:51<37:57, 33.00s/it][A[A[A[A[A[A

testing loss: 0.013370787287409501; time: 2.45810866355896s
testing accuracy: 82.35803985595703%
training loss: 0.0002870617044620056; time: 30.30600118637085s
training accuracy: 87.7048568725586%








 32%|███▏      | 32/100 [17:24<37:18, 32.92s/it][A[A[A[A[A[A

testing loss: 0.013140752386442241; time: 2.3988592624664307s
testing accuracy: 82.28001403808594%
training loss: 0.000277452193814877; time: 29.779545307159424s
training accuracy: 87.74024200439453%








 33%|███▎      | 33/100 [17:56<36:35, 32.77s/it][A[A[A[A[A[A

testing loss: 0.01343568710987321; time: 2.6283035278320312s
testing accuracy: 82.15864562988281%
training loss: 0.00026959274071905703; time: 29.77691650390625s
training accuracy: 87.7774887084961%








 34%|███▍      | 34/100 [18:28<35:52, 32.61s/it][A[A[A[A[A[A

testing loss: 0.013059667031848694; time: 2.460549831390381s
testing accuracy: 82.53142547607422%
training loss: 0.00026088141433123314; time: 29.65746235847473s
training accuracy: 87.72906494140625%








 35%|███▌      | 35/100 [19:01<35:10, 32.47s/it][A[A[A[A[A[A

testing loss: 0.013305325470267306; time: 2.4642701148986816s
testing accuracy: 82.4273910522461%
training loss: 0.00025251191924526543; time: 30.328480005264282s
training accuracy: 87.80728912353516%








 36%|███▌      | 36/100 [19:33<34:45, 32.59s/it][A[A[A[A[A[A

testing loss: 0.013342140350808919; time: 2.5164859294891357s
testing accuracy: 82.19332122802734%
training loss: 0.00024301977533384717; time: 30.39446210861206s
training accuracy: 87.90412902832031%








 37%|███▋      | 37/100 [20:06<34:18, 32.67s/it][A[A[A[A[A[A

testing loss: 0.013170019990962772; time: 2.4624595642089844s
testing accuracy: 82.82617950439453%
training loss: 0.00023580879443108646; time: 30.218786001205444s
training accuracy: 87.98420715332031%








 38%|███▊      | 38/100 [20:39<33:44, 32.66s/it][A[A[A[A[A[A

testing loss: 0.013244974703617204; time: 2.3893604278564453s
testing accuracy: 82.61811828613281%
training loss: 0.00023010276539315477; time: 30.38918709754944s
training accuracy: 87.95440673828125%








 39%|███▉      | 39/100 [21:12<33:15, 32.71s/it][A[A[A[A[A[A

testing loss: 0.012954696556888311; time: 2.4282262325286865s
testing accuracy: 82.97355651855469%
training loss: 0.00022289475014808458; time: 29.793347358703613s
training accuracy: 88.17788696289062%








 40%|████      | 40/100 [21:45<32:59, 32.98s/it][A[A[A[A[A[A

testing loss: 0.013338463808041297; time: 2.633784532546997s
testing accuracy: 81.81187438964844%
training loss: 0.00021726549392679092; time: 30.10502791404724s
training accuracy: 88.07360076904297%








 41%|████      | 41/100 [22:18<32:17, 32.84s/it][A[A[A[A[A[A

testing loss: 0.013223726181277063; time: 2.3838109970092773s
testing accuracy: 82.37537384033203%
training loss: 0.00021069263393856804; time: 30.39018416404724s
training accuracy: 88.16485595703125%








 42%|████▏     | 42/100 [22:51<31:44, 32.84s/it][A[A[A[A[A[A

testing loss: 0.013308901521593242; time: 2.42399001121521s
testing accuracy: 82.36670684814453%
training loss: 0.0002056026055623181; time: 30.290581226348877s
training accuracy: 88.1238784790039%








 43%|████▎     | 43/100 [23:24<31:12, 32.84s/it][A[A[A[A[A[A

testing loss: 0.012973922000827342; time: 2.5539331436157227s
testing accuracy: 82.55742645263672%
training loss: 0.00020062171468390735; time: 30.328124284744263s
training accuracy: 88.16112518310547%








 44%|████▍     | 44/100 [23:56<30:39, 32.85s/it][A[A[A[A[A[A

testing loss: 0.01271158893631814; time: 2.5027313232421875s
testing accuracy: 82.808837890625%
training loss: 0.00019524559189794025; time: 30.670779943466187s
training accuracy: 88.1685791015625%








 45%|████▌     | 45/100 [24:30<30:11, 32.93s/it][A[A[A[A[A[A

testing loss: 0.013046195468811644; time: 2.4510185718536377s
testing accuracy: 82.73948669433594%
training loss: 0.00019114335872131175; time: 30.599334478378296s
training accuracy: 88.16485595703125%








 46%|████▌     | 46/100 [25:03<29:41, 33.00s/it][A[A[A[A[A[A

testing loss: 0.012847790094903914; time: 2.5384902954101562s
testing accuracy: 82.65279388427734%
training loss: 0.0001859196699545261; time: 30.52886962890625s
training accuracy: 88.25611114501953%








 47%|████▋     | 47/100 [25:36<29:11, 33.04s/it][A[A[A[A[A[A

testing loss: 0.012736162085991997; time: 2.601428747177124s
testing accuracy: 82.94754791259766%
training loss: 0.00018184680080185223; time: 30.536898612976074s
training accuracy: 88.24307250976562%








 48%|████▊     | 48/100 [26:09<28:38, 33.05s/it][A[A[A[A[A[A

testing loss: 0.012633118648962072; time: 2.51887845993042s
testing accuracy: 83.05158233642578%
training loss: 0.00017673725836038427; time: 29.96495532989502s
training accuracy: 88.42371368408203%








 49%|████▉     | 49/100 [26:42<27:58, 32.92s/it][A[A[A[A[A[A

testing loss: 0.012680876532131116; time: 2.635787010192871s
testing accuracy: 82.8175048828125%
training loss: 0.0001727177932996112; time: 29.887508153915405s
training accuracy: 88.4386215209961%








 50%|█████     | 50/100 [27:15<27:34, 33.09s/it][A[A[A[A[A[A

testing loss: 0.012549526695570596; time: 2.37715744972229s
testing accuracy: 83.34632873535156%
training loss: 0.0001692837030906106; time: 29.572587728500366s
training accuracy: 88.41254425048828%








 51%|█████     | 51/100 [27:47<26:47, 32.81s/it][A[A[A[A[A[A

testing loss: 0.013066181843744298; time: 2.5909881591796875s
testing accuracy: 82.86952209472656%
training loss: 0.00016530861948196098; time: 30.51704716682434s
training accuracy: 88.42558288574219%








 52%|█████▏    | 52/100 [28:20<26:17, 32.87s/it][A[A[A[A[A[A

testing loss: 0.013007889587252906; time: 2.476811408996582s
testing accuracy: 82.73081970214844%
training loss: 0.0001612957896270612; time: 30.681401252746582s
training accuracy: 88.59319305419922%








 53%|█████▎    | 53/100 [28:53<25:49, 32.97s/it][A[A[A[A[A[A

testing loss: 0.012903441405420341; time: 2.4855682849884033s
testing accuracy: 82.7134780883789%
training loss: 0.00015774657776413084; time: 30.01377010345459s
training accuracy: 88.58946228027344%








 54%|█████▍    | 54/100 [29:26<25:10, 32.83s/it][A[A[A[A[A[A

testing loss: 0.012587853386436235; time: 2.4871702194213867s
testing accuracy: 82.8955307006836%
training loss: 0.00015496937478333954; time: 29.80977201461792s
training accuracy: 88.54849243164062%








 55%|█████▌    | 55/100 [29:58<24:31, 32.70s/it][A[A[A[A[A[A

testing loss: 0.012910206962133002; time: 2.573627233505249s
testing accuracy: 83.13826751708984%
training loss: 0.000151862240062913; time: 29.68813943862915s
training accuracy: 88.58573913574219%








 56%|█████▌    | 56/100 [30:30<23:51, 32.53s/it][A[A[A[A[A[A

testing loss: 0.013351975281904103; time: 2.437527894973755s
testing accuracy: 82.58344268798828%
training loss: 0.00014918019823796828; time: 30.386125564575195s
training accuracy: 88.6192626953125%








 57%|█████▋    | 57/100 [31:03<23:24, 32.67s/it][A[A[A[A[A[A

testing loss: 0.012534800141494306; time: 2.5921099185943604s
testing accuracy: 83.32032775878906%
training loss: 0.00014667302350894533; time: 30.44461727142334s
training accuracy: 88.62857818603516%








 58%|█████▊    | 58/100 [31:36<22:55, 32.75s/it][A[A[A[A[A[A

testing loss: 0.012884924883094761; time: 2.4719817638397217s
testing accuracy: 83.268310546875%
training loss: 0.00014309183333302157; time: 30.086248874664307s
training accuracy: 88.68258666992188%








 59%|█████▉    | 59/100 [32:09<22:20, 32.70s/it][A[A[A[A[A[A

testing loss: 0.012408958336699303; time: 2.49407696723938s
testing accuracy: 83.32899475097656%
training loss: 0.0001405467879046369; time: 29.989476442337036s
training accuracy: 88.58946228027344%








 60%|██████    | 60/100 [32:43<22:00, 33.01s/it][A[A[A[A[A[A

testing loss: 0.01266729356692785; time: 2.5187394618988037s
testing accuracy: 83.10359954833984%
training loss: 0.00013780228036028663; time: 29.820563077926636s
training accuracy: 88.67513275146484%








 61%|██████    | 61/100 [33:15<21:18, 32.78s/it][A[A[A[A[A[A

testing loss: 0.012660362890234849; time: 2.3854358196258545s
testing accuracy: 82.9128646850586%
training loss: 0.00013572907524198144; time: 30.115075826644897s
training accuracy: 88.73472595214844%








 62%|██████▏   | 62/100 [33:48<20:43, 32.71s/it][A[A[A[A[A[A

testing loss: 0.012262183006030944; time: 2.428199291229248s
testing accuracy: 83.51972198486328%
training loss: 0.00013290861053432288; time: 30.334436655044556s
training accuracy: 88.74217987060547%








 63%|██████▎   | 63/100 [34:20<20:12, 32.76s/it][A[A[A[A[A[A

testing loss: 0.012261972095499216; time: 2.539623975753784s
testing accuracy: 83.41568756103516%
training loss: 0.00013120862069553323; time: 30.280269384384155s
training accuracy: 88.74962615966797%








 64%|██████▍   | 64/100 [34:53<19:39, 32.76s/it][A[A[A[A[A[A

testing loss: 0.012668355639925445; time: 2.4594686031341553s
testing accuracy: 83.49371337890625%
training loss: 0.00012862857667184658; time: 30.265466451644897s
training accuracy: 88.79991149902344%








 65%|██████▌   | 65/100 [35:26<19:05, 32.74s/it][A[A[A[A[A[A

testing loss: 0.012672896949331327; time: 2.3909053802490234s
testing accuracy: 83.23362731933594%
training loss: 0.00012611351216817527; time: 30.105614185333252s
training accuracy: 88.75521087646484%








 66%|██████▌   | 66/100 [35:58<18:31, 32.68s/it][A[A[A[A[A[A

testing loss: 0.01277672388902928; time: 2.443894863128662s
testing accuracy: 82.63545989990234%
training loss: 0.0001237011612759689; time: 29.770917654037476s
training accuracy: 88.89116668701172%








 67%|██████▋   | 67/100 [36:31<17:55, 32.61s/it][A[A[A[A[A[A

testing loss: 0.012627515785224933; time: 2.632770538330078s
testing accuracy: 83.52838897705078%
training loss: 0.0001219587905904867; time: 30.283246517181396s
training accuracy: 88.81853485107422%








 68%|██████▊   | 68/100 [37:04<17:24, 32.65s/it][A[A[A[A[A[A

testing loss: 0.01239785573436783; time: 2.457432270050049s
testing accuracy: 83.31166076660156%
training loss: 0.00011974189833741078; time: 30.270237684249878s
training accuracy: 88.84646606445312%








 69%|██████▉   | 69/100 [37:36<16:53, 32.69s/it][A[A[A[A[A[A

testing loss: 0.012365598986946663; time: 2.486846685409546s
testing accuracy: 83.78846740722656%
training loss: 0.00011772661689188723; time: 30.244672775268555s
training accuracy: 88.95075988769531%








 70%|███████   | 70/100 [38:10<16:33, 33.11s/it][A[A[A[A[A[A

testing loss: 0.012369528310765009; time: 2.627121686935425s
testing accuracy: 83.20762634277344%
training loss: 0.00011589865228175756; time: 29.931204080581665s
training accuracy: 89.0029067993164%








 71%|███████   | 71/100 [38:43<15:53, 32.88s/it][A[A[A[A[A[A

testing loss: 0.01260760759229933; time: 2.3921682834625244s
testing accuracy: 83.04290771484375%
training loss: 0.00011443540179268721; time: 29.78603982925415s
training accuracy: 88.93399810791016%








 72%|███████▏  | 72/100 [39:15<15:15, 32.68s/it][A[A[A[A[A[A

testing loss: 0.01241113292186561; time: 2.4100310802459717s
testing accuracy: 83.54572296142578%
training loss: 0.00011269194852750956; time: 30.250083923339844s
training accuracy: 88.95634460449219%








 73%|███████▎  | 73/100 [39:48<14:42, 32.67s/it][A[A[A[A[A[A

testing loss: 0.012415699999881194; time: 2.3875279426574707s
testing accuracy: 83.6324234008789%
training loss: 0.00010245138123399629; time: 30.593526363372803s
training accuracy: 89.91731262207031%








 74%|███████▍  | 74/100 [40:21<14:12, 32.79s/it][A[A[A[A[A[A

testing loss: 0.011983605934767999; time: 2.4441113471984863s
testing accuracy: 83.97918701171875%
training loss: 0.00010002049215635256; time: 30.352471590042114s
training accuracy: 90.08492279052734%








 75%|███████▌  | 75/100 [40:54<13:40, 32.80s/it][A[A[A[A[A[A

testing loss: 0.011907422225106884; time: 2.4709925651550293s
testing accuracy: 84.11790466308594%
training loss: 9.842428751529379e-05; time: 30.53516387939453s
training accuracy: 90.06443786621094%








 76%|███████▌  | 76/100 [41:27<13:08, 32.86s/it][A[A[A[A[A[A

testing loss: 0.01188446533630873; time: 2.436007499694824s
testing accuracy: 84.23059844970703%
training loss: 9.687524287090736e-05; time: 29.84181571006775s
training accuracy: 90.12403106689453%








 77%|███████▋  | 77/100 [41:59<12:31, 32.69s/it][A[A[A[A[A[A

testing loss: 0.01182732519826488; time: 2.452275276184082s
testing accuracy: 84.23059844970703%
training loss: 9.56013834982068e-05; time: 29.94728946685791s
training accuracy: 90.11471557617188%








 78%|███████▊  | 78/100 [42:31<11:57, 32.60s/it][A[A[A[A[A[A

testing loss: 0.011748428193841809; time: 2.4093878269195557s
testing accuracy: 84.12657165527344%
training loss: 9.423011645982819e-05; time: 30.320497751235962s
training accuracy: 90.1668701171875%








 79%|███████▉  | 79/100 [43:04<11:26, 32.68s/it][A[A[A[A[A[A

testing loss: 0.011778746528521108; time: 2.519321918487549s
testing accuracy: 84.23926544189453%
training loss: 9.298028888309062e-05; time: 30.60136914253235s
training accuracy: 90.14079284667969%








 80%|████████  | 80/100 [43:39<11:04, 33.23s/it][A[A[A[A[A[A

testing loss: 0.011796618169347187; time: 2.6965267658233643s
testing accuracy: 84.12657165527344%
training loss: 9.17263411822905e-05; time: 30.024536848068237s
training accuracy: 90.15569305419922%








 81%|████████  | 81/100 [44:11<10:27, 33.00s/it][A[A[A[A[A[A

testing loss: 0.011748747474957608; time: 2.4238741397857666s
testing accuracy: 84.18724822998047%
training loss: 9.042094037233059e-05; time: 29.982250213623047s
training accuracy: 90.17431640625%








 82%|████████▏ | 82/100 [44:43<09:50, 32.82s/it][A[A[A[A[A[A

testing loss: 0.011743553896447927; time: 2.3940558433532715s
testing accuracy: 84.14390563964844%
training loss: 8.945104320223658e-05; time: 29.779525756835938s
training accuracy: 90.13520812988281%








 83%|████████▎ | 83/100 [45:16<09:14, 32.64s/it][A[A[A[A[A[A

testing loss: 0.011834800977132307; time: 2.4162404537200928s
testing accuracy: 84.05721282958984%
training loss: 8.821284275612072e-05; time: 30.45172667503357s
training accuracy: 90.23949432373047%








 84%|████████▍ | 84/100 [45:49<08:43, 32.73s/it][A[A[A[A[A[A

testing loss: 0.011830449575823935; time: 2.470432996749878s
testing accuracy: 84.1005630493164%
training loss: 8.714095870912971e-05; time: 30.396830797195435s
training accuracy: 90.1687240600586%








 85%|████████▌ | 85/100 [46:21<08:11, 32.78s/it][A[A[A[A[A[A

testing loss: 0.011743795707027375; time: 2.477372407913208s
testing accuracy: 84.33463287353516%
training loss: 8.604248253564642e-05; time: 30.09231185913086s
training accuracy: 90.189208984375%








 86%|████████▌ | 86/100 [46:54<07:38, 32.72s/it][A[A[A[A[A[A

testing loss: 0.011765469283214315; time: 2.485729217529297s
testing accuracy: 84.30862426757812%
training loss: 8.49640325725686e-05; time: 30.020723342895508s
training accuracy: 90.19666290283203%








 87%|████████▋ | 87/100 [47:27<07:04, 32.65s/it][A[A[A[A[A[A

testing loss: 0.011835457517643951; time: 2.440401792526245s
testing accuracy: 84.20458984375%
training loss: 8.399156675294488e-05; time: 29.735860347747803s
training accuracy: 90.20038604736328%








 88%|████████▊ | 88/100 [47:59<06:29, 32.48s/it][A[A[A[A[A[A

testing loss: 0.011808979956293498; time: 2.3487744331359863s
testing accuracy: 84.24794006347656%
training loss: 8.307563843487022e-05; time: 30.13785743713379s
training accuracy: 90.20970153808594%








 89%|████████▉ | 89/100 [48:31<05:57, 32.52s/it][A[A[A[A[A[A

testing loss: 0.01190305213372481; time: 2.4418256282806396s
testing accuracy: 84.21326446533203%
training loss: 8.20680230690916e-05; time: 30.509161949157715s
training accuracy: 90.19852447509766%








 90%|█████████ | 90/100 [49:05<05:30, 33.03s/it][A[A[A[A[A[A

testing loss: 0.011901984015971383; time: 2.509495258331299s
testing accuracy: 84.1092300415039%
training loss: 8.111702142996356e-05; time: 30.40336513519287s
training accuracy: 90.19293975830078%








 91%|█████████ | 91/100 [49:38<04:57, 33.00s/it][A[A[A[A[A[A

testing loss: 0.011736152910170846; time: 2.5078368186950684s
testing accuracy: 84.2912826538086%
training loss: 8.009289380425455e-05; time: 30.173169136047363s
training accuracy: 90.19852447509766%








 92%|█████████▏| 92/100 [50:11<04:23, 32.89s/it][A[A[A[A[A[A

testing loss: 0.011841539769240877; time: 2.42374324798584s
testing accuracy: 84.22193145751953%
training loss: 7.922743691667705e-05; time: 29.859943151474s
training accuracy: 90.22460174560547%








 93%|█████████▎| 93/100 [50:43<03:49, 32.73s/it][A[A[A[A[A[A

testing loss: 0.011685362775694203; time: 2.4929754734039307s
testing accuracy: 84.2739486694336%
training loss: 7.836031497956855e-05; time: 30.059845447540283s
training accuracy: 90.22832489013672%








 94%|█████████▍| 94/100 [51:16<03:16, 32.74s/it][A[A[A[A[A[A

testing loss: 0.01175848618182053; time: 2.6994595527648926s
testing accuracy: 84.38665008544922%
training loss: 7.752729091097129e-05; time: 30.479544639587402s
training accuracy: 90.26929473876953%








 95%|█████████▌| 95/100 [51:49<02:44, 32.81s/it][A[A[A[A[A[A

testing loss: 0.011903676847952705; time: 2.451643466949463s
testing accuracy: 84.24794006347656%
training loss: 7.665449849768909e-05; time: 30.450255870819092s
training accuracy: 90.25067138671875%








 96%|█████████▌| 96/100 [52:22<02:11, 32.87s/it][A[A[A[A[A[A

testing loss: 0.011745635409103517; time: 2.555171489715576s
testing accuracy: 84.17858123779297%
training loss: 7.579061810479772e-05; time: 29.996715545654297s
training accuracy: 90.28604888916016%








 97%|█████████▋| 97/100 [52:55<01:38, 32.77s/it][A[A[A[A[A[A

testing loss: 0.011669764151343754; time: 2.524287223815918s
testing accuracy: 84.2739486694336%
training loss: 7.501080986425433e-05; time: 30.004119634628296s
training accuracy: 90.24136352539062%








 98%|█████████▊| 98/100 [53:27<01:05, 32.68s/it][A[A[A[A[A[A

testing loss: 0.011859420676625649; time: 2.4415295124053955s
testing accuracy: 84.30862426757812%
training loss: 7.424224370868546e-05; time: 29.924227237701416s
training accuracy: 90.25439453125%








 99%|█████████▉| 99/100 [54:00<00:32, 32.63s/it][A[A[A[A[A[A

testing loss: 0.011914163248191213; time: 2.5681164264678955s
testing accuracy: 84.20458984375%
training loss: 7.346810478854734e-05; time: 30.67046046257019s
training accuracy: 90.26184844970703%








100%|██████████| 100/100 [54:34<00:00, 32.75s/it]

testing loss: 0.011793264364932696; time: 2.5948386192321777s
testing accuracy: 84.22193145751953%



