# Bibliotecas


In [1]:
from transformers import DistilBertTokenizer, DistilBertForMaskedLM
#from transformers import RobertaTokenizer, RobertaForMaskedLM
from transformers import TrainingArguments
from transformers import Trainer
import torch
import pandas as pd
from torch.optim import AdamW
from tqdm import tqdm
import time
import numpy as np
from torch import cuda
from sklearn.model_selection import train_test_split
import json

from gensim.models import Word2Vec
from gensim.models import Doc2Vec
from gensim.models import KeyedVectors

from torch.utils.data import DataLoader

# Funções

In [71]:
class EarlyStopping:
    """Early stops the training if validation loss doesn't improve after a given patience."""
    def __init__(self, patience=7, verbose=False, delta=0, path='checkpoint.pt', trace_func=print):
        """
        Args:
            patience (int): How long to wait after last time validation loss improved.
                            Default: 7
            verbose (bool): If True, prints a message for each validation loss improvement. 
                            Default: False
            delta (float): Minimum change in the monitored quantity to qualify as an improvement.
                            Default: 0
            path (str): Path for the checkpoint to be saved to.
                            Default: 'checkpoint.pt'
            trace_func (function): trace print function.
                            Default: print            
        """
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        self.val_loss_min = np.Inf
        self.delta = delta
        self.path = path
        self.trace_func = trace_func
    def __call__(self, val_loss, model):

        score = -val_loss

        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
        elif score < self.best_score + self.delta:
            self.counter += 1
            self.trace_func(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
            self.counter = 0

    def save_checkpoint(self, val_loss, model):
        '''Saves model when validation loss decrease.'''
        if self.verbose:
            self.trace_func(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}).  Saving model ...')
        torch.save(model.state_dict(), self.path)
        self.val_loss_min = val_loss

In [72]:
from torch.utils.data import Dataset

class TextDataset(torch.utils.data.Dataset):
    def __init__(self, texts, tokenizer, max_len):
        self.texts = texts
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        text = self.texts[idx]
        encoding = self.tokenizer(
            text,
            truncation=True,
            padding="max_length",
            max_length=self.max_len,
            return_tensors="pt"
        )
        return {
            'input_ids': encoding['input_ids'].squeeze(0),
            'attention_mask': encoding['attention_mask'].squeeze(0),
            'labels': encoding['input_ids'].squeeze(0)  # Exemplo para MLM
        }

In [73]:
#Dataset do torch auxilia no treinamento dos modelos
class Dataset(torch.utils.data.Dataset):
    def __init__(self, encodings):
        self.encodings = encodings
    def __getitem__(self, idx):
        return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
    def __len__(self):
        return len(self.encodings.input_ids)

In [74]:
"""
This function converts the list of sentences into a BERT input
"""
def bertInput_clean(sentences):

    token_text = "[SEP]".join(sentences)
    
    return token_text


"""
This function converts the list of sentences into a BERT input
"""
def input_clean(sentences):

    token_text = " ".join(sentences)
    
    return token_text

In [75]:
#Obtem os embeddings das sentenças e não das palavras
def executeModel_CLS(model, tokenizer, poi_type):
    
    tokenized_text = tokenizer(poi_type, return_tensors='pt', max_length=512, truncation=True, padding=True)
    
    # Predict hidden states features for each layer
    with torch.no_grad():
        encoded_layers = model(**tokenized_text, output_hidden_states = True)
    
    # create a new dimension in the tensor.
    token_embeddings = torch.stack(encoded_layers['hidden_states'], dim=0)
    
    # Remove dimension 1, the "batches".
    token_embeddings = torch.squeeze(token_embeddings, dim=1)
    
    # Swap dimensions 0 and 1.
    token_embeddings = token_embeddings.permute(1,0,2)
    
    # Stores the token vectors, with shape [22 x 768]
    token_vecs_sum = []

    # `token_embeddings` is a [22 x 12 x 768] tensor.

    # For each token in the sentence...
    for token in token_embeddings:

        # `token` is a [12 x 768] tensor

        # Sum the vectors from the last four layers.
        sum_vec = torch.mean(token[-4:], dim=0)

        # Use `sum_vec` to represent `token`.
        token_vecs_sum.append(sum_vec.tolist())
        

    #print ('Shape is: %d x %d' % (len(token_vecs_sum_text_01), len(token_vecs_sum_text_01[0])))
    
    #CLS vector
    return token_vecs_sum[0]

In [76]:
def convertModels(distilbert_model, model_base):
    
     #Obtendo o Índice de cada palavra no modelo
    vocab_tuple = []
    for key in model_base.wv.vocab.keys():
        vocab_tuple.append((key, model_base.wv.vocab[key].index))
    vocab_tuple.sort(key=lambda x:x[1])


    #Construindo os vetores concatenados
    vectors_matrix = []
    for word, index in vocab_tuple:

        try:
            vector = executeModel_CLS(distilbert_model, tokenizer, word)
        except:
            vector = [-1.0]*70

        vectors_matrix.append(np.hstack([vector]).tolist())


    #Convertendo matriz para tipo float32
    vectors_matrix = np.float32(vectors_matrix)

    #Atualizando vetores no modelo base
    model_base.wv.vector_size = len(vectors_matrix[0])
    model_base.vector_size = len(vectors_matrix[0]) 
    model_base.wv.vector = vectors_matrix
    #model_base.wv.syn0 = vectors_matrix
    model_base.wv.vectors = vectors_matrix

    #Não lembro o que essa etapa é
    #model_base.wv.syn0norm = None
    model_base.wv.vectors_norm  = None
    model_base.wv.init_sims()
    
    return model_base

# Realizando todos os passos em Pipeline
Os dados são relações de co-ocorrência binárias entre os tipos do POI central e os tipos dos POIs na vizinhança.

## Versão Traning Validation

- Essa versão faz a separação dos dados considerando os tipos de dados geográficos replicados
- Toda vez que o dado geográfico muda, significa que outra sequência de replicação irá iniciar
- Também há um controle de tamanho de sentenças para evitar o estouro da tokenização
- Nesse caso, pequenos documentos com base nessas mudanças são gerados
- **O conjunto de validação é o próprio treino**
- **O mascaramento é feito nos tokens correspondentes a toda a palavra**

In [None]:
model_checkpoint = "distilbert-base-uncased"
tokenizer = DistilBertTokenizer.from_pretrained(model_checkpoint)

#model_checkpoint = 'roberta-base'
#tokenizer = RobertaTokenizer.from_pretrained(model_checkpoint)

SENTENCE_SIZE = 100
EPOCHS = 15
BATCH_SIZE = 1
MAX_LEN = 512
MASK_PERC = 0.15
LR = 5e-5
patience = 3
use_amp = True

osm_tables = ['bins_points_information', 'bins_polygons_information', 'bins_roads_information', 'bins_lines_information']
for n in range(0, 1):
    for w in np.arange(0.0, 0.1, 0.1):
        #Ajustando o parâmetro w
        wgt = round(w, 1)
        for osm_table in osm_tables:
        
            #Flag para permitir o treinamento correto
            do_training = False
            
            #Caso especial para carregar os dados de pontos e treinar apenas uma vez
            if(osm_table == 'bins_points_information' and wgt == 0.0):
                file_name = './geographic/GEOC2VEC/austin-sl-tuple-geoc2vec-' + str(n) + osm_table + '-pfp-c.parquet'
                model_name = './geographic/GEOC2VECBERT15TKT-02/austin-sl-tuple-geoc2vec-distilbert-MLM-' + str(n) + osm_table + '-pfp-c'
                #model_name = './geographic/GEOC2VECBERT15TKT-02/austin-sl-tuple-geoc2vec-roberta-MLM-' + str(n) + osm_table + '-pfp-c'
                do_training = True
                
            elif(osm_table != 'bins_points_information'):
                file_name = './geographic/GEOC2VEC/austin-sl-tuple-geoc2vec-' + str(n) + osm_table + '-wgt' + str(wgt) + 'pfp-c.parquet'
                model_name = './geographic/GEOC2VECBERT15TKT-02/austin-sl-tuple-geoc2vec-distilbert-MLM-' + str(n) + osm_table + '-wgt' + str(wgt) + '-pfp-c'
                #model_name = './geographic/GEOC2VECBERT15TKT-02/austin-sl-tuple-geoc2vec-roberta-MLM-' + str(n) + osm_table + '-wgt' + str(wgt) + '-pfp-c'
                do_training = True
                
            if(do_training):
                
                start_time = time.time()
                
                #Carregando o modelo vazio para o finetuning
                model = DistilBertForMaskedLM.from_pretrained(model_checkpoint)
                #model = RobertaForMaskedLM.from_pretrained(model_checkpoint)
                
                #Carregar dataset
                print("Carregando dados...")
                print(file_name)
                sentences = pd.read_parquet(file_name)
                sentences = sentences.values.tolist()
                print('Quantidade de sentenças:', len(sentences))


                print("Gerando subtextos com foco nos tipos de POI (Treino)...")
                #Criando textos menores com o conjunto de duas palavras dos POIs e dados geográficos
                #Esse método considera a mudança do TIPO de dado geográfico para criar um novo subtexto
                train_sentences = []
                local_sentences = []
                count_sentences = 1
                actual_type = sentences[0][3] # => Dado geográfico
                for i, sentence in enumerate(sentences):

                    if((count_sentences % SENTENCE_SIZE) == 0):
                        
                        final_sentence = '[CLS]'.join(local_sentences)
                        train_sentences.append(final_sentence)
                        
                        count_sentences = 1
                        local_sentences = []

                    #Aqui deve ser feito um split_test
                    elif(sentence[3] != actual_type):
                        
                        #Salvando a parte final
                        final_sentence = '[CLS]'.join(local_sentences)
                        train_sentences.append(final_sentence)
                        
                        actual_type = sentence[3]
                        count_sentences = 1
                        
                        #Começando o novo contexto
                        sentence_text = [sentence[1], sentence[3]]
                        local_sentences = [bertInput_clean(sentence_text)]

                    else:
                        count_sentences+=1
                        sentence_text = [sentence[1], sentence[3]]
                        local_sentences.append(bertInput_clean(sentence_text))

                #Adicionando último trecho
                if(len(local_sentences) > 0):
                    final_sentence = '[CLS]'.join(local_sentences)
                    train_sentences.append(final_sentence)
                
                print("Gerando conjunto de Treino...")
                #train, validation = train_test_split(train_sentences, test_size=valid_size, random_state=42)
                print('Conjunto de Treino: ', len(train_sentences))

                #Esvaziando memória
                del local_sentences

                #Tokenizando e salvando uma cópia dos tokens para representar as labels
                inputs_train = tokenizer(train_sentences, return_tensors='pt', max_length=MAX_LEN, truncation = True, padding='max_length')
                inputs_train['labels'] = inputs_train.input_ids.detach().clone()

                #Esvaziando memória
                del train_sentences

                #Encontrando MASK_PERC% das frases que serão mascaradas
                #As frases terão a segunda sentença mascarada ex: [CLS]Bar[SEP]Park[SEP] => [CLS]Bar[SEP]#####[SEP]
                print("Mascarando dados...")
                rand_train = torch.rand(inputs_train.input_ids.shape)
                
                #Gerando um mascaramento em posições aleatório
                #101 = [CLS]
                #102 = [SEP]
                mask_arr_train = (rand_train < MASK_PERC) * (inputs_train.input_ids != 101) * \
                           (inputs_train.input_ids != 102) * (inputs_train.input_ids != 0)
                
                
                #Tokens que terão os tokens mascarados
                selection_train = []
                for i in range(inputs_train.input_ids.shape[0]):
                    selection_train.append(
                        torch.flatten(mask_arr_train[i].nonzero()).tolist()
                    )
                    
            
                #Esvaziando memória
                del rand_train, mask_arr_train
                
                #Alterando os tokens de TODA A PALAVRA
                #103 = [MASK]
                for i in range(inputs_train.input_ids.shape[0]):
                    inputs_train.input_ids[i, selection_train[i]] = 103
                    
                    for j in selection_train[i]:
                        #Moving front and back
                        b = j - 1
                        f = j + 1

                        #Preenchendo restante do token
                        while ((b > 0) and 
                               (inputs_train.input_ids[i, b] != 101 and 
                                inputs_train.input_ids[i, b] != 102 and 
                                inputs_train.input_ids[i, b] != 103)):

                            inputs_train.input_ids[i, b] = 103
                            b = b-1

                        while ((f < len(inputs_train.input_ids[i])) and 
                               (inputs_train.input_ids[i, f] != 0 and 
                                inputs_train.input_ids[i, f] != 101 and 
                                inputs_train.input_ids[i, f] != 102 and 
                                inputs_train.input_ids[i, f] != 103)):
                            inputs_train.input_ids[i, f] = 103
                            f = f+1
                
                
                    
                #Esvaziando memória
                del selection_train 

                #Transformando os dados no objeto dataset do torch
                print("Preparando para o treinamento...")
                dataset_train = Dataset(inputs_train)
                dataset_val = dataset_train
                loader_train = torch.utils.data.DataLoader(dataset_train, batch_size=BATCH_SIZE, shuffle=True)
                
                #Esvaziando memória
                del inputs_train

                #Preparando o dispotivo para o treinamento
                device = 'cuda' if cuda.is_available() else 'cpu' # CPU OR GPU
                torch.cuda.empty_cache()
                # and move our model over to the selected device
                model.to(device)

                optim = AdamW(model.parameters(), lr=LR)

                # to track the training loss as the model trains
                train_losses = []
                # to track the average training loss per epoch as the model trains
                avg_train_losses = []

                # initialize the early_stopping object
                early_stopping = EarlyStopping(patience=patience, verbose=False)
                scaler = torch.cuda.amp.GradScaler(enabled=use_amp)
                has_early_stopping = False

                for epoch in range(EPOCHS):
                    # setup loop with TQDM and dataloader
                    # activate training mode
                    model.train()
                    loop_train = tqdm(loader_train, leave=True)
                    for batch_train in loop_train:

                        optim.zero_grad()
                        # pull all tensor batches required for training
                        input_ids = batch_train['input_ids'].to(device)
                        attention_mask = batch_train['attention_mask'].to(device)
                        labels = batch_train['labels'].to(device)

                        with torch.autocast(device_type='cuda', dtype=torch.float16, enabled=use_amp):
                            outputs = model(input_ids, attention_mask=attention_mask,
                                        labels=labels)
                            loss = outputs.loss

                        scaler.scale(loss).backward()
                        scaler.step(optim)
                        scaler.update()

                        loop_train.set_description(f'Epoch {epoch}')
                        loop_train.set_postfix(loss=loss.item())

                        train_losses.append(loss.item())

        
                    # calculate average loss over an epoch
                    train_loss = np.average(train_losses)
                    avg_train_losses.append(train_loss)


                    # early_stopping needs the validation loss to check if it has decresed, 
                    # and if it has, it will make a checkpoint of the current model
                    early_stopping(train_loss, model)

                    if early_stopping.early_stop:
                        print("Early stopping")
                        final_time = (time.time() - start_time)
                        has_early_stopping = True
                        training_dictionary = {'epoch': epoch+1,
                                               'epochs': EPOCHS,
                                               'patience': patience,
                                               'train_loss': train_loss,
                                               'avg_train_losses':avg_train_losses,
                                               'time': final_time}
                        break

                    # clear lists to track next epoch
                    train_losses = []

                # load the last checkpoint with the best model
                model.load_state_dict(torch.load('checkpoint.pt'))

                #Novo tempo para o caso das coisas terem sido diferentes
                if(has_early_stopping == False):
                    final_time = (time.time() - start_time)
                    training_dictionary = {'epoch': epoch+1,
                                           'epochs': EPOCHS,
                                           'patience': patience,
                                           'train_loss': train_loss,
                                           'avg_train_losses':avg_train_losses,
                                           'time': final_time}

                #Salvando o modelo pronto
                print("Salvando o modelo...")
                args = TrainingArguments(
                    output_dir=model_name,
                    per_device_train_batch_size=BATCH_SIZE,
                    num_train_epochs=EPOCHS
                )

                trainer = Trainer(
                    model=model,
                    args=args,
                    train_dataset=dataset_train,
                    eval_dataset=dataset_train
                )

                trainer.save_model()
                
                #Salvando as estatísticas do treinamento
                file_name = model_name + '/training_dictionary.json'
                with open(file_name, "w") as outfile:
                    json.dump(training_dictionary, outfile)
                
                #Esvaziando memória
                del loader_train
                del dataset_train
                del train_losses
                del avg_train_losses
                del trainer

## Versão Type Split-I

- Essa versão faz a separação dos dados considerando os tipos de dados geográficos replicados
- Toda vez que o dado geográfico muda, significa que outra sequência de replicação irá iniciar
- Também há um controle de tamanho de sentenças para evitar o estouro da tokenização
- Nesse caso, pequenos documentos com base nessas mudanças são gerados
- **O conjunto de validação é construído descantando-se os dados replicados**
- **O mascaramento é feito nos tokens correspondentes a toda a palavra**

In [None]:
model_checkpoint = "distilbert-base-uncased"
tokenizer = DistilBertTokenizer.from_pretrained(model_checkpoint)

#model_checkpoint = 'roberta-base'
#tokenizer = RobertaTokenizer.from_pretrained(model_checkpoint)

SENTENCE_SIZE = 50
EPOCHS = 15
BATCH_SIZE = 1
MAX_LEN = 512
MASK_PERC = 0.15
LR = 5e-5
patience = 3
use_amp = True
weights = [0.2, 0.3, 0.4, 0.6, 0.7, 0.8, 0.9]

osm_tables = ['bins_points_information', 'bins_polygons_information', 'bins_roads_information', 'bins_lines_information']
#osm_tables = ['bins_roads_information', 'bins_lines_information']
for n in range(1, 2):
    #for w in np.arange(0.0, 1.1, 0.1):
    for w in weights:
        #Ajustando o parâmetro w
        wgt = round(w, 1)
        for osm_table in osm_tables:
        
            #Flag para permitir o treinamento correto
            do_training = False
            
            #Caso especial para carregar os dados de pontos e treinar apenas uma vez
            if(osm_table == 'bins_points_information' and wgt == 0.0):
                file_name = './geographic/GEOC2VEC/austin-sl-tuple-geoc2vec-' + str(n) + osm_table + '-pfp-c.parquet'
                model_name = './geographic/GEOC2VECBERT15TKT-03/austin-sl-tuple-geoc2vec-distilbert-MLM-' + str(n) + osm_table + '-pfp-c'
                #model_name = './geographic/GEOC2VECBERT15TKT-02/austin-sl-tuple-geoc2vec-roberta-MLM-' + str(n) + osm_table + '-pfp-c'
                do_training = True
                
            elif(osm_table != 'bins_points_information'):
                file_name = './geographic/GEOC2VEC/austin-sl-tuple-geoc2vec-' + str(n) + osm_table + '-wgt' + str(wgt) + 'pfp-c.parquet'
                model_name = './geographic/GEOC2VECBERT15TKT-03/austin-sl-tuple-geoc2vec-distilbert-MLM-' + str(n) + osm_table + '-wgt' + str(wgt) + '-pfp-c'
                #model_name = './geographic/GEOC2VECBERT15TKT-02/austin-sl-tuple-geoc2vec-roberta-MLM-' + str(n) + osm_table + '-wgt' + str(wgt) + '-pfp-c'
                do_training = True
                
            if(do_training):
                
                start_time = time.time()
                
                #Carregando o modelo vazio para o finetuning
                model = DistilBertForMaskedLM.from_pretrained(model_checkpoint)
                #model = RobertaForMaskedLM.from_pretrained(model_checkpoint)
                
                #Carregar dataset
                print("Carregando dados...")
                print(file_name)
                sentences = pd.read_parquet(file_name)
                validation = sentences[['center_poi', 'context_osm']].drop_duplicates()
                validation = validation.values.tolist()
                sentences = sentences.values.tolist()
                print('Quantidade de sentenças:', len(sentences))


                print("Gerando subtextos com foco nos tipos de POI (Treino)...")
                #Criando textos menores com o conjunto de duas palavras dos POIs e dados geográficos
                #Esse método considera a mudança do TIPO de dado geográfico para criar um novo subtexto
                train_sentences = []
                local_sentences = []
                count_sentences = 1
                actual_type = sentences[0][3] # => Dado geográfico
                for i, sentence in enumerate(sentences):

                    #Para voltar a versão anterior descomente esse trecho
                    '''if((count_sentences % SENTENCE_SIZE) == 0):
                        
                        final_sentence = '[CLS]'.join(local_sentences)
                        train_sentences.append(final_sentence)
                        
                        count_sentences = 1
                        local_sentences = []'''

                    #Aqui deve ser feito um split_test
                    if(sentence[3] != actual_type):
                        
                        #Esse trecho é novo
                        if(len(local_sentences) >= SENTENCE_SIZE):
                            
                            print('Treino ultrapassou em:', i, 'tam:', len(local_sentences))
                            
                            final_sentence = '[CLS]'.join(local_sentences[0:int(len(local_sentences)/2)])
                            train_sentences.append(final_sentence)
                            
                            final_sentence = '[CLS]'.join(local_sentences[int(len(local_sentences)/2):len(local_sentences)])
                            train_sentences.append(final_sentence)
                        
                        else:
                            #Salvando a parte final
                            final_sentence = '[CLS]'.join(local_sentences)
                            train_sentences.append(final_sentence)

                        actual_type = sentence[3]
                        count_sentences = 1

                        #Começando o novo contexto
                        sentence_text = [sentence[1], sentence[3]]
                        local_sentences = [bertInput_clean(sentence_text)]

                    else:
                        count_sentences+=1
                        sentence_text = [sentence[1], sentence[3]]
                        local_sentences.append(bertInput_clean(sentence_text))

                #Adicionando último trecho
                if(len(local_sentences) > 0):
                    final_sentence = '[CLS]'.join(local_sentences)
                    train_sentences.append(final_sentence)
            
                #Esvaziando memória
                del sentences, local_sentences
                
                
                print("Gerando subtextos com foco nos tipos de POI (Validação)...")
                #Criando textos menores com o conjunto de duas palavras dos POIs e dados geográficos
                #Esse método considera a mudança do TIPO de dado geográfico para criar um novo subtexto
                val_sentences = []
                local_sentences = []
                count_sentences = 1
                actual_type = validation[0][1] # => Dado geográfico
                for i, sentence in enumerate(validation):

                    #Descomente para voltar ao original
                    '''if((count_sentences % SENTENCE_SIZE) == 0):
                        
                        final_sentence = '[CLS]'.join(local_sentences)
                        val_sentences.append(final_sentence)
                        
                        count_sentences = 1
                        local_sentences = []'''

                    #Aqui deve ser feito um split_test
                    if(sentence[1] != actual_type):
                        
                        #Esse trecho é novo
                        if(len(local_sentences) >= SENTENCE_SIZE):
                            
                            print('Validação ultrapassou em:', i, 'tam:', len(local_sentences))
                            
                            final_sentence = '[CLS]'.join(local_sentences[0:int(len(local_sentences)/2)])
                            train_sentences.append(final_sentence)
                            
                            final_sentence = '[CLS]'.join(local_sentences[int(len(local_sentences)/2):len(local_sentences)])
                            train_sentences.append(final_sentence)
                        
                        else:
                            #Salvando a parte final
                            final_sentence = '[CLS]'.join(local_sentences)
                            val_sentences.append(final_sentence)
                        
                        actual_type = sentence[1]
                        count_sentences = 1
                        
                        #Começando o novo contexto
                        sentence_text = [sentence[0], sentence[1]]
                        local_sentences = [bertInput_clean(sentence_text)]

                    else:
                        count_sentences+=1
                        sentence_text = [sentence[0], sentence[1]]
                        local_sentences.append(bertInput_clean(sentence_text))

                #Adicionando último trecho
                if(len(local_sentences) > 0):
                    final_sentence = '[CLS]'.join(local_sentences)
                    val_sentences.append(final_sentence)
            
                #Esvaziando memória
                del validation, local_sentences
                
                print('Conjunto de Treino: ', len(train_sentences))
                print('Conjunto de Validação: ', len(val_sentences))
                
                #Tokenizando e salvando uma cópia dos tokens para representar as labels
                inputs_train = tokenizer(train_sentences, return_tensors='pt', max_length=MAX_LEN, truncation = True, padding='max_length')
                inputs_train['labels'] = inputs_train.input_ids.detach().clone()

                inputs_val = tokenizer(val_sentences, return_tensors='pt', max_length=MAX_LEN, truncation = True, padding='max_length')
                inputs_val['labels'] = inputs_val.input_ids.detach().clone()

                #Esvaziando memória
                del train_sentences, val_sentences

                #Encontrando MASK_PERC% das frases que serão mascaradas
                #As frases terão a segunda sentença mascarada ex: [CLS]Bar[SEP]Park[SEP] => [CLS]Bar[SEP]#####[SEP]
                print("Mascarando dados...")
                rand_train = torch.rand(inputs_train.input_ids.shape)
                rand_val = torch.rand(inputs_val.input_ids.shape)
                
                #Gerando um mascaramento em posições aleatório
                #101 = [CLS]
                #102 = [SEP]
                mask_arr_train = (rand_train < MASK_PERC) * (inputs_train.input_ids != 101) * \
                           (inputs_train.input_ids != 102) * (inputs_train.input_ids != 0)
                
                mask_arr_val = (rand_val < MASK_PERC) * (inputs_val.input_ids != 101) * \
                           (inputs_val.input_ids != 102) * (inputs_val.input_ids != 0)
                
                #Tokens que terão os tokens mascarados
                selection_train = []
                for i in range(inputs_train.input_ids.shape[0]):
                    selection_train.append(
                        torch.flatten(mask_arr_train[i].nonzero()).tolist()
                    )
                    
                selection_val = []
                for i in range(inputs_val.input_ids.shape[0]):
                    selection_val.append(
                        torch.flatten(mask_arr_val[i].nonzero()).tolist()
                    )

                #Esvaziando memória
                del rand_train, mask_arr_train, rand_val, mask_arr_val
                
                #Alterando os tokens de TODA A PALAVRA
                #103 = [MASK]
                for i in range(inputs_train.input_ids.shape[0]):
                    inputs_train.input_ids[i, selection_train[i]] = 103
                    
                    for j in selection_train[i]:
                        #Moving front and back
                        b = j - 1
                        f = j + 1

                        #Preenchendo restante do token
                        while ((b > 0) and 
                               (inputs_train.input_ids[i, b] != 101 and 
                                inputs_train.input_ids[i, b] != 102 and 
                                inputs_train.input_ids[i, b] != 103)):

                            inputs_train.input_ids[i, b] = 103
                            b = b-1

                        while ((f < len(inputs_train.input_ids[i])) and 
                               (inputs_train.input_ids[i, f] != 0 and 
                                inputs_train.input_ids[i, f] != 101 and 
                                inputs_train.input_ids[i, f] != 102 and 
                                inputs_train.input_ids[i, f] != 103)):
                            inputs_train.input_ids[i, f] = 103
                            f = f+1
                
                for i in range(inputs_val.input_ids.shape[0]):
                    inputs_val.input_ids[i, selection_val[i]] = 103
                    
                    for j in selection_val[i]:
                        #Moving front and back
                        b = j - 1
                        f = j + 1

                        #Preenchendo restante do token
                        while ((b > 0) and 
                               (inputs_val.input_ids[i, b] != 101 and 
                                inputs_val.input_ids[i, b] != 102 and 
                                inputs_val.input_ids[i, b] != 103)):

                            inputs_val.input_ids[i, b] = 103
                            b = b-1

                        while ((f < len(inputs_val.input_ids[i])) and 
                               (inputs_val.input_ids[i, f] != 0 and 
                                inputs_val.input_ids[i, f] != 101 and 
                                inputs_val.input_ids[i, f] != 102 and 
                                inputs_val.input_ids[i, f] != 103)):
                            inputs_val.input_ids[i, f] = 103
                            f = f+1
                    
                #Esvaziando memória
                del selection_train, selection_val

                #Transformando os dados no objeto dataset do torch
                print("Preparando para o treinamento...")
                dataset_train = Dataset(inputs_train)
                loader_train = torch.utils.data.DataLoader(dataset_train, batch_size=BATCH_SIZE, shuffle=True)
                dataset_val = Dataset(inputs_val)
                loader_val = torch.utils.data.DataLoader(dataset_val, batch_size=BATCH_SIZE, shuffle=True)
                
                #Esvaziando memória
                del inputs_train, inputs_val

                #Preparando o dispotivo para o treinamento
                device = 'cuda' if cuda.is_available() else 'cpu' # CPU OR GPU
                torch.cuda.empty_cache()
                # and move our model over to the selected device
                model.to(device)

                optim = AdamW(model.parameters(), lr=LR)

                # to track the training loss as the model trains
                train_losses = []
                # to track the validation loss as the model trains
                valid_losses = []
                # to track the average training loss per epoch as the model trains
                avg_train_losses = []
                # to track the average validation loss per epoch as the model trains
                avg_valid_losses = [] 

                # initialize the early_stopping object
                early_stopping = EarlyStopping(patience=patience, verbose=True)
                scaler = torch.cuda.amp.GradScaler(enabled=use_amp)
                has_early_stopping = False

                for epoch in range(EPOCHS):
                    # setup loop with TQDM and dataloader
                    # activate training mode
                    model.train()
                    loop_train = tqdm(loader_train, leave=True)
                    for batch_train in loop_train:

                        optim.zero_grad()
                        # pull all tensor batches required for training
                        input_ids = batch_train['input_ids'].to(device)
                        attention_mask = batch_train['attention_mask'].to(device)
                        labels = batch_train['labels'].to(device)

                        with torch.autocast(device_type='cuda', dtype=torch.float16, enabled=use_amp):
                            outputs = model(input_ids, attention_mask=attention_mask,
                                        labels=labels)
                            loss = outputs.loss

                        scaler.scale(loss).backward()
                        scaler.step(optim)
                        scaler.update()

                        loop_train.set_description(f'Epoch {epoch}')
                        loop_train.set_postfix(loss=loss.item())


                        train_losses.append(loss.item())

                    ######################    
                    # validate the model #
                    ######################
                    model.eval() # prep model for evaluation
                    loop_val = tqdm(loader_val, leave=True)
                    for batch_val in loop_val:

                        # forward pass: compute predicted outputs by passing inputs to the model
                        input_ids = batch_val['input_ids'].to(device)
                        attention_mask = batch_val['attention_mask'].to(device)
                        labels = batch_val['labels'].to(device)

                        # process
                        with torch.autocast(device_type='cuda', dtype=torch.float16, enabled=use_amp):
                            outputs = model(input_ids, attention_mask=attention_mask,
                                        labels=labels)
                            loss = outputs.loss

                        # record validation loss
                        valid_losses.append(loss.item())

                    # calculate average loss over an epoch
                    train_loss = np.average(train_losses)
                    valid_loss = np.average(valid_losses)
                    avg_train_losses.append(train_loss)
                    avg_valid_losses.append(valid_loss)

                    # early_stopping needs the validation loss to check if it has decresed, 
                    # and if it has, it will make a checkpoint of the current model
                    early_stopping(valid_loss, model)

                    if early_stopping.early_stop:
                        print("Early stopping")
                        final_time = (time.time() - start_time)
                        has_early_stopping = True
                        training_dictionary = {'epoch': epoch+1,
                                               'epochs': EPOCHS,
                                               'patience': patience,
                                               'train_loss': train_loss,
                                               'valid_loss': valid_loss,
                                               'avg_train_losses':avg_train_losses,
                                               'avg_valid_losses': avg_valid_losses,
                                               'time': final_time}
                        break

                    # clear lists to track next epoch
                    train_losses = []
                    valid_losses = []

                # load the last checkpoint with the best model
                model.load_state_dict(torch.load('checkpoint.pt'))

                #Novo tempo para o caso das coisas terem sido diferentes
                if(has_early_stopping == False):
                    final_time = (time.time() - start_time)
                    training_dictionary = {'epoch': epoch+1,
                                           'epochs': EPOCHS,
                                           'patience': patience,
                                           'train_loss': train_loss,
                                           'valid_loss': valid_loss,
                                           'avg_train_losses':avg_train_losses,
                                           'avg_valid_losses': avg_valid_losses,
                                           'time': final_time}

                #Salvando o modelo pronto
                print("Salvando o modelo...")
                args = TrainingArguments(
                    output_dir=model_name,
                    per_device_train_batch_size=BATCH_SIZE,
                    num_train_epochs=EPOCHS
                )

                trainer = Trainer(
                    model=model,
                    args=args,
                    train_dataset=dataset_train,
                    eval_dataset=dataset_val
                )

                trainer.save_model()
                
                #Salvando as estatísticas do treinamento
                file_name = model_name + '/training_dictionary.json'
                with open(file_name, "w") as outfile:
                    json.dump(training_dictionary, outfile)
                
                #Esvaziando memória
                del loader_train
                del loader_val
                del dataset_train
                del dataset_val
                del train_losses
                del valid_losses
                del avg_train_losses
                del avg_valid_losses
                del trainer

Carregando dados...
./geographic/GEOC2VEC/austin-sl-tuple-geoc2vec-1bins_polygons_information-wgt0.2pfp-c.parquet
Quantidade de sentenças: 2452843
Gerando subtextos com foco nos tipos de POI (Treino)...
Treino ultrapassou em: 147 tam: 63
Treino ultrapassou em: 364 tam: 91
Treino ultrapassou em: 519 tam: 65
Treino ultrapassou em: 743 tam: 64
Treino ultrapassou em: 893 tam: 54
Treino ultrapassou em: 1178 tam: 66
Treino ultrapassou em: 1657 tam: 145
Treino ultrapassou em: 1876 tam: 51
Treino ultrapassou em: 1976 tam: 60
Treino ultrapassou em: 2204 tam: 148
Treino ultrapassou em: 2300 tam: 60
Treino ultrapassou em: 2594 tam: 72
Treino ultrapassou em: 2709 tam: 75
Treino ultrapassou em: 2801 tam: 64
Treino ultrapassou em: 2885 tam: 64
Treino ultrapassou em: 2965 tam: 68
Treino ultrapassou em: 3070 tam: 90
Treino ultrapassou em: 3185 tam: 60
Treino ultrapassou em: 3447 tam: 55
Treino ultrapassou em: 3735 tam: 68
Treino ultrapassou em: 3827 tam: 52
Treino ultrapassou em: 4011 tam: 120
Treino 

Treino ultrapassou em: 243173 tam: 182
Treino ultrapassou em: 243288 tam: 65
Treino ultrapassou em: 243376 tam: 76
Treino ultrapassou em: 243501 tam: 90
Treino ultrapassou em: 243854 tam: 78
Treino ultrapassou em: 244002 tam: 52
Treino ultrapassou em: 244523 tam: 72
Treino ultrapassou em: 244731 tam: 136
Treino ultrapassou em: 244876 tam: 65
Treino ultrapassou em: 245075 tam: 52
Treino ultrapassou em: 245244 tam: 169
Treino ultrapassou em: 245313 tam: 51
Treino ultrapassou em: 245513 tam: 60
Treino ultrapassou em: 245747 tam: 135
Treino ultrapassou em: 246002 tam: 91
Treino ultrapassou em: 246210 tam: 96
Treino ultrapassou em: 246372 tam: 72
Treino ultrapassou em: 246487 tam: 80
Treino ultrapassou em: 246683 tam: 85
Treino ultrapassou em: 246897 tam: 112
Treino ultrapassou em: 247163 tam: 70
Treino ultrapassou em: 247499 tam: 130
Treino ultrapassou em: 247724 tam: 144
Treino ultrapassou em: 247793 tam: 51
Treino ultrapassou em: 247859 tam: 51
Treino ultrapassou em: 247991 tam: 102
Trei

Treino ultrapassou em: 486684 tam: 80
Treino ultrapassou em: 486818 tam: 54
Treino ultrapassou em: 486989 tam: 81
Treino ultrapassou em: 487052 tam: 57
Treino ultrapassou em: 487176 tam: 52
Treino ultrapassou em: 487276 tam: 72
Treino ultrapassou em: 487386 tam: 75
Treino ultrapassou em: 487556 tam: 60
Treino ultrapassou em: 487656 tam: 80
Treino ultrapassou em: 487908 tam: 224
Treino ultrapassou em: 487968 tam: 51
Treino ultrapassou em: 488142 tam: 90
Treino ultrapassou em: 488609 tam: 126
Treino ultrapassou em: 488808 tam: 94
Treino ultrapassou em: 488888 tam: 64
Treino ultrapassou em: 489269 tam: 55
Treino ultrapassou em: 489472 tam: 63
Treino ultrapassou em: 489535 tam: 54
Treino ultrapassou em: 489619 tam: 76
Treino ultrapassou em: 489887 tam: 94
Treino ultrapassou em: 490025 tam: 54
Treino ultrapassou em: 490256 tam: 165
Treino ultrapassou em: 490361 tam: 85
Treino ultrapassou em: 490491 tam: 80
Treino ultrapassou em: 490731 tam: 112
Treino ultrapassou em: 490869 tam: 102
Treino 

Treino ultrapassou em: 697546 tam: 65
Treino ultrapassou em: 697762 tam: 56
Treino ultrapassou em: 697828 tam: 57
Treino ultrapassou em: 698179 tam: 182
Treino ultrapassou em: 698283 tam: 56
Treino ultrapassou em: 698481 tam: 63
Treino ultrapassou em: 698754 tam: 66
Treino ultrapassou em: 699157 tam: 56
Treino ultrapassou em: 699609 tam: 112
Treino ultrapassou em: 699871 tam: 70
Treino ultrapassou em: 699959 tam: 64
Treino ultrapassou em: 700239 tam: 69
Treino ultrapassou em: 700533 tam: 72
Treino ultrapassou em: 700946 tam: 170
Treino ultrapassou em: 701076 tam: 55
Treino ultrapassou em: 701286 tam: 75
Treino ultrapassou em: 701493 tam: 144
Treino ultrapassou em: 701598 tam: 80
Treino ultrapassou em: 701876 tam: 68
Treino ultrapassou em: 701992 tam: 52
Treino ultrapassou em: 702420 tam: 50
Treino ultrapassou em: 702552 tam: 90
Treino ultrapassou em: 702757 tam: 195
Treino ultrapassou em: 703025 tam: 112
Treino ultrapassou em: 703341 tam: 128
Treino ultrapassou em: 703407 tam: 51
Trein

Treino ultrapassou em: 921259 tam: 102
Treino ultrapassou em: 921503 tam: 51
Treino ultrapassou em: 921655 tam: 56
Treino ultrapassou em: 921899 tam: 64
Treino ultrapassou em: 922009 tam: 95
Treino ultrapassou em: 922500 tam: 150
Treino ultrapassou em: 922732 tam: 168
Treino ultrapassou em: 923183 tam: 121
Treino ultrapassou em: 923267 tam: 68
Treino ultrapassou em: 923417 tam: 90
Treino ultrapassou em: 923587 tam: 145
Treino ultrapassou em: 923679 tam: 76
Treino ultrapassou em: 923861 tam: 66
Treino ultrapassou em: 924245 tam: 300
Treino ultrapassou em: 924390 tam: 65
Treino ultrapassou em: 924446 tam: 56
Treino ultrapassou em: 924670 tam: 224
Treino ultrapassou em: 924730 tam: 51
Treino ultrapassou em: 924884 tam: 52
Treino ultrapassou em: 924953 tam: 57
Treino ultrapassou em: 925162 tam: 75
Treino ultrapassou em: 925316 tam: 85
Treino ultrapassou em: 925496 tam: 132
Treino ultrapassou em: 925686 tam: 70
Treino ultrapassou em: 925991 tam: 67
Treino ultrapassou em: 926199 tam: 51
Trei

Treino ultrapassou em: 1127412 tam: 80
Treino ultrapassou em: 1127552 tam: 80
Treino ultrapassou em: 1127687 tam: 65
Treino ultrapassou em: 1127771 tam: 56
Treino ultrapassou em: 1127840 tam: 51
Treino ultrapassou em: 1127950 tam: 95
Treino ultrapassou em: 1128055 tam: 90
Treino ultrapassou em: 1128146 tam: 91
Treino ultrapassou em: 1128250 tam: 104
Treino ultrapassou em: 1128341 tam: 52
Treino ultrapassou em: 1128425 tam: 68
Treino ultrapassou em: 1128687 tam: 104
Treino ultrapassou em: 1128812 tam: 55
Treino ultrapassou em: 1128927 tam: 80
Treino ultrapassou em: 1129552 tam: 238
Treino ultrapassou em: 1129672 tam: 70
Treino ultrapassou em: 1129756 tam: 56
Treino ultrapassou em: 1129924 tam: 154
Treino ultrapassou em: 1130044 tam: 75
Treino ultrapassou em: 1130258 tam: 70
Treino ultrapassou em: 1130490 tam: 80
Treino ultrapassou em: 1130600 tam: 85
Treino ultrapassou em: 1131080 tam: 72
Treino ultrapassou em: 1131146 tam: 54
Treino ultrapassou em: 1131423 tam: 78
Treino ultrapassou em

Treino ultrapassou em: 1318126 tam: 54
Treino ultrapassou em: 1318286 tam: 92
Treino ultrapassou em: 1318646 tam: 335
Treino ultrapassou em: 1319096 tam: 250
Treino ultrapassou em: 1319226 tam: 80
Treino ultrapassou em: 1319551 tam: 215
Treino ultrapassou em: 1319859 tam: 81
Treino ultrapassou em: 1320083 tam: 98
Treino ultrapassou em: 1320188 tam: 95
Treino ultrapassou em: 1320318 tam: 65
Treino ultrapassou em: 1320532 tam: 84
Treino ultrapassou em: 1320686 tam: 65
Treino ultrapassou em: 1321025 tam: 196
Treino ultrapassou em: 1321420 tam: 196
Treino ultrapassou em: 1321601 tam: 92
Treino ultrapassou em: 1322259 tam: 143
Treino ultrapassou em: 1322415 tam: 50
Treino ultrapassou em: 1322559 tam: 96
Treino ultrapassou em: 1322907 tam: 98
Treino ultrapassou em: 1323063 tam: 96
Treino ultrapassou em: 1323179 tam: 64
Treino ultrapassou em: 1323379 tam: 112
Treino ultrapassou em: 1323490 tam: 51
Treino ultrapassou em: 1323578 tam: 64
Treino ultrapassou em: 1323746 tam: 78
Treino ultrapassou

Treino ultrapassou em: 1503226 tam: 100
Treino ultrapassou em: 1503326 tam: 52
Treino ultrapassou em: 1503675 tam: 70
Treino ultrapassou em: 1504003 tam: 60
Treino ultrapassou em: 1504225 tam: 64
Treino ultrapassou em: 1504516 tam: 66
Treino ultrapassou em: 1504818 tam: 122
Treino ultrapassou em: 1505126 tam: 165
Treino ultrapassou em: 1505421 tam: 70
Treino ultrapassou em: 1505721 tam: 91
Treino ultrapassou em: 1505868 tam: 140
Treino ultrapassou em: 1506030 tam: 54
Treino ultrapassou em: 1506285 tam: 60
Treino ultrapassou em: 1506411 tam: 96
Treino ultrapassou em: 1506549 tam: 96
Treino ultrapassou em: 1506679 tam: 65
Treino ultrapassou em: 1507375 tam: 264
Treino ultrapassou em: 1507579 tam: 72
Treino ultrapassou em: 1507932 tam: 60
Treino ultrapassou em: 1508072 tam: 65
Treino ultrapassou em: 1508242 tam: 102
Treino ultrapassou em: 1508444 tam: 65
Treino ultrapassou em: 1508596 tam: 60
Treino ultrapassou em: 1508728 tam: 60
Treino ultrapassou em: 1509285 tam: 260
Treino ultrapassou

Treino ultrapassou em: 1699976 tam: 112
Treino ultrapassou em: 1700176 tam: 119
Treino ultrapassou em: 1700756 tam: 103
Treino ultrapassou em: 1701242 tam: 136
Treino ultrapassou em: 1701362 tam: 108
Treino ultrapassou em: 1701579 tam: 112
Treino ultrapassou em: 1701717 tam: 78
Treino ultrapassou em: 1702121 tam: 65
Treino ultrapassou em: 1702217 tam: 60
Treino ultrapassou em: 1702445 tam: 51
Treino ultrapassou em: 1702711 tam: 64
Treino ultrapassou em: 1702903 tam: 52
Treino ultrapassou em: 1703028 tam: 65
Treino ultrapassou em: 1703336 tam: 280
Treino ultrapassou em: 1703441 tam: 100
Treino ultrapassou em: 1703607 tam: 154
Treino ultrapassou em: 1703699 tam: 64
Treino ultrapassou em: 1704013 tam: 70
Treino ultrapassou em: 1704119 tam: 94
Treino ultrapassou em: 1704511 tam: 170
Treino ultrapassou em: 1704679 tam: 144
Treino ultrapassou em: 1705023 tam: 80
Treino ultrapassou em: 1705361 tam: 208
Treino ultrapassou em: 1705890 tam: 60
Treino ultrapassou em: 1706035 tam: 70
Treino ultrap

Treino ultrapassou em: 1887650 tam: 60
Treino ultrapassou em: 1887990 tam: 54
Treino ultrapassou em: 1888224 tam: 72
Treino ultrapassou em: 1888416 tam: 78
Treino ultrapassou em: 1888467 tam: 51
Treino ultrapassou em: 1888756 tam: 289
Treino ultrapassou em: 1888876 tam: 80
Treino ultrapassou em: 1888956 tam: 56
Treino ultrapassou em: 1889081 tam: 65
Treino ultrapassou em: 1889448 tam: 264
Treino ultrapassou em: 1889517 tam: 51
Treino ultrapassou em: 1890134 tam: 136
Treino ultrapassou em: 1890222 tam: 68
Treino ultrapassou em: 1890453 tam: 91
Treino ultrapassou em: 1890709 tam: 60
Treino ultrapassou em: 1890885 tam: 128
Treino ultrapassou em: 1890995 tam: 100
Treino ultrapassou em: 1891121 tam: 96
Treino ultrapassou em: 1891259 tam: 102
Treino ultrapassou em: 1891692 tam: 64
Treino ultrapassou em: 1891985 tam: 247
Treino ultrapassou em: 1892273 tam: 272
Treino ultrapassou em: 1892355 tam: 78
Treino ultrapassou em: 1892644 tam: 156
Treino ultrapassou em: 1892740 tam: 56
Treino ultrapass

Treino ultrapassou em: 2075792 tam: 56
Treino ultrapassou em: 2075855 tam: 57
Treino ultrapassou em: 2076056 tam: 105
Treino ultrapassou em: 2076472 tam: 171
Treino ultrapassou em: 2077066 tam: 135
Treino ultrapassou em: 2077192 tam: 96
Treino ultrapassou em: 2077460 tam: 208
Treino ultrapassou em: 2077684 tam: 128
Treino ultrapassou em: 2077750 tam: 51
Treino ultrapassou em: 2077855 tam: 95
Treino ultrapassou em: 2078202 tam: 139
Treino ultrapassou em: 2078308 tam: 70
Treino ultrapassou em: 2078464 tam: 84
Treino ultrapassou em: 2078940 tam: 144
Treino ultrapassou em: 2079265 tam: 208
Treino ultrapassou em: 2079665 tam: 240
Treino ultrapassou em: 2079859 tam: 56
Treino ultrapassou em: 2080027 tam: 160
Treino ultrapassou em: 2080304 tam: 128
Treino ultrapassou em: 2080505 tam: 78
Treino ultrapassou em: 2080680 tam: 70
Treino ultrapassou em: 2080898 tam: 136
Treino ultrapassou em: 2081078 tam: 78
Treino ultrapassou em: 2081290 tam: 54
Treino ultrapassou em: 2081626 tam: 210
Treino ultra

Treino ultrapassou em: 2266935 tam: 51
Treino ultrapassou em: 2267085 tam: 108
Treino ultrapassou em: 2267364 tam: 117
Treino ultrapassou em: 2267535 tam: 65
Treino ultrapassou em: 2267598 tam: 60
Treino ultrapassou em: 2267926 tam: 70
Treino ultrapassou em: 2268270 tam: 100
Treino ultrapassou em: 2268502 tam: 98
Treino ultrapassou em: 2268740 tam: 112
Treino ultrapassou em: 2269054 tam: 100
Treino ultrapassou em: 2269288 tam: 96
Treino ultrapassou em: 2269388 tam: 91
Treino ultrapassou em: 2269523 tam: 63
Treino ultrapassou em: 2269749 tam: 127
Treino ultrapassou em: 2269889 tam: 112
Treino ultrapassou em: 2269991 tam: 51
Treino ultrapassou em: 2270239 tam: 50
Treino ultrapassou em: 2270299 tam: 51
Treino ultrapassou em: 2270473 tam: 60
Treino ultrapassou em: 2270697 tam: 96
Treino ultrapassou em: 2270928 tam: 84
Treino ultrapassou em: 2271094 tam: 52
Treino ultrapassou em: 2271334 tam: 192
Treino ultrapassou em: 2271801 tam: 160
Treino ultrapassou em: 2271945 tam: 96
Treino ultrapass

Treino ultrapassou em: 2451209 tam: 55
Treino ultrapassou em: 2451469 tam: 128
Treino ultrapassou em: 2451897 tam: 90
Treino ultrapassou em: 2452065 tam: 128
Treino ultrapassou em: 2452337 tam: 176
Treino ultrapassou em: 2452735 tam: 96
Gerando subtextos com foco nos tipos de POI (Validação)...
Conjunto de Treino:  176216
Conjunto de Validação:  35624
Mascarando dados...
Preparando para o treinamento...


  return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
Epoch 0: 100%|██████████| 176216/176216 [1:42:38<00:00, 28.61it/s, loss=0.0412]  
100%|██████████| 35624/35624 [04:26<00:00, 133.44it/s]


Validation loss decreased (inf --> 0.028851).  Saving model ...


Epoch 1: 100%|██████████| 176216/176216 [1:42:58<00:00, 28.52it/s, loss=0.000267]
 56%|█████▋    | 20106/35624 [02:30<01:56, 132.85it/s]

## Versão Type Split-I

- Essa versão faz a separação dos dados considerando os tipos de dados geográficos replicados
- Toda vez que o dado geográfico muda, significa que outra sequência de replicação irá iniciar
- Também há um controle de tamanho de sentenças para evitar o estouro da tokenização
- Nesse caso, pequenos documentos com base nessas mudanças são gerados
- O conjunto de validação é construído descartando-se os dados replicados

In [None]:
model_checkpoint = "distilbert-base-uncased"
tokenizer = DistilBertTokenizer.from_pretrained(model_checkpoint)

#model_checkpoint = 'roberta-base'
#tokenizer = RobertaTokenizer.from_pretrained(model_checkpoint)

SENTENCE_SIZE = 200
EPOCHS = 15
BATCH_SIZE = 1
MAX_LEN = 512
MASK_PERC = 0.15
LR = 5e-5
valid_size = 0.2
patience = 5
use_amp = True

osm_tables = ['bins_polygons_information', 'bins_points_information']
for n in range(0, 1):
    for w in np.arange(0.0, 0.1, 0.1):
        #Ajustando o parâmetro w
        wgt = round(w, 1)
        for osm_table in osm_tables:
        
            #Flag para permitir o treinamento correto
            do_training = False
            
            #Caso especial para carregar os dados de pontos e treinar apenas uma vez
            if(osm_table == 'bins_points_information' and wgt == 0.0):
                file_name = './geographic/GEOC2VEC/austin-sl-tuple-geoc2vec-' + str(n) + osm_table + '-pfp-c.parquet'
                model_name = './geographic/GEOC2VECBERT15TKT-01/austin-sl-tuple-geoc2vec-distilbert-MLM-' + str(n) + osm_table + '-pfp-c'
                #model_name = './geographic/GEOC2VECBERT15TKT-02/austin-sl-tuple-geoc2vec-roberta-MLM-' + str(n) + osm_table + '-pfp-c'
                do_training = True
                
            elif(osm_table != 'bins_points_information'):
                file_name = './geographic/GEOC2VEC/austin-sl-tuple-geoc2vec-' + str(n) + osm_table + '-wgt' + str(wgt) + 'pfp-c.parquet'
                model_name = './geographic/GEOC2VECBERT15TKT-01/austin-sl-tuple-geoc2vec-distilbert-MLM-' + str(n) + osm_table + '-wgt' + str(wgt) + '-pfp-c'
                #model_name = './geographic/GEOC2VECBERT15TKT-02/austin-sl-tuple-geoc2vec-roberta-MLM-' + str(n) + osm_table + '-wgt' + str(wgt) + '-pfp-c'
                do_training = True
                
            if(do_training):
                
                start_time = time.time()
                
                #Carregando o modelo vazio para o finetuning
                model = DistilBertForMaskedLM.from_pretrained(model_checkpoint)
                #model = RobertaForMaskedLM.from_pretrained(model_checkpoint)
                
                #Carregar dataset
                print("Carregando dados...")
                print(file_name)
                sentences = pd.read_parquet(file_name)
                validation = sentences[['center_poi', 'context_osm']].drop_duplicates()
                validation = validation.values.tolist()
                sentences = sentences.values.tolist()
                print('Quantidade de sentenças:', len(sentences))


                print("Gerando subtextos com foco nos tipos de POI (Treino)...")
                #Criando textos menores com o conjunto de duas palavras dos POIs e dados geográficos
                #Esse método considera a mudança do TIPO de dado geográfico para criar um novo subtexto
                train_sentences = []
                local_sentences = []
                count_sentences = 1
                actual_type = sentences[0][3] # => Dado geográfico
                for i, sentence in enumerate(sentences):

                    if((count_sentences % SENTENCE_SIZE) == 0):
                        
                        final_sentence = '[CLS]'.join(local_sentences)
                        train_sentences.append(final_sentence)
                        
                        count_sentences = 1
                        local_sentences = []

                    #Aqui deve ser feito um split_test
                    if(sentence[3] != actual_type):
                        
                        #Salvando a parte final
                        final_sentence = '[CLS]'.join(local_sentences)
                        train_sentences.append(final_sentence)
                        
                        actual_type = sentence[3]
                        count_sentences = 1
                        
                        #Começando o novo contexto
                        sentence_text = [sentence[1], sentence[3]]
                        local_sentences = [bertInput_clean(sentence_text)]

                    else:
                        count_sentences+=1
                        sentence_text = [sentence[1], sentence[3]]
                        local_sentences.append(bertInput_clean(sentence_text))

                #Adicionando último trecho
                if(len(local_sentences) > 0):
                    final_sentence = '[CLS]'.join(local_sentences)
                    train_sentences.append(final_sentence)
            
                #Esvaziando memória
                del sentences, local_sentences
                
                
                print("Gerando subtextos com foco nos tipos de POI (Validação)...")
                #Criando textos menores com o conjunto de duas palavras dos POIs e dados geográficos
                #Esse método considera a mudança do TIPO de dado geográfico para criar um novo subtexto
                val_sentences = []
                local_sentences = []
                count_sentences = 1
                actual_type = validation[0][1] # => Dado geográfico
                for i, sentence in enumerate(validation):

                    if((count_sentences % SENTENCE_SIZE) == 0):
                        
                        final_sentence = '[CLS]'.join(local_sentences)
                        val_sentences.append(final_sentence)
                        
                        count_sentences = 1
                        local_sentences = []

                    #Aqui deve ser feito um split_test
                    if(sentence[1] != actual_type):
                        
                        #Salvando a parte final
                        final_sentence = '[CLS]'.join(local_sentences)
                        val_sentences.append(final_sentence)
                        
                        actual_type = sentence[1]
                        count_sentences = 1
                        
                        #Começando o novo contexto
                        sentence_text = [sentence[0], sentence[1]]
                        local_sentences = [bertInput_clean(sentence_text)]

                    else:
                        count_sentences+=1
                        sentence_text = [sentence[0], sentence[1]]
                        local_sentences.append(bertInput_clean(sentence_text))

                #Adicionando último trecho
                if(len(local_sentences) > 0):
                    final_sentence = '[CLS]'.join(local_sentences)
                    val_sentences.append(final_sentence)
            
                #Esvaziando memória
                del validation, local_sentences
                
                print('Conjunto de Treino: ', len(train_sentences))
                print('Conjunto de Validação: ', len(val_sentences))
                

                #Tokenizando e salvando uma cópia dos tokens para representar as labels
                inputs_train = tokenizer(train_sentences, return_tensors='pt', max_length=MAX_LEN, truncation = True, padding='max_length')
                inputs_train['labels'] = inputs_train.input_ids.detach().clone()

                inputs_val = tokenizer(val_sentences, return_tensors='pt', max_length=MAX_LEN, truncation = True, padding='max_length')
                inputs_val['labels'] = inputs_val.input_ids.detach().clone()


                #Esvaziando memória
                del train_sentences, val_sentences

                #Encontrando MASK_PERC% das frases que serão mascaradas
                #As frases terão a segunda sentença mascarada ex: [CLS]Bar[SEP]Park[SEP] => [CLS]Bar[SEP]#####[SEP]
                print("Mascarando dados...")
                rand_train = torch.rand(inputs_train.input_ids.shape)
                rand_val = torch.rand(inputs_val.input_ids.shape)
                
                #Gerando um mascaramento em posições aleatório
                #101 = [CLS]
                #102 = [SEP]
                mask_arr_train = (rand_train < MASK_PERC) * (inputs_train.input_ids != 101) * \
                           (inputs_train.input_ids != 102) * (inputs_train.input_ids != 0)
                
                mask_arr_val = (rand_val < MASK_PERC) * (inputs_val.input_ids != 101) * \
                           (inputs_val.input_ids != 102) * (inputs_val.input_ids != 0)
                
                #Tokens que terão os tokens mascarados
                selection_train = []
                for i in range(inputs_train.input_ids.shape[0]):
                    selection_train.append(
                        torch.flatten(mask_arr_train[i].nonzero()).tolist()
                    )
                    
                selection_val = []
                for i in range(inputs_val.input_ids.shape[0]):
                    selection_val.append(
                        torch.flatten(mask_arr_val[i].nonzero()).tolist()
                    )

                #Esvaziando memória
                del rand_train, mask_arr_train, rand_val, mask_arr_val
                
                #Alterando os tokens
                #103 = [MASK]
                for i in range(inputs_train.input_ids.shape[0]):
                    inputs_train.input_ids[i, selection_train[i]] = 103
                
                for i in range(inputs_val.input_ids.shape[0]):
                    inputs_val.input_ids[i, selection_val[i]] = 103
                    
                #Esvaziando memória
                del selection_train, selection_val

                #Transformando os dados no objeto dataset do torch
                print("Preparando para o treinamento...")
                dataset_train = Dataset(inputs_train)
                loader_train = torch.utils.data.DataLoader(dataset_train, batch_size=BATCH_SIZE, shuffle=True)
                dataset_val = Dataset(inputs_val)
                loader_val = torch.utils.data.DataLoader(dataset_val, batch_size=BATCH_SIZE, shuffle=True)
                
                #Esvaziando memória
                del inputs_train, inputs_val

                #Preparando o dispotivo para o treinamento
                device = 'cuda' if cuda.is_available() else 'cpu' # CPU OR GPU
                torch.cuda.empty_cache()
                # and move our model over to the selected device
                model.to(device)

                optim = AdamW(model.parameters(), lr=LR)

                # to track the training loss as the model trains
                train_losses = []
                # to track the validation loss as the model trains
                valid_losses = []
                # to track the average training loss per epoch as the model trains
                avg_train_losses = []
                # to track the average validation loss per epoch as the model trains
                avg_valid_losses = [] 

                # initialize the early_stopping object
                early_stopping = EarlyStopping(patience=patience, verbose=False)
                scaler = torch.cuda.amp.GradScaler(enabled=use_amp)
                has_early_stopping = False

                for epoch in range(EPOCHS):
                    # setup loop with TQDM and dataloader
                    # activate training mode
                    model.train()
                    loop_train = tqdm(loader_train, leave=True)
                    for batch_train in loop_train:

                        optim.zero_grad()
                        # pull all tensor batches required for training
                        input_ids = batch_train['input_ids'].to(device)
                        attention_mask = batch_train['attention_mask'].to(device)
                        labels = batch_train['labels'].to(device)

                        with torch.autocast(device_type='cuda', dtype=torch.float16, enabled=use_amp):
                            outputs = model(input_ids, attention_mask=attention_mask,
                                        labels=labels)
                            loss = outputs.loss

                        scaler.scale(loss).backward()
                        scaler.step(optim)
                        scaler.update()

                        loop_train.set_description(f'Epoch {epoch}')
                        loop_train.set_postfix(loss=loss.item())


                        train_losses.append(loss.item())

                    ######################    
                    # validate the model #
                    ######################
                    model.eval() # prep model for evaluation
                    loop_val = tqdm(loader_val, leave=True)
                    for batch_val in loop_val:

                        # forward pass: compute predicted outputs by passing inputs to the model
                        input_ids = batch_val['input_ids'].to(device)
                        attention_mask = batch_val['attention_mask'].to(device)
                        labels = batch_val['labels'].to(device)

                        # process
                        with torch.autocast(device_type='cuda', dtype=torch.float16, enabled=use_amp):
                            outputs = model(input_ids, attention_mask=attention_mask,
                                        labels=labels)
                            loss = outputs.loss

                        # record validation loss
                        valid_losses.append(loss.item())

                    # calculate average loss over an epoch
                    train_loss = np.average(train_losses)
                    valid_loss = np.average(valid_losses)
                    avg_train_losses.append(train_loss)
                    avg_valid_losses.append(valid_loss)

                    # early_stopping needs the validation loss to check if it has decresed, 
                    # and if it has, it will make a checkpoint of the current model
                    early_stopping(valid_loss, model)

                    if early_stopping.early_stop:
                        print("Early stopping")
                        final_time = (time.time() - start_time)
                        has_early_stopping = True
                        training_dictionary = {'epoch': epoch+1,
                                               'epochs': EPOCHS,
                                               'patience': patience,
                                               'train_loss': train_loss,
                                               'valid_loss': valid_loss,
                                               'avg_train_losses':avg_train_losses,
                                               'avg_valid_losses': avg_valid_losses,
                                               'time': final_time}
                        break

                    # clear lists to track next epoch
                    train_losses = []
                    valid_losses = []

                # load the last checkpoint with the best model
                model.load_state_dict(torch.load('checkpoint.pt'))


                #Novo tempo para o caso das coisas terem sido diferentes
                if(has_early_stopping == False):
                    final_time = (time.time() - start_time)
                    training_dictionary = {'epoch': epoch+1,
                                           'epochs': EPOCHS,
                                           'patience': patience,
                                           'train_loss': train_loss,
                                           'valid_loss': valid_loss,
                                           'avg_train_losses':avg_train_losses,
                                           'avg_valid_losses': avg_valid_losses,
                                           'time': final_time}

                #Salvando o modelo pronto
                print("Salvando o modelo...")
                args = TrainingArguments(
                    output_dir=model_name,
                    per_device_train_batch_size=BATCH_SIZE,
                    num_train_epochs=EPOCHS
                )

                trainer = Trainer(
                    model=model,
                    args=args,
                    train_dataset=dataset_train,
                    eval_dataset=dataset_val
                )

                trainer.save_model()
                
                #Salvando as estatísticas do treinamento
                file_name = model_name + '/training_dictionary.json'
                with open(file_name, "w") as outfile:
                    json.dump(training_dictionary, outfile)
                
                #Esvaziando memória
                del loader_train
                del loader_val
                del dataset_train
                del dataset_val
                del train_losses
                del valid_losses
                del avg_train_losses
                del avg_valid_losses
                del trainer

Carregando dados...
./geographic/GEOC2VEC/austin-sl-tuple-geoc2vec-0bins_polygons_information-wgt0.0pfp-c.parquet
Quantidade de sentenças: 2157954
Gerando subtextos com foco nos tipos de POI (Treino)...
Gerando subtextos com foco nos tipos de POI (Validação)...
Conjunto de Treino:  89172
Conjunto de Validação:  20694
Mascarando dados...
Preparando para o treinamento...


  return {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
Epoch 0: 100%|██████████| 89172/89172 [51:01<00:00, 29.13it/s, loss=1.58e-7] 
100%|██████████| 20694/20694 [02:30<00:00, 137.86it/s]
Epoch 1: 100%|██████████| 89172/89172 [50:27<00:00, 29.46it/s, loss=1.33e-5] 
100%|██████████| 20694/20694 [02:26<00:00, 141.14it/s]
Epoch 2:  78%|███████▊  | 69399/89172 [39:00<11:06, 29.65it/s, loss=0.0109]  

## Versão Type Split-II

- Essa versão faz a separação dos dados considerando os tipos de dados geográficos replicados
- Toda vez que o dado geográfico muda, significa que outra sequência de replicação irá iniciar
- Também há um controle de tamanho de sentenças para evitar o estouro da tokenização
- Nesse caso, pequenos documentos com base nessas mudanças são gerados
- O conjunto de validação é feito utilizando o split 80-20

In [None]:
model_checkpoint = "distilbert-base-uncased"
tokenizer = DistilBertTokenizer.from_pretrained(model_checkpoint)

SENTENCE_SIZE = 200
EPOCHS = 15
BATCH_SIZE = 4
MAX_LEN = 512
MASK_PERC = 0.15
LR = 5e-5
valid_size = 0.2
patience = 5
use_amp = True

osm_tables = ['bins_points_information', 'bins_polygons_information', 'bins_roads_information', 'bins_lines_information']
for n in range(0, 1):
    for w in np.arange(0.3, 1.1, 0.1):
        #Ajustando o parâmetro w
        wgt = round(w, 1)
        for osm_table in osm_tables:
        
            #Flag para permitir o treinamento correto
            do_training = False
            
            #Caso especial para carregar os dados de pontos e treinar apenas uma vez
            if(osm_table == 'bins_points_information' and wgt == 0.0):
                file_name = './geographic/GEOC2VEC/austin-sl-tuple-geoc2vec-' + str(n) + osm_table + '-pfp-c.parquet'
                model_name = './geographic/GEOC2VECBERT15TKT/austin-sl-tuple-geoc2vec-distilbert-MLM-' + str(n) + osm_table + '-pfp-c'
                do_training = True
                
            elif(osm_table != 'bins_points_information'):
                file_name = './geographic/GEOC2VEC/austin-sl-tuple-geoc2vec-' + str(n) + osm_table + '-wgt' + str(wgt) + 'pfp-c.parquet'
                model_name = './geographic/GEOC2VECBERT15TKT/austin-sl-tuple-geoc2vec-distilbert-MLM-' + str(n) + osm_table + '-wgt' + str(wgt) + '-pfp-c'
                do_training = True
                
            if(do_training):
                
                start_time = time.time()
                
                #Carregando o modelo vazio para o finetuning
                model = DistilBertForMaskedLM.from_pretrained(model_checkpoint)

                #Carregar dataset
                print("Carregando dados...")
                print(file_name)
                sentences = pd.read_parquet(file_name)
                sentences = sentences.values.tolist()
                print('Quantidade de sentenças:', len(sentences))


                print("Gerando subtextos com foco nos tipos de POI...")
                #Criando textos menores com o conjunto de duas palavras dos POIs e dados geográficos
                #Esse método considera a mudança do TIPO de dado geográfico para criar um novo subtexto
                bert_sentences = []
                local_sentences = []
                count_sentences = 1
                actual_type = sentences[0][3] # => Dado geográfico
                for i, sentence in enumerate(sentences):

                    if((count_sentences % SENTENCE_SIZE) == 0):
                        count_sentences = 1
                        final_sentence = '[CLS]'.join(local_sentences)
                        bert_sentences.append(final_sentence)
                        local_sentences = []

                    #Aqui deve ser feito um split_test
                    elif(sentence[3] != actual_type):
                        actual_type = sentence[3]
                        count_sentences = 1
                        final_sentence = '[CLS]'.join(local_sentences)
                        bert_sentences.append(final_sentence)
                        local_sentences = []

                    else:
                        count_sentences+=1
                        sentence_text = [sentence[1], sentence[3]]
                        local_sentences.append(bertInput_clean(sentence_text))

                #Adicionando último trecho
                if(len(local_sentences) > 0):
                    final_sentence = '[CLS]'.join(local_sentences)
                    bert_sentences.append(final_sentence)
            
                #Esvaziando memória
                del sentences, local_sentences
                
                print("Gerando conjunto de Treino e Validação...")
                train, validation = train_test_split(bert_sentences, test_size=valid_size, random_state=42)
                print('Conjunto de Treino: ', len(train))
                print('Conjunto de Validação: ', len(validation))
                
                #Esvaziando memória
                del bert_sentences

                #Tokenizando e salvando uma cópia dos tokens para representar as labels
                inputs_train = tokenizer(train, return_tensors='pt', max_length=MAX_LEN, truncation = True, padding='max_length')
                inputs_train['labels'] = inputs_train.input_ids.detach().clone()

                inputs_val = tokenizer(validation, return_tensors='pt', max_length=MAX_LEN, truncation = True, padding='max_length')
                inputs_val['labels'] = inputs_val.input_ids.detach().clone()


                #Esvaziando memória
                del train, validation

                #Encontrando MASK_PERC% das frases que serão mascaradas
                #As frases terão a segunda sentença mascarada ex: [CLS]Bar[SEP]Park[SEP] => [CLS]Bar[SEP]#####[SEP]
                print("Mascarando dados...")
                rand_train = torch.rand(inputs_train.input_ids.shape)
                rand_val = torch.rand(inputs_val.input_ids.shape)
                
                #Gerando um mascaramento em posições aleatório
                #101 = [CLS]
                #102 = [SEP]
                mask_arr_train = (rand_train < MASK_PERC) * (inputs_train.input_ids != 101) * \
                           (inputs_train.input_ids != 102) * (inputs_train.input_ids != 0)
                
                mask_arr_val = (rand_val < MASK_PERC) * (inputs_val.input_ids != 101) * \
                           (inputs_val.input_ids != 102) * (inputs_val.input_ids != 0)
                
                #Tokens que terão os tokens mascarados
                selection_train = []
                for i in range(inputs_train.input_ids.shape[0]):
                    selection_train.append(
                        torch.flatten(mask_arr_train[i].nonzero()).tolist()
                    )
                    
                selection_val = []
                for i in range(inputs_val.input_ids.shape[0]):
                    selection_val.append(
                        torch.flatten(mask_arr_val[i].nonzero()).tolist()
                    )

                #Esvaziando memória
                del rand_train, mask_arr_train, rand_val, mask_arr_val
                
                #Alterando os tokens
                #103 = [MASK]
                for i in range(inputs_train.input_ids.shape[0]):
                    inputs_train.input_ids[i, selection_train[i]] = 103
                
                for i in range(inputs_val.input_ids.shape[0]):
                    inputs_val.input_ids[i, selection_val[i]] = 103
                    
                #Esvaziando memória
                del selection_train, selection_val

                #Transformando os dados no objeto dataset do torch
                print("Preparando para o treinamento...")
                dataset_train = Dataset(inputs_train)
                loader_train = torch.utils.data.DataLoader(dataset_train, batch_size=BATCH_SIZE, shuffle=True)
                dataset_val = Dataset(inputs_val)
                loader_val = torch.utils.data.DataLoader(dataset_val, batch_size=BATCH_SIZE, shuffle=True)
                
                #Esvaziando memória
                del inputs_train, inputs_val

                #Preparando o dispotivo para o treinamento
                device = 'cuda' if cuda.is_available() else 'cpu' # CPU OR GPU
                torch.cuda.empty_cache()
                # and move our model over to the selected device
                model.to(device)

                optim = AdamW(model.parameters(), lr=LR)

                # to track the training loss as the model trains
                train_losses = []
                # to track the validation loss as the model trains
                valid_losses = []
                # to track the average training loss per epoch as the model trains
                avg_train_losses = []
                # to track the average validation loss per epoch as the model trains
                avg_valid_losses = [] 

                # initialize the early_stopping object
                early_stopping = EarlyStopping(patience=patience, verbose=False)
                scaler = torch.cuda.amp.GradScaler(enabled=use_amp)
                has_early_stopping = False

                for epoch in range(EPOCHS):
                    # setup loop with TQDM and dataloader
                    # activate training mode
                    model.train()
                    loop_train = tqdm(loader_train, leave=True)
                    for batch_train in loop_train:

                        optim.zero_grad()
                        # pull all tensor batches required for training
                        input_ids = batch_train['input_ids'].to(device)
                        attention_mask = batch_train['attention_mask'].to(device)
                        labels = batch_train['labels'].to(device)

                        with torch.autocast(device_type='cuda', dtype=torch.float16, enabled=use_amp):
                            outputs = model(input_ids, attention_mask=attention_mask,
                                        labels=labels)
                            loss = outputs.loss

                        scaler.scale(loss).backward()
                        scaler.step(optim)
                        scaler.update()

                        loop_train.set_description(f'Epoch {epoch}')
                        loop_train.set_postfix(loss=loss.item())


                        train_losses.append(loss.item())

                    ######################    
                    # validate the model #
                    ######################
                    model.eval() # prep model for evaluation
                    loop_val = tqdm(loader_val, leave=True)
                    for batch_val in loop_val:

                        # forward pass: compute predicted outputs by passing inputs to the model
                        input_ids = batch_val['input_ids'].to(device)
                        attention_mask = batch_val['attention_mask'].to(device)
                        labels = batch_val['labels'].to(device)

                        # process
                        with torch.autocast(device_type='cuda', dtype=torch.float16, enabled=use_amp):
                            outputs = model(input_ids, attention_mask=attention_mask,
                                        labels=labels)
                            loss = outputs.loss

                        # record validation loss
                        valid_losses.append(loss.item())

                    # calculate average loss over an epoch
                    train_loss = np.average(train_losses)
                    valid_loss = np.average(valid_losses)
                    avg_train_losses.append(train_loss)
                    avg_valid_losses.append(valid_loss)

                    # early_stopping needs the validation loss to check if it has decresed, 
                    # and if it has, it will make a checkpoint of the current model
                    early_stopping(valid_loss, model)

                    if early_stopping.early_stop:
                        print("Early stopping")
                        final_time = (time.time() - start_time)
                        has_early_stopping = True
                        training_dictionary = {'epoch': epoch+1,
                                               'epochs': EPOCHS,
                                               'patience': patience,
                                               'train_loss': train_loss,
                                               'valid_loss': valid_loss,
                                               'avg_train_losses':avg_train_losses,
                                               'avg_valid_losses': avg_valid_losses,
                                               'time': final_time}
                        break

                    # clear lists to track next epoch
                    train_losses = []
                    valid_losses = []

                # load the last checkpoint with the best model
                model.load_state_dict(torch.load('checkpoint.pt'))


                #Novo tempo para o caso das coisas terem sido diferentes
                if(has_early_stopping == False):
                    final_time = (time.time() - start_time)
                    training_dictionary = {'epoch': epoch+1,
                                           'epochs': EPOCHS,
                                           'patience': patience,
                                           'train_loss': train_loss,
                                           'valid_loss': valid_loss,
                                           'avg_train_losses':avg_train_losses,
                                           'avg_valid_losses': avg_valid_losses,
                                           'time': final_time}

                #Salvando o modelo pronto
                print("Salvando o modelo...")
                args = TrainingArguments(
                    output_dir=model_name,
                    per_device_train_batch_size=BATCH_SIZE,
                    num_train_epochs=EPOCHS
                )

                trainer = Trainer(
                    model=model,
                    args=args,
                    train_dataset=dataset_train,
                    eval_dataset=dataset_val
                )

                trainer.save_model()
                
                #Salvando as estatísticas do treinamento
                file_name = model_name + '/training_dictionary.json'
                with open(file_name, "w") as outfile:
                    json.dump(training_dictionary, outfile)
                
                #Esvaziando memória
                del loader_train
                del loader_val
                del dataset_train
                del dataset_val
                del train_losses
                del valid_losses
                del avg_train_losses
                del avg_valid_losses
                del trainer

## Versão Type Split-II.I

- Essa versão faz a separação dos dados considerando os tipos de dados geográficos replicados
- Toda vez que o dado geográfico muda, significa que outra sequência de replicação irá iniciar
- Também há um controle de tamanho de sentenças para evitar o estouro da tokenização
- Nesse caso, pequenos documentos com base nessas mudanças são gerados
- O conjunto de validação é feito utilizando o split 80-20
- Não trabalha com Early Stop

In [7]:
model_checkpoint = "distilbert-base-uncased"
tokenizer = DistilBertTokenizer.from_pretrained(model_checkpoint)

SENTENCE_SIZE = 200
EPOCHS = 3
BATCH_SIZE = 4
MAX_LEN = 512
MASK_PERC = 0.15
LR = 5e-5
valid_size = 0.2
patience = 5
use_amp = True

#weights = [0.0, 0.5, 1.0]
#weights = [0.1, 0.2, 0.3, 0.4, 0.6, 0.7, 0.8, 0.9]
weights = [0.5]

#osm_tables = ['bins_points_information', 'bins_polygons_information', 'bins_roads_information', 'bins_lines_information']
osm_tables = ['bins_polygons_information']
for n in range(4, 5):
    
    #for w in np.arange(0.3, 1.1, 0.1):
    for w in weights:
        #Ajustando o parâmetro w
        wgt = round(w, 1)
        for osm_table in osm_tables:
        
            #Flag para permitir o treinamento correto
            do_training = False
            
            #Caso especial para carregar os dados de pontos e treinar apenas uma vez
            if(osm_table == 'bins_points_information' and wgt == 0.0):
                file_name = './geographic/GEOC2VEC PROP/austin-sl-tuple-geoc2vec-' + str(n) + osm_table + '-pfp-c.parquet'
                model_name = './geographic/GEOC2VEC2BERT15TKT-01/austin-sl-tuple-geoc2vec-distilbert-MLM-' + str(n) + osm_table + '-pfp-c'
                
                do_training = True
                
            elif(osm_table != 'bins_points_information'):
                
#                 if (osm_table == 'bins_polygons_information'):
#                     file_name = './geographic/GEOC2VEC2/austin-sl-tuple-geoc2vec-' + str(n) + osm_table + '-wgt' + str(wgt) + 'pfp-c.csv'
#                 else:
                file_name = './geographic/GEOC2VEC PROP/austin-sl-tuple-geoc2vec-' + str(n) + osm_table + '-wgt' + str(wgt) + 'pfp-c.parquet'
                model_name = './geographic/GEOC2VEC2BERT15TKT-01/austin-sl-tuple-geoc2vec-distilbert-MLM-' + str(n) + osm_table + '-wgt' + str(wgt) + '-pfp-c'
                do_training = True
                
            if(do_training):
                
                start_time = time.time()
                
                #Carregando o modelo vazio para o finetuning
                model = DistilBertForMaskedLM.from_pretrained(model_checkpoint)

                #Carregar dataset
                print("Carregando dados...")
                print(file_name)
                sentences = pd.read_parquet(file_name)
                sentences = sentences.values.tolist()
                print('Quantidade de sentenças:', len(sentences))


                print("Gerando subtextos com foco nos tipos de POI...")
                #Criando textos menores com o conjunto de duas palavras dos POIs e dados geográficos
                #Esse método considera a mudança do TIPO de dado geográfico para criar um novo subtexto
                bert_sentences = []
                local_sentences = []
                actual_type = sentences[0][3] # => Dado geográfico
                for i, sentence in enumerate(sentences):

                    if(sentence[3] != actual_type):
                        
                        final_sentence = '[SEP]'.join(local_sentences)
                        bert_sentences.append(final_sentence)
                        
                        actual_type = sentence[3]
                        sentence_text = [sentence[1], sentence[3]]
                        local_sentences = [input_clean(sentence_text)]

                    else:
                        sentence_text = [sentence[1], sentence[3]]
                        local_sentences.append(input_clean(sentence_text))

                #Adicionando último trecho
                if(len(local_sentences) > 0):
                    print(len(local_sentences))
#                     final_sentence = '[SEP]'.join(local_sentences)
#                     print(final_sentence)
                    bert_sentences.append(final_sentence)
            
                #Esvaziando memória
                del sentences, local_sentences
                
                print("Gerando conjunto de Treino e Validação...")
                train, validation = train_test_split(bert_sentences, test_size=valid_size, random_state=42)
                print('Conjunto de Treino: ', len(train))
                print('Conjunto de Validação: ', len(validation))
                
                #print(bert_sentences[0])
                
                #Esvaziando memória
                del bert_sentences

                #Tokenizando e salvando uma cópia dos tokens para representar as labels
                inputs_train = tokenizer(train, return_tensors='pt', max_length=MAX_LEN, truncation = True, padding=True)
                inputs_train['labels'] = inputs_train.input_ids.detach().clone()

                inputs_val = tokenizer(validation, return_tensors='pt', max_length=MAX_LEN, truncation = True, padding=True)
                inputs_val['labels'] = inputs_val.input_ids.detach().clone()


                #Esvaziando memória
                del train, validation

                #Encontrando MASK_PERC% das frases que serão mascaradas
                #As frases terão a segunda sentença mascarada ex: [CLS]Bar[SEP]Park[SEP] => [CLS]Bar[SEP]#####[SEP]
                print("Mascarando dados...")
                rand_train = torch.rand(inputs_train.input_ids.shape)
                rand_val = torch.rand(inputs_val.input_ids.shape)
                
                #Gerando um mascaramento em posições aleatório
                #101 = [CLS]
                #102 = [SEP]
                mask_arr_train = (rand_train < MASK_PERC) * (inputs_train.input_ids != 101) * \
                           (inputs_train.input_ids != 102) * (inputs_train.input_ids != 0)
                
                mask_arr_val = (rand_val < MASK_PERC) * (inputs_val.input_ids != 101) * \
                           (inputs_val.input_ids != 102) * (inputs_val.input_ids != 0)
                
                #Tokens que terão os tokens mascarados
                selection_train = []
                for i in range(inputs_train.input_ids.shape[0]):
                    selection_train.append(
                        torch.flatten(mask_arr_train[i].nonzero()).tolist()
                    )
                    
                selection_val = []
                for i in range(inputs_val.input_ids.shape[0]):
                    selection_val.append(
                        torch.flatten(mask_arr_val[i].nonzero()).tolist()
                    )

                #Esvaziando memória
                del rand_train, mask_arr_train, rand_val, mask_arr_val
                
                #Alterando os tokens
                #103 = [MASK]
                for i in range(inputs_train.input_ids.shape[0]):
                    inputs_train.input_ids[i, selection_train[i]] = 103
                
                for i in range(inputs_val.input_ids.shape[0]):
                    inputs_val.input_ids[i, selection_val[i]] = 103
                    
                #Esvaziando memória
                del selection_train, selection_val

                #Transformando os dados no objeto dataset do torch
                print("Preparando para o treinamento...")
                dataset_train = Dataset(inputs_train)
                loader_train = torch.utils.data.DataLoader(dataset_train, batch_size=BATCH_SIZE, shuffle=True)
                dataset_val = Dataset(inputs_val)
                loader_val = torch.utils.data.DataLoader(dataset_val, batch_size=BATCH_SIZE, shuffle=True)
                
                #Esvaziando memória
                del inputs_train, inputs_val

                #Preparando o dispotivo para o treinamento
                device = 'cuda' if cuda.is_available() else 'cpu' # CPU OR GPU
                torch.cuda.empty_cache()
                # and move our model over to the selected device
                model.to(device)

                optim = AdamW(model.parameters(), lr=LR)

                # to track the training loss as the model trains
                train_losses = []
                # to track the validation loss as the model trains
                valid_losses = []
                # to track the average training loss per epoch as the model trains
                avg_train_losses = []
                # to track the average validation loss per epoch as the model trains
                avg_valid_losses = [] 

                # initialize the early_stopping object
                early_stopping = EarlyStopping(patience=patience, verbose=False)
                scaler = torch.cuda.amp.GradScaler(enabled=use_amp)
                has_early_stopping = False

                for epoch in range(EPOCHS):
                    # setup loop with TQDM and dataloader
                    # activate training mode
                    model.train()
                    loop_train = tqdm(loader_train, leave=True)
                    for batch_train in loop_train:

                        optim.zero_grad()
                        # pull all tensor batches required for training
                        input_ids = batch_train['input_ids'].to(device)
                        attention_mask = batch_train['attention_mask'].to(device)
                        labels = batch_train['labels'].to(device)

                        with torch.autocast(device_type='cuda', dtype=torch.float16, enabled=use_amp):
                            outputs = model(input_ids, attention_mask=attention_mask,
                                        labels=labels)
                            loss = outputs.loss

                        scaler.scale(loss).backward()
                        scaler.step(optim)
                        scaler.update()

                        loop_train.set_description(f'Epoch {epoch}')
                        loop_train.set_postfix(loss=loss.item())


                        train_losses.append(loss.item())

                    ######################    
                    # validate the model #
                    ######################
                    model.eval() # prep model for evaluation
                    loop_val = tqdm(loader_val, leave=True)
                    for batch_val in loop_val:

                        # forward pass: compute predicted outputs by passing inputs to the model
                        input_ids = batch_val['input_ids'].to(device)
                        attention_mask = batch_val['attention_mask'].to(device)
                        labels = batch_val['labels'].to(device)

                        # process
                        with torch.autocast(device_type='cuda', dtype=torch.float16, enabled=use_amp):
                            outputs = model(input_ids, attention_mask=attention_mask,
                                        labels=labels)
                            loss = outputs.loss

                        # record validation loss
                        valid_losses.append(loss.item())

                    # calculate average loss over an epoch
                    train_loss = np.average(train_losses)
                    valid_loss = np.average(valid_losses)
                    avg_train_losses.append(train_loss)
                    avg_valid_losses.append(valid_loss)

                    # clear lists to track next epoch
                    train_losses = []
                    valid_losses = []

                #Salvando o modelo pronto
                print("Salvando o modelo...")
                args = TrainingArguments(
                    output_dir=model_name,
                    per_device_train_batch_size=BATCH_SIZE,
                    num_train_epochs=EPOCHS
                )

                trainer = Trainer(
                    model=model,
                    args=args,
                    train_dataset=dataset_train,
                    eval_dataset=dataset_val
                )

                #trainer.save_model()
                
                final_time = (time.time() - start_time)
#                 training_dictionary = {'epoch': (EPOCHS+1),
#                                        'epochs': EPOCHS,
#                                        'patience': patience,
#                                        'train_loss': train_loss,
#                                        'valid_loss': valid_loss,
#                                        'avg_train_losses':avg_train_losses,
#                                        'avg_valid_losses': avg_valid_losses,
#                                        'time': final_time}
                
#                 #Salvando as estatísticas do treinamento
#                 file_name = model_name + '/training_dictionary.json'
#                 with open(file_name, "w") as outfile:
#                     json.dump(training_dictionary, outfile)
                
                #Esvaziando memória
                del loader_train
                del loader_val
                del dataset_train
                del dataset_val
                del train_losses
                del valid_losses
                del avg_train_losses
                del avg_valid_losses
                del trainer

Carregando dados...
./geographic/GEOC2VEC PROP/austin-sl-tuple-geoc2vec-4bins_polygons_information-wgt0.5pfp-c.parquet
Quantidade de sentenças: 3755228
Gerando subtextos com foco nos tipos de POI...
32
Gerando conjunto de Treino e Validação...
Conjunto de Treino:  350963
Conjunto de Validação:  87741
Mascarando dados...



  

Epoch 0:   0%|          | 0/87741 [00:00<?, ?it/s][A
Epoch 0:   0%|          | 0/87741 [00:00<?, ?it/s, loss=14.9][A
Epoch 0:   0%|          | 0/87741 [00:00<?, ?it/s, loss=14.9][A
Epoch 0:   0%|          | 0/87741 [00:00<?, ?it/s, loss=15.3][A
Epoch 0:   0%|          | 0/87741 [00:00<?, ?it/s, loss=15.3][A
Epoch 0:   0%|          | 0/87741 [00:00<?, ?it/s, loss=13.4][A
Epoch 0:   0%|          | 0/87741 [00:00<?, ?it/s, loss=13.4][A
Epoch 0:   0%|          | 0/87741 [00:00<?, ?it/s, loss=16.6][A
Epoch 0:   0%|          | 0/87741 [00:00<?, ?it/s, loss=16.6][A

Preparando para o treinamento...



Epoch 0:   0%|          | 0/87741 [00:00<?, ?it/s, loss=15.2][A
Epoch 0:   0%|          | 5/87741 [00:00<33:25, 43.76it/s, loss=15.2][A
Epoch 0:   0%|          | 5/87741 [00:00<33:25, 43.76it/s, loss=15.2][A
Epoch 0:   0%|          | 5/87741 [00:00<33:25, 43.76it/s, loss=14.7][A
Epoch 0:   0%|          | 5/87741 [00:00<33:25, 43.76it/s, loss=14.7][A
Epoch 0:   0%|          | 5/87741 [00:00<33:25, 43.76it/s, loss=17.1][A
Epoch 0:   0%|          | 5/87741 [00:00<33:25, 43.76it/s, loss=17.1][A
Epoch 0:   0%|          | 5/87741 [00:00<33:25, 43.76it/s, loss=12.2][A
Epoch 0:   0%|          | 5/87741 [00:00<33:25, 43.76it/s, loss=12.2][A
Epoch 0:   0%|          | 5/87741 [00:00<33:25, 43.76it/s, loss=10.1][A
Epoch 0:   0%|          | 9/87741 [00:00<34:50, 41.97it/s, loss=10.1][A
Epoch 0:   0%|          | 9/87741 [00:00<34:50, 41.97it/s, loss=10.1][A
Epoch 0:   0%|          | 9/87741 [00:00<34:50, 41.97it/s, loss=10.1][A
Epoch 0:   0%|          | 9/87741 [00:00<34:50, 41.97it/s,

Epoch 0:   0%|          | 101/87741 [00:02<37:07, 39.34it/s, loss=0.134][A
Epoch 0:   0%|          | 101/87741 [00:02<37:07, 39.34it/s, loss=0.0749][A
Epoch 0:   0%|          | 101/87741 [00:02<37:07, 39.34it/s, loss=0.0749][A
Epoch 0:   0%|          | 101/87741 [00:02<37:07, 39.34it/s, loss=0.0794][A
Epoch 0:   0%|          | 101/87741 [00:02<37:07, 39.34it/s, loss=0.0794][A
Epoch 0:   0%|          | 101/87741 [00:02<37:07, 39.34it/s, loss=0.083] [A
Epoch 0:   0%|          | 101/87741 [00:02<37:07, 39.34it/s, loss=0.083][A
Epoch 0:   0%|          | 101/87741 [00:02<37:07, 39.34it/s, loss=0.151][A
Epoch 0:   0%|          | 105/87741 [00:02<37:05, 39.37it/s, loss=0.151][A
Epoch 0:   0%|          | 105/87741 [00:02<37:05, 39.37it/s, loss=0.151][A
Epoch 0:   0%|          | 105/87741 [00:02<37:05, 39.37it/s, loss=0.0853][A
Epoch 0:   0%|          | 105/87741 [00:02<37:05, 39.37it/s, loss=0.0853][A
Epoch 0:   0%|          | 105/87741 [00:02<37:05, 39.37it/s, loss=0.0652][A
Epoc

Epoch 0:   0%|          | 193/87741 [00:04<37:02, 39.40it/s, loss=0.0255][A
Epoch 0:   0%|          | 193/87741 [00:04<37:02, 39.40it/s, loss=0.0255][A
Epoch 0:   0%|          | 193/87741 [00:04<37:02, 39.40it/s, loss=0.0394][A
Epoch 0:   0%|          | 197/87741 [00:04<37:01, 39.40it/s, loss=0.0394][A
Epoch 0:   0%|          | 197/87741 [00:05<37:01, 39.40it/s, loss=0.0394][A
Epoch 0:   0%|          | 197/87741 [00:05<37:01, 39.40it/s, loss=0.0355][A
Epoch 0:   0%|          | 197/87741 [00:05<37:01, 39.40it/s, loss=0.0355][A
Epoch 0:   0%|          | 197/87741 [00:05<37:01, 39.40it/s, loss=0.042] [A
Epoch 0:   0%|          | 197/87741 [00:05<37:01, 39.40it/s, loss=0.042][A
Epoch 0:   0%|          | 197/87741 [00:05<37:01, 39.40it/s, loss=0.0318][A
Epoch 0:   0%|          | 197/87741 [00:05<37:01, 39.40it/s, loss=0.0318][A
Epoch 0:   0%|          | 197/87741 [00:05<37:01, 39.40it/s, loss=0.0216][A
Epoch 0:   0%|          | 201/87741 [00:05<37:02, 39.40it/s, loss=0.0216][A


Epoch 0:   0%|          | 289/87741 [00:07<37:10, 39.20it/s, loss=0.019] [A
Epoch 0:   0%|          | 289/87741 [00:07<37:10, 39.20it/s, loss=0.019][A
Epoch 0:   0%|          | 289/87741 [00:07<37:10, 39.20it/s, loss=0.0224][A
Epoch 0:   0%|          | 289/87741 [00:07<37:10, 39.20it/s, loss=0.0224][A
Epoch 0:   0%|          | 289/87741 [00:07<37:10, 39.20it/s, loss=0.02]  [A
Epoch 0:   0%|          | 289/87741 [00:07<37:10, 39.20it/s, loss=0.02][A
Epoch 0:   0%|          | 289/87741 [00:07<37:10, 39.20it/s, loss=0.015][A
Epoch 0:   0%|          | 293/87741 [00:07<37:07, 39.25it/s, loss=0.015][A
Epoch 0:   0%|          | 293/87741 [00:07<37:07, 39.25it/s, loss=0.015][A
Epoch 0:   0%|          | 293/87741 [00:07<37:07, 39.25it/s, loss=0.014][A
Epoch 0:   0%|          | 293/87741 [00:07<37:07, 39.25it/s, loss=0.014][A
Epoch 0:   0%|          | 293/87741 [00:07<37:07, 39.25it/s, loss=0.021][A
Epoch 0:   0%|          | 293/87741 [00:07<37:07, 39.25it/s, loss=0.021][A
Epoch 0: 

Epoch 0:   0%|          | 381/87741 [00:09<37:04, 39.27it/s, loss=0.0107][A
Epoch 0:   0%|          | 381/87741 [00:09<37:04, 39.27it/s, loss=0.0107][A
Epoch 0:   0%|          | 381/87741 [00:09<37:04, 39.27it/s, loss=0.0184][A
Epoch 0:   0%|          | 385/87741 [00:09<37:03, 39.28it/s, loss=0.0184][A
Epoch 0:   0%|          | 385/87741 [00:09<37:03, 39.28it/s, loss=0.0184][A
Epoch 0:   0%|          | 385/87741 [00:09<37:03, 39.28it/s, loss=0.0291][A
Epoch 0:   0%|          | 385/87741 [00:09<37:03, 39.28it/s, loss=0.0291][A
Epoch 0:   0%|          | 385/87741 [00:09<37:03, 39.28it/s, loss=0.018] [A
Epoch 0:   0%|          | 385/87741 [00:09<37:03, 39.28it/s, loss=0.018][A
Epoch 0:   0%|          | 385/87741 [00:09<37:03, 39.28it/s, loss=0.0336][A
Epoch 0:   0%|          | 385/87741 [00:09<37:03, 39.28it/s, loss=0.0336][A
Epoch 0:   0%|          | 385/87741 [00:09<37:03, 39.28it/s, loss=0.0103][A
Epoch 0:   0%|          | 389/87741 [00:09<37:03, 39.28it/s, loss=0.0103][A


Epoch 0:   1%|          | 477/87741 [00:12<37:00, 39.30it/s, loss=0.00424][A
Epoch 0:   1%|          | 477/87741 [00:12<37:00, 39.30it/s, loss=0.00424][A
Epoch 0:   1%|          | 477/87741 [00:12<37:00, 39.30it/s, loss=0.0024] [A
Epoch 0:   1%|          | 477/87741 [00:12<37:00, 39.30it/s, loss=0.0024][A
Epoch 0:   1%|          | 477/87741 [00:12<37:00, 39.30it/s, loss=0.00838][A
Epoch 0:   1%|          | 477/87741 [00:12<37:00, 39.30it/s, loss=0.00838][A
Epoch 0:   1%|          | 477/87741 [00:12<37:00, 39.30it/s, loss=0.00362][A
Epoch 0:   1%|          | 477/87741 [00:12<37:00, 39.30it/s, loss=0.00362][A
Epoch 0:   1%|          | 477/87741 [00:12<37:00, 39.30it/s, loss=0.00846][A
Epoch 0:   1%|          | 481/87741 [00:12<37:01, 39.28it/s, loss=0.00846][A
Epoch 0:   1%|          | 481/87741 [00:12<37:01, 39.28it/s, loss=0.00846][A
Epoch 0:   1%|          | 481/87741 [00:12<37:01, 39.28it/s, loss=0.0126] [A
Epoch 0:   1%|          | 481/87741 [00:12<37:01, 39.28it/s, loss

Epoch 0:   1%|          | 569/87741 [00:14<36:59, 39.27it/s, loss=0.00637][A
Epoch 0:   1%|          | 569/87741 [00:14<36:59, 39.27it/s, loss=0.00637][A
Epoch 0:   1%|          | 569/87741 [00:14<36:59, 39.27it/s, loss=0.0163] [A
Epoch 0:   1%|          | 569/87741 [00:14<36:59, 39.27it/s, loss=0.0163][A
Epoch 0:   1%|          | 569/87741 [00:14<36:59, 39.27it/s, loss=0.00496][A
Epoch 0:   1%|          | 573/87741 [00:14<37:01, 39.24it/s, loss=0.00496][A
Epoch 0:   1%|          | 573/87741 [00:14<37:01, 39.24it/s, loss=0.00496][A
Epoch 0:   1%|          | 573/87741 [00:14<37:01, 39.24it/s, loss=0.00853][A
Epoch 0:   1%|          | 573/87741 [00:14<37:01, 39.24it/s, loss=0.00853][A
Epoch 0:   1%|          | 573/87741 [00:14<37:01, 39.24it/s, loss=0.00446][A
Epoch 0:   1%|          | 573/87741 [00:14<37:01, 39.24it/s, loss=0.00446][A
Epoch 0:   1%|          | 573/87741 [00:14<37:01, 39.24it/s, loss=0.0114] [A
Epoch 0:   1%|          | 573/87741 [00:14<37:01, 39.24it/s, loss

Epoch 0:   1%|          | 661/87741 [00:16<36:54, 39.32it/s, loss=0.013][A
Epoch 0:   1%|          | 661/87741 [00:16<36:54, 39.32it/s, loss=0.00475][A
Epoch 0:   1%|          | 665/87741 [00:16<36:55, 39.31it/s, loss=0.00475][A
Epoch 0:   1%|          | 665/87741 [00:16<36:55, 39.31it/s, loss=0.00475][A
Epoch 0:   1%|          | 665/87741 [00:16<36:55, 39.31it/s, loss=0.00511][A
Epoch 0:   1%|          | 665/87741 [00:16<36:55, 39.31it/s, loss=0.00511][A
Epoch 0:   1%|          | 665/87741 [00:16<36:55, 39.31it/s, loss=0.00676][A
Epoch 0:   1%|          | 665/87741 [00:16<36:55, 39.31it/s, loss=0.00676][A
Epoch 0:   1%|          | 665/87741 [00:16<36:55, 39.31it/s, loss=0.0257] [A
Epoch 0:   1%|          | 665/87741 [00:17<36:55, 39.31it/s, loss=0.0257][A
Epoch 0:   1%|          | 665/87741 [00:17<36:55, 39.31it/s, loss=0.00428][A
Epoch 0:   1%|          | 669/87741 [00:17<36:55, 39.30it/s, loss=0.00428][A
Epoch 0:   1%|          | 669/87741 [00:17<36:55, 39.30it/s, loss=0

Epoch 0:   1%|          | 757/87741 [00:19<36:52, 39.31it/s, loss=0.00452][A
Epoch 0:   1%|          | 757/87741 [00:19<36:52, 39.31it/s, loss=0.003]  [A
Epoch 0:   1%|          | 757/87741 [00:19<36:52, 39.31it/s, loss=0.003][A
Epoch 0:   1%|          | 757/87741 [00:19<36:52, 39.31it/s, loss=0.00356][A
Epoch 0:   1%|          | 757/87741 [00:19<36:52, 39.31it/s, loss=0.00356][A
Epoch 0:   1%|          | 757/87741 [00:19<36:52, 39.31it/s, loss=0.0104] [A
Epoch 0:   1%|          | 757/87741 [00:19<36:52, 39.31it/s, loss=0.0104][A
Epoch 0:   1%|          | 757/87741 [00:19<36:52, 39.31it/s, loss=0.00258][A
Epoch 0:   1%|          | 761/87741 [00:19<36:51, 39.33it/s, loss=0.00258][A
Epoch 0:   1%|          | 761/87741 [00:19<36:51, 39.33it/s, loss=0.00258][A
Epoch 0:   1%|          | 761/87741 [00:19<36:51, 39.33it/s, loss=0.0111] [A
Epoch 0:   1%|          | 761/87741 [00:19<36:51, 39.33it/s, loss=0.0111][A
Epoch 0:   1%|          | 761/87741 [00:19<36:51, 39.33it/s, loss=0.

Epoch 0:   1%|          | 849/87741 [00:21<36:52, 39.27it/s, loss=0.00875][A
Epoch 0:   1%|          | 849/87741 [00:21<36:52, 39.27it/s, loss=0.00875][A
Epoch 0:   1%|          | 849/87741 [00:21<36:52, 39.27it/s, loss=0.00746][A
Epoch 0:   1%|          | 849/87741 [00:21<36:52, 39.27it/s, loss=0.00746][A
Epoch 0:   1%|          | 849/87741 [00:21<36:52, 39.27it/s, loss=0.00424][A
Epoch 0:   1%|          | 853/87741 [00:21<36:51, 39.28it/s, loss=0.00424][A
Epoch 0:   1%|          | 853/87741 [00:21<36:51, 39.28it/s, loss=0.00424][A
Epoch 0:   1%|          | 853/87741 [00:21<36:51, 39.28it/s, loss=0.00739][A
Epoch 0:   1%|          | 853/87741 [00:21<36:51, 39.28it/s, loss=0.00739][A
Epoch 0:   1%|          | 853/87741 [00:21<36:51, 39.28it/s, loss=0.00392][A
Epoch 0:   1%|          | 853/87741 [00:21<36:51, 39.28it/s, loss=0.00392][A
Epoch 0:   1%|          | 853/87741 [00:21<36:51, 39.28it/s, loss=0.00197][A
Epoch 0:   1%|          | 853/87741 [00:21<36:51, 39.28it/s, los

Epoch 0:   1%|          | 941/87741 [00:24<36:47, 39.32it/s, loss=0.00249][A
Epoch 0:   1%|          | 941/87741 [00:24<36:47, 39.32it/s, loss=0.0038] [A
Epoch 0:   1%|          | 945/87741 [00:24<36:47, 39.32it/s, loss=0.0038][A
Epoch 0:   1%|          | 945/87741 [00:24<36:47, 39.32it/s, loss=0.0038][A
Epoch 0:   1%|          | 945/87741 [00:24<36:47, 39.32it/s, loss=0.00253][A
Epoch 0:   1%|          | 945/87741 [00:24<36:47, 39.32it/s, loss=0.00253][A
Epoch 0:   1%|          | 945/87741 [00:24<36:47, 39.32it/s, loss=0.00315][A
Epoch 0:   1%|          | 945/87741 [00:24<36:47, 39.32it/s, loss=0.00315][A
Epoch 0:   1%|          | 945/87741 [00:24<36:47, 39.32it/s, loss=0.0121] [A
Epoch 0:   1%|          | 945/87741 [00:24<36:47, 39.32it/s, loss=0.0121][A
Epoch 0:   1%|          | 945/87741 [00:24<36:47, 39.32it/s, loss=0.0101][A
Epoch 0:   1%|          | 949/87741 [00:24<36:47, 39.32it/s, loss=0.0101][A
Epoch 0:   1%|          | 949/87741 [00:24<36:47, 39.32it/s, loss=0.0

Epoch 0:   1%|          | 1037/87741 [00:26<36:51, 39.21it/s, loss=0.00449][A
Epoch 0:   1%|          | 1037/87741 [00:26<36:51, 39.21it/s, loss=0.00449][A
Epoch 0:   1%|          | 1037/87741 [00:26<36:51, 39.21it/s, loss=0.00599][A
Epoch 0:   1%|          | 1037/87741 [00:26<36:51, 39.21it/s, loss=0.00599][A
Epoch 0:   1%|          | 1037/87741 [00:26<36:51, 39.21it/s, loss=0.00565][A
Epoch 0:   1%|          | 1037/87741 [00:26<36:51, 39.21it/s, loss=0.00565][A
Epoch 0:   1%|          | 1037/87741 [00:26<36:51, 39.21it/s, loss=0.011]  [A
Epoch 0:   1%|          | 1037/87741 [00:26<36:51, 39.21it/s, loss=0.011][A
Epoch 0:   1%|          | 1037/87741 [00:26<36:51, 39.21it/s, loss=0.00279][A
Epoch 0:   1%|          | 1041/87741 [00:26<36:49, 39.25it/s, loss=0.00279][A
Epoch 0:   1%|          | 1041/87741 [00:26<36:49, 39.25it/s, loss=0.00279][A
Epoch 0:   1%|          | 1041/87741 [00:26<36:49, 39.25it/s, loss=0.00533][A
Epoch 0:   1%|          | 1041/87741 [00:26<36:49, 39.

Epoch 0:   1%|▏         | 1125/87741 [00:28<36:51, 39.17it/s, loss=0.00574][A
Epoch 0:   1%|▏         | 1129/87741 [00:28<36:50, 39.18it/s, loss=0.00574][A
Epoch 0:   1%|▏         | 1129/87741 [00:28<36:50, 39.18it/s, loss=0.00574][A
Epoch 0:   1%|▏         | 1129/87741 [00:28<36:50, 39.18it/s, loss=0.00683][A
Epoch 0:   1%|▏         | 1129/87741 [00:28<36:50, 39.18it/s, loss=0.00683][A
Epoch 0:   1%|▏         | 1129/87741 [00:28<36:50, 39.18it/s, loss=0.0111] [A
Epoch 0:   1%|▏         | 1129/87741 [00:28<36:50, 39.18it/s, loss=0.0111][A
Epoch 0:   1%|▏         | 1129/87741 [00:28<36:50, 39.18it/s, loss=0.00335][A
Epoch 0:   1%|▏         | 1129/87741 [00:28<36:50, 39.18it/s, loss=0.00335][A
Epoch 0:   1%|▏         | 1129/87741 [00:28<36:50, 39.18it/s, loss=0.0051] [A
Epoch 0:   1%|▏         | 1133/87741 [00:28<36:47, 39.24it/s, loss=0.0051][A
Epoch 0:   1%|▏         | 1133/87741 [00:28<36:47, 39.24it/s, loss=0.0051][A
Epoch 0:   1%|▏         | 1133/87741 [00:28<36:47, 39.2

Epoch 0:   1%|▏         | 1217/87741 [00:31<36:43, 39.26it/s, loss=0.00202][A
Epoch 0:   1%|▏         | 1217/87741 [00:31<36:43, 39.26it/s, loss=0.00301][A
Epoch 0:   1%|▏         | 1221/87741 [00:31<36:46, 39.21it/s, loss=0.00301][A
Epoch 0:   1%|▏         | 1221/87741 [00:31<36:46, 39.21it/s, loss=0.00301][A
Epoch 0:   1%|▏         | 1221/87741 [00:31<36:46, 39.21it/s, loss=0.0042] [A
Epoch 0:   1%|▏         | 1221/87741 [00:31<36:46, 39.21it/s, loss=0.0042][A
Epoch 0:   1%|▏         | 1221/87741 [00:31<36:46, 39.21it/s, loss=0.00224][A
Epoch 0:   1%|▏         | 1221/87741 [00:31<36:46, 39.21it/s, loss=0.00224][A
Epoch 0:   1%|▏         | 1221/87741 [00:31<36:46, 39.21it/s, loss=0.00218][A
Epoch 0:   1%|▏         | 1221/87741 [00:31<36:46, 39.21it/s, loss=0.00218][A
Epoch 0:   1%|▏         | 1221/87741 [00:31<36:46, 39.21it/s, loss=0.00084][A
Epoch 0:   1%|▏         | 1225/87741 [00:31<36:45, 39.23it/s, loss=0.00084][A
Epoch 0:   1%|▏         | 1225/87741 [00:31<36:45, 39

Epoch 0:   1%|▏         | 1309/87741 [00:33<36:40, 39.29it/s, loss=0.00182][A
Epoch 0:   1%|▏         | 1309/87741 [00:33<36:40, 39.29it/s, loss=0.00182][A
Epoch 0:   1%|▏         | 1309/87741 [00:33<36:40, 39.29it/s, loss=0.0101] [A
Epoch 0:   1%|▏         | 1313/87741 [00:33<36:40, 39.27it/s, loss=0.0101][A
Epoch 0:   1%|▏         | 1313/87741 [00:33<36:40, 39.27it/s, loss=0.0101][A
Epoch 0:   1%|▏         | 1313/87741 [00:33<36:40, 39.27it/s, loss=0.00823][A
Epoch 0:   1%|▏         | 1313/87741 [00:33<36:40, 39.27it/s, loss=0.00823][A
Epoch 0:   1%|▏         | 1313/87741 [00:33<36:40, 39.27it/s, loss=0.0049] [A
Epoch 0:   1%|▏         | 1313/87741 [00:33<36:40, 39.27it/s, loss=0.0049][A
Epoch 0:   1%|▏         | 1313/87741 [00:33<36:40, 39.27it/s, loss=0.0116][A
Epoch 0:   1%|▏         | 1313/87741 [00:33<36:40, 39.27it/s, loss=0.0116][A
Epoch 0:   1%|▏         | 1313/87741 [00:33<36:40, 39.27it/s, loss=0.0105][A
Epoch 0:   2%|▏         | 1317/87741 [00:33<36:40, 39.27it

Epoch 0:   2%|▏         | 1401/87741 [00:35<36:36, 39.30it/s, loss=0.00292][A
Epoch 0:   2%|▏         | 1401/87741 [00:35<36:36, 39.30it/s, loss=0.00258][A
Epoch 0:   2%|▏         | 1401/87741 [00:35<36:36, 39.30it/s, loss=0.00258][A
Epoch 0:   2%|▏         | 1401/87741 [00:35<36:36, 39.30it/s, loss=0.00107][A
Epoch 0:   2%|▏         | 1405/87741 [00:35<36:36, 39.31it/s, loss=0.00107][A
Epoch 0:   2%|▏         | 1405/87741 [00:35<36:36, 39.31it/s, loss=0.00107][A
Epoch 0:   2%|▏         | 1405/87741 [00:35<36:36, 39.31it/s, loss=0.000901][A
Epoch 0:   2%|▏         | 1405/87741 [00:35<36:36, 39.31it/s, loss=0.000901][A
Epoch 0:   2%|▏         | 1405/87741 [00:35<36:36, 39.31it/s, loss=0.0066]  [A
Epoch 0:   2%|▏         | 1405/87741 [00:35<36:36, 39.31it/s, loss=0.0066][A
Epoch 0:   2%|▏         | 1405/87741 [00:35<36:36, 39.31it/s, loss=0.0095][A
Epoch 0:   2%|▏         | 1405/87741 [00:35<36:36, 39.31it/s, loss=0.0095][A
Epoch 0:   2%|▏         | 1405/87741 [00:35<36:36, 3

Epoch 0:   2%|▏         | 1493/87741 [00:38<36:37, 39.26it/s, loss=0.0115] [A
Epoch 0:   2%|▏         | 1493/87741 [00:38<36:37, 39.26it/s, loss=0.0115][A
Epoch 0:   2%|▏         | 1493/87741 [00:38<36:37, 39.26it/s, loss=0.0132][A
Epoch 0:   2%|▏         | 1493/87741 [00:38<36:37, 39.26it/s, loss=0.0132][A
Epoch 0:   2%|▏         | 1493/87741 [00:38<36:37, 39.26it/s, loss=0.00261][A
Epoch 0:   2%|▏         | 1497/87741 [00:38<36:39, 39.21it/s, loss=0.00261][A
Epoch 0:   2%|▏         | 1497/87741 [00:38<36:39, 39.21it/s, loss=0.00261][A
Epoch 0:   2%|▏         | 1497/87741 [00:38<36:39, 39.21it/s, loss=0.00821][A
Epoch 0:   2%|▏         | 1497/87741 [00:38<36:39, 39.21it/s, loss=0.00821][A
Epoch 0:   2%|▏         | 1497/87741 [00:38<36:39, 39.21it/s, loss=0.00397][A
Epoch 0:   2%|▏         | 1497/87741 [00:38<36:39, 39.21it/s, loss=0.00397][A
Epoch 0:   2%|▏         | 1497/87741 [00:38<36:39, 39.21it/s, loss=0.00113][A
Epoch 0:   2%|▏         | 1497/87741 [00:38<36:39, 39.2

Epoch 0:   2%|▏         | 1585/87741 [00:40<36:29, 39.35it/s, loss=0.0027][A
Epoch 0:   2%|▏         | 1585/87741 [00:40<36:29, 39.35it/s, loss=0.00881][A
Epoch 0:   2%|▏         | 1585/87741 [00:40<36:29, 39.35it/s, loss=0.00881][A
Epoch 0:   2%|▏         | 1585/87741 [00:40<36:29, 39.35it/s, loss=0.00801][A
Epoch 0:   2%|▏         | 1585/87741 [00:40<36:29, 39.35it/s, loss=0.00801][A
Epoch 0:   2%|▏         | 1585/87741 [00:40<36:29, 39.35it/s, loss=0.00533][A
Epoch 0:   2%|▏         | 1589/87741 [00:40<36:32, 39.29it/s, loss=0.00533][A
Epoch 0:   2%|▏         | 1589/87741 [00:40<36:32, 39.29it/s, loss=0.00533][A
Epoch 0:   2%|▏         | 1589/87741 [00:40<36:32, 39.29it/s, loss=0.00336][A
Epoch 0:   2%|▏         | 1589/87741 [00:40<36:32, 39.29it/s, loss=0.00336][A
Epoch 0:   2%|▏         | 1589/87741 [00:40<36:32, 39.29it/s, loss=0.00434][A
Epoch 0:   2%|▏         | 1589/87741 [00:40<36:32, 39.29it/s, loss=0.00434][A
Epoch 0:   2%|▏         | 1589/87741 [00:40<36:32, 39

Epoch 0:   2%|▏         | 1677/87741 [00:42<36:30, 39.28it/s, loss=0.00995][A
Epoch 0:   2%|▏         | 1677/87741 [00:42<36:30, 39.28it/s, loss=0.00995][A
Epoch 0:   2%|▏         | 1677/87741 [00:42<36:30, 39.28it/s, loss=0.000733][A
Epoch 0:   2%|▏         | 1677/87741 [00:42<36:30, 39.28it/s, loss=0.000733][A
Epoch 0:   2%|▏         | 1677/87741 [00:42<36:30, 39.28it/s, loss=0.00622] [A
Epoch 0:   2%|▏         | 1677/87741 [00:42<36:30, 39.28it/s, loss=0.00622][A
Epoch 0:   2%|▏         | 1677/87741 [00:42<36:30, 39.28it/s, loss=0.00662][A
Epoch 0:   2%|▏         | 1681/87741 [00:42<36:29, 39.30it/s, loss=0.00662][A
Epoch 0:   2%|▏         | 1681/87741 [00:42<36:29, 39.30it/s, loss=0.00662][A
Epoch 0:   2%|▏         | 1681/87741 [00:42<36:29, 39.30it/s, loss=0.00863][A
Epoch 0:   2%|▏         | 1681/87741 [00:42<36:29, 39.30it/s, loss=0.00863][A
Epoch 0:   2%|▏         | 1681/87741 [00:42<36:29, 39.30it/s, loss=0.00464][A
Epoch 0:   2%|▏         | 1681/87741 [00:42<36:29

Epoch 0:   2%|▏         | 1769/87741 [00:45<36:39, 39.08it/s, loss=0.000255][A
Epoch 0:   2%|▏         | 1769/87741 [00:45<36:39, 39.08it/s, loss=0.00126] [A
Epoch 0:   2%|▏         | 1769/87741 [00:45<36:39, 39.08it/s, loss=0.00126][A
Epoch 0:   2%|▏         | 1769/87741 [00:45<36:39, 39.08it/s, loss=0.00167][A
Epoch 0:   2%|▏         | 1769/87741 [00:45<36:39, 39.08it/s, loss=0.00167][A
Epoch 0:   2%|▏         | 1769/87741 [00:45<36:39, 39.08it/s, loss=0.00268][A
Epoch 0:   2%|▏         | 1769/87741 [00:45<36:39, 39.08it/s, loss=0.00268][A
Epoch 0:   2%|▏         | 1769/87741 [00:45<36:39, 39.08it/s, loss=0.00743][A
Epoch 0:   2%|▏         | 1773/87741 [00:45<36:36, 39.15it/s, loss=0.00743][A
Epoch 0:   2%|▏         | 1773/87741 [00:45<36:36, 39.15it/s, loss=0.00743][A
Epoch 0:   2%|▏         | 1773/87741 [00:45<36:36, 39.15it/s, loss=0.00142][A
Epoch 0:   2%|▏         | 1773/87741 [00:45<36:36, 39.15it/s, loss=0.00142][A
Epoch 0:   2%|▏         | 1773/87741 [00:45<36:36,

Epoch 0:   2%|▏         | 1861/87741 [00:47<36:26, 39.28it/s, loss=0.00623][A
Epoch 0:   2%|▏         | 1861/87741 [00:47<36:26, 39.28it/s, loss=0.00623][A
Epoch 0:   2%|▏         | 1861/87741 [00:47<36:26, 39.28it/s, loss=0.00373][A
Epoch 0:   2%|▏         | 1861/87741 [00:47<36:26, 39.28it/s, loss=0.00373][A
Epoch 0:   2%|▏         | 1861/87741 [00:47<36:26, 39.28it/s, loss=0.00369][A
Epoch 0:   2%|▏         | 1861/87741 [00:47<36:26, 39.28it/s, loss=0.00369][A
Epoch 0:   2%|▏         | 1861/87741 [00:47<36:26, 39.28it/s, loss=0.00265][A
Epoch 0:   2%|▏         | 1861/87741 [00:47<36:26, 39.28it/s, loss=0.00265][A
Epoch 0:   2%|▏         | 1861/87741 [00:47<36:26, 39.28it/s, loss=0.00809][A
Epoch 0:   2%|▏         | 1865/87741 [00:47<36:26, 39.27it/s, loss=0.00809][A
Epoch 0:   2%|▏         | 1865/87741 [00:47<36:26, 39.27it/s, loss=0.00809][A
Epoch 0:   2%|▏         | 1865/87741 [00:47<36:26, 39.27it/s, loss=0.00698][A
Epoch 0:   2%|▏         | 1865/87741 [00:47<36:26, 3

Epoch 0:   2%|▏         | 1949/87741 [00:49<36:22, 39.31it/s, loss=0.00136][A
Epoch 0:   2%|▏         | 1953/87741 [00:49<36:22, 39.31it/s, loss=0.00136][A
Epoch 0:   2%|▏         | 1953/87741 [00:49<36:22, 39.31it/s, loss=0.00136][A
Epoch 0:   2%|▏         | 1953/87741 [00:49<36:22, 39.31it/s, loss=0.000213][A
Epoch 0:   2%|▏         | 1953/87741 [00:49<36:22, 39.31it/s, loss=0.000213][A
Epoch 0:   2%|▏         | 1953/87741 [00:49<36:22, 39.31it/s, loss=0.0025]  [A
Epoch 0:   2%|▏         | 1953/87741 [00:49<36:22, 39.31it/s, loss=0.0025][A
Epoch 0:   2%|▏         | 1953/87741 [00:49<36:22, 39.31it/s, loss=0.000741][A
Epoch 0:   2%|▏         | 1953/87741 [00:49<36:22, 39.31it/s, loss=0.000741][A
Epoch 0:   2%|▏         | 1953/87741 [00:49<36:22, 39.31it/s, loss=0.00933] [A
Epoch 0:   2%|▏         | 1957/87741 [00:49<36:22, 39.30it/s, loss=0.00933][A
Epoch 0:   2%|▏         | 1957/87741 [00:49<36:22, 39.30it/s, loss=0.00933][A
Epoch 0:   2%|▏         | 1957/87741 [00:49<36:

Epoch 0:   2%|▏         | 2041/87741 [00:52<36:27, 39.18it/s, loss=0.00422][A
Epoch 0:   2%|▏         | 2041/87741 [00:52<36:27, 39.18it/s, loss=0.00651][A
Epoch 0:   2%|▏         | 2045/87741 [00:52<36:24, 39.23it/s, loss=0.00651][A
Epoch 0:   2%|▏         | 2045/87741 [00:52<36:24, 39.23it/s, loss=0.00651][A
Epoch 0:   2%|▏         | 2045/87741 [00:52<36:24, 39.23it/s, loss=0.000233][A
Epoch 0:   2%|▏         | 2045/87741 [00:52<36:24, 39.23it/s, loss=0.000233][A
Epoch 0:   2%|▏         | 2045/87741 [00:52<36:24, 39.23it/s, loss=0.0114]  [A
Epoch 0:   2%|▏         | 2045/87741 [00:52<36:24, 39.23it/s, loss=0.0114][A
Epoch 0:   2%|▏         | 2045/87741 [00:52<36:24, 39.23it/s, loss=0.00148][A
Epoch 0:   2%|▏         | 2045/87741 [00:52<36:24, 39.23it/s, loss=0.00148][A
Epoch 0:   2%|▏         | 2045/87741 [00:52<36:24, 39.23it/s, loss=0.000277][A
Epoch 0:   2%|▏         | 2049/87741 [00:52<36:24, 39.23it/s, loss=0.000277][A
Epoch 0:   2%|▏         | 2049/87741 [00:52<36:2

Epoch 0:   2%|▏         | 2133/87741 [00:54<36:18, 39.29it/s, loss=0.0133][A
Epoch 0:   2%|▏         | 2133/87741 [00:54<36:18, 39.29it/s, loss=0.0133][A
Epoch 0:   2%|▏         | 2133/87741 [00:54<36:18, 39.29it/s, loss=0.00158][A
Epoch 0:   2%|▏         | 2137/87741 [00:54<36:17, 39.32it/s, loss=0.00158][A
Epoch 0:   2%|▏         | 2137/87741 [00:54<36:17, 39.32it/s, loss=0.00158][A
Epoch 0:   2%|▏         | 2137/87741 [00:54<36:17, 39.32it/s, loss=0.00238][A
Epoch 0:   2%|▏         | 2137/87741 [00:54<36:17, 39.32it/s, loss=0.00238][A
Epoch 0:   2%|▏         | 2137/87741 [00:54<36:17, 39.32it/s, loss=0.00412][A
Epoch 0:   2%|▏         | 2137/87741 [00:54<36:17, 39.32it/s, loss=0.00412][A
Epoch 0:   2%|▏         | 2137/87741 [00:54<36:17, 39.32it/s, loss=0.000353][A
Epoch 0:   2%|▏         | 2137/87741 [00:54<36:17, 39.32it/s, loss=0.000353][A
Epoch 0:   2%|▏         | 2137/87741 [00:54<36:17, 39.32it/s, loss=0.00969] [A
Epoch 0:   2%|▏         | 2141/87741 [00:54<36:16, 

Epoch 0:   3%|▎         | 2225/87741 [00:56<36:12, 39.36it/s, loss=0.0051][A
Epoch 0:   3%|▎         | 2225/87741 [00:56<36:12, 39.36it/s, loss=0.0179][A
Epoch 0:   3%|▎         | 2225/87741 [00:56<36:12, 39.36it/s, loss=0.0179][A
Epoch 0:   3%|▎         | 2225/87741 [00:56<36:12, 39.36it/s, loss=0.0162][A
Epoch 0:   3%|▎         | 2229/87741 [00:56<36:13, 39.35it/s, loss=0.0162][A
Epoch 0:   3%|▎         | 2229/87741 [00:56<36:13, 39.35it/s, loss=0.0162][A
Epoch 0:   3%|▎         | 2229/87741 [00:56<36:13, 39.35it/s, loss=0.00699][A
Epoch 0:   3%|▎         | 2229/87741 [00:56<36:13, 39.35it/s, loss=0.00699][A
Epoch 0:   3%|▎         | 2229/87741 [00:56<36:13, 39.35it/s, loss=0.00347][A
Epoch 0:   3%|▎         | 2229/87741 [00:56<36:13, 39.35it/s, loss=0.00347][A
Epoch 0:   3%|▎         | 2229/87741 [00:56<36:13, 39.35it/s, loss=0.000661][A
Epoch 0:   3%|▎         | 2229/87741 [00:56<36:13, 39.35it/s, loss=0.000661][A
Epoch 0:   3%|▎         | 2229/87741 [00:56<36:13, 39.35

Epoch 0:   3%|▎         | 2317/87741 [00:59<36:12, 39.32it/s, loss=0.00324] [A
Epoch 0:   3%|▎         | 2317/87741 [00:59<36:12, 39.32it/s, loss=0.00324][A
Epoch 0:   3%|▎         | 2317/87741 [00:59<36:12, 39.32it/s, loss=0.00123][A
Epoch 0:   3%|▎         | 2317/87741 [00:59<36:12, 39.32it/s, loss=0.00123][A
Epoch 0:   3%|▎         | 2317/87741 [00:59<36:12, 39.32it/s, loss=0.00527][A
Epoch 0:   3%|▎         | 2321/87741 [00:59<36:11, 39.33it/s, loss=0.00527][A
Epoch 0:   3%|▎         | 2321/87741 [00:59<36:11, 39.33it/s, loss=0.00527][A
Epoch 0:   3%|▎         | 2321/87741 [00:59<36:11, 39.33it/s, loss=0.00213][A
Epoch 0:   3%|▎         | 2321/87741 [00:59<36:11, 39.33it/s, loss=0.00213][A
Epoch 0:   3%|▎         | 2321/87741 [00:59<36:11, 39.33it/s, loss=0.0112] [A
Epoch 0:   3%|▎         | 2321/87741 [00:59<36:11, 39.33it/s, loss=0.0112][A
Epoch 0:   3%|▎         | 2321/87741 [00:59<36:11, 39.33it/s, loss=0.00707][A
Epoch 0:   3%|▎         | 2321/87741 [00:59<36:11, 3

Epoch 0:   3%|▎         | 2409/87741 [01:01<36:08, 39.35it/s, loss=0.00103][A
Epoch 0:   3%|▎         | 2409/87741 [01:01<36:08, 39.35it/s, loss=0.00739][A
Epoch 0:   3%|▎         | 2409/87741 [01:01<36:08, 39.35it/s, loss=0.00739][A
Epoch 0:   3%|▎         | 2409/87741 [01:01<36:08, 39.35it/s, loss=0.000472][A
Epoch 0:   3%|▎         | 2409/87741 [01:01<36:08, 39.35it/s, loss=0.000472][A
Epoch 0:   3%|▎         | 2409/87741 [01:01<36:08, 39.35it/s, loss=0.00596] [A
Epoch 0:   3%|▎         | 2413/87741 [01:01<36:08, 39.35it/s, loss=0.00596][A
Epoch 0:   3%|▎         | 2413/87741 [01:01<36:08, 39.35it/s, loss=0.00596][A
Epoch 0:   3%|▎         | 2413/87741 [01:01<36:08, 39.35it/s, loss=0.00598][A
Epoch 0:   3%|▎         | 2413/87741 [01:01<36:08, 39.35it/s, loss=0.00598][A
Epoch 0:   3%|▎         | 2413/87741 [01:01<36:08, 39.35it/s, loss=0.00127][A
Epoch 0:   3%|▎         | 2413/87741 [01:01<36:08, 39.35it/s, loss=0.00127][A
Epoch 0:   3%|▎         | 2413/87741 [01:01<36:08

Epoch 0:   3%|▎         | 2501/87741 [01:03<36:15, 39.19it/s, loss=0.00257] [A
Epoch 0:   3%|▎         | 2501/87741 [01:03<36:15, 39.19it/s, loss=0.00257][A
Epoch 0:   3%|▎         | 2501/87741 [01:03<36:15, 39.19it/s, loss=0.00773][A
Epoch 0:   3%|▎         | 2501/87741 [01:03<36:15, 39.19it/s, loss=0.00773][A
Epoch 0:   3%|▎         | 2501/87741 [01:03<36:15, 39.19it/s, loss=0.00843][A
Epoch 0:   3%|▎         | 2501/87741 [01:03<36:15, 39.19it/s, loss=0.00843][A
Epoch 0:   3%|▎         | 2501/87741 [01:03<36:15, 39.19it/s, loss=0.00508][A
Epoch 0:   3%|▎         | 2505/87741 [01:03<36:16, 39.16it/s, loss=0.00508][A
Epoch 0:   3%|▎         | 2505/87741 [01:03<36:16, 39.16it/s, loss=0.00508][A
Epoch 0:   3%|▎         | 2505/87741 [01:03<36:16, 39.16it/s, loss=0.00716][A
Epoch 0:   3%|▎         | 2505/87741 [01:03<36:16, 39.16it/s, loss=0.00716][A
Epoch 0:   3%|▎         | 2505/87741 [01:03<36:16, 39.16it/s, loss=0.00155][A
Epoch 0:   3%|▎         | 2505/87741 [01:03<36:16, 

Epoch 0:   3%|▎         | 2593/87741 [01:06<36:06, 39.30it/s, loss=0.00379][A
Epoch 0:   3%|▎         | 2593/87741 [01:06<36:06, 39.30it/s, loss=0.00843][A
Epoch 0:   3%|▎         | 2593/87741 [01:06<36:06, 39.30it/s, loss=0.00843][A
Epoch 0:   3%|▎         | 2593/87741 [01:06<36:06, 39.30it/s, loss=0.00841][A
Epoch 0:   3%|▎         | 2593/87741 [01:06<36:06, 39.30it/s, loss=0.00841][A
Epoch 0:   3%|▎         | 2593/87741 [01:06<36:06, 39.30it/s, loss=0.00585][A
Epoch 0:   3%|▎         | 2593/87741 [01:06<36:06, 39.30it/s, loss=0.00585][A
Epoch 0:   3%|▎         | 2593/87741 [01:06<36:06, 39.30it/s, loss=0.00393][A
Epoch 0:   3%|▎         | 2597/87741 [01:06<36:05, 39.31it/s, loss=0.00393][A
Epoch 0:   3%|▎         | 2597/87741 [01:06<36:05, 39.31it/s, loss=0.00393][A
Epoch 0:   3%|▎         | 2597/87741 [01:06<36:05, 39.31it/s, loss=0.00331][A
Epoch 0:   3%|▎         | 2597/87741 [01:06<36:05, 39.31it/s, loss=0.00331][A
Epoch 0:   3%|▎         | 2597/87741 [01:06<36:05, 3

Epoch 0:   3%|▎         | 2685/87741 [01:08<36:12, 39.15it/s, loss=0.0106][A
Epoch 0:   3%|▎         | 2685/87741 [01:08<36:12, 39.15it/s, loss=0.0106][A
Epoch 0:   3%|▎         | 2685/87741 [01:08<36:12, 39.15it/s, loss=0.0045][A
Epoch 0:   3%|▎         | 2685/87741 [01:08<36:12, 39.15it/s, loss=0.0045][A
Epoch 0:   3%|▎         | 2685/87741 [01:08<36:12, 39.15it/s, loss=0.0117][A
Epoch 0:   3%|▎         | 2685/87741 [01:08<36:12, 39.15it/s, loss=0.0117][A
Epoch 0:   3%|▎         | 2685/87741 [01:08<36:12, 39.15it/s, loss=0.00511][A
Epoch 0:   3%|▎         | 2685/87741 [01:08<36:12, 39.15it/s, loss=0.00511][A
Epoch 0:   3%|▎         | 2685/87741 [01:08<36:12, 39.15it/s, loss=0.00717][A
Epoch 0:   3%|▎         | 2689/87741 [01:08<36:13, 39.14it/s, loss=0.00717][A
Epoch 0:   3%|▎         | 2689/87741 [01:08<36:13, 39.14it/s, loss=0.00717][A
Epoch 0:   3%|▎         | 2689/87741 [01:08<36:13, 39.14it/s, loss=0.0022] [A
Epoch 0:   3%|▎         | 2689/87741 [01:08<36:13, 39.14it

Epoch 0:   3%|▎         | 2773/87741 [01:10<36:06, 39.22it/s, loss=0.00743][A
Epoch 0:   3%|▎         | 2777/87741 [01:10<36:04, 39.25it/s, loss=0.00743][A
Epoch 0:   3%|▎         | 2777/87741 [01:10<36:04, 39.25it/s, loss=0.00743][A
Epoch 0:   3%|▎         | 2777/87741 [01:10<36:04, 39.25it/s, loss=0.00342][A
Epoch 0:   3%|▎         | 2777/87741 [01:10<36:04, 39.25it/s, loss=0.00342][A
Epoch 0:   3%|▎         | 2777/87741 [01:10<36:04, 39.25it/s, loss=0.00017][A
Epoch 0:   3%|▎         | 2777/87741 [01:10<36:04, 39.25it/s, loss=0.00017][A
Epoch 0:   3%|▎         | 2777/87741 [01:10<36:04, 39.25it/s, loss=0.00796][A
Epoch 0:   3%|▎         | 2777/87741 [01:10<36:04, 39.25it/s, loss=0.00796][A
Epoch 0:   3%|▎         | 2777/87741 [01:10<36:04, 39.25it/s, loss=0.00413][A
Epoch 0:   3%|▎         | 2781/87741 [01:10<36:03, 39.26it/s, loss=0.00413][A
Epoch 0:   3%|▎         | 2781/87741 [01:10<36:03, 39.26it/s, loss=0.00413][A
Epoch 0:   3%|▎         | 2781/87741 [01:10<36:03, 3

Epoch 0:   3%|▎         | 2865/87741 [01:13<35:58, 39.33it/s, loss=0.0018][A
Epoch 0:   3%|▎         | 2865/87741 [01:13<35:58, 39.33it/s, loss=0.00991][A
Epoch 0:   3%|▎         | 2869/87741 [01:13<35:58, 39.32it/s, loss=0.00991][A
Epoch 0:   3%|▎         | 2869/87741 [01:13<35:58, 39.32it/s, loss=0.00991][A
Epoch 0:   3%|▎         | 2869/87741 [01:13<35:58, 39.32it/s, loss=0.021]  [A
Epoch 0:   3%|▎         | 2869/87741 [01:13<35:58, 39.32it/s, loss=0.021][A
Epoch 0:   3%|▎         | 2869/87741 [01:13<35:58, 39.32it/s, loss=0.00053][A
Epoch 0:   3%|▎         | 2869/87741 [01:13<35:58, 39.32it/s, loss=0.00053][A
Epoch 0:   3%|▎         | 2869/87741 [01:13<35:58, 39.32it/s, loss=0.00934][A
Epoch 0:   3%|▎         | 2869/87741 [01:13<35:58, 39.32it/s, loss=0.00934][A
Epoch 0:   3%|▎         | 2869/87741 [01:13<35:58, 39.32it/s, loss=0.00107][A
Epoch 0:   3%|▎         | 2873/87741 [01:13<35:57, 39.34it/s, loss=0.00107][A
Epoch 0:   3%|▎         | 2873/87741 [01:13<35:57, 39.3

Epoch 0:   3%|▎         | 2957/87741 [01:15<35:56, 39.32it/s, loss=0.00203][A
Epoch 0:   3%|▎         | 2957/87741 [01:15<35:56, 39.32it/s, loss=0.00203][A
Epoch 0:   3%|▎         | 2957/87741 [01:15<35:56, 39.32it/s, loss=0.00475][A
Epoch 0:   3%|▎         | 2961/87741 [01:15<35:56, 39.32it/s, loss=0.00475][A
Epoch 0:   3%|▎         | 2961/87741 [01:15<35:56, 39.32it/s, loss=0.00475][A
Epoch 0:   3%|▎         | 2961/87741 [01:15<35:56, 39.32it/s, loss=0.00215][A
Epoch 0:   3%|▎         | 2961/87741 [01:15<35:56, 39.32it/s, loss=0.00215][A
Epoch 0:   3%|▎         | 2961/87741 [01:15<35:56, 39.32it/s, loss=0.0011] [A
Epoch 0:   3%|▎         | 2961/87741 [01:15<35:56, 39.32it/s, loss=0.0011][A
Epoch 0:   3%|▎         | 2961/87741 [01:15<35:56, 39.32it/s, loss=0.000801][A
Epoch 0:   3%|▎         | 2961/87741 [01:15<35:56, 39.32it/s, loss=0.000801][A
Epoch 0:   3%|▎         | 2961/87741 [01:15<35:56, 39.32it/s, loss=0.00397] [A
Epoch 0:   3%|▎         | 2965/87741 [01:15<35:55,

Epoch 0:   3%|▎         | 3049/87741 [01:17<36:13, 38.96it/s, loss=0.000453][A
Epoch 0:   3%|▎         | 3049/87741 [01:17<36:13, 38.96it/s, loss=0.00388] [A
Epoch 0:   3%|▎         | 3049/87741 [01:17<36:13, 38.96it/s, loss=0.00388][A
Epoch 0:   3%|▎         | 3049/87741 [01:17<36:13, 38.96it/s, loss=0.0018] [A
Epoch 0:   3%|▎         | 3053/87741 [01:17<36:13, 38.96it/s, loss=0.0018][A
Epoch 0:   3%|▎         | 3053/87741 [01:17<36:13, 38.96it/s, loss=0.0018][A
Epoch 0:   3%|▎         | 3053/87741 [01:17<36:13, 38.96it/s, loss=0.0027][A
Epoch 0:   3%|▎         | 3053/87741 [01:17<36:13, 38.96it/s, loss=0.0027][A
Epoch 0:   3%|▎         | 3053/87741 [01:17<36:13, 38.96it/s, loss=0.00446][A
Epoch 0:   3%|▎         | 3053/87741 [01:17<36:13, 38.96it/s, loss=0.00446][A
Epoch 0:   3%|▎         | 3053/87741 [01:17<36:13, 38.96it/s, loss=0.0023] [A
Epoch 0:   3%|▎         | 3053/87741 [01:17<36:13, 38.96it/s, loss=0.0023][A
Epoch 0:   3%|▎         | 3053/87741 [01:17<36:13, 38.9

Epoch 0:   4%|▎         | 3141/87741 [01:20<35:51, 39.33it/s, loss=0.00108] [A
Epoch 0:   4%|▎         | 3141/87741 [01:20<35:51, 39.33it/s, loss=0.00108][A
Epoch 0:   4%|▎         | 3141/87741 [01:20<35:51, 39.33it/s, loss=0.00751][A
Epoch 0:   4%|▎         | 3141/87741 [01:20<35:51, 39.33it/s, loss=0.00751][A
Epoch 0:   4%|▎         | 3141/87741 [01:20<35:51, 39.33it/s, loss=0.00664][A
Epoch 0:   4%|▎         | 3145/87741 [01:20<35:50, 39.34it/s, loss=0.00664][A
Epoch 0:   4%|▎         | 3145/87741 [01:20<35:50, 39.34it/s, loss=0.00664][A
Epoch 0:   4%|▎         | 3145/87741 [01:20<35:50, 39.34it/s, loss=0.00103][A
Epoch 0:   4%|▎         | 3145/87741 [01:20<35:50, 39.34it/s, loss=0.00103][A
Epoch 0:   4%|▎         | 3145/87741 [01:20<35:50, 39.34it/s, loss=0.00276][A
Epoch 0:   4%|▎         | 3145/87741 [01:20<35:50, 39.34it/s, loss=0.00276][A
Epoch 0:   4%|▎         | 3145/87741 [01:20<35:50, 39.34it/s, loss=0.00117][A
Epoch 0:   4%|▎         | 3145/87741 [01:20<35:50, 

Epoch 0:   4%|▎         | 3233/87741 [01:22<35:53, 39.24it/s, loss=0.000171][A
Epoch 0:   4%|▎         | 3233/87741 [01:22<35:53, 39.24it/s, loss=0.00105] [A
Epoch 0:   4%|▎         | 3233/87741 [01:22<35:53, 39.24it/s, loss=0.00105][A
Epoch 0:   4%|▎         | 3233/87741 [01:22<35:53, 39.24it/s, loss=0.00074][A
Epoch 0:   4%|▎         | 3233/87741 [01:22<35:53, 39.24it/s, loss=0.00074][A
Epoch 0:   4%|▎         | 3233/87741 [01:22<35:53, 39.24it/s, loss=0.00143][A
Epoch 0:   4%|▎         | 3237/87741 [01:22<35:55, 39.20it/s, loss=0.00143][A
Epoch 0:   4%|▎         | 3237/87741 [01:22<35:55, 39.20it/s, loss=0.00143][A
Epoch 0:   4%|▎         | 3237/87741 [01:22<35:55, 39.20it/s, loss=0.00214][A
Epoch 0:   4%|▎         | 3237/87741 [01:22<35:55, 39.20it/s, loss=0.00214][A
Epoch 0:   4%|▎         | 3237/87741 [01:22<35:55, 39.20it/s, loss=0.00105][A
Epoch 0:   4%|▎         | 3237/87741 [01:22<35:55, 39.20it/s, loss=0.00105][A
Epoch 0:   4%|▎         | 3237/87741 [01:22<35:55,

Epoch 0:   4%|▍         | 3325/87741 [01:24<35:46, 39.32it/s, loss=0.0058]  [A
Epoch 0:   4%|▍         | 3325/87741 [01:24<35:46, 39.32it/s, loss=0.0058][A
Epoch 0:   4%|▍         | 3325/87741 [01:24<35:46, 39.32it/s, loss=0.0132][A
Epoch 0:   4%|▍         | 3325/87741 [01:24<35:46, 39.32it/s, loss=0.0132][A
Epoch 0:   4%|▍         | 3325/87741 [01:24<35:46, 39.32it/s, loss=0.00617][A
Epoch 0:   4%|▍         | 3325/87741 [01:24<35:46, 39.32it/s, loss=0.00617][A
Epoch 0:   4%|▍         | 3325/87741 [01:24<35:46, 39.32it/s, loss=0.00466][A
Epoch 0:   4%|▍         | 3329/87741 [01:24<35:47, 39.30it/s, loss=0.00466][A
Epoch 0:   4%|▍         | 3329/87741 [01:24<35:47, 39.30it/s, loss=0.00466][A
Epoch 0:   4%|▍         | 3329/87741 [01:24<35:47, 39.30it/s, loss=0.00416][A
Epoch 0:   4%|▍         | 3329/87741 [01:24<35:47, 39.30it/s, loss=0.00416][A
Epoch 0:   4%|▍         | 3329/87741 [01:24<35:47, 39.30it/s, loss=0.00616][A
Epoch 0:   4%|▍         | 3329/87741 [01:24<35:47, 39.

Epoch 0:   4%|▍         | 3417/87741 [01:27<35:43, 39.33it/s, loss=0.0116][A
Epoch 0:   4%|▍         | 3417/87741 [01:27<35:43, 39.33it/s, loss=0.00523][A
Epoch 0:   4%|▍         | 3417/87741 [01:27<35:43, 39.33it/s, loss=0.00523][A
Epoch 0:   4%|▍         | 3417/87741 [01:27<35:43, 39.33it/s, loss=0.00131][A
Epoch 0:   4%|▍         | 3417/87741 [01:27<35:43, 39.33it/s, loss=0.00131][A
Epoch 0:   4%|▍         | 3417/87741 [01:27<35:43, 39.33it/s, loss=0.001]  [A
Epoch 0:   4%|▍         | 3417/87741 [01:27<35:43, 39.33it/s, loss=0.001][A
Epoch 0:   4%|▍         | 3417/87741 [01:27<35:43, 39.33it/s, loss=0.000435][A
Epoch 0:   4%|▍         | 3421/87741 [01:27<35:43, 39.34it/s, loss=0.000435][A
Epoch 0:   4%|▍         | 3421/87741 [01:27<35:43, 39.34it/s, loss=0.000435][A
Epoch 0:   4%|▍         | 3421/87741 [01:27<35:43, 39.34it/s, loss=0.00733] [A
Epoch 0:   4%|▍         | 3421/87741 [01:27<35:43, 39.34it/s, loss=0.00733][A
Epoch 0:   4%|▍         | 3421/87741 [01:27<35:43, 

Epoch 0:   4%|▍         | 3509/87741 [01:29<35:41, 39.33it/s, loss=0.000386][A
Epoch 0:   4%|▍         | 3509/87741 [01:29<35:41, 39.33it/s, loss=0.000386][A
Epoch 0:   4%|▍         | 3509/87741 [01:29<35:41, 39.33it/s, loss=7.21e-5] [A
Epoch 0:   4%|▍         | 3509/87741 [01:29<35:41, 39.33it/s, loss=7.21e-5][A
Epoch 0:   4%|▍         | 3509/87741 [01:29<35:41, 39.33it/s, loss=0.00109][A
Epoch 0:   4%|▍         | 3509/87741 [01:29<35:41, 39.33it/s, loss=0.00109][A
Epoch 0:   4%|▍         | 3509/87741 [01:29<35:41, 39.33it/s, loss=0.00337][A
Epoch 0:   4%|▍         | 3509/87741 [01:29<35:41, 39.33it/s, loss=0.00337][A
Epoch 0:   4%|▍         | 3509/87741 [01:29<35:41, 39.33it/s, loss=0.0035] [A
Epoch 0:   4%|▍         | 3513/87741 [01:29<35:41, 39.33it/s, loss=0.0035][A
Epoch 0:   4%|▍         | 3513/87741 [01:29<35:41, 39.33it/s, loss=0.0035][A
Epoch 0:   4%|▍         | 3513/87741 [01:29<35:41, 39.33it/s, loss=0.00251][A
Epoch 0:   4%|▍         | 3513/87741 [01:29<35:41, 

Epoch 0:   4%|▍         | 3597/87741 [01:31<35:40, 39.32it/s, loss=0.00295][A
Epoch 0:   4%|▍         | 3601/87741 [01:31<35:40, 39.30it/s, loss=0.00295][A
Epoch 0:   4%|▍         | 3601/87741 [01:31<35:40, 39.30it/s, loss=0.00295][A
Epoch 0:   4%|▍         | 3601/87741 [01:31<35:40, 39.30it/s, loss=0.000367][A
Epoch 0:   4%|▍         | 3601/87741 [01:31<35:40, 39.30it/s, loss=0.000367][A
Epoch 0:   4%|▍         | 3601/87741 [01:31<35:40, 39.30it/s, loss=0.0016]  [A
Epoch 0:   4%|▍         | 3601/87741 [01:31<35:40, 39.30it/s, loss=0.0016][A
Epoch 0:   4%|▍         | 3601/87741 [01:31<35:40, 39.30it/s, loss=0.00089][A
Epoch 0:   4%|▍         | 3601/87741 [01:31<35:40, 39.30it/s, loss=0.00089][A
Epoch 0:   4%|▍         | 3601/87741 [01:31<35:40, 39.30it/s, loss=0.00193][A
Epoch 0:   4%|▍         | 3605/87741 [01:31<35:39, 39.32it/s, loss=0.00193][A
Epoch 0:   4%|▍         | 3605/87741 [01:31<35:39, 39.32it/s, loss=0.00193][A
Epoch 0:   4%|▍         | 3605/87741 [01:31<35:39,

Epoch 0:   4%|▍         | 3689/87741 [01:34<35:36, 39.34it/s, loss=0.00183][A
Epoch 0:   4%|▍         | 3689/87741 [01:34<35:36, 39.34it/s, loss=0.000232][A
Epoch 0:   4%|▍         | 3693/87741 [01:34<35:35, 39.35it/s, loss=0.000232][A
Epoch 0:   4%|▍         | 3693/87741 [01:34<35:35, 39.35it/s, loss=0.000232][A
Epoch 0:   4%|▍         | 3693/87741 [01:34<35:35, 39.35it/s, loss=0.00474] [A
Epoch 0:   4%|▍         | 3693/87741 [01:34<35:35, 39.35it/s, loss=0.00474][A
Epoch 0:   4%|▍         | 3693/87741 [01:34<35:35, 39.35it/s, loss=0.0168] [A
Epoch 0:   4%|▍         | 3693/87741 [01:34<35:35, 39.35it/s, loss=0.0168][A
Epoch 0:   4%|▍         | 3693/87741 [01:34<35:35, 39.35it/s, loss=0.00879][A
Epoch 0:   4%|▍         | 3693/87741 [01:34<35:35, 39.35it/s, loss=0.00879][A
Epoch 0:   4%|▍         | 3693/87741 [01:34<35:35, 39.35it/s, loss=0.00159][A
Epoch 0:   4%|▍         | 3697/87741 [01:34<35:37, 39.32it/s, loss=0.00159][A
Epoch 0:   4%|▍         | 3697/87741 [01:34<35:37

Epoch 0:   4%|▍         | 3781/87741 [01:36<35:35, 39.31it/s, loss=0.00667] [A
Epoch 0:   4%|▍         | 3781/87741 [01:36<35:35, 39.31it/s, loss=0.00667][A
Epoch 0:   4%|▍         | 3781/87741 [01:36<35:35, 39.31it/s, loss=0.0132] [A
Epoch 0:   4%|▍         | 3785/87741 [01:36<35:35, 39.32it/s, loss=0.0132][A
Epoch 0:   4%|▍         | 3785/87741 [01:36<35:35, 39.32it/s, loss=0.0132][A
Epoch 0:   4%|▍         | 3785/87741 [01:36<35:35, 39.32it/s, loss=0.00175][A
Epoch 0:   4%|▍         | 3785/87741 [01:36<35:35, 39.32it/s, loss=0.00175][A
Epoch 0:   4%|▍         | 3785/87741 [01:36<35:35, 39.32it/s, loss=0.00592][A
Epoch 0:   4%|▍         | 3785/87741 [01:36<35:35, 39.32it/s, loss=0.00592][A
Epoch 0:   4%|▍         | 3785/87741 [01:36<35:35, 39.32it/s, loss=0.00903][A
Epoch 0:   4%|▍         | 3785/87741 [01:36<35:35, 39.32it/s, loss=0.00903][A
Epoch 0:   4%|▍         | 3785/87741 [01:36<35:35, 39.32it/s, loss=0.00324][A
Epoch 0:   4%|▍         | 3789/87741 [01:36<35:36, 39

Epoch 0:   4%|▍         | 3873/87741 [01:38<35:33, 39.30it/s, loss=0.00257][A
Epoch 0:   4%|▍         | 3873/87741 [01:38<35:33, 39.30it/s, loss=0.00118][A
Epoch 0:   4%|▍         | 3873/87741 [01:38<35:33, 39.30it/s, loss=0.00118][A
Epoch 0:   4%|▍         | 3873/87741 [01:38<35:33, 39.30it/s, loss=0.0104] [A
Epoch 0:   4%|▍         | 3877/87741 [01:38<35:32, 39.32it/s, loss=0.0104][A
Epoch 0:   4%|▍         | 3877/87741 [01:38<35:32, 39.32it/s, loss=0.0104][A
Epoch 0:   4%|▍         | 3877/87741 [01:38<35:32, 39.32it/s, loss=0.000185][A
Epoch 0:   4%|▍         | 3877/87741 [01:38<35:32, 39.32it/s, loss=0.000185][A
Epoch 0:   4%|▍         | 3877/87741 [01:38<35:32, 39.32it/s, loss=0.000247][A
Epoch 0:   4%|▍         | 3877/87741 [01:38<35:32, 39.32it/s, loss=0.000247][A
Epoch 0:   4%|▍         | 3877/87741 [01:38<35:32, 39.32it/s, loss=0.00193] [A
Epoch 0:   4%|▍         | 3877/87741 [01:38<35:32, 39.32it/s, loss=0.00193][A
Epoch 0:   4%|▍         | 3877/87741 [01:38<35:32

Epoch 0:   5%|▍         | 3965/87741 [01:40<35:36, 39.21it/s, loss=0.000739][A
Epoch 0:   5%|▍         | 3965/87741 [01:41<35:36, 39.21it/s, loss=0.000739][A
Epoch 0:   5%|▍         | 3965/87741 [01:41<35:36, 39.21it/s, loss=0.00813] [A
Epoch 0:   5%|▍         | 3965/87741 [01:41<35:36, 39.21it/s, loss=0.00813][A
Epoch 0:   5%|▍         | 3965/87741 [01:41<35:36, 39.21it/s, loss=0.000507][A
Epoch 0:   5%|▍         | 3969/87741 [01:41<35:40, 39.14it/s, loss=0.000507][A
Epoch 0:   5%|▍         | 3969/87741 [01:41<35:40, 39.14it/s, loss=0.000507][A
Epoch 0:   5%|▍         | 3969/87741 [01:41<35:40, 39.14it/s, loss=0.00253] [A
Epoch 0:   5%|▍         | 3969/87741 [01:41<35:40, 39.14it/s, loss=0.00253][A
Epoch 0:   5%|▍         | 3969/87741 [01:41<35:40, 39.14it/s, loss=0.00193][A
Epoch 0:   5%|▍         | 3969/87741 [01:41<35:40, 39.14it/s, loss=0.00193][A
Epoch 0:   5%|▍         | 3969/87741 [01:41<35:40, 39.14it/s, loss=0.00368][A
Epoch 0:   5%|▍         | 3969/87741 [01:41<3

Epoch 0:   5%|▍         | 4057/87741 [01:43<35:31, 39.26it/s, loss=0.00272][A
Epoch 0:   5%|▍         | 4057/87741 [01:43<35:31, 39.26it/s, loss=0.0109] [A
Epoch 0:   5%|▍         | 4057/87741 [01:43<35:31, 39.26it/s, loss=0.0109][A
Epoch 0:   5%|▍         | 4057/87741 [01:43<35:31, 39.26it/s, loss=0.0015][A
Epoch 0:   5%|▍         | 4057/87741 [01:43<35:31, 39.26it/s, loss=0.0015][A
Epoch 0:   5%|▍         | 4057/87741 [01:43<35:31, 39.26it/s, loss=0.0111][A
Epoch 0:   5%|▍         | 4061/87741 [01:43<35:31, 39.26it/s, loss=0.0111][A
Epoch 0:   5%|▍         | 4061/87741 [01:43<35:31, 39.26it/s, loss=0.0111][A
Epoch 0:   5%|▍         | 4061/87741 [01:43<35:31, 39.26it/s, loss=0.0016][A
Epoch 0:   5%|▍         | 4061/87741 [01:43<35:31, 39.26it/s, loss=0.0016][A
Epoch 0:   5%|▍         | 4061/87741 [01:43<35:31, 39.26it/s, loss=0.00172][A
Epoch 0:   5%|▍         | 4061/87741 [01:43<35:31, 39.26it/s, loss=0.00172][A
Epoch 0:   5%|▍         | 4061/87741 [01:43<35:31, 39.26it/s

Epoch 0:   5%|▍         | 4149/87741 [01:45<35:32, 39.20it/s, loss=0.00335][A
Epoch 0:   5%|▍         | 4149/87741 [01:45<35:32, 39.20it/s, loss=0.00335][A
Epoch 0:   5%|▍         | 4149/87741 [01:45<35:32, 39.20it/s, loss=0.00507][A
Epoch 0:   5%|▍         | 4149/87741 [01:45<35:32, 39.20it/s, loss=0.00507][A
Epoch 0:   5%|▍         | 4149/87741 [01:45<35:32, 39.20it/s, loss=0.00735][A
Epoch 0:   5%|▍         | 4149/87741 [01:45<35:32, 39.20it/s, loss=0.00735][A
Epoch 0:   5%|▍         | 4149/87741 [01:45<35:32, 39.20it/s, loss=0.00174][A
Epoch 0:   5%|▍         | 4153/87741 [01:45<35:30, 39.24it/s, loss=0.00174][A
Epoch 0:   5%|▍         | 4153/87741 [01:45<35:30, 39.24it/s, loss=0.00174][A
Epoch 0:   5%|▍         | 4153/87741 [01:45<35:30, 39.24it/s, loss=0.00141][A
Epoch 0:   5%|▍         | 4153/87741 [01:45<35:30, 39.24it/s, loss=0.00141][A
Epoch 0:   5%|▍         | 4153/87741 [01:45<35:30, 39.24it/s, loss=0.000788][A
Epoch 0:   5%|▍         | 4153/87741 [01:45<35:30, 

Epoch 0:   5%|▍         | 4241/87741 [01:47<35:29, 39.21it/s, loss=0.00259][A
Epoch 0:   5%|▍         | 4241/87741 [01:47<35:29, 39.21it/s, loss=0.00076][A
Epoch 0:   5%|▍         | 4241/87741 [01:48<35:29, 39.21it/s, loss=0.00076][A
Epoch 0:   5%|▍         | 4241/87741 [01:48<35:29, 39.21it/s, loss=0.000562][A
Epoch 0:   5%|▍         | 4241/87741 [01:48<35:29, 39.21it/s, loss=0.000562][A
Epoch 0:   5%|▍         | 4241/87741 [01:48<35:29, 39.21it/s, loss=0.00619] [A
Epoch 0:   5%|▍         | 4241/87741 [01:48<35:29, 39.21it/s, loss=0.00619][A
Epoch 0:   5%|▍         | 4241/87741 [01:48<35:29, 39.21it/s, loss=0.000641][A
Epoch 0:   5%|▍         | 4245/87741 [01:48<35:29, 39.21it/s, loss=0.000641][A
Epoch 0:   5%|▍         | 4245/87741 [01:48<35:29, 39.21it/s, loss=0.000641][A
Epoch 0:   5%|▍         | 4245/87741 [01:48<35:29, 39.21it/s, loss=0.00674] [A
Epoch 0:   5%|▍         | 4245/87741 [01:48<35:29, 39.21it/s, loss=0.00674][A
Epoch 0:   5%|▍         | 4245/87741 [01:48<3

Epoch 0:   5%|▍         | 4333/87741 [01:50<35:25, 39.24it/s, loss=0.0005][A
Epoch 0:   5%|▍         | 4333/87741 [01:50<35:25, 39.24it/s, loss=0.0005][A
Epoch 0:   5%|▍         | 4333/87741 [01:50<35:25, 39.24it/s, loss=0.00694][A
Epoch 0:   5%|▍         | 4333/87741 [01:50<35:25, 39.24it/s, loss=0.00694][A
Epoch 0:   5%|▍         | 4333/87741 [01:50<35:25, 39.24it/s, loss=0.00433][A
Epoch 0:   5%|▍         | 4333/87741 [01:50<35:25, 39.24it/s, loss=0.00433][A
Epoch 0:   5%|▍         | 4333/87741 [01:50<35:25, 39.24it/s, loss=0.00116][A
Epoch 0:   5%|▍         | 4333/87741 [01:50<35:25, 39.24it/s, loss=0.00116][A
Epoch 0:   5%|▍         | 4333/87741 [01:50<35:25, 39.24it/s, loss=7.17e-5][A
Epoch 0:   5%|▍         | 4337/87741 [01:50<35:23, 39.27it/s, loss=7.17e-5][A
Epoch 0:   5%|▍         | 4337/87741 [01:50<35:23, 39.27it/s, loss=7.17e-5][A
Epoch 0:   5%|▍         | 4337/87741 [01:50<35:23, 39.27it/s, loss=0.0027] [A
Epoch 0:   5%|▍         | 4337/87741 [01:50<35:23, 39.

Epoch 0:   5%|▌         | 4421/87741 [01:52<35:21, 39.27it/s, loss=0.000827][A
Epoch 0:   5%|▌         | 4425/87741 [01:52<35:20, 39.29it/s, loss=0.000827][A
Epoch 0:   5%|▌         | 4425/87741 [01:52<35:20, 39.29it/s, loss=0.000827][A
Epoch 0:   5%|▌         | 4425/87741 [01:52<35:20, 39.29it/s, loss=0.00511] [A
Epoch 0:   5%|▌         | 4425/87741 [01:52<35:20, 39.29it/s, loss=0.00511][A
Epoch 0:   5%|▌         | 4425/87741 [01:52<35:20, 39.29it/s, loss=0.00101][A
Epoch 0:   5%|▌         | 4425/87741 [01:52<35:20, 39.29it/s, loss=0.00101][A
Epoch 0:   5%|▌         | 4425/87741 [01:52<35:20, 39.29it/s, loss=0.000758][A
Epoch 0:   5%|▌         | 4425/87741 [01:52<35:20, 39.29it/s, loss=0.000758][A
Epoch 0:   5%|▌         | 4425/87741 [01:52<35:20, 39.29it/s, loss=0.00363] [A
Epoch 0:   5%|▌         | 4429/87741 [01:52<35:18, 39.32it/s, loss=0.00363][A
Epoch 0:   5%|▌         | 4429/87741 [01:52<35:18, 39.32it/s, loss=0.00363][A
Epoch 0:   5%|▌         | 4429/87741 [01:52<3

Epoch 0:   5%|▌         | 4513/87741 [01:54<35:29, 39.09it/s, loss=0.00297][A
Epoch 0:   5%|▌         | 4513/87741 [01:54<35:29, 39.09it/s, loss=0.00734][A
Epoch 0:   5%|▌         | 4517/87741 [01:54<35:35, 38.96it/s, loss=0.00734][A
Epoch 0:   5%|▌         | 4517/87741 [01:55<35:35, 38.96it/s, loss=0.00734][A
Epoch 0:   5%|▌         | 4517/87741 [01:55<35:35, 38.96it/s, loss=0.00309][A
Epoch 0:   5%|▌         | 4517/87741 [01:55<35:35, 38.96it/s, loss=0.00309][A
Epoch 0:   5%|▌         | 4517/87741 [01:55<35:35, 38.96it/s, loss=0.00358][A
Epoch 0:   5%|▌         | 4517/87741 [01:55<35:35, 38.96it/s, loss=0.00358][A
Epoch 0:   5%|▌         | 4517/87741 [01:55<35:35, 38.96it/s, loss=0.00104][A
Epoch 0:   5%|▌         | 4517/87741 [01:55<35:35, 38.96it/s, loss=0.00104][A
Epoch 0:   5%|▌         | 4517/87741 [01:55<35:35, 38.96it/s, loss=0.0039] [A
Epoch 0:   5%|▌         | 4521/87741 [01:55<35:32, 39.03it/s, loss=0.0039][A
Epoch 0:   5%|▌         | 4521/87741 [01:55<35:32, 39

Epoch 0:   5%|▌         | 4605/87741 [01:57<35:20, 39.21it/s, loss=0.00143][A
Epoch 0:   5%|▌         | 4605/87741 [01:57<35:20, 39.21it/s, loss=0.00143][A
Epoch 0:   5%|▌         | 4605/87741 [01:57<35:20, 39.21it/s, loss=0.00322][A
Epoch 0:   5%|▌         | 4609/87741 [01:57<35:17, 39.25it/s, loss=0.00322][A
Epoch 0:   5%|▌         | 4609/87741 [01:57<35:17, 39.25it/s, loss=0.00322][A
Epoch 0:   5%|▌         | 4609/87741 [01:57<35:17, 39.25it/s, loss=0.00394][A
Epoch 0:   5%|▌         | 4609/87741 [01:57<35:17, 39.25it/s, loss=0.00394][A
Epoch 0:   5%|▌         | 4609/87741 [01:57<35:17, 39.25it/s, loss=0.00543][A
Epoch 0:   5%|▌         | 4609/87741 [01:57<35:17, 39.25it/s, loss=0.00543][A
Epoch 0:   5%|▌         | 4609/87741 [01:57<35:17, 39.25it/s, loss=0.000545][A
Epoch 0:   5%|▌         | 4609/87741 [01:57<35:17, 39.25it/s, loss=0.000545][A
Epoch 0:   5%|▌         | 4609/87741 [01:57<35:17, 39.25it/s, loss=0.00208] [A
Epoch 0:   5%|▌         | 4613/87741 [01:57<35:15

Epoch 0:   5%|▌         | 4697/87741 [01:59<35:21, 39.14it/s, loss=0.0187][A
Epoch 0:   5%|▌         | 4697/87741 [01:59<35:21, 39.14it/s, loss=0.00133][A
Epoch 0:   5%|▌         | 4697/87741 [01:59<35:21, 39.14it/s, loss=0.00133][A
Epoch 0:   5%|▌         | 4697/87741 [01:59<35:21, 39.14it/s, loss=0.00777][A
Epoch 0:   5%|▌         | 4701/87741 [01:59<35:22, 39.12it/s, loss=0.00777][A
Epoch 0:   5%|▌         | 4701/87741 [01:59<35:22, 39.12it/s, loss=0.00777][A
Epoch 0:   5%|▌         | 4701/87741 [01:59<35:22, 39.12it/s, loss=0.0121] [A
Epoch 0:   5%|▌         | 4701/87741 [01:59<35:22, 39.12it/s, loss=0.0121][A
Epoch 0:   5%|▌         | 4701/87741 [01:59<35:22, 39.12it/s, loss=0.00409][A
Epoch 0:   5%|▌         | 4701/87741 [01:59<35:22, 39.12it/s, loss=0.00409][A
Epoch 0:   5%|▌         | 4701/87741 [01:59<35:22, 39.12it/s, loss=0.00734][A
Epoch 0:   5%|▌         | 4701/87741 [01:59<35:22, 39.12it/s, loss=0.00734][A
Epoch 0:   5%|▌         | 4701/87741 [01:59<35:22, 39.

Epoch 0:   5%|▌         | 4789/87741 [02:01<35:19, 39.14it/s, loss=0.00691][A
Epoch 0:   5%|▌         | 4789/87741 [02:02<35:19, 39.14it/s, loss=0.00691][A
Epoch 0:   5%|▌         | 4789/87741 [02:02<35:19, 39.14it/s, loss=0.00478][A
Epoch 0:   5%|▌         | 4789/87741 [02:02<35:19, 39.14it/s, loss=0.00478][A
Epoch 0:   5%|▌         | 4789/87741 [02:02<35:19, 39.14it/s, loss=0.000208][A
Epoch 0:   5%|▌         | 4793/87741 [02:02<35:19, 39.14it/s, loss=0.000208][A
Epoch 0:   5%|▌         | 4793/87741 [02:02<35:19, 39.14it/s, loss=0.000208][A
Epoch 0:   5%|▌         | 4793/87741 [02:02<35:19, 39.14it/s, loss=0.00361] [A
Epoch 0:   5%|▌         | 4793/87741 [02:02<35:19, 39.14it/s, loss=0.00361][A
Epoch 0:   5%|▌         | 4793/87741 [02:02<35:19, 39.14it/s, loss=0.00822][A
Epoch 0:   5%|▌         | 4793/87741 [02:02<35:19, 39.14it/s, loss=0.00822][A
Epoch 0:   5%|▌         | 4793/87741 [02:02<35:19, 39.14it/s, loss=0.00102][A
Epoch 0:   5%|▌         | 4793/87741 [02:02<35:1

Epoch 0:   6%|▌         | 4881/87741 [02:04<35:10, 39.27it/s, loss=0.00011][A
Epoch 0:   6%|▌         | 4881/87741 [02:04<35:10, 39.27it/s, loss=0.0059] [A
Epoch 0:   6%|▌         | 4881/87741 [02:04<35:10, 39.27it/s, loss=0.0059][A
Epoch 0:   6%|▌         | 4881/87741 [02:04<35:10, 39.27it/s, loss=0.00944][A
Epoch 0:   6%|▌         | 4881/87741 [02:04<35:10, 39.27it/s, loss=0.00944][A
Epoch 0:   6%|▌         | 4881/87741 [02:04<35:10, 39.27it/s, loss=0.000812][A
Epoch 0:   6%|▌         | 4885/87741 [02:04<35:14, 39.18it/s, loss=0.000812][A
Epoch 0:   6%|▌         | 4885/87741 [02:04<35:14, 39.18it/s, loss=0.000812][A
Epoch 0:   6%|▌         | 4885/87741 [02:04<35:14, 39.18it/s, loss=0.00222] [A
Epoch 0:   6%|▌         | 4885/87741 [02:04<35:14, 39.18it/s, loss=0.00222][A
Epoch 0:   6%|▌         | 4885/87741 [02:04<35:14, 39.18it/s, loss=0.00601][A
Epoch 0:   6%|▌         | 4885/87741 [02:04<35:14, 39.18it/s, loss=0.00601][A
Epoch 0:   6%|▌         | 4885/87741 [02:04<35:14

Epoch 0:   6%|▌         | 4973/87741 [02:06<35:07, 39.26it/s, loss=0.00809] [A
Epoch 0:   6%|▌         | 4973/87741 [02:06<35:07, 39.26it/s, loss=0.00809][A
Epoch 0:   6%|▌         | 4973/87741 [02:06<35:07, 39.26it/s, loss=0.00873][A
Epoch 0:   6%|▌         | 4973/87741 [02:06<35:07, 39.26it/s, loss=0.00873][A
Epoch 0:   6%|▌         | 4973/87741 [02:06<35:07, 39.26it/s, loss=0.0038] [A
Epoch 0:   6%|▌         | 4973/87741 [02:06<35:07, 39.26it/s, loss=0.0038][A
Epoch 0:   6%|▌         | 4973/87741 [02:06<35:07, 39.26it/s, loss=0.00567][A
Epoch 0:   6%|▌         | 4977/87741 [02:06<35:06, 39.29it/s, loss=0.00567][A
Epoch 0:   6%|▌         | 4977/87741 [02:06<35:06, 39.29it/s, loss=0.00567][A
Epoch 0:   6%|▌         | 4977/87741 [02:06<35:06, 39.29it/s, loss=0.0022] [A
Epoch 0:   6%|▌         | 4977/87741 [02:06<35:06, 39.29it/s, loss=0.0022][A
Epoch 0:   6%|▌         | 4977/87741 [02:06<35:06, 39.29it/s, loss=0.0016][A
Epoch 0:   6%|▌         | 4977/87741 [02:06<35:06, 39.

Epoch 0:   6%|▌         | 5065/87741 [02:08<35:01, 39.33it/s, loss=0.00456][A
Epoch 0:   6%|▌         | 5065/87741 [02:08<35:01, 39.33it/s, loss=0.00113][A
Epoch 0:   6%|▌         | 5065/87741 [02:09<35:01, 39.33it/s, loss=0.00113][A
Epoch 0:   6%|▌         | 5065/87741 [02:09<35:01, 39.33it/s, loss=0.0027] [A
Epoch 0:   6%|▌         | 5065/87741 [02:09<35:01, 39.33it/s, loss=0.0027][A
Epoch 0:   6%|▌         | 5065/87741 [02:09<35:01, 39.33it/s, loss=0.000755][A
Epoch 0:   6%|▌         | 5065/87741 [02:09<35:01, 39.33it/s, loss=0.000755][A
Epoch 0:   6%|▌         | 5065/87741 [02:09<35:01, 39.33it/s, loss=0.000151][A
Epoch 0:   6%|▌         | 5069/87741 [02:09<35:07, 39.23it/s, loss=0.000151][A
Epoch 0:   6%|▌         | 5069/87741 [02:09<35:07, 39.23it/s, loss=0.000151][A
Epoch 0:   6%|▌         | 5069/87741 [02:09<35:07, 39.23it/s, loss=0.00157] [A
Epoch 0:   6%|▌         | 5069/87741 [02:09<35:07, 39.23it/s, loss=0.00157][A
Epoch 0:   6%|▌         | 5069/87741 [02:09<35:

Epoch 0:   6%|▌         | 5157/87741 [02:11<35:00, 39.31it/s, loss=0.00414][A
Epoch 0:   6%|▌         | 5157/87741 [02:11<35:00, 39.31it/s, loss=0.00414][A
Epoch 0:   6%|▌         | 5157/87741 [02:11<35:00, 39.31it/s, loss=0.00179][A
Epoch 0:   6%|▌         | 5157/87741 [02:11<35:00, 39.31it/s, loss=0.00179][A
Epoch 0:   6%|▌         | 5157/87741 [02:11<35:00, 39.31it/s, loss=0.005]  [A
Epoch 0:   6%|▌         | 5157/87741 [02:11<35:00, 39.31it/s, loss=0.005][A
Epoch 0:   6%|▌         | 5157/87741 [02:11<35:00, 39.31it/s, loss=0.000286][A
Epoch 0:   6%|▌         | 5157/87741 [02:11<35:00, 39.31it/s, loss=0.000286][A
Epoch 0:   6%|▌         | 5157/87741 [02:11<35:00, 39.31it/s, loss=0.000243][A
Epoch 0:   6%|▌         | 5161/87741 [02:11<35:03, 39.26it/s, loss=0.000243][A
Epoch 0:   6%|▌         | 5161/87741 [02:11<35:03, 39.26it/s, loss=0.000243][A
Epoch 0:   6%|▌         | 5161/87741 [02:11<35:03, 39.26it/s, loss=9.2e-5]  [A
Epoch 0:   6%|▌         | 5161/87741 [02:11<35:0

Epoch 0:   6%|▌         | 5245/87741 [02:13<34:59, 39.29it/s, loss=0.000845][A
Epoch 0:   6%|▌         | 5249/87741 [02:13<34:58, 39.32it/s, loss=0.000845][A
Epoch 0:   6%|▌         | 5249/87741 [02:13<34:58, 39.32it/s, loss=0.000845][A
Epoch 0:   6%|▌         | 5249/87741 [02:13<34:58, 39.32it/s, loss=0.00187] [A
Epoch 0:   6%|▌         | 5249/87741 [02:13<34:58, 39.32it/s, loss=0.00187][A
Epoch 0:   6%|▌         | 5249/87741 [02:13<34:58, 39.32it/s, loss=0.00482][A
Epoch 0:   6%|▌         | 5249/87741 [02:13<34:58, 39.32it/s, loss=0.00482][A
Epoch 0:   6%|▌         | 5249/87741 [02:13<34:58, 39.32it/s, loss=0.0014] [A
Epoch 0:   6%|▌         | 5249/87741 [02:13<34:58, 39.32it/s, loss=0.0014][A
Epoch 0:   6%|▌         | 5249/87741 [02:13<34:58, 39.32it/s, loss=0.00367][A
Epoch 0:   6%|▌         | 5253/87741 [02:13<34:57, 39.32it/s, loss=0.00367][A
Epoch 0:   6%|▌         | 5253/87741 [02:13<34:57, 39.32it/s, loss=0.00367][A
Epoch 0:   6%|▌         | 5253/87741 [02:13<34:57

Epoch 0:   6%|▌         | 5337/87741 [02:15<34:54, 39.34it/s, loss=0.000192][A
Epoch 0:   6%|▌         | 5337/87741 [02:15<34:54, 39.34it/s, loss=0.00119] [A
Epoch 0:   6%|▌         | 5341/87741 [02:15<34:53, 39.35it/s, loss=0.00119][A
Epoch 0:   6%|▌         | 5341/87741 [02:16<34:53, 39.35it/s, loss=0.00119][A
Epoch 0:   6%|▌         | 5341/87741 [02:16<34:53, 39.35it/s, loss=0.000104][A
Epoch 0:   6%|▌         | 5341/87741 [02:16<34:53, 39.35it/s, loss=0.000104][A
Epoch 0:   6%|▌         | 5341/87741 [02:16<34:53, 39.35it/s, loss=0.000414][A
Epoch 0:   6%|▌         | 5341/87741 [02:16<34:53, 39.35it/s, loss=0.000414][A
Epoch 0:   6%|▌         | 5341/87741 [02:16<34:53, 39.35it/s, loss=0.00294] [A
Epoch 0:   6%|▌         | 5341/87741 [02:16<34:53, 39.35it/s, loss=0.00294][A
Epoch 0:   6%|▌         | 5341/87741 [02:16<34:53, 39.35it/s, loss=0.00277][A
Epoch 0:   6%|▌         | 5345/87741 [02:16<34:54, 39.34it/s, loss=0.00277][A
Epoch 0:   6%|▌         | 5345/87741 [02:16<3

Epoch 0:   6%|▌         | 5429/87741 [02:18<35:03, 39.13it/s, loss=0.00288][A
Epoch 0:   6%|▌         | 5429/87741 [02:18<35:03, 39.13it/s, loss=0.00288][A
Epoch 0:   6%|▌         | 5429/87741 [02:18<35:03, 39.13it/s, loss=0.00294][A
Epoch 0:   6%|▌         | 5433/87741 [02:18<35:05, 39.08it/s, loss=0.00294][A
Epoch 0:   6%|▌         | 5433/87741 [02:18<35:05, 39.08it/s, loss=0.00294][A
Epoch 0:   6%|▌         | 5433/87741 [02:18<35:05, 39.08it/s, loss=0.00011][A
Epoch 0:   6%|▌         | 5433/87741 [02:18<35:05, 39.08it/s, loss=0.00011][A
Epoch 0:   6%|▌         | 5433/87741 [02:18<35:05, 39.08it/s, loss=0.00406][A
Epoch 0:   6%|▌         | 5433/87741 [02:18<35:05, 39.08it/s, loss=0.00406][A
Epoch 0:   6%|▌         | 5433/87741 [02:18<35:05, 39.08it/s, loss=0.00809][A
Epoch 0:   6%|▌         | 5433/87741 [02:18<35:05, 39.08it/s, loss=0.00809][A
Epoch 0:   6%|▌         | 5433/87741 [02:18<35:05, 39.08it/s, loss=0.000814][A
Epoch 0:   6%|▌         | 5437/87741 [02:18<35:04, 

Epoch 0:   6%|▋         | 5521/87741 [02:20<34:54, 39.26it/s, loss=0.000505][A
Epoch 0:   6%|▋         | 5521/87741 [02:20<34:54, 39.26it/s, loss=0.00378] [A
Epoch 0:   6%|▋         | 5521/87741 [02:20<34:54, 39.26it/s, loss=0.00378][A
Epoch 0:   6%|▋         | 5521/87741 [02:20<34:54, 39.26it/s, loss=0.000621][A
Epoch 0:   6%|▋         | 5525/87741 [02:20<34:53, 39.27it/s, loss=0.000621][A
Epoch 0:   6%|▋         | 5525/87741 [02:20<34:53, 39.27it/s, loss=0.000621][A
Epoch 0:   6%|▋         | 5525/87741 [02:20<34:53, 39.27it/s, loss=0.000125][A
Epoch 0:   6%|▋         | 5525/87741 [02:20<34:53, 39.27it/s, loss=0.000125][A
Epoch 0:   6%|▋         | 5525/87741 [02:20<34:53, 39.27it/s, loss=0.000854][A
Epoch 0:   6%|▋         | 5525/87741 [02:20<34:53, 39.27it/s, loss=0.000854][A
Epoch 0:   6%|▋         | 5525/87741 [02:20<34:53, 39.27it/s, loss=0.000294][A
Epoch 0:   6%|▋         | 5525/87741 [02:20<34:53, 39.27it/s, loss=0.000294][A
Epoch 0:   6%|▋         | 5525/87741 [02:

Epoch 0:   6%|▋         | 5613/87741 [02:22<34:48, 39.32it/s, loss=0.000516][A
Epoch 0:   6%|▋         | 5613/87741 [02:22<34:48, 39.32it/s, loss=0.000516][A
Epoch 0:   6%|▋         | 5613/87741 [02:22<34:48, 39.32it/s, loss=0.00305] [A
Epoch 0:   6%|▋         | 5613/87741 [02:23<34:48, 39.32it/s, loss=0.00305][A
Epoch 0:   6%|▋         | 5613/87741 [02:23<34:48, 39.32it/s, loss=0.00039][A
Epoch 0:   6%|▋         | 5617/87741 [02:23<34:49, 39.30it/s, loss=0.00039][A
Epoch 0:   6%|▋         | 5617/87741 [02:23<34:49, 39.30it/s, loss=0.00039][A
Epoch 0:   6%|▋         | 5617/87741 [02:23<34:49, 39.30it/s, loss=0.00299][A
Epoch 0:   6%|▋         | 5617/87741 [02:23<34:49, 39.30it/s, loss=0.00299][A
Epoch 0:   6%|▋         | 5617/87741 [02:23<34:49, 39.30it/s, loss=0.0168] [A
Epoch 0:   6%|▋         | 5617/87741 [02:23<34:49, 39.30it/s, loss=0.0168][A
Epoch 0:   6%|▋         | 5617/87741 [02:23<34:49, 39.30it/s, loss=3.55e-5][A
Epoch 0:   6%|▋         | 5617/87741 [02:23<34:49,

Epoch 0:   7%|▋         | 5705/87741 [02:25<34:47, 39.31it/s, loss=0.00474][A
Epoch 0:   7%|▋         | 5705/87741 [02:25<34:47, 39.31it/s, loss=0.00258][A
Epoch 0:   7%|▋         | 5705/87741 [02:25<34:47, 39.31it/s, loss=0.00258][A
Epoch 0:   7%|▋         | 5705/87741 [02:25<34:47, 39.31it/s, loss=0.000337][A
Epoch 0:   7%|▋         | 5705/87741 [02:25<34:47, 39.31it/s, loss=0.000337][A
Epoch 0:   7%|▋         | 5705/87741 [02:25<34:47, 39.31it/s, loss=0.00344] [A
Epoch 0:   7%|▋         | 5709/87741 [02:25<34:47, 39.29it/s, loss=0.00344][A
Epoch 0:   7%|▋         | 5709/87741 [02:25<34:47, 39.29it/s, loss=0.00344][A
Epoch 0:   7%|▋         | 5709/87741 [02:25<34:47, 39.29it/s, loss=0.00279][A
Epoch 0:   7%|▋         | 5709/87741 [02:25<34:47, 39.29it/s, loss=0.00279][A
Epoch 0:   7%|▋         | 5709/87741 [02:25<34:47, 39.29it/s, loss=0.00377][A
Epoch 0:   7%|▋         | 5709/87741 [02:25<34:47, 39.29it/s, loss=0.00377][A
Epoch 0:   7%|▋         | 5709/87741 [02:25<34:47

Epoch 0:   7%|▋         | 5797/87741 [02:27<34:46, 39.27it/s, loss=0.000894][A
Epoch 0:   7%|▋         | 5797/87741 [02:27<34:46, 39.27it/s, loss=0.000894][A
Epoch 0:   7%|▋         | 5797/87741 [02:27<34:46, 39.27it/s, loss=0.000123][A
Epoch 0:   7%|▋         | 5797/87741 [02:27<34:46, 39.27it/s, loss=0.000123][A
Epoch 0:   7%|▋         | 5797/87741 [02:27<34:46, 39.27it/s, loss=0.000214][A
Epoch 0:   7%|▋         | 5797/87741 [02:27<34:46, 39.27it/s, loss=0.000214][A
Epoch 0:   7%|▋         | 5797/87741 [02:27<34:46, 39.27it/s, loss=0.000112][A
Epoch 0:   7%|▋         | 5801/87741 [02:27<34:46, 39.27it/s, loss=0.000112][A
Epoch 0:   7%|▋         | 5801/87741 [02:27<34:46, 39.27it/s, loss=0.000112][A
Epoch 0:   7%|▋         | 5801/87741 [02:27<34:46, 39.27it/s, loss=0.00113] [A
Epoch 0:   7%|▋         | 5801/87741 [02:27<34:46, 39.27it/s, loss=0.00113][A
Epoch 0:   7%|▋         | 5801/87741 [02:27<34:46, 39.27it/s, loss=8.61e-5][A
Epoch 0:   7%|▋         | 5801/87741 [02:2

Epoch 0:   7%|▋         | 5889/87741 [02:29<34:42, 39.30it/s, loss=0.00409][A
Epoch 0:   7%|▋         | 5889/87741 [02:29<34:42, 39.30it/s, loss=1.9e-5] [A
Epoch 0:   7%|▋         | 5889/87741 [02:29<34:42, 39.30it/s, loss=1.9e-5][A
Epoch 0:   7%|▋         | 5889/87741 [02:29<34:42, 39.30it/s, loss=0.00245][A
Epoch 0:   7%|▋         | 5889/87741 [02:30<34:42, 39.30it/s, loss=0.00245][A
Epoch 0:   7%|▋         | 5889/87741 [02:30<34:42, 39.30it/s, loss=0.00104][A
Epoch 0:   7%|▋         | 5889/87741 [02:30<34:42, 39.30it/s, loss=0.00104][A
Epoch 0:   7%|▋         | 5889/87741 [02:30<34:42, 39.30it/s, loss=0.00395][A
Epoch 0:   7%|▋         | 5893/87741 [02:30<34:44, 39.26it/s, loss=0.00395][A
Epoch 0:   7%|▋         | 5893/87741 [02:30<34:44, 39.26it/s, loss=0.00395][A
Epoch 0:   7%|▋         | 5893/87741 [02:30<34:44, 39.26it/s, loss=0.00862][A
Epoch 0:   7%|▋         | 5893/87741 [02:30<34:44, 39.26it/s, loss=0.00862][A
Epoch 0:   7%|▋         | 5893/87741 [02:30<34:44, 39

Epoch 0:   7%|▋         | 5981/87741 [02:32<34:47, 39.17it/s, loss=0.00352][A
Epoch 0:   7%|▋         | 5981/87741 [02:32<34:47, 39.17it/s, loss=0.00352][A
Epoch 0:   7%|▋         | 5981/87741 [02:32<34:47, 39.17it/s, loss=0.0023] [A
Epoch 0:   7%|▋         | 5981/87741 [02:32<34:47, 39.17it/s, loss=0.0023][A
Epoch 0:   7%|▋         | 5981/87741 [02:32<34:47, 39.17it/s, loss=0.00269][A
Epoch 0:   7%|▋         | 5981/87741 [02:32<34:47, 39.17it/s, loss=0.00269][A
Epoch 0:   7%|▋         | 5981/87741 [02:32<34:47, 39.17it/s, loss=0.0116] [A
Epoch 0:   7%|▋         | 5981/87741 [02:32<34:47, 39.17it/s, loss=0.0116][A
Epoch 0:   7%|▋         | 5981/87741 [02:32<34:47, 39.17it/s, loss=0.00137][A
Epoch 0:   7%|▋         | 5985/87741 [02:32<34:44, 39.22it/s, loss=0.00137][A
Epoch 0:   7%|▋         | 5985/87741 [02:32<34:44, 39.22it/s, loss=0.00137][A
Epoch 0:   7%|▋         | 5985/87741 [02:32<34:44, 39.22it/s, loss=0.00968][A
Epoch 0:   7%|▋         | 5985/87741 [02:32<34:44, 39.

Epoch 0:   7%|▋         | 6069/87741 [02:34<34:40, 39.25it/s, loss=0.000859][A
Epoch 0:   7%|▋         | 6073/87741 [02:34<34:44, 39.19it/s, loss=0.000859][A
Epoch 0:   7%|▋         | 6073/87741 [02:34<34:44, 39.19it/s, loss=0.000859][A
Epoch 0:   7%|▋         | 6073/87741 [02:34<34:44, 39.19it/s, loss=0.00141] [A
Epoch 0:   7%|▋         | 6073/87741 [02:34<34:44, 39.19it/s, loss=0.00141][A
Epoch 0:   7%|▋         | 6073/87741 [02:34<34:44, 39.19it/s, loss=0.00168][A
Epoch 0:   7%|▋         | 6073/87741 [02:34<34:44, 39.19it/s, loss=0.00168][A
Epoch 0:   7%|▋         | 6073/87741 [02:34<34:44, 39.19it/s, loss=0.000152][A
Epoch 0:   7%|▋         | 6073/87741 [02:34<34:44, 39.19it/s, loss=0.000152][A
Epoch 0:   7%|▋         | 6073/87741 [02:34<34:44, 39.19it/s, loss=0.00247] [A
Epoch 0:   7%|▋         | 6077/87741 [02:34<34:44, 39.17it/s, loss=0.00247][A
Epoch 0:   7%|▋         | 6077/87741 [02:34<34:44, 39.17it/s, loss=0.00247][A
Epoch 0:   7%|▋         | 6077/87741 [02:34<3

Epoch 0:   7%|▋         | 6161/87741 [02:36<34:36, 39.28it/s, loss=0.00113][A
Epoch 0:   7%|▋         | 6161/87741 [02:36<34:36, 39.28it/s, loss=0.000782][A
Epoch 0:   7%|▋         | 6165/87741 [02:36<34:37, 39.27it/s, loss=0.000782][A
Epoch 0:   7%|▋         | 6165/87741 [02:37<34:37, 39.27it/s, loss=0.000782][A
Epoch 0:   7%|▋         | 6165/87741 [02:37<34:37, 39.27it/s, loss=0.0011]  [A
Epoch 0:   7%|▋         | 6165/87741 [02:37<34:37, 39.27it/s, loss=0.0011][A
Epoch 0:   7%|▋         | 6165/87741 [02:37<34:37, 39.27it/s, loss=0.00107][A
Epoch 0:   7%|▋         | 6165/87741 [02:37<34:37, 39.27it/s, loss=0.00107][A
Epoch 0:   7%|▋         | 6165/87741 [02:37<34:37, 39.27it/s, loss=0.000314][A
Epoch 0:   7%|▋         | 6165/87741 [02:37<34:37, 39.27it/s, loss=0.000314][A
Epoch 0:   7%|▋         | 6165/87741 [02:37<34:37, 39.27it/s, loss=0.0119]  [A
Epoch 0:   7%|▋         | 6169/87741 [02:37<34:37, 39.26it/s, loss=0.0119][A
Epoch 0:   7%|▋         | 6169/87741 [02:37<34:

Epoch 0:   7%|▋         | 6253/87741 [02:39<34:33, 39.30it/s, loss=0.00428][A
Epoch 0:   7%|▋         | 6253/87741 [02:39<34:33, 39.30it/s, loss=0.00428][A
Epoch 0:   7%|▋         | 6253/87741 [02:39<34:33, 39.30it/s, loss=0.00553][A
Epoch 0:   7%|▋         | 6257/87741 [02:39<34:33, 39.30it/s, loss=0.00553][A
Epoch 0:   7%|▋         | 6257/87741 [02:39<34:33, 39.30it/s, loss=0.00553][A
Epoch 0:   7%|▋         | 6257/87741 [02:39<34:33, 39.30it/s, loss=0.000402][A
Epoch 0:   7%|▋         | 6257/87741 [02:39<34:33, 39.30it/s, loss=0.000402][A
Epoch 0:   7%|▋         | 6257/87741 [02:39<34:33, 39.30it/s, loss=0.0099]  [A
Epoch 0:   7%|▋         | 6257/87741 [02:39<34:33, 39.30it/s, loss=0.0099][A
Epoch 0:   7%|▋         | 6257/87741 [02:39<34:33, 39.30it/s, loss=0.0142][A
Epoch 0:   7%|▋         | 6257/87741 [02:39<34:33, 39.30it/s, loss=0.0142][A
Epoch 0:   7%|▋         | 6257/87741 [02:39<34:33, 39.30it/s, loss=0.00374][A
Epoch 0:   7%|▋         | 6261/87741 [02:39<34:33, 3

Epoch 0:   7%|▋         | 6345/87741 [02:41<34:41, 39.10it/s, loss=0.00414][A
Epoch 0:   7%|▋         | 6345/87741 [02:41<34:41, 39.10it/s, loss=0.00147][A
Epoch 0:   7%|▋         | 6345/87741 [02:41<34:41, 39.10it/s, loss=0.00147][A
Epoch 0:   7%|▋         | 6345/87741 [02:41<34:41, 39.10it/s, loss=0.0134] [A
Epoch 0:   7%|▋         | 6349/87741 [02:41<34:38, 39.16it/s, loss=0.0134][A
Epoch 0:   7%|▋         | 6349/87741 [02:41<34:38, 39.16it/s, loss=0.0134][A
Epoch 0:   7%|▋         | 6349/87741 [02:41<34:38, 39.16it/s, loss=5.06e-5][A
Epoch 0:   7%|▋         | 6349/87741 [02:41<34:38, 39.16it/s, loss=5.06e-5][A
Epoch 0:   7%|▋         | 6349/87741 [02:41<34:38, 39.16it/s, loss=0.00418][A
Epoch 0:   7%|▋         | 6349/87741 [02:41<34:38, 39.16it/s, loss=0.00418][A
Epoch 0:   7%|▋         | 6349/87741 [02:41<34:38, 39.16it/s, loss=0.0033] [A
Epoch 0:   7%|▋         | 6349/87741 [02:41<34:38, 39.16it/s, loss=0.0033][A
Epoch 0:   7%|▋         | 6349/87741 [02:41<34:38, 39.1

Epoch 0:   7%|▋         | 6437/87741 [02:43<34:29, 39.29it/s, loss=0.00203] [A
Epoch 0:   7%|▋         | 6437/87741 [02:43<34:29, 39.29it/s, loss=0.00203][A
Epoch 0:   7%|▋         | 6437/87741 [02:43<34:29, 39.29it/s, loss=0.00167][A
Epoch 0:   7%|▋         | 6437/87741 [02:44<34:29, 39.29it/s, loss=0.00167][A
Epoch 0:   7%|▋         | 6437/87741 [02:44<34:29, 39.29it/s, loss=0.00335][A
Epoch 0:   7%|▋         | 6441/87741 [02:44<34:28, 39.30it/s, loss=0.00335][A
Epoch 0:   7%|▋         | 6441/87741 [02:44<34:28, 39.30it/s, loss=0.00335][A
Epoch 0:   7%|▋         | 6441/87741 [02:44<34:28, 39.30it/s, loss=0.000107][A
Epoch 0:   7%|▋         | 6441/87741 [02:44<34:28, 39.30it/s, loss=0.000107][A
Epoch 0:   7%|▋         | 6441/87741 [02:44<34:28, 39.30it/s, loss=0.00916] [A
Epoch 0:   7%|▋         | 6441/87741 [02:44<34:28, 39.30it/s, loss=0.00916][A
Epoch 0:   7%|▋         | 6441/87741 [02:44<34:28, 39.30it/s, loss=0.00201][A
Epoch 0:   7%|▋         | 6441/87741 [02:44<34:2

Epoch 0:   7%|▋         | 6529/87741 [02:46<34:25, 39.33it/s, loss=0.00577][A
Epoch 0:   7%|▋         | 6529/87741 [02:46<34:25, 39.33it/s, loss=0.00144][A
Epoch 0:   7%|▋         | 6529/87741 [02:46<34:25, 39.33it/s, loss=0.00144][A
Epoch 0:   7%|▋         | 6529/87741 [02:46<34:25, 39.33it/s, loss=6.5e-5] [A
Epoch 0:   7%|▋         | 6529/87741 [02:46<34:25, 39.33it/s, loss=6.5e-5][A
Epoch 0:   7%|▋         | 6529/87741 [02:46<34:25, 39.33it/s, loss=0.00104][A
Epoch 0:   7%|▋         | 6533/87741 [02:46<34:25, 39.32it/s, loss=0.00104][A
Epoch 0:   7%|▋         | 6533/87741 [02:46<34:25, 39.32it/s, loss=0.00104][A
Epoch 0:   7%|▋         | 6533/87741 [02:46<34:25, 39.32it/s, loss=0.000765][A
Epoch 0:   7%|▋         | 6533/87741 [02:46<34:25, 39.32it/s, loss=0.000765][A
Epoch 0:   7%|▋         | 6533/87741 [02:46<34:25, 39.32it/s, loss=0.00298] [A
Epoch 0:   7%|▋         | 6533/87741 [02:46<34:25, 39.32it/s, loss=0.00298][A
Epoch 0:   7%|▋         | 6533/87741 [02:46<34:25,

Epoch 0:   8%|▊         | 6621/87741 [02:48<34:23, 39.31it/s, loss=0.00279][A
Epoch 0:   8%|▊         | 6621/87741 [02:48<34:23, 39.31it/s, loss=0.00279][A
Epoch 0:   8%|▊         | 6621/87741 [02:48<34:23, 39.31it/s, loss=0.000688][A
Epoch 0:   8%|▊         | 6621/87741 [02:48<34:23, 39.31it/s, loss=0.000688][A
Epoch 0:   8%|▊         | 6621/87741 [02:48<34:23, 39.31it/s, loss=0.000571][A
Epoch 0:   8%|▊         | 6621/87741 [02:48<34:23, 39.31it/s, loss=0.000571][A
Epoch 0:   8%|▊         | 6621/87741 [02:48<34:23, 39.31it/s, loss=0.000209][A
Epoch 0:   8%|▊         | 6625/87741 [02:48<34:23, 39.31it/s, loss=0.000209][A
Epoch 0:   8%|▊         | 6625/87741 [02:48<34:23, 39.31it/s, loss=0.000209][A
Epoch 0:   8%|▊         | 6625/87741 [02:48<34:23, 39.31it/s, loss=0.00409] [A
Epoch 0:   8%|▊         | 6625/87741 [02:48<34:23, 39.31it/s, loss=0.00409][A
Epoch 0:   8%|▊         | 6625/87741 [02:48<34:23, 39.31it/s, loss=0.00271][A
Epoch 0:   8%|▊         | 6625/87741 [02:48<

Epoch 0:   8%|▊         | 6713/87741 [02:50<34:23, 39.27it/s, loss=0.00356][A
Epoch 0:   8%|▊         | 6713/87741 [02:50<34:23, 39.27it/s, loss=0.00397][A
Epoch 0:   8%|▊         | 6713/87741 [02:50<34:23, 39.27it/s, loss=0.00397][A
Epoch 0:   8%|▊         | 6713/87741 [02:50<34:23, 39.27it/s, loss=0.00833][A
Epoch 0:   8%|▊         | 6713/87741 [02:50<34:23, 39.27it/s, loss=0.00833][A
Epoch 0:   8%|▊         | 6713/87741 [02:51<34:23, 39.27it/s, loss=0.00756][A
Epoch 0:   8%|▊         | 6713/87741 [02:51<34:23, 39.27it/s, loss=0.00756][A
Epoch 0:   8%|▊         | 6713/87741 [02:51<34:23, 39.27it/s, loss=0.0167] [A
Epoch 0:   8%|▊         | 6717/87741 [02:51<34:22, 39.29it/s, loss=0.0167][A
Epoch 0:   8%|▊         | 6717/87741 [02:51<34:22, 39.29it/s, loss=0.0167][A
Epoch 0:   8%|▊         | 6717/87741 [02:51<34:22, 39.29it/s, loss=3.13e-5][A
Epoch 0:   8%|▊         | 6717/87741 [02:51<34:22, 39.29it/s, loss=3.13e-5][A
Epoch 0:   8%|▊         | 6717/87741 [02:51<34:22, 39.

Epoch 0:   8%|▊         | 6805/87741 [02:53<34:25, 39.18it/s, loss=0.00181][A
Epoch 0:   8%|▊         | 6805/87741 [02:53<34:25, 39.18it/s, loss=0.00181][A
Epoch 0:   8%|▊         | 6805/87741 [02:53<34:25, 39.18it/s, loss=0.0107] [A
Epoch 0:   8%|▊         | 6805/87741 [02:53<34:25, 39.18it/s, loss=0.0107][A
Epoch 0:   8%|▊         | 6805/87741 [02:53<34:25, 39.18it/s, loss=0.000358][A
Epoch 0:   8%|▊         | 6805/87741 [02:53<34:25, 39.18it/s, loss=0.000358][A
Epoch 0:   8%|▊         | 6805/87741 [02:53<34:25, 39.18it/s, loss=0.00371] [A
Epoch 0:   8%|▊         | 6805/87741 [02:53<34:25, 39.18it/s, loss=0.00371][A
Epoch 0:   8%|▊         | 6805/87741 [02:53<34:25, 39.18it/s, loss=0.000207][A
Epoch 0:   8%|▊         | 6809/87741 [02:53<34:23, 39.21it/s, loss=0.000207][A
Epoch 0:   8%|▊         | 6809/87741 [02:53<34:23, 39.21it/s, loss=0.000207][A
Epoch 0:   8%|▊         | 6809/87741 [02:53<34:23, 39.21it/s, loss=0.000125][A
Epoch 0:   8%|▊         | 6809/87741 [02:53<34

Epoch 0:   8%|▊         | 6893/87741 [02:55<34:17, 39.29it/s, loss=0.00356] [A
Epoch 0:   8%|▊         | 6897/87741 [02:55<34:17, 39.29it/s, loss=0.00356][A
Epoch 0:   8%|▊         | 6897/87741 [02:55<34:17, 39.29it/s, loss=0.00356][A
Epoch 0:   8%|▊         | 6897/87741 [02:55<34:17, 39.29it/s, loss=0.00431][A
Epoch 0:   8%|▊         | 6897/87741 [02:55<34:17, 39.29it/s, loss=0.00431][A
Epoch 0:   8%|▊         | 6897/87741 [02:55<34:17, 39.29it/s, loss=0.00323][A
Epoch 0:   8%|▊         | 6897/87741 [02:55<34:17, 39.29it/s, loss=0.00323][A
Epoch 0:   8%|▊         | 6897/87741 [02:55<34:17, 39.29it/s, loss=0.0061] [A
Epoch 0:   8%|▊         | 6897/87741 [02:55<34:17, 39.29it/s, loss=0.0061][A
Epoch 0:   8%|▊         | 6897/87741 [02:55<34:17, 39.29it/s, loss=0.000269][A
Epoch 0:   8%|▊         | 6901/87741 [02:55<34:17, 39.29it/s, loss=0.000269][A
Epoch 0:   8%|▊         | 6901/87741 [02:55<34:17, 39.29it/s, loss=0.000269][A
Epoch 0:   8%|▊         | 6901/87741 [02:55<34:17

Epoch 0:   8%|▊         | 6985/87741 [02:57<34:16, 39.27it/s, loss=1.46e-5][A
Epoch 0:   8%|▊         | 6985/87741 [02:57<34:16, 39.27it/s, loss=0.00456][A
Epoch 0:   8%|▊         | 6989/87741 [02:57<34:16, 39.26it/s, loss=0.00456][A
Epoch 0:   8%|▊         | 6989/87741 [02:57<34:16, 39.26it/s, loss=0.00456][A
Epoch 0:   8%|▊         | 6989/87741 [02:57<34:16, 39.26it/s, loss=0.00131][A
Epoch 0:   8%|▊         | 6989/87741 [02:58<34:16, 39.26it/s, loss=0.00131][A
Epoch 0:   8%|▊         | 6989/87741 [02:58<34:16, 39.26it/s, loss=0.0099] [A
Epoch 0:   8%|▊         | 6989/87741 [02:58<34:16, 39.26it/s, loss=0.0099][A
Epoch 0:   8%|▊         | 6989/87741 [02:58<34:16, 39.26it/s, loss=0.00365][A
Epoch 0:   8%|▊         | 6989/87741 [02:58<34:16, 39.26it/s, loss=0.00365][A
Epoch 0:   8%|▊         | 6989/87741 [02:58<34:16, 39.26it/s, loss=0.00439][A
Epoch 0:   8%|▊         | 6993/87741 [02:58<34:22, 39.14it/s, loss=0.00439][A
Epoch 0:   8%|▊         | 6993/87741 [02:58<34:22, 39

Epoch 0:   8%|▊         | 7077/87741 [03:00<34:13, 39.29it/s, loss=0.00642] [A
Epoch 0:   8%|▊         | 7077/87741 [03:00<34:13, 39.29it/s, loss=0.00642][A
Epoch 0:   8%|▊         | 7077/87741 [03:00<34:13, 39.29it/s, loss=0.00416][A
Epoch 0:   8%|▊         | 7081/87741 [03:00<34:12, 39.30it/s, loss=0.00416][A
Epoch 0:   8%|▊         | 7081/87741 [03:00<34:12, 39.30it/s, loss=0.00416][A
Epoch 0:   8%|▊         | 7081/87741 [03:00<34:12, 39.30it/s, loss=0.00441][A
Epoch 0:   8%|▊         | 7081/87741 [03:00<34:12, 39.30it/s, loss=0.00441][A
Epoch 0:   8%|▊         | 7081/87741 [03:00<34:12, 39.30it/s, loss=0.00873][A
Epoch 0:   8%|▊         | 7081/87741 [03:00<34:12, 39.30it/s, loss=0.00873][A
Epoch 0:   8%|▊         | 7081/87741 [03:00<34:12, 39.30it/s, loss=0.00498][A
Epoch 0:   8%|▊         | 7081/87741 [03:00<34:12, 39.30it/s, loss=0.00498][A
Epoch 0:   8%|▊         | 7081/87741 [03:00<34:12, 39.30it/s, loss=0.00663][A
Epoch 0:   8%|▊         | 7085/87741 [03:00<34:20, 

Epoch 0:   8%|▊         | 7169/87741 [03:02<34:11, 39.28it/s, loss=0.000577][A
Epoch 0:   8%|▊         | 7169/87741 [03:02<34:11, 39.28it/s, loss=0.000426][A
Epoch 0:   8%|▊         | 7169/87741 [03:02<34:11, 39.28it/s, loss=0.000426][A
Epoch 0:   8%|▊         | 7169/87741 [03:02<34:11, 39.28it/s, loss=7.55e-5] [A
Epoch 0:   8%|▊         | 7173/87741 [03:02<34:10, 39.29it/s, loss=7.55e-5][A
Epoch 0:   8%|▊         | 7173/87741 [03:02<34:10, 39.29it/s, loss=7.55e-5][A
Epoch 0:   8%|▊         | 7173/87741 [03:02<34:10, 39.29it/s, loss=0.00135][A
Epoch 0:   8%|▊         | 7173/87741 [03:02<34:10, 39.29it/s, loss=0.00135][A
Epoch 0:   8%|▊         | 7173/87741 [03:02<34:10, 39.29it/s, loss=0.000447][A
Epoch 0:   8%|▊         | 7173/87741 [03:02<34:10, 39.29it/s, loss=0.000447][A
Epoch 0:   8%|▊         | 7173/87741 [03:02<34:10, 39.29it/s, loss=0.00698] [A
Epoch 0:   8%|▊         | 7173/87741 [03:02<34:10, 39.29it/s, loss=0.00698][A
Epoch 0:   8%|▊         | 7173/87741 [03:02<3

Epoch 0:   8%|▊         | 7261/87741 [03:04<34:12, 39.21it/s, loss=0.00209][A
Epoch 0:   8%|▊         | 7261/87741 [03:04<34:12, 39.21it/s, loss=0.00209][A
Epoch 0:   8%|▊         | 7261/87741 [03:04<34:12, 39.21it/s, loss=0.0107] [A
Epoch 0:   8%|▊         | 7261/87741 [03:04<34:12, 39.21it/s, loss=0.0107][A
Epoch 0:   8%|▊         | 7261/87741 [03:04<34:12, 39.21it/s, loss=0.00866][A
Epoch 0:   8%|▊         | 7265/87741 [03:04<34:11, 39.22it/s, loss=0.00866][A
Epoch 0:   8%|▊         | 7265/87741 [03:05<34:11, 39.22it/s, loss=0.00866][A
Epoch 0:   8%|▊         | 7265/87741 [03:05<34:11, 39.22it/s, loss=0.00645][A
Epoch 0:   8%|▊         | 7265/87741 [03:05<34:11, 39.22it/s, loss=0.00645][A
Epoch 0:   8%|▊         | 7265/87741 [03:05<34:11, 39.22it/s, loss=0.001]  [A
Epoch 0:   8%|▊         | 7265/87741 [03:05<34:11, 39.22it/s, loss=0.001][A
Epoch 0:   8%|▊         | 7265/87741 [03:05<34:11, 39.22it/s, loss=0.00075][A
Epoch 0:   8%|▊         | 7265/87741 [03:05<34:11, 39.2

Epoch 0:   8%|▊         | 7353/87741 [03:07<34:09, 39.22it/s, loss=0.00368][A
Epoch 0:   8%|▊         | 7353/87741 [03:07<34:09, 39.22it/s, loss=0.00165][A
Epoch 0:   8%|▊         | 7353/87741 [03:07<34:09, 39.22it/s, loss=0.00165][A
Epoch 0:   8%|▊         | 7353/87741 [03:07<34:09, 39.22it/s, loss=0.000216][A
Epoch 0:   8%|▊         | 7353/87741 [03:07<34:09, 39.22it/s, loss=0.000216][A
Epoch 0:   8%|▊         | 7353/87741 [03:07<34:09, 39.22it/s, loss=0.00129] [A
Epoch 0:   8%|▊         | 7357/87741 [03:07<34:07, 39.26it/s, loss=0.00129][A
Epoch 0:   8%|▊         | 7357/87741 [03:07<34:07, 39.26it/s, loss=0.00129][A
Epoch 0:   8%|▊         | 7357/87741 [03:07<34:07, 39.26it/s, loss=0.00185][A
Epoch 0:   8%|▊         | 7357/87741 [03:07<34:07, 39.26it/s, loss=0.00185][A
Epoch 0:   8%|▊         | 7357/87741 [03:07<34:07, 39.26it/s, loss=0.00335][A
Epoch 0:   8%|▊         | 7357/87741 [03:07<34:07, 39.26it/s, loss=0.00335][A
Epoch 0:   8%|▊         | 7357/87741 [03:07<34:07

Epoch 0:   8%|▊         | 7445/87741 [03:09<34:04, 39.28it/s, loss=0.00626][A
Epoch 0:   8%|▊         | 7445/87741 [03:09<34:04, 39.28it/s, loss=0.00626][A
Epoch 0:   8%|▊         | 7445/87741 [03:09<34:04, 39.28it/s, loss=8.7e-5] [A
Epoch 0:   8%|▊         | 7445/87741 [03:09<34:04, 39.28it/s, loss=8.7e-5][A
Epoch 0:   8%|▊         | 7445/87741 [03:09<34:04, 39.28it/s, loss=0.00306][A
Epoch 0:   8%|▊         | 7445/87741 [03:09<34:04, 39.28it/s, loss=0.00306][A
Epoch 0:   8%|▊         | 7445/87741 [03:09<34:04, 39.28it/s, loss=0.00215][A
Epoch 0:   8%|▊         | 7449/87741 [03:09<34:04, 39.27it/s, loss=0.00215][A
Epoch 0:   8%|▊         | 7449/87741 [03:09<34:04, 39.27it/s, loss=0.00215][A
Epoch 0:   8%|▊         | 7449/87741 [03:09<34:04, 39.27it/s, loss=0.00424][A
Epoch 0:   8%|▊         | 7449/87741 [03:09<34:04, 39.27it/s, loss=0.00424][A
Epoch 0:   8%|▊         | 7449/87741 [03:09<34:04, 39.27it/s, loss=0.00308][A
Epoch 0:   8%|▊         | 7449/87741 [03:09<34:04, 39

Epoch 0:   9%|▊         | 7537/87741 [03:11<34:01, 39.29it/s, loss=0.000949][A
Epoch 0:   9%|▊         | 7537/87741 [03:11<34:01, 39.29it/s, loss=0.00296] [A
Epoch 0:   9%|▊         | 7537/87741 [03:11<34:01, 39.29it/s, loss=0.00296][A
Epoch 0:   9%|▊         | 7537/87741 [03:11<34:01, 39.29it/s, loss=0.00258][A
Epoch 0:   9%|▊         | 7537/87741 [03:11<34:01, 39.29it/s, loss=0.00258][A
Epoch 0:   9%|▊         | 7537/87741 [03:11<34:01, 39.29it/s, loss=0.00116][A
Epoch 0:   9%|▊         | 7537/87741 [03:12<34:01, 39.29it/s, loss=0.00116][A
Epoch 0:   9%|▊         | 7537/87741 [03:12<34:01, 39.29it/s, loss=0.00421][A
Epoch 0:   9%|▊         | 7541/87741 [03:12<34:02, 39.27it/s, loss=0.00421][A
Epoch 0:   9%|▊         | 7541/87741 [03:12<34:02, 39.27it/s, loss=0.00421][A
Epoch 0:   9%|▊         | 7541/87741 [03:12<34:02, 39.27it/s, loss=0.00596][A
Epoch 0:   9%|▊         | 7541/87741 [03:12<34:02, 39.27it/s, loss=0.00596][A
Epoch 0:   9%|▊         | 7541/87741 [03:12<34:02,

Epoch 0:   9%|▊         | 7629/87741 [03:14<33:59, 39.28it/s, loss=0.00546][A
Epoch 0:   9%|▊         | 7629/87741 [03:14<33:59, 39.28it/s, loss=0.00546][A
Epoch 0:   9%|▊         | 7629/87741 [03:14<33:59, 39.28it/s, loss=0.00236][A
Epoch 0:   9%|▊         | 7629/87741 [03:14<33:59, 39.28it/s, loss=0.00236][A
Epoch 0:   9%|▊         | 7629/87741 [03:14<33:59, 39.28it/s, loss=2.91e-5][A
Epoch 0:   9%|▊         | 7629/87741 [03:14<33:59, 39.28it/s, loss=2.91e-5][A
Epoch 0:   9%|▊         | 7629/87741 [03:14<33:59, 39.28it/s, loss=0.000825][A
Epoch 0:   9%|▊         | 7629/87741 [03:14<33:59, 39.28it/s, loss=0.000825][A
Epoch 0:   9%|▊         | 7629/87741 [03:14<33:59, 39.28it/s, loss=0.00206] [A
Epoch 0:   9%|▊         | 7633/87741 [03:14<33:59, 39.28it/s, loss=0.00206][A
Epoch 0:   9%|▊         | 7633/87741 [03:14<33:59, 39.28it/s, loss=0.00206][A
Epoch 0:   9%|▊         | 7633/87741 [03:14<33:59, 39.28it/s, loss=0.00853][A
Epoch 0:   9%|▊         | 7633/87741 [03:14<33:59

Epoch 0:   9%|▉         | 7717/87741 [03:16<33:56, 39.29it/s, loss=0.00188] [A
Epoch 0:   9%|▉         | 7721/87741 [03:16<33:56, 39.29it/s, loss=0.00188][A
Epoch 0:   9%|▉         | 7721/87741 [03:16<33:56, 39.29it/s, loss=0.00188][A
Epoch 0:   9%|▉         | 7721/87741 [03:16<33:56, 39.29it/s, loss=0.00178][A
Epoch 0:   9%|▉         | 7721/87741 [03:16<33:56, 39.29it/s, loss=0.00178][A
Epoch 0:   9%|▉         | 7721/87741 [03:16<33:56, 39.29it/s, loss=0.0223] [A
Epoch 0:   9%|▉         | 7721/87741 [03:16<33:56, 39.29it/s, loss=0.0223][A
Epoch 0:   9%|▉         | 7721/87741 [03:16<33:56, 39.29it/s, loss=0.000735][A
Epoch 0:   9%|▉         | 7721/87741 [03:16<33:56, 39.29it/s, loss=0.000735][A
Epoch 0:   9%|▉         | 7721/87741 [03:16<33:56, 39.29it/s, loss=0.0107]  [A
Epoch 0:   9%|▉         | 7725/87741 [03:16<33:57, 39.27it/s, loss=0.0107][A
Epoch 0:   9%|▉         | 7725/87741 [03:16<33:57, 39.27it/s, loss=0.0107][A
Epoch 0:   9%|▉         | 7725/87741 [03:16<33:57, 

Epoch 0:   9%|▉         | 7809/87741 [03:18<33:57, 39.22it/s, loss=0.000253][A
Epoch 0:   9%|▉         | 7809/87741 [03:18<33:57, 39.22it/s, loss=0.00159] [A
Epoch 0:   9%|▉         | 7813/87741 [03:18<33:55, 39.27it/s, loss=0.00159][A
Epoch 0:   9%|▉         | 7813/87741 [03:18<33:55, 39.27it/s, loss=0.00159][A
Epoch 0:   9%|▉         | 7813/87741 [03:18<33:55, 39.27it/s, loss=0.000734][A
Epoch 0:   9%|▉         | 7813/87741 [03:19<33:55, 39.27it/s, loss=0.000734][A
Epoch 0:   9%|▉         | 7813/87741 [03:19<33:55, 39.27it/s, loss=0.00471] [A
Epoch 0:   9%|▉         | 7813/87741 [03:19<33:55, 39.27it/s, loss=0.00471][A
Epoch 0:   9%|▉         | 7813/87741 [03:19<33:55, 39.27it/s, loss=0.00129][A
Epoch 0:   9%|▉         | 7813/87741 [03:19<33:55, 39.27it/s, loss=0.00129][A
Epoch 0:   9%|▉         | 7813/87741 [03:19<33:55, 39.27it/s, loss=0.00152][A
Epoch 0:   9%|▉         | 7817/87741 [03:19<33:55, 39.26it/s, loss=0.00152][A
Epoch 0:   9%|▉         | 7817/87741 [03:19<33:

Epoch 0:   9%|▉         | 7901/87741 [03:21<33:54, 39.24it/s, loss=0.00279][A
Epoch 0:   9%|▉         | 7901/87741 [03:21<33:54, 39.24it/s, loss=0.00279][A
Epoch 0:   9%|▉         | 7901/87741 [03:21<33:54, 39.24it/s, loss=0.00149][A
Epoch 0:   9%|▉         | 7905/87741 [03:21<33:55, 39.22it/s, loss=0.00149][A
Epoch 0:   9%|▉         | 7905/87741 [03:21<33:55, 39.22it/s, loss=0.00149][A
Epoch 0:   9%|▉         | 7905/87741 [03:21<33:55, 39.22it/s, loss=5.71e-5][A
Epoch 0:   9%|▉         | 7905/87741 [03:21<33:55, 39.22it/s, loss=5.71e-5][A
Epoch 0:   9%|▉         | 7905/87741 [03:21<33:55, 39.22it/s, loss=0.00653][A
Epoch 0:   9%|▉         | 7905/87741 [03:21<33:55, 39.22it/s, loss=0.00653][A
Epoch 0:   9%|▉         | 7905/87741 [03:21<33:55, 39.22it/s, loss=0.00344][A
Epoch 0:   9%|▉         | 7905/87741 [03:21<33:55, 39.22it/s, loss=0.00344][A
Epoch 0:   9%|▉         | 7905/87741 [03:21<33:55, 39.22it/s, loss=0.00256][A
Epoch 0:   9%|▉         | 7909/87741 [03:21<33:54, 3

Epoch 0:   9%|▉         | 7993/87741 [03:23<33:47, 39.33it/s, loss=0.000243][A
Epoch 0:   9%|▉         | 7993/87741 [03:23<33:47, 39.33it/s, loss=0.000174][A
Epoch 0:   9%|▉         | 7993/87741 [03:23<33:47, 39.33it/s, loss=0.000174][A
Epoch 0:   9%|▉         | 7993/87741 [03:23<33:47, 39.33it/s, loss=0.00745] [A
Epoch 0:   9%|▉         | 7997/87741 [03:23<33:48, 39.32it/s, loss=0.00745][A
Epoch 0:   9%|▉         | 7997/87741 [03:23<33:48, 39.32it/s, loss=0.00745][A
Epoch 0:   9%|▉         | 7997/87741 [03:23<33:48, 39.32it/s, loss=0.0119] [A
Epoch 0:   9%|▉         | 7997/87741 [03:23<33:48, 39.32it/s, loss=0.0119][A
Epoch 0:   9%|▉         | 7997/87741 [03:23<33:48, 39.32it/s, loss=0.00248][A
Epoch 0:   9%|▉         | 7997/87741 [03:23<33:48, 39.32it/s, loss=0.00248][A
Epoch 0:   9%|▉         | 7997/87741 [03:23<33:48, 39.32it/s, loss=0.0132] [A
Epoch 0:   9%|▉         | 7997/87741 [03:23<33:48, 39.32it/s, loss=0.0132][A
Epoch 0:   9%|▉         | 7997/87741 [03:23<33:48,

Epoch 0:   9%|▉         | 8085/87741 [03:25<33:55, 39.13it/s, loss=7.18e-5][A
Epoch 0:   9%|▉         | 8085/87741 [03:25<33:55, 39.13it/s, loss=7.18e-5][A
Epoch 0:   9%|▉         | 8085/87741 [03:25<33:55, 39.13it/s, loss=0.00452][A
Epoch 0:   9%|▉         | 8085/87741 [03:25<33:55, 39.13it/s, loss=0.00452][A
Epoch 0:   9%|▉         | 8085/87741 [03:25<33:55, 39.13it/s, loss=0.00663][A
Epoch 0:   9%|▉         | 8089/87741 [03:25<33:53, 39.17it/s, loss=0.00663][A
Epoch 0:   9%|▉         | 8089/87741 [03:26<33:53, 39.17it/s, loss=0.00663][A
Epoch 0:   9%|▉         | 8089/87741 [03:26<33:53, 39.17it/s, loss=0.00242][A
Epoch 0:   9%|▉         | 8089/87741 [03:26<33:53, 39.17it/s, loss=0.00242][A
Epoch 0:   9%|▉         | 8089/87741 [03:26<33:53, 39.17it/s, loss=0.00647][A
Epoch 0:   9%|▉         | 8089/87741 [03:26<33:53, 39.17it/s, loss=0.00647][A
Epoch 0:   9%|▉         | 8089/87741 [03:26<33:53, 39.17it/s, loss=0.00114][A
Epoch 0:   9%|▉         | 8089/87741 [03:26<33:53, 3

Epoch 0:   9%|▉         | 8177/87741 [03:28<33:48, 39.23it/s, loss=1.48e-5][A
Epoch 0:   9%|▉         | 8177/87741 [03:28<33:48, 39.23it/s, loss=0.00543][A
Epoch 0:   9%|▉         | 8177/87741 [03:28<33:48, 39.23it/s, loss=0.00543][A
Epoch 0:   9%|▉         | 8177/87741 [03:28<33:48, 39.23it/s, loss=6.37e-5][A
Epoch 0:   9%|▉         | 8177/87741 [03:28<33:48, 39.23it/s, loss=6.37e-5][A
Epoch 0:   9%|▉         | 8177/87741 [03:28<33:48, 39.23it/s, loss=6.99e-5][A
Epoch 0:   9%|▉         | 8181/87741 [03:28<33:46, 39.25it/s, loss=6.99e-5][A
Epoch 0:   9%|▉         | 8181/87741 [03:28<33:46, 39.25it/s, loss=6.99e-5][A
Epoch 0:   9%|▉         | 8181/87741 [03:28<33:46, 39.25it/s, loss=2.53e-5][A
Epoch 0:   9%|▉         | 8181/87741 [03:28<33:46, 39.25it/s, loss=2.53e-5][A
Epoch 0:   9%|▉         | 8181/87741 [03:28<33:46, 39.25it/s, loss=0.000228][A
Epoch 0:   9%|▉         | 8181/87741 [03:28<33:46, 39.25it/s, loss=0.000228][A
Epoch 0:   9%|▉         | 8181/87741 [03:28<33:46,

Epoch 0:   9%|▉         | 8269/87741 [03:30<33:43, 39.27it/s, loss=0.000198][A
Epoch 0:   9%|▉         | 8269/87741 [03:30<33:43, 39.27it/s, loss=0.000198][A
Epoch 0:   9%|▉         | 8269/87741 [03:30<33:43, 39.27it/s, loss=0.00556] [A
Epoch 0:   9%|▉         | 8269/87741 [03:30<33:43, 39.27it/s, loss=0.00556][A
Epoch 0:   9%|▉         | 8269/87741 [03:30<33:43, 39.27it/s, loss=0.00198][A
Epoch 0:   9%|▉         | 8269/87741 [03:30<33:43, 39.27it/s, loss=0.00198][A
Epoch 0:   9%|▉         | 8269/87741 [03:30<33:43, 39.27it/s, loss=0.000724][A
Epoch 0:   9%|▉         | 8273/87741 [03:30<33:45, 39.24it/s, loss=0.000724][A
Epoch 0:   9%|▉         | 8273/87741 [03:30<33:45, 39.24it/s, loss=0.000724][A
Epoch 0:   9%|▉         | 8273/87741 [03:30<33:45, 39.24it/s, loss=0.00446] [A
Epoch 0:   9%|▉         | 8273/87741 [03:30<33:45, 39.24it/s, loss=0.00446][A
Epoch 0:   9%|▉         | 8273/87741 [03:30<33:45, 39.24it/s, loss=0.00023][A
Epoch 0:   9%|▉         | 8273/87741 [03:30<3

Epoch 0:  10%|▉         | 8361/87741 [03:32<33:40, 39.28it/s, loss=0.00885][A
Epoch 0:  10%|▉         | 8361/87741 [03:32<33:40, 39.28it/s, loss=0.000209][A
Epoch 0:  10%|▉         | 8361/87741 [03:32<33:40, 39.28it/s, loss=0.000209][A
Epoch 0:  10%|▉         | 8361/87741 [03:32<33:40, 39.28it/s, loss=0.00317] [A
Epoch 0:  10%|▉         | 8361/87741 [03:32<33:40, 39.28it/s, loss=0.00317][A
Epoch 0:  10%|▉         | 8361/87741 [03:32<33:40, 39.28it/s, loss=0.00186][A
Epoch 0:  10%|▉         | 8361/87741 [03:33<33:40, 39.28it/s, loss=0.00186][A
Epoch 0:  10%|▉         | 8361/87741 [03:33<33:40, 39.28it/s, loss=0.00082][A
Epoch 0:  10%|▉         | 8365/87741 [03:33<33:41, 39.27it/s, loss=0.00082][A
Epoch 0:  10%|▉         | 8365/87741 [03:33<33:41, 39.27it/s, loss=0.00082][A
Epoch 0:  10%|▉         | 8365/87741 [03:33<33:41, 39.27it/s, loss=0.00127][A
Epoch 0:  10%|▉         | 8365/87741 [03:33<33:41, 39.27it/s, loss=0.00127][A
Epoch 0:  10%|▉         | 8365/87741 [03:33<33:41

Epoch 0:  10%|▉         | 8453/87741 [03:35<33:36, 39.32it/s, loss=0.000102][A
Epoch 0:  10%|▉         | 8453/87741 [03:35<33:36, 39.32it/s, loss=0.000102][A
Epoch 0:  10%|▉         | 8453/87741 [03:35<33:36, 39.32it/s, loss=0.00177] [A
Epoch 0:  10%|▉         | 8453/87741 [03:35<33:36, 39.32it/s, loss=0.00177][A
Epoch 0:  10%|▉         | 8453/87741 [03:35<33:36, 39.32it/s, loss=0.0026] [A
Epoch 0:  10%|▉         | 8453/87741 [03:35<33:36, 39.32it/s, loss=0.0026][A
Epoch 0:  10%|▉         | 8453/87741 [03:35<33:36, 39.32it/s, loss=0.00152][A
Epoch 0:  10%|▉         | 8453/87741 [03:35<33:36, 39.32it/s, loss=0.00152][A
Epoch 0:  10%|▉         | 8453/87741 [03:35<33:36, 39.32it/s, loss=8.42e-5][A
Epoch 0:  10%|▉         | 8457/87741 [03:35<33:36, 39.32it/s, loss=8.42e-5][A
Epoch 0:  10%|▉         | 8457/87741 [03:35<33:36, 39.32it/s, loss=8.42e-5][A
Epoch 0:  10%|▉         | 8457/87741 [03:35<33:36, 39.32it/s, loss=0.00138][A
Epoch 0:  10%|▉         | 8457/87741 [03:35<33:36,

Epoch 0:  10%|▉         | 8541/87741 [03:37<33:35, 39.30it/s, loss=0.00087] [A
Epoch 0:  10%|▉         | 8545/87741 [03:37<33:34, 39.31it/s, loss=0.00087][A
Epoch 0:  10%|▉         | 8545/87741 [03:37<33:34, 39.31it/s, loss=0.00087][A
Epoch 0:  10%|▉         | 8545/87741 [03:37<33:34, 39.31it/s, loss=0.000888][A
Epoch 0:  10%|▉         | 8545/87741 [03:37<33:34, 39.31it/s, loss=0.000888][A
Epoch 0:  10%|▉         | 8545/87741 [03:37<33:34, 39.31it/s, loss=0.00189] [A
Epoch 0:  10%|▉         | 8545/87741 [03:37<33:34, 39.31it/s, loss=0.00189][A
Epoch 0:  10%|▉         | 8545/87741 [03:37<33:34, 39.31it/s, loss=0.000172][A
Epoch 0:  10%|▉         | 8545/87741 [03:37<33:34, 39.31it/s, loss=0.000172][A
Epoch 0:  10%|▉         | 8545/87741 [03:37<33:34, 39.31it/s, loss=0.000245][A
Epoch 0:  10%|▉         | 8549/87741 [03:37<33:34, 39.30it/s, loss=0.000245][A
Epoch 0:  10%|▉         | 8549/87741 [03:37<33:34, 39.30it/s, loss=0.000245][A
Epoch 0:  10%|▉         | 8549/87741 [03:37

Epoch 0:  10%|▉         | 8633/87741 [03:39<33:33, 39.29it/s, loss=0.00058][A
Epoch 0:  10%|▉         | 8633/87741 [03:39<33:33, 39.29it/s, loss=0.00882][A
Epoch 0:  10%|▉         | 8637/87741 [03:39<33:32, 39.31it/s, loss=0.00882][A
Epoch 0:  10%|▉         | 8637/87741 [03:39<33:32, 39.31it/s, loss=0.00882][A
Epoch 0:  10%|▉         | 8637/87741 [03:39<33:32, 39.31it/s, loss=1.21e-5][A
Epoch 0:  10%|▉         | 8637/87741 [03:39<33:32, 39.31it/s, loss=1.21e-5][A
Epoch 0:  10%|▉         | 8637/87741 [03:39<33:32, 39.31it/s, loss=0.00156][A
Epoch 0:  10%|▉         | 8637/87741 [03:40<33:32, 39.31it/s, loss=0.00156][A
Epoch 0:  10%|▉         | 8637/87741 [03:40<33:32, 39.31it/s, loss=0.00252][A
Epoch 0:  10%|▉         | 8637/87741 [03:40<33:32, 39.31it/s, loss=0.00252][A
Epoch 0:  10%|▉         | 8637/87741 [03:40<33:32, 39.31it/s, loss=0.000813][A
Epoch 0:  10%|▉         | 8641/87741 [03:40<33:32, 39.30it/s, loss=0.000813][A
Epoch 0:  10%|▉         | 8641/87741 [03:40<33:32,

Epoch 0:  10%|▉         | 8725/87741 [03:42<33:30, 39.30it/s, loss=0.00223] [A
Epoch 0:  10%|▉         | 8725/87741 [03:42<33:30, 39.30it/s, loss=0.00223][A
Epoch 0:  10%|▉         | 8725/87741 [03:42<33:30, 39.30it/s, loss=0.00401][A
Epoch 0:  10%|▉         | 8729/87741 [03:42<33:31, 39.29it/s, loss=0.00401][A
Epoch 0:  10%|▉         | 8729/87741 [03:42<33:31, 39.29it/s, loss=0.00401][A
Epoch 0:  10%|▉         | 8729/87741 [03:42<33:31, 39.29it/s, loss=0.00117][A
Epoch 0:  10%|▉         | 8729/87741 [03:42<33:31, 39.29it/s, loss=0.00117][A
Epoch 0:  10%|▉         | 8729/87741 [03:42<33:31, 39.29it/s, loss=0.00186][A
Epoch 0:  10%|▉         | 8729/87741 [03:42<33:31, 39.29it/s, loss=0.00186][A
Epoch 0:  10%|▉         | 8729/87741 [03:42<33:31, 39.29it/s, loss=0.00192][A
Epoch 0:  10%|▉         | 8729/87741 [03:42<33:31, 39.29it/s, loss=0.00192][A
Epoch 0:  10%|▉         | 8729/87741 [03:42<33:31, 39.29it/s, loss=0.00918][A
Epoch 0:  10%|▉         | 8733/87741 [03:42<33:32, 

Epoch 0:  10%|█         | 8817/87741 [03:44<33:37, 39.12it/s, loss=0.00574][A
Epoch 0:  10%|█         | 8817/87741 [03:44<33:37, 39.12it/s, loss=0.00491][A
Epoch 0:  10%|█         | 8817/87741 [03:44<33:37, 39.12it/s, loss=0.00491][A
Epoch 0:  10%|█         | 8817/87741 [03:44<33:37, 39.12it/s, loss=0.000332][A
Epoch 0:  10%|█         | 8821/87741 [03:44<33:34, 39.18it/s, loss=0.000332][A
Epoch 0:  10%|█         | 8821/87741 [03:44<33:34, 39.18it/s, loss=0.000332][A
Epoch 0:  10%|█         | 8821/87741 [03:44<33:34, 39.18it/s, loss=0.000615][A
Epoch 0:  10%|█         | 8821/87741 [03:44<33:34, 39.18it/s, loss=0.000615][A
Epoch 0:  10%|█         | 8821/87741 [03:44<33:34, 39.18it/s, loss=6.65e-5] [A
Epoch 0:  10%|█         | 8821/87741 [03:44<33:34, 39.18it/s, loss=6.65e-5][A
Epoch 0:  10%|█         | 8821/87741 [03:44<33:34, 39.18it/s, loss=0.000771][A
Epoch 0:  10%|█         | 8821/87741 [03:44<33:34, 39.18it/s, loss=0.000771][A
Epoch 0:  10%|█         | 8821/87741 [03:44<

Epoch 0:  10%|█         | 8909/87741 [03:46<33:31, 39.18it/s, loss=0.00392][A
Epoch 0:  10%|█         | 8909/87741 [03:46<33:31, 39.18it/s, loss=0.00392][A
Epoch 0:  10%|█         | 8909/87741 [03:46<33:31, 39.18it/s, loss=0.00287][A
Epoch 0:  10%|█         | 8909/87741 [03:46<33:31, 39.18it/s, loss=0.00287][A
Epoch 0:  10%|█         | 8909/87741 [03:46<33:31, 39.18it/s, loss=0.00115][A
Epoch 0:  10%|█         | 8913/87741 [03:46<33:29, 39.22it/s, loss=0.00115][A
Epoch 0:  10%|█         | 8913/87741 [03:46<33:29, 39.22it/s, loss=0.00115][A
Epoch 0:  10%|█         | 8913/87741 [03:46<33:29, 39.22it/s, loss=0.000454][A
Epoch 0:  10%|█         | 8913/87741 [03:47<33:29, 39.22it/s, loss=0.000454][A
Epoch 0:  10%|█         | 8913/87741 [03:47<33:29, 39.22it/s, loss=0.00419] [A
Epoch 0:  10%|█         | 8913/87741 [03:47<33:29, 39.22it/s, loss=0.00419][A
Epoch 0:  10%|█         | 8913/87741 [03:47<33:29, 39.22it/s, loss=0.000864][A
Epoch 0:  10%|█         | 8913/87741 [03:47<33:2

Epoch 0:  10%|█         | 9001/87741 [03:49<33:30, 39.17it/s, loss=0.00017][A
Epoch 0:  10%|█         | 9001/87741 [03:49<33:30, 39.17it/s, loss=4.86e-5][A
Epoch 0:  10%|█         | 9001/87741 [03:49<33:30, 39.17it/s, loss=4.86e-5][A
Epoch 0:  10%|█         | 9001/87741 [03:49<33:30, 39.17it/s, loss=0.00213][A
Epoch 0:  10%|█         | 9001/87741 [03:49<33:30, 39.17it/s, loss=0.00213][A
Epoch 0:  10%|█         | 9001/87741 [03:49<33:30, 39.17it/s, loss=1.36e-5][A
Epoch 0:  10%|█         | 9005/87741 [03:49<33:28, 39.20it/s, loss=1.36e-5][A
Epoch 0:  10%|█         | 9005/87741 [03:49<33:28, 39.20it/s, loss=1.36e-5][A
Epoch 0:  10%|█         | 9005/87741 [03:49<33:28, 39.20it/s, loss=0.0138] [A
Epoch 0:  10%|█         | 9005/87741 [03:49<33:28, 39.20it/s, loss=0.0138][A
Epoch 0:  10%|█         | 9005/87741 [03:49<33:28, 39.20it/s, loss=0.00044][A
Epoch 0:  10%|█         | 9005/87741 [03:49<33:28, 39.20it/s, loss=0.00044][A
Epoch 0:  10%|█         | 9005/87741 [03:49<33:28, 39

Epoch 0:  10%|█         | 9093/87741 [03:51<33:25, 39.22it/s, loss=0.000223][A
Epoch 0:  10%|█         | 9093/87741 [03:51<33:25, 39.22it/s, loss=0.000223][A
Epoch 0:  10%|█         | 9093/87741 [03:51<33:25, 39.22it/s, loss=0.00228] [A
Epoch 0:  10%|█         | 9093/87741 [03:51<33:25, 39.22it/s, loss=0.00228][A
Epoch 0:  10%|█         | 9093/87741 [03:51<33:25, 39.22it/s, loss=0.00423][A
Epoch 0:  10%|█         | 9093/87741 [03:51<33:25, 39.22it/s, loss=0.00423][A
Epoch 0:  10%|█         | 9093/87741 [03:51<33:25, 39.22it/s, loss=0.00491][A
Epoch 0:  10%|█         | 9097/87741 [03:51<33:28, 39.16it/s, loss=0.00491][A
Epoch 0:  10%|█         | 9097/87741 [03:51<33:28, 39.16it/s, loss=0.00491][A
Epoch 0:  10%|█         | 9097/87741 [03:51<33:28, 39.16it/s, loss=0.00655][A
Epoch 0:  10%|█         | 9097/87741 [03:51<33:28, 39.16it/s, loss=0.00655][A
Epoch 0:  10%|█         | 9097/87741 [03:51<33:28, 39.16it/s, loss=8.57e-5][A
Epoch 0:  10%|█         | 9097/87741 [03:51<33:28

Epoch 0:  10%|█         | 9185/87741 [03:53<33:18, 39.31it/s, loss=0.000271][A
Epoch 0:  10%|█         | 9185/87741 [03:53<33:18, 39.31it/s, loss=0.00055] [A
Epoch 0:  10%|█         | 9185/87741 [03:53<33:18, 39.31it/s, loss=0.00055][A
Epoch 0:  10%|█         | 9185/87741 [03:53<33:18, 39.31it/s, loss=0.00422][A
Epoch 0:  10%|█         | 9185/87741 [03:53<33:18, 39.31it/s, loss=0.00422][A
Epoch 0:  10%|█         | 9185/87741 [03:53<33:18, 39.31it/s, loss=0.00245][A
Epoch 0:  10%|█         | 9185/87741 [03:53<33:18, 39.31it/s, loss=0.00245][A
Epoch 0:  10%|█         | 9185/87741 [03:54<33:18, 39.31it/s, loss=0.00217][A
Epoch 0:  10%|█         | 9189/87741 [03:54<33:18, 39.31it/s, loss=0.00217][A
Epoch 0:  10%|█         | 9189/87741 [03:54<33:18, 39.31it/s, loss=0.00217][A
Epoch 0:  10%|█         | 9189/87741 [03:54<33:18, 39.31it/s, loss=0.000269][A
Epoch 0:  10%|█         | 9189/87741 [03:54<33:18, 39.31it/s, loss=0.000269][A
Epoch 0:  10%|█         | 9189/87741 [03:54<33:1

Epoch 0:  11%|█         | 9277/87741 [03:56<33:17, 39.28it/s, loss=0.00532][A
Epoch 0:  11%|█         | 9277/87741 [03:56<33:17, 39.28it/s, loss=0.00532][A
Epoch 0:  11%|█         | 9277/87741 [03:56<33:17, 39.28it/s, loss=0.00255][A
Epoch 0:  11%|█         | 9277/87741 [03:56<33:17, 39.28it/s, loss=0.00255][A
Epoch 0:  11%|█         | 9277/87741 [03:56<33:17, 39.28it/s, loss=0.000926][A
Epoch 0:  11%|█         | 9277/87741 [03:56<33:17, 39.28it/s, loss=0.000926][A
Epoch 0:  11%|█         | 9277/87741 [03:56<33:17, 39.28it/s, loss=0.000181][A
Epoch 0:  11%|█         | 9277/87741 [03:56<33:17, 39.28it/s, loss=0.000181][A
Epoch 0:  11%|█         | 9277/87741 [03:56<33:17, 39.28it/s, loss=0.0114]  [A
Epoch 0:  11%|█         | 9281/87741 [03:56<33:17, 39.27it/s, loss=0.0114][A
Epoch 0:  11%|█         | 9281/87741 [03:56<33:17, 39.27it/s, loss=0.0114][A
Epoch 0:  11%|█         | 9281/87741 [03:56<33:17, 39.27it/s, loss=0.00551][A
Epoch 0:  11%|█         | 9281/87741 [03:56<33:17

Epoch 0:  11%|█         | 9365/87741 [03:58<33:13, 39.31it/s, loss=0.00331][A
Epoch 0:  11%|█         | 9369/87741 [03:58<33:14, 39.30it/s, loss=0.00331][A
Epoch 0:  11%|█         | 9369/87741 [03:58<33:14, 39.30it/s, loss=0.00331][A
Epoch 0:  11%|█         | 9369/87741 [03:58<33:14, 39.30it/s, loss=0.00084][A
Epoch 0:  11%|█         | 9369/87741 [03:58<33:14, 39.30it/s, loss=0.00084][A
Epoch 0:  11%|█         | 9369/87741 [03:58<33:14, 39.30it/s, loss=0.000178][A
Epoch 0:  11%|█         | 9369/87741 [03:58<33:14, 39.30it/s, loss=0.000178][A
Epoch 0:  11%|█         | 9369/87741 [03:58<33:14, 39.30it/s, loss=0.00456] [A
Epoch 0:  11%|█         | 9369/87741 [03:58<33:14, 39.30it/s, loss=0.00456][A
Epoch 0:  11%|█         | 9369/87741 [03:58<33:14, 39.30it/s, loss=1.43e-5][A
Epoch 0:  11%|█         | 9373/87741 [03:58<33:14, 39.30it/s, loss=1.43e-5][A
Epoch 0:  11%|█         | 9373/87741 [03:58<33:14, 39.30it/s, loss=1.43e-5][A
Epoch 0:  11%|█         | 9373/87741 [03:58<33:14

Epoch 0:  11%|█         | 9457/87741 [04:00<33:13, 39.27it/s, loss=8.03e-5][A
Epoch 0:  11%|█         | 9457/87741 [04:00<33:13, 39.27it/s, loss=0.000318][A
Epoch 0:  11%|█         | 9461/87741 [04:00<33:13, 39.27it/s, loss=0.000318][A
Epoch 0:  11%|█         | 9461/87741 [04:00<33:13, 39.27it/s, loss=0.000318][A
Epoch 0:  11%|█         | 9461/87741 [04:00<33:13, 39.27it/s, loss=0.00192] [A
Epoch 0:  11%|█         | 9461/87741 [04:00<33:13, 39.27it/s, loss=0.00192][A
Epoch 0:  11%|█         | 9461/87741 [04:00<33:13, 39.27it/s, loss=0.00426][A
Epoch 0:  11%|█         | 9461/87741 [04:01<33:13, 39.27it/s, loss=0.00426][A
Epoch 0:  11%|█         | 9461/87741 [04:01<33:13, 39.27it/s, loss=0.00027][A
Epoch 0:  11%|█         | 9461/87741 [04:01<33:13, 39.27it/s, loss=0.00027][A
Epoch 0:  11%|█         | 9461/87741 [04:01<33:13, 39.27it/s, loss=0.000747][A
Epoch 0:  11%|█         | 9465/87741 [04:01<33:12, 39.28it/s, loss=0.000747][A
Epoch 0:  11%|█         | 9465/87741 [04:01<33

Epoch 0:  11%|█         | 9549/87741 [04:03<33:20, 39.09it/s, loss=1.82e-5][A
Epoch 0:  11%|█         | 9549/87741 [04:03<33:20, 39.09it/s, loss=1.82e-5][A
Epoch 0:  11%|█         | 9549/87741 [04:03<33:20, 39.09it/s, loss=0.00213][A
Epoch 0:  11%|█         | 9553/87741 [04:03<33:22, 39.04it/s, loss=0.00213][A
Epoch 0:  11%|█         | 9553/87741 [04:03<33:22, 39.04it/s, loss=0.00213][A
Epoch 0:  11%|█         | 9553/87741 [04:03<33:22, 39.04it/s, loss=0.00785][A
Epoch 0:  11%|█         | 9553/87741 [04:03<33:22, 39.04it/s, loss=0.00785][A
Epoch 0:  11%|█         | 9553/87741 [04:03<33:22, 39.04it/s, loss=8.89e-5][A
Epoch 0:  11%|█         | 9553/87741 [04:03<33:22, 39.04it/s, loss=8.89e-5][A
Epoch 0:  11%|█         | 9553/87741 [04:03<33:22, 39.04it/s, loss=0.00214][A
Epoch 0:  11%|█         | 9553/87741 [04:03<33:22, 39.04it/s, loss=0.00214][A
Epoch 0:  11%|█         | 9553/87741 [04:03<33:22, 39.04it/s, loss=0.00643][A
Epoch 0:  11%|█         | 9557/87741 [04:03<33:18, 3

Epoch 0:  11%|█         | 9641/87741 [04:05<33:11, 39.21it/s, loss=0.00562][A
Epoch 0:  11%|█         | 9641/87741 [04:05<33:11, 39.21it/s, loss=0.00121][A
Epoch 0:  11%|█         | 9641/87741 [04:05<33:11, 39.21it/s, loss=0.00121][A
Epoch 0:  11%|█         | 9641/87741 [04:05<33:11, 39.21it/s, loss=0.00285][A
Epoch 0:  11%|█         | 9645/87741 [04:05<33:13, 39.17it/s, loss=0.00285][A
Epoch 0:  11%|█         | 9645/87741 [04:05<33:13, 39.17it/s, loss=0.00285][A
Epoch 0:  11%|█         | 9645/87741 [04:05<33:13, 39.17it/s, loss=0.00143][A
Epoch 0:  11%|█         | 9645/87741 [04:05<33:13, 39.17it/s, loss=0.00143][A
Epoch 0:  11%|█         | 9645/87741 [04:05<33:13, 39.17it/s, loss=0.00199][A
Epoch 0:  11%|█         | 9645/87741 [04:05<33:13, 39.17it/s, loss=0.00199][A
Epoch 0:  11%|█         | 9645/87741 [04:05<33:13, 39.17it/s, loss=0.00309][A
Epoch 0:  11%|█         | 9645/87741 [04:05<33:13, 39.17it/s, loss=0.00309][A
Epoch 0:  11%|█         | 9645/87741 [04:05<33:13, 3

Epoch 0:  11%|█         | 9733/87741 [04:07<33:05, 39.29it/s, loss=0.00369][A
Epoch 0:  11%|█         | 9733/87741 [04:07<33:05, 39.29it/s, loss=0.00369][A
Epoch 0:  11%|█         | 9733/87741 [04:07<33:05, 39.29it/s, loss=0.000729][A
Epoch 0:  11%|█         | 9733/87741 [04:07<33:05, 39.29it/s, loss=0.000729][A
Epoch 0:  11%|█         | 9733/87741 [04:07<33:05, 39.29it/s, loss=0.00013] [A
Epoch 0:  11%|█         | 9737/87741 [04:07<33:06, 39.27it/s, loss=0.00013][A
Epoch 0:  11%|█         | 9737/87741 [04:07<33:06, 39.27it/s, loss=0.00013][A
Epoch 0:  11%|█         | 9737/87741 [04:07<33:06, 39.27it/s, loss=0.00226][A
Epoch 0:  11%|█         | 9737/87741 [04:08<33:06, 39.27it/s, loss=0.00226][A
Epoch 0:  11%|█         | 9737/87741 [04:08<33:06, 39.27it/s, loss=0.00232][A
Epoch 0:  11%|█         | 9737/87741 [04:08<33:06, 39.27it/s, loss=0.00232][A
Epoch 0:  11%|█         | 9737/87741 [04:08<33:06, 39.27it/s, loss=0.000191][A
Epoch 0:  11%|█         | 9737/87741 [04:08<33:0

Epoch 0:  11%|█         | 9825/87741 [04:10<33:06, 39.23it/s, loss=0.000409][A
Epoch 0:  11%|█         | 9825/87741 [04:10<33:06, 39.23it/s, loss=0.00135] [A
Epoch 0:  11%|█         | 9825/87741 [04:10<33:06, 39.23it/s, loss=0.00135][A
Epoch 0:  11%|█         | 9825/87741 [04:10<33:06, 39.23it/s, loss=0.00153][A
Epoch 0:  11%|█         | 9825/87741 [04:10<33:06, 39.23it/s, loss=0.00153][A
Epoch 0:  11%|█         | 9825/87741 [04:10<33:06, 39.23it/s, loss=0.0025] [A
Epoch 0:  11%|█         | 9829/87741 [04:10<33:05, 39.25it/s, loss=0.0025][A
Epoch 0:  11%|█         | 9829/87741 [04:10<33:05, 39.25it/s, loss=0.0025][A
Epoch 0:  11%|█         | 9829/87741 [04:10<33:05, 39.25it/s, loss=0.00458][A
Epoch 0:  11%|█         | 9829/87741 [04:10<33:05, 39.25it/s, loss=0.00458][A
Epoch 0:  11%|█         | 9829/87741 [04:10<33:05, 39.25it/s, loss=0.00386][A
Epoch 0:  11%|█         | 9829/87741 [04:10<33:05, 39.25it/s, loss=0.00386][A
Epoch 0:  11%|█         | 9829/87741 [04:10<33:05, 3

Epoch 0:  11%|█▏        | 9917/87741 [04:12<33:02, 39.26it/s, loss=1.49e-5][A
Epoch 0:  11%|█▏        | 9917/87741 [04:12<33:02, 39.26it/s, loss=1.49e-5][A
Epoch 0:  11%|█▏        | 9917/87741 [04:12<33:02, 39.26it/s, loss=0.00454][A
Epoch 0:  11%|█▏        | 9917/87741 [04:12<33:02, 39.26it/s, loss=0.00454][A
Epoch 0:  11%|█▏        | 9917/87741 [04:12<33:02, 39.26it/s, loss=0.00178][A
Epoch 0:  11%|█▏        | 9917/87741 [04:12<33:02, 39.26it/s, loss=0.00178][A
Epoch 0:  11%|█▏        | 9917/87741 [04:12<33:02, 39.26it/s, loss=0.00656][A
Epoch 0:  11%|█▏        | 9921/87741 [04:12<33:02, 39.26it/s, loss=0.00656][A
Epoch 0:  11%|█▏        | 9921/87741 [04:12<33:02, 39.26it/s, loss=0.00656][A
Epoch 0:  11%|█▏        | 9921/87741 [04:12<33:02, 39.26it/s, loss=0.00219][A
Epoch 0:  11%|█▏        | 9921/87741 [04:12<33:02, 39.26it/s, loss=0.00219][A
Epoch 0:  11%|█▏        | 9921/87741 [04:12<33:02, 39.26it/s, loss=0.000201][A
Epoch 0:  11%|█▏        | 9921/87741 [04:12<33:02, 

Epoch 0:  11%|█▏        | 10009/87741 [04:14<33:03, 39.19it/s, loss=0.00586][A
Epoch 0:  11%|█▏        | 10009/87741 [04:14<33:03, 39.19it/s, loss=0.00103][A
Epoch 0:  11%|█▏        | 10009/87741 [04:14<33:03, 39.19it/s, loss=0.00103][A
Epoch 0:  11%|█▏        | 10009/87741 [04:14<33:03, 39.19it/s, loss=0.0023] [A
Epoch 0:  11%|█▏        | 10009/87741 [04:14<33:03, 39.19it/s, loss=0.0023][A
Epoch 0:  11%|█▏        | 10009/87741 [04:14<33:03, 39.19it/s, loss=0.00219][A
Epoch 0:  11%|█▏        | 10009/87741 [04:15<33:03, 39.19it/s, loss=0.00219][A
Epoch 0:  11%|█▏        | 10009/87741 [04:15<33:03, 39.19it/s, loss=0.000344][A
Epoch 0:  11%|█▏        | 10013/87741 [04:15<33:02, 39.21it/s, loss=0.000344][A
Epoch 0:  11%|█▏        | 10013/87741 [04:15<33:02, 39.21it/s, loss=0.000344][A
Epoch 0:  11%|█▏        | 10013/87741 [04:15<33:02, 39.21it/s, loss=0.00523] [A
Epoch 0:  11%|█▏        | 10013/87741 [04:15<33:02, 39.21it/s, loss=0.00523][A
Epoch 0:  11%|█▏        | 10013/87741

Epoch 0:  12%|█▏        | 10097/87741 [04:17<32:57, 39.26it/s, loss=6.59e-5][A
Epoch 0:  12%|█▏        | 10097/87741 [04:17<32:57, 39.26it/s, loss=3.58e-5][A
Epoch 0:  12%|█▏        | 10101/87741 [04:17<32:56, 39.29it/s, loss=3.58e-5][A
Epoch 0:  12%|█▏        | 10101/87741 [04:17<32:56, 39.29it/s, loss=3.58e-5][A
Epoch 0:  12%|█▏        | 10101/87741 [04:17<32:56, 39.29it/s, loss=0.00299][A
Epoch 0:  12%|█▏        | 10101/87741 [04:17<32:56, 39.29it/s, loss=0.00299][A
Epoch 0:  12%|█▏        | 10101/87741 [04:17<32:56, 39.29it/s, loss=0.00227][A
Epoch 0:  12%|█▏        | 10101/87741 [04:17<32:56, 39.29it/s, loss=0.00227][A
Epoch 0:  12%|█▏        | 10101/87741 [04:17<32:56, 39.29it/s, loss=0.00154][A
Epoch 0:  12%|█▏        | 10101/87741 [04:17<32:56, 39.29it/s, loss=0.00154][A
Epoch 0:  12%|█▏        | 10101/87741 [04:17<32:56, 39.29it/s, loss=0.00752][A
Epoch 0:  12%|█▏        | 10105/87741 [04:17<32:55, 39.30it/s, loss=0.00752][A
Epoch 0:  12%|█▏        | 10105/87741 [0

Epoch 0:  12%|█▏        | 10189/87741 [04:19<32:55, 39.25it/s, loss=0.00127] [A
Epoch 0:  12%|█▏        | 10189/87741 [04:19<32:55, 39.25it/s, loss=0.00127][A
Epoch 0:  12%|█▏        | 10189/87741 [04:19<32:55, 39.25it/s, loss=7.46e-5][A
Epoch 0:  12%|█▏        | 10189/87741 [04:19<32:55, 39.25it/s, loss=7.46e-5][A
Epoch 0:  12%|█▏        | 10189/87741 [04:19<32:55, 39.25it/s, loss=0.00118][A
Epoch 0:  12%|█▏        | 10193/87741 [04:19<32:54, 39.28it/s, loss=0.00118][A
Epoch 0:  12%|█▏        | 10193/87741 [04:19<32:54, 39.28it/s, loss=0.00118][A
Epoch 0:  12%|█▏        | 10193/87741 [04:19<32:54, 39.28it/s, loss=0.00916][A
Epoch 0:  12%|█▏        | 10193/87741 [04:19<32:54, 39.28it/s, loss=0.00916][A
Epoch 0:  12%|█▏        | 10193/87741 [04:19<32:54, 39.28it/s, loss=0.000359][A
Epoch 0:  12%|█▏        | 10193/87741 [04:19<32:54, 39.28it/s, loss=0.000359][A
Epoch 0:  12%|█▏        | 10193/87741 [04:19<32:54, 39.28it/s, loss=0.00623] [A
Epoch 0:  12%|█▏        | 10193/8774

Epoch 0:  12%|█▏        | 10281/87741 [04:21<32:58, 39.16it/s, loss=0.00383][A
Epoch 0:  12%|█▏        | 10281/87741 [04:21<32:58, 39.16it/s, loss=0.00383][A
Epoch 0:  12%|█▏        | 10281/87741 [04:21<32:58, 39.16it/s, loss=0.00301][A
Epoch 0:  12%|█▏        | 10281/87741 [04:21<32:58, 39.16it/s, loss=0.00301][A
Epoch 0:  12%|█▏        | 10281/87741 [04:21<32:58, 39.16it/s, loss=0.00389][A
Epoch 0:  12%|█▏        | 10281/87741 [04:21<32:58, 39.16it/s, loss=0.00389][A
Epoch 0:  12%|█▏        | 10281/87741 [04:21<32:58, 39.16it/s, loss=0.000859][A
Epoch 0:  12%|█▏        | 10281/87741 [04:21<32:58, 39.16it/s, loss=0.000859][A
Epoch 0:  12%|█▏        | 10281/87741 [04:21<32:58, 39.16it/s, loss=0.00625] [A
Epoch 0:  12%|█▏        | 10285/87741 [04:21<33:00, 39.12it/s, loss=0.00625][A
Epoch 0:  12%|█▏        | 10285/87741 [04:21<33:00, 39.12it/s, loss=0.00625][A
Epoch 0:  12%|█▏        | 10285/87741 [04:21<33:00, 39.12it/s, loss=0.000224][A
Epoch 0:  12%|█▏        | 10285/8774

Epoch 0:  12%|█▏        | 10369/87741 [04:24<32:52, 39.22it/s, loss=0.00143][A
Epoch 0:  12%|█▏        | 10369/87741 [04:24<32:52, 39.22it/s, loss=0.00776][A
Epoch 0:  12%|█▏        | 10369/87741 [04:24<32:52, 39.22it/s, loss=0.00776][A
Epoch 0:  12%|█▏        | 10369/87741 [04:24<32:52, 39.22it/s, loss=0.0107] [A
Epoch 0:  12%|█▏        | 10373/87741 [04:24<32:51, 39.24it/s, loss=0.0107][A
Epoch 0:  12%|█▏        | 10373/87741 [04:24<32:51, 39.24it/s, loss=0.0107][A
Epoch 0:  12%|█▏        | 10373/87741 [04:24<32:51, 39.24it/s, loss=0.00344][A
Epoch 0:  12%|█▏        | 10373/87741 [04:24<32:51, 39.24it/s, loss=0.00344][A
Epoch 0:  12%|█▏        | 10373/87741 [04:24<32:51, 39.24it/s, loss=0.0168] [A
Epoch 0:  12%|█▏        | 10373/87741 [04:24<32:51, 39.24it/s, loss=0.0168][A
Epoch 0:  12%|█▏        | 10373/87741 [04:24<32:51, 39.24it/s, loss=0.00778][A
Epoch 0:  12%|█▏        | 10373/87741 [04:24<32:51, 39.24it/s, loss=0.00778][A
Epoch 0:  12%|█▏        | 10373/87741 [04:2

Epoch 0:  12%|█▏        | 10461/87741 [04:26<32:52, 39.19it/s, loss=0.0099] [A
Epoch 0:  12%|█▏        | 10461/87741 [04:26<32:52, 39.19it/s, loss=0.0099][A
Epoch 0:  12%|█▏        | 10461/87741 [04:26<32:52, 39.19it/s, loss=0.00673][A
Epoch 0:  12%|█▏        | 10461/87741 [04:26<32:52, 39.19it/s, loss=0.00673][A
Epoch 0:  12%|█▏        | 10461/87741 [04:26<32:52, 39.19it/s, loss=0.00219][A
Epoch 0:  12%|█▏        | 10461/87741 [04:26<32:52, 39.19it/s, loss=0.00219][A
Epoch 0:  12%|█▏        | 10461/87741 [04:26<32:52, 39.19it/s, loss=0.00303][A
Epoch 0:  12%|█▏        | 10465/87741 [04:26<32:52, 39.17it/s, loss=0.00303][A
Epoch 0:  12%|█▏        | 10465/87741 [04:26<32:52, 39.17it/s, loss=0.00303][A
Epoch 0:  12%|█▏        | 10465/87741 [04:26<32:52, 39.17it/s, loss=5.74e-5][A
Epoch 0:  12%|█▏        | 10465/87741 [04:26<32:52, 39.17it/s, loss=5.74e-5][A
Epoch 0:  12%|█▏        | 10465/87741 [04:26<32:52, 39.17it/s, loss=0.00758][A
Epoch 0:  12%|█▏        | 10465/87741 [04

Epoch 0:  12%|█▏        | 10549/87741 [04:28<32:54, 39.10it/s, loss=0.000354][A
Epoch 0:  12%|█▏        | 10553/87741 [04:28<32:57, 39.03it/s, loss=0.000354][A
Epoch 0:  12%|█▏        | 10553/87741 [04:28<32:57, 39.03it/s, loss=0.000354][A
Epoch 0:  12%|█▏        | 10553/87741 [04:28<32:57, 39.03it/s, loss=0.00356] [A
Epoch 0:  12%|█▏        | 10553/87741 [04:28<32:57, 39.03it/s, loss=0.00356][A
Epoch 0:  12%|█▏        | 10553/87741 [04:28<32:57, 39.03it/s, loss=0.013]  [A
Epoch 0:  12%|█▏        | 10553/87741 [04:28<32:57, 39.03it/s, loss=0.013][A
Epoch 0:  12%|█▏        | 10553/87741 [04:28<32:57, 39.03it/s, loss=0.0117][A
Epoch 0:  12%|█▏        | 10553/87741 [04:28<32:57, 39.03it/s, loss=0.0117][A
Epoch 0:  12%|█▏        | 10553/87741 [04:28<32:57, 39.03it/s, loss=0.00706][A
Epoch 0:  12%|█▏        | 10557/87741 [04:28<32:59, 38.99it/s, loss=0.00706][A
Epoch 0:  12%|█▏        | 10557/87741 [04:28<32:59, 38.99it/s, loss=0.00706][A
Epoch 0:  12%|█▏        | 10557/87741 [0

Epoch 0:  12%|█▏        | 10641/87741 [04:31<32:42, 39.29it/s, loss=0.000456][A
Epoch 0:  12%|█▏        | 10641/87741 [04:31<32:42, 39.29it/s, loss=6.57e-5] [A
Epoch 0:  12%|█▏        | 10641/87741 [04:31<32:42, 39.29it/s, loss=6.57e-5][A
Epoch 0:  12%|█▏        | 10641/87741 [04:31<32:42, 39.29it/s, loss=0.00363][A
Epoch 0:  12%|█▏        | 10645/87741 [04:31<32:46, 39.21it/s, loss=0.00363][A
Epoch 0:  12%|█▏        | 10645/87741 [04:31<32:46, 39.21it/s, loss=0.00363][A
Epoch 0:  12%|█▏        | 10645/87741 [04:31<32:46, 39.21it/s, loss=0.00824][A
Epoch 0:  12%|█▏        | 10645/87741 [04:31<32:46, 39.21it/s, loss=0.00824][A
Epoch 0:  12%|█▏        | 10645/87741 [04:31<32:46, 39.21it/s, loss=8.53e-5][A
Epoch 0:  12%|█▏        | 10645/87741 [04:31<32:46, 39.21it/s, loss=8.53e-5][A
Epoch 0:  12%|█▏        | 10645/87741 [04:31<32:46, 39.21it/s, loss=0.00115][A
Epoch 0:  12%|█▏        | 10645/87741 [04:31<32:46, 39.21it/s, loss=0.00115][A
Epoch 0:  12%|█▏        | 10645/87741 

Epoch 0:  12%|█▏        | 10733/87741 [04:33<32:39, 39.30it/s, loss=0.00194][A
Epoch 0:  12%|█▏        | 10733/87741 [04:33<32:39, 39.30it/s, loss=0.00194][A
Epoch 0:  12%|█▏        | 10733/87741 [04:33<32:39, 39.30it/s, loss=0.000695][A
Epoch 0:  12%|█▏        | 10733/87741 [04:33<32:39, 39.30it/s, loss=0.000695][A
Epoch 0:  12%|█▏        | 10733/87741 [04:33<32:39, 39.30it/s, loss=0.000404][A
Epoch 0:  12%|█▏        | 10733/87741 [04:33<32:39, 39.30it/s, loss=0.000404][A
Epoch 0:  12%|█▏        | 10733/87741 [04:33<32:39, 39.30it/s, loss=0.00281] [A
Epoch 0:  12%|█▏        | 10737/87741 [04:33<32:38, 39.31it/s, loss=0.00281][A
Epoch 0:  12%|█▏        | 10737/87741 [04:33<32:38, 39.31it/s, loss=0.00281][A
Epoch 0:  12%|█▏        | 10737/87741 [04:33<32:38, 39.31it/s, loss=3.12e-5][A
Epoch 0:  12%|█▏        | 10737/87741 [04:33<32:38, 39.31it/s, loss=3.12e-5][A
Epoch 0:  12%|█▏        | 10737/87741 [04:33<32:38, 39.31it/s, loss=0.00383][A
Epoch 0:  12%|█▏        | 10737/877

Epoch 0:  12%|█▏        | 10821/87741 [04:35<32:37, 39.29it/s, loss=0.00447][A
Epoch 0:  12%|█▏        | 10825/87741 [04:35<32:37, 39.29it/s, loss=0.00447][A
Epoch 0:  12%|█▏        | 10825/87741 [04:35<32:37, 39.29it/s, loss=0.00447][A
Epoch 0:  12%|█▏        | 10825/87741 [04:35<32:37, 39.29it/s, loss=0.00353][A
Epoch 0:  12%|█▏        | 10825/87741 [04:35<32:37, 39.29it/s, loss=0.00353][A
Epoch 0:  12%|█▏        | 10825/87741 [04:35<32:37, 39.29it/s, loss=0.000435][A
Epoch 0:  12%|█▏        | 10825/87741 [04:35<32:37, 39.29it/s, loss=0.000435][A
Epoch 0:  12%|█▏        | 10825/87741 [04:35<32:37, 39.29it/s, loss=0.00526] [A
Epoch 0:  12%|█▏        | 10825/87741 [04:35<32:37, 39.29it/s, loss=0.00526][A
Epoch 0:  12%|█▏        | 10825/87741 [04:35<32:37, 39.29it/s, loss=0.0142] [A
Epoch 0:  12%|█▏        | 10829/87741 [04:35<32:41, 39.22it/s, loss=0.0142][A
Epoch 0:  12%|█▏        | 10829/87741 [04:35<32:41, 39.22it/s, loss=0.0142][A
Epoch 0:  12%|█▏        | 10829/87741 [

Epoch 0:  12%|█▏        | 10913/87741 [04:38<32:39, 39.21it/s, loss=0.000434][A
Epoch 0:  12%|█▏        | 10913/87741 [04:38<32:39, 39.21it/s, loss=0.00179] [A
Epoch 0:  12%|█▏        | 10913/87741 [04:38<32:39, 39.21it/s, loss=0.00179][A
Epoch 0:  12%|█▏        | 10913/87741 [04:38<32:39, 39.21it/s, loss=0.00239][A
Epoch 0:  12%|█▏        | 10917/87741 [04:38<32:38, 39.23it/s, loss=0.00239][A
Epoch 0:  12%|█▏        | 10917/87741 [04:38<32:38, 39.23it/s, loss=0.00239][A
Epoch 0:  12%|█▏        | 10917/87741 [04:38<32:38, 39.23it/s, loss=0.00509][A
Epoch 0:  12%|█▏        | 10917/87741 [04:38<32:38, 39.23it/s, loss=0.00509][A
Epoch 0:  12%|█▏        | 10917/87741 [04:38<32:38, 39.23it/s, loss=0.00201][A
Epoch 0:  12%|█▏        | 10917/87741 [04:38<32:38, 39.23it/s, loss=0.00201][A
Epoch 0:  12%|█▏        | 10917/87741 [04:38<32:38, 39.23it/s, loss=0.00132][A
Epoch 0:  12%|█▏        | 10917/87741 [04:38<32:38, 39.23it/s, loss=0.00132][A
Epoch 0:  12%|█▏        | 10917/87741 

Epoch 0:  13%|█▎        | 11005/87741 [04:40<32:33, 39.28it/s, loss=0.00113][A
Epoch 0:  13%|█▎        | 11005/87741 [04:40<32:33, 39.28it/s, loss=0.00113][A
Epoch 0:  13%|█▎        | 11005/87741 [04:40<32:33, 39.28it/s, loss=0.00156][A
Epoch 0:  13%|█▎        | 11005/87741 [04:40<32:33, 39.28it/s, loss=0.00156][A
Epoch 0:  13%|█▎        | 11005/87741 [04:40<32:33, 39.28it/s, loss=3.71e-5][A
Epoch 0:  13%|█▎        | 11005/87741 [04:40<32:33, 39.28it/s, loss=3.71e-5][A
Epoch 0:  13%|█▎        | 11005/87741 [04:40<32:33, 39.28it/s, loss=0.00123][A
Epoch 0:  13%|█▎        | 11009/87741 [04:40<32:37, 39.19it/s, loss=0.00123][A
Epoch 0:  13%|█▎        | 11009/87741 [04:40<32:37, 39.19it/s, loss=0.00123][A
Epoch 0:  13%|█▎        | 11009/87741 [04:40<32:37, 39.19it/s, loss=0.00161][A
Epoch 0:  13%|█▎        | 11009/87741 [04:40<32:37, 39.19it/s, loss=0.00161][A
Epoch 0:  13%|█▎        | 11009/87741 [04:40<32:37, 39.19it/s, loss=0.00586][A
Epoch 0:  13%|█▎        | 11009/87741 [0

Epoch 0:  13%|█▎        | 11093/87741 [04:42<32:30, 39.29it/s, loss=0.00351][A
Epoch 0:  13%|█▎        | 11093/87741 [04:42<32:30, 39.29it/s, loss=0.00917][A
Epoch 0:  13%|█▎        | 11097/87741 [04:42<32:30, 39.29it/s, loss=0.00917][A
Epoch 0:  13%|█▎        | 11097/87741 [04:42<32:30, 39.29it/s, loss=0.00917][A
Epoch 0:  13%|█▎        | 11097/87741 [04:42<32:30, 39.29it/s, loss=5.49e-5][A
Epoch 0:  13%|█▎        | 11097/87741 [04:42<32:30, 39.29it/s, loss=5.49e-5][A
Epoch 0:  13%|█▎        | 11097/87741 [04:42<32:30, 39.29it/s, loss=9.61e-5][A
Epoch 0:  13%|█▎        | 11097/87741 [04:42<32:30, 39.29it/s, loss=9.61e-5][A
Epoch 0:  13%|█▎        | 11097/87741 [04:42<32:30, 39.29it/s, loss=1.59e-5][A
Epoch 0:  13%|█▎        | 11097/87741 [04:42<32:30, 39.29it/s, loss=1.59e-5][A
Epoch 0:  13%|█▎        | 11097/87741 [04:42<32:30, 39.29it/s, loss=0.00381][A
Epoch 0:  13%|█▎        | 11101/87741 [04:42<32:31, 39.27it/s, loss=0.00381][A
Epoch 0:  13%|█▎        | 11101/87741 [0

Epoch 0:  13%|█▎        | 11185/87741 [04:44<32:33, 39.20it/s, loss=0.00291][A
Epoch 0:  13%|█▎        | 11185/87741 [04:44<32:33, 39.20it/s, loss=0.000485][A
Epoch 0:  13%|█▎        | 11185/87741 [04:44<32:33, 39.20it/s, loss=0.000485][A
Epoch 0:  13%|█▎        | 11185/87741 [04:44<32:33, 39.20it/s, loss=1.66e-5] [A
Epoch 0:  13%|█▎        | 11185/87741 [04:44<32:33, 39.20it/s, loss=1.66e-5][A
Epoch 0:  13%|█▎        | 11185/87741 [04:44<32:33, 39.20it/s, loss=0.000595][A
Epoch 0:  13%|█▎        | 11189/87741 [04:44<32:31, 39.23it/s, loss=0.000595][A
Epoch 0:  13%|█▎        | 11189/87741 [04:45<32:31, 39.23it/s, loss=0.000595][A
Epoch 0:  13%|█▎        | 11189/87741 [04:45<32:31, 39.23it/s, loss=7.95e-5] [A
Epoch 0:  13%|█▎        | 11189/87741 [04:45<32:31, 39.23it/s, loss=7.95e-5][A
Epoch 0:  13%|█▎        | 11189/87741 [04:45<32:31, 39.23it/s, loss=9.3e-5] [A
Epoch 0:  13%|█▎        | 11189/87741 [04:45<32:31, 39.23it/s, loss=9.3e-5][A
Epoch 0:  13%|█▎        | 11189/87

Epoch 0:  13%|█▎        | 11277/87741 [04:47<32:30, 39.21it/s, loss=0.00501][A
Epoch 0:  13%|█▎        | 11277/87741 [04:47<32:30, 39.21it/s, loss=0.00501][A
Epoch 0:  13%|█▎        | 11277/87741 [04:47<32:30, 39.21it/s, loss=0.000286][A
Epoch 0:  13%|█▎        | 11277/87741 [04:47<32:30, 39.21it/s, loss=0.000286][A
Epoch 0:  13%|█▎        | 11277/87741 [04:47<32:30, 39.21it/s, loss=3.32e-5] [A
Epoch 0:  13%|█▎        | 11277/87741 [04:47<32:30, 39.21it/s, loss=3.32e-5][A
Epoch 0:  13%|█▎        | 11277/87741 [04:47<32:30, 39.21it/s, loss=0.000718][A
Epoch 0:  13%|█▎        | 11277/87741 [04:47<32:30, 39.21it/s, loss=0.000718][A
Epoch 0:  13%|█▎        | 11277/87741 [04:47<32:30, 39.21it/s, loss=0.0014]  [A
Epoch 0:  13%|█▎        | 11281/87741 [04:47<32:34, 39.12it/s, loss=0.0014][A
Epoch 0:  13%|█▎        | 11281/87741 [04:47<32:34, 39.12it/s, loss=0.0014][A
Epoch 0:  13%|█▎        | 11281/87741 [04:47<32:34, 39.12it/s, loss=0.000678][A
Epoch 0:  13%|█▎        | 11281/877

Epoch 0:  13%|█▎        | 11365/87741 [04:49<32:34, 39.07it/s, loss=0.00132][A
Epoch 0:  13%|█▎        | 11365/87741 [04:49<32:34, 39.07it/s, loss=0.00132][A
Epoch 0:  13%|█▎        | 11365/87741 [04:49<32:34, 39.07it/s, loss=0.00423][A
Epoch 0:  13%|█▎        | 11369/87741 [04:49<32:32, 39.11it/s, loss=0.00423][A
Epoch 0:  13%|█▎        | 11369/87741 [04:49<32:32, 39.11it/s, loss=0.00423][A
Epoch 0:  13%|█▎        | 11369/87741 [04:49<32:32, 39.11it/s, loss=0.00263][A
Epoch 0:  13%|█▎        | 11369/87741 [04:49<32:32, 39.11it/s, loss=0.00263][A
Epoch 0:  13%|█▎        | 11369/87741 [04:49<32:32, 39.11it/s, loss=0.000325][A
Epoch 0:  13%|█▎        | 11369/87741 [04:49<32:32, 39.11it/s, loss=0.000325][A
Epoch 0:  13%|█▎        | 11369/87741 [04:49<32:32, 39.11it/s, loss=0.000709][A
Epoch 0:  13%|█▎        | 11369/87741 [04:49<32:32, 39.11it/s, loss=0.000709][A
Epoch 0:  13%|█▎        | 11369/87741 [04:49<32:32, 39.11it/s, loss=0.00761] [A
Epoch 0:  13%|█▎        | 11373/877

Epoch 0:  13%|█▎        | 11457/87741 [04:51<32:26, 39.18it/s, loss=0.00504][A
Epoch 0:  13%|█▎        | 11457/87741 [04:51<32:26, 39.18it/s, loss=0.00881][A
Epoch 0:  13%|█▎        | 11457/87741 [04:51<32:26, 39.18it/s, loss=0.00881][A
Epoch 0:  13%|█▎        | 11457/87741 [04:51<32:26, 39.18it/s, loss=7.4e-5] [A
Epoch 0:  13%|█▎        | 11457/87741 [04:51<32:26, 39.18it/s, loss=7.4e-5][A
Epoch 0:  13%|█▎        | 11457/87741 [04:51<32:26, 39.18it/s, loss=0.000295][A
Epoch 0:  13%|█▎        | 11461/87741 [04:51<32:30, 39.11it/s, loss=0.000295][A
Epoch 0:  13%|█▎        | 11461/87741 [04:51<32:30, 39.11it/s, loss=0.000295][A
Epoch 0:  13%|█▎        | 11461/87741 [04:51<32:30, 39.11it/s, loss=0.00389] [A
Epoch 0:  13%|█▎        | 11461/87741 [04:51<32:30, 39.11it/s, loss=0.00389][A
Epoch 0:  13%|█▎        | 11461/87741 [04:51<32:30, 39.11it/s, loss=0.00421][A
Epoch 0:  13%|█▎        | 11461/87741 [04:51<32:30, 39.11it/s, loss=0.00421][A
Epoch 0:  13%|█▎        | 11461/87741

Epoch 0:  13%|█▎        | 11545/87741 [04:54<32:19, 39.30it/s, loss=0.00382][A
Epoch 0:  13%|█▎        | 11549/87741 [04:54<32:19, 39.28it/s, loss=0.00382][A
Epoch 0:  13%|█▎        | 11549/87741 [04:54<32:19, 39.28it/s, loss=0.00382][A
Epoch 0:  13%|█▎        | 11549/87741 [04:54<32:19, 39.28it/s, loss=0.00072][A
Epoch 0:  13%|█▎        | 11549/87741 [04:54<32:19, 39.28it/s, loss=0.00072][A
Epoch 0:  13%|█▎        | 11549/87741 [04:54<32:19, 39.28it/s, loss=0.00574][A
Epoch 0:  13%|█▎        | 11549/87741 [04:54<32:19, 39.28it/s, loss=0.00574][A
Epoch 0:  13%|█▎        | 11549/87741 [04:54<32:19, 39.28it/s, loss=0.00254][A
Epoch 0:  13%|█▎        | 11549/87741 [04:54<32:19, 39.28it/s, loss=0.00254][A
Epoch 0:  13%|█▎        | 11549/87741 [04:54<32:19, 39.28it/s, loss=0.000126][A
Epoch 0:  13%|█▎        | 11553/87741 [04:54<32:19, 39.28it/s, loss=0.000126][A
Epoch 0:  13%|█▎        | 11553/87741 [04:54<32:19, 39.28it/s, loss=0.000126][A
Epoch 0:  13%|█▎        | 11553/87741

Epoch 0:  13%|█▎        | 11637/87741 [04:56<32:30, 39.03it/s, loss=7.21e-5][A
Epoch 0:  13%|█▎        | 11637/87741 [04:56<32:30, 39.03it/s, loss=0.00169][A
Epoch 0:  13%|█▎        | 11637/87741 [04:56<32:30, 39.03it/s, loss=0.00169][A
Epoch 0:  13%|█▎        | 11637/87741 [04:56<32:30, 39.03it/s, loss=0.00119][A
Epoch 0:  13%|█▎        | 11641/87741 [04:56<32:27, 39.08it/s, loss=0.00119][A
Epoch 0:  13%|█▎        | 11641/87741 [04:56<32:27, 39.08it/s, loss=0.00119][A
Epoch 0:  13%|█▎        | 11641/87741 [04:56<32:27, 39.08it/s, loss=0.00129][A
Epoch 0:  13%|█▎        | 11641/87741 [04:56<32:27, 39.08it/s, loss=0.00129][A
Epoch 0:  13%|█▎        | 11641/87741 [04:56<32:27, 39.08it/s, loss=0.00412][A
Epoch 0:  13%|█▎        | 11641/87741 [04:56<32:27, 39.08it/s, loss=0.00412][A
Epoch 0:  13%|█▎        | 11641/87741 [04:56<32:27, 39.08it/s, loss=0.000427][A
Epoch 0:  13%|█▎        | 11641/87741 [04:56<32:27, 39.08it/s, loss=0.000427][A
Epoch 0:  13%|█▎        | 11641/87741 

Epoch 0:  13%|█▎        | 11729/87741 [04:58<32:26, 39.04it/s, loss=0.00562][A
Epoch 0:  13%|█▎        | 11729/87741 [04:58<32:26, 39.04it/s, loss=0.00562][A
Epoch 0:  13%|█▎        | 11729/87741 [04:58<32:26, 39.04it/s, loss=0.00857][A
Epoch 0:  13%|█▎        | 11729/87741 [04:58<32:26, 39.04it/s, loss=0.00857][A
Epoch 0:  13%|█▎        | 11729/87741 [04:58<32:26, 39.04it/s, loss=0.00221][A
Epoch 0:  13%|█▎        | 11729/87741 [04:58<32:26, 39.04it/s, loss=0.00221][A
Epoch 0:  13%|█▎        | 11729/87741 [04:58<32:26, 39.04it/s, loss=0.00426][A
Epoch 0:  13%|█▎        | 11733/87741 [04:58<32:28, 39.01it/s, loss=0.00426][A
Epoch 0:  13%|█▎        | 11733/87741 [04:58<32:28, 39.01it/s, loss=0.00426][A
Epoch 0:  13%|█▎        | 11733/87741 [04:58<32:28, 39.01it/s, loss=4.93e-5][A
Epoch 0:  13%|█▎        | 11733/87741 [04:58<32:28, 39.01it/s, loss=4.93e-5][A
Epoch 0:  13%|█▎        | 11733/87741 [04:58<32:28, 39.01it/s, loss=0.000975][A
Epoch 0:  13%|█▎        | 11733/87741 [

Epoch 0:  13%|█▎        | 11817/87741 [05:01<32:12, 39.29it/s, loss=3.91e-6][A
Epoch 0:  13%|█▎        | 11821/87741 [05:01<32:14, 39.25it/s, loss=3.91e-6][A
Epoch 0:  13%|█▎        | 11821/87741 [05:01<32:14, 39.25it/s, loss=3.91e-6][A
Epoch 0:  13%|█▎        | 11821/87741 [05:01<32:14, 39.25it/s, loss=0.000173][A
Epoch 0:  13%|█▎        | 11821/87741 [05:01<32:14, 39.25it/s, loss=0.000173][A
Epoch 0:  13%|█▎        | 11821/87741 [05:01<32:14, 39.25it/s, loss=1.53e-5] [A
Epoch 0:  13%|█▎        | 11821/87741 [05:01<32:14, 39.25it/s, loss=1.53e-5][A
Epoch 0:  13%|█▎        | 11821/87741 [05:01<32:14, 39.25it/s, loss=0.000204][A
Epoch 0:  13%|█▎        | 11821/87741 [05:01<32:14, 39.25it/s, loss=0.000204][A
Epoch 0:  13%|█▎        | 11821/87741 [05:01<32:14, 39.25it/s, loss=0.000548][A
Epoch 0:  13%|█▎        | 11825/87741 [05:01<32:13, 39.25it/s, loss=0.000548][A
Epoch 0:  13%|█▎        | 11825/87741 [05:01<32:13, 39.25it/s, loss=0.000548][A
Epoch 0:  13%|█▎        | 11825/

Epoch 0:  14%|█▎        | 11865/87741 [05:02<32:12, 39.27it/s, loss=9.94e-5][A
Epoch 0:  14%|█▎        | 11865/87741 [05:02<32:12, 39.27it/s, loss=9.94e-5][A
Epoch 0:  14%|█▎        | 11865/87741 [05:02<32:12, 39.27it/s, loss=0.00778][A
Epoch 0:  14%|█▎        | 11865/87741 [05:02<32:12, 39.27it/s, loss=0.00778][A
Epoch 0:  14%|█▎        | 11865/87741 [05:02<32:12, 39.27it/s, loss=8.66e-5][A
Epoch 0:  14%|█▎        | 11865/87741 [05:02<32:12, 39.27it/s, loss=8.66e-5][A
Epoch 0:  14%|█▎        | 11865/87741 [05:02<32:12, 39.27it/s, loss=0.00244][A
Epoch 0:  14%|█▎        | 11869/87741 [05:02<32:11, 39.29it/s, loss=0.00244][A
Epoch 0:  14%|█▎        | 11869/87741 [05:02<32:11, 39.29it/s, loss=0.00244][A
Epoch 0:  14%|█▎        | 11869/87741 [05:02<32:11, 39.29it/s, loss=0.00134][A
Epoch 0:  14%|█▎        | 11869/87741 [05:02<32:11, 39.29it/s, loss=0.00134][A
Epoch 0:  14%|█▎        | 11869/87741 [05:02<32:11, 39.29it/s, loss=0.00389][A
Epoch 0:  14%|█▎        | 11869/87741 [0

Epoch 0:  14%|█▎        | 11953/87741 [05:04<32:17, 39.12it/s, loss=0.00255][A
Epoch 0:  14%|█▎        | 11957/87741 [05:04<32:15, 39.16it/s, loss=0.00255][A
Epoch 0:  14%|█▎        | 11957/87741 [05:04<32:15, 39.16it/s, loss=0.00255][A
Epoch 0:  14%|█▎        | 11957/87741 [05:04<32:15, 39.16it/s, loss=6.54e-5][A
Epoch 0:  14%|█▎        | 11957/87741 [05:04<32:15, 39.16it/s, loss=6.54e-5][A
Epoch 0:  14%|█▎        | 11957/87741 [05:04<32:15, 39.16it/s, loss=0.00732][A
Epoch 0:  14%|█▎        | 11957/87741 [05:04<32:15, 39.16it/s, loss=0.00732][A
Epoch 0:  14%|█▎        | 11957/87741 [05:04<32:15, 39.16it/s, loss=0.00511][A
Epoch 0:  14%|█▎        | 11957/87741 [05:04<32:15, 39.16it/s, loss=0.00511][A
Epoch 0:  14%|█▎        | 11957/87741 [05:04<32:15, 39.16it/s, loss=0.0034] [A
Epoch 0:  14%|█▎        | 11961/87741 [05:04<32:13, 39.19it/s, loss=0.0034][A
Epoch 0:  14%|█▎        | 11961/87741 [05:04<32:13, 39.19it/s, loss=0.0034][A
Epoch 0:  14%|█▎        | 11961/87741 [05:

Epoch 0:  14%|█▎        | 12045/87741 [05:06<32:05, 39.30it/s, loss=0.0101]  [A
Epoch 0:  14%|█▎        | 12045/87741 [05:06<32:05, 39.30it/s, loss=0.0101][A
Epoch 0:  14%|█▎        | 12045/87741 [05:06<32:05, 39.30it/s, loss=0.00705][A
Epoch 0:  14%|█▎        | 12045/87741 [05:06<32:05, 39.30it/s, loss=0.00705][A
Epoch 0:  14%|█▎        | 12045/87741 [05:06<32:05, 39.30it/s, loss=0.00414][A
Epoch 0:  14%|█▎        | 12049/87741 [05:06<32:07, 39.26it/s, loss=0.00414][A
Epoch 0:  14%|█▎        | 12049/87741 [05:06<32:07, 39.26it/s, loss=0.00414][A
Epoch 0:  14%|█▎        | 12049/87741 [05:06<32:07, 39.26it/s, loss=0.000867][A
Epoch 0:  14%|█▎        | 12049/87741 [05:06<32:07, 39.26it/s, loss=0.000867][A
Epoch 0:  14%|█▎        | 12049/87741 [05:06<32:07, 39.26it/s, loss=0.00947] [A
Epoch 0:  14%|█▎        | 12049/87741 [05:06<32:07, 39.26it/s, loss=0.00947][A
Epoch 0:  14%|█▎        | 12049/87741 [05:06<32:07, 39.26it/s, loss=0.00461][A
Epoch 0:  14%|█▎        | 12049/87741

Epoch 0:  14%|█▍        | 12137/87741 [05:09<32:04, 39.28it/s, loss=0.00256][A
Epoch 0:  14%|█▍        | 12137/87741 [05:09<32:04, 39.28it/s, loss=0.00256][A
Epoch 0:  14%|█▍        | 12137/87741 [05:09<32:04, 39.28it/s, loss=0.000487][A
Epoch 0:  14%|█▍        | 12137/87741 [05:09<32:04, 39.28it/s, loss=0.000487][A
Epoch 0:  14%|█▍        | 12137/87741 [05:09<32:04, 39.28it/s, loss=0.000376][A
Epoch 0:  14%|█▍        | 12137/87741 [05:09<32:04, 39.28it/s, loss=0.000376][A
Epoch 0:  14%|█▍        | 12137/87741 [05:09<32:04, 39.28it/s, loss=6.38e-5] [A
Epoch 0:  14%|█▍        | 12137/87741 [05:09<32:04, 39.28it/s, loss=6.38e-5][A
Epoch 0:  14%|█▍        | 12137/87741 [05:09<32:04, 39.28it/s, loss=0.00125][A
Epoch 0:  14%|█▍        | 12141/87741 [05:09<32:03, 39.30it/s, loss=0.00125][A
Epoch 0:  14%|█▍        | 12141/87741 [05:09<32:03, 39.30it/s, loss=0.00125][A
Epoch 0:  14%|█▍        | 12141/87741 [05:09<32:03, 39.30it/s, loss=0.000348][A
Epoch 0:  14%|█▍        | 12141/87

Epoch 0:  14%|█▍        | 12225/87741 [05:11<32:01, 39.30it/s, loss=0.00242] [A
Epoch 0:  14%|█▍        | 12225/87741 [05:11<32:01, 39.30it/s, loss=0.00242][A
Epoch 0:  14%|█▍        | 12225/87741 [05:11<32:01, 39.30it/s, loss=8.75e-5][A
Epoch 0:  14%|█▍        | 12229/87741 [05:11<32:04, 39.24it/s, loss=8.75e-5][A
Epoch 0:  14%|█▍        | 12229/87741 [05:11<32:04, 39.24it/s, loss=8.75e-5][A
Epoch 0:  14%|█▍        | 12229/87741 [05:11<32:04, 39.24it/s, loss=0.00382][A
Epoch 0:  14%|█▍        | 12229/87741 [05:11<32:04, 39.24it/s, loss=0.00382][A
Epoch 0:  14%|█▍        | 12229/87741 [05:11<32:04, 39.24it/s, loss=0.00017][A
Epoch 0:  14%|█▍        | 12229/87741 [05:11<32:04, 39.24it/s, loss=0.00017][A
Epoch 0:  14%|█▍        | 12229/87741 [05:11<32:04, 39.24it/s, loss=0.0012] [A
Epoch 0:  14%|█▍        | 12229/87741 [05:11<32:04, 39.24it/s, loss=0.0012][A
Epoch 0:  14%|█▍        | 12229/87741 [05:11<32:04, 39.24it/s, loss=0.00849][A
Epoch 0:  14%|█▍        | 12233/87741 [0

Epoch 0:  14%|█▍        | 12317/87741 [05:13<31:57, 39.33it/s, loss=0.0102][A
Epoch 0:  14%|█▍        | 12317/87741 [05:13<31:57, 39.33it/s, loss=0.00722][A
Epoch 0:  14%|█▍        | 12317/87741 [05:13<31:57, 39.33it/s, loss=0.00722][A
Epoch 0:  14%|█▍        | 12317/87741 [05:13<31:57, 39.33it/s, loss=0.000592][A
Epoch 0:  14%|█▍        | 12317/87741 [05:13<31:57, 39.33it/s, loss=0.000592][A
Epoch 0:  14%|█▍        | 12317/87741 [05:13<31:57, 39.33it/s, loss=0.000891][A
Epoch 0:  14%|█▍        | 12321/87741 [05:13<32:00, 39.27it/s, loss=0.000891][A
Epoch 0:  14%|█▍        | 12321/87741 [05:13<32:00, 39.27it/s, loss=0.000891][A
Epoch 0:  14%|█▍        | 12321/87741 [05:13<32:00, 39.27it/s, loss=0.000114][A
Epoch 0:  14%|█▍        | 12321/87741 [05:13<32:00, 39.27it/s, loss=0.000114][A
Epoch 0:  14%|█▍        | 12321/87741 [05:13<32:00, 39.27it/s, loss=0.00461] [A
Epoch 0:  14%|█▍        | 12321/87741 [05:13<32:00, 39.27it/s, loss=0.00461][A
Epoch 0:  14%|█▍        | 12321/8

In [12]:
train_size_avg = []
token_size = []
for tr in train:
    train_size_avg.append(len(tr.split('[SEP]')))
    inputs_train = tokenizer(tr, return_tensors='pt')
    token_size.append(len(inputs_train['input_ids'][0]))

Token indices sequence length is longer than the specified maximum sequence length for this model (561 > 512). Running this sequence through the model will result in indexing errors


In [13]:
sum(train_size_avg)/len(train_size_avg)

20.69794193154901

In [14]:
sum(token_size)/len(token_size)

221.17695700892162

## Versão POI Split I
- Nesta versão a geração de documentos ocorre de acordo com a mudança de POIs.
- Sempre que ocorre a mudança de POI no conjunto, um novo documento é gerado
- Também há um controle de tamanho de sentenças para evitar o estouro da tokenização
- A separação em treino e validação é feita considerando o split 80-20

In [None]:
model_checkpoint = "distilbert-base-uncased"
tokenizer = DistilBertTokenizer.from_pretrained(model_checkpoint)

SENTENCE_SIZE = 200
EPOCHS = 15
BATCH_SIZE = 4
MAX_LEN = 512
MASK_PERC = 0.15
LR = 5e-5
valid_size = 0.2
patience = 5
use_amp = True

osm_tables = ['bins_points_information', 'bins_polygons_information', 'bins_roads_information', 'bins_lines_information']
for n in range(0, 1):
    for w in np.arange(0.0, 0.1, 0.1):
        #Ajustando o parâmetro w
        wgt = round(w, 1)
        for osm_table in osm_tables:
        
            #Flag para permitir o treinamento correto
            do_training = False
            
            #Caso especial para carregar os dados de pontos e treinar apenas uma vez
            if(osm_table == 'bins_points_information' and wgt == 0.0):
                file_name = './geographic/GEOC2VEC/austin-sl-tuple-geoc2vec-' + str(n) + osm_table + '-pfp-c.parquet'
                model_name = './geographic/GEOC2VECBERT15TKP/austin-sl-tuple-geoc2vec-distilbert-MLM-' + str(n) + osm_table + '-pfp-c'
                do_training = True
                
            elif(osm_table != 'bins_points_information'):
                file_name = './geographic/GEOC2VEC/austin-sl-tuple-geoc2vec-' + str(n) + osm_table + '-wgt' + str(wgt) + 'pfp-c.parquet'
                model_name = './geographic/GEOC2VECBERT15TKP/austin-sl-tuple-geoc2vec-distilbert-MLM-' + str(n) + osm_table + '-wgt' + str(wgt) + '-pfp-c'
                do_training = True
                
            if(do_training):
                
                start_time = time.time()
                
                #Carregando o modelo vazio para o finetuning
                model = DistilBertForMaskedLM.from_pretrained(model_checkpoint)

                #Carregar dataset
                print("Carregando dados...")
                print(file_name)
                sentences = pd.read_parquet(file_name)
                sentences = sentences.values.tolist()
                print('Quantidade de sentenças:', len(sentences))


                print("Gerando subtextos com foco nos tipos de POI...")
                #Criando textos menores com o conjunto de duas palavras dos POIs e dados geográficos
                #Esse método considera a mudança do TIPO de dado geográfico para criar um novo subtexto
                bert_sentences = []
                local_sentences = []
                actual_id = sentences[0][0]
                count_sentences = 1
                count_append = 0
                for i, sentence in enumerate(sentences):

                    if((count_sentences % SENTENCE_SIZE) == 0):
                        count_sentences = 1
                        final_sentence = '[CLS]'.join(local_sentences)
                        bert_sentences.append(final_sentence)
                        local_sentences = []


                    elif(sentence[0] != actual_id):
                        actual_id = sentence[0]
                        count_sentences = 1
                        final_sentence = '[CLS]'.join(local_sentences)
                        bert_sentences.append(final_sentence)
                        local_sentences = []

                    else:
                        count_sentences+=1
                        sentence_text = [sentence[1], sentence[3]]
                        local_sentences.append(bertInput_clean(sentence_text))

                #Adicionando último trecho
                if(len(local_sentences) > 0):
                    final_sentence = '[CLS]'.join(local_sentences)
                    bert_sentences.append(final_sentence)
            
                #Esvaziando memória
                del sentences, local_sentences
                
                print("Gerando conjunto de Treino e Validação...")
                train, validation = train_test_split(bert_sentences, test_size=valid_size, random_state=42)
                print('Conjunto de Treino: ', len(train))
                print('Conjunto de Validação: ', len(validation))
                
                #Esvaziando memória
                del bert_sentences

                #Tokenizando e salvando uma cópia dos tokens para representar as labels
                inputs_train = tokenizer(train, return_tensors='pt', max_length=MAX_LEN, truncation = True, padding='max_length')
                inputs_train['labels'] = inputs_train.input_ids.detach().clone()

                inputs_val = tokenizer(validation, return_tensors='pt', max_length=MAX_LEN, truncation = True, padding='max_length')
                inputs_val['labels'] = inputs_val.input_ids.detach().clone()


                #Esvaziando memória
                del train, validation

                #Encontrando MASK_PERC% das frases que serão mascaradas
                #As frases terão a segunda sentença mascarada ex: [CLS]Bar[SEP]Park[SEP] => [CLS]Bar[SEP]#####[SEP]
                print("Mascarando dados...")
                rand_train = torch.rand(inputs_train.input_ids.shape)
                rand_val = torch.rand(inputs_val.input_ids.shape)
                
                #Gerando um mascaramento em posições aleatório
                #101 = [CLS]
                #102 = [SEP]
                mask_arr_train = (rand_train < MASK_PERC) * (inputs_train.input_ids != 101) * \
                           (inputs_train.input_ids != 102) * (inputs_train.input_ids != 0)
                
                mask_arr_val = (rand_val < MASK_PERC) * (inputs_val.input_ids != 101) * \
                           (inputs_val.input_ids != 102) * (inputs_val.input_ids != 0)
                
                #Tokens que terão os tokens mascarados
                selection_train = []
                for i in range(inputs_train.input_ids.shape[0]):
                    selection_train.append(
                        torch.flatten(mask_arr_train[i].nonzero()).tolist()
                    )
                    
                selection_val = []
                for i in range(inputs_val.input_ids.shape[0]):
                    selection_val.append(
                        torch.flatten(mask_arr_val[i].nonzero()).tolist()
                    )

                #Esvaziando memória
                del rand_train, mask_arr_train, rand_val, mask_arr_val
                
                #Alterando os tokens
                #103 = [MASK]
                for i in range(inputs_train.input_ids.shape[0]):
                    inputs_train.input_ids[i, selection_train[i]] = 103
                
                for i in range(inputs_val.input_ids.shape[0]):
                    inputs_val.input_ids[i, selection_val[i]] = 103
                    
                #Esvaziando memória
                del selection_train, selection_val

                #Transformando os dados no objeto dataset do torch
                print("Preparando para o treinamento...")
                dataset_train = Dataset(inputs_train)
                loader_train = torch.utils.data.DataLoader(dataset_train, batch_size=BATCH_SIZE, shuffle=True)
                dataset_val = Dataset(inputs_val)
                loader_val = torch.utils.data.DataLoader(dataset_val, batch_size=BATCH_SIZE, shuffle=True)
                
                #Esvaziando memória
                del inputs_train, inputs_val

                #Preparando o dispotivo para o treinamento
                device = 'cuda' if cuda.is_available() else 'cpu' # CPU OR GPU
                torch.cuda.empty_cache()
                # and move our model over to the selected device
                model.to(device)

                optim = AdamW(model.parameters(), lr=LR)

                # to track the training loss as the model trains
                train_losses = []
                # to track the validation loss as the model trains
                valid_losses = []
                # to track the average training loss per epoch as the model trains
                avg_train_losses = []
                # to track the average validation loss per epoch as the model trains
                avg_valid_losses = [] 

                # initialize the early_stopping object
                early_stopping = EarlyStopping(patience=patience, verbose=False)
                scaler = torch.cuda.amp.GradScaler(enabled=use_amp)
                has_early_stopping = False

                for epoch in range(EPOCHS):
                    # setup loop with TQDM and dataloader
                    # activate training mode
                    model.train()
                    loop_train = tqdm(loader_train, leave=True)
                    for batch_train in loop_train:

                        optim.zero_grad()
                        # pull all tensor batches required for training
                        input_ids = batch_train['input_ids'].to(device)
                        attention_mask = batch_train['attention_mask'].to(device)
                        labels = batch_train['labels'].to(device)

                        with torch.autocast(device_type='cuda', dtype=torch.float16, enabled=use_amp):
                            outputs = model(input_ids, attention_mask=attention_mask,
                                        labels=labels)
                            loss = outputs.loss

                        scaler.scale(loss).backward()
                        scaler.step(optim)
                        scaler.update()

                        loop_train.set_description(f'Epoch {epoch}')
                        loop_train.set_postfix(loss=loss.item())


                        train_losses.append(loss.item())

                    ######################    
                    # validate the model #
                    ######################
                    model.eval() # prep model for evaluation
                    loop_val = tqdm(loader_val, leave=True)
                    for batch_val in loop_val:

                        # forward pass: compute predicted outputs by passing inputs to the model
                        input_ids = batch_val['input_ids'].to(device)
                        attention_mask = batch_val['attention_mask'].to(device)
                        labels = batch_val['labels'].to(device)

                        # process
                        with torch.autocast(device_type='cuda', dtype=torch.float16, enabled=use_amp):
                            outputs = model(input_ids, attention_mask=attention_mask,
                                        labels=labels)
                            loss = outputs.loss

                        # record validation loss
                        valid_losses.append(loss.item())

                    # calculate average loss over an epoch
                    train_loss = np.average(train_losses)
                    valid_loss = np.average(valid_losses)
                    avg_train_losses.append(train_loss)
                    avg_valid_losses.append(valid_loss)

                    # early_stopping needs the validation loss to check if it has decresed, 
                    # and if it has, it will make a checkpoint of the current model
                    early_stopping(valid_loss, model)

                    if early_stopping.early_stop:
                        print("Early stopping")
                        final_time = (time.time() - start_time)
                        has_early_stopping = True
                        training_dictionary = {'epoch': epoch+1,
                                               'epochs': EPOCHS,
                                               'patience': patience,
                                               'train_loss': train_loss,
                                               'valid_loss': valid_loss,
                                               'avg_train_losses':avg_train_losses,
                                               'avg_valid_losses': avg_valid_losses,
                                               'time': final_time}
                        break

                    # clear lists to track next epoch
                    train_losses = []
                    valid_losses = []

                # load the last checkpoint with the best model
                model.load_state_dict(torch.load('checkpoint.pt'))


                #Novo tempo para o caso das coisas terem sido diferentes
                if(has_early_stopping == False):
                    final_time = (time.time() - start_time)
                    training_dictionary = {'epoch': epoch+1,
                                           'epochs': EPOCHS,
                                           'patience': patience,
                                           'train_loss': train_loss,
                                           'valid_loss': valid_loss,
                                           'avg_train_losses':avg_train_losses,
                                           'avg_valid_losses': avg_valid_losses,
                                           'time': final_time}

                #Salvando o modelo pronto
                print("Salvando o modelo...")
                args = TrainingArguments(
                    output_dir=model_name,
                    per_device_train_batch_size=BATCH_SIZE,
                    num_train_epochs=EPOCHS
                )

                trainer = Trainer(
                    model=model,
                    args=args,
                    train_dataset=dataset_train,
                    eval_dataset=dataset_val
                )

                trainer.save_model()
                
                #Salvando as estatísticas do treinamento
                file_name = model_name + '/training_dictionary.json'
                with open(file_name, "w") as outfile:
                    json.dump(training_dictionary, outfile)
                
                #Esvaziando memória
                del loader_train
                del loader_val
                del dataset_train
                del dataset_val
                del train_losses
                del valid_losses
                del avg_train_losses
                del avg_valid_losses
                del trainer

## Versão Type Split-II.II

- Essa versão faz a separação dos dados considerando os tipos de dados geográficos replicados
- Toda vez que o dado geográfico muda, significa que outra sequência de replicação irá iniciar
- Também há um controle de tamanho de sentenças para evitar o estouro da tokenização
- Nesse caso, pequenos documentos com base nessas mudanças são gerados
- O conjunto de validação é feito utilizando o split 80-20
- Não trabalha com Early Stop
- Trabalha com Batches na tokenização para evitar estouros de memória
- Salva os embeddings em Word2Vec para economizar memória

In [9]:
model_checkpoint = "distilbert-base-uncased"
tokenizer = DistilBertTokenizer.from_pretrained(model_checkpoint)

EPOCHS = 2
BATCH_SIZE = 1
MAX_LEN = 512
MASK_PERC = 0.15
LR = 5e-5
valid_size = 0.2
#patience = 5
use_amp = True
SENTENCE_SIZE = 60
SENTENCE_SIZE_BOUND = 5


osm_tables = ['bins_points_information', 'bins_polygons_information', 'bins_roads_information', 'bins_lines_information']
#osm_tables = ['bins_polygons_information', 'bins_roads_information', 'bins_lines_information']

for n in range(7, 9, 1):
    
    #Ajustando o parâmetro w
    for osm_table in osm_tables:

        #Caso especial para carregar os dados de pontos e treinar apenas uma vez
        if(osm_table == 'bins_points_information'):
            
            #file_name = './Atlanta/GeoContext2Vec - μ36/atlanta-sl-tuple-geoc2vec-' + str(n) + osm_table + '-pfp-c.parquet'
            #model_name_W2V = './Atlanta/GeoContext2Vec W2V - μ36/atlanta-sl-tuple-geoc2vec-' + str(n) + osm_table + '-pfp-c.model'
            #model_name_DW2V = './Atlanta/GeoContext2Vec DISTILW2V - μ36/atlanta-sl-tuple-geoc2vec-distilw2v-' + str(n) + osm_table + '-pfp-c.model'
            
            file_name = './Boston/GeoContext2Vec - μ55/boston-sl-tuple-geoc2vec-' + str(n) + osm_table + '-pfp-c.parquet'
            model_name_W2V = './Boston/GeoContext2Vec W2V - μ55/boston-sl-tuple-geoc2vec-' + str(n) + osm_table + '-pfp-c.model'
            model_name_DW2V = './Boston/GeoContext2Vec DISTILW2V - μ55/boston-sl-tuple-geoc2vec-distilw2v-' + str(n) + osm_table + '-pfp-c.model'

        else:
            
            #wgt = 0.9
            #file_name = './Atlanta/GeoContext2Vec - μ36/atlanta-sl-tuple-geoc2vec-' + str(n) + osm_table + '-wgt' + str(wgt) + 'pfp-c.parquet'
            #model_name_W2V = './Atlanta/GeoContext2Vec W2V - μ36/atlanta-sl-tuple-geoc2vec-' + str(n) + osm_table + '-wgt' + str(wgt) + 'pfp-c.model'
            #model_name_DW2V = './Atlanta/GeoContext2Vec DISTILW2V - μ36/atlanta-sl-tuple-geoc2vec-distilw2v-' + str(n) + osm_table + '-wgt' + str(wgt) + '-pfp-c.model'
            
            wgt = 0.0
            file_name = './Boston/GeoContext2Vec - μ55/boston-sl-tuple-geoc2vec-' + str(n) + osm_table + '-wgt' + str(wgt) + 'pfp-c.parquet'
            model_name_W2V = './Boston/GeoContext2Vec W2V - μ55/boston-sl-tuple-geoc2vec-' + str(n) + osm_table + '-wgt' + str(wgt) + 'pfp-c.model'
            model_name_DW2V = './Boston/GeoContext2Vec DISTILW2V - μ55/boston-sl-tuple-geoc2vec-distilw2v-' + str(n) + osm_table + '-wgt' + str(wgt) + '-pfp-c.model'


        #Carregando o modelo vazio para o finetuning
        model = DistilBertForMaskedLM.from_pretrained(model_checkpoint)

        #Carregar dataset
        print("Carregando dados...")
        print(file_name)
        sentences = pd.read_parquet(file_name)
        sentences = sentences.values.tolist()
        print('Quantidade de sentenças:', len(sentences))


        print("Gerando subtextos com foco nos tipos de POI...")
        #Criando textos menores com o conjunto de duas palavras dos POIs e dados geográficos
        #Esse método considera a mudança do TIPO de dado geográfico para criar um novo subtexto
        bert_sentences = []
        local_sentences = []
        count_sentences = 1
        actual_type = sentences[0][3] # => Dado geográfico
        for i, sentence in enumerate(sentences):

            if((count_sentences % SENTENCE_SIZE) == 0):
                count_sentences = 1
                final_sentence = '[SEP]'.join(local_sentences)
                bert_sentences.append(final_sentence)
                local_sentences = []

            #Aqui deve ser feito um split_test
            elif(sentence[3] != actual_type):
                actual_type = sentence[3]
                count_sentences = 1
                final_sentence = '[SEP]'.join(local_sentences)
                if len(local_sentences) >= SENTENCE_SIZE_BOUND:
                    bert_sentences.append(final_sentence)
                local_sentences = []

            else:
                count_sentences+=1
                sentence_text = [sentence[1], sentence[3]]
                local_sentences.append(bertInput_clean(sentence_text))
                #local_sentences.append(input_clean(sentence_text))

        #Adicionando último trecho
        if(len(local_sentences) > 0):
            final_sentence = '[SEP]'.join(local_sentences)
            bert_sentences.append(final_sentence)


        print(bert_sentences[0])
        #Esvaziando memória
        del sentences, local_sentences

        print("Gerando dataset de treinamento...")
        print("Quantidade de textos:", len(bert_sentences))

        # Define os datasets
        train_dataset = TextDataset(bert_sentences, tokenizer, max_len=MAX_LEN)
        #Esvaziando memória
        del bert_sentences


        # Cria DataLoaders para batches
        train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)

        #Esvaziando memória
        #del train_dataset#, val_dataset


        #Preparando o dispotivo para o treinamento
        #Carregando o modelo vazio para o finetuning
        model = DistilBertForMaskedLM.from_pretrained(model_checkpoint)
        device = 'cuda' if cuda.is_available() else 'cpu' # CPU OR GPU
        torch.cuda.empty_cache()
        # and move our model over to the selected device
        model.to(device)

        optim = AdamW(model.parameters(), lr=LR)


        # initialize the early_stopping object
        #early_stopping = EarlyStopping(patience=patience, verbose=False)
        scaler = torch.cuda.amp.GradScaler(enabled=use_amp)
        has_early_stopping = False

        for epoch in range(EPOCHS):
            # setup loop with TQDM and dataloader
            # activate training mode
            model.train()
            loop_train = tqdm(train_loader, leave=True)
            for batch_train in loop_train:


                # Inputs do batch
                input_ids = batch_train["input_ids"].squeeze(1)  # Remove dimensões extras
                attention_mask = batch_train["attention_mask"].squeeze(1)
                labels = batch_train["labels"].squeeze(1)

                # Esvaziando memória
                #del batch_train

                # Encontrando MASK_PERC% das frases que serão mascaradas
                rand = torch.rand(input_ids.shape)

                # Máscara aleatória baseada no MASK_PERC
                mask_arr = (rand < MASK_PERC) * (input_ids != 101) * \
                           (input_ids != 102) * (input_ids != 0)

                # Encontrando as posições a serem mascaradas
                selection = []
                for i in range(input_ids.shape[0]):
                    selection.append(torch.flatten(mask_arr[i].nonzero()).tolist())

                # Alterando os tokens para [MASK] (103)
                for i in range(input_ids.shape[0]):
                    input_ids[i, selection[i]] = 103

                optim.zero_grad()
                # pull all tensor batches required for training
                input_ids = input_ids.to(device)
                attention_mask = attention_mask.to(device)
                labels = labels.to(device)

                with torch.autocast(device_type='cuda', dtype=torch.float16, enabled=use_amp):
                    outputs = model(input_ids, attention_mask=attention_mask,
                                labels=labels)
                    loss = outputs.loss

                scaler.scale(loss).backward()
                scaler.step(optim)
                scaler.update()

                loop_train.set_description(f'Epoch {epoch}')
                loop_train.set_postfix(loss=loss.item())


        #Salvando o modelo pronto
        print("Salvando o modelo em W2V...")

        #Carregando Modelos e base
        model_base = Word2Vec.load(model_name_W2V)
        model.to('cpu')
        model.eval()
        model_base = convertModels(model, model_base)

        print('saving file:', model_name_DW2V)
        model_base.save(model_name_DW2V)

        #Esvaziando memória
        del train_loader
        del model_base
        del model

Carregando dados...
./Boston/GeoContext2Vec - μ55/boston-sl-tuple-geoc2vec-7bins_polygons_information-wgt0.0pfp-c.parquet
Quantidade de sentenças: 4785397
Gerando subtextos com foco nos tipos de POI...
Shopping[SEP]polygons_amenity_parking[SEP]Shopping[SEP]polygons_amenity_parking[SEP]Shopping Centers[SEP]polygons_amenity_parking[SEP]Shopping Centers[SEP]polygons_amenity_parking[SEP]Shopping Centers[SEP]polygons_amenity_parking
Gerando dataset de treinamento...
Quantidade de textos: 243560


Epoch 0:   0%|          | 71/243560 [00:01<1:01:03, 66.47it/s, loss=0.131] 

# II.II DIOGO

In [None]:
model_checkpoint = "distilbert-base-uncased"
tokenizer = DistilBertTokenizer.from_pretrained(model_checkpoint)

EPOCHS = 2
BATCH_SIZE = 1
MAX_LEN = 512
MASK_PERC = 0.15
LR = 5e-5
valid_size = 0.2
#patience = 5
use_amp = True
SENTENCE_SIZE = 60
SENTENCE_SIZE_BOUND = 5


osm_tables = ['points_information', 'polygons_information', 'roads_information', 'lines_information']

    
#Ajustando o parâmetro w
for osm_table in osm_tables:

    file_name = './Models/new-york-sl-tuple-geoc2vec-μ90-pois_'+ osm_table +'-pfp-c.csv'
    model_name_W2V = './Models/new-york-sl-tuple-geoc2vec-μ90-pois_' + osm_table + '-pfp-c.model'
    model_name_DW2V = './Models/new-york-sl-tuple-geoc2vec-μ90-pois-distilw2v-' + osm_table + '-pfp-c.model'

    #Carregando o modelo vazio para o finetuning
    model = DistilBertForMaskedLM.from_pretrained(model_checkpoint)

    #Carregar dataset
    print("Carregando dados...")
    print(file_name)
    sentences = pd.read_csv(file_name)
    sentences = sentences.values.tolist()
    print('Quantidade de sentenças:', len(sentences))


    print("Gerando subtextos com foco nos tipos de POI...")
    #Criando textos menores com o conjunto de duas palavras dos POIs e dados geográficos
    #Esse método considera a mudança do TIPO de dado geográfico para criar um novo subtexto
    bert_sentences = []
    local_sentences = []
    count_sentences = 1
    actual_type = sentences[0][1] # => Dado geográfico
    for i, sentence in enumerate(sentences):

        if((count_sentences % SENTENCE_SIZE) == 0):
            count_sentences = 1
            final_sentence = '[SEP]'.join(local_sentences)
            bert_sentences.append(final_sentence)
            local_sentences = []

        #Aqui deve ser feito um split_test
        elif(sentence[1] != actual_type):
            actual_type = sentence[1]
            count_sentences = 1
            final_sentence = '[SEP]'.join(local_sentences)
            if len(local_sentences) >= SENTENCE_SIZE_BOUND:
                bert_sentences.append(final_sentence)
            local_sentences = []

        else:
            count_sentences+=1
            sentence_text = [sentence[1], sentence[2]]
            local_sentences.append(bertInput_clean(sentence_text))
            #local_sentences.append(input_clean(sentence_text))

    #Adicionando último trecho
    if(len(local_sentences) > 0):
        final_sentence = '[SEP]'.join(local_sentences)
        bert_sentences.append(final_sentence)


    print(bert_sentences[0])
    #Esvaziando memória
    del sentences, local_sentences

    print("Gerando dataset de treinamento...")
    print("Quantidade de textos:", len(bert_sentences))

    # Define os datasets
    train_dataset = TextDataset(bert_sentences, tokenizer, max_len=MAX_LEN)
    #Esvaziando memória
    del bert_sentences


    # Cria DataLoaders para batches
    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)

    #Esvaziando memória
    #del train_dataset#, val_dataset


    #Preparando o dispotivo para o treinamento
    #Carregando o modelo vazio para o finetuning
    model = DistilBertForMaskedLM.from_pretrained(model_checkpoint)
    device = 'cuda' if cuda.is_available() else 'cpu' # CPU OR GPU
    torch.cuda.empty_cache()
    # and move our model over to the selected device
    model.to(device)

    optim = AdamW(model.parameters(), lr=LR)


    # initialize the early_stopping object
    #early_stopping = EarlyStopping(patience=patience, verbose=False)
    scaler = torch.cuda.amp.GradScaler(enabled=use_amp)
    has_early_stopping = False

    for epoch in range(EPOCHS):
        # setup loop with TQDM and dataloader
        # activate training mode
        model.train()
        loop_train = tqdm(train_loader, leave=True)
        for batch_train in loop_train:


            # Inputs do batch
            input_ids = batch_train["input_ids"].squeeze(1)  # Remove dimensões extras
            attention_mask = batch_train["attention_mask"].squeeze(1)
            labels = batch_train["labels"].squeeze(1)

            # Esvaziando memória
            #del batch_train

            # Encontrando MASK_PERC% das frases que serão mascaradas
            rand = torch.rand(input_ids.shape)

            # Máscara aleatória baseada no MASK_PERC
            mask_arr = (rand < MASK_PERC) * (input_ids != 101) * \
                       (input_ids != 102) * (input_ids != 0)

            # Encontrando as posições a serem mascaradas
            selection = []
            for i in range(input_ids.shape[0]):
                selection.append(torch.flatten(mask_arr[i].nonzero()).tolist())

            # Alterando os tokens para [MASK] (103)
            for i in range(input_ids.shape[0]):
                input_ids[i, selection[i]] = 103

            optim.zero_grad()
            # pull all tensor batches required for training
            input_ids = input_ids.to(device)
            attention_mask = attention_mask.to(device)
            labels = labels.to(device)

            with torch.autocast(device_type='cuda', dtype=torch.float16, enabled=use_amp):
                outputs = model(input_ids, attention_mask=attention_mask,
                            labels=labels)
                loss = outputs.loss

            scaler.scale(loss).backward()
            scaler.step(optim)
            scaler.update()

            loop_train.set_description(f'Epoch {epoch}')
            loop_train.set_postfix(loss=loss.item())


    #Salvando o modelo pronto
    print("Salvando o modelo em W2V...")

    #Carregando Modelos e base
    model_base = Word2Vec.load(model_name_W2V)
    model.to('cpu')
    model.eval()
    model_base = convertModels(model, model_base)

    print('saving file:', model_name_DW2V)
    model_base.save(model_name_DW2V)

    #Esvaziando memória
    del train_loader
    del model_base
    del model

Carregando dados...
./Models/new-york-sl-tuple-geoc2vec-μ90-pois_points_information-pfp-c.csv
Quantidade de sentenças: 423061
Gerando subtextos com foco nos tipos de POI...
Arts & Crafts Store[SEP]points_access_yes[SEP]Arts & Crafts Store[SEP]points_amenity_bicycle_parking[SEP]Arts & Crafts Store[SEP]points_amenity_bicycle_parking[SEP]Arts & Crafts Store[SEP]points_amenity_bicycle_parking[SEP]Arts & Crafts Store[SEP]points_amenity_bicycle_parking[SEP]Arts & Crafts Store[SEP]points_amenity_bicycle_parking[SEP]Arts & Crafts Store[SEP]points_amenity_bicycle_parking[SEP]Arts & Crafts Store[SEP]points_amenity_bicycle_parking[SEP]Arts & Crafts Store[SEP]points_amenity_bicycle_parking[SEP]Arts & Crafts Store[SEP]points_amenity_post_box[SEP]Arts & Crafts Store[SEP]points_amenity_post_box[SEP]Arts & Crafts Store[SEP]points_amenity_pub[SEP]Arts & Crafts Store[SEP]points_amenity_waste_basket[SEP]Arts & Crafts Store[SEP]points_amenity_waste_basket[SEP]Arts & Crafts Store[SEP]points_amenity_waste_b

Epoch 0:  41%|████▏     | 3502/8453 [02:03<02:58, 27.81it/s, loss=6.86e-5] 

In [2]:
#model_base_dsb = Word2Vec.load('./Models/new-york-sl-tuple-geoc2vec-pois-distilw2v-lines_information-pfp-c.model')
#model_base_dsb = Word2Vec.load('./Models/new-york-sl-tuple-geoc2vec-pois-distilw2v-points_information-pfp-c.model')
model_base_dsb = Word2Vec.load('./model/Bert/new-york-sl-tuple-geoc2vec-μ90-pois-distilw2v-roads_information-pfp-c.model')
#model_base_dsb = Word2Vec.load('./Models/new-york-sl-tuple-geoc2vec-pois-distilw2v-roads_information-pfp-c.model')

In [3]:
model_base_dsb.wv.most_similar("Cemetery")

[('Library', 0.9755752682685852),
 ('Synagogue', 0.9755641222000122),
 ('Church', 0.9725342392921448),
 ('Museum', 0.9717111587524414),
 ('Historic Site', 0.9714446663856506),
 ('Park', 0.9705073833465576),
 ('Playground', 0.9699022173881531),
 ('Restaurant', 0.9697355031967163),
 ('Hotel', 0.9683917760848999),
 ('Campground', 0.9678623080253601)]

In [8]:
import numpy as np

In [10]:
words = list(model_base_dsb.wv.vocab.keys())
word_vectors = np.array([model_base_dsb.wv[word] for word in words])
word_vectors.shape

(281, 768)

In [55]:
for value in model_base_dsb.wv.vocab:
    if("_" not in value):
        print(value)

Arts & Crafts Store
Bridge
Home (private)
Mobile Phone Shop
Food & Drink Shop
Ferry
Office
Music Venue
Subway
Park
Burger Joint
Bank
Coffee Shop
Clothing Store
Electronics Store
Deli / Bodega
University
Bus Station
Light Rail
Mexican Restaurant
Other Great Outdoors
Neighborhood
Ice Cream Shop
American Restaurant
BBQ Joint
Other Nightlife
Building
College Academic Building
Train Station
Department Store
Tanning Salon
Gym / Fitness Center
Medical Center
Factory
School
Airport
Hotel
Mall
Fast Food Restaurant
Hardware Store
Road
Bar
Campground
Cupcake Shop
Gas Station / Garage
General Travel
Residential Building (Apartment / Condo)
Laundry Service
Chinese Restaurant
Toy / Game Store
Government Building
Convention Center
Drugstore / Pharmacy
Community College
Bagel Shop
Vegetarian / Vegan Restaurant
Candy Store
Miscellaneous Shop
Pizza Place
General College & University
Movie Theater
Sandwich Place
Scenic Lookout
Stadium
Housing Development
Theater
Synagogue
General Entertainment
German Res

In [62]:
model_base_w2v = Word2Vec.load('./Models/new-york-sl-tuple-geoc2vec-pois_polygons_information-pfp-c.model')

In [68]:
model_base_w2v.wv.most_similar("Church")

[('Movie Theater', 0.9996915459632874),
 ('American Restaurant', 0.9996589422225952),
 ('Gas Station / Garage', 0.9996418356895447),
 ('Athletic & Sport', 0.9996417760848999),
 ('Zoo', 0.9996351003646851),
 ('polygons_sport_horse_racing', 0.99962317943573),
 ('Student Center', 0.9996138215065002),
 ('polygons_building_bridge', 0.9996100664138794),
 ('Other Nightlife', 0.9996029734611511),
 ('Flea Market', 0.9996013641357422)]