# Antes de empezar

conda activate python3.6_cv2

In [1]:
import torch
import torch.nn as nn 
import torch.nn.functional as F
from torch import optim
from torch.utils.data import DataLoader
import cv2
import albumentations

from scipy import ndimage
import math
import random
import skimage 
import h5py

import cProfile, pstats
import numpy as np
import pandas as pd
from IPython.display import display
import matplotlib.pyplot as plt
%matplotlib inline 

# Ignore harmless warnings:

import warnings
warnings.filterwarnings("ignore")
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [2]:
import platform
print(torch.__version__)
print(platform.python_version())
torch.cuda.get_device_name(0)
path = "/home/abarreiro/data/handwriting/seq2seq/IAM_words_48_192.hdf5"

1.8.1
3.6.10


## Definiendo diccionario, codificación y longitud máxima

In [3]:
# Dictionary used in seq2seq paper
decoder_dict = {0: '0', 1: '!', 2: 'L', 3: 'z', 4: 'G', 5: 'm', 6: '6', 7: '/', 8: 'j', 9: 's', 10: 'S', 11: '5',
                12: 'R', 13: ')', 14: 'u', 15: 'y', 16: '9', 17: 'g', 18: '3', 19: '1', 20: 'e', 21: "'", 22: ':',
                23: 'Q', 24: '2', 25: 'a', 26: 't', 27: 'A', 28: '7', 29: ';', 30: 'i', 31: 'H', 32: 'W', 33: ',',
                34: '(', 35: 'O', 36: 'U', 37: 'K', 38: 'd', 39: '*', 40: '.', 41: '?', 42: 'q', 43: '-', 44: 'r',
                45: 'n', 46: '&', 47: 'C', 48: '"', 49: 'h', 50: 'v', 51: 'f', 52: 'E', 53: 'p', 54: 'x', 55: '+',
                56: 'w', 57: 'b', 58: 'o', 59: ' ', 60: 'B', 61: 'P', 62: 'D', 63: 'I', 64: 'J', 65: 'V', 66: 'N',
                67: 'M', 68: '8', 69: 'k', 70: 'c', 71: '4', 72: 'T', 73: 'X', 74: 'l', 75: 'Z', 76: 'F', 77: 'Y',
                78: 'START', 79: 'END', 80: 'PAD'}

inverse_decoder_dict = {v: k for k, v in decoder_dict.items()}
print(inverse_decoder_dict['END'])

79


In [4]:
# One_hot_mapping assigns to each number in decoder_dict its corresponding one-hot vector:

one_hot_mapping = {}

cont = 0
for item in decoder_dict:
    vector = torch.zeros(1, 1, len(decoder_dict))
    vector[0, 0, cont] = 1.0
    one_hot_mapping[item] = vector
    cont += 1

# Inverse_one_hot_mapping assigns to each one-hot vector its corresponding number in decoder_dict
inverse_one_hot_mapping = {v: k for k, v in one_hot_mapping.items()}

# One_hot_to_char assigns to each possible one-hot vector its corresponding character from decoder_dict
one_hot_to_char = {}
for one_hot, char in zip(inverse_one_hot_mapping, inverse_decoder_dict):
    one_hot_to_char[one_hot] = char 
    
# char_to_one_hot converts each character 'END', 'a', etc into a one-hot vector
char_to_one_hot = {}
for char, one_hot in zip(inverse_decoder_dict, inverse_one_hot_mapping): 
    char_to_one_hot[char] = one_hot
    
# Some examples...

#print(one_hot_mapping[80])
#print(inverse_one_hot_mapping[one_hot_mapping[80]])
#print(one_hot_to_char[one_hot_mapping[80]])
#print(char_to_one_hot['END'])

In [5]:
MAX_LENGTH = 19
output_size = len(decoder_dict)

# Definiendo funciones para el Data Augmentation

In [6]:
'''
import h5py
filename = path

with h5py.File(filename, "r") as f:
    # List all groups
    #print("Keys: %s" % f.keys())
    data_header = list(f.keys())
    print(data_header)
    data = []
    
    for item in data_header:
        
        # Getting data:
        data.append(list(f[item]))
        
    # Creating dictionary between data names and data   
    new_dict = {name: obj for name, obj in zip(data_header, data)}
'''

'\nimport h5py\nfilename = path\n\nwith h5py.File(filename, "r") as f:\n    # List all groups\n    #print("Keys: %s" % f.keys())\n    data_header = list(f.keys())\n    print(data_header)\n    data = []\n    \n    for item in data_header:\n        \n        # Getting data:\n        data.append(list(f[item]))\n        \n    # Creating dictionary between data names and data   \n    new_dict = {name: obj for name, obj in zip(data_header, data)}\n'

In [7]:
from ipynb.fs.full.New_DA_transf_proyectivas import ElasticDistortion
from ipynb.fs.full.New_DA_transf_proyectivas import RandomTransform
from ipynb.fs.full.New_DA_transf_proyectivas import move_img
from ipynb.fs.full.New_DA_transf_proyectivas import resize_down
from ipynb.fs.full.New_DA_transf_proyectivas import resize_up
from ipynb.fs.full.New_DA_transf_proyectivas import img_augmented

In [8]:
def get_Data_Aug(image_set, batch_size=128):
    images_da = [0]*batch_size
    counter = 0
    for image in image_set:
        try:
            image = img_augmented(image)         
        except:
            image = image / 255 # normalisation
            
        image = torch.FloatTensor(image)
        images_da[counter] = image
        counter += 1
        
    return images_da

# Generando patches y etiquetas

In [9]:
def patch_gen_IAM(image_set, batch_size=128, n_patches=92, patch_height=48, patch_width=10, stepsize=2):
    total_pt = [0]*batch_size*n_patches
    counter = 0
    #n_patches = int((width - patch_width)/stepsize + 1) 
    for image in image_set:
        #patches_tensor = torch.empty(patch_height, patch_width)    
        start = 0
        for p in range(n_patches):

            total_pt[p + counter*n_patches] = image[:, start:start + patch_width].unsqueeze(0) # sliding window
            start += stepsize # updating the bottom-left position of the patch adding the stepsize
            
        #total_pt += [patches_tensor]
        counter += 1
    total_pt = torch.cat(total_pt, dim=0).unsqueeze(1)
    return total_pt.to(device)

In [10]:
def get_one_hot_target_IAM(labels, seq_len=MAX_LENGTH+2, output_size=output_size, batch_size=128):
    # labels: tensor containing the labels of the words in the batch
    # each word label consists of a vector of length 19 (MAX LENGTH). The 19 elements are the encoded characters of the word
    # (according to Jorge's decoder dict, and completed with PADs to reach length = 19)
    one_hot_target = torch.empty(batch_size, seq_len, output_size) # future one-hot encoding tensor for the words of the batch
    START = inverse_decoder_dict['START'] # code number of the START token (according to Jorge's decoder_dict)
    END = inverse_decoder_dict['END']
    PAD = inverse_decoder_dict['PAD']

    for j, word in enumerate(labels):
        
        It_has_PADs = torch.any(word == PAD).item() # (majority case: the label vector of the word is completed with PADs)
        one_hot_target[j, 0, :] = one_hot_mapping[START] # START token's one-hot vector goes first
        
        for k, letter in enumerate(word):
            one_hot_target[j, k + 1, :] = one_hot_mapping[letter.item()] # one-hot encoding of the rest of letters (including PADs)
            
        one_hot_target[j, -1, :] = one_hot_mapping[END] # last = END token
        
        if It_has_PADs == True: # if we had PADs
            
            array_of_PADs = torch.where(word == PAD)[0] 
            first_PAD = torch.min(array_of_PADs).item() # we store the first position where it appeared
            first_PAD = first_PAD + 1 # (recall that we added the START as first element, so the indices won't match)
            one_hot_target[j, first_PAD, :] = one_hot_mapping[END] # we replace that first PAD by an END
            one_hot_target[j, -1, :] = one_hot_mapping[PAD] # then the last element was a PAD, and not the END token
            
    return one_hot_target.to(device)

In [11]:
'''
def one_hot_conversion(decoder_output, output_size):
    
    one_hot_output_letter = torch.zeros(1, 1, output_size)
    index = torch.argmax(decoder_output, dim = 2).item()
    one_hot_output_letter[0, 0, index] = 1.
    
    return one_hot_output_letter
'''

'\ndef one_hot_conversion(decoder_output, output_size):\n    \n    one_hot_output_letter = torch.zeros(1, 1, output_size)\n    index = torch.argmax(decoder_output, dim = 2).item()\n    one_hot_output_letter[0, 0, index] = 1.\n    \n    return one_hot_output_letter\n'

# Generando datos por batch

In [12]:
def sort_by_batch(set_random_sample, set_length, batch_size):
    '''
    This function takes an array of size = set_length of indices
    and sorts it in sub-arrays of size = batch_size
    '''
    sorted_set_rs = []
    j = 0
    while (j + batch_size < set_length):
        sorted_set_rs.append(np.sort(set_random_sample[j:j+batch_size]))
        j = j + batch_size
    
    sorted_set_rs.append(np.sort(set_random_sample[j:])) # adding last, smaller batch
    sorted_set_rs = np.concatenate(sorted_set_rs) # concatenate everything in a single array
    return sorted_set_rs

def data_generator(image_set, target_set, random_sampling, mode, batch_size=128):
    
    f = h5py.File(path, "r")
    j = 0
    len_set = len(f[target_set])
    
    while 1:
        indices = random_sampling[j:j+batch_size]
        data_X = f[image_set][indices]

        if mode == 'training':
            data_X = get_Data_Aug(data_X)
            
        elif mode == 'validation':
            data_X = data_X / 255
            data_X = torch.FloatTensor(data_X)
            
        data_X = patch_gen_IAM(data_X)
        
        data_y = f[target_set][indices]
        data_y = torch.ByteTensor(data_y) # for ~ 80 characters, 8-bit representation should be enough
        data_y[data_y == 100] = 80 # replacing Jorge's coding of PAD token by ours
        
        yield data_X, data_y
        
        if j + 2*batch_size >= len_set: # drop last, smaller batch
            j = 0
            break
        else:
            j += batch_size
        
    f.close()

# Definiendo la arquitectura

In [13]:
class ConvolutionalNetwork(nn.Module):
    
    def __init__(self, IN_CHANNELS, FILTERS_CNN_1, FILTERS_CNN_2, NEURONS_IN_DENSE_LAYER,
                 PATCH_HEIGHT, PATCH_WIDTH, STRIDE, PADDING, KERNEL_SIZE, dropout_p):
        super().__init__()
        self.IN_CHANNELS = IN_CHANNELS
        self.FILTERS_CNN_1 = FILTERS_CNN_1
        self.FILTERS_CNN_2 = FILTERS_CNN_2
        self.NEURONS_IN_DENSE_LAYER = NEURONS_IN_DENSE_LAYER
        self.PATCH_HEIGHT_AFTER_POOLING = PATCH_HEIGHT//4
        self.PATCH_WIDTH_AFTER_POOLING = PATCH_WIDTH//4
        self.STRIDE = STRIDE
        self.PADDING = PADDING
        self.KERNEL_SIZE = KERNEL_SIZE
        self.dropout = nn.Dropout(dropout_p)
        
        self.conv1 = nn.Conv2d(in_channels = self.IN_CHANNELS, out_channels = self.FILTERS_CNN_1,
                               kernel_size = self.KERNEL_SIZE, stride = self.STRIDE, padding = self.PADDING)
        self.conv2 = nn.Conv2d(in_channels = self.FILTERS_CNN_1, out_channels = self.FILTERS_CNN_2,
                               kernel_size = self.KERNEL_SIZE, stride = self.STRIDE, padding = self.PADDING)
        self.fc1 = nn.Linear(self.PATCH_HEIGHT_AFTER_POOLING * self.PATCH_WIDTH_AFTER_POOLING * self.FILTERS_CNN_2, 
                             self.NEURONS_IN_DENSE_LAYER)
        
    def forward(self, X):
        X = F.relu(self.conv1(X))
        X = F.max_pool2d(X, 2, 2)
        X = F.relu(self.conv2(X))
        X = F.max_pool2d(X, 2, 2)
        X = X.view(-1, self.PATCH_HEIGHT_AFTER_POOLING*self.PATCH_WIDTH_AFTER_POOLING*self.FILTERS_CNN_2)
        X = self.dropout(self.fc1(X))

        return X

In [14]:
class EncoderRNN(nn.Module):
    def __init__(self, input_size, hidden_size, batch_size, encoder_seq_len, num_layers, num_directions, dropout_p):        
        super(EncoderRNN, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.batch_size = batch_size
        self.seq_len = encoder_seq_len
        self.num_layers = num_layers
        self.num_directions = num_directions
        self.dropout = dropout_p
        self.lstm = nn.LSTM(self.input_size, self.hidden_size, self.num_layers, batch_first = True, dropout = self.dropout, 
                           bidirectional = True)

    def forward(self, input, hidden):        
        output = input.view(self.batch_size, self.seq_len, self.input_size)
        output, hidden = self.lstm(output, hidden)
        return output, hidden

    def initHidden(self):
        return (torch.zeros(self.num_layers * self.num_directions, self.batch_size, self.hidden_size, device=device),
                torch.zeros(self.num_layers * self.num_directions, self.batch_size, self.hidden_size, device=device))

In [15]:
class BahdanauDecoder(nn.Module):
    
    def __init__(self, output_size, hidden_size, dropout_p, batch_size, encoder_seq_len, decoder_seq_len):
        super(BahdanauDecoder, self).__init__()
        self.batch_size = batch_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.encoder_seq_len = encoder_seq_len
        self.decoder_seq_len = decoder_seq_len
        self.dropout_p = dropout_p

        #self.embedding = nn.Embedding(self.output_size, self.hidden_size)

        self.fc_hidden = nn.Linear(self.hidden_size, self.hidden_size, bias=False)
        self.fc_encoder = nn.Linear(self.hidden_size, self.hidden_size, bias=False)
        self.weight_vector = torch.FloatTensor(self.batch_size, self.hidden_size, self.decoder_seq_len)
        self.weight = nn.Parameter(nn.init.xavier_uniform_(self.weight_vector)) #xavier initializer avoids nans
        #self.attn_combine = nn.Linear(self.hidden_size * 2, self.hidden_size)
        self.dropout = nn.Dropout(self.dropout_p)
        self.lstm = nn.LSTM(self.output_size + self.hidden_size, self.hidden_size, batch_first=True)
        self.classifier = nn.Linear(self.hidden_size, self.output_size)

    def forward(self, inputs, hidden, encoder_outputs):
        #embedded = self.dropout(embedded)

        # Calculating Alignment Scores
        hidden_state = hidden[0].view(self.batch_size, self.decoder_seq_len, self.hidden_size)
        x = torch.tanh(self.fc_hidden(hidden_state) + self.fc_encoder(encoder_outputs))

        alignment_scores = torch.bmm(x, self.weight)
        alignment_scores = alignment_scores.view(self.batch_size, self.decoder_seq_len, self.encoder_seq_len)
 
        # Softmaxing alignment scores to get Attention weights
        attn_weights = F.softmax(alignment_scores, dim = 2)

        # Multiplying the Attention weights with encoder outputs to get the context vector
        context_vector = torch.bmm(attn_weights, encoder_outputs)

        # Concatenating context vector with embedded input word
        output = torch.cat((inputs, context_vector), 2)
        # Passing the concatenated vector as input to the LSTM cell
        output, hidden = self.lstm(output, hidden)
        # Passing the LSTM output through a Linear layer acting as a classifier
        output = F.log_softmax(self.classifier(output), dim = 2)
        return output, hidden, attn_weights

In [16]:
torch.manual_seed(1234)

CNN_model = ConvolutionalNetwork(IN_CHANNELS = 1, FILTERS_CNN_1 = 20, FILTERS_CNN_2 = 50, NEURONS_IN_DENSE_LAYER = 1024, 
                                 PATCH_HEIGHT = 48, PATCH_WIDTH = 10, STRIDE = 1, PADDING = 2, KERNEL_SIZE = 5, dropout_p = 0.5).to(device)
CNN_optimizer = torch.optim.AdamW(CNN_model.parameters(), lr = 0.001)
CNN_scheduler = torch.optim.lr_scheduler.StepLR(CNN_optimizer, step_size = 1, gamma = 0.98) # decreasing lr 2% every epoch

Encoder_model = EncoderRNN(input_size = 1024, hidden_size = 256, batch_size = 128, encoder_seq_len = 92, 
                           num_layers = 2, num_directions = 2, dropout_p = 0.5).to(device)
Encoder_optimizer = torch.optim.AdamW(Encoder_model.parameters(), lr = 0.001)
Encoder_scheduler = torch.optim.lr_scheduler.StepLR(Encoder_optimizer, step_size = 1, gamma = 0.98)

Decoder_model = BahdanauDecoder(output_size = len(decoder_dict), hidden_size = 256, dropout_p = 0, batch_size = 128,
                               encoder_seq_len = 92, decoder_seq_len = 1).to(device)
Decoder_optimizer = torch.optim.AdamW(Decoder_model.parameters(), lr = 0.001)
Decoder_scheduler = torch.optim.lr_scheduler.StepLR(Decoder_optimizer, step_size = 1, gamma = 0.98)

criterion = nn.NLLLoss()

## Entrenando

In [17]:
import time

#from torch.utils.tensorboard import SummaryWriter
#writer = SummaryWriter(log_dir='/home/abarreiro/runs/Variation_DA_Jorge/')

In [18]:
def train():
    train_losses = []
    
    random_sampling = random.sample(range(len_trn), len_trn)
    random_sampling = sort_by_batch(random_sampling, len_trn, batch_size)
    train_loader = data_generator(image_set = 'X_trn', target_set = 'target_trn',
                                  random_sampling = random_sampling, mode = 'training')
    
    for num_batch, (images, labels) in enumerate(train_loader):         
        num_batch += 1

        encoder_hidden = Encoder_model.initHidden()
        encoder_input = CNN_model(images)
        encoder_outputs, encoder_hidden = Encoder_model(encoder_input, encoder_hidden)
        encoder_outputs = encoder_outputs.view(batch_size, encoder_seq_len, enc_num_directions, enc_hidden_size)
        encoder_outputs = encoder_outputs[:, :, 0, :] + encoder_outputs[:, :, 1, :]
        encoder_outputs = encoder_outputs.view(batch_size, encoder_seq_len, enc_hidden_size)

        hidden_state = encoder_hidden[0][-2, :, :].view(1, batch_size, enc_hidden_size) + encoder_hidden[0][-1, :, :].view(1, batch_size, enc_hidden_size)
        cell_state = encoder_hidden[1][-2, :, :].view(1, batch_size, enc_hidden_size) + encoder_hidden[1][-1, :, :].view(1, batch_size, enc_hidden_size)
        decoder_hidden = (hidden_state, cell_state)
        decoder_input = get_one_hot_target_IAM(labels)
        
        decoder_output_total = []
        for num_letter in range(MAX_LENGTH + 2):
            
            decoder_input_letter = decoder_input[:, num_letter, :].unsqueeze(1)
            decoder_output, decoder_hidden, attn_weights = Decoder_model(decoder_input_letter, decoder_hidden, encoder_outputs)
            decoder_output_total += [decoder_output]

        decoder_output_total = torch.cat(decoder_output_total, dim = 1)
        decoder_output = decoder_output_total[:, :-1, :] # remove END token from output
        
        ground_truth = torch.argmax(decoder_input, dim = 2)
        ground_truth = ground_truth[:, 1:] # remove START token from input

        decoder_output = decoder_output.reshape(-1, output_size)
        ground_truth = ground_truth.flatten()
        
        loss = criterion(decoder_output, ground_truth)
        
        CNN_optimizer.zero_grad()
        Encoder_optimizer.zero_grad()
        Decoder_optimizer.zero_grad()
        
        loss.backward()
        
        CNN_optimizer.step()
        Encoder_optimizer.step()
        Decoder_optimizer.step()
        
        train_losses += [loss.item()]
        
    return np.mean(train_losses)

In [19]:
def validation():
    
    valid_losses = []
    
    random_sampling_val = random.sample(range(len_val), len_val)
    random_sampling_val = sort_by_batch(random_sampling_val, len_val, batch_size)
    val_loader = data_generator(image_set = 'X_val', target_set = 'target_val',
                                random_sampling = random_sampling_val, mode = 'validation')
    
    with torch.no_grad():       
        for num_batch_val, (images_val, labels_val) in enumerate(val_loader):        
            num_batch_val += 1
            encoder_hidden_val = Encoder_model.initHidden()
            encoder_input_val = CNN_model(images_val)
            encoder_outputs_val, encoder_hidden_val = Encoder_model(encoder_input_val, encoder_hidden_val)
            encoder_outputs_val = encoder_outputs_val.view(batch_size, encoder_seq_len, enc_num_directions, enc_hidden_size)
            encoder_outputs_val = encoder_outputs_val[:, :, 0, :] + encoder_outputs_val[:, :, 1, :]
            
            hidden_state_val = encoder_hidden_val[0][-2, :, :].view(1, batch_size, enc_hidden_size) + encoder_hidden_val[0][-1, :, :].view(1, batch_size, enc_hidden_size)
            cell_state_val = encoder_hidden_val[1][-2, :, :].view(1, batch_size, enc_hidden_size) + encoder_hidden_val[1][-1, :, :].view(1, batch_size, enc_hidden_size)
            decoder_hidden_val = (hidden_state_val, cell_state_val)
            decoder_input_val = get_one_hot_target_IAM(labels_val)
            
            decoder_output_total_val = []
            for num_letter_val in range(MAX_LENGTH + 2):
            
                decoder_input_letter_val = decoder_input_val[:, num_letter_val, :].unsqueeze(1)
                decoder_output_val, decoder_hidden_val, attn_weights_val = Decoder_model(decoder_input_letter_val, decoder_hidden_val, encoder_outputs_val)
                decoder_output_total_val += [decoder_output_val]
                
            decoder_output_total_val = torch.cat(decoder_output_total_val, dim = 1)
            decoder_output_val = decoder_output_total_val[:, :-1, :]

            ground_truth_val = torch.argmax(decoder_input_val, dim = 2)
            ground_truth_val = ground_truth_val[:, 1:]
            
            decoder_output_val = decoder_output_val.reshape(-1, output_size)
            ground_truth_val = ground_truth_val.flatten()
            
            loss_val = criterion(decoder_output_val, ground_truth_val)
            valid_losses += [loss_val.item()]
            
    return np.mean(valid_losses)

In [20]:
class Patience():
    
    def __init__(self, patience):
        self.patience = patience
        self.current_patience = patience
        self.min_loss_val = float('inf')

    def more_patience(self,loss_val):
        self.current_patience -= 1
        if self.current_patience == 0:
            return False

        if loss_val < self.min_loss_val:
            self.min_loss_val = loss_val
            self.current_patience = patience
            
            model_name = f"DA_Jorge_tesis_transf_proyectivas.pt"
            print(", saved best model.")
            
            torch.save({
                'CNN_model': CNN_model.state_dict(),
                'CNN_optimizer': CNN_optimizer.state_dict(),
                'Encoder_model': Encoder_model.state_dict(),
                'Encoder_optimizer': Encoder_optimizer.state_dict(),
                'Decoder_model': Decoder_model.state_dict(),
                'Decoder_optimizer': Decoder_optimizer.state_dict(),
            }, "/home/abarreiro/Variaciones_sobre_arquitectura_original/modelos/" + model_name)
            '''
            torch.save(CNN_model.state_dict(), 'CNN_'+model_name)
            torch.save(Encoder_model.state_dict(), 'Encoder_'+model_name)
            torch.save(Decoder_model.state_dict(), 'Decoder_'+model_name)
            '''
        return True

In [None]:
torch.manual_seed(1234)
patience = 150

patience_controler = Patience(patience)
start_time = time.time()

len_trn = 47926
len_val = 7558
len_tst = 20292

encoder_seq_len = 92
enc_num_directions = 2
batch_size = 128
hidden_size = 256
enc_hidden_size = 256
dec_hidden_size = 256


for num_epoch in range(5000000):

    train_loss = train()        
    valid_loss = validation()
    
    CNN_scheduler.step()
    Encoder_scheduler.step()
    Decoder_scheduler.step()
    
    #writer.add_scalar('Loss/train', train_loss, num_epoch)
    #writer.add_scalar('Loss/validation', valid_loss, num_epoch)
    
    print(f'Epoch: {num_epoch} Train loss: {train_loss} Valid loss: {valid_loss} Duration: {(time.time() - start_time)/60} minutes',)

    if not patience_controler.more_patience(valid_loss):
        print("Se acabó la paciencia")
        break

Epoch: 0 Train loss: 0.8555049037232119 Valid loss: 0.5985215669971401 Duration: 4.26476248105367 minutes
, saved best model.
Epoch: 1 Train loss: 0.5981243804176861 Valid loss: 0.5047390021510043 Duration: 7.55973786910375 minutes
, saved best model.
Epoch: 2 Train loss: 0.519980482956305 Valid loss: 0.42906328581147274 Duration: 10.84703921477 minutes
, saved best model.
Epoch: 3 Train loss: 0.4525890756737102 Valid loss: 0.3760189105898647 Duration: 14.142994046211243 minutes
, saved best model.
Epoch: 4 Train loss: 0.3963115008916447 Valid loss: 0.3321405356213198 Duration: 17.42906018892924 minutes
, saved best model.
Epoch: 5 Train loss: 0.3473249529612893 Valid loss: 0.2884119898080826 Duration: 20.724386397997538 minutes
, saved best model.
Epoch: 6 Train loss: 0.30326143775713 Valid loss: 0.260041207075119 Duration: 24.025274582703908 minutes
, saved best model.
Epoch: 7 Train loss: 0.26404727322054417 Valid loss: 0.21960326069492406 Duration: 27.33185313542684 minutes
, saved

In [None]:
# activar pytorch_estoril (environment) en la terminal y ejecutar tensorboard --host 0.0.0.0 --logdir ./runs
# Tensorboard se ejecutará en un cierto puerto y nos dará el enlace. Habrá que sustituir la IP 0.0.0.0 por la del equipo
# en remoto en la que esté corriendo en el caso de Estoril 212.128.3.86: