In [None]:
# Mount the user's Google Drive inside the Colab runtime at /content/drive/.
from google.colab import drive
drive.mount('/content/drive/') 


Mounted at /content/drive/


## handy function

In [None]:
import time
import torch
import math
import os
import numpy as np



def print_current_time(output=''):
    """Print a caption line followed by the current time in the 'Israel' timezone.

    Args:
        output: Caption printed before the timestamp. When it equals the empty
            string, the default caption "The current time is: " is printed.
    """
    import datetime
    import pytz

    caption = "The current time is: " if output == '' else output
    timestamp = datetime.datetime.now(pytz.timezone('Israel'))
    print(caption)
    print(timestamp)


def move_x_and_y_cpu(x, y):
    """Return the pair ``(x.cpu(), y.cpu())``.

    Args:
        x: Object exposing ``.cpu()`` (e.g. a torch tensor).
        y: Object exposing ``.cpu()`` (e.g. a torch tensor).

    Returns:
        A 2-tuple with the CPU versions of ``x`` and ``y``, in that order.
    """
    return x.cpu(), y.cpu()


def timeSince(since, percent):
    """Format the elapsed time and the estimated remaining time.

    Args:
        since: Start timestamp as returned by ``time.time()``.
        percent: Fraction of the work already completed; the total duration is
            estimated as ``elapsed / percent``.

    Returns:
        A string ``'<elapsed> (<remaining>)'`` where both parts are formatted
        by ``asMinutes``.
    """
    elapsed = time.time() - since
    remaining = elapsed / percent - elapsed
    return '%s (%s)' % (asMinutes(elapsed), asMinutes(remaining))


def calculate_accuracy(outputs, labels):
    """Count correct predictions in one batch.

    The predicted class of each row is the argmax of ``outputs`` along dim 1.

    Args:
        outputs: Score tensor whose dim 1 indexes the classes.
        labels: Tensor of target class ids; its first dimension is the batch size.

    Returns:
        ``(correct, total)``: number of matching predictions (Python number)
        and ``labels.size(0)``.
    """
    hits = torch.argmax(outputs, dim=1).eq(labels)
    return hits.sum().item(), labels.size(0)


def asMinutes(s):
    """Format a duration given in seconds as ``'<minutes>m <seconds>s'``.

    Args:
        s: Duration in seconds (int or float).

    Returns:
        The formatted string; both parts are rendered with ``%d`` and are
        therefore truncated to integers.
    """
    whole_minutes = math.floor(s / 60)
    leftover_seconds = s - whole_minutes * 60
    return '%dm %ds' % (whole_minutes, leftover_seconds)







## tokenizing

In [None]:
!pip install transformers

Collecting transformers
[?25l  Downloading https://files.pythonhosted.org/packages/ed/d5/f4157a376b8a79489a76ce6cfe147f4f3be1e029b7144fa7b8432e8acb26/transformers-4.4.2-py3-none-any.whl (2.0MB)
[K     |████████████████████████████████| 2.0MB 15.4MB/s 
Collecting sacremoses
[?25l  Downloading https://files.pythonhosted.org/packages/7d/34/09d19aff26edcc8eb2a01bed8e98f13a1537005d31e95233fd48216eed10/sacremoses-0.0.43.tar.gz (883kB)
[K     |████████████████████████████████| 890kB 41.6MB/s 
Collecting tokenizers<0.11,>=0.10.1
[?25l  Downloading https://files.pythonhosted.org/packages/71/23/2ddc317b2121117bf34dd00f5b0de194158f2a44ee2bf5e47c7166878a97/tokenizers-0.10.1-cp37-cp37m-manylinux2010_x86_64.whl (3.2MB)
[K     |████████████████████████████████| 3.2MB 54.5MB/s 
Building wheels for collected packages: sacremoses
  Building wheel for sacremoses (setup.py) ... [?25l[?25hdone
  Created wheel for sacremoses: filename=sacremoses-0.0.43-cp37-none-any.whl size=893262 sha256=e6aab0dc13

In [None]:
from transformers import BertTokenizer
import torch

class Tokenizing:
    """Tokenize song lyrics with the 'bert-base-uncased' BERT tokenizer.

    Attributes:
        df_songs: Frame supplied by the caller; ``tokenize_each_song`` reads its
            "Artist", "Song_name" and "Lyrics" columns.
        max_embed_batch_len: Length (512) every encoded song is padded or
            truncated to.
        songs_dict: Maps ``(artist, song_name)`` to the encoded fields of that
            song plus the raw text under the key 'Lyrics'.
        tokenizer: The loaded ``BertTokenizer``.
        tokenizing_path: Path the token cache was loaded from, or ``None`` when
            nothing was loaded from disk.
    """

    def __init__(self, df_songs):
        self.df_songs = df_songs
        self.max_embed_batch_len = 512
        self.songs_dict = dict()
        self.tokenizer = None
        # Defined up front so the attribute exists even when no cache is loaded.
        self.tokenizing_path = None
        self.create_tokenizer_instance()

    def create_tokenizer_instance(self):
        """Load the pretrained lower-casing BERT tokenizer into ``self.tokenizer``."""
        print('Loading BERT tokenizer...')
        self.tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True)

    def tokenizing_batch(self, batch):
        """Encode the lyrics of a single song.

        Args:
            batch: The text to encode.

        Returns:
            Whatever ``tokenizer.encode_plus`` returns for the text, requested
            with special tokens, an attention mask and PyTorch tensors, padded
            or truncated to ``self.max_embed_batch_len`` tokens.
        """
        # `padding='max_length'` + `truncation=True` is the non-deprecated
        # spelling of the former `pad_to_max_length=True` and makes the
        # truncation explicit instead of relying on an implicit fallback.
        encoded_dict = self.tokenizer.encode_plus(
            batch,  # Text to encode.
            add_special_tokens=True,  # Add '[CLS]' and '[SEP]'.
            max_length=self.max_embed_batch_len,
            padding='max_length',
            truncation=True,
            return_attention_mask=True,  # Build the mask for [PAD] tokens.
            return_tensors='pt'  # Return PyTorch tensors.
        )
        return encoded_dict

    def tokenize_each_song(self, tokenizing_path=None):
        """Fill ``self.songs_dict`` for all songs.

        Args:
            tokenizing_path: When not ``None``, the dictionary is loaded from
                this file with ``torch.load`` instead of being computed, and
                the path is remembered in ``self.tokenizing_path``.
        """
        print_current_time("starting tokenizing process")

        if tokenizing_path is not None:
            # Reuse a previously saved tokenization.
            self.songs_dict = torch.load(tokenizing_path)
            self.tokenizing_path = tokenizing_path
        else:
            songs = self.df_songs
            # Iterate the columns directly so the frame does not need a 0..n-1 index.
            for artist, song_name, lyrics in zip(songs["Artist"], songs["Song_name"], songs["Lyrics"]):
                song_fields = self.tokenizing_batch(lyrics).data
                song_fields['Lyrics'] = lyrics
                # NOTE(review): songs sharing the same (artist, song_name)
                # overwrite each other here - confirm the keys are unique.
                self.songs_dict[(artist, song_name)] = song_fields

        print_current_time("finished tokenizing process")


## BERT classifier

In [None]:
from transformers import BertForSequenceClassification, BertConfig

class BERTClassifer():
    """Hold a pretrained 'bert-base-uncased' sequence-classification model.

    Args:
        args: Configuration object; only ``args.class_number`` (number of
            output labels) is read.
        device: ``torch.device`` the model is moved to.

    Attributes:
        model: The ``BertForSequenceClassification`` instance, configured to
            return neither attention weights nor all hidden states.
    """

    def __init__(self, args, device):
        # Pretrained BERT with a single linear classification layer on top.
        self.model = BertForSequenceClassification.from_pretrained(
            "bert-base-uncased",  # 12-layer BERT model with an uncased vocab.
            num_labels=args.class_number,
            output_attentions=False,
            output_hidden_states=False,
        )

        # Move to exactly the requested device. Unlike a bare `.cuda()`, this
        # honours a device index such as 'cuda:1' and is a no-op on CPU.
        self.model.to(device)


  # def embed_and_predict(self,token_songs_batch, device):
  #
  #    b_input_ids =token_songs_batch[0].to(device)
  #    b_input_mask =token_songs_batch[1].to(device)
  #    b_labels =token_songs_batch[2].to(device)
  #    b_labels =  b_labels.squeeze_()
  #
  #    loss, logits = self.model(b_input_ids,
  #                         token_type_ids=None,
  #                         attention_mask=b_input_mask,
  #                         labels=b_labels)
  #    return(logits)







## upload data to dataloader

In [None]:
import torch
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, TensorDataset
import numpy as np




class upload_data_to_dataloader:
    """Split features and labels into train / validation / test DataLoaders.

    Args:
        orig_songs_df: Frame whose 'label' column provides the targets.
        data: Either a feature tensor (``tokenized_data=False``) or a dict whose
            values expose 'input_ids' and 'attention_mask' tensors
            (``tokenized_data=True``).
        args: Configuration object providing ``test_ratio``,
            ``validation_ratio``, ``tr_batch_size`` and ``val_batch_size``;
            ``test_batch_size`` is used when present.
        tokenized_data: Selects which of the two layouts ``data`` has.

    Attributes:
        tr_dataloader: Loader over the training split.
        val_dataloader: Loader over the validation split, or ``None`` when
            ``args.validation_ratio`` is not positive.
        test_dataloader: Loader over the test split.
    """

    def __init__(self, orig_songs_df, data, args,   tokenized_data = False ):

        self.labels = orig_songs_df['label'].values
        self.data = data
        self.args = args

        # Always defined, so callers can read them even when no validation
        # split is requested.
        self.tr_dataloader = None
        self.val_dataloader = None
        self.test_dataloader = None

        self.create_dataloader(tokenized_data)

    def create_dataloader(self, tokenized_data):
        """Build the loaders from embeddings or from tokenized songs."""
        if not tokenized_data:
            self.upload_emd_data()
        else:
            self.upload_tokenized_data()

    def upload_emd_data(self):
        """Build the loaders when ``self.data`` is a single feature tensor."""
        self._build_dataloaders(self.data)

    def upload_tokenized_data(self):
        """Build the loaders from the input-id and attention-mask tensors."""
        input_ids_data, attention_mask_data = self.get_input_ids_att_maks_lists()
        self._build_dataloaders(input_ids_data, attention_mask_data)

    def get_input_ids_att_maks_lists(self):
        """Stack every song's 'input_ids' and 'attention_mask' along dim 0.

        Returns:
            ``(input_ids_data, attention_masks_data)``; both are empty tensors
            when ``self.data`` has no entries.
        """
        # The leading empty float tensor reproduces the dtype promotion of
        # concatenating onto `torch.tensor([])` one song at a time (and keeps
        # the empty-input result), while concatenating only once.
        id_chunks = [torch.tensor([])]
        mask_chunks = [torch.tensor([])]

        for token_song in self.data.values():
            id_chunks.append(token_song.get('input_ids'))
            mask_chunks.append(token_song.get('attention_mask'))

        input_ids_data = torch.cat(id_chunks, dim=0)
        attention_masks_data = torch.cat(mask_chunks, dim=0)

        return input_ids_data, attention_masks_data

    def _test_batch_size(self):
        """Return ``args.test_batch_size``, falling back to ``args.tr_batch_size``."""
        return getattr(self.args, 'test_batch_size', self.args.tr_batch_size)

    @staticmethod
    def _make_loader(parts, batch_size):
        """Wrap feature tensor(s) plus a trailing numpy label array in a DataLoader."""
        *features, labels = parts
        dataset = TensorDataset(*features, torch.from_numpy(labels))
        return DataLoader(dataset, batch_size=batch_size)

    def _build_dataloaders(self, *feature_arrays):
        """Split the given feature arrays together with the labels and build all loaders."""
        # train_test_split returns [a_train, a_test, b_train, b_test, ...].
        split = train_test_split(*feature_arrays, self.labels,
                                 test_size=self.args.test_ratio,
                                 random_state=42)
        train_parts, test_parts = list(split[0::2]), list(split[1::2])

        if self.args.validation_ratio > 0:
            # The validation set is carved out of the training portion only.
            split = train_test_split(*train_parts,
                                     test_size=self.args.validation_ratio,
                                     random_state=42)
            train_parts, val_parts = list(split[0::2]), list(split[1::2])
            self.val_dataloader = self._make_loader(val_parts, self.args.val_batch_size)
        else:
            self.val_dataloader = None

        self.tr_dataloader = self._make_loader(train_parts, self.args.tr_batch_size)
        self.test_dataloader = self._make_loader(test_parts, self._test_batch_size())








## train net

In [None]:
# import pandas as pd
from tqdm import tqdm
from handy_function import print_current_time
import torch
import time
import copy
from handy_function import timeSince,save_model, calculate_accuracy



class TrainNet:
    """Train a classifier and record per-epoch metrics.

    Training starts immediately: ``__init__`` ends by calling ``train_net``.

    Args:
        train_dataloader: Loader over the training batches.
        net: The model to train.
        optimizer: Optimizer already bound to ``net``'s parameters.
        device: Device the batches are moved to.
        args: Configuration object providing ``num_epochs``, ``criterion``,
            ``early_stop_n`` and ``early_stop_acc_value``.
        val_dataloader: Loader evaluated after every epoch when
            ``use_validation`` is true.
        save: When true, ``save_model(net, epoch)`` is called after each epoch.
        tr_bert_classifer: When true, batches are ``(input_ids, mask, labels)``
            and ``net`` itself returns ``(loss, logits)``; otherwise batches
            are ``(features, labels)`` and the loss comes from ``criterion``.
        use_validation: Enables validation metrics and early stopping.

    Raises:
        ValueError: If ``use_validation`` is true but no ``val_dataloader`` is given.

    Attributes:
        train_loss, val_loss, train_acc, val_acc: Lists of length
            ``num_epochs``; entry ``i`` holds the metric of epoch ``i`` and
            stays ``None`` for epochs that were never run.
        val_best_acc_value, val_best_acc_epoch: Best validation accuracy and
            the 0-based index of the epoch that reached it.
        val_best_loss_value: Lowest validation loss seen.
        epoch_before_early_stop, val_acc_value_before_eraly_stop,
        val_loss_value_before_eraly_stop: Index and validation metrics of the
            last epoch that was validated.
    """

    def __init__(self, train_dataloader, net, optimizer, device, args, val_dataloader=None, save= False,  tr_bert_classifer = False,use_validation= True ):

        # Fail before training instead of after the first (expensive) epoch.
        if use_validation and val_dataloader is None:
            raise ValueError("use_validation=True requires a val_dataloader")

        self.train_dataloader = train_dataloader
        self.val_dataloader = val_dataloader
        self.optimizer = optimizer
        self.net = net
        self.device = device
        self.save = save
        self.use_validation = use_validation
        self.num_epochs = args.num_epochs
        self.criterion = args.criterion
        self.early_stop_n = args.early_stop_n
        self.early_stop_acc_value = args.early_stop_acc_value
        self.tr_bert_classifer = tr_bert_classifer

        self.epoch_before_early_stop = 0
        self.val_best_acc_epoch = 0
        self.val_acc_value_before_eraly_stop = 0.0
        self.val_loss_value_before_eraly_stop = 0.0
        self.val_best_acc_value = 0.0
        self.val_best_loss_value = 100000.0

        # One slot per configured epoch; slots of epochs never run stay None.
        self.train_loss = [None] * self.num_epochs
        self.val_loss = [None] * self.num_epochs

        self.train_acc = [None] * self.num_epochs
        self.val_acc = [None] * self.num_epochs

        self.train_net(tr_bert_classifer)

    def train_net(self, tr_bert_classifer= False):
        """Run the training loop for up to ``self.num_epochs`` epochs.

        Args:
            tr_bert_classifer: Unused; the mode is taken from
                ``self.tr_bert_classifer``. Kept for interface compatibility.
        """
        start = time.time()
        print_current_time("starting to train classifier net")

        for epoch in range(self.num_epochs):
            epoch_total_train_loss = 0.0

            # evaluate() leaves the net in eval mode, so switch back each epoch.
            self.net.train()

            for tr_batch in tqdm(self.train_dataloader):
                # zero the parameter gradients
                self.optimizer.zero_grad()

                # forward
                loss, y_pred = self.forwad(tr_batch)

                # backward + optimize
                y_train = self.get_labels(tr_batch)
                loss = self.get_loss(loss, y_pred, y_train)
                loss.backward()
                self.optimizer.step()

                epoch_total_train_loss += loss.item()

            # train metrics
            self.train_loss[epoch] = epoch_total_train_loss / len(self.train_dataloader)
            self.train_acc[epoch], _ = self.evaluate(self.train_dataloader)

            if self.use_validation:
                # val metrics
                self.val_acc[epoch], self.val_loss[epoch] = self.evaluate(self.val_dataloader, True)
                self.update_best_val_loss_acc(self.val_acc[epoch], self.val_loss[epoch], epoch)
                self.print_metrics(epoch, start, False)

                self.epoch_before_early_stop = epoch
                self.val_acc_value_before_eraly_stop = self.val_acc[epoch]
                self.val_loss_value_before_eraly_stop = self.val_loss[epoch]

                # early stop check
                if self.early_stopping_check(epoch):
                    break

            self.print_metrics(epoch, start)

            if self.save:
                save_model(self.net, epoch)

    def print_metrics(self, epoch, start, train=True):
        """Print train (or validation) metrics every 20th epoch and on the last one.

        Args:
            epoch: 0-based epoch index.
            start: ``time.time()`` value taken when training started.
            train: True prints the train metrics, False the validation ones.
        """
        if epoch % 20 == 0 or epoch == self.num_epochs-1:
            print()
            print("******************************")

            if train:
                print(f'Epoch #{epoch+1}:\n'
                      f'Train Loss: {self.train_loss[epoch]:.4f}\n'
                      f'Train accuracy: {self.train_acc[epoch]:.4f}\n'
                      f'Time elapsed (remaining): {timeSince(start, (epoch+1) /  self.num_epochs)}')

            else:

                print(f'Epoch #{epoch + 1}:\n'
                      f'Validation Loss: {self.val_loss[epoch]:.4f}\n'
                      f'Validation accuracy: {self.val_acc[epoch]:.4f}\n' )

    def early_stopping_check(self, curr_epoch):
        """Decide whether training should stop after ``curr_epoch``.

        Returns True only when, for each of the last ``early_stop_n`` epochs,
        the validation accuracy gained less than ``early_stop_acc_value`` over
        the epoch before it. Never stops during the first ``early_stop_n`` epochs.
        """
        if curr_epoch < self.early_stop_n:
            return False

        for i in range(0, self.early_stop_n):
            if self.val_acc[curr_epoch - i] - self.val_acc[curr_epoch - i - 1] >= self.early_stop_acc_value:
                return False

        print("made early stopping after epoch: ", curr_epoch)
        return True

    def evaluate(self, dataloader, val = False):
        """Compute the accuracy (and optionally the mean loss) over ``dataloader``.

        Args:
            dataloader: Loader to evaluate on.
            val: When true, the criterion loss is accumulated as well.

        Returns:
            ``(accuracy, mean_loss)``; ``mean_loss`` is 0.0 when ``val`` is false.
        """
        total = 0.0
        correct = 0.0
        epoch_val_loss = 0.0
        self.net.eval()

        with torch.no_grad():

            for val_batch in tqdm(dataloader):
                loss, outputs = self.forwad(val_batch)

                labels = self.get_labels(val_batch)
                current_correct, current_total = calculate_accuracy(outputs, labels)
                correct += current_correct
                total += current_total

                if val:
                    val_loss = self.criterion(outputs, labels.long())
                    epoch_val_loss += val_loss.item()

        accuracy = correct / total
        # Average over the batches that were actually evaluated, not over the
        # number of training batches.
        epoch_val_avg_loss = epoch_val_loss / len(dataloader)

        return accuracy, epoch_val_avg_loss

    def forwad(self, batch):
        """Run the net on one batch.

        Returns:
            ``(loss, y_pred)``; ``loss`` is the model's own loss in BERT mode
            and the placeholder ``-1`` otherwise.
        """
        loss = -1

        if self.tr_bert_classifer:

            b_input_ids = batch[0].to(self.device).long()
            b_input_mask = batch[1].to(self.device)
            b_labels = batch[2].to(self.device).long()

            loss, y_pred = self.net(b_input_ids,
                                    token_type_ids=None,
                                    attention_mask=b_input_mask,
                                    labels=b_labels,
                                    return_dict=False
                                    )
        else:
            x_train = batch[0].to(self.device)
            y_pred = self.net(x_train)

        return loss, y_pred

    def get_labels(self, batch):
        """Return the batch's labels on ``self.device`` as a 1-D tensor."""
        if self.tr_bert_classifer:
            labels = batch[2].to(self.device)
        else:
            labels = batch[1].to(self.device)

        # reshape(-1) keeps a batch of size 1 one-dimensional (squeezing would
        # yield a 0-dim tensor) and does not modify the batch in place.
        return labels.reshape(-1)

    def get_loss(self, loss, y_pred, y_train):
        """Return the model's own loss in BERT mode, else ``criterion(y_pred, y_train)``."""
        if self.tr_bert_classifer:
            return loss

        return self.criterion(y_pred, y_train.long())

    def update_best_val_loss_acc(self,  last_epoch_acc_value,  last_epoch_loss_value, epoch):
        """Track the best validation accuracy (with its epoch) and the lowest loss."""
        if last_epoch_acc_value > self.val_best_acc_value:
            self.val_best_acc_value = last_epoch_acc_value
            self.val_best_acc_epoch = epoch

        if last_epoch_loss_value < self.val_best_loss_value:
            self.val_best_loss_value = last_epoch_loss_value



















## Preprocess Data

In [None]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder


class PreprocessData:
    """Load the songs CSV, label each row by artist and drop over-long songs.

    Args:
        song_csv_path: Path of the CSV file read with ``pd.read_csv``.
        max_word: Largest ``words_num`` a song may have to be kept.

    Attributes:
        df_songs: The full frame, extended with 'label' and 'words_num' columns.
        filtered_df_songs: Rows of ``df_songs`` with ``words_num <= max_word``,
            re-indexed from 0 (the previous index is kept as an 'index' column).
        max_words: The ``max_word`` argument.
    """

    def __init__(self, song_csv_path, max_word):
        self.df_songs = pd.read_csv(song_csv_path)
        self.filtered_df_songs = pd.DataFrame()
        self.max_words = max_word

        self.add_labels()
        self.delete_songs_more_n_words(self.max_words)

    def add_labels(self):
        """Add an integer 'label' column encoding the 'Artist' column."""
        encoder = LabelEncoder()
        encoded_artists = encoder.fit_transform(self.df_songs["Artist"])
        self.df_songs["label"] = encoded_artists

    def delete_songs_more_n_words(self, max_n_words):
        """Store the songs having at most ``max_n_words`` words in ``filtered_df_songs``.

        Also adds the 'words_num' column to ``df_songs``.
        """
        def count_pieces(lyrics):
            # Newlines become spaces, then the text is split on single spaces,
            # so runs of spaces/newlines contribute empty pieces to the count.
            flattened = str(lyrics).replace("\n", " ")
            return len(flattened.split(' '))

        self.df_songs['words_num'] = self.df_songs['Lyrics'].apply(count_pieces)

        short_enough = self.df_songs['words_num'] <= max_n_words
        self.filtered_df_songs = self.df_songs.loc[short_enough].reset_index()


## args

In [None]:
import torch.nn.functional


class args:
    """Hyper-parameters shared by the data loading, model and training code."""

    def __init__(self):

        # NN architecture args
        self.input_size = 768
        self.class_number = 19
        self.p1 = 0.5
        self.p2 = 0.25
        self.fc1_output_size = 256
        self.fc2_output_size = 64

        # Train-test split args
        self.validation_ratio = 0.2
        self.test_ratio = 0.2

        # NN training args
        self.tr_batch_size = 8
        self.val_batch_size = 8
        # Read when the test DataLoader is built from embeddings; it was
        # missing, which made that path fail with an AttributeError.
        self.test_batch_size = 8
        # Upper bound only; training is expected to end through early stopping.
        # NOTE(review): TrainNet pre-allocates four metric lists of this
        # length - consider a much smaller bound.
        self.num_epochs = 100000000

        # NN back propagation args
        self.lr = 2e-5
        self.weight_decay = 0.001
        self.criterion = torch.nn.CrossEntropyLoss()

        # NN early stopping args
        self.early_stop_n = 3
        self.early_stop_acc_value = 0




## MAIN

In [None]:
import torch
from Tokenizing import Tokenizing
from embedding import Embedding
import os
from classification_net import ClassificationNet
from bert_include_classifer_net import BERTClassifer
from upload_data_to_dataloader import upload_data_to_dataloader
from train_net import TrainNet
from preprocess_data import PreprocessData
from torch import optim
from plots import plot_accuracies, plot_loss
from directories import ROOT_DIR, PARAMETERS_DIR
from args import args
import pickle

# True: fine-tune BERT end to end. False: train a small net on song embeddings.
tr_bert_classifer =  True


def _build_dataloaders(run_args):
    """Build train/validation/test loaders for the selected mode from `run_args`."""
    if tr_bert_classifer:
        return upload_data_to_dataloader(song_token.df_songs, song_token.songs_dict,
                                         args=run_args, tokenized_data=True)
    return upload_data_to_dataloader(song_token.df_songs, embedding_songs.songs_features,
                                     args=run_args)


def _build_net_and_optimizer(run_args):
    """Create a freshly initialised model for the selected mode plus its Adam optimizer."""
    if tr_bert_classifer:
        model = BERTClassifer(run_args, device).model
    else:
        model = ClassificationNet(run_args, input_size=run_args.input_size).to(device)
    # weight_decay=0 is Adam's default, so no special case is needed for it.
    optimizer = optim.Adam(model.parameters(), lr=run_args.lr, weight_decay=run_args.weight_decay)
    return model, optimizer


# set device to GPU
# CONDA_DEFAULT_ENV is not defined outside a conda environment (e.g. on Colab).
print("conda environment:", os.environ.get('CONDA_DEFAULT_ENV', 'not set'), "\n \n")
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

# args
# Instantiate only while `args` is still the class, so re-running this cell works.
if isinstance(args, type):
    args = args()


# pre-process data
# Join path components separately so the path is valid on every OS.
data_file = os.path.join(ROOT_DIR, 'data', 'all_songs_nineteen_artists.csv')
songs_df = PreprocessData(data_file, 512)

# tokenize songs: load the cached tokens when present, otherwise compute and cache them
tokenizing_path = os.path.join(PARAMETERS_DIR, "all_songs_token.pt")
tokens_cached = os.path.exists(tokenizing_path)
song_token = Tokenizing(df_songs= songs_df.filtered_df_songs )
song_token.tokenize_each_song(tokenizing_path if tokens_cached else None)

if not tokens_cached:
    torch.save(song_token.songs_dict, tokenizing_path)

if not tr_bert_classifer:
    # song embeddings
    embedding_path = os.path.join(PARAMETERS_DIR, "embedding_all_artist.pt")
    # NOTE(review): presumably Embedding computes the features when
    # embedding_path is None and loads them otherwise - confirm against Embedding.
    embedding_songs = Embedding(tokenizing_data=song_token.songs_dict, device=device,
                                embedding_path=embedding_path if os.path.exists(embedding_path) else None)
    embedding_songs.data_embedding()

    if embedding_songs.embedding_path is None:
        torch.save(embedding_songs.songs_features, embedding_path)

# create dataloaders (from embeddings or from tokens, depending on the mode)
embedding_dataloaders = _build_dataloaders(args)

# train classification net
LR = args.lr
WEIGHT_DECAY = args.weight_decay
net, adam_optimizer = _build_net_and_optimizer(args)

trained_net = TrainNet(train_dataloader=embedding_dataloaders.tr_dataloader, optimizer=adam_optimizer,
                       device=device, net=net,
                       val_dataloader=embedding_dataloaders.val_dataloader,
                       args=args, tr_bert_classifer=tr_bert_classifer)

# NOTE: the 'test_acc' entry holds the *validation* accuracies; the key is kept
# unchanged for existing readers of the pickle file.
model_result = {'loss': trained_net.train_loss, 'train_acc': trained_net.train_acc, 'test_acc': trained_net.val_acc}


#plot figures
plot_accuracies(trained_net.train_acc, trained_net.val_acc, 'all_artists')
plot_loss(trained_net.train_loss, trained_net.val_loss, 'all_artists')


with open(os.path.join(PARAMETERS_DIR, 'bert_not_include_classifer.pkl'), 'wb') as f:
    pickle.dump(model_result, f)


print("best validation accuracy was: ", round(trained_net.val_best_acc_value, 4), "after epoch number: ", trained_net.val_best_acc_epoch)
print("training final net")

# Final run: no validation split, no early stopping, and as many epochs as the
# best validation run needed. The settings live on a copy so `args` itself is
# left untouched for re-runs.
final_args = type(args)()
vars(final_args).update(vars(args))
final_args.validation_ratio = 0
final_args.early_stop_n = 1000000
# val_best_acc_epoch is a 0-based index, hence +1 to obtain an epoch count.
final_args.num_epochs = trained_net.val_best_acc_epoch + 1

# Rebuild the loaders (so the former validation songs are trained on) and start
# from a freshly initialised model/optimizer instead of the already trained one.
final_dataloaders = _build_dataloaders(final_args)
final_model, final_optimizer = _build_net_and_optimizer(final_args)

final_net = TrainNet(train_dataloader=final_dataloaders.tr_dataloader, optimizer=final_optimizer,
                     device=device, net=final_model,
                     val_dataloader=final_dataloaders.test_dataloader,
                     args=final_args, tr_bert_classifer=tr_bert_classifer)







cuda
Loading BERT tokenizer...


HBox(children=(FloatProgress(value=0.0, description='Downloading', max=231508.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=28.0, style=ProgressStyle(description_w…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=466062.0, style=ProgressStyle(descripti…


starting tokenizing process
2021-04-02 21:05:33.617362+03:00
finished tokenizing process
2021-04-02 21:05:37.685470+03:00


HBox(children=(FloatProgress(value=0.0, description='Downloading', max=433.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=440473133.0, style=ProgressStyle(descri…




Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

starting to train classifier net
2021-04-02 21:06:25.473753+03:00


100%|██████████| 647/647 [09:13<00:00,  1.17it/s]
100%|██████████| 647/647 [03:20<00:00,  3.22it/s]
100%|██████████| 162/162 [00:50<00:00,  3.23it/s]
  0%|          | 0/647 [00:00<?, ?it/s]


******************************
Epoch #1:
Validation Loss: 0.6040
Validation accuracy: 0.2749


******************************
Epoch #1:
Train Loss: 2.7824
Train accuracy: 0.2935
Time elapsed (remaining): 13m 24s (1340103705m 50s)


100%|██████████| 647/647 [09:12<00:00,  1.17it/s]
100%|██████████| 647/647 [03:20<00:00,  3.23it/s]
100%|██████████| 162/162 [00:49<00:00,  3.24it/s]
  0%|          | 0/647 [00:00<?, ?it/s]


******************************
Epoch #2:
Validation Loss: 0.5039
Validation accuracy: 0.4015


******************************
Epoch #2:
Train Loss: 2.1880
Train accuracy: 0.5294
Time elapsed (remaining): 26m 47s (1339301530m 57s)


100%|██████████| 647/647 [09:11<00:00,  1.17it/s]
100%|██████████| 647/647 [03:20<00:00,  3.23it/s]
100%|██████████| 162/162 [00:50<00:00,  3.21it/s]
  0%|          | 0/647 [00:00<?, ?it/s]


******************************
Epoch #3:
Validation Loss: 0.4773
Validation accuracy: 0.4301


******************************
Epoch #3:
Train Loss: 1.5358
Train accuracy: 0.7371
Time elapsed (remaining): 40m 9s (1338864758m 22s)


100%|██████████| 647/647 [09:13<00:00,  1.17it/s]
100%|██████████| 647/647 [03:20<00:00,  3.23it/s]
100%|██████████| 162/162 [00:50<00:00,  3.24it/s]
  0%|          | 0/647 [00:00<?, ?it/s]


******************************
Epoch #4:
Validation Loss: 0.5038
Validation accuracy: 0.4309


******************************
Epoch #4:
Train Loss: 0.9810
Train accuracy: 0.8512
Time elapsed (remaining): 53m 33s (1339013256m 30s)


100%|██████████| 647/647 [09:12<00:00,  1.17it/s]
100%|██████████| 647/647 [03:19<00:00,  3.24it/s]
100%|██████████| 162/162 [00:49<00:00,  3.24it/s]
  0%|          | 0/647 [00:00<?, ?it/s]


******************************
Epoch #5:
Validation Loss: 0.5660
Validation accuracy: 0.4015


******************************
Epoch #5:
Train Loss: 0.6130
Train accuracy: 0.8976
Time elapsed (remaining): 66m 56s (1338705384m 28s)


100%|██████████| 647/647 [09:12<00:00,  1.17it/s]
100%|██████████| 647/647 [03:19<00:00,  3.24it/s]
100%|██████████| 162/162 [00:49<00:00,  3.24it/s]
  0%|          | 0/647 [00:00<?, ?it/s]


******************************
Epoch #6:
Validation Loss: 0.6265
Validation accuracy: 0.4232


******************************
Epoch #6:
Train Loss: 0.3951
Train accuracy: 0.9250
Time elapsed (remaining): 80m 18s (1338528735m 23s)


100%|██████████| 647/647 [09:13<00:00,  1.17it/s]
100%|██████████| 647/647 [03:19<00:00,  3.24it/s]
100%|██████████| 162/162 [00:50<00:00,  3.23it/s]
  0%|          | 0/647 [00:00<?, ?it/s]


******************************
Epoch #7:
Validation Loss: 0.6471
Validation accuracy: 0.4270


******************************
Epoch #7:
Train Loss: 0.2611
Train accuracy: 0.9571
Time elapsed (remaining): 93m 42s (1338582257m 3s)


100%|██████████| 647/647 [09:12<00:00,  1.17it/s]
100%|██████████| 647/647 [03:19<00:00,  3.24it/s]
100%|██████████| 162/162 [00:49<00:00,  3.25it/s]
  0%|          | 0/647 [00:00<?, ?it/s]


******************************
Epoch #8:
Validation Loss: 0.7273
Validation accuracy: 0.3923


******************************
Epoch #8:
Train Loss: 0.1712
Train accuracy: 0.9080
Time elapsed (remaining): 107m 4s (1338427296m 2s)


100%|██████████| 647/647 [09:12<00:00,  1.17it/s]
100%|██████████| 647/647 [03:20<00:00,  3.23it/s]
100%|██████████| 162/162 [00:50<00:00,  3.22it/s]
  0%|          | 0/647 [00:00<?, ?it/s]


******************************
Epoch #9:
Validation Loss: 0.7860
Validation accuracy: 0.3691


******************************
Epoch #9:
Train Loss: 0.1512
Train accuracy: 0.8920
Time elapsed (remaining): 120m 27s (1338485968m 40s)


100%|██████████| 647/647 [09:12<00:00,  1.17it/s]
100%|██████████| 647/647 [03:20<00:00,  3.23it/s]
100%|██████████| 162/162 [00:50<00:00,  3.22it/s]
  0%|          | 0/647 [00:00<?, ?it/s]


******************************
Epoch #10:
Validation Loss: 0.7194
Validation accuracy: 0.4201


******************************
Epoch #10:
Train Loss: 0.1079
Train accuracy: 0.9731
Time elapsed (remaining): 133m 51s (1338583926m 55s)


100%|██████████| 647/647 [09:13<00:00,  1.17it/s]
100%|██████████| 647/647 [03:20<00:00,  3.23it/s]
100%|██████████| 162/162 [00:50<00:00,  3.24it/s]
  0%|          | 0/647 [00:00<?, ?it/s]


******************************
Epoch #11:
Validation Loss: 0.7691
Validation accuracy: 0.3861


******************************
Epoch #11:
Train Loss: 0.1403
Train accuracy: 0.9635
Time elapsed (remaining): 147m 15s (1338701211m 46s)


100%|██████████| 647/647 [09:13<00:00,  1.17it/s]
100%|██████████| 647/647 [03:20<00:00,  3.23it/s]
100%|██████████| 162/162 [00:50<00:00,  3.23it/s]
  0%|          | 0/647 [00:00<?, ?it/s]


******************************
Epoch #12:
Validation Loss: 0.7239
Validation accuracy: 0.4301


******************************
Epoch #12:
Train Loss: 0.1208
Train accuracy: 0.9816
Time elapsed (remaining): 160m 39s (1338776430m 4s)


100%|██████████| 647/647 [09:13<00:00,  1.17it/s]
100%|██████████| 647/647 [03:20<00:00,  3.23it/s]
100%|██████████| 162/162 [00:50<00:00,  3.22it/s]
  0%|          | 0/647 [00:00<?, ?it/s]


******************************
Epoch #13:
Validation Loss: 0.8146
Validation accuracy: 0.3730


******************************
Epoch #13:
Train Loss: 0.1105
Train accuracy: 0.9372
Time elapsed (remaining): 174m 2s (1338834473m 6s)


100%|██████████| 647/647 [09:12<00:00,  1.17it/s]
100%|██████████| 647/647 [03:20<00:00,  3.23it/s]
100%|██████████| 162/162 [00:50<00:00,  3.23it/s]
  0%|          | 0/647 [00:00<?, ?it/s]


******************************
Epoch #14:
Validation Loss: 0.7814
Validation accuracy: 0.4000


******************************
Epoch #14:
Train Loss: 0.0995
Train accuracy: 0.9689
Time elapsed (remaining): 187m 25s (1338798206m 44s)


100%|██████████| 647/647 [09:12<00:00,  1.17it/s]
100%|██████████| 647/647 [03:20<00:00,  3.23it/s]
100%|██████████| 162/162 [00:50<00:00,  3.22it/s]
  0%|          | 0/647 [00:00<?, ?it/s]


******************************
Epoch #15:
Validation Loss: 0.8362
Validation accuracy: 0.3737


******************************
Epoch #15:
Train Loss: 0.1179
Train accuracy: 0.9577
Time elapsed (remaining): 200m 48s (1338702349m 49s)


100%|██████████| 647/647 [09:12<00:00,  1.17it/s]
100%|██████████| 647/647 [03:19<00:00,  3.25it/s]
100%|██████████| 162/162 [00:50<00:00,  3.24it/s]
  0%|          | 0/647 [00:00<?, ?it/s]


******************************
Epoch #16:
Validation Loss: 0.8520
Validation accuracy: 0.3853


******************************
Epoch #16:
Train Loss: 0.0937
Train accuracy: 0.9791
Time elapsed (remaining): 214m 9s (1338494865m 25s)


 54%|█████▍    | 352/647 [05:00<04:12,  1.17it/s]