In [1]:
from datasets import load_dataset
import numpy as np
dataset = load_dataset("rotten_tomatoes")
train_dataset = dataset['train'].to_pandas()
validation_dataset = dataset['validation'].to_pandas()
test_dataset = dataset['test'].to_pandas()
max_len=max(0,train_dataset["text"].apply(lambda x:len(x)).max())
max_len=max(max_len,validation_dataset["text"].apply(lambda x:len(x)).max())
max_len=max(max_len,test_dataset["text"].apply(lambda x:len(x)).max())
max_len+=5


In [None]:
import nltk

def prep_pretrained_embedding():
    #copy from part 1
    def build_vocab(train_dataset):
        # Create set, unique words only
        vocab = set()
        train_dataset_pos = []
        
        # Loop thru each sentence in training dataset
        for sentence in train_dataset['text']:
            # Basic text processing
            
            # Case folding
            sentence = sentence.lower()
            
            # NLTK tokenizer does a good job at separating meaningful words + punctuations
            # Better than defining regex ourselves
            word_list = nltk.tokenize.word_tokenize(sentence)
            
            # # Further split words into separate words
            # # e.g., 'well-being' -> 'well', 'being'
            # # e.g., 'music/song' -> 'music', 'song'
            # split_word_list = []
            # for word in sentence_list:
            #     split_word_list.extend(word.replace('-', ' ').replace('/', ' ').split())
            
            # Dont remove all special characters, some are meaningful
            # Some words are surrounded by single/double quotes
            word_list = [word.strip("'\"") for word in word_list]
            
            # Add into set
            vocab.update(word_list)
            
            # Get pos tags
            # Also build POS tags
            pos_tags = nltk.pos_tag(word_list)
            train_dataset_pos.append(pos_tags)
            
        vocab.discard('')
        return vocab, train_dataset_pos

    vocab, train_dataset_pos = build_vocab(train_dataset)



    def load_glove_embeddings(path):
        glove_embeddings = {}
        with open(path, 'r', encoding='utf-8') as f:
            for line in f:
                values = line.split()
                word = values[0]
                vector = np.asarray(values[1:], dtype='float64')
                glove_embeddings[word] = vector
                
        return glove_embeddings

    glove_embeddings = load_glove_embeddings('glove.6B.50d.txt')
    vocab_word_to_index = {word: idx for idx, word in enumerate(vocab)}

    def create_embedding_matrix(word_to_index, glove_embeddings):
        # Initialize embedding matrix with zeros
        # 50d
        embedding_matrix = np.zeros((len(vocab)+2, 50), dtype='float64')
        
        # Loop thru each word in vocab
        for word, idx in word_to_index.items():
            # Check if word exists in glove embeddings
            if word in glove_embeddings:
                # Copy glove embedding to embedding matrix
                embedding_matrix[idx] = glove_embeddings[word]
                # If OOV, assign None first
                
        return embedding_matrix

    embedding_matrix = create_embedding_matrix(vocab_word_to_index, glove_embeddings)
    #handle <unk>
    embedding_matrix[-2]=[ 0.01513297,  0.2400952 , -0.13676383,  0.13166569, -0.28283166,
        0.10421129,  0.39747017,  0.07944959,  0.29670785,  0.05400998,
        0.48425894,  0.26516231, -0.48021244, -0.25129253, -0.24367068,
       -0.24188322,  0.47579495, -0.2097357 , -0.02568224, -0.31143999,
       -0.3196337 ,  0.44878632, -0.07379564,  0.32765833, -0.49052161,
       -0.33455611, -0.34772199, -0.05043562, -0.0898296 ,  0.04898804,
        0.4993778 ,  0.04359836,  0.40077601, -0.31343237,  0.24126281,
       -0.4907152 , -0.20372591, -0.32123346, -0.39554707,  0.37386547,
        0.44720326,  0.45492689, -0.16420979,  0.42844699,  0.15748723,
       -0.23547929, -0.33962153,  0.04243802, -0.03647524, -0.0042893 ]
    
    return vocab_word_to_index,embedding_matrix


In [None]:
import pickle

def prep_embedding(handle_oov=False,model_type=""):
    """
    handle_oov: bool, True if implement oov handling solution discussed in part 1
    model_type: special case for "SentimentRNN" in part 2
    """
    if handle_oov:
        #load from file
        with open('embedding_matrix.pkl', 'rb') as file:  
            embedding_matrix = pickle.load(file)
            if model_type=="SentimentRNN":
                #copy from part 2
                padding = [0 for i in range(50)]
                embedding_matrix = np.insert(embedding_matrix, 0, padding, 0)
            else:
                #add padding element
                embedding_matrix = np.concatenate((embedding_matrix, np.zeros((1, 50))), axis=0)
        with open('vocab_word_to_index.pkl', 'rb') as file:  
            vocab_word_to_index = pickle.load(file)
            if model_type!="SentimentRNN":
                #delete <UNK>
                del vocab_word_to_index['<UNK>']
    else:
        vocab_word_to_index,embedding_matrix= prep_pretrained_embedding()
        if model_type=="SentimentRNN":
            #copy from part 2
            vocab_word_to_index['<UNK>']=len(vocab_word_to_index)
            padding = [0 for i in range(50)]
            embedding_matrix = np.insert(embedding_matrix, 0, padding, 0)
        else:
            #add padding
            embedding_matrix[-1]=np.zeros(50)
    return vocab_word_to_index,embedding_matrix

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader,Dataset,TensorDataset

#set the device
device=torch.device('cuda')

#copy from part 2
class SentimentDataset:
    def __init__(self, dataset, word_to_index, max_len=30):
        self.dataset = dataset
        self.word_to_index = word_to_index
        self.max_len = max_len

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        text = self.dataset.iloc[idx]['text']
        label = self.dataset.iloc[idx]['label']

        # Tokenization and word-to-index conversion
        text = text.lower()
        word_list = nltk.tokenize.word_tokenize(text)
        word_list = [word.strip("'\"") for word in word_list]
        indices = [self.word_to_index.get(word, self.word_to_index.get('<UNK>')) + 1 for word in word_list]
        indices = indices[:self.max_len] + [0] * (self.max_len - len(indices))  # Padding

        return np.array(indices), np.array(label)

    def preprocess_data(self):
        texts = []
        labels = []
        for i in range(len(self.dataset)):
            features, label = self.__getitem__(i)
            texts.append(features)
            labels.append(label)
        return np.array(texts), np.array(labels)

#customed dataset
class CustomedDataset(Dataset):
    def __init__(self,sentences,labels,vocab_word_to_index):
        self.features=torch.tensor([[vocab_word_to_index[word] if word in vocab_word_to_index else len(vocab_word_to_index) for word in sentence]+[len(vocab_word_to_index)+1]*(max_len-len(sentence)) for sentence in sentences]).to(device)
        self.labels=torch.tensor(labels).to(device)
    
    def __len__(self):
        return self.features.shape[0]
    
    def __getitem__(self,idx):
        return self.features[idx],self.labels[idx]
#prepare dataloader
def prep_dataloader(train_dataset,validation_dataset,test_dataset,batch_size,vocab_word_to_index):
    train_dataloader=DataLoader(CustomedDataset(train_dataset["text"],train_dataset["label"],vocab_word_to_index),batch_size=batch_size,shuffle=True)
    validation_dataloader=DataLoader(CustomedDataset(validation_dataset["text"],validation_dataset["label"],vocab_word_to_index),batch_size=batch_size)
    test_dataloader=DataLoader(CustomedDataset(test_dataset["text"],test_dataset["label"],vocab_word_to_index),batch_size=batch_size)
    return train_dataloader,validation_dataloader,test_dataloader
    

In [None]:
#copy from part 2
class SentimentRNN(nn.Module):
    def __init__(self, embedding_matrix, hidden_dim, output_dim, num_layers = 1, freeze_embeddings=False, dropout = 0.5):
        super(SentimentRNN, self).__init__()
        embedding_matrix=torch.tensor(embedding_matrix,dtype=torch.float32)
        self.embedding = nn.Embedding.from_pretrained(embedding_matrix, freeze=freeze_embeddings)
        self.rnn = nn.RNN(50, hidden_dim, num_layers, batch_first=True, device= device)
        #self.dropout = nn.Dropout(dropout)
        self.fc = nn.Linear(hidden_dim, output_dim, device= device)
        self.sigmoid = nn.Sigmoid()
        self.device = device


    def forward(self, x):
        embedded = self.embedding(x)
        out, hidden = self.rnn(embedded)
        out = out[:, -1, :]
        #out = self.dropout(out[:, -1, :])
        out = self.fc(out)
        out = self.sigmoid(out)
        return out

In [None]:
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
#part 3.4 CNN definition
class CNNTextClassifier(nn.Module):
    def __init__(self, embedding_matrix, n_filters, filter_sizes, output_dim, dropout):
        super().__init__()
        #embedding from pretrained model
        embedding_matrix=torch.tensor(embedding_matrix,dtype=torch.float32)
        self.embedding = nn.Embedding.from_pretrained(embedding_matrix, freeze=False)
        #parallel kernels
        self.convs = nn.ModuleList(
            [nn.Conv2d(1, n_filters, (fs, embedding_matrix.shape[1])) for fs in filter_sizes]
        )
        self.fc = nn.Linear(len(filter_sizes) * n_filters, output_dim)
        self.dropout = nn.Dropout(dropout)
        self.softmax=nn.Softmax(-1)

    def forward(self, sentences):
        embedded = self.embedding(sentences)  # embedded = [batch size, sent len, emb dim]
        embedded = embedded.unsqueeze(1)  # embedded = [batch size, 1, sent len, emb dim]
        conved = [F.relu(conv(embedded)).squeeze(3) for conv in self.convs]  # conv_n = [batch size, n_filters, sent len - filter_sizes[n] + 1]
        pooled = [F.max_pool1d(conv, conv.shape[2]).squeeze(2) for conv in conved]  # pooled_n = [batch size, n_filters]
        cat = self.dropout(torch.cat(pooled, dim=1))  # cat = [batch size, n_filters * len(filter_sizes)]
        
        return self.softmax(self.fc(cat))
#copy from 3.3
class SentimentModel(nn.Module):
    def __init__(self, embedding_matrix, hidden_size, num_layers, output_size = 1, model_type = 'lstm'):
        super(SentimentModel, self).__init__()
        vocab_size, embedding_dim = embedding_matrix.shape
        self.embedding = nn.Embedding.from_pretrained(torch.tensor(embedding_matrix, dtype = torch.float32), freeze = False, padding_idx = vocab_size-1)

        if model_type == 'lstm':
            self.rnn = nn.LSTM(embedding_dim, hidden_size, num_layers = num_layers, bidirectional = True, batch_first = True)
        elif model_type == 'gru':
            self.rnn = nn.GRU(embedding_dim, hidden_size, num_layers = num_layers, bidirectional = True, batch_first = True)

        self.fc = nn.Linear(hidden_size * 2, 1)  # 2 for bidirectional and 1 output class
        # self.softmax = nn.Softmax(dim = 1)
        # self.sigmoid = nn.Sigmoid()
        self.init_weights()

    def init_weights(self):
        # Initialize Embedding Layer
        nn.init.uniform_(self.embedding.weight, -0.01, 0.01)

        # Initialize RNN (LSTM/GRU) weights and biases
        for name, param in self.rnn.named_parameters():
            if 'weight_ih' in name:  # Input to hidden weights
                nn.init.xavier_uniform_(param.data)  # Xavier initialization
            elif 'weight_hh' in name:  # Hidden to hidden weights
                nn.init.orthogonal_(param.data)  # Orthogonal initialization
            elif 'bias' in name:
                nn.init.zeros_(param.data)  # Zero bias initialization

        # Initialize Linear (Fully connected) layer
        nn.init.xavier_uniform_(self.fc.weight)
        nn.init.zeros_(self.fc.bias)

    def forward(self, x):
        mask = torch.eq(x, self.embedding.num_embeddings-1)
        lengths= mask.float().argmax(dim=1)-1
        print(x)
        print(lengths)
        embedded = self.embedding(x)
        packed_embedded = pack_padded_sequence(embedded, lengths.cpu(), batch_first = True, enforce_sorted = False)
        packed_rnn_out, _ = self.rnn(packed_embedded)
        rnn_out, _ = pad_packed_sequence(packed_rnn_out, batch_first = True)
        final_feature_map = rnn_out[torch.arange(rnn_out.size(0)), lengths - 1]
        final_out = self.fc(final_feature_map)
        return final_out

In [None]:
#res block
class ResidualBlock(nn.Module):
    def __init__(self, input_dim, output_dim):
        super().__init__()
        self.fc=nn.Linear(input_dim,output_dim)
        self.relu=nn.ReLU()

    def forward(self, x):
        return self.relu(x+self.fc(x))
#CNN + res block
class CNNTextResidualClassifier(nn.Module):
    def __init__(self, embedding_matrix, n_filters, filter_sizes, output_dim, dropout,num_hidden=256,res_block_num=3):
        super().__init__()
        #embedding from pretrained model
        embedding_matrix=torch.tensor(embedding_matrix,dtype=torch.float32)
        self.embedding = nn.Embedding.from_pretrained(embedding_matrix, freeze=False)
        #parallel kernels
        self.convs = nn.ModuleList(
            [nn.Conv2d(1, n_filters, (fs, embedding_matrix.shape[1])) for fs in filter_sizes]
        )
        self.fc = nn.Linear(len(filter_sizes) * n_filters, num_hidden)
        self.relu=nn.ReLU()
        #res block
        self.res_block=nn.Sequential(*[ResidualBlock(num_hidden,num_hidden) for _ in range(res_block_num)])
        self.fc_out=nn.Linear(num_hidden,output_dim)
        self.dropout = nn.Dropout(dropout)
        self.softmax=nn.Softmax(-1)

    def forward(self, sentences):
        embedded = self.embedding(sentences)  # embedded = [batch size, sent len, emb dim]
        embedded = embedded.unsqueeze(1)  # embedded = [batch size, 1, sent len, emb dim]
        conved = [F.relu(conv(embedded)).squeeze(3) for conv in self.convs]  # conv_n = [batch size, n_filters, sent len - filter_sizes[n] + 1]
        pooled = [F.max_pool1d(conv, conv.shape[2]).squeeze(2) for conv in conved]  # pooled_n = [batch size, n_filters]
        cat = self.dropout(torch.cat(pooled, dim=1))  # cat = [batch size, n_filters * len(filter_sizes)]
        res_block_in=self.relu(self.fc(cat))
        res_block_out=self.res_block(res_block_in)
        return self.softmax(self.fc_out(res_block_out))

In [None]:
class PositionalEncoding(nn.Module):
    def __init__(self, d_model, dropout=0.1, max_len=5000):
        super(PositionalEncoding, self).__init__()
        self.dropout = nn.Dropout(p=dropout)

        # positional encoding matrix
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-torch.log(torch.tensor(10000.0)) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0)
        
        # register parameters
        self.register_buffer('pe', pe)

    def forward(self, x):
        x = x + self.pe[:,:x.shape[1], :].requires_grad_(False)
        return self.dropout(x)
#get key padding mask
def get_key_padding_mask(tokens,vocab_size):
    key_padding_mask=torch.zeros(tokens.size())
    key_padding_mask[tokens==vocab_size-1]=-torch.inf
    return key_padding_mask.to(device)
#transformer model for part 3.5
class TransformerModel(nn.Module):
    def __init__(self, embedding_matrix, max_len, dropout, num_hidden=64, num_resblock=2, nhead=3, num_encoder_layers=1):
        super(TransformerModel, self).__init__()
        self.nhead=nhead
        #embedding from pretrained model
        embedding_matrix=torch.tensor(embedding_matrix,dtype=torch.float32)
        d_model=embedding_matrix.shape[1]
        self.embedding_src = nn.Embedding.from_pretrained(embedding_matrix, freeze=False)
        #positional encoding
        self.positional_encoding = PositionalEncoding(d_model,max_len=max_len) 
        #transformer encoder
        encoder = nn.TransformerEncoderLayer(d_model=d_model*nhead, nhead=nhead,dim_feedforward=64)
        self.transformer = nn.TransformerEncoder(encoder,num_encoder_layers)
        #fully connected layers and res blocks
        self.fc1 = nn.Linear(d_model*nhead, num_hidden)
        self.res_blocks1=nn.Sequential(*[ResidualBlock(num_hidden,num_hidden) for _ in range(num_resblock)])
        self.fc2 = nn.Linear(num_hidden,num_hidden)
        self.res_blocks2=nn.Sequential(*[ResidualBlock(num_hidden,num_hidden) for _ in range(num_resblock)])
        self.fc3 = nn.Linear(num_hidden,2)
        self.softmax=nn.Softmax(-1)
        self.relu=nn.ReLU()
        self.dropout=nn.Dropout(dropout)

    def forward(self, src):
        src_key_padding_mask = get_key_padding_mask(src,self.embedding_src.num_embeddings)
        src_emb = self.positional_encoding(self.embedding_src(src))
        
        # Transformer forward with attention masks
        output = self.transformer(
            src_emb.unsqueeze(2).repeat(1,1,self.nhead,1).reshape(src_emb.shape[0],src_emb.shape[1],-1).permute(1, 0, 2), 
            src_key_padding_mask=src_key_padding_mask,
        ) #[token_num,batch_size,embedding_dim]
        
        output=self.dropout(self.relu(self.fc1(output.permute(1,0,2))))
        output=self.res_blocks1(output)
        output=self.dropout(self.relu(self.fc2(torch.max(output,dim=1)[0])))
        output=self.res_blocks2(output)
        
        return self.softmax(self.fc3(output))
    

In [None]:


def train(model,optimizer,criterion,num_epoch,train_dataloader,validation_dataloader,model_type=""):
    from tqdm import tqdm
    model.to(device)
    for _ in range(num_epoch):
        acc_loss=0
        model.train()
        process_bar=tqdm(train_dataloader,desc=f"Epoch {_}/{num_epoch}",leave=True)
        for features,labels in process_bar:
            
            pred=model(features)
            optimizer.zero_grad()
            if model_type=="SentimentRNN":
                loss=criterion(pred,labels.unsqueeze(1).float())
            else:
                loss=criterion(pred,labels)
            
            loss.backward()
            optimizer.step()
            
            acc_loss+=loss.item()
            process_bar.set_postfix_str(f"Mean loss: {acc_loss/(process_bar.n+1)}")
        
        print("Train loss:",acc_loss/process_bar.n)
        
        acc_loss=0
        model.eval()
        with torch.no_grad():
            acc_loss=0
            process_bar=tqdm(validation_dataloader,desc="Validating",leave=True)
            for features,labels in process_bar:
                
                pred=model(features)
                
                if model_type=="SentimentRNN":
                    loss=criterion(pred,labels.unsqueeze(1).float())
                else:
                    loss=criterion(pred,labels)
                
                acc_loss+=loss.item()
                process_bar.set_postfix_str(f"Mean loss: {acc_loss/(process_bar.n+1)}")
                
            print("Validation loss:",acc_loss/process_bar.n)



In [None]:
def work_flow(model_type,handle_oov,params):
    """
    model_type: str, controls which model to use.
    handle_oov: bool, True if implementing oov handling solution discussed in part 1
    params: dict, hyper parameters to use
    """
    vocab_word_to_index,embedding_matrix=prep_embedding(handle_oov,model_type)
    # prepare dataloaders
    if model_type=="SentimentRNN":
        train_texts, train_labels = SentimentDataset(train_dataset, vocab_word_to_index, max_len=30).preprocess_data()
        valid_texts, valid_labels = SentimentDataset(validation_dataset, vocab_word_to_index, max_len=30).preprocess_data()
        test_texts, test_labels = SentimentDataset(test_dataset, vocab_word_to_index, max_len=30).preprocess_data()

        # Convert preprocessed arrays to PyTorch tensors
        train_texts = torch.tensor(train_texts).to(device)
        train_labels = torch.tensor(train_labels).to(device)
        valid_texts = torch.tensor(valid_texts).to(device)
        valid_labels = torch.tensor(valid_labels).to(device)
        test_texts = torch.tensor(test_texts).to(device)
        test_labels = torch.tensor(test_labels).to(device)

        train_datasets = TensorDataset(train_texts, train_labels)
        valid_datasets = TensorDataset(valid_texts, valid_labels)
        test_datasets = TensorDataset(test_texts, test_labels)

        train_dataloader = DataLoader(train_datasets, batch_size=32, shuffle=True)
        validation_dataloader = DataLoader(valid_datasets, batch_size=32, shuffle=False)
        test_dataloader = DataLoader(test_datasets, batch_size=32, shuffle=False)
    else:
        train_dataloader,validation_dataloader,test_dataloader=prep_dataloader(train_dataset,validation_dataset,test_dataset,params["batch_size"],vocab_word_to_index)

    #instantiating models
    if model_type=="CNN":
        model = CNNTextClassifier(embedding_matrix, params["n_filters"], params["filter_sizes"], params["output_dim"], params["dropout"])
    if model_type=="CNN_res_block":
        model = CNNTextResidualClassifier(embedding_matrix, params["n_filters"], params["filter_sizes"], params["output_dim"], params["dropout"])
    if model_type=="transformer":
        model = TransformerModel(embedding_matrix,max_len,params["dropout"])
    if model_type=="SentimentRNN":
        model = SentimentRNN(embedding_matrix,params["hidden_dim"],params["output_dim"])
    if model_type=="SentimentModel":
        model = SentimentModel(embedding_matrix,params["hidden_size"],params["num_layers"])
    #preparing loss function and optimizer
    criterion=nn.CrossEntropyLoss()
    if model_type=="SentimentRNN":
        criterion=nn.BCELoss()
    optimizer=torch.optim.Adam(model.parameters(),lr=params["lr"])
    
    train(model,optimizer,criterion,params["num_epoch"],train_dataloader,validation_dataloader,model_type)
    #get the test accuracy
    model.eval()
    test_acc=0
    tot_samples=0
    with torch.no_grad():
        for features,labels in test_dataloader:
            pred_labels=model(features)
            # count number of correct predictions
            if model_type=="SentimentRNN":
                test_acc+=(labels==(pred_labels>0.5).int().squeeze()).sum().item()
            else:
                test_acc+=(labels==pred_labels.argmax(1)).sum().item()
            tot_samples+=labels.shape[0]
        print(f"Test acc is:{test_acc/tot_samples*100}%")

params={"batch_size":32,"n_filters":32,"filter_sizes":[1,2,3,5],"hidden_dim":128,"output_dim":1,"dropout":0.1,"lr":0.00005,"hidden_size":64,"num_layers":3,"num_epoch":15}
work_flow("SentimentRNN",True,params)

Epoch 0/15: 100%|██████████| 267/267 [00:01<00:00, 173.86it/s, Mean loss: 0.7320256065945381]


Train loss: 0.6936422414547495


Validating: 100%|██████████| 34/34 [00:00<00:00, 351.74it/s, Mean loss: 23.58026432991028]


Validation loss: 0.6935371861738318


Epoch 1/15: 100%|██████████| 267/267 [00:01<00:00, 189.44it/s, Mean loss: 0.7112458614202646]


Train loss: 0.6925989661770366


Validating: 100%|██████████| 34/34 [00:00<00:00, 411.16it/s, Mean loss: 23.5673810839653]


Validation loss: 0.69315826717545


Epoch 2/15: 100%|██████████| 267/267 [00:01<00:00, 196.32it/s, Mean loss: 0.6916154585081093]


Train loss: 0.6916154585081093


Validating: 100%|██████████| 34/34 [00:00<00:00, 367.41it/s, Mean loss: 23.542530477046967]


Validation loss: 0.6924273669719696


Epoch 3/15: 100%|██████████| 267/267 [00:01<00:00, 185.99it/s, Mean loss: 0.6939198076725006]


Train loss: 0.6757271535387647


Validating: 100%|██████████| 34/34 [00:00<00:00, 372.71it/s, Mean loss: 21.495155096054077]


Validation loss: 0.6322104440015905


Epoch 4/15: 100%|██████████| 267/267 [00:01<00:00, 177.51it/s, Mean loss: 0.6183241705099741]


Train loss: 0.6044292453299748


Validating: 100%|██████████| 34/34 [00:00<00:00, 340.76it/s, Mean loss: 20.919913589954376]


Validation loss: 0.6152915761751288


Epoch 5/15: 100%|██████████| 267/267 [00:01<00:00, 173.72it/s, Mean loss: 0.6068818326089896]


Train loss: 0.5796062446265632


Validating: 100%|██████████| 34/34 [00:00<00:00, 382.08it/s, Mean loss: 20.321056246757507]


Validation loss: 0.5976781249046326


Epoch 6/15: 100%|██████████| 267/267 [00:01<00:00, 172.90it/s, Mean loss: 0.5932699566773275]


Train loss: 0.5621621686867561


Validating: 100%|██████████| 34/34 [00:00<00:00, 270.13it/s, Mean loss: 0.7067497958030019]


Validation loss: 0.5820292436024722


Epoch 7/15: 100%|██████████| 267/267 [00:01<00:00, 169.73it/s, Mean loss: 0.5548894915940626]


Train loss: 0.5507330160015977


Validating: 100%|██████████| 34/34 [00:00<00:00, 367.12it/s, Mean loss: 19.607175439596176]


Validation loss: 0.5766816305763581


Epoch 8/15: 100%|██████████| 267/267 [00:01<00:00, 166.52it/s, Mean loss: 0.5561243631518804]


Train loss: 0.5415443236684978


Validating: 100%|██████████| 34/34 [00:00<00:00, 352.89it/s, Mean loss: 19.485952258110046]


Validation loss: 0.5731162428855896


Epoch 9/15: 100%|██████████| 267/267 [00:01<00:00, 172.45it/s, Mean loss: 0.5334873004129317]


Train loss: 0.5334873004129317


Validating: 100%|██████████| 34/34 [00:00<00:00, 327.29it/s, Mean loss: 0.5712831677759395]


Validation loss: 0.5712831677759395


Epoch 10/15: 100%|██████████| 267/267 [00:01<00:00, 181.60it/s, Mean loss: 0.5509133006744233]


Train loss: 0.5220264609386859


Validating: 100%|██████████| 34/34 [00:00<00:00, 330.04it/s, Mean loss: 19.222966581583023]


Validation loss: 0.5653813700465595


Epoch 11/15: 100%|██████████| 267/267 [00:01<00:00, 190.08it/s, Mean loss: 0.5301489516977191]


Train loss: 0.51227876231465


Validating: 100%|██████████| 34/34 [00:00<00:00, 351.26it/s, Mean loss: 19.376380175352097]


Validation loss: 0.5698935345691793


Epoch 12/15: 100%|██████████| 267/267 [00:01<00:00, 188.20it/s, Mean loss: 0.5231713264947757]


Train loss: 0.5016174516204591


Validating: 100%|██████████| 34/34 [00:00<00:00, 391.43it/s, Mean loss: 19.61876517534256]


Validation loss: 0.5770225051571342


Epoch 13/15: 100%|██████████| 267/267 [00:01<00:00, 187.31it/s, Mean loss: 0.518279303680174]  


Train loss: 0.4969269728169459


Validating: 100%|██████████| 34/34 [00:00<00:00, 343.39it/s, Mean loss: 19.07141238451004]


Validation loss: 0.56092389366206


Epoch 14/15: 100%|██████████| 267/267 [00:01<00:00, 187.25it/s, Mean loss: 0.5068631423454658] 


Train loss: 0.48408277639735503


Validating: 100%|██████████| 34/34 [00:00<00:00, 403.97it/s, Mean loss: 20.15124922990799]


Validation loss: 0.5926838008796468
Test acc is:73.07692307692307%
