In [4]:
#!pip install fasttext
#!pip install transformers
#import nltk
#nltk.download('punkt')
#!pip3 install torch torchvision torchaudio

In [1]:
DATA_DIR =  './data/'

In [3]:
import pandas as pd
import numpy as np
import scipy
import nltk
import re
import os
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from torch.optim import AdamW
from fasttext import load_model
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, TensorDataset
from tqdm import tqdm
from sklearn.metrics import f1_score, confusion_matrix
from sklearn.model_selection import train_test_split
import xml.etree.ElementTree as ET

In [12]:
VALID_SIZE = .2


# generate word_index list
def build_vocab(data_dir, plain=[]):
    """plain is a empty str file which will record all text from official dataset"""
    for fn in os.listdir(data_dir):
        if fn.endswith('.xml'):
            with open(data_dir + fn) as f:
                dom = ET.parse(f)
                root = dom.getroot()
                for sent in root.iter("sentence"):
                    text = sent.find('text').text.lower()
                    token = word_tokenize(text)
                    plain = plain + token
    vocab = sorted(set(plain))
    with open(os.path.join(data_dir, "plain.txt"), "w+", encoding="utf8") as f:
        for v in vocab:
            f.write(f"{v}\n")
    word_idx = {}
    for idx, word in enumerate(vocab):
        word_idx[word] = idx + 1
    return word_idx


def gen_np_embedding(fn, word_idx, dim=100, emb=False):
    if emb:
        model = load_model(fn + ".bin")
    embedding = np.zeros((len(word_idx) + 2, dim))

    with open(fn, encoding="utf8") as f:
        for l in f:
            # for each line, get the word and its vector
            rec = l.rstrip().split(' ')
            if len(rec) == 2:  # skip the first line.
                continue
                # if the word in word_idx, fill the embedding
            if rec[0] in word_idx:
                embedding[word_idx[rec[0]]] = np.array([float(r) for r in rec[1:]])
    for w in word_idx:
        if embedding[word_idx[w]].sum() == 0.:
            if emb:
                embedding[word_idx[w]] = model.get_word_vector(w)
    return embedding

dict_polarity = {'non-aspect':0, 'positive':1,'neutral':2, 'negative':3}
def create_train_data_restaurant(fn, word_idx, sent_len=83):
    dom = ET.parse(fn)
    root = dom.getroot()
    train_X = np.zeros((len(root), sent_len), np.int16)
    mask = np.zeros_like(train_X)

    train_y = np.zeros((len(root), sent_len), np.int16)
    train_y_polarity = np.zeros((len(root), sent_len), np.int16)
    take = np.ones(len(root), dtype=bool)

    dom = ET.parse(fn)
    root = dom.getroot()
    # iterate the sentence
    for sx, sent in enumerate(root.iter("sentence")):
        # TODO temporary to compare this and transformers
        if not [_ for _ in sent.iter("aspectTerm")]:
            take[sx] = False
            continue
        text = sent.find('text').text.lower()
        # tokenize the current sentence
        token = word_tokenize(text)

        # write word index and tag in train_X
        try:
            for wx, word in enumerate(token):
                train_X[sx, wx] = word_idx[word]
                mask[sx, wx] = 1
        except KeyError:
            continue

        for ox, apin in enumerate(sent.iter('aspectTerms')):
            for ax, opin in enumerate(apin.iter('aspectTerm')):
                target, polarity, start, end = opin.attrib['term'], opin.attrib['polarity'], int(
                    opin.attrib['from']), int(opin.attrib['to'])

                if polarity == 'conflict':
                    continue

                # find word index (instead of str index) if start,end is not (0,0)
                if end != 0:
                    if start != 0:
                        start = len(word_tokenize(text[:start]))
                    end = len(word_tokenize(text[:end])) - 1
                    # for training only identify aspect word, but not polarity
                    train_y[sx, start] = 1
                    train_y_polarity[sx, start] = dict_polarity[polarity]
                    if end > start:
                        # train_y[sx, start + 1:end] = 2
                        train_y[sx, start + 1:end] = 1
                        train_y_polarity[sx, start + 1:end] = dict_polarity[polarity]

    return (train_X[take], mask[take]), train_y[take], train_y_polarity[take]


def get_default_device():
    """Pick GPU if available, else CPU"""
    if torch.cuda.is_available():
        return torch.device('cuda')
    else:
        return torch.device('cpu')


def to_device(data, device):
    """Move tensor(s) to chosen device"""
    if isinstance(data, (list, tuple)):
        return [to_device(x, device) for x in data]
    elif isinstance(data, dict):
        for k, v in data.items():
            data[k] = v.to(device)
        return data
    return data.to(device, non_blocking=True)


class DeviceDataLoader():
    """Wrap a dataloader to move data to a device"""

    def __init__(self, dl, device):
        self.dl = dl
        self.device = device

    def __iter__(self):
        """Yield a batch of data after moving it to device"""
        for b in self.dl:
            yield to_device(b, self.device)

    def __len__(self):
        """Number of batches"""
        return len(self.dl)


def loss_fn(pred, mask, label):
    label.masked_fill_(~mask, -100)
    pred = pred.view(-1, 4)
    label = label.view(-1)
    loss = torch.nn.functional.cross_entropy(pred, label, weight = torch.tensor([2, 0.3,0.3,0.3]))
    return loss


def cal_acc(pred_tags, mask, true_tags):
    if isinstance(pred_tags, list):
        pred_tags = torch.cat(pred_tags, 0)
        mask = torch.cat(mask, 0)
        true_tags = torch.cat(true_tags, 0)
    pred_tags = pred_tags[mask]
    true_tags = true_tags[mask]
    acc = (pred_tags == true_tags).sum() / pred_tags.numel()
    f1 = f1_score(true_tags.cpu().numpy(), pred_tags.cpu().numpy(), labels=[0, 1, 2, 3], average='weighted')
    cm = confusion_matrix(true_tags.cpu().numpy(), pred_tags.cpu().numpy())

    return acc, f1, cm


class Model(torch.nn.Module):
    def __init__(self, gen_emb, domain_emb, num_classes=5, dropout=0.5):
        super(Model, self).__init__()
        self.gen_embedding = torch.nn.Embedding(gen_emb.shape[0], gen_emb.shape[1])
        self.gen_embedding.weight = torch.nn.Parameter(torch.from_numpy(gen_emb), requires_grad=False)
        self.domain_embedding = torch.nn.Embedding(domain_emb.shape[0], domain_emb.shape[1])
        self.domain_embedding.weight = torch.nn.Parameter(torch.from_numpy(domain_emb), requires_grad=False)
        self.conv1 = torch.nn.Conv1d(gen_emb.shape[1] + domain_emb.shape[1], 128, 5, padding=2)
        self.conv2 = torch.nn.Conv1d(gen_emb.shape[1] + domain_emb.shape[1], 128, 3, padding=1)
        self.dropout = torch.nn.Dropout(dropout)

        self.conv3 = torch.nn.Conv1d(256, 256, 5, padding=2)
        self.conv4 = torch.nn.Conv1d(256, 256, 5, padding=2)
        self.conv5 = torch.nn.Conv1d(256, 256, 5, padding=2)

        self.lstm = nn.LSTM(256, hidden_size=128, num_layers=1, bidirectional=True, batch_first=True)

        self.linear_ae = torch.nn.Linear(256, 2)

        #aspect sentiment analysis
        self.embed = nn.Embedding.from_pretrained(torch.tensor(gen_emb, dtype=torch.float))
        self.lstm_l = nn.LSTM(gen_emb.shape[1], hidden_size=128, num_layers=1, bidirectional=True, batch_first=True)
        self.lstm_r = nn.LSTM(gen_emb.shape[1],hidden_size=128, num_layers=1, bidirectional=True, batch_first=True)

        self.dense = nn.Linear(128*2, 3)


    def forward(self, x_train):
    
        x_emb = torch.cat((self.gen_embedding(x_train), self.domain_embedding(x_train)), dim=2)
        x_emb = self.dropout(x_emb).transpose(1, 2)

        x_conv = torch.nn.functional.relu(torch.cat((self.conv1(x_emb.float()), self.conv2(x_emb.float())), dim=1))
        x_conv = self.dropout(x_conv)

        x_conv = torch.nn.functional.relu(self.conv3(x_conv))
        x_conv = self.dropout(x_conv)

        x_conv = torch.nn.functional.relu(self.conv4(x_conv))
        x_conv = self.dropout(x_conv)

        x_conv = torch.nn.functional.relu(self.conv5(x_conv))
        x_conv = x_conv.transpose(1, 2)

        x_lstm, (hidden, cell) = self.lstm(x_conv)

        x_logit = self.linear_ae(x_lstm)

        n1 = len(x_train)
        n2 = len(x_train[0])

        # for each sentence, convert a word to a vector of [0, 0, 0, 0] which corresponding to [non-aspect, positive, neutral, negative]
        # then perform td-lstm
        # example: sentence = 'the food is amazing but ambience is bad in the restaurant'
        # we have two aspects (food, ambience)
        # hence, for food, will split the sentence into 'the food' + 'food is amazing but ambience is bad in the restaurant'
        # for ambience, will split the sentence into 'the food is amazing but ambience' + 'ambience is bad in the restaurent'
        # then perform lstm + softmax will obtain a vector of size 3 for each aspect
        # hence, update the aspect vector to [0, resulted vector of size 3]

        output = torch.tensor([[[0]*4]*n2]*n1).float()
        output.requires_grad = True

        #sentiment analysis
        pred_aspect = x_logit.max(-1)[1]

        for j, pred in enumerate(pred_aspect):
            i = 0
            aspect = False
            start = 0
            n = len(pred)
            pred = pred.tolist()
            count_aspect = 0
            index_lst = []
            left_right = []
            x_train_word = x_train[j]
    
            while i < n:
    
                if aspect == True and pred[i] == 0:
                  
                    count_aspect = count_aspect + 1
                    index_lst.append([start, i-1])
                    aspect = False
                    #print(x_train_word, 'x_train+word')
                    #print(x_train_word[start:])
                    right_context = x_train_word[start:].flip(dims=(0,))
                    #print(right_context)
                    #print(right_context[right_context.nonzero().squeeze().detach()])
                    left_right.append([x_train_word[:i], right_context[right_context.nonzero().squeeze().detach()]])

                if aspect == False and pred[i] == 1:
                    start = i
                    aspect = True

                i = i + 1

            for m in range(count_aspect):
                inputs = left_right[m]
                index = index_lst[m]
                x_l, x_r = inputs[0], inputs[1]

                if x_l.dim() == 0 or len(x_l) == 0:
                    x_l = torch.tensor([0])
                elif len(x_l) ==1:
                    x_l = torch.tensor([x_l])
                
                if x_r.dim() == 0 or len(x_r) == 0:
                    x_r = torch.tensor([0])
                elif len(x_r) ==1:
                    x_r = torch.tensor([x_r])
                
                x_l, x_r = self.embed(x_l), self.embed(x_r)
                _, (h_n_l, _) = self.lstm_l(x_l)
                _, (h_n_r, _) = self.lstm_r(x_r)
                h_n = torch.cat((h_n_l[0], h_n_r[0]), dim=-1)
                out = self.dense(h_n)
                with torch.no_grad():
                    output[j][index[0]:index[1]+1]= torch.cat((torch.tensor([-10]),out),0)

        return output


In [7]:
word_indx = build_vocab(DATA_DIR)
fn = DATA_DIR + 'restaurant_emb.vec'
res_domain_embedding = gen_np_embedding(fn, word_indx, dim=100, emb=True)

    
fn = DATA_DIR + 'glove.840B.300d.txt'
general_embedding = gen_np_embedding(fn, word_indx, dim=300, emb=False)



In [8]:
fn = DATA_DIR + 'Restaurants_Train_v2.xml'
(X_train_res, mask_res), y_train_res , y_train_pol = create_train_data_restaurant(fn, word_indx, sent_len=100)
X, mask, y , y_pol = X_train_res, mask_res, y_train_res, y_train_pol
   
X_train, X_valid, mask_train, mask_valid, y_train, y_valid , y_pol_train, y_pol_valid= train_test_split(X, mask, y, y_pol, test_size=VALID_SIZE)


In [None]:
#print(X_train[:3])
#print(mask_train[:3])
print(y_train[:5])
print(' ')
print(y_pol_train[:5])

In [15]:
device = get_default_device()

NUM_EPOCHS = 20
TRAIN_BATCH_SIZE = 128
VALID_BATCH_SIZE = 1024

NUM_ASPECT_TAGS = 2

dataset = TensorDataset(torch.Tensor(X_train), torch.Tensor(mask_train), torch.Tensor(y_pol_train))
print(f"train samples:{len(dataset)}")
train_loader = DataLoader(dataset, batch_size=TRAIN_BATCH_SIZE, shuffle=True, drop_last=True)

dataset_valid = TensorDataset(torch.Tensor(X_valid), torch.Tensor(mask_valid), torch.Tensor(y_pol_valid))
print(f"valid samples:{len(dataset_valid)}")
test_loader = DataLoader(dataset_valid, batch_size=VALID_BATCH_SIZE)

model = to_device(Model(general_embedding, res_domain_embedding, num_classes=5), device)

torch.cuda.empty_cache()

parameters = [p for p in model.parameters() if p.requires_grad]
optimizer = AdamW(parameters, lr=1e-4)


for epoch in range(NUM_EPOCHS):
    train_losses = []
    train_acc = []
    test_loss = []
    test_acc = []
    train_f1 = []
    test_f1 = []

    model.train()
    preds = []
    masks = []
    labels = []
    for data in tqdm(train_loader, total=len(train_loader)):
        for i in range(len(data)):
            data[i] = data[i].to(device)
        feature, mask, label = data
        feature, mask, label = feature.long(), mask.bool(), label.long()
        optimizer.zero_grad()
        pred_logits = model(feature)

        loss = loss_fn(pred_logits.float(), mask, label)
        loss.backward()
        optimizer.step()

        train_losses.append(loss.item())

        pred_tags = pred_logits.max(-1)[1]
        preds.append(pred_tags)
        masks.append(mask)
        labels.append(label)

    avg_train_acc, avg_train_f1, train_cm = cal_acc(preds, masks, labels)
    avg_train_loss = sum(train_losses) / len(train_losses)

    preds = []
    masks = []
    labels = []
    with torch.no_grad():
        for data in tqdm(test_loader, total=len(test_loader)):
            for i in range(len(data)):
                data[i] = data[i].to(device)
            feature, mask, label = data
            feature, mask, label = feature.long(), mask.bool(), label.long()
            pred_logits = model(feature)
            loss = loss_fn(pred_logits, mask, label)
            
            test_loss.append(loss.item())

            pred_tags = pred_logits.max(-1)[1]

            preds.append(pred_tags)
            masks.append(mask)
            labels.append(label)

    avg_test_acc, avg_test_f1, test_cm = cal_acc(preds, masks, labels)
    avg_test_loss = sum(test_loss) / len(test_loss)

    print(f"\nepoch {epoch}")
    print("\ttrain_loss:{:.3f} valid_loss:{:.3f}".format(avg_train_loss, avg_test_loss))
    print("\ttrain_acc:{:.2%} valid_acc:{:.2%}".format(avg_train_acc, avg_test_acc))
    print("\ttrain_f1:{:.3f} valid_f1:{:.3f}".format(avg_train_f1, avg_test_f1))
    print(f"\ttrain_confusion_matrix:\n{train_cm}")
    print(f"\tvalid_confusion_matrix:\n{test_cm}")


train samples:1616
valid samples:405


100%|██████████| 12/12 [00:12<00:00,  1.03s/it]
100%|██████████| 1/1 [00:02<00:00,  2.28s/it]



epoch 0
	train_loss:4.419 valid_loss:4.495
	train_acc:61.24% valid_acc:61.02%
	train_f1:0.683 valid_f1:0.683
	train_confusion_matrix:
[[15881  1498   419  5443]
 [ 1402   149    97   362]
 [  389    36    13   119]
 [  426    36    11   135]]
	valid_confusion_matrix:
[[4151  342   96 1548]
 [ 306   32   16   92]
 [ 101    7    1   27]
 [ 151   14    8   55]]


100%|██████████| 12/12 [00:12<00:00,  1.02s/it]
100%|██████████| 1/1 [00:02<00:00,  2.32s/it]



epoch 1
	train_loss:4.440 valid_loss:4.609
	train_acc:61.27% valid_acc:60.26%
	train_f1:0.685 valid_f1:0.680
	train_confusion_matrix:
[[15910  1373   398  5657]
 [ 1362   152    97   408]
 [  374    37    23   129]
 [  374    44    19   162]]
	valid_confusion_matrix:
[[4078  380  144 1535]
 [ 296   45   21   84]
 [  89    8    5   34]
 [ 140   15   15   58]]


100%|██████████| 12/12 [00:12<00:00,  1.04s/it]
100%|██████████| 1/1 [00:02<00:00,  2.28s/it]



epoch 2
	train_loss:4.468 valid_loss:4.438
	train_acc:60.80% valid_acc:61.54%
	train_f1:0.681 valid_f1:0.689
	train_confusion_matrix:
[[15717  1366   411  5667]
 [ 1380   131   108   369]
 [  376    25    15   140]
 [  417    41    16   138]]
	valid_confusion_matrix:
[[4187  357  120 1473]
 [ 286   37   27   96]
 [  95   11    0   30]
 [ 147   12   18   51]]


100%|██████████| 12/12 [00:12<00:00,  1.02s/it]
100%|██████████| 1/1 [00:02<00:00,  2.24s/it]



epoch 3
	train_loss:4.392 valid_loss:4.311
	train_acc:61.55% valid_acc:62.72%
	train_f1:0.687 valid_f1:0.695
	train_confusion_matrix:
[[16078  1307   394  5629]
 [ 1350   134    99   437]
 [  387    43    14   114]
 [  416    34    14   143]]
	valid_confusion_matrix:
[[4268  380   89 1400]
 [ 311   37   15   83]
 [  94   11    3   28]
 [ 165    9    5   49]]


100%|██████████| 12/12 [00:12<00:00,  1.04s/it]
100%|██████████| 1/1 [00:02<00:00,  2.30s/it]



epoch 4
	train_loss:4.368 valid_loss:4.364
	train_acc:61.86% valid_acc:62.34%
	train_f1:0.689 valid_f1:0.695
	train_confusion_matrix:
[[16059  1302   372  5566]
 [ 1384   134    98   394]
 [  365    33    15   131]
 [  396    34    13   155]]
	valid_confusion_matrix:
[[4234  379  110 1414]
 [ 281   46   30   89]
 [  87   14    1   34]
 [ 156   13    9   50]]


100%|██████████| 12/12 [00:12<00:00,  1.03s/it]
100%|██████████| 1/1 [00:02<00:00,  2.24s/it]



epoch 5
	train_loss:4.398 valid_loss:4.567
	train_acc:61.61% valid_acc:60.40%
	train_f1:0.688 valid_f1:0.679
	train_confusion_matrix:
[[16006  1396   391  5525]
 [ 1344   151   130   385]
 [  360    44    25   115]
 [  421    39    18   134]]
	valid_confusion_matrix:
[[4105  386   82 1564]
 [ 309   36    9   92]
 [  90    8    6   32]
 [ 159   12    8   49]]


100%|██████████| 12/12 [00:12<00:00,  1.01s/it]
100%|██████████| 1/1 [00:02<00:00,  2.28s/it]



epoch 6
	train_loss:4.467 valid_loss:4.577
	train_acc:60.84% valid_acc:60.52%
	train_f1:0.682 valid_f1:0.680
	train_confusion_matrix:
[[15777  1397   407  5661]
 [ 1362   154   119   392]
 [  367    36    16   136]
 [  418    38    16   133]]
	valid_confusion_matrix:
[[4098  370  114 1555]
 [ 298   40   21   87]
 [  95   11    4   26]
 [ 147   14    5   62]]


100%|██████████| 12/12 [00:12<00:00,  1.03s/it]
100%|██████████| 1/1 [00:02<00:00,  2.23s/it]



epoch 7
	train_loss:4.401 valid_loss:4.391
	train_acc:61.48% valid_acc:61.95%
	train_f1:0.687 valid_f1:0.690
	train_confusion_matrix:
[[15950  1328   426  5559]
 [ 1372   146   106   387]
 [  372    46    26   109]
 [  415    38    25   131]]
	valid_confusion_matrix:
[[4217  411  108 1401]
 [ 300   31   27   88]
 [  98   10    5   23]
 [ 152   11   14   51]]


100%|██████████| 12/12 [00:12<00:00,  1.03s/it]
100%|██████████| 1/1 [00:02<00:00,  2.26s/it]



epoch 8
	train_loss:4.447 valid_loss:4.413
	train_acc:61.22% valid_acc:61.83%
	train_f1:0.685 valid_f1:0.689
	train_confusion_matrix:
[[15908  1347   401  5686]
 [ 1330   161   117   418]
 [  387    36    18   121]
 [  401    31    13   151]]
	valid_confusion_matrix:
[[4203  343  106 1485]
 [ 301   38   27   80]
 [  98    5    1   32]
 [ 158    8    9   53]]


100%|██████████| 12/12 [00:12<00:00,  1.04s/it]
100%|██████████| 1/1 [00:02<00:00,  2.24s/it]



epoch 9
	train_loss:4.468 valid_loss:4.357
	train_acc:60.90% valid_acc:62.31%
	train_f1:0.683 valid_f1:0.693
	train_confusion_matrix:
[[15772  1355   373  5733]
 [ 1329   152    99   410]
 [  385    47    17   116]
 [  418    39    19   136]]
	valid_confusion_matrix:
[[4239  319  108 1471]
 [ 316   33   22   75]
 [  82    8    3   43]
 [ 147   16   11   54]]


100%|██████████| 12/12 [00:12<00:00,  1.04s/it]
100%|██████████| 1/1 [00:02<00:00,  2.32s/it]



epoch 10
	train_loss:4.411 valid_loss:4.471
	train_acc:61.41% valid_acc:61.28%
	train_f1:0.686 valid_f1:0.686
	train_confusion_matrix:
[[15939  1305   384  5656]
 [ 1379   134    93   395]
 [  362    49    22   127]
 [  401    42    19   155]]
	valid_confusion_matrix:
[[4166  372   75 1524]
 [ 312   35   25   74]
 [  88   12    4   32]
 [ 150   16   10   52]]


100%|██████████| 12/12 [00:12<00:00,  1.04s/it]
100%|██████████| 1/1 [00:02<00:00,  2.19s/it]



epoch 11
	train_loss:4.417 valid_loss:4.300
	train_acc:61.46% valid_acc:62.62%
	train_f1:0.687 valid_f1:0.694
	train_confusion_matrix:
[[16012  1416   383  5587]
 [ 1350   155    95   402]
 [  378    35    20   113]
 [  427    34    16   138]]
	valid_confusion_matrix:
[[4276  339   86 1436]
 [ 313   26   25   82]
 [  95   10    2   29]
 [ 162    9   11   46]]


100%|██████████| 12/12 [00:12<00:00,  1.03s/it]
100%|██████████| 1/1 [00:02<00:00,  2.33s/it]



epoch 12
	train_loss:4.417 valid_loss:4.368
	train_acc:61.39% valid_acc:62.31%
	train_f1:0.686 valid_f1:0.693
	train_confusion_matrix:
[[15935  1374   397  5593]
 [ 1375   156    98   388]
 [  368    41    16   131]
 [  398    38    18   141]]
	valid_confusion_matrix:
[[4232  310  138 1457]
 [ 314   33   13   86]
 [  84    8    6   38]
 [ 148    8   14   58]]


100%|██████████| 12/12 [00:12<00:00,  1.02s/it]
100%|██████████| 1/1 [00:02<00:00,  2.29s/it]



epoch 13
	train_loss:4.368 valid_loss:4.389
	train_acc:61.81% valid_acc:62.17%
	train_f1:0.688 valid_f1:0.694
	train_confusion_matrix:
[[16052  1354   363  5517]
 [ 1377   137   104   400]
 [  370    47    15   135]
 [  399    32     9   151]]
	valid_confusion_matrix:
[[4218  323  107 1489]
 [ 298   38   20   90]
 [  84   10   10   32]
 [ 142   18   15   53]]


100%|██████████| 12/12 [00:12<00:00,  1.03s/it]
100%|██████████| 1/1 [00:02<00:00,  2.34s/it]



epoch 14
	train_loss:4.413 valid_loss:4.402
	train_acc:61.57% valid_acc:61.97%
	train_f1:0.688 valid_f1:0.692
	train_confusion_matrix:
[[15964  1380   372  5616]
 [ 1357   161    91   398]
 [  363    35    31   124]
 [  399    36    11   160]]
	valid_confusion_matrix:
[[4210  337  111 1479]
 [ 298   38   21   89]
 [  84    8    1   43]
 [ 147   15   10   56]]


100%|██████████| 12/12 [00:12<00:00,  1.01s/it]
100%|██████████| 1/1 [00:02<00:00,  2.26s/it]



epoch 15
	train_loss:4.343 valid_loss:4.208
	train_acc:62.11% valid_acc:63.81%
	train_f1:0.691 valid_f1:0.704
	train_confusion_matrix:
[[16140  1349   426  5401]
 [ 1357   135   104   409]
 [  369    29    27   121]
 [  413    37    17   141]]
	valid_confusion_matrix:
[[4334  322  104 1377]
 [ 303   39   24   80]
 [  92    7    5   32]
 [ 153   13    7   55]]


100%|██████████| 12/12 [00:12<00:00,  1.02s/it]
100%|██████████| 1/1 [00:02<00:00,  2.28s/it]



epoch 16
	train_loss:4.423 valid_loss:4.424
	train_acc:61.13% valid_acc:61.87%
	train_f1:0.683 valid_f1:0.691
	train_confusion_matrix:
[[15883  1334   381  5642]
 [ 1390   106    93   404]
 [  378    53    10   123]
 [  394    47    19   131]]
	valid_confusion_matrix:
[[4196  309   98 1534]
 [ 300   48    9   89]
 [  87   11    4   34]
 [ 165   10    3   50]]


100%|██████████| 12/12 [00:12<00:00,  1.03s/it]
100%|██████████| 1/1 [00:02<00:00,  2.24s/it]



epoch 17
	train_loss:4.478 valid_loss:4.307
	train_acc:60.81% valid_acc:63.01%
	train_f1:0.683 valid_f1:0.698
	train_confusion_matrix:
[[15815  1302   418  5807]
 [ 1341   160   100   423]
 [  370    49    22   121]
 [  405    45    20   140]]
	valid_confusion_matrix:
[[4271  326   95 1445]
 [ 304   40   15   87]
 [  97    6    6   27]
 [ 146   13    9   60]]


100%|██████████| 12/12 [00:12<00:00,  1.03s/it]
100%|██████████| 1/1 [00:02<00:00,  2.31s/it]



epoch 18
	train_loss:4.410 valid_loss:4.372
	train_acc:61.47% valid_acc:62.21%
	train_f1:0.687 valid_f1:0.691
	train_confusion_matrix:
[[15996  1398   388  5565]
 [ 1355   169   102   394]
 [  373    39    20   126]
 [  425    47    13   127]]
	valid_confusion_matrix:
[[4229  370  111 1427]
 [ 311   33   15   87]
 [  97    8    1   30]
 [ 151    8   10   59]]


100%|██████████| 12/12 [00:12<00:00,  1.02s/it]
100%|██████████| 1/1 [00:02<00:00,  2.32s/it]


epoch 19
	train_loss:4.386 valid_loss:4.327
	train_acc:61.74% valid_acc:62.75%
	train_f1:0.687 valid_f1:0.697
	train_confusion_matrix:
[[16005  1410   370  5491]
 [ 1413   163    84   348]
 [  395    45    16   107]
 [  410    39     9   149]]
	valid_confusion_matrix:
[[4258  363  112 1404]
 [ 303   41   19   83]
 [  99    7    6   24]
 [ 139   21   14   54]]



