In [4]:
#!pip install fasttext
#!pip install transformers
#import nltk
#nltk.download('punkt')
#!pip3 install torch torchvision torchaudio

In [5]:
DATA_DIR =  './data/'

In [6]:
import pandas as pd
import numpy as np
import scipy
import nltk
import re
import os
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from torch.optim import AdamW
from fasttext import load_model
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, TensorDataset
from tqdm import tqdm
from sklearn.metrics import f1_score, confusion_matrix
from sklearn.model_selection import train_test_split
import xml.etree.ElementTree as ET

In [7]:
VALID_SIZE = .2


# generate word_index list
def build_vocab(data_dir, plain=[]):
    """plain is a empty str file which will record all text from official dataset"""
    for fn in os.listdir(data_dir):
        if fn.endswith('.xml'):
            with open(data_dir + fn) as f:
                dom = ET.parse(f)
                root = dom.getroot()
                for sent in root.iter("sentence"):
                    text = sent.find('text').text.lower()
                    token = word_tokenize(text)
                    plain = plain + token
    vocab = sorted(set(plain))
    with open(os.path.join(data_dir, "plain.txt"), "w+", encoding="utf8") as f:
        for v in vocab:
            f.write(f"{v}\n")
    word_idx = {}
    for idx, word in enumerate(vocab):
        word_idx[word] = idx + 1
    return word_idx


def gen_np_embedding(fn, word_idx, dim=100, emb=False):
    if emb:
        model = load_model(fn + ".bin")
    embedding = np.zeros((len(word_idx) + 2, dim))

    with open(fn, encoding="utf8") as f:
        for l in f:
            # for each line, get the word and its vector
            rec = l.rstrip().split(' ')
            if len(rec) == 2:  # skip the first line.
                continue
                # if the word in word_idx, fill the embedding
            if rec[0] in word_idx:
                embedding[word_idx[rec[0]]] = np.array([float(r) for r in rec[1:]])
    for w in word_idx:
        if embedding[word_idx[w]].sum() == 0.:
            if emb:
                embedding[word_idx[w]] = model.get_word_vector(w)
    return embedding

dict_polarity = {'non-aspect':0, 'positive':1,'neutral':2, 'negative':3,  'conflict':4}
def create_train_data_restaurant(fn, word_idx, sent_len=83):
    dom = ET.parse(fn)
    root = dom.getroot()
    train_X = np.zeros((len(root), sent_len), np.int16)
    mask = np.zeros_like(train_X)

    train_y = np.zeros((len(root), sent_len), np.int16)
    train_y_polarity = np.zeros((len(root), sent_len), np.int16)
    take = np.ones(len(root), dtype=bool)

    dom = ET.parse(fn)
    root = dom.getroot()
    # iterate the sentence
    for sx, sent in enumerate(root.iter("sentence")):
        # TODO temporary to compare this and transformers
        if not [_ for _ in sent.iter("aspectTerm")]:
            take[sx] = False
            continue
        text = sent.find('text').text.lower()
        # tokenize the current sentence
        token = word_tokenize(text)

        # write word index and tag in train_X
        try:
            for wx, word in enumerate(token):
                train_X[sx, wx] = word_idx[word]
                mask[sx, wx] = 1
        except KeyError:
            continue

        for ox, apin in enumerate(sent.iter('aspectTerms')):
            for ax, opin in enumerate(apin.iter('aspectTerm')):
                target, polarity, start, end = opin.attrib['term'], opin.attrib['polarity'], int(
                    opin.attrib['from']), int(opin.attrib['to'])
                # find word index (instead of str index) if start,end is not (0,0)
                if end != 0:
                    if start != 0:
                        start = len(word_tokenize(text[:start]))
                    end = len(word_tokenize(text[:end])) - 1
                    # for training only identify aspect word, but not polarity
                    train_y[sx, start] = 1
                    train_y_polarity[sx, start] = dict_polarity[polarity]
                    if end > start:
                        # train_y[sx, start + 1:end] = 2
                        train_y[sx, start + 1:end] = 1
                        train_y_polarity[sx, start + 1:end] = dict_polarity[polarity]

    return (train_X[take], mask[take]), train_y[take], train_y_polarity[take]


def get_default_device():
    """Pick GPU if available, else CPU"""
    if torch.cuda.is_available():
        return torch.device('cuda')
    else:
        return torch.device('cpu')


def to_device(data, device):
    """Move tensor(s) to chosen device"""
    if isinstance(data, (list, tuple)):
        return [to_device(x, device) for x in data]
    elif isinstance(data, dict):
        for k, v in data.items():
            data[k] = v.to(device)
        return data
    return data.to(device, non_blocking=True)


class DeviceDataLoader():
    """Wrap a dataloader to move data to a device"""

    def __init__(self, dl, device):
        self.dl = dl
        self.device = device

    def __iter__(self):
        """Yield a batch of data after moving it to device"""
        for b in self.dl:
            yield to_device(b, self.device)

    def __len__(self):
        """Number of batches"""
        return len(self.dl)


def loss_fn(pred, mask, label):
    label.masked_fill_(~mask, -100)
    pred = pred.view(-1, 5)
    label = label.view(-1)
    loss = torch.nn.functional.cross_entropy(pred, label, weight = torch.tensor([2, 0.3,0.3,0.3,0.3]))
    return loss


def cal_acc(pred_tags, mask, true_tags):
    if isinstance(pred_tags, list):
        pred_tags = torch.cat(pred_tags, 0)
        mask = torch.cat(mask, 0)
        true_tags = torch.cat(true_tags, 0)
    pred_tags = pred_tags[mask]
    true_tags = true_tags[mask]
    acc = (pred_tags == true_tags).sum() / pred_tags.numel()
    f1 = f1_score(true_tags.cpu().numpy(), pred_tags.cpu().numpy(), labels=[0, 1, 2, 3, 4], average='weighted')
    cm = confusion_matrix(true_tags.cpu().numpy(), pred_tags.cpu().numpy())

    return acc, f1, cm


class Model(torch.nn.Module):
    def __init__(self, gen_emb, domain_emb, num_classes=5, dropout=0.5):
        super(Model, self).__init__()
        self.gen_embedding = torch.nn.Embedding(gen_emb.shape[0], gen_emb.shape[1])
        self.gen_embedding.weight = torch.nn.Parameter(torch.from_numpy(gen_emb), requires_grad=False)
        self.domain_embedding = torch.nn.Embedding(domain_emb.shape[0], domain_emb.shape[1])
        self.domain_embedding.weight = torch.nn.Parameter(torch.from_numpy(domain_emb), requires_grad=False)
        self.conv1 = torch.nn.Conv1d(gen_emb.shape[1] + domain_emb.shape[1], 128, 5, padding=2)
        self.conv2 = torch.nn.Conv1d(gen_emb.shape[1] + domain_emb.shape[1], 128, 3, padding=1)
        self.dropout = torch.nn.Dropout(dropout)

        self.conv3 = torch.nn.Conv1d(256, 256, 5, padding=2)
        self.conv4 = torch.nn.Conv1d(256, 256, 5, padding=2)
        self.conv5 = torch.nn.Conv1d(256, 256, 5, padding=2)

        self.lstm = nn.LSTM(256, hidden_size=128, num_layers=1, bidirectional=True, batch_first=True)

        self.linear_ae = torch.nn.Linear(256, 2)

        #aspect sentiment analysis
        self.embed = nn.Embedding.from_pretrained(torch.tensor(gen_emb, dtype=torch.float))
        self.lstm_l = nn.LSTM(gen_emb.shape[1], hidden_size=128, num_layers=1, bidirectional=True, batch_first=True)
        self.lstm_r = nn.LSTM(gen_emb.shape[1],hidden_size=128, num_layers=1, bidirectional=True, batch_first=True)

        self.dense = nn.Linear(128*2, 4)


    def forward(self, x_train):
    
        x_emb = torch.cat((self.gen_embedding(x_train), self.domain_embedding(x_train)), dim=2)
        x_emb = self.dropout(x_emb).transpose(1, 2)

        x_conv = torch.nn.functional.relu(torch.cat((self.conv1(x_emb.float()), self.conv2(x_emb.float())), dim=1))
        x_conv = self.dropout(x_conv)

        x_conv = torch.nn.functional.relu(self.conv3(x_conv))
        x_conv = self.dropout(x_conv)

        x_conv = torch.nn.functional.relu(self.conv4(x_conv))
        x_conv = self.dropout(x_conv)

        x_conv = torch.nn.functional.relu(self.conv5(x_conv))
        x_conv = x_conv.transpose(1, 2)

        x_lstm, (hidden, cell) = self.lstm(x_conv)

        x_logit = self.linear_ae(x_lstm)

        n1 = len(x_train)
        n2 = len(x_train[0])
  
        output = torch.tensor([[[0]*5]*n2]*n1).float()
        output.requires_grad = True

        #sentiment analysis
        pred_aspect = x_logit.max(-1)[1]

        for j, pred in enumerate(pred_aspect):
            i = 0
            aspect = False
            start = 0
            n = len(pred)
            pred = pred.tolist()
            count_aspect = 0
            index_lst = []
            left_right = []
            x_train_word = x_train[j]
    
            while i < n:
    
                if aspect == True and pred[i] == 0:
                  
                    count_aspect = count_aspect + 1
                    index_lst.append([start, i-1])
                    aspect = False
                    #print(x_train_word, 'x_train+word')
                    #print(x_train_word[start:])
                    right_context = x_train_word[start:].flip(dims=(0,))
                    #print(right_context)
                    #print(right_context[right_context.nonzero().squeeze().detach()])
                    left_right.append([x_train_word[:i], right_context[right_context.nonzero().squeeze().detach()]])

                if aspect == False and pred[i] == 1:
                    start = i
                    aspect = True

                i = i + 1

            for m in range(count_aspect):
                inputs = left_right[m]
                index = index_lst[m]
                x_l, x_r = inputs[0], inputs[1]

                if x_l.dim() == 0 or len(x_l) == 0:
                    x_l = torch.tensor([0])
                elif len(x_l) ==1:
                    x_l = torch.tensor([x_l])
                
                if x_r.dim() == 0 or len(x_r) == 0:
                    x_r = torch.tensor([0])
                elif len(x_r) ==1:
                    x_r = torch.tensor([x_r])
                
                x_l, x_r = self.embed(x_l), self.embed(x_r)
                _, (h_n_l, _) = self.lstm_l(x_l)
                _, (h_n_r, _) = self.lstm_r(x_r)
                h_n = torch.cat((h_n_l[0], h_n_r[0]), dim=-1)
                out = self.dense(h_n)
                with torch.no_grad():
                    output[j][index[0]:index[1]+1]= torch.cat((torch.tensor([-10]),out),0)

        return output


In [8]:
a = torch.tensor([1,2,0])
a[a!= 0]

tensor([1, 2])

In [9]:
word_indx = build_vocab(DATA_DIR)
fn = DATA_DIR + 'restaurant_emb.vec'
res_domain_embedding = gen_np_embedding(fn, word_indx, dim=100, emb=True)

    
fn = DATA_DIR + 'glove.840B.300d.txt'
general_embedding = gen_np_embedding(fn, word_indx, dim=300, emb=False)



In [10]:
fn = DATA_DIR + 'Restaurants_Train_v2.xml'
(X_train_res, mask_res), y_train_res , y_train_pol = create_train_data_restaurant(fn, word_indx, sent_len=100)
X, mask, y , y_pol = X_train_res, mask_res, y_train_res, y_train_pol
   
X_train, X_valid, mask_train, mask_valid, y_train, y_valid , y_pol_train, y_pol_valid= train_test_split(X, mask, y, y_pol, test_size=VALID_SIZE)


In [11]:
#print(X_train[:3])
#print(mask_train[:3])
print(y_train[:5])
print(' ')
print(y_pol_train[:5])

[[0 0 1 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0
  0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  0 0 0 0 0 0 0 0 0 0 0 

In [15]:
device = get_default_device()

NUM_EPOCHS = 20
TRAIN_BATCH_SIZE = 128
VALID_BATCH_SIZE = 1024

NUM_ASPECT_TAGS = 4

dataset = TensorDataset(torch.Tensor(X_train), torch.Tensor(mask_train), torch.Tensor(y_pol_train))
print(f"train samples:{len(dataset)}")
train_loader = DataLoader(dataset, batch_size=TRAIN_BATCH_SIZE, shuffle=True, drop_last=True)

dataset_valid = TensorDataset(torch.Tensor(X_valid), torch.Tensor(mask_valid), torch.Tensor(y_pol_valid))
print(f"valid samples:{len(dataset_valid)}")
test_loader = DataLoader(dataset_valid, batch_size=VALID_BATCH_SIZE)

model = to_device(Model(general_embedding, res_domain_embedding, num_classes=5), device)

torch.cuda.empty_cache()

parameters = [p for p in model.parameters() if p.requires_grad]
optimizer = AdamW(parameters, lr=1e-4)


for epoch in range(NUM_EPOCHS):
    train_losses = []
    train_acc = []
    test_loss = []
    test_acc = []
    train_f1 = []
    test_f1 = []

    model.train()
    preds = []
    masks = []
    labels = []
    for data in tqdm(train_loader, total=len(train_loader)):
        for i in range(len(data)):
            data[i] = data[i].to(device)
        feature, mask, label = data
        feature, mask, label = feature.long(), mask.bool(), label.long()
        optimizer.zero_grad()
        pred_logits = model(feature)

        loss = loss_fn(pred_logits.float(), mask, label)
        loss.backward()
        optimizer.step()

        train_losses.append(loss.item())

        pred_tags = pred_logits.max(-1)[1]
        preds.append(pred_tags)
        masks.append(mask)
        labels.append(label)

    avg_train_acc, avg_train_f1, train_cm = cal_acc(preds, masks, labels)
    avg_train_loss = sum(train_losses) / len(train_losses)

    preds = []
    masks = []
    labels = []
    with torch.no_grad():
        for data in tqdm(test_loader, total=len(test_loader)):
            for i in range(len(data)):
                data[i] = data[i].to(device)
            feature, mask, label = data
            feature, mask, label = feature.long(), mask.bool(), label.long()
            pred_logits = model(feature)
            loss = loss_fn(pred_logits, mask, label)
            
            test_loss.append(loss.item())

            pred_tags = pred_logits.max(-1)[1]

            preds.append(pred_tags)
            masks.append(mask)
            labels.append(label)

    avg_test_acc, avg_test_f1, test_cm = cal_acc(preds, masks, labels)
    avg_test_loss = sum(test_loss) / len(test_loss)

    print(f"\nepoch {epoch}")
    print("\ttrain_loss:{:.3f} valid_loss:{:.3f}".format(avg_train_loss, avg_test_loss))
    print("\ttrain_acc:{:.2%} valid_acc:{:.2%}".format(avg_train_acc, avg_test_acc))
    print("\ttrain_f1:{:.3f} valid_f1:{:.3f}".format(avg_train_f1, avg_test_f1))
    print(f"\ttrain_confusion_matrix:\n{train_cm}")
    print(f"\tvalid_confusion_matrix:\n{test_cm}")


train samples:1616
valid samples:405


100%|██████████| 12/12 [00:07<00:00,  1.63it/s]
100%|██████████| 1/1 [00:01<00:00,  1.44s/it]



epoch 0
	train_loss:3.406 valid_loss:3.471
	train_acc:72.23% valid_acc:71.56%
	train_f1:0.752 valid_f1:0.747
	train_confusion_matrix:
[[18999  1038  1579  1724    24]
 [ 1465   145   174   182     2]
 [  462    18    30    44     0]
 [  499    49    41    42     1]
 [   50    13     9    15     0]]
	valid_confusion_matrix:
[[4745  291  415  432    5]
 [ 353   41   45   48    1]
 [ 120    3    9    9    0]
 [ 155   10   13   10    0]
 [   3    1    3    3    0]]


100%|██████████| 12/12 [00:07<00:00,  1.61it/s]
100%|██████████| 1/1 [00:01<00:00,  1.46s/it]



epoch 1
	train_loss:3.446 valid_loss:3.514
	train_acc:71.86% valid_acc:71.01%
	train_f1:0.750 valid_f1:0.742
	train_confusion_matrix:
[[18986  1021  1584  1853    23]
 [ 1465   160   179   175     4]
 [  457    29    32    28     2]
 [  524    47    48    39     0]
 [   55    10    14     8     0]]
	valid_confusion_matrix:
[[4719  297  423  446    3]
 [ 370   26   49   43    0]
 [ 118    4    8   11    0]
 [ 150   11   12   15    0]
 [   3    1    2    4    0]]


100%|██████████| 12/12 [00:07<00:00,  1.60it/s]
100%|██████████| 1/1 [00:01<00:00,  1.48s/it]



epoch 2
	train_loss:3.449 valid_loss:3.573
	train_acc:72.03% valid_acc:70.59%
	train_f1:0.752 valid_f1:0.742
	train_confusion_matrix:
[[19064  1008  1680  1793    20]
 [ 1480   142   168   151     5]
 [  440    29    40    34     1]
 [  503    50    46    46     2]
 [   49     6    12    14     0]]
	valid_confusion_matrix:
[[4682  317  439  443    7]
 [ 346   34   62   46    0]
 [ 117    6   11    7    0]
 [ 147    8   20   13    0]
 [   5    1    2    2    0]]


100%|██████████| 12/12 [00:07<00:00,  1.58it/s]
100%|██████████| 1/1 [00:01<00:00,  1.47s/it]



epoch 3
	train_loss:3.414 valid_loss:3.427
	train_acc:72.11% valid_acc:72.06%
	train_f1:0.751 valid_f1:0.750
	train_confusion_matrix:
[[19094   988  1636  1769    25]
 [ 1505   130   173   168     0]
 [  477    23    35    27     0]
 [  499    49    49    45     0]
 [   45    13    12    10     0]]
	valid_confusion_matrix:
[[4772  258  388  462    8]
 [ 363   35   48   42    0]
 [ 115    4   14    8    0]
 [ 145   12   13   18    0]
 [   7    1    2    0    0]]


100%|██████████| 12/12 [00:07<00:00,  1.57it/s]
100%|██████████| 1/1 [00:01<00:00,  1.46s/it]



epoch 4
	train_loss:3.421 valid_loss:3.509
	train_acc:71.92% valid_acc:71.17%
	train_f1:0.749 valid_f1:0.746
	train_confusion_matrix:
[[18974   995  1621  1764    32]
 [ 1501   129   167   168     3]
 [  478    28    26    36     1]
 [  503    51    49    35     0]
 [   54     8    14    10     0]]
	valid_confusion_matrix:
[[4722  254  401  507    4]
 [ 346   34   56   52    0]
 [ 121    5    9    6    0]
 [ 143   14   17   14    0]
 [   5    0    2    3    0]]


100%|██████████| 12/12 [00:07<00:00,  1.59it/s]
100%|██████████| 1/1 [00:01<00:00,  1.50s/it]



epoch 5
	train_loss:3.477 valid_loss:3.551
	train_acc:71.44% valid_acc:70.84%
	train_f1:0.747 valid_f1:0.743
	train_confusion_matrix:
[[18784  1048  1637  1832    24]
 [ 1473   140   196   166     2]
 [  448    31    28    43     1]
 [  516    52    39    39     0]
 [   44    17     9    15     0]]
	valid_confusion_matrix:
[[4696  278  444  464    6]
 [ 354   37   54   43    0]
 [ 122    2   11    6    0]
 [ 151   11   13   13    0]
 [   5    1    2    2    0]]


100%|██████████| 12/12 [00:07<00:00,  1.57it/s]
100%|██████████| 1/1 [00:01<00:00,  1.47s/it]



epoch 6
	train_loss:3.438 valid_loss:3.456
	train_acc:71.87% valid_acc:71.50%
	train_f1:0.750 valid_f1:0.746
	train_confusion_matrix:
[[18931  1043  1574  1813    24]
 [ 1473   136   178   162     3]
 [  471    26    36    33     2]
 [  488    68    48    42     0]
 [   46    16    15     9     0]]
	valid_confusion_matrix:
[[4754  258  426  444    6]
 [ 364   27   55   42    0]
 [ 115    9    9    8    0]
 [ 152   13   12   11    0]
 [   7    0    2    1    0]]


100%|██████████| 12/12 [00:07<00:00,  1.59it/s]
100%|██████████| 1/1 [00:01<00:00,  1.50s/it]



epoch 7
	train_loss:3.404 valid_loss:3.582
	train_acc:72.17% valid_acc:70.54%
	train_f1:0.751 valid_f1:0.742
	train_confusion_matrix:
[[19054   925  1650  1778    24]
 [ 1497   130   176   161     0]
 [  462    23    30    30     0]
 [  512    51    49    36     0]
 [   54     9    11    12     0]]
	valid_confusion_matrix:
[[4677  278  449  478    6]
 [ 346   38   62   42    0]
 [ 112    5    9   14    1]
 [ 145   16   14   13    0]
 [   3    0    4    3    0]]


100%|██████████| 12/12 [00:07<00:00,  1.58it/s]
100%|██████████| 1/1 [00:01<00:00,  1.47s/it]



epoch 8
	train_loss:3.420 valid_loss:3.535
	train_acc:71.96% valid_acc:70.75%
	train_f1:0.749 valid_f1:0.741
	train_confusion_matrix:
[[18891  1001  1559  1803    16]
 [ 1497   135   193   157     2]
 [  460    28    37    33     0]
 [  500    63    45    39     0]
 [   45    11    15    14     0]]
	valid_confusion_matrix:
[[4706  261  449  466    6]
 [ 368   25   59   36    0]
 [ 115    5    9   12    0]
 [ 145   18   14   11    0]
 [   7    0    3    0    0]]


100%|██████████| 12/12 [00:07<00:00,  1.58it/s]
100%|██████████| 1/1 [00:01<00:00,  1.45s/it]



epoch 9
	train_loss:3.428 valid_loss:3.513
	train_acc:72.01% valid_acc:71.01%
	train_f1:0.751 valid_f1:0.743
	train_confusion_matrix:
[[19007  1012  1599  1782    34]
 [ 1478   126   169   188     2]
 [  454    24    33    41     0]
 [  498    51    52    41     2]
 [   45    10    13    12     0]]
	valid_confusion_matrix:
[[4719  290  446  429    4]
 [ 354   30   57   47    0]
 [ 117    3   12    8    1]
 [ 152   14   15    7    0]
 [   5    1    3    1    0]]


100%|██████████| 12/12 [00:07<00:00,  1.58it/s]
100%|██████████| 1/1 [00:01<00:00,  1.46s/it]



epoch 10
	train_loss:3.401 valid_loss:3.443
	train_acc:72.19% valid_acc:71.84%
	train_f1:0.752 valid_f1:0.750
	train_confusion_matrix:
[[19043   973  1660  1703    29]
 [ 1479   146   179   180     2]
 [  461    33    32    33     2]
 [  487    57    55    38     0]
 [   49    10    17    10     0]]
	valid_confusion_matrix:
[[4762  222  431  464    9]
 [ 351   38   44   54    1]
 [ 115    3    9   14    0]
 [ 147    8   18   15    0]
 [   5    0    4    1    0]]


100%|██████████| 12/12 [00:07<00:00,  1.59it/s]
100%|██████████| 1/1 [00:01<00:00,  1.46s/it]



epoch 11
	train_loss:3.395 valid_loss:3.396
	train_acc:72.23% valid_acc:71.90%
	train_f1:0.752 valid_f1:0.748
	train_confusion_matrix:
[[19070   949  1656  1732    19]
 [ 1491   136   193   147     1]
 [  460    29    29    34     1]
 [  514    52    45    34     0]
 [   46    11    16    14     1]]
	valid_confusion_matrix:
[[4791  232  437  425    3]
 [ 353   21   66   48    0]
 [ 115    6    9   11    0]
 [ 153   13   15    7    0]
 [   7    0    2    1    0]]


100%|██████████| 12/12 [00:07<00:00,  1.57it/s]
100%|██████████| 1/1 [00:01<00:00,  1.52s/it]



epoch 12
	train_loss:3.446 valid_loss:3.478
	train_acc:71.83% valid_acc:71.45%
	train_f1:0.750 valid_f1:0.746
	train_confusion_matrix:
[[19018  1041  1627  1803    19]
 [ 1469   139   182   167     1]
 [  471    27    31    34     1]
 [  507    57    46    40     0]
 [   47    12    14    14     0]]
	valid_confusion_matrix:
[[4741  253  438  452    4]
 [ 359   34   52   43    0]
 [ 116    3   12   10    0]
 [ 151    8   18   11    0]
 [   7    0    2    1    0]]


100%|██████████| 12/12 [00:07<00:00,  1.57it/s]
100%|██████████| 1/1 [00:01<00:00,  1.50s/it]



epoch 13
	train_loss:3.405 valid_loss:3.575
	train_acc:72.13% valid_acc:70.51%
	train_f1:0.750 valid_f1:0.742
	train_confusion_matrix:
[[19036   987  1528  1835    29]
 [ 1496   140   156   186     1]
 [  481    16    36    28     0]
 [  504    56    48    40     0]
 [   51    14    12    10     1]]
	valid_confusion_matrix:
[[4681  290  423  488    6]
 [ 355   34   59   40    0]
 [ 111    5    7   17    1]
 [ 139   12   24   13    0]
 [   5    1    3    1    0]]


100%|██████████| 12/12 [00:07<00:00,  1.59it/s]
100%|██████████| 1/1 [00:01<00:00,  1.47s/it]



epoch 14
	train_loss:3.411 valid_loss:3.574
	train_acc:72.02% valid_acc:70.59%
	train_f1:0.750 valid_f1:0.743
	train_confusion_matrix:
[[19033   995  1588  1793    16]
 [ 1514   124   180   159     1]
 [  467    34    22    33     0]
 [  486    56    60    40     0]
 [   55     6    11    14     0]]
	valid_confusion_matrix:
[[4682  263  459  476    8]
 [ 345   40   59   44    0]
 [ 120    5    7    9    0]
 [ 143   16   18   11    0]
 [   6    0    4    0    0]]


100%|██████████| 12/12 [00:07<00:00,  1.59it/s]
100%|██████████| 1/1 [00:01<00:00,  1.45s/it]



epoch 15
	train_loss:3.414 valid_loss:3.476
	train_acc:72.09% valid_acc:71.59%
	train_f1:0.751 valid_f1:0.748
	train_confusion_matrix:
[[19019  1018  1577  1790    15]
 [ 1468   126   174   174     0]
 [  473    21    31    34     1]
 [  489    55    64    39     0]
 [   53     9     9    15     0]]
	valid_confusion_matrix:
[[4742  279  408  456    3]
 [ 356   42   43   47    0]
 [ 109    4   12   16    0]
 [ 146   15   16   11    0]
 [   5    0    2    3    0]]


100%|██████████| 12/12 [00:07<00:00,  1.59it/s]
100%|██████████| 1/1 [00:01<00:00,  1.43s/it]



epoch 16
	train_loss:3.432 valid_loss:3.433
	train_acc:71.93% valid_acc:71.84%
	train_f1:0.750 valid_f1:0.749
	train_confusion_matrix:
[[18963  1010  1618  1781    32]
 [ 1472   131   183   170     2]
 [  454    27    37    35     1]
 [  497    58    52    31     0]
 [   51    11    11    14     0]]
	valid_confusion_matrix:
[[4768  268  411  440    1]
 [ 358   36   48   46    0]
 [ 113    6    9   13    0]
 [ 147   14   16   11    0]
 [   7    0    2    1    0]]


100%|██████████| 12/12 [00:07<00:00,  1.60it/s]
100%|██████████| 1/1 [00:01<00:00,  1.51s/it]



epoch 17
	train_loss:3.439 valid_loss:3.495
	train_acc:71.85% valid_acc:71.42%
	train_f1:0.749 valid_f1:0.748
	train_confusion_matrix:
[[18901   951  1665  1815    19]
 [ 1484   118   176   175     0]
 [  454    16    42    37     1]
 [  514    50    41    38     0]
 [   53    12    11    10     0]]
	valid_confusion_matrix:
[[4730  249  450  458    1]
 [ 343   36   61   48    0]
 [ 111    7   13   10    0]
 [ 144    9   18   17    0]
 [   4    0    4    2    0]]


100%|██████████| 12/12 [00:07<00:00,  1.56it/s]
100%|██████████| 1/1 [00:01<00:00,  1.52s/it]



epoch 18
	train_loss:3.465 valid_loss:3.513
	train_acc:71.69% valid_acc:71.06%
	train_f1:0.749 valid_f1:0.743
	train_confusion_matrix:
[[18884  1040  1635  1822    27]
 [ 1492   147   183   173     1]
 [  440    25    28    37     0]
 [  491    50    38    41     0]
 [   53    12    11    11     0]]
	valid_confusion_matrix:
[[4719  276  448  441    4]
 [ 362   33   51   41    1]
 [ 114    4    9   14    0]
 [ 158    8   11   11    0]
 [   5    1    2    2    0]]


100%|██████████| 12/12 [00:07<00:00,  1.58it/s]
100%|██████████| 1/1 [00:01<00:00,  1.48s/it]


epoch 19
	train_loss:3.431 valid_loss:3.482
	train_acc:71.97% valid_acc:71.32%
	train_f1:0.750 valid_f1:0.747
	train_confusion_matrix:
[[19023   957  1640  1824    26]
 [ 1489   141   186   149     2]
 [  448    29    39    53     1]
 [  512    43    50    41     0]
 [   53     8    11    15     0]]
	valid_confusion_matrix:
[[4738  262  441  440    7]
 [ 346   33   66   43    0]
 [ 118    7    7    9    0]
 [ 143   16   18   11    0]
 [   6    0    3    1    0]]



