# **Lesson04: Deep Learning Methods**

In [None]:
!gdown --id 1p4moPeR2QoRmoTPWTx0rbtGdGocZPhWA
# !gdown --id 1znxMF6QTOQk_pehRIkHgXZRusIDhl34w
!gdown --id 1RSQNLoJUeyXuEmMAELa_xV-9X1YXa9jX

Downloading...
From: https://drive.google.com/uc?id=1p4moPeR2QoRmoTPWTx0rbtGdGocZPhWA
To: /content/ChnSentiCorp_htl_ba_6000_cutted.csv
100% 1.74M/1.74M [00:00<00:00, 117MB/s]
Downloading...
From: https://drive.google.com/uc?id=1znxMF6QTOQk_pehRIkHgXZRusIDhl34w
To: /content/text.model.bin
26.6MB [00:00, 84.5MB/s]


## **Load Dataset**

### Define load data function

In [None]:

import pandas as pd

from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer, TfidfTransformer


seed = 42


def _get_raw_data(path, compression='gzip'):
  data = pd.read_csv(path, compression=compression)
  if isinstance(data.cut[0], str):
    data.cut = data.cut.apply(lambda x: eval(x))
  return data


def label_transform(label):
  le = preprocessing.LabelEncoder()
  return le.fit_transform(label)


def get_data(path='data/ChnSentiCorp_cutted.csv', is_cutted=True):
  data = _get_raw_data(path=path)
  y = label_transform(data.label)

  cut_values = data.cut.str.join(sep=' ').values
  if is_cutted:
    features = cut_values
  else:
    features = data.review.values
  X_train, X_test, y_train, y_test = train_test_split(
    features, y, test_size=0.2, random_state=42, shuffle=True)
  return data, (X_train, X_test, y_train, y_test), cut_values


### get dataset

In [None]:
data, (X_train, X_test, y_train, y_test), cut_values = get_data(path='ChnSentiCorp_htl_ba_6000_cutted.csv')

## **Define Metric Object**

In [None]:
import numpy as np
from sklearn.metrics import log_loss
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_recall_fscore_support


class Metric(object):
    
  def __init__(self, y_true):
    self.y_true = y_true
      
  def get_metric(self, y_pred, y_true=None):
    if y_true is None:
      y_true = self.y_true
    
    loss = np.nan
    if y_pred.ndim > 1:
      loss = log_loss(y_true=y_true, y_pred=y_pred)
      y_pred = y_pred.argmax(axis=1)
    
    acuuracy = accuracy_score(y_true=y_true, y_pred=y_pred)
    precision, recall, f_score, _ = precision_recall_fscore_support(
      y_true=y_true, y_pred=y_pred, average='macro', zero_division='warn')
    
    print('loss:', loss)
    print('acuuracy:', acuuracy)
    print('precision:', precision)
    print('recall:', recall)
    print('f_score:', f_score)


metric_fn = Metric(y_true=y_test)

## **Re-train a Word2Vec by Gensim (Optional)**

In [None]:
# from gensim.models.callbacks import CallbackAny2Vec
# from gensim.models.word2vec import Word2Vec


# class callback(CallbackAny2Vec):
#   '''Callback to print loss after each epoch.'''

#   def __init__(self):
#     self.epoch = 0
#     self.loss_previous_step = 0

#   def on_epoch_end(self, model):
#     current_loss = model.get_latest_training_loss()
#     loss = current_loss - self.loss_previous_step
#     self.loss_previous_step = current_loss
#     if self.epoch % 5 == 0:
#       print('Loss after epoch {}: {}'.format(self.epoch, loss))
#     if self.epoch + 1 == model.epochs:
#       print('Loss of last epoch {}: {}'.format(self.epoch, loss))
#     self.epoch += 1


# corpus = data.cut.values.tolist()
# model = Word2Vec(
#     sentences=corpus, size=300, min_count=1, seed=42, workers=4, sg=1,
#     negative=5, iter=500, compute_loss=True, callbacks=[callback()])

# model.wv.save_word2vec_format('text.model.bin', binary=True)

## **Define Dataset and Collate Function**

In [None]:
import numpy as np
from torch import tensor
from gensim.models import KeyedVectors
from torch.utils.data.dataset import Dataset
from torch.utils.data import DataLoader
from torch.nn.utils.rnn import pad_sequence


class Word2VecDataset(Dataset):
  def __init__(self, data, label, keyed_vectors, max_seq_length=128):
    word2index = dict(zip(
      keyed_vectors.index2word, np.arange(len(keyed_vectors.index2word))))
    unk = len(word2index)
    
    x_embed = []
    for sent in data:
      embed = []
      for word in sent.split():
        if word in word2index:
          embed.append(word2index[word])
        else:
          embed.append(unk)
      x_embed.append(tensor(embed[:max_seq_length-2]))
    self.data = x_embed
    self.label = label

  def __len__(self):
    return len(self.data)
      
  def __getitem__(self, idx):
    return self.data[idx], self.label[idx]


def collate_fn(batch):
  batch.sort(key=lambda x: len(x[0]), reverse=True)
  data, label = zip(*batch)
  data_length = [len(sq) for sq in data]
  data = pad_sequence(data, batch_first=True, padding_value=0)
  # return data, tensor(label), data_length
  return data, tensor(label)


keyed_vectors = KeyedVectors.load_word2vec_format('text.model.bin', binary=True)

train_dataset = Word2VecDataset(data=X_train, label=y_train, keyed_vectors=keyed_vectors)
train_dataloader = DataLoader(train_dataset, batch_size=20, collate_fn=collate_fn)

test_dataset = Word2VecDataset(data=X_test, label=y_test, keyed_vectors=keyed_vectors)
test_dataloader = DataLoader(test_dataset, batch_size=100, collate_fn=collate_fn)

  'See the migration notes for details: %s' % _MIGRATION_NOTES_URL


## **Define a Bi-directional GRU Network**

In [None]:
import torch
from torch import nn
import torch.nn.functional as F


class BiGRUNetwork(nn.Module):
  def __init__(self, embed_vectors, hidden_size=128, num_layers=2, classes=2, bidirectional=True):
    super(BiGRUNetwork, self).__init__()
    self.word_dim = embed_vectors.shape[1]
    self.hidden_size = hidden_size
    self.num_layers = num_layers
    self.classes = classes
    self.embed_layer = nn.Embedding.from_pretrained(
      torch.from_numpy(embed_vectors))
    self.embed_layer.requires_grad = False  # batch * length * 300
    
    self.bigru = nn.GRU(
      self.word_dim, self.hidden_size, dropout=0.5, 
      num_layers=self.num_layers, bidirectional=bidirectional, batch_first=True)
      
    self.drop = nn.Dropout(0.5)
    
    self.fc = nn.Linear(self.hidden_size * 2, self.classes)

    self.output = nn.Softmax(dim=1)
      
  def init_hidden(self, batch_size):
    return torch.zeros(self.num_layers * 2, batch_size, self.hidden_size)
  
  def _gru_forward(self, x):
    x = self.embed_layer(x)
    hidden = nn.Parameter(self.init_hidden(x.size(0)))

    self.bigru.flatten_parameters()
    gru_out, hidden = self.bigru(x, hidden)
    return gru_out, hidden
  
  def _output_forward(self, x):
    x = torch.transpose(x, 1, 2).contiguous()
    x = F.max_pool1d(x, x.size(2)).squeeze(2)
    x = F.relu(x)
    x = self.drop(x)
    x = self.fc(x)
    return self.output(x)

  def forward(self, x):
    gru_out, hidden = self._gru_forward(x)
    return self._output_forward(gru_out)


unk_vector = np.random.uniform(-0.01, 0.01, size=(keyed_vectors.vectors.shape[1])).astype("float32")
embed_vectors = np.concatenate((keyed_vectors.vectors, unk_vector[np.newaxis, :]), axis=0)

model = BiGRUNetwork(embed_vectors=embed_vectors)

## **Define a Loss function, optimizer and scheduler**

In [None]:
from tqdm.auto import tqdm
from torch import optim
from torch.optim.lr_scheduler import LambdaLR


def get_linear_schedule_with_warmup(optimizer, num_warmup_steps, num_training_steps, last_epoch=-1):

  def lr_lambda(current_step: int):
    if current_step < num_warmup_steps:
      return float(current_step) / float(max(1, num_warmup_steps))
    return max(
      0.0, float(
        num_training_steps - current_step) / float(
          max(1, num_training_steps - num_warmup_steps)))
  return LambdaLR(optimizer, lr_lambda, last_epoch)


epochs = 2

num_training_steps = len(train_dataloader) * epochs
criterion = nn.CrossEntropyLoss()

parameters = filter(lambda p: p.requires_grad, model.parameters())
optimizer = optim.Adam(parameters, lr=1e-4)

scheduler = get_linear_schedule_with_warmup(
  optimizer, num_warmup_steps=0, num_training_steps=num_training_steps)

## **Define Test Function**

In [None]:
def get_test_result(model, test_dataloader):
  total_probs = None
  total_label = None
  with torch.no_grad():
    for batch_data, batch_label in tqdm(test_dataloader, total=len(test_dataloader)):
      probs = model(batch_data)
      if total_probs is None:
        total_probs = probs.numpy()
      else:
        total_probs = np.concatenate((total_probs, probs.numpy()))
      if total_label is None:
        total_label = batch_label
      else:
        total_label = np.concatenate((total_label, batch_label))

    metric_fn = Metric(total_label)
    metric_fn.get_metric(F.softmax(tensor(total_probs), dim=1))

## **Define Trainer and Train!!!!!**

In [None]:

for epoch in tqdm(range(epochs)):  # loop over the dataset multiple times

  running_loss = 0.0
  model.train()
  for step, (batch_data, batch_label) in tqdm(enumerate(train_dataloader, 0), total=len(train_dataloader)):
    # zero the parameter gradients
    optimizer.zero_grad()

    # forward + backward + optimize
    probs = model(batch_data)
    loss = criterion(probs, batch_label)
    loss.backward()
    
    optimizer.step()
    scheduler.step()
    
    running_loss += loss.item()
    if (step + 1) % 20 == 0 and step != 0:
      running_loss /= 20
      print('epoch: {}, step: {}, loss: {}'.format(epoch, step + 1, running_loss))
      running_loss = 0.0

  get_test_result(model, test_dataloader)

HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=240.0), HTML(value='')))

epoch: 0, step: 20, loss: 0.6978888630867004
epoch: 0, step: 40, loss: 0.6883434653282166
epoch: 0, step: 60, loss: 0.6876093059778213
epoch: 0, step: 80, loss: 0.6832243621349334
epoch: 0, step: 100, loss: 0.6775568157434464
epoch: 0, step: 120, loss: 0.6684012115001678
epoch: 0, step: 140, loss: 0.6636695772409439
epoch: 0, step: 160, loss: 0.6668097645044326
epoch: 0, step: 180, loss: 0.6551657438278198
epoch: 0, step: 200, loss: 0.6553746521472931
epoch: 0, step: 220, loss: 0.655083280801773
epoch: 0, step: 240, loss: 0.6487073957920074



HBox(children=(FloatProgress(value=0.0, max=12.0), HTML(value='')))


loss: 0.6437217557430267
acuuracy: 0.6575
precision: 0.6618212669683258
recall: 0.6590190950751881
f_score: 0.6564289650166386


HBox(children=(FloatProgress(value=0.0, max=240.0), HTML(value='')))

epoch: 1, step: 20, loss: 0.6413537591695786
epoch: 1, step: 40, loss: 0.6453778505325317
epoch: 1, step: 60, loss: 0.6270778894424438
epoch: 1, step: 80, loss: 0.6254492372274398
epoch: 1, step: 100, loss: 0.6196135669946671
epoch: 1, step: 120, loss: 0.6073072016239166
epoch: 1, step: 140, loss: 0.621847602725029
epoch: 1, step: 160, loss: 0.6183683395385742
epoch: 1, step: 180, loss: 0.6068349838256836
epoch: 1, step: 200, loss: 0.6140138223767281
epoch: 1, step: 220, loss: 0.6076585859060287
epoch: 1, step: 240, loss: 0.6092404663562775



HBox(children=(FloatProgress(value=0.0, max=12.0), HTML(value='')))


loss: 0.6064839489758015
acuuracy: 0.7116666666666667
precision: 0.7126871385653708
recall: 0.7122899361088956
f_score: 0.7116153982930298



## **Add Multi-Head Self-Attention**

In [None]:
class BiGRUAttentionNetwork(BiGRUNetwork):
    
  def __init__(self, embed_vectors, hidden_size=128, num_layers=2, classes=2, bidirectional=True):
    super(BiGRUAttentionNetwork, self).__init__(embed_vectors, hidden_size, num_layers, classes, bidirectional)
    self.multihead_attn = nn.MultiheadAttention(embed_dim=self.hidden_size * 2, num_heads=8, dropout=0.5)
  
  def _attention_forward(self, x):
    x = x.transpose(1, 0).contiguous()
    attn_output, attn_output_weights = self.multihead_attn(query=x, key=x, value=x)
    return attn_output.transpose(1, 0).contiguous(), attn_output_weights
  
  def forward(self, x):

    gru_out, hidden = self._gru_forward(x)
    
    attn_output, attn_output_weights = self._attention_forward(gru_out)

    return self._output_forward(attn_output)
    

model = BiGRUAttentionNetwork(embed_vectors=embed_vectors)

## **Re-define optimizer and train the network**

In [None]:
parameters = filter(lambda p: p.requires_grad, model.parameters())
optimizer = optim.Adam(parameters, lr=1e-4)

scheduler = get_linear_schedule_with_warmup(
  optimizer, num_warmup_steps=0, num_training_steps=num_training_steps)

for epoch in tqdm(range(epochs)):  # loop over the dataset multiple times

  running_loss = 0.0
  model.train()
  for step, (batch_data, batch_label) in tqdm(enumerate(train_dataloader, 0), total=len(train_dataloader)):
    # zero the parameter gradients
    optimizer.zero_grad()

    # forward + backward + optimize
    probs = model(batch_data)
    loss = criterion(probs, batch_label)
    loss.backward()
    
    optimizer.step()
    scheduler.step()
    
    running_loss += loss.item()
    if (step + 1) % 20 == 0 and step != 0:
      running_loss /= 20
      print('epoch: {}, step: {}, loss: {}'.format(epoch, step + 1, running_loss))
      running_loss = 0.0

  get_test_result(model, test_dataloader)

HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=240.0), HTML(value='')))

epoch: 0, step: 20, loss: 0.6907199084758758
epoch: 0, step: 40, loss: 0.6895082980394364
epoch: 0, step: 60, loss: 0.687746649980545
epoch: 0, step: 80, loss: 0.6748631119728088
epoch: 0, step: 100, loss: 0.6678098708391189
epoch: 0, step: 120, loss: 0.6411935061216354
epoch: 0, step: 140, loss: 0.6376170501112938
epoch: 0, step: 160, loss: 0.6468523472547532
epoch: 0, step: 180, loss: 0.5977820664644241
epoch: 0, step: 200, loss: 0.6279011845588685
epoch: 0, step: 220, loss: 0.5891267463564873
epoch: 0, step: 240, loss: 0.5757665619254112



HBox(children=(FloatProgress(value=0.0, max=12.0), HTML(value='')))


loss: 0.5633181971808274
acuuracy: 0.7383333333333333
precision: 0.7382681774051314
recall: 0.7380297973215204
f_score: 0.7380976211924065


HBox(children=(FloatProgress(value=0.0, max=240.0), HTML(value='')))

epoch: 1, step: 20, loss: 0.5656714141368866
epoch: 1, step: 40, loss: 0.5683722749352456
epoch: 1, step: 60, loss: 0.5198346883058548
epoch: 1, step: 80, loss: 0.5151140362024307
epoch: 1, step: 100, loss: 0.5087313279509544
epoch: 1, step: 120, loss: 0.5048629522323609
epoch: 1, step: 140, loss: 0.5069108605384827
epoch: 1, step: 160, loss: 0.5122437879443169
epoch: 1, step: 180, loss: 0.4968088656663895
epoch: 1, step: 200, loss: 0.5191928908228874
epoch: 1, step: 220, loss: 0.5026273995637893
epoch: 1, step: 240, loss: 0.5055032625794411



HBox(children=(FloatProgress(value=0.0, max=12.0), HTML(value='')))


loss: 0.4938067732503017
acuuracy: 0.8141666666666667
precision: 0.8141243786074133
recall: 0.8139890670898282
f_score: 0.8140425659068309



# **Lesson05: BERT, Transformers (using huggingface)**

## **Get dataset**

In [None]:
data, (X_train, X_test, y_train, y_test), cut_values = get_data(
  path='ChnSentiCorp_htl_ba_6000_cutted.csv', is_cutted=False)

## **Install huggingface's transformers and load BERT-WWM**

In [None]:
!pip install transformers==2.11.0

Collecting transformers==2.11.0
[?25l  Downloading https://files.pythonhosted.org/packages/48/35/ad2c5b1b8f99feaaf9d7cdadaeef261f098c6e1a6a2935d4d07662a6b780/transformers-2.11.0-py3-none-any.whl (674kB)
[K     |▌                               | 10kB 24.3MB/s eta 0:00:01[K     |█                               | 20kB 6.1MB/s eta 0:00:01[K     |█▌                              | 30kB 8.5MB/s eta 0:00:01[K     |██                              | 40kB 10.6MB/s eta 0:00:01[K     |██▍                             | 51kB 6.9MB/s eta 0:00:01[K     |███                             | 61kB 8.0MB/s eta 0:00:01[K     |███▍                            | 71kB 8.1MB/s eta 0:00:01[K     |███▉                            | 81kB 9.0MB/s eta 0:00:01[K     |████▍                           | 92kB 8.1MB/s eta 0:00:01[K     |████▉                           | 102kB 8.8MB/s eta 0:00:01[K     |█████▍                          | 112kB 8.8MB/s eta 0:00:01[K     |█████▉                          | 

### load BERT-WWM model: rbtl3

In [None]:
from transformers import BertModel, BertTokenizer, BertConfig


pretrained_weights = 'hfl/rbtl3'

config = BertConfig.from_pretrained(pretrained_weights)
tokenizer = BertTokenizer.from_pretrained(pretrained_weights)
model = BertModel.from_pretrained(pretrained_weights, config=config)

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=757.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=109540.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=2.0, style=ProgressStyle(description_wi…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=112.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=19.0, style=ProgressStyle(description_w…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=248320487.0, style=ProgressStyle(descri…




## **Define Dataset and Collate Function**

In [None]:
from tqdm.auto import tqdm
from torch import tensor, stack
from torch.utils.data.dataset import Dataset
from torch.utils.data import DataLoader


class BertDataset(Dataset):
  def __init__(self, data, label, tokenizer, max_seq_length=128):
    tokens = []
    for sent in tqdm(data, total=len(data)):
      token = tokenizer.encode_plus(
        sent,
        add_special_tokens=True,  # Add '[CLS]' and '[SEP]'
        max_length=max_seq_length, # Pad & truncate all sentences.
        pad_to_max_length=True,
        return_token_type_ids=True,
        return_attention_mask=True,   # Construct attn. masks.
        return_tensors='pt')     # Return pytorch tensors.
      token_dict = {}
      token_dict['input_ids'] = token['input_ids'].squeeze()
      token_dict['token_type_ids'] = token['token_type_ids'].squeeze()
      token_dict['attention_mask'] = token['attention_mask'].squeeze()
      tokens.append(token_dict)

    self.data = tokens
    self.label = label

  def __len__(self):
    return len(self.data)
      
  def __getitem__(self, idx):
    return self.data[idx], self.label[idx]

    
def collate_fn(batch):
  data, label = zip(*batch)
  return {key: stack([d[key]
    for d in data], 0, out=None) for key in data[0]}, tensor(label)


train_dataset_bert = BertDataset(data=X_train, label=y_train, tokenizer=tokenizer)
train_dataloader_bert = DataLoader(train_dataset_bert, batch_size=20, collate_fn=collate_fn)

test_dataset_bert = BertDataset(data=X_test, label=y_test, tokenizer=tokenizer)
test_dataloader_bert = DataLoader(test_dataset_bert, batch_size=100, collate_fn=collate_fn)

HBox(children=(FloatProgress(value=0.0, max=4800.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1200.0), HTML(value='')))




## **Define Network**

In [None]:
from torch import nn


class BERTNetwork(nn.Module):
  def __init__(self, bert, hidden_size, dropout=0.5, num_labels=2):
    super(BERTNetwork, self).__init__()
    self.bert = bert
    self.dropout = nn.Dropout(dropout)
    self.classifier = nn.Linear(hidden_size, num_labels)
    self.output = nn.Softmax(dim=1)

  def forward(self, input_ids, token_type_ids, attention_mask):
    sequence_output, pooled_output = self.bert(
      input_ids=input_ids, token_type_ids=token_type_ids, 
      attention_mask=attention_mask)
    pooled_output = self.dropout(pooled_output)
    logits = self.classifier(pooled_output)
    return self.output(logits)


model_bert = BERTNetwork(bert=model, hidden_size=config.hidden_size)

## **Define a Loss function, optimizer and scheduler**

In [None]:
from transformers import AdamW, get_linear_schedule_with_warmup


epochs = 2

num_training_steps = len(train_dataloader_bert) * epochs

criterion = nn.CrossEntropyLoss()

optimizer = AdamW(model.parameters(), lr=5e-5, eps=1e-8)
scheduler = get_linear_schedule_with_warmup(
  optimizer, num_warmup_steps=0, num_training_steps=num_training_steps)

## **Re-define Test Function**

In [None]:
def get_test_result(model, test_dataloader):
  total_probs = None
  total_label = None
  with torch.no_grad():
    for batch_data, batch_label in tqdm(test_dataloader, total=len(test_dataloader)):
      probs = model(**batch_data)
      if total_probs is None:
        total_probs = probs.numpy()
      else:
        total_probs = np.concatenate((total_probs, probs.numpy()))
      if total_label is None:
        total_label = batch_label
      else:
        total_label = np.concatenate((total_label, batch_label))

    metric_fn = Metric(total_label)
    metric_fn.get_metric(F.softmax(tensor(total_probs), dim=1))

## **Train the Network**

In [None]:

for epoch in tqdm(range(epochs)):  # loop over the dataset multiple times

  running_loss = 0.0
  model_bert.train()
  for step, (batch_data, batch_label) in tqdm(enumerate(train_dataloader_bert, 0), total=len(train_dataloader_bert)):
    # zero the parameter gradients
    optimizer.zero_grad()

    # forward + backward + optimize
    probs = model_bert(**batch_data)
    loss = criterion(probs, batch_label)
    loss.backward()
    
    optimizer.step()
    scheduler.step()
    
    running_loss += loss.item()
    if (step + 1) % 20 == 0 and step != 0:
      running_loss /= 20
      print('epoch: {}, step: {}, loss: {}'.format(epoch, step + 1, running_loss))
      running_loss = 0.0

  get_test_result(model_bert, test_dataloader_bert)

HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=240.0), HTML(value='')))

epoch: 0, step: 20, loss: 0.6235000222921372
epoch: 0, step: 40, loss: 0.5263320669531822
epoch: 0, step: 60, loss: 0.4666388794779778
epoch: 0, step: 80, loss: 0.4650882348418236
epoch: 0, step: 100, loss: 0.4584090277552605
epoch: 0, step: 120, loss: 0.4368864193558693
epoch: 0, step: 140, loss: 0.4804265409708023
epoch: 0, step: 160, loss: 0.43328375965356825
epoch: 0, step: 180, loss: 0.43510876297950746
epoch: 0, step: 200, loss: 0.44604685008525846
epoch: 0, step: 220, loss: 0.42556606978178024
epoch: 0, step: 240, loss: 0.456384913623333



HBox(children=(FloatProgress(value=0.0, max=12.0), HTML(value='')))


loss: 0.421281964952747
acuuracy: 0.8875
precision: 0.8875
recall: 0.8876819951588384
f_score: 0.8874867953252847


HBox(children=(FloatProgress(value=0.0, max=240.0), HTML(value='')))

epoch: 1, step: 20, loss: 0.41248242259025575
epoch: 1, step: 40, loss: 0.43781994134187696
epoch: 1, step: 60, loss: 0.3987840101122856
epoch: 1, step: 80, loss: 0.40648560523986815
epoch: 1, step: 100, loss: 0.4051087886095047
epoch: 1, step: 120, loss: 0.3793901279568672
epoch: 1, step: 140, loss: 0.4142871454358101
epoch: 1, step: 160, loss: 0.39621806144714355
epoch: 1, step: 180, loss: 0.39724526554346085
epoch: 1, step: 200, loss: 0.4018104910850525
epoch: 1, step: 220, loss: 0.39915278255939485
epoch: 1, step: 240, loss: 0.39116141051054



HBox(children=(FloatProgress(value=0.0, max=12.0), HTML(value='')))


loss: 0.4133794599523147
acuuracy: 0.8966666666666666
precision: 0.8979600357390383
recall: 0.8973406960489786
f_score: 0.8966525999372137

