### sumbt_baseline(single train)
- checkpoint
    - add checkpoint
    - add checkpoint saving process
    - update checkpoint type(available countinous training)
- update validation per epoch or minimal loss
- add wandb

In [1]:
import sys
sys.path.append('..')

In [2]:
import os
from pathlib import Path
import json
from tqdm import tqdm
import random
from collections import defaultdict

import numpy as np

import torch
from transformers import BertTokenizer
from data_utils import get_examples_from_dialogues, convert_state_dict, load_dataset
from data_utils import OntologyDSTFeature, DSTPreprocessor, _truncate_seq_pair

### wandb

In [3]:
import wandb
# !wandb login  # run once

In [4]:
def increment_output_dir(output_path, exist_ok=False):
  path = Path(output_path)
  if (path.exists() and exist_ok) or (not path.exists()):
    return str(path)
  else:
    dirs = glob.glob(f"{path}*")
    matches = [re.search(rf"%s(\d+)" %path.stem, d) for d in dirs]
    i = [int(m.groups()[0]) for m in matches if m]
    n = max(i) + 1 if i else 2
    return f"{path}{n}"

### argparse setting

In [5]:
from argparse import Namespace

args = {
    'batch_size': 12,  # 8
    'hidden_dim': 300,
    'num_rnn_layers': 1,
    'zero_init_rnn': False,
    'max_seq_length': 64,
    'max_label_length': 12,
    'attn_head': 8,  # 4
    'fix_utterance_encoder': False,
    'task_name': 'sumbtgru',
    'distance_metric': 'euclidean',
    'model_name_or_path': 'dsksd/bert-ko-small-minimal',
    'warmup_ratio': 0.1,
    'learning_rate': 1e-4,  # 5e-5, 
    'weight_decay': 0.001,  # 0.01
    'num_train_epochs': 25
}

args = Namespace(**args)

In [6]:
# wandb sweep 생성 시 parameters에 전달하는 config 설정
hyperparameter_defaults = dict(
    batch_size = args.batch_size,
    learning_rate = args.learning_rate,
    epochs = args.num_train_epochs,
    weight_decay = args.weight_decay,
    attn_head = args.attn_head,
    distance_metric = args.distance_metric,
    
#     dropout = 0.1,
#     smoothing = 0.2
#     model_name = 'BertForSequenceClassification',
#     tokenizer_name = 'BertTokenizer',
    )

wandb.init(config=hyperparameter_defaults, project="SUMBT-sweep")
config = wandb.config

[34m[1mwandb[0m: Currently logged in as: [33mtaepd[0m (use `wandb login --relogin` to force relogin)
[34m[1mwandb[0m: wandb version 0.10.29 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


- wandb sweep config

In [7]:
# sweep_config = {
#   "name": "SUMBT-sweep",
#   "method": "bayes",
#   "metric": {
#       "goal": "maximize",
#       "name": "Joint Goal Accuracy"},
#   "parameters": {
#       "attn_head": {
#           "distribution": "int_uniform",
#           "max": 12,
#           "min": 4
#       },
#       "batch_size": {
#           "distribution": "int_uniform",
#           "max": 12,
#           "min": 8
#       },
#       "distance_metric": {
#           "distribution": "categorical",
#           "values": ["euclidean", "cosine"]
#       },
#       "learning_rate": {
#           "distribution": "uniform",
#           "max": 1e-03,
#           "min": 5e-05
#       },
#       "weight_decay": {
#           "distribution": "uniform",
#           "max": 0.02,
#           "min": 0.005
#       }
#     }
# }

# # sweep_id = wandb.sweep(sweep_config, project="SUMBT-sweep")
# # sweep_id

In [8]:
def seed_everything(seed):
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # if use multi-GPU        
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    np.random.seed(seed)
    random.seed(seed)

seed_everything(42)

## Data Loading 

In [9]:
train_data_file = "/opt/ml/repo/taepd/input/data/train_dataset/train_dials.json"
slot_meta = json.load(open("/opt/ml/repo/taepd/input/data/train_dataset/slot_meta.json"))
ontology = json.load(open("/opt/ml/repo/taepd/input/data/train_dataset/ontology.json"))
train_data, dev_data, dev_labels = load_dataset(train_data_file)

In [10]:
train_examples = get_examples_from_dialogues(data=train_data,
                                             user_first=True,
                                             dialogue_level=True)

dev_examples = get_examples_from_dialogues(data=dev_data,
                                           user_first=True,
                                           dialogue_level=True)

100%|██████████| 6301/6301 [00:00<00:00, 8760.54it/s]
100%|██████████| 699/699 [00:00<00:00, 2938.77it/s]


In [11]:
len(train_data)

6301

In [12]:
max_turn = max([len(e['dialogue']) for e in train_data])
tokenizer = BertTokenizer.from_pretrained('dsksd/bert-ko-small-minimal')

In [13]:
print(max_turn)

34


## TODO-1: SUMBT Preprocessor 정의 

Ontology-based DST model인 SUMBT의 InputFeature를 만들기 위한 Preprocessor를 정의해야 합니다. <br>

1. `_convert_examples_to_features` 함수의 빈칸을 매워 완성하세요.
2. `recover_state` 함수의 빈칸을 매워 완성하세요.

In [14]:
class SUMBTPreprocessor(DSTPreprocessor):
    def __init__(
        self,
        slot_meta,
        src_tokenizer,
        trg_tokenizer=None,
        ontology=None,
        max_seq_length=64,
        max_turn_length=12,
    ):
        self.slot_meta = slot_meta
        self.src_tokenizer = src_tokenizer
        self.trg_tokenizer = trg_tokenizer if trg_tokenizer else src_tokenizer
        self.ontology = ontology
        self.max_seq_length = max_seq_length
        self.max_turn_length = max_turn_length

    def _convert_example_to_feature(self, example):
        guid = example[0].guid.rsplit("-", 1)[0]  # dialogue_idx
        turns = []
        token_types = []
        labels = []
        num_turn = None
        for turn in example[: self.max_turn_length]:
            assert len(turn.current_turn) == 2
            uttrs = []
            for segment_idx, uttr in enumerate(turn.current_turn):
                token = self.src_tokenizer.encode(uttr, add_special_tokens=False)
                uttrs.append(token)

            _truncate_seq_pair(uttrs[0], uttrs[1], self.max_seq_length - 3)
            tokens = (
                [self.src_tokenizer.cls_token_id]
                + uttrs[0]
                + [self.src_tokenizer.sep_token_id]
                + uttrs[1]
                + [self.src_tokenizer.sep_token_id]
            )
            token_type = [0] * (len(uttrs[0]) + 2) + [1] * (len(uttrs[1]) + 1)
            if len(tokens) < self.max_seq_length:
                gap = self.max_seq_length - len(tokens)
                tokens.extend([self.src_tokenizer.pad_token_id] * gap)
                token_type.extend([0] * gap)
            turns.append(tokens)
            token_types.append(token_type)
            label = []
            if turn.label:
                slot_dict = convert_state_dict(turn.label)
            else:
                slot_dict = {}
            for slot_type in self.slot_meta:
                value = slot_dict.get(slot_type, "none")
                # TODO
                # raise Exception('label_idx를 ontology에서 꺼내오는 코드를 작성하세요!')
#                 label_idx = self.ontology[slot_type].index(value)  # 이렇게 해도 될듯한데?
                if value in self.ontology[slot_type]:
                    label_idx = self.ontology[slot_type].index(value)
                else:
                    label_idx = self.ontology[slot_type].index("none")
                label.append(label_idx)
            labels.append(label)
        num_turn = len(turns)
        if len(turns) < self.max_turn_length:
            gap = self.max_turn_length - len(turns)
            for _ in range(gap):
                dummy_turn = [self.src_tokenizer.pad_token_id] * self.max_seq_length
                turns.append(dummy_turn)
                token_types.append(dummy_turn)
                dummy_label = [-1] * len(self.slot_meta)
                labels.append(dummy_label)
        return OntologyDSTFeature(
            guid=guid,
            input_ids=turns,
            segment_ids=token_types,
            num_turn=num_turn,
            target_ids=labels,
        )

    def convert_examples_to_features(self, examples):
        return list(map(self._convert_example_to_feature, examples))

    def recover_state(self, pred_slots, num_turn):
        states = []
        for pred_slot in pred_slots[:num_turn]:
            state = []
            for s, p in zip(self.slot_meta, pred_slot):
                v = self.ontology[s][p]
                if v != "none":
                    state.append(f"{s}-{v}")
            states.append(state)
        return states

    def collate_fn(self, batch):
        guids = [b.guid for b in batch]
        input_ids = torch.LongTensor([b.input_ids for b in batch])
        segment_ids = torch.LongTensor([b.segment_ids for b in batch])
        input_masks = input_ids.ne(self.src_tokenizer.pad_token_id)
        target_ids = torch.LongTensor([b.target_ids for b in batch])
        num_turns = [b.num_turn for b in batch]
        return input_ids, segment_ids, input_masks, target_ids, num_turns, guids

## Convert_Examples_to_Features 

In [15]:
processor = SUMBTPreprocessor(slot_meta,
                              tokenizer,
                              ontology=ontology,  # predefined ontology
                              max_seq_length=64,  # 각 turn마다 최대 길이
                              max_turn_length=max_turn)  # 각 dialogue의 최대 turn 길이
train_features = processor.convert_examples_to_features(train_examples)
dev_features = processor.convert_examples_to_features(dev_examples)

In [16]:
print(len(train_features))  # 대화 level의 features
print(len(dev_features))

6301
699


In [17]:
f = train_features[0]

print(f.guid)  # 대화 unique_id
print(f.num_turn)  # 실제 대화의 turn 길이 == T
print(len(f.input_ids))  # input_ids의 턴 길이 (max_turn_length == 현재 34)
print(len(f.input_ids[0]))  # input_ids에서 각 턴의 최대 길이 (max_seq_length == 64)
print(len(f.segment_ids))  # segment_ids의 턴 길이 (max_turn_length == 34)
print(len(f.target_ids))  # target_ids의 갯수 (턴마다의 State == max_turn_length == 34)
print(len(f.target_ids[0]))  # 각 턴마다 target의 갯수 == number of Slot Meta (== 45)

snowy-hat-8324:관광_식당_11
8
34
64
34
34
45


## SUMBT 모델 선언 

In [18]:
"""
Most of code is from https://github.com/SKTBrain/SUMBT
"""

import math
import os.path

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import CosineEmbeddingLoss, CrossEntropyLoss
from transformers import BertModel, BertPreTrainedModel


class BertForUtteranceEncoding(BertPreTrainedModel):
    def __init__(self, config):
        super(BertForUtteranceEncoding, self).__init__(config)

        self.config = config
        self.bert = BertModel(config)

    def forward(self, input_ids, token_type_ids, attention_mask):
        return self.bert(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            output_attentions=False,
            output_hidden_states=False,
            return_dict=False,
        )


class MultiHeadAttention(nn.Module):
    def __init__(self, heads, d_model, dropout=0.1):
        super().__init__()

        self.d_model = d_model
        self.d_k = d_model // heads
        self.h = heads

        self.q_linear = nn.Linear(d_model, d_model)
        self.v_linear = nn.Linear(d_model, d_model)
        self.k_linear = nn.Linear(d_model, d_model)
        self.dropout = nn.Dropout(dropout)
        self.out = nn.Linear(d_model, d_model)

        self.scores = None

    def attention(self, q, k, v, d_k, mask=None, dropout=None):

        scores = torch.matmul(q, k.transpose(-2, -1)) / math.sqrt(d_k)

        if mask is not None:
            mask = mask.unsqueeze(1)
            scores = scores.masked_fill(mask == 0, -1e9)
        scores = F.softmax(scores, dim=-1)

        if dropout is not None:
            scores = dropout(scores)

        self.scores = scores
        output = torch.matmul(scores, v)
        return output

    def forward(self, q, k, v, mask=None):
        bs = q.size(0)

        # perform linear operation and split into h heads
        k = self.k_linear(k).view(bs, -1, self.h, self.d_k)
        q = self.q_linear(q).view(bs, -1, self.h, self.d_k)
        v = self.v_linear(v).view(bs, -1, self.h, self.d_k)

        # transpose to get dimensions bs * h * sl * d_model
        k = k.transpose(1, 2)
        q = q.transpose(1, 2)
        v = v.transpose(1, 2)

        scores = self.attention(q, k, v, self.d_k, mask, self.dropout)

        # concatenate heads and put through final linear layer
        concat = scores.transpose(1, 2).contiguous().view(bs, -1, self.d_model)
        output = self.out(concat)
        return output

    def get_scores(self):
        return self.scores


class SUMBT(nn.Module):
    def __init__(self, args, num_labels, device):  # num_labels : # of Candidate values per each Slot
        super(SUMBT, self).__init__()

        self.hidden_dim = args.hidden_dim
        self.rnn_num_layers = args.num_rnn_layers
        self.zero_init_rnn = args.zero_init_rnn
        self.max_seq_length = args.max_seq_length
        self.max_label_length = args.max_label_length
        self.num_labels = num_labels
        self.num_slots = len(num_labels)
        self.attn_head = args.attn_head
        self.device = device

        ### Utterance Encoder
        self.utterance_encoder = BertForUtteranceEncoding.from_pretrained(
            args.model_name_or_path
        )
        self.bert_output_dim = self.utterance_encoder.config.hidden_size
        self.hidden_dropout_prob = self.utterance_encoder.config.hidden_dropout_prob
        if args.fix_utterance_encoder:
            for p in self.utterance_encoder.bert.pooler.parameters():
                p.requires_grad = False

        ### slot, slot-value Encoder (not trainable)
        self.sv_encoder = BertForUtteranceEncoding.from_pretrained(
            args.model_name_or_path
        )
        # os.path.join(args.bert_dir, 'bert-base-uncased.model'))
        for p in self.sv_encoder.bert.parameters():
            p.requires_grad = False

        self.slot_lookup = nn.Embedding(self.num_slots, self.bert_output_dim)
        self.value_lookup = nn.ModuleList(
            [nn.Embedding(num_label, self.bert_output_dim) for num_label in num_labels]
        )

        ### Attention layer
        self.attn = MultiHeadAttention(self.attn_head, self.bert_output_dim, dropout=0)

        ### RNN Belief Tracker
        self.nbt = nn.GRU(
            input_size=self.bert_output_dim,
            hidden_size=self.hidden_dim,
            num_layers=self.rnn_num_layers,
            dropout=self.hidden_dropout_prob,
            batch_first=True,
        )
        self.init_parameter(self.nbt)

        if not self.zero_init_rnn:
            self.rnn_init_linear = nn.Sequential(
                nn.Linear(self.bert_output_dim, self.hidden_dim),
                nn.ReLU(),
                nn.Dropout(self.hidden_dropout_prob),
            )

        self.linear = nn.Linear(self.hidden_dim, self.bert_output_dim)
        self.layer_norm = nn.LayerNorm(self.bert_output_dim)

        ### Measure
#         self.metric = torch.nn.PairwiseDistance(p=2.0, eps=1e-06, keepdim=False)
        self.distance_metric = args.distance_metric
        if self.distance_metric == "cosine":
            self.metric = torch.nn.CosineSimilarity(dim=-1, eps=1e-08)
        elif self.distance_metric == "euclidean":
            self.metric = torch.nn.PairwiseDistance(p=2.0, eps=1e-06, keepdim=False)

        ### Classifier
        self.nll = CrossEntropyLoss(ignore_index=-1)

        ### Etc.
        self.dropout = nn.Dropout(self.hidden_dropout_prob)

    def initialize_slot_value_lookup(self, label_ids, slot_ids):

        self.sv_encoder.eval()

        # Slot encoding
        slot_type_ids = torch.zeros(slot_ids.size(), dtype=torch.long).to(
            slot_ids.device
        )
        slot_mask = slot_ids > 0
        hid_slot, _ = self.sv_encoder(
            slot_ids.view(-1, self.max_label_length),
            slot_type_ids.view(-1, self.max_label_length),
            slot_mask.view(-1, self.max_label_length),
        )
        hid_slot = hid_slot[:, 0, :]
        hid_slot = hid_slot.detach()
        self.slot_lookup = nn.Embedding.from_pretrained(hid_slot, freeze=True)

        for s, label_id in enumerate(label_ids):
            label_type_ids = torch.zeros(label_id.size(), dtype=torch.long).to(
                label_id.device
            )
            label_mask = label_id > 0
            hid_label, _ = self.sv_encoder(
                label_id.view(-1, self.max_label_length),
                label_type_ids.view(-1, self.max_label_length),
                label_mask.view(-1, self.max_label_length),
            )
            hid_label = hid_label[:, 0, :]
            hid_label = hid_label.detach()
            self.value_lookup[s] = nn.Embedding.from_pretrained(hid_label, freeze=True)
            self.value_lookup[s].padding_idx = -1

        print("Complete initialization of slot and value lookup")
        self.sv_encoder = None

    def forward(
        self,
        input_ids,
        token_type_ids,
        attention_mask,
        labels=None,
        n_gpu=1,
        target_slot=None,
    ):
        # B: Batch size, M: Max turn length, N: Max seq length, 
        # J: # of Slot Meta, H: Hidden dimension
        
        # input_ids: [B, M, N]
        # token_type_ids: [B, M, N]
        # attention_mask: [B, M, N]
        # labels: [B, M, J]

        # if target_slot is not specified, output values corresponding all slot-types
        if target_slot is None:
            target_slot = list(range(0, self.num_slots))

        ds = input_ids.size(0)  # Batch size (B)
        ts = input_ids.size(1)  # Max turn size (M)
        bs = ds * ts
        slot_dim = len(target_slot)  # J

        # Utterance encoding
        # Utterence-level로 독립적으로 인코딩하므로 flatten필요
        hidden, _ = self.utterance_encoder(
            input_ids.view(-1, self.max_seq_length),
            token_type_ids.view(-1, self.max_seq_length),
            attention_mask.view(-1, self.max_seq_length),
        )
        hidden = torch.mul(
            hidden,
            attention_mask.view(-1, self.max_seq_length, 1)
            .expand(hidden.size())
            .float(),
        )
        hidden = hidden.repeat(slot_dim, 1, 1)  # [J*M*B, N, H]

        hid_slot = self.slot_lookup.weight[target_slot, :]  # Select target slot embedding
        hid_slot = hid_slot.repeat(1, bs).view(bs * slot_dim, -1)  # [J*M*B, N, H]

        # Attended utterance vector
        hidden = self.attn(
            hid_slot,  # q^s  [J*M*B, N, H]
            hidden,  # U [J*M*B, N, H]
            hidden,  # U [J*M*B, N, H]
            mask=attention_mask.view(-1, 1, self.max_seq_length).repeat(slot_dim, 1, 1),
        )
        hidden = hidden.squeeze()  # h [J*M*B, H] Aggregated Slot Context
        hidden = hidden.view(slot_dim, ds, ts, -1).view(-1, ts, self.bert_output_dim)  # [J*B, M, H]

        # NBT
        if self.zero_init_rnn:
            h = torch.zeros(
                self.rnn_num_layers, input_ids.shape[0] * slot_dim, self.hidden_dim
            ).to(
                self.device
            )  # [1, slot_dim*ds, hidden]
        else:
            h = hidden[:, 0, :].unsqueeze(0).repeat(self.rnn_num_layers, 1, 1)
            h = self.rnn_init_linear(h)

        if isinstance(self.nbt, nn.GRU):
            rnn_out, _ = self.nbt(hidden, h)  # [J*B, M, H_GRU]
        elif isinstance(self.nbt, nn.LSTM):
            c = torch.zeros(
                self.rnn_num_layers, input_ids.shape[0] * slot_dim, self.hidden_dim
            ).to(
                self.device
            )  # [1, slot_dim*ds, hidden]
            rnn_out, _ = self.nbt(hidden, (h, c))  # [slot_dim*ds, turn, hidden]
        rnn_out = self.layer_norm(self.linear(self.dropout(rnn_out)))

        hidden = rnn_out.view(slot_dim, ds, ts, -1)  # [J, B, M, H_GRU]

        # Label (slot-value) encoding
        loss = 0
        loss_slot = []
        pred_slot = []
        output = []
        for s, slot_id in enumerate(target_slot):  ## note: target_slots are successive
            # loss calculation
            hid_label = self.value_lookup[slot_id].weight
            num_slot_labels = hid_label.size(0)

            _hid_label = (
                hid_label.unsqueeze(0)
                .unsqueeze(0)
                .repeat(ds, ts, 1, 1)
                .view(ds * ts * num_slot_labels, -1)
            )
            _hidden = (
                hidden[s, :, :, :]
                .unsqueeze(2)
                .repeat(1, 1, num_slot_labels, 1)
                .view(ds * ts * num_slot_labels, -1)
            )
            _dist = self.metric(_hid_label, _hidden).view(ds, ts, num_slot_labels)
            if self.distance_metric == "euclidean":
                _dist = -_dist
            _, pred = torch.max(_dist, -1)  # taget_ids에서 ignore index 즉, padding일 경우 -1로 setting했었음
            pred_slot.append(pred.view(ds, ts, 1))
            output.append(_dist)

            if labels is not None:
                _loss = self.nll(_dist.view(ds * ts, -1), labels[:, :, s].view(-1))
                loss_slot.append(_loss.item())
                loss += _loss

        pred_slot = torch.cat(pred_slot, 2)
        if labels is None:
            return output, pred_slot

        # calculate joint accuracy
        accuracy = (pred_slot == labels).view(-1, slot_dim)
        acc_slot = (
            torch.sum(accuracy, 0).float()
            / torch.sum(labels.view(-1, slot_dim) > -1, 0).float()
        )
        acc = (
            sum(torch.sum(accuracy, 1) / slot_dim).float()
            / torch.sum(labels[:, :, 0].view(-1) > -1, 0).float()
        )  # joint accuracy
                
        if n_gpu == 1:
            return loss, loss_slot, acc, acc_slot, pred_slot
        else:
            return (
                loss.unsqueeze(0),
                None,
                acc.unsqueeze(0),
                acc_slot.unsqueeze(0),
                pred_slot.unsqueeze(0),
            )

    @staticmethod
    def init_parameter(module):
        if isinstance(module, nn.Linear):
            torch.nn.init.xavier_normal_(module.weight)
            torch.nn.init.constant_(module.bias, 0.0)
        elif isinstance(module, nn.GRU) or isinstance(module, nn.LSTM):
            torch.nn.init.xavier_normal_(module.weight_ih_l0)
            torch.nn.init.xavier_normal_(module.weight_hh_l0)
            torch.nn.init.constant_(module.bias_ih_l0, 0.0)
            torch.nn.init.constant_(module.bias_hh_l0, 0.0)

## TODO-2: Ontology Pre-Encoding 

Ontology의 slot type들과 이에 속하는 slot_value들을 tokenizing하는 `tokenize_ontology`를 작성하세요. <br>
[CLS] Pooling하여 `slot_lookup` 과 `value_lookup` embedding matrix들을 초기화하는 <br>
`initialize_slot_value_lookup`에 인자로 넘겨주세요. <br>

In [19]:
def tokenize_ontology(ontology, tokenizer, max_seq_length=12):
    slot_types = []
    slot_values = []
    for k, v in ontology.items():
        tokens = tokenizer.encode(k)
        if len(tokens) < max_seq_length:
            gap = max_seq_length - len(tokens)
            tokens.extend([tokenizer.pad_token_id] *  gap)
        slot_types.append(tokens)
        slot_value = []
        for vv in v:
            tokens = tokenizer.encode(vv)
            if len(tokens) < max_seq_length:
                gap = max_seq_length - len(tokens)
                tokens.extend([tokenizer.pad_token_id] *  gap)
            slot_value.append(tokens)
        slot_values.append(torch.LongTensor(slot_value))
    return torch.LongTensor(slot_types), slot_values

In [20]:
slot_type_ids, slot_values_ids = tokenize_ontology(ontology, tokenizer, 12)
num_labels = [len(s) for s in slot_values_ids]  # 각 Slot 별 후보 Values의 갯수
print(num_labels)
print("Tokenized Slot: ", slot_type_ids.size())
for slot, slot_value_id in zip(slot_meta, slot_values_ids):
    print(f"Tokenized Value of {slot}", slot_value_id.size())

[4, 4, 4, 4, 4, 103, 13, 4, 7, 5, 4, 4, 4, 12, 12, 9, 67, 4, 4, 7, 4, 7, 4, 4, 5, 4, 4, 12, 569, 9, 44, 4, 10, 4, 4, 7, 4, 60, 12, 60, 190, 298, 5, 431, 286]
Tokenized Slot:  torch.Size([45, 12])
Tokenized Value of 관광-경치 좋은 torch.Size([4, 12])
Tokenized Value of 관광-교육적 torch.Size([4, 12])
Tokenized Value of 관광-도보 가능 torch.Size([4, 12])
Tokenized Value of 관광-문화 예술 torch.Size([4, 12])
Tokenized Value of 관광-역사적 torch.Size([4, 12])
Tokenized Value of 관광-이름 torch.Size([103, 12])
Tokenized Value of 관광-종류 torch.Size([13, 12])
Tokenized Value of 관광-주차 가능 torch.Size([4, 12])
Tokenized Value of 관광-지역 torch.Size([7, 12])
Tokenized Value of 숙소-가격대 torch.Size([5, 12])
Tokenized Value of 숙소-도보 가능 torch.Size([4, 12])
Tokenized Value of 숙소-수영장 유무 torch.Size([4, 12])
Tokenized Value of 숙소-스파 유무 torch.Size([4, 12])
Tokenized Value of 숙소-예약 기간 torch.Size([12, 12])
Tokenized Value of 숙소-예약 명수 torch.Size([12, 12])
Tokenized Value of 숙소-예약 요일 torch.Size([9, 12])
Tokenized Value of 숙소-이름 torch.Size([67, 12])

## Model 선언 

In [21]:
# argsparse 있던 위치

num_labels = [len(s) for s in slot_values_ids]
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
n_gpu = 1 if torch.cuda.device_count() < 2 else torch.cuda.device_count()
n_epochs = args.num_train_epochs

In [22]:
model = SUMBT(args, num_labels, device)
model.initialize_slot_value_lookup(slot_values_ids, slot_type_ids)  # Tokenized Ontology의 Pre-encoding using BERT_SV
model.to(device)

wandb.watch(model)
# print()

Some weights of the model checkpoint at dsksd/bert-ko-small-minimal were not used when initializing BertForUtteranceEncoding: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForUtteranceEncoding from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForUtteranceEncoding from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at dsksd/bert-ko-small-minimal 

Complete initialization of slot and value lookup


[<wandb.wandb_torch.TorchGraph at 0x7f3a24941ed0>]

## 데이터 로더 정의

In [23]:
from data_utils import WOSDataset
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler
from transformers import AdamW, get_linear_schedule_with_warmup
import random


train_data = WOSDataset(train_features)
train_sampler = RandomSampler(train_data)
train_loader = DataLoader(train_data, batch_size=args.batch_size, sampler=train_sampler, collate_fn=processor.collate_fn)

dev_data = WOSDataset(dev_features)
dev_sampler = SequentialSampler(dev_data)
dev_loader = DataLoader(dev_data, batch_size=8, sampler=dev_sampler, collate_fn=processor.collate_fn)

## Optimizer & Scheduler 선언 

In [24]:
no_decay = ["bias", "LayerNorm.weight"]
optimizer_grouped_parameters = [
        {
            "params": [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)],
            "weight_decay": args.weight_decay,
        },
        {
            "params": [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)],
            "weight_decay": 0.0,
        },
    ]

t_total = len(train_loader) * n_epochs
optimizer = AdamW(optimizer_grouped_parameters, lr=args.learning_rate, eps=1e-8)
scheduler = get_linear_schedule_with_warmup(
    optimizer, num_warmup_steps=int(t_total * args.warmup_ratio), num_training_steps=t_total
)

## TODO-3: Inference code 작성 

In [25]:
from evaluation import _evaluation

In [26]:
def inference(model, eval_loader, processor, device):
    model.eval()
    predictions = {}
    for batch in eval_loader:
        input_ids, segment_ids, input_masks, target_ids, num_turns, guids = \
        [b.to(device) if not isinstance(b, list) else b for b in batch]

        with torch.no_grad():
            _, pred_slot = model(
                input_ids, segment_ids, input_masks, labels=None, n_gpu=1
            )
        
        batch_size = input_ids.size(0)
        for i in range(batch_size):
            guid = guids[i]
            states = processor.recover_state(pred_slot.tolist()[i], num_turns[i])
            for tid, state in enumerate(states):
                predictions[f"{guid}-{tid}"] = state
    return predictions

## Training 

In [27]:
# model.load_state_dict(torch.load('20epoch.pth'))

In [28]:
# for checkpoint management
chk_list = []
output_dir = increment_output_dir(wandb.run.name)

if not os.path.exists(f"checkpoint/{output_dir}"):
    os.makedirs(f"checkpoint/{output_dir}")       


best_score, best_checkpoint = 0, 0
epoch_miss_labels = defaultdict(list)
for epoch in range(n_epochs):
    batch_loss = []
    batch_loss_per_100step = []
    for step, batch in enumerate(tqdm(train_loader)):
        model.train()
        input_ids, segment_ids, input_masks, target_ids, num_turns, guids  = \
        [b.to(device) if not isinstance(b, list) else b for b in batch]

        # Forwabatch_size        
        if n_gpu == 1:
            loss, loss_slot, acc, acc_slot, _ = model(input_ids, segment_ids, input_masks, target_ids, n_gpu)
        else:
            loss, _, acc, acc_slot, _ = model(input_ids, segment_ids, input_masks, target_ids, n_gpu)

        batch_loss.append(loss.item())

        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()
        scheduler.step()
        optimizer.zero_grad()
#         if step % 100 == 0 or step == len(train_loader):
        if step % 100 == 0:
#             batch_loss_per_100step.append(loss.item())
            print('[%d/%d] [%d/%d] %f' % (epoch+1, n_epochs, step, len(train_loader), loss.item()))
#             # epoch를 마쳤거나, 최저 loss 갱신했을 때 추론
#             if step == len(train_loader) or min(batch_loss_per_100step) >= loss.item():
            print('inferencing')
            predictions = inference(model, dev_loader, processor, device)
#             eval_result = _evaluation(predictions, dev_labels, slot_meta)
            eval_result, batch_miss_labels = _evaluation(predictions, dev_labels, slot_meta)
            if epoch >= 5:
                epoch_miss_labels[epoch].extend(batch_miss_labels)   
        
            current_score = eval_result['joint_goal_accuracy']

            if best_score < current_score:
                best_score = current_score
                print('new best JGA score! ', best_score)

                # checkpoint 수 관리
                if len(chk_list) >= 2:
                    os.remove(chk_list.pop(0))

                output_path = f"checkpoint/{output_dir}/{epoch}_{step}_{best_score}.pth"
                chk_list.append(output_path)

                torch.save({
                            'epoch': epoch,
                            'model_state_dict': model.state_dict(),
                            'optimizer_state_dict': optimizer.state_dict(),
                            'loss': loss,
                            }, output_path)
            for k, v in eval_result.items():
                print(f"{k}: {v}")
            wandb.log({
                "loss": loss.item(),
                "Joint Goal Accuracy": eval_result['joint_goal_accuracy'],
                "Turn Slot_Accuracy": eval_result['turn_slot_accuracy'],
                "Turn Slot F1": eval_result['turn_slot_f1']
                })

  0%|          | 0/526 [00:00<?, ?it/s]

[0/25] [0/526] 122.008194
inferencing


  0%|          | 1/526 [00:31<4:38:30, 31.83s/it]

{'joint_goal_accuracy': 0.0, 'turn_slot_accuracy': 0.03896223316913063, 'turn_slot_f1': 0.05778458738865145}
joint_goal_accuracy: 0.0
turn_slot_accuracy: 0.03896223316913063
turn_slot_f1: 0.05778458738865145


 19%|█▉        | 100/526 [03:03<10:54,  1.54s/it]

[0/25] [100/526] 47.043095
inferencing
{'joint_goal_accuracy': 0.019310344827586208, 'turn_slot_accuracy': 0.8205363984674218, 'turn_slot_f1': 0.019310344827586208}
new best JGA score!  0.019310344827586208


 19%|█▉        | 101/526 [03:37<1:19:15, 11.19s/it]

joint_goal_accuracy: 0.019310344827586208
turn_slot_accuracy: 0.8205363984674218
turn_slot_f1: 0.019310344827586208


 38%|███▊      | 200/526 [06:09<08:20,  1.53s/it]  

[0/25] [200/526] 38.849449
inferencing


 38%|███▊      | 201/526 [06:41<57:09, 10.55s/it]

{'joint_goal_accuracy': 0.019310344827586208, 'turn_slot_accuracy': 0.820996168582364, 'turn_slot_f1': 0.022702117045293565}
joint_goal_accuracy: 0.019310344827586208
turn_slot_accuracy: 0.820996168582364
turn_slot_f1: 0.022702117045293565


 57%|█████▋    | 300/526 [09:13<05:46,  1.53s/it]

[0/25] [300/526] 34.605038
inferencing


 57%|█████▋    | 301/526 [09:44<39:34, 10.55s/it]

{'joint_goal_accuracy': 0.019310344827586208, 'turn_slot_accuracy': 0.8260405035577393, 'turn_slot_f1': 0.07822979014820194}
joint_goal_accuracy: 0.019310344827586208
turn_slot_accuracy: 0.8260405035577393
turn_slot_f1: 0.07822979014820194


 76%|███████▌  | 400/526 [12:16<03:13,  1.54s/it]

[0/25] [400/526] 27.276615
inferencing
{'joint_goal_accuracy': 0.020689655172413793, 'turn_slot_accuracy': 0.8402977558839664, 'turn_slot_f1': 0.18674548517773443}
new best JGA score!  0.020689655172413793


 76%|███████▌  | 401/526 [12:50<23:18, 11.19s/it]

joint_goal_accuracy: 0.020689655172413793
turn_slot_accuracy: 0.8402977558839664
turn_slot_f1: 0.18674548517773443


 95%|█████████▌| 500/526 [15:22<00:39,  1.54s/it]

[0/25] [500/526] 27.157665
inferencing
{'joint_goal_accuracy': 0.02108374384236453, 'turn_slot_accuracy': 0.8468877941981428, 'turn_slot_f1': 0.2396193991148409}
new best JGA score!  0.02108374384236453


 95%|█████████▌| 501/526 [15:56<04:40, 11.22s/it]

joint_goal_accuracy: 0.02108374384236453
turn_slot_accuracy: 0.8468877941981428
turn_slot_f1: 0.2396193991148409


100%|██████████| 526/526 [16:33<00:00,  1.89s/it]
  0%|          | 0/526 [00:00<?, ?it/s]

[1/25] [0/526] 24.498058
inferencing


  0%|          | 1/526 [00:31<4:36:44, 31.63s/it]

{'joint_goal_accuracy': 0.020492610837438422, 'turn_slot_accuracy': 0.8467476737821553, 'turn_slot_f1': 0.19621624814233468}
joint_goal_accuracy: 0.020492610837438422
turn_slot_accuracy: 0.8467476737821553
turn_slot_f1: 0.19621624814233468


 19%|█▉        | 100/526 [03:03<10:54,  1.54s/it]

[1/25] [100/526] 21.133490
inferencing
{'joint_goal_accuracy': 0.024039408866995075, 'turn_slot_accuracy': 0.85648166392994, 'turn_slot_f1': 0.2851829567418528}
new best JGA score!  0.024039408866995075


 19%|█▉        | 101/526 [03:37<1:19:44, 11.26s/it]

joint_goal_accuracy: 0.024039408866995075
turn_slot_accuracy: 0.85648166392994
turn_slot_f1: 0.2851829567418528


 38%|███▊      | 200/526 [06:10<08:20,  1.54s/it]  

[1/25] [200/526] 22.464117
inferencing
{'joint_goal_accuracy': 0.0445320197044335, 'turn_slot_accuracy': 0.8789928845101228, 'turn_slot_f1': 0.38845891338614286}
new best JGA score!  0.0445320197044335


 38%|███▊      | 201/526 [06:43<1:00:47, 11.22s/it]

joint_goal_accuracy: 0.0445320197044335
turn_slot_accuracy: 0.8789928845101228
turn_slot_f1: 0.38845891338614286


 57%|█████▋    | 300/526 [09:15<05:46,  1.53s/it]  

[1/25] [300/526] 17.755138
inferencing
{'joint_goal_accuracy': 0.0658128078817734, 'turn_slot_accuracy': 0.8995599343185589, 'turn_slot_f1': 0.5130460160169481}
new best JGA score!  0.0658128078817734


 57%|█████▋    | 301/526 [09:49<41:58, 11.19s/it]

joint_goal_accuracy: 0.0658128078817734
turn_slot_accuracy: 0.8995599343185589
turn_slot_f1: 0.5130460160169481


 76%|███████▌  | 400/526 [12:21<03:13,  1.54s/it]

[1/25] [400/526] 17.858919
inferencing
{'joint_goal_accuracy': 0.12394088669950738, 'turn_slot_accuracy': 0.9168035030104054, 'turn_slot_f1': 0.607562572907826}
new best JGA score!  0.12394088669950738


 76%|███████▌  | 401/526 [12:55<23:19, 11.20s/it]

joint_goal_accuracy: 0.12394088669950738
turn_slot_accuracy: 0.9168035030104054
turn_slot_f1: 0.607562572907826


 95%|█████████▌| 500/526 [15:27<00:39,  1.54s/it]

[1/25] [500/526] 14.658987
inferencing
{'joint_goal_accuracy': 0.16492610837438423, 'turn_slot_accuracy': 0.9315205254515699, 'turn_slot_f1': 0.683943248245405}
new best JGA score!  0.16492610837438423


 95%|█████████▌| 501/526 [16:01<04:40, 11.24s/it]

joint_goal_accuracy: 0.16492610837438423
turn_slot_accuracy: 0.9315205254515699
turn_slot_f1: 0.683943248245405


100%|██████████| 526/526 [16:38<00:00,  1.90s/it]
  0%|          | 0/526 [00:00<?, ?it/s]

[2/25] [0/526] 13.281662
inferencing


  0%|          | 1/526 [00:31<4:36:33, 31.61s/it]

{'joint_goal_accuracy': 0.15645320197044335, 'turn_slot_accuracy': 0.9298784893267736, 'turn_slot_f1': 0.6827849231209581}
joint_goal_accuracy: 0.15645320197044335
turn_slot_accuracy: 0.9298784893267736
turn_slot_f1: 0.6827849231209581


 19%|█▉        | 100/526 [03:03<10:54,  1.54s/it]

[2/25] [100/526] 9.981441
inferencing
{'joint_goal_accuracy': 0.2122167487684729, 'turn_slot_accuracy': 0.9517766830870428, 'turn_slot_f1': 0.7849503714608119}
new best JGA score!  0.2122167487684729


 19%|█▉        | 101/526 [03:37<1:19:25, 11.21s/it]

joint_goal_accuracy: 0.2122167487684729
turn_slot_accuracy: 0.9517766830870428
turn_slot_f1: 0.7849503714608119


 38%|███▊      | 200/526 [06:09<08:20,  1.54s/it]  

[2/25] [200/526] 10.284952
inferencing
{'joint_goal_accuracy': 0.2768472906403941, 'turn_slot_accuracy': 0.9597985769020428, 'turn_slot_f1': 0.8326654255456856}
new best JGA score!  0.2768472906403941


 38%|███▊      | 201/526 [06:43<1:00:45, 11.22s/it]

joint_goal_accuracy: 0.2768472906403941
turn_slot_accuracy: 0.9597985769020428
turn_slot_f1: 0.8326654255456856


 57%|█████▋    | 300/526 [09:15<05:47,  1.54s/it]  

[2/25] [300/526] 8.326200
inferencing
{'joint_goal_accuracy': 0.33911330049261085, 'turn_slot_accuracy': 0.9676803503010573, 'turn_slot_f1': 0.86322111156551}
new best JGA score!  0.33911330049261085


 57%|█████▋    | 301/526 [09:49<42:05, 11.22s/it]

joint_goal_accuracy: 0.33911330049261085
turn_slot_accuracy: 0.9676803503010573
turn_slot_f1: 0.86322111156551


 76%|███████▌  | 400/526 [12:21<03:13,  1.54s/it]

[2/25] [400/526] 8.585069
inferencing
{'joint_goal_accuracy': 0.4, 'turn_slot_accuracy': 0.9713541324575935, 'turn_slot_f1': 0.8790873362271333}
new best JGA score!  0.4


 76%|███████▌  | 401/526 [12:55<23:19, 11.20s/it]

joint_goal_accuracy: 0.4
turn_slot_accuracy: 0.9713541324575935
turn_slot_f1: 0.8790873362271333


 95%|█████████▌| 500/526 [15:27<00:39,  1.53s/it]

[2/25] [500/526] 4.307240
inferencing
{'joint_goal_accuracy': 0.4484729064039409, 'turn_slot_accuracy': 0.975675971538057, 'turn_slot_f1': 0.8962181491156744}
new best JGA score!  0.4484729064039409


 95%|█████████▌| 501/526 [16:01<04:40, 11.20s/it]

joint_goal_accuracy: 0.4484729064039409
turn_slot_accuracy: 0.975675971538057
turn_slot_f1: 0.8962181491156744


100%|██████████| 526/526 [16:38<00:00,  1.90s/it]
  0%|          | 0/526 [00:00<?, ?it/s]

[3/25] [0/526] 5.330597
inferencing
{'joint_goal_accuracy': 0.47507389162561575, 'turn_slot_accuracy': 0.9774712643678278, 'turn_slot_f1': 0.9045543668944712}
new best JGA score!  0.47507389162561575


  0%|          | 1/526 [00:33<4:55:24, 33.76s/it]

joint_goal_accuracy: 0.47507389162561575
turn_slot_accuracy: 0.9774712643678278
turn_slot_f1: 0.9045543668944712


 19%|█▉        | 100/526 [03:05<10:53,  1.53s/it]

[3/25] [100/526] 4.776783
inferencing
{'joint_goal_accuracy': 0.5355665024630542, 'turn_slot_accuracy': 0.9815610290093147, 'turn_slot_f1': 0.9219886175702413}
new best JGA score!  0.5355665024630542


 19%|█▉        | 101/526 [03:39<1:19:22, 11.21s/it]

joint_goal_accuracy: 0.5355665024630542
turn_slot_accuracy: 0.9815610290093147
turn_slot_f1: 0.9219886175702413


 38%|███▊      | 200/526 [06:12<08:20,  1.54s/it]  

[3/25] [200/526] 5.057821
inferencing
{'joint_goal_accuracy': 0.578128078817734, 'turn_slot_accuracy': 0.9839124247400234, 'turn_slot_f1': 0.9332587631709873}
new best JGA score!  0.578128078817734


 38%|███▊      | 201/526 [06:45<1:00:41, 11.20s/it]

joint_goal_accuracy: 0.578128078817734
turn_slot_accuracy: 0.9839124247400234
turn_slot_f1: 0.9332587631709873


 57%|█████▋    | 300/526 [09:17<05:46,  1.53s/it]  

[3/25] [300/526] 4.524399
inferencing
{'joint_goal_accuracy': 0.601576354679803, 'turn_slot_accuracy': 0.9857690202517881, 'turn_slot_f1': 0.9412458408379245}
new best JGA score!  0.601576354679803


 57%|█████▋    | 301/526 [09:51<41:58, 11.19s/it]

joint_goal_accuracy: 0.601576354679803
turn_slot_accuracy: 0.9857690202517881
turn_slot_f1: 0.9412458408379245


 76%|███████▌  | 400/526 [12:23<03:13,  1.54s/it]

[3/25] [400/526] 3.091749
inferencing
{'joint_goal_accuracy': 0.661871921182266, 'turn_slot_accuracy': 0.988356869184463, 'turn_slot_f1': 0.9544621912869099}
new best JGA score!  0.661871921182266


 76%|███████▌  | 401/526 [12:58<23:39, 11.36s/it]

joint_goal_accuracy: 0.661871921182266
turn_slot_accuracy: 0.988356869184463
turn_slot_f1: 0.9544621912869099


 95%|█████████▌| 500/526 [15:30<00:39,  1.54s/it]

[3/25] [500/526] 2.926061
inferencing
{'joint_goal_accuracy': 0.6935960591133005, 'turn_slot_accuracy': 0.9899507389162646, 'turn_slot_f1': 0.959832944468408}
new best JGA score!  0.6935960591133005


 95%|█████████▌| 501/526 [16:04<04:40, 11.21s/it]

joint_goal_accuracy: 0.6935960591133005
turn_slot_accuracy: 0.9899507389162646
turn_slot_f1: 0.959832944468408


100%|██████████| 526/526 [16:41<00:00,  1.90s/it]
  0%|          | 0/526 [00:00<?, ?it/s]

[4/25] [0/526] 2.470419
inferencing
{'joint_goal_accuracy': 0.7073891625615764, 'turn_slot_accuracy': 0.9901696770662378, 'turn_slot_f1': 0.9620353653032591}
new best JGA score!  0.7073891625615764


  0%|          | 1/526 [00:33<4:55:39, 33.79s/it]

joint_goal_accuracy: 0.7073891625615764
turn_slot_accuracy: 0.9901696770662378
turn_slot_f1: 0.9620353653032591


 19%|█▉        | 100/526 [03:05<10:54,  1.54s/it]

[4/25] [100/526] 4.735694
inferencing


 19%|█▉        | 101/526 [03:37<1:14:41, 10.54s/it]

{'joint_goal_accuracy': 0.694384236453202, 'turn_slot_accuracy': 0.989806239737281, 'turn_slot_f1': 0.9614197730780017}
joint_goal_accuracy: 0.694384236453202
turn_slot_accuracy: 0.989806239737281
turn_slot_f1: 0.9614197730780017


 38%|███▊      | 200/526 [06:09<08:19,  1.53s/it]  

[4/25] [200/526] 2.573931
inferencing
{'joint_goal_accuracy': 0.7089655172413794, 'turn_slot_accuracy': 0.9901171319102455, 'turn_slot_f1': 0.9614685139994836}
new best JGA score!  0.7089655172413794


 38%|███▊      | 201/526 [06:43<1:00:37, 11.19s/it]

joint_goal_accuracy: 0.7089655172413794
turn_slot_accuracy: 0.9901171319102455
turn_slot_f1: 0.9614685139994836


 57%|█████▋    | 300/526 [09:15<05:46,  1.53s/it]  

[4/25] [300/526] 3.882759
inferencing
{'joint_goal_accuracy': 0.7134975369458129, 'turn_slot_accuracy': 0.99030979748222, 'turn_slot_f1': 0.9644549362310922}
new best JGA score!  0.7134975369458129


 57%|█████▋    | 301/526 [09:49<41:57, 11.19s/it]

joint_goal_accuracy: 0.7134975369458129
turn_slot_accuracy: 0.99030979748222
turn_slot_f1: 0.9644549362310922


 76%|███████▌  | 400/526 [12:21<03:13,  1.54s/it]

[4/25] [400/526] 2.227322
inferencing
{'joint_goal_accuracy': 0.7174384236453202, 'turn_slot_accuracy': 0.9905769020251872, 'turn_slot_f1': 0.9621524281614832}
new best JGA score!  0.7174384236453202


 76%|███████▌  | 401/526 [12:55<23:21, 11.21s/it]

joint_goal_accuracy: 0.7174384236453202
turn_slot_accuracy: 0.9905769020251872
turn_slot_f1: 0.9621524281614832


 95%|█████████▌| 500/526 [15:27<00:39,  1.54s/it]

[4/25] [500/526] 1.642575
inferencing
{'joint_goal_accuracy': 0.744039408866995, 'turn_slot_accuracy': 0.9919518336070147, 'turn_slot_f1': 0.96829394170829}
new best JGA score!  0.744039408866995


 95%|█████████▌| 501/526 [16:01<04:40, 11.21s/it]

joint_goal_accuracy: 0.744039408866995
turn_slot_accuracy: 0.9919518336070147
turn_slot_f1: 0.96829394170829


100%|██████████| 526/526 [16:38<00:00,  1.90s/it]
  0%|          | 0/526 [00:00<?, ?it/s]

[5/25] [0/526] 2.278442
inferencing


  0%|          | 1/526 [00:31<4:36:14, 31.57s/it]

{'joint_goal_accuracy': 0.7399014778325124, 'turn_slot_accuracy': 0.9913125342090945, 'turn_slot_f1': 0.967544160427625}
joint_goal_accuracy: 0.7399014778325124
turn_slot_accuracy: 0.9913125342090945
turn_slot_f1: 0.967544160427625


 19%|█▉        | 100/526 [03:03<10:54,  1.54s/it]

[5/25] [100/526] 2.092332
inferencing
{'joint_goal_accuracy': 0.7574384236453202, 'turn_slot_accuracy': 0.9922627257799731, 'turn_slot_f1': 0.9703481531867019}
new best JGA score!  0.7574384236453202


 19%|█▉        | 101/526 [03:37<1:19:22, 11.20s/it]

joint_goal_accuracy: 0.7574384236453202
turn_slot_accuracy: 0.9922627257799731
turn_slot_f1: 0.9703481531867019


 38%|███▊      | 200/526 [06:09<08:21,  1.54s/it]  

[5/25] [200/526] 1.921038
inferencing


 38%|███▊      | 201/526 [06:41<57:07, 10.55s/it]

{'joint_goal_accuracy': 0.747192118226601, 'turn_slot_accuracy': 0.9920218938150032, 'turn_slot_f1': 0.9692436143821753}
joint_goal_accuracy: 0.747192118226601
turn_slot_accuracy: 0.9920218938150032
turn_slot_f1: 0.9692436143821753


 57%|█████▋    | 300/526 [09:13<05:47,  1.54s/it]

[5/25] [300/526] 1.334854
inferencing
{'joint_goal_accuracy': 0.7666995073891626, 'turn_slot_accuracy': 0.9927312534209176, 'turn_slot_f1': 0.9719837393841041}
new best JGA score!  0.7666995073891626


 57%|█████▋    | 301/526 [09:47<42:00, 11.20s/it]

joint_goal_accuracy: 0.7666995073891626
turn_slot_accuracy: 0.9927312534209176
turn_slot_f1: 0.9719837393841041


 76%|███████▌  | 400/526 [12:19<03:13,  1.54s/it]

[5/25] [400/526] 1.627242
inferencing


 76%|███████▌  | 401/526 [12:51<21:58, 10.55s/it]

{'joint_goal_accuracy': 0.7487684729064039, 'turn_slot_accuracy': 0.9918160919540295, 'turn_slot_f1': 0.9684853551137395}
joint_goal_accuracy: 0.7487684729064039
turn_slot_accuracy: 0.9918160919540295
turn_slot_f1: 0.9684853551137395


 95%|█████████▌| 500/526 [15:23<00:39,  1.54s/it]

[5/25] [500/526] 1.760462
inferencing


 95%|█████████▌| 501/526 [15:54<04:23, 10.55s/it]

{'joint_goal_accuracy': 0.7550738916256158, 'turn_slot_accuracy': 0.9922408319649781, 'turn_slot_f1': 0.9691750208872085}
joint_goal_accuracy: 0.7550738916256158
turn_slot_accuracy: 0.9922408319649781
turn_slot_f1: 0.9691750208872085


100%|██████████| 526/526 [16:31<00:00,  1.89s/it]
  0%|          | 0/526 [00:00<?, ?it/s]

[6/25] [0/526] 1.738819
inferencing


  0%|          | 1/526 [00:31<4:36:15, 31.57s/it]

{'joint_goal_accuracy': 0.7601970443349754, 'turn_slot_accuracy': 0.9925342090859414, 'turn_slot_f1': 0.9709685495312239}
joint_goal_accuracy: 0.7601970443349754
turn_slot_accuracy: 0.9925342090859414
turn_slot_f1: 0.9709685495312239


 19%|█▉        | 100/526 [03:03<10:54,  1.54s/it]

[6/25] [100/526] 2.235587
inferencing


 19%|█▉        | 101/526 [03:35<1:14:43, 10.55s/it]

{'joint_goal_accuracy': 0.7466009852216748, 'turn_slot_accuracy': 0.9919474548440157, 'turn_slot_f1': 0.9689321864037441}
joint_goal_accuracy: 0.7466009852216748
turn_slot_accuracy: 0.9919474548440157
turn_slot_f1: 0.9689321864037441


 38%|███▊      | 200/526 [06:07<08:20,  1.53s/it]  

[6/25] [200/526] 1.868284
inferencing


 38%|███▊      | 201/526 [06:39<57:07, 10.55s/it]

{'joint_goal_accuracy': 0.7566502463054188, 'turn_slot_accuracy': 0.9922671045429755, 'turn_slot_f1': 0.9697538993668888}
joint_goal_accuracy: 0.7566502463054188
turn_slot_accuracy: 0.9922671045429755
turn_slot_f1: 0.9697538993668888


 57%|█████▋    | 300/526 [09:11<05:46,  1.54s/it]

[6/25] [300/526] 1.040709
inferencing


 57%|█████▋    | 301/526 [09:42<39:32, 10.55s/it]

{'joint_goal_accuracy': 0.7635467980295566, 'turn_slot_accuracy': 0.9927181171319189, 'turn_slot_f1': 0.9719037039018712}
joint_goal_accuracy: 0.7635467980295566
turn_slot_accuracy: 0.9927181171319189
turn_slot_f1: 0.9719037039018712


 76%|███████▌  | 400/526 [12:14<03:13,  1.54s/it]

[6/25] [400/526] 1.520730
inferencing


 76%|███████▌  | 401/526 [12:46<21:58, 10.55s/it]

{'joint_goal_accuracy': 0.7540886699507389, 'turn_slot_accuracy': 0.9924028461959593, 'turn_slot_f1': 0.9695314321324718}
joint_goal_accuracy: 0.7540886699507389
turn_slot_accuracy: 0.9924028461959593
turn_slot_f1: 0.9695314321324718


 95%|█████████▌| 500/526 [15:18<00:39,  1.54s/it]

[6/25] [500/526] 1.116300
inferencing
{'joint_goal_accuracy': 0.7779310344827586, 'turn_slot_accuracy': 0.9929414340448931, 'turn_slot_f1': 0.9729206751901961}
new best JGA score!  0.7779310344827586


 95%|█████████▌| 501/526 [15:52<04:40, 11.21s/it]

joint_goal_accuracy: 0.7779310344827586
turn_slot_accuracy: 0.9929414340448931
turn_slot_f1: 0.9729206751901961


100%|██████████| 526/526 [16:29<00:00,  1.88s/it]
  0%|          | 0/526 [00:00<?, ?it/s]

[7/25] [0/526] 1.676416
inferencing


  0%|          | 1/526 [00:31<4:36:15, 31.57s/it]

{'joint_goal_accuracy': 0.7710344827586207, 'turn_slot_accuracy': 0.9928626163109018, 'turn_slot_f1': 0.9724253221161883}
joint_goal_accuracy: 0.7710344827586207
turn_slot_accuracy: 0.9928626163109018
turn_slot_f1: 0.9724253221161883


 19%|█▉        | 100/526 [03:03<10:54,  1.54s/it]

[7/25] [100/526] 1.789270
inferencing


 19%|█▉        | 101/526 [03:35<1:14:42, 10.55s/it]

{'joint_goal_accuracy': 0.7643349753694582, 'turn_slot_accuracy': 0.9926130268199328, 'turn_slot_f1': 0.9715115717293873}
joint_goal_accuracy: 0.7643349753694582
turn_slot_accuracy: 0.9926130268199328
turn_slot_f1: 0.9715115717293873


 38%|███▊      | 200/526 [06:07<08:21,  1.54s/it]  

[7/25] [200/526] 2.514629
inferencing


 38%|███▊      | 201/526 [06:38<57:06, 10.54s/it]

{'joint_goal_accuracy': 0.7718226600985222, 'turn_slot_accuracy': 0.9929064039408944, 'turn_slot_f1': 0.9719295383591855}
joint_goal_accuracy: 0.7718226600985222
turn_slot_accuracy: 0.9929064039408944
turn_slot_f1: 0.9719295383591855


 57%|█████▋    | 300/526 [09:11<05:47,  1.54s/it]

[7/25] [300/526] 1.812333
inferencing


 57%|█████▋    | 301/526 [09:42<39:32, 10.54s/it]

{'joint_goal_accuracy': 0.7469950738916256, 'turn_slot_accuracy': 0.992140120415992, 'turn_slot_f1': 0.9706317019854533}
joint_goal_accuracy: 0.7469950738916256
turn_slot_accuracy: 0.992140120415992
turn_slot_f1: 0.9706317019854533


 76%|███████▌  | 400/526 [12:14<03:13,  1.53s/it]

[7/25] [400/526] 2.397815
inferencing


 76%|███████▌  | 401/526 [12:46<21:57, 10.54s/it]

{'joint_goal_accuracy': 0.7710344827586207, 'turn_slot_accuracy': 0.9928713738369005, 'turn_slot_f1': 0.9722056634936203}
joint_goal_accuracy: 0.7710344827586207
turn_slot_accuracy: 0.9928713738369005
turn_slot_f1: 0.9722056634936203


 95%|█████████▌| 500/526 [15:18<00:39,  1.54s/it]

[7/25] [500/526] 1.447885
inferencing


 95%|█████████▌| 501/526 [15:50<04:23, 10.54s/it]

{'joint_goal_accuracy': 0.7680788177339901, 'turn_slot_accuracy': 0.9930377668308786, 'turn_slot_f1': 0.9730196804635972}
joint_goal_accuracy: 0.7680788177339901
turn_slot_accuracy: 0.9930377668308786
turn_slot_f1: 0.9730196804635972


100%|██████████| 526/526 [16:27<00:00,  1.88s/it]
  0%|          | 0/526 [00:00<?, ?it/s]

[8/25] [0/526] 1.222326
inferencing
{'joint_goal_accuracy': 0.7856157635467981, 'turn_slot_accuracy': 0.9936070060208078, 'turn_slot_f1': 0.9749502866807551}
new best JGA score!  0.7856157635467981


  0%|          | 1/526 [00:33<4:55:13, 33.74s/it]

joint_goal_accuracy: 0.7856157635467981
turn_slot_accuracy: 0.9936070060208078
turn_slot_f1: 0.9749502866807551


 19%|█▉        | 100/526 [03:05<10:54,  1.54s/it]

[8/25] [100/526] 0.970231
inferencing
{'joint_goal_accuracy': 0.7925123152709359, 'turn_slot_accuracy': 0.9936858237547982, 'turn_slot_f1': 0.9752485749095764}
new best JGA score!  0.7925123152709359


 19%|█▉        | 101/526 [03:39<1:19:25, 11.21s/it]

joint_goal_accuracy: 0.7925123152709359
turn_slot_accuracy: 0.9936858237547982
turn_slot_f1: 0.9752485749095764


 38%|███▊      | 200/526 [06:12<08:21,  1.54s/it]  

[8/25] [200/526] 0.939829
inferencing


 38%|███▊      | 201/526 [06:43<57:11, 10.56s/it]

{'joint_goal_accuracy': 0.7842364532019704, 'turn_slot_accuracy': 0.9932961138478467, 'turn_slot_f1': 0.9737663864739233}
joint_goal_accuracy: 0.7842364532019704
turn_slot_accuracy: 0.9932961138478467
turn_slot_f1: 0.9737663864739233


 57%|█████▋    | 300/526 [09:15<05:47,  1.54s/it]

[8/25] [300/526] 1.094521
inferencing


 57%|█████▋    | 301/526 [09:47<39:35, 10.56s/it]

{'joint_goal_accuracy': 0.7885714285714286, 'turn_slot_accuracy': 0.9932829775588466, 'turn_slot_f1': 0.9737485052778794}
joint_goal_accuracy: 0.7885714285714286
turn_slot_accuracy: 0.9932829775588466
turn_slot_f1: 0.9737485052778794


 76%|███████▌  | 400/526 [12:19<03:13,  1.54s/it]

[8/25] [400/526] 0.823849
inferencing


 76%|███████▌  | 401/526 [12:51<21:58, 10.55s/it]

{'joint_goal_accuracy': 0.7806896551724138, 'turn_slot_accuracy': 0.992976464148887, 'turn_slot_f1': 0.9728947687619417}
joint_goal_accuracy: 0.7806896551724138
turn_slot_accuracy: 0.992976464148887
turn_slot_f1: 0.9728947687619417


 95%|█████████▌| 500/526 [15:23<00:39,  1.54s/it]

[8/25] [500/526] 0.867204
inferencing


 95%|█████████▌| 501/526 [15:55<04:23, 10.55s/it]

{'joint_goal_accuracy': 0.7793103448275862, 'turn_slot_accuracy': 0.9932041598248558, 'turn_slot_f1': 0.9740305154979512}
joint_goal_accuracy: 0.7793103448275862
turn_slot_accuracy: 0.9932041598248558
turn_slot_f1: 0.9740305154979512


100%|██████████| 526/526 [16:32<00:00,  1.89s/it]
  0%|          | 0/526 [00:00<?, ?it/s]

[9/25] [0/526] 1.441202
inferencing


  0%|          | 1/526 [00:31<4:36:23, 31.59s/it]

{'joint_goal_accuracy': 0.7714285714285715, 'turn_slot_accuracy': 0.9928845101258976, 'turn_slot_f1': 0.9730890898775549}
joint_goal_accuracy: 0.7714285714285715
turn_slot_accuracy: 0.9928845101258976
turn_slot_f1: 0.9730890898775549


 19%|█▉        | 100/526 [03:03<10:55,  1.54s/it]

[9/25] [100/526] 0.801733
inferencing


 19%|█▉        | 101/526 [03:35<1:14:44, 10.55s/it]

{'joint_goal_accuracy': 0.7816748768472906, 'turn_slot_accuracy': 0.9931209633278683, 'turn_slot_f1': 0.974089306257691}
joint_goal_accuracy: 0.7816748768472906
turn_slot_accuracy: 0.9931209633278683
turn_slot_f1: 0.974089306257691


 38%|███▊      | 200/526 [06:07<08:20,  1.54s/it]  

[9/25] [200/526] 1.522675
inferencing


 38%|███▊      | 201/526 [06:39<57:11, 10.56s/it]

{'joint_goal_accuracy': 0.7883743842364532, 'turn_slot_accuracy': 0.9931297208538675, 'turn_slot_f1': 0.9736677679632126}
joint_goal_accuracy: 0.7883743842364532
turn_slot_accuracy: 0.9931297208538675
turn_slot_f1: 0.9736677679632126


 57%|█████▋    | 300/526 [09:11<05:47,  1.54s/it]

[9/25] [300/526] 1.399984
inferencing


 57%|█████▋    | 301/526 [09:42<39:35, 10.56s/it]

{'joint_goal_accuracy': 0.7787192118226601, 'turn_slot_accuracy': 0.9931822660098597, 'turn_slot_f1': 0.9745124234253143}
joint_goal_accuracy: 0.7787192118226601
turn_slot_accuracy: 0.9931822660098597
turn_slot_f1: 0.9745124234253143


 76%|███████▌  | 400/526 [12:15<03:13,  1.54s/it]

[9/25] [400/526] 1.086773
inferencing


 76%|███████▌  | 401/526 [12:46<21:59, 10.56s/it]

{'joint_goal_accuracy': 0.7820689655172414, 'turn_slot_accuracy': 0.9930465243568767, 'turn_slot_f1': 0.9740279397681467}
joint_goal_accuracy: 0.7820689655172414
turn_slot_accuracy: 0.9930465243568767
turn_slot_f1: 0.9740279397681467


 95%|█████████▌| 500/526 [15:18<00:39,  1.54s/it]

[9/25] [500/526] 1.187544
inferencing


 95%|█████████▌| 501/526 [15:50<04:23, 10.55s/it]

{'joint_goal_accuracy': 0.7824630541871921, 'turn_slot_accuracy': 0.9931603721948625, 'turn_slot_f1': 0.9735151283399259}
joint_goal_accuracy: 0.7824630541871921
turn_slot_accuracy: 0.9931603721948625
turn_slot_f1: 0.9735151283399259


100%|██████████| 526/526 [16:27<00:00,  1.88s/it]
  0%|          | 0/526 [00:00<?, ?it/s]

[10/25] [0/526] 1.087064
inferencing
{'joint_goal_accuracy': 0.7952709359605912, 'turn_slot_accuracy': 0.9935106732348208, 'turn_slot_f1': 0.9751432713855962}
new best JGA score!  0.7952709359605912


  0%|          | 1/526 [00:33<4:55:08, 33.73s/it]

joint_goal_accuracy: 0.7952709359605912
turn_slot_accuracy: 0.9935106732348208
turn_slot_f1: 0.9751432713855962


 19%|█▉        | 100/526 [03:05<10:54,  1.54s/it]

[10/25] [100/526] 1.475111
inferencing
{'joint_goal_accuracy': 0.7996059113300492, 'turn_slot_accuracy': 0.9937821565407859, 'turn_slot_f1': 0.9766791973058976}
new best JGA score!  0.7996059113300492


 19%|█▉        | 101/526 [03:39<1:19:29, 11.22s/it]

joint_goal_accuracy: 0.7996059113300492
turn_slot_accuracy: 0.9937821565407859
turn_slot_f1: 0.9766791973058976


 38%|███▊      | 200/526 [06:12<08:20,  1.54s/it]  

[10/25] [200/526] 0.790311
inferencing


 38%|███▊      | 201/526 [06:43<57:09, 10.55s/it]

{'joint_goal_accuracy': 0.7862068965517242, 'turn_slot_accuracy': 0.9932391899288541, 'turn_slot_f1': 0.9746664421866961}
joint_goal_accuracy: 0.7862068965517242
turn_slot_accuracy: 0.9932391899288541
turn_slot_f1: 0.9746664421866961


 57%|█████▋    | 300/526 [09:16<05:47,  1.54s/it]

[10/25] [300/526] 0.650152
inferencing


 57%|█████▋    | 301/526 [09:47<39:34, 10.55s/it]

{'joint_goal_accuracy': 0.7860098522167488, 'turn_slot_accuracy': 0.9934012041598352, 'turn_slot_f1': 0.9745176400248993}
joint_goal_accuracy: 0.7860098522167488
turn_slot_accuracy: 0.9934012041598352
turn_slot_f1: 0.9745176400248993


 76%|███████▌  | 400/526 [12:20<03:13,  1.54s/it]

[10/25] [400/526] 0.524738
inferencing


 76%|███████▌  | 401/526 [12:51<21:59, 10.56s/it]

{'joint_goal_accuracy': 0.7923152709359605, 'turn_slot_accuracy': 0.9936332785988066, 'turn_slot_f1': 0.9757512748795006}
joint_goal_accuracy: 0.7923152709359605
turn_slot_accuracy: 0.9936332785988066
turn_slot_f1: 0.9757512748795006


 95%|█████████▌| 500/526 [15:24<00:39,  1.54s/it]

[10/25] [500/526] 0.424523
inferencing


 95%|█████████▌| 501/526 [15:56<04:27, 10.70s/it]

{'joint_goal_accuracy': 0.7903448275862069, 'turn_slot_accuracy': 0.9935281882868182, 'turn_slot_f1': 0.9753592919937445}
joint_goal_accuracy: 0.7903448275862069
turn_slot_accuracy: 0.9935281882868182
turn_slot_f1: 0.9753592919937445


100%|██████████| 526/526 [16:33<00:00,  1.89s/it]
  0%|          | 0/526 [00:00<?, ?it/s]

[11/25] [0/526] 1.023458
inferencing


  0%|          | 1/526 [00:31<4:36:25, 31.59s/it]

{'joint_goal_accuracy': 0.7933004926108375, 'turn_slot_accuracy': 0.9935938697318092, 'turn_slot_f1': 0.9759715529389735}
joint_goal_accuracy: 0.7933004926108375
turn_slot_accuracy: 0.9935938697318092
turn_slot_f1: 0.9759715529389735


 19%|█▉        | 100/526 [03:04<10:54,  1.54s/it]

[11/25] [100/526] 0.635712
inferencing


 19%|█▉        | 101/526 [03:35<1:14:44, 10.55s/it]

{'joint_goal_accuracy': 0.7968472906403941, 'turn_slot_accuracy': 0.9935982484948094, 'turn_slot_f1': 0.9753714179142826}
joint_goal_accuracy: 0.7968472906403941
turn_slot_accuracy: 0.9935982484948094
turn_slot_f1: 0.9753714179142826


 38%|███▊      | 200/526 [06:07<08:21,  1.54s/it]  

[11/25] [200/526] 0.695252
inferencing


 38%|███▊      | 201/526 [06:39<57:10, 10.55s/it]

{'joint_goal_accuracy': 0.7929064039408867, 'turn_slot_accuracy': 0.9937077175697958, 'turn_slot_f1': 0.9757631097550931}
joint_goal_accuracy: 0.7929064039408867
turn_slot_accuracy: 0.9937077175697958
turn_slot_f1: 0.9757631097550931


 57%|█████▋    | 300/526 [09:11<05:47,  1.54s/it]

[11/25] [300/526] 1.208681
inferencing
{'joint_goal_accuracy': 0.8003940886699508, 'turn_slot_accuracy': 0.9936814449917967, 'turn_slot_f1': 0.9766357401190698}
new best JGA score!  0.8003940886699508


 57%|█████▋    | 301/526 [09:45<42:03, 11.22s/it]

joint_goal_accuracy: 0.8003940886699508
turn_slot_accuracy: 0.9936814449917967
turn_slot_f1: 0.9766357401190698


 76%|███████▌  | 400/526 [12:18<03:13,  1.54s/it]

[11/25] [400/526] 0.678237
inferencing


 76%|███████▌  | 401/526 [12:49<21:59, 10.55s/it]

{'joint_goal_accuracy': 0.7958620689655173, 'turn_slot_accuracy': 0.9935632183908133, 'turn_slot_f1': 0.9757331700723784}
joint_goal_accuracy: 0.7958620689655173
turn_slot_accuracy: 0.9935632183908133
turn_slot_f1: 0.9757331700723784


 95%|█████████▌| 500/526 [15:21<00:39,  1.54s/it]

[11/25] [500/526] 0.509508
inferencing


 95%|█████████▌| 501/526 [15:53<04:23, 10.55s/it]

{'joint_goal_accuracy': 0.7933004926108375, 'turn_slot_accuracy': 0.9934362342638285, 'turn_slot_f1': 0.9749380781812141}
joint_goal_accuracy: 0.7933004926108375
turn_slot_accuracy: 0.9934362342638285
turn_slot_f1: 0.9749380781812141


100%|██████████| 526/526 [16:30<00:00,  1.88s/it]
  0%|          | 0/526 [00:00<?, ?it/s]

[12/25] [0/526] 0.810934
inferencing
{'joint_goal_accuracy': 0.8013793103448276, 'turn_slot_accuracy': 0.9936902025177977, 'turn_slot_f1': 0.9761525975155433}
new best JGA score!  0.8013793103448276


  0%|          | 1/526 [00:33<4:55:48, 33.81s/it]

joint_goal_accuracy: 0.8013793103448276
turn_slot_accuracy: 0.9936902025177977
turn_slot_f1: 0.9761525975155433


 19%|█▉        | 100/526 [03:05<10:54,  1.54s/it]

[12/25] [100/526] 0.696397
inferencing


 19%|█▉        | 101/526 [03:37<1:14:46, 10.56s/it]

{'joint_goal_accuracy': 0.7879802955665025, 'turn_slot_accuracy': 0.993388067870836, 'turn_slot_f1': 0.973889545120218}
joint_goal_accuracy: 0.7879802955665025
turn_slot_accuracy: 0.993388067870836
turn_slot_f1: 0.973889545120218


 38%|███▊      | 200/526 [06:09<08:20,  1.54s/it]  

[12/25] [200/526] 0.737758
inferencing


 38%|███▊      | 201/526 [06:41<57:10, 10.55s/it]

{'joint_goal_accuracy': 0.8007881773399015, 'turn_slot_accuracy': 0.9937602627257878, 'turn_slot_f1': 0.9755836690214654}
joint_goal_accuracy: 0.8007881773399015
turn_slot_accuracy: 0.9937602627257878
turn_slot_f1: 0.9755836690214654


 57%|█████▋    | 300/526 [09:13<05:47,  1.54s/it]

[12/25] [300/526] 0.830124
inferencing


 57%|█████▋    | 301/526 [09:45<39:34, 10.55s/it]

{'joint_goal_accuracy': 0.8009852216748768, 'turn_slot_accuracy': 0.9937383689107921, 'turn_slot_f1': 0.9763040729508246}
joint_goal_accuracy: 0.8009852216748768
turn_slot_accuracy: 0.9937383689107921
turn_slot_f1: 0.9763040729508246


 76%|███████▌  | 400/526 [12:17<03:14,  1.54s/it]

[12/25] [400/526] 0.805638
inferencing
{'joint_goal_accuracy': 0.8084729064039409, 'turn_slot_accuracy': 0.9940711548987496, 'turn_slot_f1': 0.9777086435111988}
new best JGA score!  0.8084729064039409


 76%|███████▌  | 401/526 [12:51<23:22, 11.22s/it]

joint_goal_accuracy: 0.8084729064039409
turn_slot_accuracy: 0.9940711548987496
turn_slot_f1: 0.9777086435111988


 95%|█████████▌| 500/526 [15:23<00:40,  1.54s/it]

[12/25] [500/526] 0.425837
inferencing


 95%|█████████▌| 501/526 [15:55<04:23, 10.56s/it]

{'joint_goal_accuracy': 0.8017733990147783, 'turn_slot_accuracy': 0.9937164750957944, 'turn_slot_f1': 0.9758613197500564}
joint_goal_accuracy: 0.8017733990147783
turn_slot_accuracy: 0.9937164750957944
turn_slot_f1: 0.9758613197500564


100%|██████████| 526/526 [16:32<00:00,  1.89s/it]
  0%|          | 0/526 [00:00<?, ?it/s]

[13/25] [0/526] 0.600087
inferencing


  0%|          | 1/526 [00:31<4:36:40, 31.62s/it]

{'joint_goal_accuracy': 0.8049261083743843, 'turn_slot_accuracy': 0.9940142309797579, 'turn_slot_f1': 0.9768718227315708}
joint_goal_accuracy: 0.8049261083743843
turn_slot_accuracy: 0.9940142309797579
turn_slot_f1: 0.9768718227315708


 19%|█▉        | 100/526 [03:03<10:54,  1.54s/it]

[13/25] [100/526] 0.683605
inferencing


 19%|█▉        | 101/526 [03:35<1:14:46, 10.56s/it]

{'joint_goal_accuracy': 0.7954679802955665, 'turn_slot_accuracy': 0.9936420361248041, 'turn_slot_f1': 0.9758203548645643}
joint_goal_accuracy: 0.7954679802955665
turn_slot_accuracy: 0.9936420361248041
turn_slot_f1: 0.9758203548645643


 38%|███▊      | 200/526 [06:07<08:23,  1.55s/it]  

[13/25] [200/526] 0.676159
inferencing


 38%|███▊      | 201/526 [06:39<57:13, 10.57s/it]

{'joint_goal_accuracy': 0.797832512315271, 'turn_slot_accuracy': 0.9935062944718198, 'turn_slot_f1': 0.9761369551244178}
joint_goal_accuracy: 0.797832512315271
turn_slot_accuracy: 0.9935062944718198
turn_slot_f1: 0.9761369551244178


 57%|█████▋    | 300/526 [09:11<05:47,  1.54s/it]

[13/25] [300/526] 0.406438
inferencing


 57%|█████▋    | 301/526 [09:43<39:34, 10.56s/it]

{'joint_goal_accuracy': 0.8023645320197045, 'turn_slot_accuracy': 0.9938215654077827, 'turn_slot_f1': 0.9766674938903182}
joint_goal_accuracy: 0.8023645320197045
turn_slot_accuracy: 0.9938215654077827
turn_slot_f1: 0.9766674938903182


 76%|███████▌  | 400/526 [12:15<03:13,  1.54s/it]

[13/25] [400/526] 0.533827
inferencing


 76%|███████▌  | 401/526 [12:47<21:59, 10.56s/it]

{'joint_goal_accuracy': 0.8029556650246306, 'turn_slot_accuracy': 0.9938916256157719, 'turn_slot_f1': 0.9766715781989385}
joint_goal_accuracy: 0.8029556650246306
turn_slot_accuracy: 0.9938916256157719
turn_slot_f1: 0.9766715781989385


 95%|█████████▌| 500/526 [15:19<00:39,  1.54s/it]

[13/25] [500/526] 0.935513
inferencing


 95%|█████████▌| 501/526 [15:51<04:23, 10.55s/it]

{'joint_goal_accuracy': 0.7919211822660098, 'turn_slot_accuracy': 0.9936639299398, 'turn_slot_f1': 0.9753121231959401}
joint_goal_accuracy: 0.7919211822660098
turn_slot_accuracy: 0.9936639299398
turn_slot_f1: 0.9753121231959401


100%|██████████| 526/526 [16:28<00:00,  1.88s/it]
  0%|          | 0/526 [00:00<?, ?it/s]

[14/25] [0/526] 0.788690
inferencing


  0%|          | 1/526 [00:31<4:36:31, 31.60s/it]

{'joint_goal_accuracy': 0.7960591133004926, 'turn_slot_accuracy': 0.9935894909688088, 'turn_slot_f1': 0.9759881100062374}
joint_goal_accuracy: 0.7960591133004926
turn_slot_accuracy: 0.9935894909688088
turn_slot_f1: 0.9759881100062374


 19%|█▉        | 100/526 [03:03<10:54,  1.54s/it]

[14/25] [100/526] 0.792437
inferencing


 19%|█▉        | 101/526 [03:35<1:14:47, 10.56s/it]

{'joint_goal_accuracy': 0.8072906403940887, 'turn_slot_accuracy': 0.9940886699507481, 'turn_slot_f1': 0.977269965445522}
joint_goal_accuracy: 0.8072906403940887
turn_slot_accuracy: 0.9940886699507481
turn_slot_f1: 0.977269965445522


 38%|███▊      | 200/526 [06:07<08:20,  1.54s/it]  

[14/25] [200/526] 0.485423
inferencing


 38%|███▊      | 201/526 [06:39<57:11, 10.56s/it]

{'joint_goal_accuracy': 0.8072906403940887, 'turn_slot_accuracy': 0.9938434592227776, 'turn_slot_f1': 0.9773343561729618}
joint_goal_accuracy: 0.8072906403940887
turn_slot_accuracy: 0.9938434592227776
turn_slot_f1: 0.9773343561729618


 57%|█████▋    | 300/526 [09:11<05:47,  1.54s/it]

[14/25] [300/526] 0.449495
inferencing


 57%|█████▋    | 301/526 [09:43<39:35, 10.56s/it]

{'joint_goal_accuracy': 0.8082758620689655, 'turn_slot_accuracy': 0.9940273672687558, 'turn_slot_f1': 0.9773827591130413}
joint_goal_accuracy: 0.8082758620689655
turn_slot_accuracy: 0.9940273672687558
turn_slot_f1: 0.9773827591130413


 76%|███████▌  | 400/526 [12:15<03:13,  1.54s/it]

[14/25] [400/526] 0.885672
inferencing


 76%|███████▌  | 401/526 [12:46<21:59, 10.56s/it]

{'joint_goal_accuracy': 0.7976354679802956, 'turn_slot_accuracy': 0.9937515051997895, 'turn_slot_f1': 0.9763095249015579}
joint_goal_accuracy: 0.7976354679802956
turn_slot_accuracy: 0.9937515051997895
turn_slot_f1: 0.9763095249015579


 95%|█████████▌| 500/526 [15:18<00:39,  1.54s/it]

[14/25] [500/526] 0.604143
inferencing


 95%|█████████▌| 501/526 [15:50<04:23, 10.56s/it]

{'joint_goal_accuracy': 0.7988177339901478, 'turn_slot_accuracy': 0.9937339901477934, 'turn_slot_f1': 0.9760272159330619}
joint_goal_accuracy: 0.7988177339901478
turn_slot_accuracy: 0.9937339901477934
turn_slot_f1: 0.9760272159330619


100%|██████████| 526/526 [16:27<00:00,  1.88s/it]
  0%|          | 0/526 [00:00<?, ?it/s]

[15/25] [0/526] 0.585326
inferencing


  0%|          | 1/526 [00:31<4:36:28, 31.60s/it]

{'joint_goal_accuracy': 0.8011822660098522, 'turn_slot_accuracy': 0.9939354132457676, 'turn_slot_f1': 0.9765069144784264}
joint_goal_accuracy: 0.8011822660098522
turn_slot_accuracy: 0.9939354132457676
turn_slot_f1: 0.9765069144784264


 19%|█▉        | 100/526 [03:03<10:54,  1.54s/it]

[15/25] [100/526] 0.607741
inferencing


 19%|█▉        | 101/526 [03:35<1:14:45, 10.55s/it]

{'joint_goal_accuracy': 0.7986206896551724, 'turn_slot_accuracy': 0.993847837985779, 'turn_slot_f1': 0.9762064111593483}
joint_goal_accuracy: 0.7986206896551724
turn_slot_accuracy: 0.993847837985779
turn_slot_f1: 0.9762064111593483


 38%|███▊      | 200/526 [06:07<08:20,  1.54s/it]  

[15/25] [200/526] 0.229082
inferencing


 38%|███▊      | 201/526 [06:39<57:11, 10.56s/it]

{'joint_goal_accuracy': 0.8041379310344827, 'turn_slot_accuracy': 0.9938828680897741, 'turn_slot_f1': 0.9765050122892325}
joint_goal_accuracy: 0.8041379310344827
turn_slot_accuracy: 0.9938828680897741
turn_slot_f1: 0.9765050122892325


 57%|█████▋    | 300/526 [09:11<05:48,  1.54s/it]

[15/25] [300/526] 0.750695
inferencing


 57%|█████▋    | 301/526 [09:43<39:36, 10.56s/it]

{'joint_goal_accuracy': 0.8070935960591133, 'turn_slot_accuracy': 0.9940186097427557, 'turn_slot_f1': 0.9767224055617089}
joint_goal_accuracy: 0.8070935960591133
turn_slot_accuracy: 0.9940186097427557
turn_slot_f1: 0.9767224055617089


 76%|███████▌  | 400/526 [12:15<03:13,  1.54s/it]

[15/25] [400/526] 0.717047
inferencing


 76%|███████▌  | 401/526 [12:46<21:59, 10.56s/it]

{'joint_goal_accuracy': 0.8039408866995074, 'turn_slot_accuracy': 0.9939354132457661, 'turn_slot_f1': 0.9759715110279928}
joint_goal_accuracy: 0.8039408866995074
turn_slot_accuracy: 0.9939354132457661
turn_slot_f1: 0.9759715110279928


 95%|█████████▌| 500/526 [15:18<00:40,  1.54s/it]

[15/25] [500/526] 0.530813
inferencing
{'joint_goal_accuracy': 0.8173399014778325, 'turn_slot_accuracy': 0.9942463054187273, 'turn_slot_f1': 0.9783984172150603}
new best JGA score!  0.8173399014778325


 95%|█████████▌| 501/526 [15:52<04:40, 11.21s/it]

joint_goal_accuracy: 0.8173399014778325
turn_slot_accuracy: 0.9942463054187273
turn_slot_f1: 0.9783984172150603


100%|██████████| 526/526 [16:29<00:00,  1.88s/it]
  0%|          | 0/526 [00:00<?, ?it/s]

[16/25] [0/526] 0.424189
inferencing


  0%|          | 1/526 [00:31<4:36:33, 31.61s/it]

{'joint_goal_accuracy': 0.8059113300492611, 'turn_slot_accuracy': 0.993974822112763, 'turn_slot_f1': 0.9767726871069488}
joint_goal_accuracy: 0.8059113300492611
turn_slot_accuracy: 0.993974822112763
turn_slot_f1: 0.9767726871069488


 19%|█▉        | 100/526 [03:03<10:54,  1.54s/it]

[16/25] [100/526] 0.356144
inferencing


 19%|█▉        | 101/526 [03:35<1:14:46, 10.56s/it]

{'joint_goal_accuracy': 0.809064039408867, 'turn_slot_accuracy': 0.9941018062397462, 'turn_slot_f1': 0.97737617356621}
joint_goal_accuracy: 0.809064039408867
turn_slot_accuracy: 0.9941018062397462
turn_slot_f1: 0.97737617356621


 38%|███▊      | 200/526 [06:07<08:21,  1.54s/it]  

[16/25] [200/526] 0.544452
inferencing


 38%|███▊      | 201/526 [06:39<57:11, 10.56s/it]

{'joint_goal_accuracy': 0.7958620689655173, 'turn_slot_accuracy': 0.9936201423098072, 'turn_slot_f1': 0.9759096323328994}
joint_goal_accuracy: 0.7958620689655173
turn_slot_accuracy: 0.9936201423098072
turn_slot_f1: 0.9759096323328994


 57%|█████▋    | 300/526 [09:11<05:47,  1.54s/it]

[16/25] [300/526] 0.666287
inferencing


 57%|█████▋    | 301/526 [09:43<39:35, 10.56s/it]

{'joint_goal_accuracy': 0.8061083743842364, 'turn_slot_accuracy': 0.9939397920087649, 'turn_slot_f1': 0.9771388433918974}
joint_goal_accuracy: 0.8061083743842364
turn_slot_accuracy: 0.9939397920087649
turn_slot_f1: 0.9771388433918974


 76%|███████▌  | 400/526 [12:15<03:13,  1.54s/it]

[16/25] [400/526] 0.318549
inferencing


 76%|███████▌  | 401/526 [12:47<21:59, 10.56s/it]

{'joint_goal_accuracy': 0.8072906403940887, 'turn_slot_accuracy': 0.9939923371647593, 'turn_slot_f1': 0.9770504200609417}
joint_goal_accuracy: 0.8072906403940887
turn_slot_accuracy: 0.9939923371647593
turn_slot_f1: 0.9770504200609417


 95%|█████████▌| 500/526 [15:19<00:39,  1.54s/it]

[16/25] [500/526] 0.463944
inferencing


 95%|█████████▌| 501/526 [15:50<04:23, 10.56s/it]

{'joint_goal_accuracy': 0.8045320197044334, 'turn_slot_accuracy': 0.9939748221127618, 'turn_slot_f1': 0.9765643767124269}
joint_goal_accuracy: 0.8045320197044334
turn_slot_accuracy: 0.9939748221127618
turn_slot_f1: 0.9765643767124269


100%|██████████| 526/526 [16:27<00:00,  1.88s/it]
  0%|          | 0/526 [00:00<?, ?it/s]

[17/25] [0/526] 0.298780
inferencing


  0%|          | 1/526 [00:31<4:36:32, 31.60s/it]

{'joint_goal_accuracy': 0.8, 'turn_slot_accuracy': 0.9938171866447821, 'turn_slot_f1': 0.9761759090537561}
joint_goal_accuracy: 0.8
turn_slot_accuracy: 0.9938171866447821
turn_slot_f1: 0.9761759090537561


 19%|█▉        | 100/526 [03:03<10:55,  1.54s/it]

[17/25] [100/526] 0.358526
inferencing


 19%|█▉        | 101/526 [03:35<1:14:48, 10.56s/it]

{'joint_goal_accuracy': 0.8059113300492611, 'turn_slot_accuracy': 0.9939967159277591, 'turn_slot_f1': 0.9764887475259135}
joint_goal_accuracy: 0.8059113300492611
turn_slot_accuracy: 0.9939967159277591
turn_slot_f1: 0.9764887475259135


 38%|███▊      | 200/526 [06:07<08:20,  1.54s/it]  

[17/25] [200/526] 0.232151
inferencing


 38%|███▊      | 201/526 [06:39<57:10, 10.56s/it]

{'joint_goal_accuracy': 0.8017733990147783, 'turn_slot_accuracy': 0.993891625615771, 'turn_slot_f1': 0.9768726913590922}
joint_goal_accuracy: 0.8017733990147783
turn_slot_accuracy: 0.993891625615771
turn_slot_f1: 0.9768726913590922


 57%|█████▋    | 300/526 [09:11<05:47,  1.54s/it]

[17/25] [300/526] 0.537962
inferencing


 57%|█████▋    | 301/526 [09:42<39:35, 10.56s/it]

{'joint_goal_accuracy': 0.8017733990147783, 'turn_slot_accuracy': 0.9937558839627894, 'turn_slot_f1': 0.9764203080868205}
joint_goal_accuracy: 0.8017733990147783
turn_slot_accuracy: 0.9937558839627894
turn_slot_f1: 0.9764203080868205


 76%|███████▌  | 400/526 [12:15<03:13,  1.54s/it]

[17/25] [400/526] 0.425563
inferencing


 76%|███████▌  | 401/526 [12:46<21:59, 10.56s/it]

{'joint_goal_accuracy': 0.8017733990147783, 'turn_slot_accuracy': 0.9937733990147876, 'turn_slot_f1': 0.9763390665237147}
joint_goal_accuracy: 0.8017733990147783
turn_slot_accuracy: 0.9937733990147876
turn_slot_f1: 0.9763390665237147


 95%|█████████▌| 500/526 [15:18<00:39,  1.54s/it]

[17/25] [500/526] 0.267676
inferencing


 95%|█████████▌| 501/526 [15:50<04:23, 10.56s/it]

{'joint_goal_accuracy': 0.7998029556650247, 'turn_slot_accuracy': 0.9936770662287999, 'turn_slot_f1': 0.9761353214353539}
joint_goal_accuracy: 0.7998029556650247
turn_slot_accuracy: 0.9936770662287999
turn_slot_f1: 0.9761353214353539


100%|██████████| 526/526 [16:27<00:00,  1.88s/it]
  0%|          | 0/526 [00:00<?, ?it/s]

[18/25] [0/526] 0.396879
inferencing


  0%|          | 1/526 [00:32<4:41:09, 32.13s/it]

{'joint_goal_accuracy': 0.8065024630541872, 'turn_slot_accuracy': 0.9938434592227787, 'turn_slot_f1': 0.9771262359061162}
joint_goal_accuracy: 0.8065024630541872
turn_slot_accuracy: 0.9938434592227787
turn_slot_f1: 0.9771262359061162


 19%|█▉        | 100/526 [03:04<10:56,  1.54s/it]

[18/25] [100/526] 0.482546
inferencing


 19%|█▉        | 101/526 [03:36<1:14:49, 10.56s/it]

{'joint_goal_accuracy': 0.8104433497536946, 'turn_slot_accuracy': 0.9940054734537568, 'turn_slot_f1': 0.9773248256757503}
joint_goal_accuracy: 0.8104433497536946
turn_slot_accuracy: 0.9940054734537568
turn_slot_f1: 0.9773248256757503


 38%|███▊      | 200/526 [06:08<08:21,  1.54s/it]  

[18/25] [200/526] 0.385516
inferencing


 38%|███▊      | 201/526 [06:40<57:12, 10.56s/it]

{'joint_goal_accuracy': 0.8100492610837439, 'turn_slot_accuracy': 0.9941368363437395, 'turn_slot_f1': 0.9778369136500686}
joint_goal_accuracy: 0.8100492610837439
turn_slot_accuracy: 0.9941368363437395
turn_slot_f1: 0.9778369136500686


 57%|█████▋    | 300/526 [09:12<05:47,  1.54s/it]

[18/25] [300/526] 0.609890
inferencing


 57%|█████▋    | 301/526 [09:44<39:35, 10.56s/it]

{'joint_goal_accuracy': 0.8082758620689655, 'turn_slot_accuracy': 0.9940142309797575, 'turn_slot_f1': 0.9770463287975961}
joint_goal_accuracy: 0.8082758620689655
turn_slot_accuracy: 0.9940142309797575
turn_slot_f1: 0.9770463287975961


 76%|███████▌  | 400/526 [12:16<03:13,  1.54s/it]

[18/25] [400/526] 0.292209
inferencing


 76%|███████▌  | 401/526 [12:48<21:59, 10.56s/it]

{'joint_goal_accuracy': 0.8027586206896552, 'turn_slot_accuracy': 0.9938960043787717, 'turn_slot_f1': 0.9765094297725998}
joint_goal_accuracy: 0.8027586206896552
turn_slot_accuracy: 0.9938960043787717
turn_slot_f1: 0.9765094297725998


 95%|█████████▌| 500/526 [15:20<00:39,  1.54s/it]

[18/25] [500/526] 0.252667
inferencing


 95%|█████████▌| 501/526 [15:51<04:23, 10.56s/it]

{'joint_goal_accuracy': 0.8094581280788178, 'turn_slot_accuracy': 0.9940799124247512, 'turn_slot_f1': 0.9774333523770709}
joint_goal_accuracy: 0.8094581280788178
turn_slot_accuracy: 0.9940799124247512
turn_slot_f1: 0.9774333523770709


100%|██████████| 526/526 [16:29<00:00,  1.88s/it]
  0%|          | 0/526 [00:00<?, ?it/s]

[19/25] [0/526] 0.395666
inferencing


  0%|          | 1/526 [00:31<4:36:38, 31.62s/it]

{'joint_goal_accuracy': 0.8045320197044334, 'turn_slot_accuracy': 0.9939397920087678, 'turn_slot_f1': 0.9770666487963293}
joint_goal_accuracy: 0.8045320197044334
turn_slot_accuracy: 0.9939397920087678
turn_slot_f1: 0.9770666487963293


 19%|█▉        | 100/526 [03:03<10:53,  1.54s/it]

[19/25] [100/526] 0.306835
inferencing


 19%|█▉        | 101/526 [03:35<1:14:47, 10.56s/it]

{'joint_goal_accuracy': 0.7976354679802956, 'turn_slot_accuracy': 0.9937164750957953, 'turn_slot_f1': 0.9763303684377849}
joint_goal_accuracy: 0.7976354679802956
turn_slot_accuracy: 0.9937164750957953
turn_slot_f1: 0.9763303684377849


 38%|███▊      | 200/526 [06:07<08:21,  1.54s/it]  

[19/25] [200/526] 0.305331
inferencing


 38%|███▊      | 201/526 [06:39<57:10, 10.56s/it]

{'joint_goal_accuracy': 0.8082758620689655, 'turn_slot_accuracy': 0.9940930487137489, 'turn_slot_f1': 0.9781193606104085}
joint_goal_accuracy: 0.8082758620689655
turn_slot_accuracy: 0.9940930487137489
turn_slot_f1: 0.9781193606104085


 57%|█████▋    | 300/526 [09:11<05:47,  1.54s/it]

[19/25] [300/526] 0.393703
inferencing


 57%|█████▋    | 301/526 [09:43<39:37, 10.57s/it]

{'joint_goal_accuracy': 0.8031527093596059, 'turn_slot_accuracy': 0.9938872468527715, 'turn_slot_f1': 0.9771385284638499}
joint_goal_accuracy: 0.8031527093596059
turn_slot_accuracy: 0.9938872468527715
turn_slot_f1: 0.9771385284638499


 76%|███████▌  | 400/526 [12:15<03:13,  1.54s/it]

[19/25] [400/526] 0.457916
inferencing


 76%|███████▌  | 401/526 [12:46<21:59, 10.56s/it]

{'joint_goal_accuracy': 0.8108374384236453, 'turn_slot_accuracy': 0.9941718664477375, 'turn_slot_f1': 0.9783808819481946}
joint_goal_accuracy: 0.8108374384236453
turn_slot_accuracy: 0.9941718664477375
turn_slot_f1: 0.9783808819481946


 95%|█████████▌| 500/526 [15:18<00:40,  1.54s/it]

[19/25] [500/526] 0.242483
inferencing


 95%|█████████▌| 501/526 [15:50<04:24, 10.57s/it]

{'joint_goal_accuracy': 0.8025615763546798, 'turn_slot_accuracy': 0.9938522167487782, 'turn_slot_f1': 0.9768990486204779}
joint_goal_accuracy: 0.8025615763546798
turn_slot_accuracy: 0.9938522167487782
turn_slot_f1: 0.9768990486204779


100%|██████████| 526/526 [16:27<00:00,  1.88s/it]
  0%|          | 0/526 [00:00<?, ?it/s]

[20/25] [0/526] 0.449920
inferencing


  0%|          | 1/526 [00:31<4:36:35, 31.61s/it]

{'joint_goal_accuracy': 0.8063054187192118, 'turn_slot_accuracy': 0.9940229885057558, 'turn_slot_f1': 0.9775067647783371}
joint_goal_accuracy: 0.8063054187192118
turn_slot_accuracy: 0.9940229885057558
turn_slot_f1: 0.9775067647783371


 19%|█▉        | 100/526 [03:03<10:54,  1.54s/it]

[20/25] [100/526] 0.453864
inferencing


 19%|█▉        | 101/526 [03:35<1:14:47, 10.56s/it]

{'joint_goal_accuracy': 0.8104433497536946, 'turn_slot_accuracy': 0.994158730158739, 'turn_slot_f1': 0.9780756254362689}
joint_goal_accuracy: 0.8104433497536946
turn_slot_accuracy: 0.994158730158739
turn_slot_f1: 0.9780756254362689


 38%|███▊      | 200/526 [06:07<08:21,  1.54s/it]  

[20/25] [200/526] 0.231298
inferencing


 38%|███▊      | 201/526 [06:39<57:11, 10.56s/it]

{'joint_goal_accuracy': 0.8094581280788178, 'turn_slot_accuracy': 0.9940098522167577, 'turn_slot_f1': 0.9775887077176045}
joint_goal_accuracy: 0.8094581280788178
turn_slot_accuracy: 0.9940098522167577
turn_slot_f1: 0.9775887077176045


 57%|█████▋    | 300/526 [09:11<05:47,  1.54s/it]

[20/25] [300/526] 0.397050
inferencing


 57%|█████▋    | 301/526 [09:43<39:36, 10.56s/it]

{'joint_goal_accuracy': 0.8096551724137931, 'turn_slot_accuracy': 0.9940799124247482, 'turn_slot_f1': 0.9780040705582778}
joint_goal_accuracy: 0.8096551724137931
turn_slot_accuracy: 0.9940799124247482
turn_slot_f1: 0.9780040705582778


 76%|███████▌  | 400/526 [12:15<03:13,  1.54s/it]

[20/25] [400/526] 0.303242
inferencing


 76%|███████▌  | 401/526 [12:47<22:00, 10.56s/it]

{'joint_goal_accuracy': 0.8147783251231527, 'turn_slot_accuracy': 0.9941105637657448, 'turn_slot_f1': 0.9781515408410569}
joint_goal_accuracy: 0.8147783251231527
turn_slot_accuracy: 0.9941105637657448
turn_slot_f1: 0.9781515408410569


 95%|█████████▌| 500/526 [15:19<00:39,  1.54s/it]

[20/25] [500/526] 0.267164
inferencing


 95%|█████████▌| 501/526 [15:50<04:24, 10.56s/it]

{'joint_goal_accuracy': 0.8027586206896552, 'turn_slot_accuracy': 0.9937909140667831, 'turn_slot_f1': 0.9765513268146765}
joint_goal_accuracy: 0.8027586206896552
turn_slot_accuracy: 0.9937909140667831
turn_slot_f1: 0.9765513268146765


100%|██████████| 526/526 [16:27<00:00,  1.88s/it]
  0%|          | 0/526 [00:00<?, ?it/s]

[21/25] [0/526] 0.238641
inferencing


  0%|          | 1/526 [00:31<4:36:31, 31.60s/it]

{'joint_goal_accuracy': 0.814384236453202, 'turn_slot_accuracy': 0.9941149425287442, 'turn_slot_f1': 0.9782202557394968}
joint_goal_accuracy: 0.814384236453202
turn_slot_accuracy: 0.9941149425287442
turn_slot_f1: 0.9782202557394968


 19%|█▉        | 100/526 [03:03<10:54,  1.54s/it]

[21/25] [100/526] 0.240517
inferencing


 19%|█▉        | 101/526 [03:35<1:14:48, 10.56s/it]

{'joint_goal_accuracy': 0.812807881773399, 'turn_slot_accuracy': 0.9940186097427571, 'turn_slot_f1': 0.977517170178765}
joint_goal_accuracy: 0.812807881773399
turn_slot_accuracy: 0.9940186097427571
turn_slot_f1: 0.977517170178765


 38%|███▊      | 200/526 [06:08<08:20,  1.54s/it]  

[21/25] [200/526] 0.389204
inferencing


 38%|███▊      | 201/526 [06:39<57:11, 10.56s/it]

{'joint_goal_accuracy': 0.8100492610837439, 'turn_slot_accuracy': 0.9940317460317549, 'turn_slot_f1': 0.9777607164803214}
joint_goal_accuracy: 0.8100492610837439
turn_slot_accuracy: 0.9940317460317549
turn_slot_f1: 0.9777607164803214


 57%|█████▋    | 300/526 [09:12<05:47,  1.54s/it]

[21/25] [300/526] 0.335929
inferencing


 57%|█████▋    | 301/526 [09:43<39:35, 10.56s/it]

{'joint_goal_accuracy': 0.8116256157635467, 'turn_slot_accuracy': 0.994119321291744, 'turn_slot_f1': 0.977838565214782}
joint_goal_accuracy: 0.8116256157635467
turn_slot_accuracy: 0.994119321291744
turn_slot_f1: 0.977838565214782


 76%|███████▌  | 400/526 [12:15<03:13,  1.54s/it]

[21/25] [400/526] 0.225960
inferencing


 76%|███████▌  | 401/526 [12:47<21:59, 10.56s/it]

{'joint_goal_accuracy': 0.8155665024630542, 'turn_slot_accuracy': 0.994193760262734, 'turn_slot_f1': 0.9782189103495711}
joint_goal_accuracy: 0.8155665024630542
turn_slot_accuracy: 0.994193760262734
turn_slot_f1: 0.9782189103495711


 95%|█████████▌| 500/526 [15:19<00:39,  1.54s/it]

[21/25] [500/526] 0.243877
inferencing


 95%|█████████▌| 501/526 [15:51<04:23, 10.56s/it]

{'joint_goal_accuracy': 0.8147783251231527, 'turn_slot_accuracy': 0.9941850027367352, 'turn_slot_f1': 0.9781774829750691}
joint_goal_accuracy: 0.8147783251231527
turn_slot_accuracy: 0.9941850027367352
turn_slot_f1: 0.9781774829750691


100%|██████████| 526/526 [16:28<00:00,  1.88s/it]
  0%|          | 0/526 [00:00<?, ?it/s]

[22/25] [0/526] 0.356298
inferencing


  0%|          | 1/526 [00:31<4:36:31, 31.60s/it]

{'joint_goal_accuracy': 0.8122167487684729, 'turn_slot_accuracy': 0.9941455938697399, 'turn_slot_f1': 0.97814983619203}
joint_goal_accuracy: 0.8122167487684729
turn_slot_accuracy: 0.9941455938697399
turn_slot_f1: 0.97814983619203


 19%|█▉        | 100/526 [03:03<10:57,  1.54s/it]

[22/25] [100/526] 0.284489
inferencing


 19%|█▉        | 101/526 [03:35<1:14:49, 10.56s/it]

{'joint_goal_accuracy': 0.812807881773399, 'turn_slot_accuracy': 0.9941061850027451, 'turn_slot_f1': 0.9779639227224747}
joint_goal_accuracy: 0.812807881773399
turn_slot_accuracy: 0.9941061850027451
turn_slot_f1: 0.9779639227224747


 38%|███▊      | 200/526 [06:07<08:20,  1.54s/it]  

[22/25] [200/526] 0.271744
inferencing


 38%|███▊      | 201/526 [06:39<57:10, 10.56s/it]

{'joint_goal_accuracy': 0.8133990147783251, 'turn_slot_accuracy': 0.9942068965517328, 'turn_slot_f1': 0.9783620217442323}
joint_goal_accuracy: 0.8133990147783251
turn_slot_accuracy: 0.9942068965517328
turn_slot_f1: 0.9783620217442323


 57%|█████▋    | 300/526 [09:11<05:47,  1.54s/it]

[22/25] [300/526] 0.140640
inferencing


 57%|█████▋    | 301/526 [09:42<39:34, 10.55s/it]

{'joint_goal_accuracy': 0.8149753694581281, 'turn_slot_accuracy': 0.9941850027367354, 'turn_slot_f1': 0.978490188274681}
joint_goal_accuracy: 0.8149753694581281
turn_slot_accuracy: 0.9941850027367354
turn_slot_f1: 0.978490188274681


 76%|███████▌  | 400/526 [12:14<03:13,  1.53s/it]

[22/25] [400/526] 0.203453
inferencing
{'joint_goal_accuracy': 0.8185221674876847, 'turn_slot_accuracy': 0.9942857142857223, 'turn_slot_f1': 0.9787662265935624}
new best JGA score!  0.8185221674876847


 76%|███████▌  | 401/526 [12:48<23:24, 11.23s/it]

joint_goal_accuracy: 0.8185221674876847
turn_slot_accuracy: 0.9942857142857223
turn_slot_f1: 0.9787662265935624


 95%|█████████▌| 500/526 [15:20<00:39,  1.54s/it]

[22/25] [500/526] 0.188098
inferencing


 95%|█████████▌| 501/526 [15:52<04:24, 10.56s/it]

{'joint_goal_accuracy': 0.81064039408867, 'turn_slot_accuracy': 0.9940405035577526, 'turn_slot_f1': 0.9779564362025507}
joint_goal_accuracy: 0.81064039408867
turn_slot_accuracy: 0.9940405035577526
turn_slot_f1: 0.9779564362025507


100%|██████████| 526/526 [16:29<00:00,  1.88s/it]
  0%|          | 0/526 [00:00<?, ?it/s]

[23/25] [0/526] 0.264930
inferencing


  0%|          | 1/526 [00:31<4:36:31, 31.60s/it]

{'joint_goal_accuracy': 0.8108374384236453, 'turn_slot_accuracy': 0.9940974274767456, 'turn_slot_f1': 0.9781075194346898}
joint_goal_accuracy: 0.8108374384236453
turn_slot_accuracy: 0.9940974274767456
turn_slot_f1: 0.9781075194346898


 19%|█▉        | 100/526 [03:03<10:57,  1.54s/it]

[23/25] [100/526] 0.333729
inferencing


 19%|█▉        | 101/526 [03:35<1:14:50, 10.57s/it]

{'joint_goal_accuracy': 0.8098522167487685, 'turn_slot_accuracy': 0.9940361247947535, 'turn_slot_f1': 0.9778023391325661}
joint_goal_accuracy: 0.8098522167487685
turn_slot_accuracy: 0.9940361247947535
turn_slot_f1: 0.9778023391325661


 38%|███▊      | 200/526 [06:07<08:21,  1.54s/it]  

[23/25] [200/526] 0.151569
inferencing


 38%|███▊      | 201/526 [06:39<57:10, 10.56s/it]

{'joint_goal_accuracy': 0.8098522167487685, 'turn_slot_accuracy': 0.9940273672687543, 'turn_slot_f1': 0.9777329226962548}
joint_goal_accuracy: 0.8098522167487685
turn_slot_accuracy: 0.9940273672687543
turn_slot_f1: 0.9777329226962548


 57%|█████▋    | 300/526 [09:11<05:46,  1.54s/it]

[23/25] [300/526] 0.315080
inferencing


 57%|█████▋    | 301/526 [09:43<39:34, 10.56s/it]

{'joint_goal_accuracy': 0.8084729064039409, 'turn_slot_accuracy': 0.9939354132457657, 'turn_slot_f1': 0.9774514014373451}
joint_goal_accuracy: 0.8084729064039409
turn_slot_accuracy: 0.9939354132457657
turn_slot_f1: 0.9774514014373451


 76%|███████▌  | 400/526 [12:15<03:13,  1.54s/it]

[23/25] [400/526] 0.251610
inferencing


 76%|███████▌  | 401/526 [12:47<22:00, 10.56s/it]

{'joint_goal_accuracy': 0.8086699507389162, 'turn_slot_accuracy': 0.9939616858237629, 'turn_slot_f1': 0.9776493975564436}
joint_goal_accuracy: 0.8086699507389162
turn_slot_accuracy: 0.9939616858237629
turn_slot_f1: 0.9776493975564436


 95%|█████████▌| 500/526 [15:19<00:39,  1.54s/it]

[23/25] [500/526] 0.253947
inferencing


 95%|█████████▌| 501/526 [15:50<04:23, 10.56s/it]

{'joint_goal_accuracy': 0.8084729064039409, 'turn_slot_accuracy': 0.9939310344827664, 'turn_slot_f1': 0.9775654590833345}
joint_goal_accuracy: 0.8084729064039409
turn_slot_accuracy: 0.9939310344827664
turn_slot_f1: 0.9775654590833345


100%|██████████| 526/526 [16:27<00:00,  1.88s/it]
  0%|          | 0/526 [00:00<?, ?it/s]

[24/25] [0/526] 0.127217
inferencing


  0%|          | 1/526 [00:31<4:36:32, 31.61s/it]

{'joint_goal_accuracy': 0.8068965517241379, 'turn_slot_accuracy': 0.9939135194307689, 'turn_slot_f1': 0.9775416095653221}
joint_goal_accuracy: 0.8068965517241379
turn_slot_accuracy: 0.9939135194307689
turn_slot_f1: 0.9775416095653221


 19%|█▉        | 100/526 [03:03<10:54,  1.54s/it]

[24/25] [100/526] 0.263841
inferencing


 19%|█▉        | 101/526 [03:35<1:14:47, 10.56s/it]

{'joint_goal_accuracy': 0.8088669950738916, 'turn_slot_accuracy': 0.9939529282977639, 'turn_slot_f1': 0.9776728019202116}
joint_goal_accuracy: 0.8088669950738916
turn_slot_accuracy: 0.9939529282977639
turn_slot_f1: 0.9776728019202116


 38%|███▊      | 200/526 [06:07<08:20,  1.53s/it]  

[24/25] [200/526] 0.287795
inferencing


 38%|███▊      | 201/526 [06:39<57:09, 10.55s/it]

{'joint_goal_accuracy': 0.8100492610837439, 'turn_slot_accuracy': 0.994022988505756, 'turn_slot_f1': 0.9778152955144328}
joint_goal_accuracy: 0.8100492610837439
turn_slot_accuracy: 0.994022988505756
turn_slot_f1: 0.9778152955144328


 57%|█████▋    | 300/526 [09:11<05:47,  1.54s/it]

[24/25] [300/526] 0.194027
inferencing


 57%|█████▋    | 301/526 [09:42<39:35, 10.56s/it]

{'joint_goal_accuracy': 0.8096551724137931, 'turn_slot_accuracy': 0.9939573070607634, 'turn_slot_f1': 0.9776558358730665}
joint_goal_accuracy: 0.8096551724137931
turn_slot_accuracy: 0.9939573070607634
turn_slot_f1: 0.9776558358730665


 76%|███████▌  | 400/526 [12:14<03:13,  1.54s/it]

[24/25] [400/526] 0.321451
inferencing


 76%|███████▌  | 401/526 [12:46<22:00, 10.56s/it]

{'joint_goal_accuracy': 0.8094581280788178, 'turn_slot_accuracy': 0.9939660645867623, 'turn_slot_f1': 0.9776605573076544}
joint_goal_accuracy: 0.8094581280788178
turn_slot_accuracy: 0.9939660645867623
turn_slot_f1: 0.9776605573076544


 95%|█████████▌| 500/526 [15:18<00:40,  1.54s/it]

[24/25] [500/526] 0.152952
inferencing


 95%|█████████▌| 501/526 [15:50<04:24, 10.56s/it]

{'joint_goal_accuracy': 0.8096551724137931, 'turn_slot_accuracy': 0.9939748221127609, 'turn_slot_f1': 0.977674060465985}
joint_goal_accuracy: 0.8096551724137931
turn_slot_accuracy: 0.9939748221127609
turn_slot_f1: 0.977674060465985


100%|██████████| 526/526 [16:27<00:00,  1.88s/it]


In [None]:
epoch_miss_labels

with open('miss_labels.json', 'w') as outfile:
    json.dump(epoch_miss_labels, outfile)


In [32]:
# model1 = SUMBT(args, num_labels, device)
# model1.initialize_slot_value_lookup(slot_values_ids, slot_type_ids)  # Tokenized Ontology의 Pre-encoding using BERT_SV
# model1.to(device)


# no_decay = ["bias", "LayerNorm.weight"]
# optimizer_grouped_parameters = [
#         {
#             "params": [p for n, p in model1.named_parameters() if not any(nd in n for nd in no_decay)],
#             "weight_decay": args.weight_decay,
#         },
#         {
#             "params": [p for n, p in model1.named_parameters() if any(nd in n for nd in no_decay)],
#             "weight_decay": 0.0,
#         },
#     ]
# optimizer = AdamW(optimizer_grouped_parameters, lr=args.learning_rate, eps=1e-8)

# PATH = 'checkpoint/laced-lake-76/23_100_0.8232512315270936.pth'

# checkpoint = torch.load(PATH)
# model1.load_state_dict(checkpoint['model_state_dict'])
# optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
# epoch = checkpoint['epoch']
# loss = checkpoint['loss']

# # model1.load_state_dict(torch.load('checkpoint/solar-sweep-3/9_700_0.7885714285714286.pth'))


Some weights of the model checkpoint at dsksd/bert-ko-small-minimal were not used when initializing BertForUtteranceEncoding: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForUtteranceEncoding from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForUtteranceEncoding from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at dsksd/bert-ko-small-minimal 

Complete initialization of slot and value lookup


## Inference

In [29]:
eval_data = json.load(open(f"/opt/ml/repo/taepd/input/data/eval_dataset/eval_dials.json", "r"))

eval_examples = get_examples_from_dialogues(
    eval_data, user_first=True, dialogue_level=True
)

# Extracting Featrues
eval_features = processor.convert_examples_to_features(eval_examples)
eval_data = WOSDataset(eval_features)
eval_sampler = SequentialSampler(eval_data)
eval_loader = DataLoader(
    eval_data,
    batch_size=8,
    sampler=eval_sampler,
    collate_fn=processor.collate_fn,
)

100%|██████████| 2000/2000 [00:00<00:00, 3165.59it/s]


In [30]:
predictions = inference(model, eval_loader, processor, device)

In [31]:
json.dump(predictions, open('predictions.csv', 'w'), indent=2, ensure_ascii=False) 