# Hierarchical Classification SwDA

In [None]:
import time
import os
import pyhocon
import torch
import argparse
from torch import nn
from torch import optim
import matplotlib.pyplot as plt
from torch.utils.tensorboard import SummaryWriter
from torch.nn.functional import one_hot
import numpy as np
import glob
import os, re, json
import matplotlib.pyplot as plt
import jsonlines
import random
import torch.nn.functional as F
import math
from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix, accuracy_score
from sklearn.metrics import precision_recall_fscore_support as score
from scipy.cluster.hierarchy import linkage
from scipy.cluster.hierarchy import dendrogram
from scipy.spatial.distance import pdist
from scipy.cluster.hierarchy import cophenet
from tqdm import tqdm

In [2]:
if torch.cuda.is_available():
    device = torch.device('cuda:0')

## データ前処理

In [3]:
EOS_token = '<EOS>'
BOS_token = '<BOS>'
file_pattern = re.compile(r'^sw\_([a-z]+?)\_([0-9]+?)\.jsonlines$')

swda_tagu = {
    '<Uninterpretable>': ['abandoned_or_turn-exit/uninterpretable', 'non-verbal'],
    '<Statement>': ['statement-non-opinion', 'statement-opinion', 'other_answers', '3rd-party-talk', 'self-talk', 'offers,_options_commits', 'collaborative_completion'],
    '<Question>': ['q', 'yes-no-question', 'wh-question', 'declarative_yes-no-question', 'backchannel_in_question_form', 'open-question', 'rhetorical-questions', 'signal-non-understanding', 'or-clause', 'tag-question', 'declarative_wh-question'],
    '<Directive>': ['action-directive'],
    '<Greeting>': ['conventional-opening', 'conventional-closing'],
    '<Apology>': ['apology', 'no_answers', 'reject', 'negative_non-no_answers', 'dispreferred_answers', 'dispreferred_answers'],
    '<Agreement>': ['agree/accept', 'maybe/accept-part', 'thanking'],
    '<Understanding>': ['acknowledge_(backchannel)', 'summarize/reformulate', 'appreciation', 'response_acknowledgement', 'affirmative_non-yes_answers', 'yes_answers'],
    '<Other>': ['other', 'hedge', 'quotation', 'repeat-phrase', 'hold_before_answer/agreement', 'downplayer']
}

### 対話行為のID化

In [4]:
tagu_to_ids = {
    'None': -1,
    '<Statement>': 0,
    '<Understanding>': 0, 
    '<Uninterpretable>': 0, 
    '<Other1>': 1,
    '<Other2>': 1,
    '<Other3>': 1,
    '<Question>': 0, 
    '<Agreement>': 1, 
    '<Apology>': 2, 
    '<Greeting>': 3, 
    '<Other>': 4, 
    '<Directive>': 5
}

In [5]:
class DA_to_ID:
    
    def __init__(self, config, X_DA, Y_DA, name):
        self.word2id = None
        self.id2word = None
        self.config = config
        self.X_DA = X_DA
        self.Y_DA = Y_DA
        self.name = name
        self.construct()
        
    def construct(self):
#        vocab = {'<PAD>': 0}
        vocab = {}
        vocab_count = {}
        
        for x,y in zip(self.X_DA, self.Y_DA):
            for token in x:
                if token in vocab_count:
                    vocab_count[token] += 1
                else:
                    vocab_count[token] = 1
                    
            for token in y:
                if token in vocab_count:
                    vocab_count[token] += 1
                else:
                    vocab_count[token] = 1
                    
        for k, _ in sorted(vocab_count.items(), key=lambda x: -x[1]):
            vocab[k] = len(vocab)
            if len(vocab) >= self.config[self.name]['MAX_VOCAB']: break
        self.word2id = vocab
        self.id2word = {v : k for k, v in vocab.items()}
        return vocab
        
    def tokenize(self, X_tensor, Y_tensor):
        X_Tensor = [[self.word2id[token] for token in sentence] for sentence in X_tensor]
        Y_Tensor = [[self.word2id[token] for token in sentence] for sentence in Y_tensor]
        return X_Tensor, Y_Tensor

In [6]:
class DA_to_ID_hire:
    
    def __init__(self, config, X_DA, Y_DA, name):
        self.word2id = None
        self.id2word = None
        self.config = config
        self.X_DA = X_DA
        self.Y_DA = Y_DA
        self.name = name
        self.construct()
        
    def construct(self):

        vocab = {}
        vocab_count = {}
        
        for x,y in zip(self.X_DA, self.Y_DA):
            for token in x:
                vocab[token] = tagu_to_ids[token]
            for token in y:
                vocab[token] = tagu_to_ids[token]
                    
        self.word2id = vocab
        self.id2word = {v : k for k, v in vocab.items()}
        return vocab
        
    def tokenize(self, X_tensor, Y_tensor):
        X_Tensor = [[self.word2id[token] for token in sentence] for sentence in X_tensor]
        Y_Tensor = [[self.word2id[token] for token in sentence] for sentence in Y_tensor]
        return X_Tensor, Y_Tensor

### 発話のID化

In [7]:
class UTT_to_ID:
    
    def __init__(self, config, X_UTT, Y_UTT, name):
        self.word2id = None
        self.id2word = None
        self.config = config
        self.X_UTT = X_UTT
        self.Y_UTT = Y_UTT
        self.name = name
        self.construct()
        
    def construct(self):
        
        vocab = {'<UNK>': 0, '<EOS>': 1, '<BOS>': 2, '<UttPAD>': 3, '<ConvPAD>': 4}
        vocab_count = {}
        
        for x,y in zip(self.X_UTT, self.Y_UTT):
            for seq in x:
                for word in seq:
                    if word in vocab_count:
                        vocab_count[word] += 1
                    else:
                        vocab_count[word] = 1
            for seq in y:
                for word in seq:
                    if word in vocab_count:
                        vocab_count[word] += 1
                    else:
                        vocab_count[word] = 1
                        
        for k, _ in sorted(vocab_count.items(), key=lambda x: -x[1]):
            vocab[k] = len(vocab)
            if len(vocab) >= self.config[self.name]['UTT_MAX_VOCAB']: break
        self.word2id = vocab
        self.id2word = {v : k for k, v in vocab.items()}

        return vocab
        
    def tokenize(self, X_tensor, Y_tensor):
        
        X_Tensor = [[[self.word2id[token] if token in self.word2id else self.word2id['<UNK>'] for token in seq] for seq in dialogue] for dialogue in X_tensor]
        Y_Tensor = [[[self.word2id[token] if token in self.word2id else self.word2id['<UNK>'] for token in seq] for seq in dialogue] for dialogue in Y_tensor]
        return X_Tensor, Y_Tensor

### トレーニングデータ作成

In [8]:
def create_traindata(config, name):
    files = [f for f in os.listdir(config[name]['train_path']) if file_pattern.match(f)]
    # print("files:" , files)
    da_x, da_y, utt_x, utt_y, turn = [], [], [], [], []
    da_x1, da_x2, da_x3, da_x4 = [], [], [], []
    da_y1, da_y2, da_y3, da_y4 = [], [], [], []
    # 1file 1conversation
    for filename in files:
        # print(os.path.join(config['train_path'], filename))
        with open(os.path.join(config[name]['train_path'], filename), 'r') as f:
            data = f.read().split('\n')
            data.remove('')
            da_seq, utt_seq, turn_seq = [], [], []
            da1_seq, da2_seq, da3_seq, da4_seq = [], [], [], []
            # 1line 1turn
            for idx, line in enumerate(data, 1):
                jsondata = json.loads(line)
                # single-turn multi dialogue case
                for da, utt in zip(jsondata['DA'], jsondata['sentence']):
                    da_seq.append(da)
                    utt_seq.append(utt.split(' '))
                    turn_seq.append(0)
                for da1, da2, da3, da4 in zip(jsondata['DA1'], jsondata['DA2'], jsondata['DA3'], jsondata['DA4']):
                    da1_seq.append(da1)
                    da2_seq.append(da2)
                    da3_seq.append(da3)
                    da4_seq.append(da4)
                        
                turn_seq[-1] = 1
            da_seq = [easy_damsl(da) for da in da_seq]
            
        if config[name]['state']:
            for i in range(max(1, len(da_seq) - 1 - config[name]['window_size'])):
                ## 発話の対話行為
                da_x.append(da_seq[i:min(len(da_seq)-1, i + config[name]['window_size'])])
                da_x1.append(da1_seq[i:min(len(da1_seq)-1, i + config[name]['window_size'])])
                da_x2.append(da2_seq[i:min(len(da2_seq)-1, i + config[name]['window_size'])])
                da_x3.append(da3_seq[i:min(len(da3_seq)-1, i + config[name]['window_size'])])
                da_x4.append(da4_seq[i:min(len(da4_seq)-1, i + config[name]['window_size'])])
                
                ## 応答の対話行為
                da_y.append(da_seq[1 + i:min(len(da_seq), 1 + i + config[name]['window_size'])])
                da_y1.append(da1_seq[1 + i:min(len(da1_seq), 1 + i + config[name]['window_size'])])
                da_y2.append(da2_seq[1 + i:min(len(da2_seq), 1 + i + config[name]['window_size'])])
                da_y3.append(da3_seq[1 + i:min(len(da3_seq), 1 + i + config[name]['window_size'])])
                da_y4.append(da4_seq[1 + i:min(len(da4_seq), 1 + i + config[name]['window_size'])])
                
                ## 対話文
                utt_x.append(utt_seq[i:min(len(da_seq)-1, i + config[name]['window_size'])])
                utt_y.append(utt_seq[1 + i:min(len(da_seq), 1 + i + config[name]['window_size'])])
                
                ## ターン制
                turn.append(turn_seq[i:min(len(da_seq), i + config[name]['window_size'])])
    
    return da_x, da_x1, da_x2, da_x3, da_x4, da_y, da_y1, da_y2, da_y3, da_y4, utt_x, utt_y, turn

In [9]:
def easy_damsl(tag):
    easy_tag = [k for k, v in swda_tagu.items() if tag in v]
    return easy_tag[0] if not len(easy_tag) < 1 else tag

def separate_data(x, y, turn):
    split_size = round(len(x) / 10)
    if split_size == 0: split_size = 1
    X_train, Y_train, Tturn = x[split_size * 2:], y[split_size * 2:], turn[split_size * 2:]
    X_valid, Y_valid, Vturn = x[split_size: split_size * 2], y[split_size: split_size * 2], turn[split_size: split_size * 2]
    X_test, Y_test, Testturn = x[:split_size], y[:split_size], turn[:split_size]
    assert len(X_train) == len(Y_train), 'Unexpect to separate train data'
    return X_train, Y_train, X_valid, Y_valid, X_test, Y_test, Tturn, Vturn, Testturn


def separate_data_da(x, y):
    split_size = round(len(x) / 10)
    if split_size == 0: split_size = 1
    X_train, Y_train = x[split_size * 2:], y[split_size * 2:]
    X_valid, Y_valid = x[split_size: split_size * 2], y[split_size: split_size * 2]
    X_test, Y_test = x[:split_size], y[:split_size]
    assert len(X_train) == len(Y_train), 'Unexpect to separate train data'
    return X_train, Y_train, X_valid, Y_valid, X_test, Y_test

In [10]:
def initialize_env(name):
    config = pyhocon.ConfigFactory.parse_file('./dialogue.conf')
    config['log_dirs'] = os.path.join(config[name]['log_dir'])
    if not os.path.exists(config['log_dirs']):
        os.mkdir(config['log_dirs'])
     
    return config

def create_DAdata(config, name):
    da_x, da_x1, da_x2, da_x3, da_x4, da_y, da_y1, da_y2, da_y3, da_y4, _, _, _ = create_traindata(config, name)
    return da_x, da_x1, da_x2, da_x3, da_x4, da_y, da_y1, da_y2, da_y3, da_y4

def create_Uttdata(config, name):
    _, _, _, _, _, _, _, _, _, _, posts, cmnts, turn = create_traindata(config, name)
    X_train, Y_train, X_valid, Y_valid, X_test, Y_test, Tturn, Vturn, Testturn = separate_data(posts, cmnts, turn)
    return X_train, Y_train, X_valid, Y_valid, X_test, Y_test, Tturn, Vturn, Testturn


### データの分割

In [11]:
model_name="CmbAttention"
loss_name="HireCE_All"

In [12]:
write = SummaryWriter("./logs")
config = initialize_env(model_name+loss_name)

In [13]:
da_x, da_x1, da_x2, da_x3, da_x4, da_y, da_y1, da_y2, da_y3, da_y4 = create_DAdata(config, model_name+loss_name)
XDA_train, YDA_train, XDA_valid, YDA_valid, _, _ = separate_data_da(da_x, da_y)
XDA1_train, YDA1_train, XDA1_valid, YDA1_valid, _, _ = separate_data_da(da_x1, da_y1)
XDA2_train, YDA2_train, XDA2_valid, YDA2_valid, _, _ = separate_data_da(da_x2, da_y2)
XDA3_train, YDA3_train, XDA3_valid, YDA3_valid, _, _ = separate_data_da(da_x3, da_y3)
XDA4_train, YDA4_train, XDA4_valid, YDA4_valid, _, _ = separate_data_da(da_x4, da_y4)

In [14]:
DA_vocab = DA_to_ID(config, XDA_train+XDA_valid, YDA_train+YDA_valid, model_name+loss_name)
DA1_vocab = DA_to_ID_hire(config, XDA1_train+XDA1_valid, YDA1_train+YDA1_valid, model_name+loss_name)
DA2_vocab = DA_to_ID_hire(config, XDA2_train+XDA2_valid, YDA2_train+YDA2_valid, model_name+loss_name)
DA3_vocab = DA_to_ID_hire(config, XDA3_train+XDA3_valid, YDA3_train+YDA3_valid, model_name+loss_name)
DA4_vocab = DA_to_ID_hire(config, XDA4_train+XDA4_valid, YDA4_train+YDA4_valid, model_name+loss_name)

In [15]:
XUtt_train, YUtt_train, XUtt_valid, YUtt_valid, _, _, Tturn, Vturn, _ = create_Uttdata(config, model_name+loss_name)
Utt_vocab = UTT_to_ID(config, XUtt_train+XUtt_valid, YUtt_train+YUtt_valid, model_name+loss_name)

In [16]:
## 対話行為
XDA_train, YDA_train = DA_vocab.tokenize(XDA_train, YDA_train)
XDA1_train, YDA1_train = DA1_vocab.tokenize(XDA1_train, YDA1_train)
XDA2_train, YDA2_train = DA2_vocab.tokenize(XDA2_train, YDA2_train)
XDA3_train, YDA3_train = DA3_vocab.tokenize(XDA3_train, YDA3_train)
XDA4_train, YDA4_train = DA4_vocab.tokenize(XDA4_train, YDA4_train)

XDA_valid, YDA_valid = DA_vocab.tokenize(XDA_valid, YDA_valid)
XDA1_valid, YDA1_valid = DA1_vocab.tokenize(XDA1_valid, YDA1_valid)
XDA2_valid, YDA2_valid = DA2_vocab.tokenize(XDA2_valid, YDA2_valid)
XDA3_valid, YDA3_valid = DA3_vocab.tokenize(XDA3_valid, YDA3_valid)
XDA4_valid, YDA4_valid = DA4_vocab.tokenize(XDA4_valid, YDA4_valid)

In [17]:
## 発話文
XUtt_train, YUtt_train = Utt_vocab.tokenize(XUtt_train, YUtt_train)
XUtt_valid, YUtt_valid = Utt_vocab.tokenize(XUtt_valid, YUtt_valid)

In [18]:
XDA_valid = list(filter(None, XDA_valid))
XDA1_valid = list(filter(None, XDA1_valid))
XDA2_valid = list(filter(None, XDA2_valid))
XDA3_valid = list(filter(None, XDA3_valid))
XDA4_valid = list(filter(None, XDA4_valid))
XUtt_valid = list(filter(None, XUtt_valid))

YDA_valid = list(filter(None, YDA_valid))
YDA1_valid = list(filter(None, YDA1_valid))
YDA2_valid = list(filter(None, YDA2_valid))
YDA3_valid = list(filter(None, YDA3_valid))
YDA4_valid = list(filter(None, YDA4_valid))
YUtt_valid = list(filter(None, YUtt_valid))

In [19]:
print('Finish preparing dataset...')

Finish preparing dataset...


## Encoder Model

In [20]:
class CmbAttentionModel(nn.Module):
    
    def __init__(self, model_name, utt_vocab, da_vocab, config, device):
        super(CmbAttentionModel, self).__init__()
        
        self.total_layer = config[model_name]['total_layer']
        
        self.loss_fun = nn.CrossEntropyLoss().cuda()
        
        self.utter_encoder = UtteraceEncoder(len(utt_vocab.word2id), config[model_name]['UTT_EMBED'], config[model_name]['UTT_HIDDEN'])

        self.context_encoder = RNNContextAwareEncoder(config[model_name]['CON_EMBED'], config[model_name]['CON_HIDDEN'])

        self.da_encoder = RNNDAAwareEncoder(len(utt_vocab.word2id), config[model_name]['DA_EMBED'], config[model_name]['DA_HIDDEN'])
        
        self.classify = HierarchicelClassification(config[model_name]['DA_HIDDEN'] + config[model_name]['CON_HIDDEN'])

        self.device = device
        
    def forward(self, X_utter, X_da, Yda, mask, context_hidden, da_hidden, turn):

        utter_output, utter_weights = self.utter_encoder(X_utter, mask)

        turn_output = torch.cat((utter_output, turn), dim=2)

        context_output, context_weights, context_hidden = self.context_encoder(turn_output, mask, context_hidden)

        da_output, da_weights, da_hidden = self.da_encoder(X_da, mask, da_hidden)

        x_output = torch.cat((context_output, da_output), dim=2)
        
        pred1, pred2, pred3, pred4 = self.classify(x_output, Yda) 
        
        return pred1, pred2, pred3, pred4, context_hidden, da_hidden
    
    def validtion(self, X_utter, X_da, Yda, mask, context_hidden, da_hidden, turn):

        utter_output, utter_weights = self.utter_encoder(X_utter, mask)

        turn_output = torch.cat((utter_output, turn), dim=2)

        context_output, context_weights, context_hidden = self.context_encoder(turn_output, mask, context_hidden)

        da_output, da_weights, da_hidden = self.da_encoder(X_da, mask, da_hidden)

        x_output = torch.cat((context_output, da_output), dim=2)
        
        pred1, pred2, pred3, pred4 = self.classify(x_output, Yda)
        
        return pred1, pred2, pred3, pred4, context_hidden, da_hidden


    def prediction(self, X_utter, X_da, mask, context_hidden, da_hidden, turn):

        utter_output, _ = self.utter_encoder(X_utter, mask)

        turn_output = torch.cat((utter_output, turn), dim=2)

        context_output, context_weights, context_hidden = self.context_encoder(turn_output, mask, context_hidden)

        da_output, da_weights, da_hidden = self.da_encoder(X_da, mask, da_hidden)

        x_output = torch.cat((context_output, da_output), dim=2)
        
        pred1, pred2, pred3, pred4 = self.classify.test(x_output)

        return pred1, pred2, pred3, pred4, context_hidden, da_hidden


    def initDAHidden(self, batch_size):
        
        return self.utter_encoder.initHidden(batch_size, self.device), self.context_encoder.initHidden(batch_size, self.device), self.da_encoder.initHidden(batch_size, self.device)
    

### Decoder Model (Hierarchical Classification)

In [21]:
class HierarchicelClassification(nn.Module):
    def __init__(self, CDhidden, num_classes=[2,2,2,6]):
        super(HierarchicelClassification, self).__init__()
        
        self.linear_layer1 = nn.Linear(CDhidden, num_classes[0])
        self.linear_layer2 = nn.Linear(CDhidden, num_classes[1])
        self.linear_layer3 = nn.Linear(CDhidden, num_classes[2])
        self.linear_layer4 = nn.Linear(CDhidden, num_classes[3])
        
        self.softmax_layer1 = nn.Linear(num_classes[0], num_classes[0])
        self.softmax_layer2 = nn.Linear(num_classes[0]+num_classes[1], num_classes[1])
        self.softmax_layer3 = nn.Linear(num_classes[0]+num_classes[1]+num_classes[2], num_classes[2])
        self.softmax_layer4 = nn.Linear(num_classes[0]+num_classes[1]+num_classes[2]+num_classes[3], num_classes[3])
        
    def forward(self, x_output, Yda):
    
        level1_output = self.softmax_layer1(self.linear_layer1(x_output))
        if Yda[1] != -1:
            level2_output = self.softmax_layer2(torch.cat([level1_output, self.linear_layer2(x_output)], dim=2))
            if Yda[2] != -1:
                level3_output = self.softmax_layer3(torch.cat([level1_output, level2_output, self.linear_layer3(x_output)], dim=2))
                if Yda[3] != -1:
                    level4_output = self.softmax_layer4(torch.cat([level1_output, level2_output, level3_output, self.linear_layer4(x_output)], dim=2))
                    return level1_output.squeeze(1), level2_output.squeeze(1), level3_output.squeeze(1), level4_output.squeeze(1)
                else:
                    return level1_output.squeeze(1), level2_output.squeeze(1), level3_output.squeeze(1), None
            else:
                return level1_output.squeeze(1), level2_output.squeeze(1), None, None
        else:
            return level1_output.squeeze(1), None, None, None
        
    def test(self, x_output):
        
        level1_output = self.softmax_layer1(self.linear_layer1(x_output))
        pred1 = torch.argmax(level1_output.squeeze(1))
        if pred1==0:
            return pred1, None, None, None
        else:
            level2_output = self.softmax_layer2(torch.cat([level1_output, self.linear_layer2(x_output)], dim=2))
            pred2 = torch.argmax(level2_output.squeeze(1))
            if pred2==0:
                return pred1, pred2, None, None
            else:
                level3_output = self.softmax_layer3(torch.cat([level1_output, level2_output, self.linear_layer3(x_output)], dim=2))
                pred3 = torch.argmax(level3_output.squeeze(1))
                if pred3==0:
                    return pred1, pred2, pred3, None
                else:
                    level4_output = self.softmax_layer4(torch.cat([level1_output, level2_output, level3_output, self.linear_layer4(x_output)], dim=2))
                    pred4 = torch.argmax(level4_output.squeeze(1))
                    return pred1, pred2, pred3, pred4

## Block Model

### Utterance Layer

In [22]:
class UtteraceEncoder(nn.Module):
    
    def __init__(self, vocab_size, emb_dim, d_model):
        super(UtteraceEncoder, self).__init__()
        self.d_model = d_model
        self.embedding = WordEmbedding(vocab_size, emb_dim, self.d_model)
        self.pe = PositinalEncoding(self.d_model, 200)
        self.att = Attention(self.d_model)
        self.ffn = FeedForward(d_model, emb_dim)
        
    def forward(self, x_utter, mask):

        emb_output = self.embedding(x_utter)

        pos_output = self.pe(emb_output)

        att_output, att_weights = self.att(pos_output, pos_output, pos_output, mask)

        ffn_output = self.ffn(att_output)

        seq_len = ffn_output.size()[1]

        avg_output = F.avg_pool2d(ffn_output, (seq_len, 1)) # => (128, 1, 512)

        return avg_output, att_weights  # 発話ベクトル(128, 1, 512)

    def initHidden(self, batch_size, device):
        return torch.zeros(1, batch_size, self.d_model).to(device)

In [23]:
class FeedForward(nn.Module):

    def __init__(self, d_model, d_ff, dropout=0.1):
        super(FeedForward, self).__init__()
        self.linear_1 = nn.Linear(d_model, d_ff)
        self.dropout = nn.Dropout(dropout)
        self.linear_2 = nn.Linear(d_ff, d_model)

    def forward(self, x):

        x = self.linear_1(x)

        x = self.dropout(F.relu(x))

        x = self.linear_2(x)

        return x

In [24]:
class Attention(nn.Module):

    def __init__(self, d_model):
        super(Attention, self).__init__()
        self.q_linear = nn.Linear(d_model, d_model)
        self.v_linear = nn.Linear(d_model, d_model)
        self.k_linear = nn.Linear(d_model, d_model)
        self.out = nn.Linear(d_model, d_model)
        self.d_k = d_model
       
    def forward(self, q, k, v, mask=None):
        # 全結合層で特徴量を変換
        k = self.k_linear(k)
        q = self.q_linear(q)
        v = self.v_linear(v)

        # Attentionの値を計算する
        # 各値を足し算すると大きくなりすぎるので、root(d_k)で割って調整
        weights = torch.matmul(q, k.transpose(1, 2)) / math.sqrt(self.d_k)
        
        # ここでmaskを計算
        if mask is not None:
            mask = mask.unsqueeze(1)
            weights = weights.masked_fill(mask == 0, -1e9)

        # softmaxで規格化をする
        attention_weights = F.softmax(weights, dim=-1)

        # AttentionをValueとかけ算
        output = torch.matmul(attention_weights, v)

        # 全結合層で特徴量を変換
        output = self.out(output)

        return output, attention_weights

In [25]:
class PositinalEncoding(nn.Module):

    def __init__(self, d_model, max_len, dropout=0.1):
        super(PositinalEncoding, self).__init__()
        self.dropout = nn.Dropout(p=dropout)
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0).transpose(0, 1)
        self.register_buffer('pe', pe)

    def forward(self, x):
        
        x = x + self.pe[:x.size(0), :]

        return self.dropout(x)


In [26]:
class WordEmbedding(nn.Module):

    def __init__(self, vocab_size, embed_size, w_model):
        super(WordEmbedding, self).__init__()
        self.word_embedding = nn.Embedding(vocab_size, embed_size)
        self.linear = nn.Linear(embed_size, w_model)

    def forward(self, x_word):
        return torch.tanh(self.linear(self.word_embedding(x_word)))

### Context Layer

In [27]:
class RNNContextAwareEncoder(nn.Module):
    
    def __init__(self, emb_dim, d_model):
        super(RNNContextAwareEncoder, self).__init__()
        self.d_model = d_model
        self.linear = nn.Linear(self.d_model+1, self.d_model)
        self.rnn = nn.GRU(self.d_model, self.d_model, batch_first=True)
        self.attention = ContextAttention(self.d_model, self.d_model, self.d_model)
        self.ffn = FeedForward(self.d_model, self.d_model)

    def forward(self, x, mask, hidden):

        lin_output = self.linear(x)

        att_output, att_weights = self.attention(lin_output, mask, hidden.transpose(0,1))        

        rnn_output, rnn_hidden = self.rnn(att_output, hidden)

        ffn_output = self.ffn(rnn_output)

        return ffn_output, att_weights, rnn_hidden

    def initHidden(self, batch_size, device):
        return torch.zeros(1, batch_size, self.d_model).to(device)

In [28]:
class ContextAttention(nn.Module):

    def __init__(self, d_model, hidden_size, att_size):
        super(ContextAttention, self).__init__()
        self.q_linear = nn.Linear(att_size, att_size)
        self.v_linear = nn.Linear(att_size, att_size)
        self.k_linear = nn.Linear(att_size, att_size)

        self.fc_1 = nn.Linear(d_model, d_model)
        self.fc_3 = nn.Linear(hidden_size, d_model, bias=True)
        self.fc_2 = nn.Linear(d_model, att_size)

        self.fc_out = nn.Linear(att_size, hidden_size, bias=True)
        self.d_k = att_size

    def forward(self, x, mask, hidden):
        
        x = self.fc_2(torch.tanh(self.fc_1(x) + self.fc_3(hidden)))

        q = self.q_linear(x)
        v = self.v_linear(x)
        k = self.k_linear(x)

        weights = torch.matmul(q, k.transpose(1, 2)) / math.sqrt(self.d_k)

        # ここでmaskを計算
        if mask is not None:
            mask = mask.unsqueeze(1)
            weights = weights.masked_fill(mask == 0, -1e9)

        attention_weights = F.softmax(weights, dim=-1)

        att_output = torch.matmul(attention_weights, v)

        output = self.fc_out(att_output)

        return output, attention_weights

### RNNDAAwareEncoder

In [29]:
class RNNDAAwareEncoder(nn.Module):

    def __init__(self, da_size, emb_dim, d_model):
        super(RNNDAAwareEncoder, self).__init__()
        self.d_model = d_model
        self.embedding = DAEmbedding(da_size, emb_dim, self.d_model)
        self.rnn = nn.GRU(self.d_model, self.d_model, batch_first=True)
        self.attention = ContextAttention(self.d_model, self.d_model, self.d_model)
        self.ffn = FeedForward(self.d_model, emb_dim)

    def forward(self, X_da, mask, hidden):

        emb_output = self.embedding(X_da)

        att_output, att_weights = self.attention(emb_output, mask, hidden.transpose(0,1))        

        rnn_output, rnn_hidden = self.rnn(att_output, hidden)

        ffn_output = self.ffn(rnn_output)

        return ffn_output, att_weights, rnn_hidden

    def initHidden(self, batch_size, device):
        return torch.zeros(1, batch_size, self.d_model).to(device)

In [30]:
class DAEmbedding(nn.Module):

    def __init__(self, da_size, embed_size, d_model):
        super(DAEmbedding, self).__init__()
        self.da_embedding = nn.Embedding(da_size, embed_size)
        self.linear = nn.Linear(embed_size, d_model)

    def forward(self, x_da):
        return torch.tanh(self.linear(self.da_embedding(x_da)))

## 損失関数

In [31]:
class HierarchicalLossNetwork:
    '''Logics to calculate the loss of the model.
    '''
    def __init__(self, device='cpu', total_level=4, alpha=1, beta=0.8, p_loss=3):
        '''Param init.
        '''
        self.total_level = total_level
        self.alpha = alpha
        self.beta = beta
        self.p_loss = p_loss
        self.device = device

    def calculate_lloss(self, predictions, true_labels):
        '''Calculates the layer loss.
        '''
        lloss = 0
        for i in range(self.total_level):
            
            if predictions[i]==None:
                return lloss
            if i != 3:
                true_onehot = one_hot(true_labels[i], num_classes=2)[0]
                pred_vec = predictions[i][0]
                
                lloss += nn.BCELoss()(nn.Sigmoid()(pred_vec), true_onehot.to(torch.float))
            else:
                lloss += nn.CrossEntropyLoss()(predictions[i], true_labels[i])
            
        return lloss

## モデル設定

In [32]:
lr = config[model_name+loss_name]['lr']
config[model_name+loss_name]['BATCH_SIZE']=1
batch_size = config[model_name+loss_name]['BATCH_SIZE']

model = CmbAttentionModel(model_name+loss_name, Utt_vocab, DA_vocab, config, device).to(device)
opt = optim.Adam(model.parameters(), lr)

total_layer = config[model_name+loss_name]['total_layer']
HLN = HierarchicalLossNetwork(device=device, total_level=total_layer)

In [33]:
torch.set_default_tensor_type('torch.cuda.FloatTensor')
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"

### 訓練

In [34]:
def Train(model_name, loss_name, models, optims):

    plot_train_losses = []
    plot_valid_losses = []
    print_total_loss = 0
    plot_total_loss = 0
    plot_total_acc = 0
    _valid_loss = None
    config[model_name+loss_name]['BATCH_SIZE']=1
    batch_size = config[model_name+loss_name]['BATCH_SIZE']
    
    print('{} start TRAINING'.format(model_name+loss_name))
    start = time.time()
    
    for epoch in range(config[model_name+loss_name]['EPOCH']):

        tmp_time = time.time()
        print('Epoch {} start'.format(epoch+1))
        index = [i for i in range(len(XDA_train))]
        random.shuffle(index)
        idx=0

        while idx < len(index):
            step_size = min(batch_size, len(index)-idx)
            batch_idx = index[idx:idx+step_size]
            utter_hidden, context_hidden, da_hidden = models.initDAHidden(step_size)
            classification = HierarchicelClassification(640)

            print('\rConversation {}/{} training...'.format(idx + step_size, len(XDA_train)), end='')
            Xda_seq = [XDA_train[seq_idx] for seq_idx in batch_idx]
            Yda_seq = [YDA_train[seq_idx] for seq_idx in batch_idx]
            Yda1_seq = [YDA1_train[seq_idx] for seq_idx in batch_idx]
            Yda2_seq = [YDA2_train[seq_idx] for seq_idx in batch_idx]
            Yda3_seq = [YDA3_train[seq_idx] for seq_idx in batch_idx]
            Yda4_seq = [YDA4_train[seq_idx] for seq_idx in batch_idx]
            turn_seq = [Tturn[seq_idx] for seq_idx in batch_idx]
            max_conv_len = max(len(s) for s in Xda_seq) 

            ## 
            XUtt_seq = [XUtt_train[seq_idx] for seq_idx in batch_idx]
            YUtt_seq = [YUtt_train[seq_idx] for seq_idx in batch_idx]

            for i in range(len(XUtt_seq)):
                XUtt_seq[i] = XUtt_seq[i] + [[Utt_vocab.word2id['<ConvPAD>']]] * (max_conv_len - len(XUtt_seq[i]))
                YUtt_seq[i] = YUtt_seq[i] + [[Utt_vocab.word2id['<ConvPAD>']]] * (max_conv_len - len(YUtt_seq[i]))

            for ci in range(len(Xda_seq)):

                turn_seq[ci] = turn_seq[ci] + [0] * (max_conv_len - len(turn_seq[ci]))
                Xda_seq[ci] = Xda_seq[ci] + [0] * (max_conv_len - len(Xda_seq[ci]))
                Yda_seq[ci] = Yda_seq[ci] + [0] * (max_conv_len - len(Yda_seq[ci]))
                Yda1_seq[ci] = Yda1_seq[ci] + [0] * (max_conv_len - len(Yda1_seq[ci]))
                Yda2_seq[ci] = Yda2_seq[ci] + [0] * (max_conv_len - len(Yda2_seq[ci]))
                Yda3_seq[ci] = Yda3_seq[ci] + [0] * (max_conv_len - len(Yda3_seq[ci]))
                Yda4_seq[ci] = Yda4_seq[ci] + [0] * (max_conv_len - len(Yda4_seq[ci]))

            for i in range(0, max_conv_len):
                
                last = True if i == max_conv_len - 1 else False
                
                Xda_tensor = torch.tensor([[X[i]] for X in Xda_seq]).to(device)
                Yda_tensor = torch.tensor([[Y[i]] for Y in Yda_seq]).to(device)
                Yda1_tensor = torch.tensor([[Y[i]] for Y in Yda1_seq]).to(device)
                Yda2_tensor = torch.tensor([[Y[i]] for Y in Yda2_seq]).to(device)
                Yda3_tensor = torch.tensor([[Y[i]] for Y in Yda3_seq]).to(device)
                Yda4_tensor = torch.tensor([[Y[i]] for Y in Yda4_seq]).to(device)
                
                turn_tensor = torch.tensor([[t[i]] for t in turn_seq]).to(device)
                turn_tensor = turn_tensor.float()
                turn_tensor = turn_tensor.unsqueeze(1)    

                ### 
                max_seq_len = max(len(XU[i]) + 1 for XU in XUtt_seq)
                
                ### Padding処理
                for ci in range(len(XUtt_seq)):
                    XUtt_seq[ci][i] = XUtt_seq[ci][i] + [Utt_vocab.word2id['<UttPAD>']] * (max_seq_len - len(XUtt_seq[ci][i]))
                    YUtt_seq[ci][i] = YUtt_seq[ci][i] + [Utt_vocab.word2id['<UttPAD>']] * (max_seq_len - len(YUtt_seq[ci][i]))
                XUtt_tensor = torch.tensor([XU[i] for XU in XUtt_seq]).to(device)
                YUtt_tensor = None
                level1_pred, level2_pred, level3_pred, level4_pred, context_hidden, da_hidden = models(XUtt_tensor, Xda_tensor, [Yda1_tensor.squeeze(1), Yda2_tensor.squeeze(1), Yda3_tensor.squeeze(1), Yda4_tensor.squeeze(1)], None, context_hidden, da_hidden, turn_tensor)
             
                lloss = HLN.calculate_lloss([level1_pred, level2_pred, level3_pred, level4_pred], [Yda1_tensor.squeeze(1), Yda2_tensor.squeeze(1), Yda3_tensor.squeeze(1), Yda4_tensor.squeeze(1)])
                optims.zero_grad()
                lloss.backward(retain_graph=True)
                print_total_loss += lloss.item()    
                
                if last:
                    optims.step()
                    
            print_total_loss/=max_conv_len  
            idx += step_size

        
        valid_loss = validation(XDA_valid, YDA1_valid, YDA2_valid, YDA3_valid, YDA4_valid, XUtt_valid, models, device, config, Vturn)


        def save_model(filename):
            torch.save(models.state_dict(), os.path.join(config[model_name+loss_name]['log_dir'], config[model_name+loss_name]['SAVE_NAME'] + "_v1" + str(config[model_name+loss_name]['window_size']) + "_v" + ".model".format(filename)))

        print("steps %d\tloss %.4f\tvalid loss %.4f | exec time %.4f" % (epoch+1, print_total_loss, valid_loss, time.time()-tmp_time))
        plot_train_losses.append(print_total_loss)
        plot_valid_losses.append(valid_loss)
        print_total_loss = 0

        if _valid_loss == None:
            save_model("model_save")
            print("Model Saved")
            _valid_loss = valid_loss
        else:
            if valid_loss<_valid_loss:
                _valid_loss = valid_loss
                save_model("model_save")
                print("Model Saved")

### 検証

In [35]:
@torch.no_grad()
def validation(X_valid, Y1_valid, Y2_valid, Y3_valid, Y4_valid, XU_valid, model, device, config, turn):

    total_loss = 0
    idx = 0
    
    for seq_idx in range(len(X_valid)):
        print('\r{}/{} conversation evaluating'.format(seq_idx+1, len(X_valid)), end='')
        utter_hidden, context_hidden, da_hidden = model.initDAHidden(1)
        
        X_seq = X_valid[seq_idx]
        
        Y1_seq = Y1_valid[seq_idx]
        Y2_seq = Y2_valid[seq_idx]
        Y3_seq = Y3_valid[seq_idx]
        Y4_seq = Y4_valid[seq_idx]
        
        turn[seq_idx] = turn[seq_idx] + [0] * (len(X_seq) - len(turn[seq_idx]))
        turn_seq = turn[seq_idx]
        XU_seq = XU_valid[seq_idx]
        

        assert len(X_seq) == len(Y1_seq), 'Unexpect sequence len in evaluate {} != {}'.format(len(X_seq), len(Y1_seq))
        
        for i in range(0, len(X_seq)):
            X_tensor = torch.tensor([[X_seq[i]]]).to(device)
            Y1_tensor = torch.tensor([[Y1_seq[i]]]).to(device)
            Y2_tensor = torch.tensor([[Y2_seq[i]]]).to(device)
            Y3_tensor = torch.tensor([[Y3_seq[i]]]).to(device)
            Y4_tensor = torch.tensor([[Y4_seq[i]]]).to(device)
            turn_tensor = torch.tensor([[turn_seq[i]]]).to(device)
            turn_tensor = turn_tensor.float()
            turn_tensor = turn_tensor.unsqueeze(1)   
            XU_tensor = torch.tensor([XU_seq[i]]).to(device)
            
            level1_pred, level2_pred, level3_pred, level4_pred, context_hidden, da_hidden = model.validtion(XU_tensor, X_tensor, [Y1_tensor.squeeze(1), Y2_tensor.squeeze(1), Y3_tensor.squeeze(1), Y4_tensor.squeeze(1)], None, context_hidden, da_hidden, turn_tensor)
            lloss = HLN.calculate_lloss([level1_pred, level2_pred, level3_pred, level4_pred], [Y1_tensor.squeeze(1), Y2_tensor.squeeze(1), Y3_tensor.squeeze(1), Y4_tensor.squeeze(1) ])
            total_loss += lloss.item()
        
        total_loss/=len(X_seq)
        
    return total_loss

## 実行

In [None]:
torch.set_default_tensor_type('torch.cuda.FloatTensor')
Train(model_name, loss_name, model, opt)

CmbAttentionHireCE_All start TRAINING
Epoch 1 start
20003/20003 conversation evaluatingsteps 1	loss 1.3653	valid loss 2.1535 | exec time 9779.9181
Model Saved
Epoch 2 start
20003/20003 conversation evaluatingsteps 2	loss 1.6230	valid loss 2.1977 | exec time 9113.1708
Epoch 3 start
20003/20003 conversation evaluatingsteps 3	loss 1.7531	valid loss 2.1828 | exec time 9101.4102
Epoch 4 start
20003/20003 conversation evaluatingsteps 4	loss 2.0747	valid loss 2.2899 | exec time 9111.7532
Epoch 5 start
20003/20003 conversation evaluating...steps 5	loss 1.8689	valid loss 2.1725 | exec time 9441.9608
Epoch 6 start
20003/20003 conversation evaluatingsteps 6	loss 1.3530	valid loss 2.1219 | exec time 9139.9438
Model Saved
Epoch 7 start
20003/20003 conversation evaluatingsteps 7	loss 1.8298	valid loss 2.0835 | exec time 9117.3484
Model Saved
Epoch 8 start
20003/20003 conversation evaluatingsteps 8	loss 2.3718	valid loss 2.1801 | exec time 9219.8426
Epoch 9 start
20003/20003 conversation evaluatingst

## モデルノ評価

In [None]:
da_x, da_x1, da_x2, da_x3, da_x4, da_y, da_y1, da_y2, da_y3, da_y4 = create_DAdata(config, model_name+loss_name)
_, _, _, _, XDA_test, YDA_test = separate_data_da(da_x, da_y)
_, _, _, _, XDA1_test, YDA1_test = separate_data_da(da_x1, da_y1)
_, _, _, _, XDA2_test, YDA2_test = separate_data_da(da_x2, da_y2)
_, _, _, _, XDA3_test, YDA3_test = separate_data_da(da_x3, da_y3)
_, _, _, _, XDA4_test, YDA4_test = separate_data_da(da_x4, da_y4)

XDA_test, YDA_test = DA_vocab.tokenize(XDA_test, YDA_test)
XDA1_test, YDA1_test = DA1_vocab.tokenize(XDA1_test, YDA1_test)
XDA2_test, YDA2_test = DA2_vocab.tokenize(XDA2_test, YDA2_test)
XDA3_test, YDA3_test = DA3_vocab.tokenize(XDA3_test, YDA3_test)
XDA4_test, YDA4_test = DA4_vocab.tokenize(XDA4_test, YDA4_test)

In [None]:
_, _, _, _, XUtt_test, YUtt_test, _, _, turn = create_Uttdata(config, model_name+loss_name)

XUtt_test, _ = Utt_vocab.tokenize(XUtt_test, YUtt_test)

In [None]:
model.load_state_dict(
    torch.load(
        os.path.join(config[model_name+loss_name]['log_dir'], 
                     config[model_name+loss_name]['SAVE_NAME'] + "_v1" + str(config[model_name+loss_name]['window_size']) + "_v2" + ".model".format('model_save'))))

### 推定結果の評価

In [None]:
@torch.no_grad()

def ModelTest(models):
    result=[]
    for seq_idx in range(0, len(XDA_test)):
        print('\r{}/{} conversation evaluating'.format(seq_idx+1, len(XDA_test)), end='')
        
        X_seq = XDA_test[seq_idx]
        Y1_seq = YDA1_test[seq_idx]
        Y2_seq = YDA2_test[seq_idx]
        Y3_seq = YDA3_test[seq_idx]
        Y4_seq = YDA4_test[seq_idx]
        
        turn[seq_idx] = turn[seq_idx] + [0] * (len(X_seq) - len(turn[seq_idx]))
        turn_seq = turn[seq_idx]
        XU_seq = XUtt_test[seq_idx]

        pred1_seq = []
        true1_seq = []
        pred2_seq = []
        true2_seq = []
        pred3_seq = []
        true3_seq = []
        pred4_seq = []
        true4_seq = []
        
        utter_hidden, context_hidden, da_hidden = models.initDAHidden(1)

        for i in range(0, len(X_seq)):
            X_tensor = torch.tensor([[X_seq[i]]]).to(device)
            Y1_tensor = torch.tensor([[Y1_seq[i]]]).to(device)
            Y2_tensor = torch.tensor([[Y2_seq[i]]]).to(device)
            Y3_tensor = torch.tensor([[Y3_seq[i]]]).to(device)
            Y4_tensor = torch.tensor([[Y4_seq[i]]]).to(device)
            turn_tensor = torch.tensor([[turn_seq[i]]]).to(device)
            turn_tensor = turn_tensor.float()
            turn_tensor = turn_tensor.unsqueeze(1)   
            XU_tensor = torch.tensor([XU_seq[i]]).to(device)

            level1_pred, level2_pred, level3_pred, level4_pred, context_hidden, da_hidden = model.prediction(XU_tensor, X_tensor, None, context_hidden, da_hidden, turn_tensor)
            
        pred_list=[level1_pred, level2_pred, level3_pred, level4_pred]
        pred_index=[torch.tensor(-1)]*4
        true_list=[Y1_tensor, Y2_tensor, Y3_tensor, Y4_tensor]
        true_index=[torch.tensor(-1)]*4
        
        for i in range(0,4):
            if pred_list[i]==None:
                break
            pred_index[i] = torch.argmax(pred_list[i])
            true_index[i] = true_list[i].squeeze(1)
    
        pred1_seq.append(pred_index[0].item())
        pred2_seq.append(pred_index[1].item())
        pred3_seq.append(pred_index[2].item())
        pred4_seq.append(pred_index[3].item())
        
        true1_seq.append(true_index[0].item())
        true2_seq.append(true_index[1].item())
        true3_seq.append(true_index[2].item())
        true4_seq.append(true_index[3].item())
        
        result.append({
            'true1': true1_seq,
            'true2': true2_seq,
            'true3': true3_seq,
            'true4': true4_seq,
            'pred1': pred1_seq,
            'pred2': pred2_seq,
            'pred3': pred3_seq,
            'pred4': pred4_seq
        })
        
    return result

### クラスタリング結果の評価

In [None]:
@torch.no_grad()

def ModelTestClustering(models):
    result=[]
    for seq_idx in range(0, len(XDA_test)):
        print('\r{}/{} conversation evaluating'.format(seq_idx+1, len(XDA_test)), end='')
        XDA_seq = XDA_test[seq_idx]
        YDA_seq = YDA_test[seq_idx]
        DAturn[seq_idx] = DAturn[seq_idx] + [0] * (len(XDA_seq) - len(DAturn[seq_idx]))
        DAturn_seq = DAturn[seq_idx]
        XUtt_seq = XUtt_test[seq_idx]

        pred_seq = []
        true_seq = []
        turn_seq = []
        utter_hidden, context_hidden, da_hidden = models.initDAHidden(1)

        for i in range(0, len(XDA_seq)):
            XDA_tensor = torch.tensor([[XDA_seq[i]]]).to(device)
            YDA_tensor = torch.tensor(YDA_seq[i]).to(device)
            DAturn_tensor = torch.tensor([[DAturn_seq[i]]]).to(device)
            DAturn_tensor = DAturn_tensor.float()
            DAturn_tensor = DAturn_tensor.unsqueeze(1)
            XUtt_tensor = torch.tensor([XUtt_seq[i]]).to(device)

            output, utter_hidden, context_hidden, da_hidden, att_weights = model.prediction(XUtt_tensor, XDA_tensor, None, utter_hidden, context_hidden, da_hidden, DAturn_tensor)

            
        x_numpy = output[-1].to('cpu').detach().numpy().copy()
        result.append(x_numpy)
        
    return result

In [None]:
result=ModelTest(model)

In [None]:
results=ModelTestClustering(model)

In [None]:
def calc_average(y_true, y_pred):
    p = precision_score(y_true=y_true, y_pred=y_pred, average='macro')
    r = recall_score(y_true=y_true, y_pred=y_pred, average='macro')
    f = f1_score(y_true=y_true, y_pred=y_pred, average='macro')
    acc = accuracy_score(y_true=y_true, y_pred=y_pred)
    print('p: {} | r: {} | f: {} | acc: {}'.format(p, r, f, acc))

In [None]:
def Evelu(result, loss_name):
    true = [label for line in result for label in line['true']]
    pred = [label for line in result for label in line['pred']]

    calc_average(y_true=true, y_pred=pred)
    f = f1_score(y_true=true, y_pred=pred, average=None)
    r = recall_score(y_true=true, y_pred=pred, average=None)
    p = precision_score(y_true=true, y_pred=pred, average=None)
    
    print("Recall")
    [print(DA_vocab.id2word[idx], score) for idx, score in zip(sorted(set(true)),r)]
    print("Precision")
    [print(DA_vocab.id2word[idx], score) for idx, score in zip(sorted(set(true)),p)]
    print("F-Score")
    [print(DA_vocab.id2word[idx], score) for idx, score in zip(sorted(set(true)),f)]

In [None]:
preds=[]
trues=[]
for re in result:
    if re['true1'][0]==0:
        trues.append(0)
    else:
        if re['true2'][0]==0:
            trues.append(1)
        else:
            if re['true3'][0]==0:
                trues.append(2)
            else:
                if re['true4'][0]!=-1:
                    trues.append(re['true4'][0]+3)
                    
for re in result:
    if re['pred1'][0]==0:
        preds.append(0)
    else:
        if re['pred2'][0]==0:
            preds.append(1)
        else:
            if re['pred3'][0]==0:
                preds.append(2)
            else:
                if re['pred4'][0]!=-1:
                    preds.append(re['pred4'][0]+3)                    


In [None]:
Evelu(result, loss_name)

### 混同行列

In [None]:
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
confusion_matrix_path="./data/img/confusionmatrix/LabelCM.png"

def CreateConfusionMatrix(result):
    
    y_true = [label for line in result for label in line['true']]
    y_pred = [label for line in result for label in line['pred']]
    cm = confusion_matrix(y_true, y_pred, normalize="pred")
    plt.figure(figsize=(40, 40))
    sns.heatmap(cm, annot=True, cmap='Blues')
    plt.savefig(confusion_matrix_path)
    

# result=ModelTest(model)
CreateConfusionMatrix(result)

### 階層型クラスタリング

In [None]:
clustering_hierarchical_path="./data/img/clustering/hierarchical/"

In [None]:
targets = [xDA[-1] for xDA in XDA_test]

output=np.array(results)
target=np.array(targets)

In [None]:
set_output = list(set(targets))
swda_lists = [DA_vocab.id2word[i] for i in set_output]

In [None]:
label_uniq = np.unique(target)
target_result = []
for label in label_uniq:
    target_result.append(output[target==label].mean(axis=0))
    
methods = ["single", "complete", "average", "weighted","centroid", "median", "ward"]

for method in methods:
    S = pdist(target_result)
    Z = linkage(S, method=method)
    fig = plt.figure(figsize=(30, 18))
    ax = fig.add_subplot(1, 1, 1, title="Dendrogram")
    dendrogram(Z, labels=np.array(swda_lists))
    c, d = cophenet(Z,S)
    print("{0} {1:.3f}".format(method, c))
    # x 軸のラベルを設定する。
    ax.set_xlabel("Dialogue ACT Label")
    # y 軸のラベルを設定する。
    ax.set_ylabel("threshold")
    plt.show()
    plt.savefig(clustering_hierarchical_path + "SwDA_hierarchy_dendrogram_Responce_Mean_{}_AllLabel.png".format(method))

### tSNEクラスタリング

In [None]:
clustering_tSNE_path="./data/img/clustering/tSNE/"
from sklearn.manifold import TSNE

targets = [xDA[-1] for xDA in XDA_test]
output=np.array(results)
target=np.array(targets)

In [None]:
def tSNE_Visualization(output, y_target, loss_name):
    
    if not os.path.isdir(clustering_tSNE_path):
        os.makedirs(clustering_tSNE_path)
        
    tsne = TSNE(n_components=2, random_state=41, n_iter=10000, perplexity=50.0, early_exaggeration=20.0, init='pca')
    X_reduced = tsne.fit_transform(output)

    f, ax = plt.subplots(1, 1, figsize=(13, 7))
    for idx in range(len(DA_vocab.word2id)):
        targets=X_reduced[target==idx]
        plt.scatter(targets[:, 0], targets[:, 1],
                    label=DA_vocab.id2word[idx],
                    cmap='jet',
                    s=15, alpha=0.5)
    
    plt.legend(bbox_to_anchor=(1.02, 1), loc='upper left')  
    
    plt.savefig(clustering_tSNE_path + "SwDA_LF{0}_Hierarchical_Label.png".format(loss_name))

In [None]:
tSNE_Visualization(output, target, loss_name)

### 各対話行為タグをtSNEによる次元削減

In [None]:
clustering_tSNE_path="./data/img/clustering/tSNE/"

In [None]:
def tSNE_Visualization_Utterance(output, target, da_name):
    
    if not os.path.isdir(clustering_tSNE_path):
        os.makedirs(clustering_tSNE_path)
        
    tsne = TSNE(n_components=2, random_state=41, n_iter=10000, perplexity=50.0, early_exaggeration=20.0, init='pca')
    X_reduced = tsne.fit_transform(output)

    f, ax = plt.subplots(1, 1, figsize=(13, 7))
    
    idx = DA_vocab.word2id[da_name]
    
    targets=X_reduced[target==idx]
    plt.scatter(targets[:, 0], targets[:, 1],
                label=da_name,
                cmap='jet',
                s=15, alpha=0.5)
    
    plt.legend(bbox_to_anchor=(1.02, 1), loc='upper left')  
    
    plt.savefig(clustering_tSNE_path + "SwDA_DA_{0}.png".format(da_name))

In [None]:
for d in DA_vocab.word2id:
#     if DA_vocab.word2id[d] in target:
    tSNE_Visualization_Utterance(output, target, d)

In [None]:
m = nn.Sigmoid()
loss = nn.BCELoss()
input = torch.randn(3, requires_grad=True)
target = torch.empty(3).random_(2)
print(input, target)
output = loss(m(input), target)
output.backward()

In [None]:
m(input),target