# Dialogue Evaluation
###### Created by Weber Huang 

In [2]:
import os
import pandas as pd
from tqdm import tqdm_notebook as tqdm
import numpy as np
from collections import Counter
from sklearn import preprocessing
import jieba
import re

In [3]:
train = pd.read_csv('C:\\Users\\doudi\\OneDrive\\Documents\\ntcir15\\Dataset\\New_DialEval-1\\train_baidu_cn.csv', encoding='utf_8')
dev = pd.read_csv('C:\\Users\\doudi\\OneDrive\\Documents\\ntcir15\\Dataset\\New_DialEval-1\\dev_baidu_cn.csv', encoding='utf_8')

In [4]:
test = pd.read_csv('C:\\Users\\doudi\\OneDrive\\Documents\\ntcir15\\Dataset\\New_DialEval-1\\test_w2v_cn.csv', encoding='utf_8')

In [5]:
train['Baidu_str'] = train['Baidu_str'].fillna('[]')
dev['Baidu_str'] = dev['Baidu_str'].fillna('[]')
# test['W2V_str'] = test['W2V_str'].fillna('[]')

In [6]:
def str_to_list(dataframe, string): 
    l = []
    n = []
    for i in dataframe[string]:
        if i != '[]':
            
            l.append([float(j) for j in str(i).split()])
        else:
            l.append(n)
    return l

# calculate length of the texts
def length(dataframe):
    len_num = []
    for i in dataframe['texts']:
        temp = i.replace(" ","")
        len_num.append(len(temp))
    
    dataframe['length'] = len_num
    return dataframe

In [7]:
train["W2V"] = str_to_list(train, 'W2V_str')
dev["W2V"] = str_to_list(dev, 'W2V_str')
# test["W2V"] = str_to_list(test, 'W2V_str')

In [8]:
train = length(train)
dev = length(dev)
# test = length(test)

In [9]:
from scipy import stats
import tensorflow as tf

def normalize(pred, truth):
    """ convert inputs to np.array and make sure
    inputs are normalized probability distributions
    """
    if len(pred) != len(truth):
        raise ValueError("pred and truth have different lengths")
    if len(pred) == 0 or len(truth) == 0:
        raise ValueError("pred or truth are empty")

    pred, truth = np.asarray(pred), np.asarray(truth)
    if not ((pred >= 0).all() and (truth >= 0).all()):
        raise ValueError("probability distribution should not be negative")
    pred, truth = pred / pred.sum(), truth / truth.sum()
    return pred, truth

def jensen_shannon_div(pred, truth, base=2):
    ''' JSD: Jensen-Shannon Divergence
    '''
    pred, truth = normalize(pred, truth)
    m = 1. / 2 * (pred + truth)
    return (stats.entropy(pred, m, base=base)
            + stats.entropy(truth, m, base=base)) / 2.

def root_normalized_squared_error(pred, truth):
    """ RNSS: Root Normalised Sum of Squares
    """

    def squared_error(pred, truth):
        return ((pred - truth) ** 2).sum()

    pred, truth = normalize(pred, truth)
    return np.sqrt(squared_error(pred, truth) / 2)

def jsd_custom_loss(y_true, y_pred):
            
    # calculate loss, using y_pred
    ''' JSD: Jensen-Shannon Divergence
    '''
#     y_pred, y_true = normalize(y_pred, y_true)
    m = 1. / 2 * (y_pred + y_true)
    # loss = (stats.entropy(y_pred, m, base=2) + stats.entropy(y_true, m, base=2)) / 2.
    # tf.keras.losses.KLD()
    loss = (tf.keras.losses.KLD(y_pred, m) + tf.keras.losses.KLD(y_true, m)) / 2.
    return loss
  

In [10]:
# split from sender
train_c = train[train.sender=='customer']
train_h = train[train.sender=='helpdesk']
dev_c = dev[dev.sender=='customer']
dev_h = dev[dev.sender=='helpdesk']
# test_c = test[test.sender=='customer']
# test_h = test[test.sender=='helpdesk']

In [1]:
import tensorflow as tf
from sklearn.model_selection import train_test_split 
from sklearn import metrics
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.layers import concatenate, Dense, Embedding, Dropout, LSTM, Bidirectional
from tensorflow.keras import Model, Input
# from keras.layers import Convolution1D, Flatten, , MaxPool1D, GlobalAveragePooling1D
# from keras.layers import concatenate, Bidirectional
# from keras import initializers
# from keras.layers.core import Dense, Dropout, Activation, Flatten
# from keras.layers.embeddings import Embedding
# from keras.layers.recurrent import SimpleRNN
# from keras.layers.recurrent import LSTM
# from keras.activations import tanh, softmax
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.layers import Attention

# ---

# from sklearn.model_selection import train_test_split 
# from sklearn import metrics
# from keras.preprocessing import sequence
# from keras.preprocessing.text import Tokenizer
# from keras.utils import np_utils
# from keras import optimizers
# from keras.layers.merge import concatenate
# from keras.models import Sequential, Model
# from keras.layers import Input
# from keras.layers import Dense, Embedding
# from keras.layers import Convolution1D, Flatten, Dropout, MaxPool1D, GlobalAveragePooling1D
# from keras.layers import concatenate, Bidirectional
# from keras import initializers
# from keras.layers.core import Dense, Dropout, Activation, Flatten
# from keras.layers.embeddings import Embedding
# # from keras.layers.recurrent import SimpleRNN
# from keras.layers.recurrent import LSTM
# from keras.callbacks import ModelCheckpoint

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [44]:
# === customer
c_X_train = train_c.filter(['round','length','texts'])
c_X_test = dev_c.filter(['round','length','texts'])
    

y_train_c = train_c.filter(['CNUG0','CNUG','CNUG*','CNaN'])
y_test_c = dev_c.filter(['CNUG0','CNUG','CNUG*','CNaN'])

# y_train_h = train.filter(['HNUG','HNUG*','HNaN'])
# y_test_h = dev.filter(['HNUG','HNUG*','HNaN'])

c_X1_train = c_X_train['texts']
# c_X1_train = [str (item) for item in c_X1_train]
c_X1_test = c_X_test['texts']
# c_X1_test = [str (item) for item in c_X1_test]

c_X2_train = c_X_train[['round','length']].values
c_X2_test = c_X_test[['round','length']].values

c_token = Tokenizer(num_words = 20000)
c_token.fit_on_texts(c_X1_train)
c_vocab = c_token.word_index
print(c_token.document_count)

c_x_train_seq = c_token.texts_to_sequences(c_X1_train)
c_x_test_seq = c_token.texts_to_sequences(c_X1_test)
c_X1_train = sequence.pad_sequences(c_x_train_seq, maxlen = 350)
c_X1_test = sequence.pad_sequences(c_x_test_seq, maxlen = 350)

8500


In [45]:
# === helpdesk
h_X_train = train_h.filter(['round','length','texts'])
h_X_test = dev_h.filter(['round','length','texts'])
    

# y_train_c = train_c.filter(['CNUG','CNUG*','CNUG0','CNaN'])
# y_test_c = dev_c.filter(['CNUG','CNUG*','CNUG0','CNaN'])

y_train_h = train_h.filter(['HNUG','HNUG*','HNaN'])
y_test_h = dev_h.filter(['HNUG','HNUG*','HNaN'])

h_X1_train = h_X_train['texts']
# h_X1_train = [str (item) for item in h_X1_train]
h_X1_test = h_X_test['texts']
# h_X1_test = [str (item) for item in h_X1_test]

h_X2_train = h_X_train[['round','length']].values
h_X2_test = h_X_test[['round','length']].values

h_token = Tokenizer(num_words = 20000)
h_token.fit_on_texts(h_X1_train)
h_vocab = h_token.word_index
print(h_token.document_count)

h_x_train_seq = h_token.texts_to_sequences(h_X1_train)
h_x_test_seq = h_token.texts_to_sequences(h_X1_test)
h_X1_train = sequence.pad_sequences(h_x_train_seq, maxlen = 350)
h_X1_test = sequence.pad_sequences(h_x_test_seq, maxlen = 350)

6900


> Word2Vec vector

In [46]:
def to_array(dataframe, length, string, dim=256):   
    train_vec = np.zeros(shape=(length,dim))
#     print(train_vec.shape)
    for i in range(len(dataframe[string])):
        if dataframe[string].iloc[i] != []:
#             print(type(dataframe[string].iloc[i]))
    #         print(i)
            train_vec[i] = dataframe[string].iloc[i]
        else:
            continue
    print(train_vec.shape)
    
    return train_vec

train_vec_c = to_array(train_c, len(train_c), 'W2V', dim=300)
train_vec_h = to_array(train_h, len(train_h), 'W2V', dim=300)
dev_vec_c = to_array(dev_c, len(dev_c), 'W2V', dim=300)
dev_vec_h = to_array(dev_h, len(dev_h), 'W2V', dim=300)
# test_vec_c = to_array(test_c, len(test_c), 'W2V', dim=300)
# test_vec_h = to_array(test_h, len(test_h), 'W2V', dim=300)

(8500, 300)
(6900, 300)
(975, 300)
(780, 300)


In [47]:
train_vec_c = np.reshape(train_vec_c, (train_vec_c.shape[0],1, train_vec_c.shape[1]))
train_vec_h = np.reshape(train_vec_h, (train_vec_h.shape[0],1, train_vec_h.shape[1]))
dev_vec_c = np.reshape(dev_vec_c, (dev_vec_c.shape[0],1,dev_vec_c.shape[1]))
dev_vec_h = np.reshape(dev_vec_h, (dev_vec_h.shape[0],1,dev_vec_h.shape[1]))
# test_vec_c = np.reshape(test_vec_c, (test_vec_c.shape[0],1,test_vec_c.shape[1]))
# test_vec_h = np.reshape(test_vec_h, (test_vec_h.shape[0],1,test_vec_h.shape[1]))

In [14]:
# from tensorflow.keras.layers import Attention
# class Attention(Model):
#     def __init__(self, units):
#         super(Attention, self).__init__()
#         self.W1 = Dense(units)
#         self.W2 = Dense(units)
#         self.V = Dense(1)

#     def call(self, features, hidden):
#         # hidden shape == (batch_size, hidden size)
#         # hidden_with_time_axis shape == (batch_size, 1, hidden size)
#         # we are doing this to perform addition to calculate the score
#         hidden_with_time_axis = tf.expand_dims(hidden, 1)
        
#         # score shape == (batch_size, max_length, 1)
#         # we get 1 at the last axis because we are applying score to self.V
#         # the shape of the tensor before applying self.V is (batch_size, max_length, units)
#         score = tanh(self.W1(features) + self.W2(hidden_with_time_axis))
#         # attention_weights shape == (batch_size, max_length, 1)
#         attention_weights = softmax(self.V(score), axis=1)

#         # context_vector shape after sum == (batch_size, hidden_size)
#         context_vector = attention_weights * features
#         context_vector = tf.reduce_sum(context_vector, axis=1)
#         return context_vector, attention_weights

In [48]:
# === customer
def lstm_C(X1_train, X2_train, X1_test, X2_test, y_train, y_test, loss='categorical_crossentropy'):
     
#     main_input = Input(shape=(350,), dtype='float64')
    main_input = Input(shape=(1,300))
    sub_input = Input(shape=(2,))
#     attention_layer = Attention()
    lst1 = Bidirectional(LSTM(units=128, return_sequences=True))(main_input)
    lst2 = Bidirectional(LSTM(units=128, return_sequences=True))(main_input)
    
#     embed = Embedding(output_dim=300,input_dim=20000,input_length=350)(main_input)
#     dropout_1 = Dropout(0.35)(embed)
    att_out = Attention(64)([lst1,lst2])
    merge = concatenate([att_out, sub_input])
    
    dense_1 = Dense(units=64,activation='relu')(merge)
    dropout_1 = Dropout(0.35)(dense_1)
#     dense_2 = Dense(units=32,activation='relu')(dropout_1)
    dense_3 = Dense(units=32,activation='relu')(dropout_1)
    output = Dense(units=4,activation='softmax')(dense_3)


    model = Model(inputs=[main_input, sub_input], outputs=output)
    model.compile(optimizer='adam', loss=loss, metrics=['accuracy'])
    print(model.summary())

    # checkpoint
    filepath="C:\\Users\\doudi\\OneDrive\\Documents\\ntcir15\\Model file\\weights(c).best.hdf5"
    checkpoint= ModelCheckpoint(filepath, monitor='val_accuracy', verbose=1, 
                                save_best_only=True, mode='max')
    callbacks_list = [checkpoint]
    
    train_history = model.fit(x=[X1_train, X2_train], y=y_train, epochs=50, 
                              batch_size=128, callbacks=callbacks_list, verbose=1, validation_split=0.2)

    score = model.evaluate(x=[X1_test, X2_test], y=y_test, verbose=1)

    print("Test Score:", score[0])
    print("Test Accuracy:", score[1])

    pre_probability = model.predict(x=[X1_test, X2_test])
    predicted = pre_probability.argmax(axis=-1)
    
    return model, train_history, pre_probability

In [52]:
# === helpdesk
def lstm_H(X1_train, X2_train, X1_test, X2_test, y_train, y_test, loss='categorical_crossentropy'):
     
#     main_input = Input(shape=(350,), dtype='float64')
    main_input = Input(shape=(1,300))
    sub_input = Input(shape=(2,))
    
#     embed = Embedding(output_dim=300,input_dim=20000,input_length=350)(main_input)
#     dropout_1 = Dropout(0.35)(embed)
    lst = Bidirectional(LSTM(units=256,return_sequences=True))(main_input)
    lst2 = Bidirectional(LSTM(units=128))(lst)
    merge = concatenate([lst2, sub_input])
    dense_1 = Dense(units=64,activation='relu')(merge)
    dropout_1 = Dropout(0.35)(dense_1)
#     dense_2 = Dense(units=32,activation='relu')(dropout_1)
    dense_3 = Dense(units=32,activation='relu')(dropout_1)
    output = Dense(units=3,activation='softmax')(dense_3)




    model = Model(inputs=[main_input, sub_input], outputs=output)
    model.compile(optimizer='adam', loss=loss, metrics=['accuracy'])
    print(model.summary())

    # checkpoint
    filepath="C:\\Users\\doudi\\OneDrive\\Documents\\ntcir15\\Model file\\weights(h).best.hdf5"
    checkpoint= ModelCheckpoint(filepath, monitor='val_accuracy', verbose=1, 
                                save_best_only=True, mode='max')
    callbacks_list = [checkpoint]
    
    train_history = model.fit(x=[X1_train, X2_train], y=y_train, epochs=50, 
                              batch_size=128, callbacks=callbacks_list, verbose=1, validation_split=0.2)

    score = model.evaluate(x=[X1_test, X2_test], y=y_test, verbose=1)

    print("Test Score:", score[0])
    print("Test Accuracy:", score[1])

    pre_probability = model.predict(x=[X1_test, X2_test])
    predicted = pre_probability.argmax(axis=-1)
    
    return model, train_history, pre_probability

In [49]:
lstm_c_model, lstm_c_history, lstm_c_pred = lstm_C(train_vec_c, c_X2_train, dev_vec_c, 
                                               c_X2_test, y_train_c, y_test_c, 
                                               loss = jsd_custom_loss)

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


ValueError: A `Concatenate` layer requires inputs with matching shapes except for the concat axis. Got inputs shapes: [(None, 1, 256), (None, 2)]

In [53]:
lstm_h_model, lstm_h_history, lstm_h_pred = lstm_H(train_vec_h, h_X2_train, dev_vec_h, 
                                               h_X2_test, y_train_h, y_test_h,  
                                               loss = jsd_custom_loss)

ValueError: A `Concatenate` layer requires inputs with matching shapes except for the concat axis. Got inputs shapes: [(None, 1, 512), (None, 2)]

#### 6.2 Output by rbind

In [29]:
# predict single row
def padding_single_c(dev):
    X_test = dev.filter(['round','texts'])
    X1_test = X_test['texts']
#     X1_test = [str (item) for item in X1_test]
    X2_test = X_test[['round']].values
    
    x_test_seq = c_token.texts_to_sequences([X1_test])
    X1_test = sequence.pad_sequences(x_test_seq, maxlen = 350)
    
    return X1_test, X2_test

def padding_single_h(dev):
    X_test = dev.filter(['round','texts'])
    X1_test = X_test['texts']
#     X1_test = [str (item) for item in X1_test]
    X2_test = X_test[['round']].values
    
    x_test_seq = h_token.texts_to_sequences([X1_test])
    X1_test = sequence.pad_sequences(x_test_seq, maxlen = 350)
    
    return X1_test, X2_test

from itertools import chain

# input the development dataframe and the method
# for current models (loss = jsd):
# model_1 = customer model (CNN_c_model, lstm_c_model)
# model_2 = helpdesk model (CNN_h_model, lstm_h_model)
def Generate_submission(dev, model_1, model_2):
    Id_list = dev['id'].unique()
    C_nugget = ['CNUG','CNUG*','CNUG0','CNaN']
    H_nugget = ['HNUG','HNUG*','HNaN']

    final = []
    
    # go through each Id first
    for Id in tqdm(Id_list):  
        result = []
        
        for i in range(len(dev)):
            
            # if Id is match than predict the prob_distribution and zip it as dictionary 
            if dev['id'][i] == Id:
                if dev.iloc[i, 1] == 'customer':
                    t1, t2 = padding_single_c(dev.iloc[i])
                    t2 = np.array(t2).reshape(1,1)
                    cus_prob = model_1.predict(x=[t1, t2])
                    cus_prob = cus_prob.tolist()
                    cus_prob = list(chain(*cus_prob))
                    dict_c = dict(zip(C_nugget, cus_prob))
                    result.append(dict_c)
                else:
                    t3, t4 = padding_single_h(dev.iloc[i])
                    t4 = np.array(t4).reshape(1,1)
                    help_prob = model_2.predict(x=[t3, t4])
                    help_prob = help_prob.tolist()
                    help_prob = list(chain(*help_prob))
                    dict_h = dict(zip(H_nugget, help_prob))
                    result.append(dict_h)
            # if Id isn't match than continue until it match or switch to new Id
            else:
                continue
        
        # Submission form
        dict1 = {'nugget':result,'id':Id}
        final.append(dict1)
        
    return final

> Word2Vec

In [77]:
from itertools import chain

def to_array_2(dataframe, length=1, dim=300):   
    train_vec = np.zeros(shape=(length,dim))
    if dataframe['W2V'] != []:
        train_vec = np.array(dataframe['W2V'])
    
#     print(train_vec.shape)
    train_vec = train_vec.reshape(1,300)
    
    return np.array(train_vec)

def padding_single_w2v(dev):
    X_test = dev.filter(['round','length','W2V'])
    X1_test = to_array_2(X_test)
    X1_test = np.reshape(X1_test, (X1_test.shape[0],1,X1_test.shape[1]))
    X2_test = X_test[['round','length']].values
    return X1_test, X2_test

def Generate_submission_w2v(dev, model_1, model_2):
    dev['id'] = dev['id'].apply(str)
    Id_list = dev['id'].unique()
    C_nugget = ['CNUG0','CNUG','CNUG*','CNaN']
    H_nugget = ['HNUG','HNUG*','HNaN']

    final = []
    
    # go through each Id first
    for Id in tqdm(Id_list):  
        result = []
        
        for i in range(len(dev)):
            
            # if Id is match than predict the prob_distribution and zip it as dictionary 
            if dev['id'][i] == Id:
                if dev.iloc[i, 1] == 'customer':
#                     print(i)
                    t1, t2 = padding_single_w2v(dev.iloc[i])
                    t2 = np.array(t2).reshape(1,2)
                    cus_prob = model_1.predict(x=[t1, t2])
                    cus_prob = cus_prob.tolist()
                    cus_prob = list(chain(*cus_prob))
                    dict_c = dict(zip(C_nugget, cus_prob))
                    result.append(dict_c)
                else:
#                     print(i)
                    t3, t4 = padding_single_w2v(dev.iloc[i])
                    t4 = np.array(t4).reshape(1,2)
                    help_prob = model_2.predict(x=[t3, t4])
                    help_prob = help_prob.tolist()
                    help_prob = list(chain(*help_prob))
                    dict_h = dict(zip(H_nugget, help_prob))
                    result.append(dict_h)
            # if Id isn't match than continue until it match or switch to new Id
            else:
                continue
        
        # Submission form
        dict1 = {'nugget':result,'id':Id}
        final.append(dict1)
        
    return final

In [88]:
# final = Generate_submission(dev, lstm_c_model, lstm_h_model)
from keras.models import load_model
path_1="C:\\Users\\doudi\\OneDrive\\Documents\\ntcir15\\Model file\\weights(c).best.hdf5"
path_2="C:\\Users\\doudi\\OneDrive\\Documents\\ntcir15\\Model file\\weights(h).best.hdf5"
model_c = load_model(path_1,custom_objects={'jsd_custom_loss': jsd_custom_loss})
model_h = load_model(path_2,custom_objects={'jsd_custom_loss': jsd_custom_loss})

model_c.compile(optimizer='adam', loss=jsd_custom_loss, metrics=['accuracy'])
model_h.compile(optimizer='adam', loss=jsd_custom_loss, metrics=['accuracy'])

In [89]:
final = Generate_submission_w2v(dev,model_c,model_h)

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`


HBox(children=(FloatProgress(value=0.0, max=390.0), HTML(value='')))




### 7. Generate the submission estimation JSON

In [91]:
import json
import time
import os

path = 'C:/Users/doudi/OneDrive/Documents/ntcir15/eval'
os.chdir(path)
timestr = time.strftime("%Y%m%d%H%M")

In [92]:
with open((timestr + '_' + 'dev_eval.json'), 'w', encoding='utf-8') as f: 
    f.write(json.dumps(final, ensure_ascii=False, indent=2))

In [27]:
# with open(('test_submit_01.json'), 'w', encoding='utf-8') as f: 
#     f.write(json.dumps(final, ensure_ascii=False, indent=2))
# # import json
# # p = 'C:\\Users\\doudi\\OneDrive\\Documents\\ntcir15\\Dataset\\New_DialEval-1\\test_cn.json'
# # t = json.load(open(p, encoding="utf-8"))

In [93]:
import os
os.chdir('C:\\Users\\doudi\\OneDrive\\Documents\\ntcir15\\eval')
!python eval.py 202007161718_dev_eval.json dev_cn.json

{'quality': None, 'nugget': {'jsd': 4.721656118176839, 'rnss': 3.001970362468278}}


In [94]:
2**(-4.721656118176839)

0.037900058404167256

In [83]:
2**(-3.014702576188082)

0.12373258794452668

In [51]:
x = [[2,3,4],[1,3,3],[2,3,4]]
x[:,-1]

TypeError: list indices must be integers or slices, not tuple