# Parte 3: Preguntas y Respuestas en Documentos

## Preprocesamiento

Para el preprocesamiento haremos las siguientes cosas:

1. Pasar preguntas y contextos a arreglos de palabras.
2. A partir del campo `answer_start` (índice de carácter), crear el campo `answer_word_start` (índice de palabra).
3. Agregar el campo `answer_word_end` (índice de la última palabra de la respuesta).

Guardaremos el archivo en un JSON nuevo para no tener que repetir el preprocesamiento.

In [50]:
# Importar dependencias para el preprocesamiento.
from nltk.tokenize import word_tokenize
import string
import re
import numpy as np
import json

In [6]:
# Colab helper used to upload the dataset files into the runtime.
from google.colab import files

# `uploaded` keeps whatever files.upload() returns for the selected files.
uploaded = files.upload()

Saving dev-v1.1-pr.json to dev-v1.1-pr.json
Saving train-v1.1-pr.json to train-v1.1-pr.json


KeyboardInterrupt: ignored

In [0]:
# Load the raw train JSON file and keep only its top-level 'data' entry.
with open("train-v1.1.json", "r") as data:
    train = json.load(data)['data']
# The dev file is loaded the same way and is called `test` from here on.
with open("dev-v1.1.json", "r") as data:
    test = json.load(data)['data']

In [0]:
# Funciones de preprocesamiento

def preprocess_dataset(data):
    """Preprocess, in place, every paragraph of every document in ``data``.

    ``data`` is an iterable of documents; each document is a mapping whose
    'paragraphs' entry is an iterable of paragraph mappings.
    """
    all_paragraphs = (
        paragraph
        for document in data
        for paragraph in document['paragraphs']
    )
    for paragraph in all_paragraphs:
        preprocess_paragraph(paragraph)

def preprocess_paragraph(paragraph):
    """Tokenize a paragraph's context and preprocess each of its questions.

    The paragraph mapping is modified in place.
    """
    # The context is tokenized first; the questions still receive the raw text.
    preprocess_context(paragraph)
    for qa in paragraph['qas']:
        preprocess_question(paragraph['context'], qa)

def preprocess_context(paragraph):
    """Store the tokenized context under ``paragraph['context_tokenized']``."""
    context_tokens = preprocess_text(paragraph['context'])
    paragraph['context_tokenized'] = context_tokens

def preprocess_question(context,question):
    """Tokenize a question and preprocess all of its answers in place.

    ``context`` is the raw (untokenized) paragraph text the answers refer to.
    """
    question_tokens = preprocess_text(question['question'])
    question['question_tokenized'] = question_tokens
    # Each answer gets its own tokenization and word-level span.
    for candidate in question['answers']:
        preprocess_answer(context, candidate)
    
def preprocess_answer(context,answer):
    """Add the tokenized text and a word-level span to ``answer`` in place.

    Writes three keys: 'text_tokenized', 'answer_word_start' and
    'answer_word_end'. The start index is the number of tokens obtained from
    the context text that precedes the character offset ``answer_start``.
    """
    answer_tokens = preprocess_text(answer['text'])
    answer['text_tokenized'] = answer_tokens

    # Tokens before the answer == word index at which the answer starts.
    text_before_answer = context[:answer['answer_start']]
    first_word = len(preprocess_text(text_before_answer))
    answer['answer_word_start'] = first_word

    # Inclusive index of the last answer word.
    answer['answer_word_end'] = first_word + len(answer_tokens) - 1
    
"""
retorna string tokenizado y limpio de simbolos y mayusculas. Esto ultimo es necesario para disminuir
las veces en que GLove no tiene la palabra.
"""
# Characters replaced by a space before tokenizing: every printable ASCII
# character that is not a letter, a digit or an apostrophe.
_NON_WORD_CHARS = "".join(
    ch for ch in string.printable
    if ch not in string.ascii_letters + "'1234567890"
)
# re.escape is required: the list contains "\", "]", "^" and "-", which are
# special inside a character class. Unescaped, the pair "\]" is parsed as an
# escaped "]" and the backslash itself is never removed. The en dash is added
# on top of the ASCII symbols.
_NON_WORD_PATTERN = re.compile("[{}–]".format(re.escape(_NON_WORD_CHARS)))


def preprocess_text(text):
    """Return ``text`` lower-cased, stripped of symbols and tokenized.

    Lower-casing and symbol removal reduce the number of words that are
    missing from the GloVe vocabulary.

    Args:
        text: the string to clean.

    Returns:
        The list of tokens produced by ``word_tokenize`` on the cleaned text.
    """
    lowered = text.lower()
    # Symbols become spaces (not empty strings) so adjacent words stay apart.
    cleaned = _NON_WORD_PATTERN.sub(" ", lowered)
    return word_tokenize(cleaned)


In [0]:
# Preprocess both splits in place and save them as new JSON files.
# NOTE: each output file is opened (and therefore truncated) before its
# preprocessing runs, so a failure in preprocess_dataset leaves an empty file.
with open("train-v1.1-pr.json","w") as out:
    print("preprocessing training set")
    preprocess_dataset(train)
    json.dump(train,out)
    
with open("dev-v1.1-pr.json","w") as out:
    print("preprocessing test set")
    preprocess_dataset(test)
    json.dump(test,out)

## Embeddings

Para usar los embeddings e inyectarlos en el modelo de Keras, primero se intentó lo siguiente:

1. Construiremos un index de palabras a partir del vocabulario del dataset.
2. Transformamos las secuencias de palabras en secuencias de enteros mediante el index.
3. Estandarizamos el Tamaño de la secuencia usando `pad_sequences` de Keras.
4. Luego  construimos matriz de pesos a partir de Glove para inyectar a una capa `Embedding` de Keras.

Este proceso no funcionó porque los vectores de GloVe ocupaban demasiada memoria de la GPU (eran 50M de parámetros). A cambio, se optó por precomputar los vectores de cada palabra y pasar los tensores de una secuencia directamente a la red. Como el dataset crece demasiado al pasar cada palabra a un vector de 300D, esta transformación se genera de a poco mediante un objeto Keras `Sequence` (se probó primero un generador, pero no era compatible con el paralelismo).

In [1]:
# Importamos dependencias para generar los embeddings
from gensim.models import KeyedVectors
from gensim.test.utils import datapath, get_tmpfile
from gensim.scripts.glove2word2vec import glove2word2vec
from keras.preprocessing.sequence import pad_sequences
from keras.utils.np_utils import to_categorical
from keras.utils import Sequence
import threading

import json
import numpy as np
import os

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
# Use the 300-d GloVe vectors trained on Wikipedia 2014. Memory limits prevent
# using a larger corpus.
glove_file = datapath(os.getcwd() + '/glove.6B.300d.txt')
tmp_file = get_tmpfile(os.getcwd() + "/test_word2vec.txt")
print("Running script")
# The GloVe -> word2vec conversion is commented out, so the file at `tmp_file`
# must already exist when this cell runs.
#glove2word2vec(glove_file, tmp_file)
print("Loading Glove Vectors")
embedder = KeyedVectors.load_word2vec_format(tmp_file)

Running script
Loading Glove Vectors


In [3]:
# Helpers for the Embedding-layer approach (discarded).
def text_to_sequence(text_seq, word_index):
    """Map a sequence of words to a NumPy array of integer ids.

    Words missing from ``word_index`` are added to it in place, receiving the
    current size of the index as their id.
    """
    ids = [word_index.setdefault(token, len(word_index)) for token in text_seq]
    return np.array(ids)

# Builds the word index as words appear in the dataset and converts the
# dataset into integer sequences.
def gen_data(dataset,word_index):
    """Return ``(contexts, questions, output, word_index)`` for ``dataset``.

    One entry is produced per question. ``output`` holds the
    ``(answer_word_start, answer_word_end)`` tuple of the question's first
    answer. ``word_index`` is extended in place and returned.
    """
    contexts, questions, output = [], [], []
    for document in dataset:
        for paragraph in document['paragraphs']:
            for qa in paragraph['qas']:
                # Only the first answer is used for the final dataset.
                first_answer = qa['answers'][0]
                context_ids = text_to_sequence(paragraph['context_tokenized'], word_index)
                contexts.append(context_ids)
                question_ids = text_to_sequence(qa['question_tokenized'], word_index)
                questions.append(question_ids)
                span = (first_answer['answer_word_start'], first_answer['answer_word_end'])
                output.append(span)
    return contexts, questions, output, word_index



# Funciones para la keras sequence.

# Converts a token sequence into a fixed-size tensor.
def text_to_tensor(text_seq, word_vectors,sequence_length, embedding_dim=300):
    """Return a ``(sequence_length, embedding_dim)`` array of word vectors.

    Args:
        text_seq: iterable of tokens.
        word_vectors: mapping-like object supporting ``in`` and ``[]`` that
            yields one ``embedding_dim``-sized vector per known token.
        sequence_length: number of rows of the result.
        embedding_dim: size of each word vector (defaults to 300).

    Returns:
        A float array. Rows for padding positions and for tokens missing from
        ``word_vectors`` stay at zero. Tokens beyond ``sequence_length`` are
        dropped instead of raising an IndexError.
    """
    result = np.zeros((sequence_length, embedding_dim))
    # zip with range() stops at sequence_length, truncating long sequences.
    for i, token in zip(range(sequence_length), text_seq):
        if token in word_vectors:
            result[i] = word_vectors[token]
    return result

def data_counter(dataset):
    """Return the total number of questions ('qas' entries) in ``dataset``."""
    return sum(
        1
        for document in dataset
        for paragraph in document['paragraphs']
        for _ in paragraph['qas']
    )

# Keras Sequence used to generate batches from several workers.
class TensorSequence(Sequence):
    """Keras ``Sequence`` that turns tokenized questions into embedding batches.

    The dataset is flattened into one row per question:
    ``[context_tokens, question_tokens, answer_start, answer_end]``. Word
    vectors are looked up only when a batch is requested.

    Args:
        dataset: preprocessed documents (with the '*_tokenized' and
            'answer_word_*' fields).
        batch_size: number of questions per batch.
        word_vectors: mapping-like object from token to word vector.
        context_length: contexts are truncated/padded to this many tokens.
        question_length: questions are truncated/padded to this many tokens.
    """

    def __init__(self, dataset,batch_size,word_vectors,context_length,question_length):
        self.dataset = []
        self.batch_size = batch_size
        self.data_count = data_counter(dataset)
        self.word_vectors = word_vectors
        self.context_length = context_length
        self.question_length = question_length
        print("Loading sequence")
        # Store the dataset as a flat table so it can be sliced by batch.
        for document in dataset:
            for paragraph in document['paragraphs']:
                for question in paragraph['qas']:
                    # Only the first answer is used for the final dataset.
                    answer = question['answers'][0]

                    context = paragraph['context_tokenized']
                    if len(context) > context_length:
                        context = context[:context_length]
                    question_t = question['question_tokenized']
                    if len(question_t) > question_length:
                        question_t = question_t[:question_length]
                    # Answer indices that fall outside the truncated context
                    # are clamped to its last position.
                    self.dataset.append([
                        context,
                        question_t,
                        min(answer['answer_word_start'], context_length - 1),
                        min(answer['answer_word_end'], context_length - 1),
                    ])

    def __len__(self):
        """Return the number of full batches; a trailing partial batch is dropped."""
        return self.data_count//self.batch_size

    def __getitem__(self, idx):
        """Return batch ``idx`` as ``([contexts, questions], [starts, ends])``.

        ``contexts`` and ``questions`` have shape ``(batch, length, dim)``.
        ``starts`` and ``ends`` are one-hot matrices with ``context_length``
        classes.
        """
        rows = self.dataset[idx * self.batch_size:(idx + 1) * self.batch_size]

        # np.stack builds the batch axis in one pass. It also works for a
        # single-row batch, where stacking with dstack + moveaxis failed
        # because a lone 2-D tensor has no axis 2.
        contexts = np.stack([
            text_to_tensor(row[0], self.word_vectors, self.context_length)
            for row in rows
        ])
        questions = np.stack([
            text_to_tensor(row[1], self.word_vectors, self.question_length)
            for row in rows
        ])

        output_start = np.array([row[2] for row in rows])
        output_end = np.array([row[3] for row in rows])
        return [contexts, questions], [
            to_categorical(output_start, num_classes=self.context_length),
            to_categorical(output_end, num_classes=self.context_length),
        ]

In [4]:

# Load the preprocessed train/dev datasets written by the preprocessing step.
# These files hold the document list directly, so no ['data'] lookup is needed.
with open("train-v1.1-pr.json", "r") as data:
    train = json.load(data)
with open("dev-v1.1-pr.json", "r") as data:
    test = json.load(data)

In [5]:
# Calculamos el tamaño máximo
def max_context(document):
    """Return the paragraph of ``document`` with the longest tokenized context.

    Note that the paragraph itself is returned, not its length.
    """
    def context_size(paragraph):
        return len(paragraph['context_tokenized'])

    return max(document['paragraphs'], key=context_size)

def max_question_par(paragraph):
    """Return the question of ``paragraph`` with the most tokens.

    Note that the question mapping itself is returned, not its length.
    """
    def question_size(qa):
        return len(qa['question_tokenized'])

    return max(paragraph['qas'], key=question_size)

def max_paragraph(document):
    """Return the paragraph of ``document`` that contains the longest question."""
    def longest_question_size(paragraph):
        return len(max_question_par(paragraph)['question_tokenized'])

    return max(document['paragraphs'], key=longest_question_size)

# Fixed lengths, in tokens, to which contexts and questions are cut or padded.
MAX_CONTEXT = 400
MAX_QUESTIONS = 30
# Number of questions in the training split.
TRAIN_COUNT = data_counter(train)

## Declaración del Modelo

In [6]:
# Importamos dependencias
from keras.layers import Input, Concatenate, Dense, Reshape, Activation,Multiply, Dot, Add, Lambda,SeparableConv1D, BatchNormalization,TimeDistributed,Dropout,Reshape,Softmax, Reshape, Flatten
from keras.models import Model, load_model
from keras.callbacks import ModelCheckpoint
from keras.optimizers import Adam
import tensorflow as tf
from keras import backend as K
import keras.backend as K
import keras.initializers
import numpy as np

# Choose between GPU and multi-core CPU execution.
num_cores = 4
CPU= False
GPU= not CPU
# Exactly one of the two branches below runs, because GPU is `not CPU`.
if GPU:
    num_GPU = 1
    num_CPU = 1
if CPU:
    num_CPU = 1
    num_GPU = 0

# Build a TensorFlow session with the chosen thread and device counts and
# register it as the session used by the Keras backend.
config = tf.ConfigProto(intra_op_parallelism_threads=num_cores,\
        inter_op_parallelism_threads=num_cores, allow_soft_placement=True,\
        device_count = {'CPU' : num_CPU, 'GPU' : num_GPU})
session = tf.Session(config=config)
K.set_session(session)


In [7]:
## Attention 

class ScaledDotProductAttention():
    """Scaled dot-product attention assembled from Keras layers.

    Calling the instance wires scaled q.k scores, an optional additive mask,
    a softmax activation, dropout and a weighted sum of ``v`` into the graph.
    """
    def __init__(self, d_model, attn_dropout=0.1):
        # Scores are divided by sqrt(d_model).
        self.temper = np.sqrt(d_model)
        self.dropout = Dropout(attn_dropout)
    def __call__(self, q, k, v, mask):
        # Dot product of q and k over axis 2 of both inputs, scaled by temper.
        attn = Lambda(lambda x:K.batch_dot(x[0],x[1],axes=[2,2])/self.temper)([q, k])
        if mask is not None:
            # Additive bias: 0 where mask == 1, -1e10 where mask == 0.
            mmask = Lambda(lambda x:(-1e+10)*(1-x))(mask)
            attn = Add()([attn, mmask])
        attn = Activation('softmax')(attn)
        attn = self.dropout(attn)
        # Weighted sum of the values using the attention weights.
        output = Lambda(lambda x:K.batch_dot(x[0], x[1]))([attn, v])
        return output

#https://github.com/Lsdefine/attention-is-all-you-need-keras/blob/master/transformer.py
class MultiHeadAttention():
    """Multi-head attention built from per-head projection layers.

    Each head has its own bias-free Dense projections for queries, keys and
    values. The head outputs are concatenated, projected to ``d_model`` by
    ``w_o``, passed through dropout and added to the query input.
    """
    # The original comment mentioned two modes (0: big matrices, faster;
    # 1: clearer implementation). This class has no mode switch and always
    # builds separate projection layers per head.
    def __init__(self, n_head, d_model, d_k, d_v, dropout):
        self.n_head = n_head
        self.d_k = d_k
        self.d_v = d_v
        self.dropout = dropout
        self.qs_layers = []
        self.ks_layers = []
        self.vs_layers = []
        for _ in range(n_head):
            self.qs_layers.append(TimeDistributed(Dense(d_k, use_bias=False)))
            self.ks_layers.append(TimeDistributed(Dense(d_k, use_bias=False)))
            self.vs_layers.append(TimeDistributed(Dense(d_v, use_bias=False)))
        # NOTE(review): the attention scale is built from d_model, not d_k.
        self.attention = ScaledDotProductAttention(d_model)
        #self.layer_norm = BatchNormalization(axis=1)
        self.w_o = TimeDistributed(Dense(d_model))

    def __call__(self, q, k, v, mask=None):
        """Apply every head to (q, k, v) and return the combined output."""
        # d_k and d_v are unpacked here but not used below.
        d_k, d_v = self.d_k, self.d_v
        n_head = self.n_head
        heads = []
        #attns = []
        for i in range(n_head):
            qs = self.qs_layers[i](q)   
            ks = self.ks_layers[i](k) 
            vs = self.vs_layers[i](v) 
        #head, attn = self.attention(qs, ks, vs, mask)
            head = self.attention(qs, ks, vs, mask)
            heads.append(head)
        #attns.append(attn)
        head = Concatenate()(heads)
        #attn = Concatenate()(attns)

        outputs = self.w_o(head)
        outputs = Dropout(self.dropout)(outputs)
        # Residual connection with the query input.
        outputs = Add()([outputs, q])
        return outputs
    
    
class EncoderConv():
    """Stack of ``n_convs`` (BatchNormalization -> SeparableConv1D) stages.

    The first stage output is used as is. Every later stage is added to the
    running output, forming a residual connection.
    """

    def __init__(self,n_convs,filters,kernel,name="encoder_conv"):
        self.n_convs = n_convs
        self.filters = filters
        self.kernel = kernel
        self.name = name
        self.norms = []
        self.convs = []
        # Layers are created norm-first, stage by stage.
        for stage in range(self.n_convs):
            self.norms.append(
                BatchNormalization(axis = 1,name=name+"_norm_{}".format(stage)))
            self.convs.append(
                SeparableConv1D(filters=filters,kernel_size=kernel,name=name+"_conv_{}".format(stage),padding="same"))

    def __call__(self,value):
        """Run ``value`` through all stages and return the final tensor."""
        # First stage: no residual connection.
        normed = self.norms[0](value)
        running = self.convs[0](normed)
        for stage in range(1, self.n_convs):
            normed = self.norms[stage](running)
            convolved = self.convs[stage](normed)
            running = Add()([convolved, running])

        return running

class SelfAttention():
    """BatchNormalization followed by multi-head self-attention and a residual add."""
  
    def __init__(self,n_heads,d_model,d_k,d_v,dropout=0.1,name="encoder_self_attention"):
        self.n_heads = n_heads
        self.d_model = d_model
        self.d_k = d_k
        self.d_v = d_v
        self.dropout = dropout
        self.name = name
        self.attn = MultiHeadAttention(n_heads,d_model,d_k,d_v,dropout)
        self.norm_layer = BatchNormalization(axis = 1,name=name+"_norm")
        #self.mask = Lambda(lambda x:GetPadMask(x,x))(norm_layer)
        
    def __call__(self,value):
        """Return ``value`` plus the attention output computed on normalized ``value``."""
        norm_layer = self.norm_layer(value)
        # The normalized tensor is used as query, key and value; no mask is passed.
        attn_layer = self.attn(norm_layer,norm_layer,norm_layer)
        # Residual connection with the un-normalized input.
        value = Add()([value,attn_layer])
        return value
      
class FeedForward():
    """BatchNormalization followed by one Dense layer and a residual add."""
  
    def __init__(self,ndims,activation="relu",name="encoder_ff"):
        self.ndims = ndims
        self.activation = activation
        self.name = name
        self.norm_layer = BatchNormalization(axis=1,name=name+"_norm")
        self.ff = Dense(ndims, activation=activation, name=name+"_ff")
      
    def __call__(self,value):
        """Return ``value`` plus the Dense output computed on normalized ``value``."""
        norm = self.norm_layer(value)
        ff = self.ff(norm)
        # NOTE(review): the residual add presumably requires ndims to match
        # the last dimension of `value` - confirm against callers.
        value = Add()([value,ff])
        return value

      
class EncoderBlock():
  
    ''' 
      Encoder block assembly: convolutions -> self-attention -> feed-forward.
      For the stacked embedding encoder blocks, n_conv = 4.
      For the stacked model encoder blocks, n_conv = 2 (the block then has to
      be repeated 7 times, with 3 repetitions of that stack sharing weights).

    '''

    def __init__(self, n_convs, filters, kernels, n_heads, d_model, d_k, d_v, ndims, dropout=0.1, activation="relu", name="encoder_block"):
        self.name = name
        self.dropout = dropout
        # Sub-layer names are derived from `name`.
        self.encoder_conv = EncoderConv(n_convs, filters, kernels, name=name+"_conv")
        self.self_attention = SelfAttention(n_heads, d_model, d_k, d_v, dropout,name=name+"_self_attention")
        self.ff = FeedForward(ndims, activation,name=name+"_ff")

    def __call__(self, value):
        # Apply the three sub-blocks in sequence.
        enc_conv = self.encoder_conv(value)
        self_att = self.self_attention(enc_conv)
        value = self.ff(self_att)
        return value

class ModelEncoder():
    """Chain of ``n_reps`` encoder blocks applied one after another.

    All arguments other than ``n_reps`` and ``name`` are forwarded to every
    ``EncoderBlock``. Each block is named ``<name>_block_<i>``. Calling the
    same instance several times reuses the same blocks.
    """

    def __init__(self, n_reps, n_convs, filters, kernels, n_heads, d_model, d_k, d_v, ndims, dropout=0.1, activation="relu", name="model_encoder"):
        self.blocks = []
        self.n_reps = n_reps
        self.name = name
        for i in range(self.n_reps):
            # `activation` is forwarded too; it used to be silently dropped,
            # so every block fell back to the EncoderBlock default.
            self.blocks.append(EncoderBlock(
                n_convs, filters, kernels, n_heads, d_model, d_k, d_v, ndims,
                dropout, activation, name=name+"_block_{}".format(i)))

    def __call__(self, value):
        """Return ``value`` after passing it through every block in order."""
        for block in self.blocks:
            value = block(value)
        return value

In [8]:
## Highway network https://gist.github.com/iskandr/a874e4cf358697037d14a17020304535
def highway_layers(value, n_layers, activation="tanh", gate_bias=-3,name="highway"):
    """Stack ``n_layers`` highway layers on top of ``value``.

    Each layer computes ``gate * transform(value) + (1 - gate) * value``,
    where ``gate`` is a sigmoid Dense layer and ``transform`` is a Dense layer
    followed by ``activation``.

    Args:
        value: input Keras tensor; its last dimension is kept by every layer.
        n_layers: number of highway layers to stack.
        activation: activation applied to the transform branch.
        gate_bias: constant initial bias of the gate Dense layer.
        name: prefix for the names of the created layers.

    Returns:
        The output tensor of the last highway layer.
    """
    dim = K.int_shape(value)[-1]
    # Per-sample static shape, so the Lambda reports a correct output shape
    # for inputs of any rank (not only 2-D ones).
    per_sample_shape = K.int_shape(value)[1:]
    gate_bias_initializer = keras.initializers.Constant(gate_bias)
    for i in range(n_layers):
        gate = Dense(units=dim, bias_initializer=gate_bias_initializer,name=name+"_dense_1_{}".format(i))(value)
        gate = Activation("sigmoid",name=name+"_activation_1_{}".format(i))(gate)
        negated_gate = Lambda(
            lambda x: 1.0 - x,
            output_shape=per_sample_shape)(gate)
        transformed = Dense(units=dim,name=name+"_dense_2_{}".format(i))(value)
        # The activation must consume the Dense output. Applying it to `value`
        # dropped the learned transform from the graph.
        transformed = Activation(activation,name=name+"_activation_2_{}".format(i))(transformed)
        transformed_gated = Multiply(name=name+"_multiply_1_{}".format(i))([gate, transformed])
        identity_gated = Multiply(name=name+"_multiply_2_{}".format(i))([negated_gate, value])
        value = Add(name=name+"_add_{}".format(i))([transformed_gated, identity_gated])
    return value
  
def GetPadMask(q, k):
    """Build a float32 mask from the non-zero entries of ``k``.

    NOTE(review): presumably q and k are (batch, length) integer id tensors
    with 0 as padding, giving a (batch, len_q, len_k) mask - confirm. The only
    reference to this function in the visible code is commented out.
    """
    # Ones with a trailing axis, used to repeat the key mask per query position.
    ones = K.expand_dims(K.ones_like(q, 'float32'), -1)
    # 1.0 where k is non-zero, with an extra axis inserted at position 1.
    mask = K.cast(K.expand_dims(K.not_equal(k, 0), 1), 'float32')
    mask = K.batch_dot(ones, mask, axes=[2,1])
    return mask
  
def create_mask(x):
    """Return a float32 tensor that is 1.0 where ``x`` is non-zero, else 0.0."""
    is_nonzero = K.not_equal(K.zeros_like(x), x)
    return K.cast(is_nonzero, dtype='float32')
  

In [9]:
def attention(batch):
    """Build the pairwise context/question features for the similarity matrix.

    ``batch`` holds, per sample, the MAX_CONTEXT context rows followed by the
    MAX_QUESTIONS question rows (the two encodings concatenated on axis 1).

    Returns, per sample, a ``(MAX_CONTEXT * MAX_QUESTIONS, 3 * FILTERS)``
    tensor whose row ``i * MAX_QUESTIONS + j`` is ``[q_j, c_i, c_i * q_j]``.
    Rows are context-major so that the model's later
    ``Reshape((MAX_CONTEXT, MAX_QUESTIONS))`` puts the score of context word
    ``i`` and question word ``j`` at position ``[i, j]``.
    """

    def _attention_f(c_q):
        c, q = c_q[:MAX_CONTEXT, :], c_q[MAX_CONTEXT:, :]
        # Repeat every context row MAX_QUESTIONS times in a row:
        # c_0, c_0, ..., c_1, c_1, ...
        c = K.reshape(K.tile(c, [1, MAX_QUESTIONS]), [MAX_CONTEXT * MAX_QUESTIONS, FILTERS])
        # Cycle through all question rows once per context row:
        # q_0, q_1, ..., q_0, q_1, ...
        q = K.tile(q, [MAX_CONTEXT, 1])
        return K.concatenate([q, c, c * q], axis=1)

    return K.map_fn(_attention_f, batch)

In [10]:
## Model params
# Size of each input word vector.
GLOVE_DIM=300
# Convolution kernel size of the embedding encoder blocks.
KERNEL_SIZE=7
# Shared width: passed as filters, d_model, d_k, d_v and ndims below.
FILTERS=64
# Convolution stages per embedding encoder block.
BLOCK_CONV_LAYERS=4
N_HEADS=4
DROPOUT=0.1
# Encoder blocks inside the stacked ModelEncoder.
N_REPS = 3
# Convolution stages and kernel size of the stacked model encoder blocks.
BLOCK_CONV_LAYERS_STACKED = 2
STACKED_KERNEL_SIZE=5

In [11]:
## Question embedding
# Input: MAX_QUESTIONS pre-computed word vectors of size GLOVE_DIM per sample.
question_input = Input(shape=(MAX_QUESTIONS,GLOVE_DIM),name="question_input")
# Two highway layers followed by one embedding encoder block.
highway_question = highway_layers(question_input,2,activation="relu", gate_bias=-3,name="question_highway")
question_ff = EncoderBlock(BLOCK_CONV_LAYERS,FILTERS,KERNEL_SIZE,N_HEADS,FILTERS,FILTERS,FILTERS,FILTERS,DROPOUT,name="question_eeb")(highway_question)

In [12]:
## Context embedding
# Input: MAX_CONTEXT pre-computed word vectors of size GLOVE_DIM per sample.
context_input = Input(shape=(MAX_CONTEXT,GLOVE_DIM),name="context_input")
# Same pipeline as the question branch, built from separate layer instances.
highway_context = highway_layers(context_input,2,activation="relu", gate_bias=-3,name="context_highway")
context_ff = EncoderBlock(BLOCK_CONV_LAYERS,FILTERS,KERNEL_SIZE,N_HEADS,FILTERS,FILTERS,FILTERS,FILTERS,DROPOUT,name="context_eeb")(highway_context)

In [13]:
## Context question attention
# Stack context and question encodings along axis 1 so that a single Lambda
# receives both.
concat = Concatenate(axis=1)([context_ff,question_ff])
# Pairwise features [q, c, c*q] for every (context word, question word) pair.
lambda_concat = Lambda(attention)(concat)
# One bias-free linear unit turns each pair feature into a scalar score.
attention_dense = TimeDistributed(Dense(1,use_bias=False))(lambda_concat)
# NOTE(review): this reshape reads the scores context-major (row
# i*MAX_QUESTIONS + j -> [i, j]); confirm it matches the row order that
# `attention` produces.
attention_matrix = Reshape((MAX_CONTEXT,MAX_QUESTIONS))(attention_dense)
# Softmax over the last axis of the score matrix.
attention_matrix_bar = Softmax()(attention_matrix)
# A: attention-weighted combination of the question encodings.
A = Dot(axes=(2,1))([attention_matrix_bar, question_ff])

# Softmax over the last axis of the transposed score matrix.
attention_matrix_transpose = Lambda(lambda x : K.permute_dimensions(x, (0, 2, 1)))(attention_matrix)
attention_matrix_bar_bar = Softmax()(attention_matrix_transpose)
# B: product of both normalized matrices, applied to the context encodings.
B = Dot(axes=(2,1))([attention_matrix_bar, attention_matrix_bar_bar])
B = Dot(axes=(2,1))([B, context_ff])

In [14]:
## Stacked model encoder blocks.
# Element-wise products of the context encoding with A and with B.
A_attention = Multiply()([context_ff,A])
B_attention = Multiply()([context_ff,B])

# Feature-axis concatenation of [c, a, c*a, c*b].
stacked_blocks_input=Concatenate(axis=2)([context_ff,A,A_attention,B_attention])

# Convolution that resizes the concatenated features back to FILTERS channels.
stacked_blocks_resized = SeparableConv1D(filters=FILTERS,kernel_size=STACKED_KERNEL_SIZE,name="conv_resize",padding="same")(stacked_blocks_input)


# A single ModelEncoder instance is applied three times below, so the three
# passes reuse the same layer objects.
me = ModelEncoder(N_REPS, BLOCK_CONV_LAYERS_STACKED,FILTERS,STACKED_KERNEL_SIZE,N_HEADS,FILTERS,FILTERS,FILTERS,FILTERS,DROPOUT)

stacked_encoder_blocks_0 = me(stacked_blocks_resized)
stacked_encoder_blocks_1 = me(stacked_encoder_blocks_0)
stacked_encoder_blocks_2 = me(stacked_encoder_blocks_1)


In [15]:
## Output layer

# Start position: encoder outputs 0 and 1 joined on the feature axis, reduced
# to one score per position, flattened, then softmax-normalized.
start_layer = Concatenate(axis=2)([stacked_encoder_blocks_0,stacked_encoder_blocks_1]) # not sure about the axis
start_dense = TimeDistributed(Dense(1,use_bias=False))(start_layer)
start_reshape = Flatten()(start_dense)
start_output = Softmax()(start_reshape)


# End position: same head, built from encoder outputs 0 and 2.
end_layer = Concatenate(axis=2)([stacked_encoder_blocks_0,stacked_encoder_blocks_2]) # not sure about the axis
end_dense = TimeDistributed(Dense(1, use_bias=False))(end_layer)
end_reshape = Flatten()(end_dense)
end_output = Softmax()(end_reshape)

In [16]:
# Two inputs (context, question) and two outputs (start and end distributions).
model = Model(inputs=[context_input,question_input] ,outputs =[start_output,end_output])
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
context_input (InputLayer)      (None, 400, 300)     0                                            
__________________________________________________________________________________________________
question_input (InputLayer)     (None, 30, 300)      0                                            
__________________________________________________________________________________________________
context_highway_dense_1_0 (Dens (None, 400, 300)     90300       context_input[0][0]              
__________________________________________________________________________________________________
question_highway_dense_1_0 (Den (None, 30, 300)      90300       question_input[0][0]             
__________________________________________________________________________________________________
context_hi

In [17]:
# Training parameters
BATCH_SIZE=8
EPOCHS=50
OPTIMIZER='adam'
LOSS= 'categorical_crossentropy'
# Batch generator over the training split.
generator= TensorSequence(train,BATCH_SIZE,embedder,MAX_CONTEXT,MAX_QUESTIONS)

# Checkpoint callback that writes to weights.hdf5 and monitors the training loss.
checkpoint = ModelCheckpoint(filepath='weights.hdf5',monitor="loss", verbose=1)
callbacks_list = [checkpoint]

Loading sequence


In [38]:
# Shape of the context tensor in the first training batch.
generator[0][0][0].shape

RETURNING a batch


(8, 400, 300)

In [None]:
# Compile and train from the Sequence using 3 worker processes.
# steps_per_epoch uses floor division, matching TensorSequence.__len__.
model.compile(optimizer=OPTIMIZER,loss=LOSS, metrics=['accuracy'])
model.fit_generator(generator, steps_per_epoch = TRAIN_COUNT//BATCH_SIZE, max_queue_size=5, epochs = EPOCHS, verbose=2, callbacks=callbacks_list, use_multiprocessing=True, workers=3)

Epoch 1/50


In [0]:
# Resume training from the saved checkpoint file.

model = load_model('weights.hdf5')
# Same settings as the first run except for the batch size (32 instead of 8)
# and an explicit Adam optimizer instance.
BATCH_SIZE=32
EPOCHS=50
OPTIMIZER= Adam()
LOSS= 'categorical_crossentropy'
generator= TensorSequence(train,BATCH_SIZE,embedder,MAX_CONTEXT,MAX_QUESTIONS)

checkpoint = ModelCheckpoint(filepath='weights.hdf5',monitor="loss", verbose=1)
callbacks_list = [checkpoint]

model.summary()

In [0]:
# Re-compile the loaded model and continue training from the Sequence.
model.compile(optimizer=OPTIMIZER,loss=LOSS, metrics=['accuracy'])
model.fit_generator(generator, steps_per_epoch = TRAIN_COUNT//BATCH_SIZE, max_queue_size=5, epochs = EPOCHS, verbose=1, callbacks=callbacks_list, use_multiprocessing=True, workers=3)

 ## Evaluación

Si bien no se alcanzó a hacer un entrenamiento apropiado de todas formas se intentará evaluar el modelo.

Por tiempo, generaremos un arreglo con los índices originales y otro con los índices predichos, y usaremos el método de sklearn para calcular el F-score.

In [0]:
# Batch generator over the test data.
test_generator = TensorSequence(test,BATCH_SIZE,embedder,MAX_CONTEXT,MAX_QUESTIONS)

Loading sequence


In [0]:
# First test batch. NOTE: this rebinds the builtin name `input` for the rest
# of the session.
input,output=test_generator[0]


(32, 677)

In [0]:
# (start, end) index tuples: ground truth and model predictions.
y_true = []
y_pred = []
# Incremented once per batch; not read anywhere in this cell.
j = 0

# Predict batch by batch.
for k in range(len(test_generator)):
    print(k,len(test_generator))
    batch = test_generator[k]
    for i in range(len(batch[1][0])):
        # Append the true (start, end) tuple, recovered from the one-hot targets.
        y_true.append((np.argmax(batch[1][0][i]),np.argmax(batch[1][1][i])))
    predict = model.predict_on_batch(batch[0])
    for i in range(len(predict[1])):
        # Append the predicted (start, end) tuple: argmax of each output.
        y_pred.append((np.argmax(predict[0][i]),np.argmax(predict[1][i])))
    j+=1


In [0]:
from sklearn.metrics import f1_score

# Map the tuples to one-dimensional lists that the sklearn function accepts.

# Start indices only.
y_true_start = list(map(lambda x: x[0], y_true))
y_pred_start = list(map(lambda x: x[0], y_pred))

# End indices only.
y_true_end = list(map(lambda x: x[1], y_true))
y_pred_end = list(map(lambda x: x[1], y_pred))

# Whole span encoded as a single "start end" string label.
y_true_1d = list(map(lambda x: "{0} {1}".format(*x), y_true))
y_pred_1d = list(map(lambda x: "{0} {1}".format(*x), y_pred))

In [0]:
# Micro-averaged F1 (as a percentage) for start, end and the combined span label.
# NOTE(review): for single-label multiclass data, micro-averaged F1 should
# equal plain accuracy - confirm this is the intended metric.
print("f1 score inicio respuesta: {}".format(f1_score(y_true_start, y_pred_start, average='micro')*100))
print("f1 score fin respuesta: {}".format(f1_score(y_true_end, y_pred_end, average='micro')*100))
print("f1 score concatenacion: {}".format(f1_score(y_true_1d, y_pred_1d, average='micro')*100))

In [49]:
from keras.activations import softmax
import keras.backend as K

# Scratch check: evaluate the Keras softmax on a small 3x3 matrix.
a = np.array([[1,2,1], [2,3,100], [0, 100, 1]])
K.eval(softmax(K.variable(a),))

array([[0.21194157, 0.5761169 , 0.21194157],
       [0.        , 0.        , 1.        ],
       [0.        , 1.        , 0.        ]], dtype=float32)

ValueError: ignored