## Use the following code to visualize the results of the model trained by recipeClassifierHATT_v4

### load essential modules

In [1]:
from __future__ import division

import numpy as np
np.set_printoptions(formatter={'float': lambda x: "{0:0.3f}".format(x)})

import pandas as pd
import cPickle
from collections import defaultdict
import re

from bs4 import BeautifulSoup

import sys
import os

from keras.preprocessing.text import Tokenizer, text_to_word_sequence
from keras.preprocessing.sequence import pad_sequences
from keras.utils.np_utils import to_categorical

from keras.layers import Embedding
from keras.layers import Dense, Input, Flatten
from keras.layers import Conv1D, MaxPooling1D, Embedding, Merge, Dropout, LSTM, GRU, Bidirectional, TimeDistributed
from keras.models import Model

from keras import backend as K
from keras.engine.topology import Layer, InputSpec
from keras import initializers, optimizers
from nltk import tokenize
#### additional
import pickle
from args import get_parser
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, precision_score, recall_score
import tensorflow as tf
from tensorboardX import SummaryWriter
from sys import exit

def save_pickle(filename, obj, overwrite = False):
    """Pickle ``obj`` to ``filename`` using pickle protocol 2.

    The parent directory is created first through ``make_dir``.  When the
    file already exists and ``overwrite`` is false, nothing is written and a
    notice is printed instead.

    Args:
        filename: destination path of the pickle file.
        obj: any picklable object.
        overwrite: replace an existing file when true (default ``False``).
    """
    make_dir(filename)
    if os.path.isfile(filename) and not overwrite:
        print('already exists'+filename)
        return
    # The ``with`` block closes the handle, so no explicit close() is needed.
    with open(filename, 'wb') as gfp:
        pickle.dump(obj, gfp, protocol=2)
            
def make_dir(filename):
    """Create the parent directory of ``filename`` if it does not exist yet.

    Prints ``make dir`` when a directory is actually created.

    Args:
        filename: path of a file whose containing directory is required.
    """
    dir_path = os.path.dirname(filename)
    # A bare file name has an empty dirname; os.makedirs('') would raise, and
    # the current working directory never needs to be created.
    if dir_path and not os.path.exists(dir_path):
        os.makedirs(dir_path)
        print('make dir')
        
def load_pickle(filename):
    """Open ``filename`` in binary mode and return the unpickled object."""
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open(filename, 'rb') as handle:
        return pickle.load(handle)

class AttLayer(Layer):
    """Attention pooling: collapse axis 1 of a rank-3 input into a weighted sum.

    Each step is scored with ``u^T tanh(x W + b)``; the exponentiated scores
    are normalised along axis 1 and used to weight the input steps.
    """

    def __init__(self, attention_dim):
        # Attribute assignments deliberately precede the base constructor call.
        self.init = initializers.get('normal')
        self.supports_masking = True
        self.attention_dim = attention_dim
        super(AttLayer, self).__init__()

    def build(self, input_shape):
        """Create W (features x attention_dim), b (attention_dim) and u (attention_dim x 1)."""
        assert len(input_shape) == 3
        feature_dim = input_shape[-1]
        self.W = K.variable(self.init((feature_dim, self.attention_dim)))
        self.b = K.variable(self.init((self.attention_dim, )))
        self.u = K.variable(self.init((self.attention_dim, 1)))
        self.trainable_weights = [self.W, self.b, self.u]
        super(AttLayer, self).build(input_shape)

    def compute_mask(self, inputs, mask=None):
        """Hand the incoming mask on unchanged."""
        # NOTE(review): axis 1 is summed away in call(); confirm that
        # forwarding the per-step mask is what downstream layers expect.
        return mask

    def call(self, x, mask=None):
        """Return the attention-weighted sum of ``x`` over axis 1."""
        # x is rank 3 (asserted in build); presumably (batch, steps, features).
        hidden = K.tanh(K.bias_add(K.dot(x, self.W), self.b))
        scores = K.exp(K.squeeze(K.dot(hidden, self.u), -1))

        if mask is not None:
            # Cast the mask to floatX to avoid float64 upcasting in theano
            scores = scores * K.cast(mask, K.floatx())

        # epsilon guards against division by zero when every score is masked out
        normaliser = K.cast(K.sum(scores, axis=1, keepdims=True) + K.epsilon(), K.floatx())
        attention = K.expand_dims(scores / normaliser)
        return K.sum(x * attention, axis=1)

    def compute_output_shape(self, input_shape):
        """Output drops the middle axis: (first dim, last dim) of the input."""
        return input_shape[0], input_shape[-1]

def test(X_test, y_test, model, threshold, print_ = True):
    """Predict on ``X_test`` and score the class-1 probabilities with ``validate``.

    Args:
        X_test: input passed to ``model.predict``.
        y_test: labels forwarded to ``validate``.
        model: object whose ``predict`` returns a 2-D array; column 1 is used
            as the class-1 probability.
        threshold: probability cut-off forwarded to ``validate``.
        print_: forwarded to ``validate`` to enable or silence its report.

    Returns:
        Whatever ``validate`` returns (the negated F1 score).
    """
    prob_class1 = model.predict(X_test)[:,1]
    # Forward the caller's flag; it used to be overridden with True.
    return validate(prob_class1, y_test, threshold, print_ = print_)

def validate(prob_class1, y_test, threshold, print_ = True):
    """Threshold class-1 probabilities and return the negated F1 score.

    Args:
        prob_class1: iterable of class-1 probabilities, one per sample.
        y_test: array whose ``argmax`` over the last axis gives the true
            class (presumably one-hot labels).
        threshold: a sample is predicted as class 1 when its probability is
            strictly greater than this value.
        print_: when true, print the threshold, the predicted/true positive
            counts and F1 / precision / recall.

    Returns:
        ``-f1``; the sign is flipped, presumably so callers can minimise it.
    """
    preds = [1 if p > threshold else 0 for p in prob_class1]
    true = y_test.argmax(axis = -1).tolist()
    f1 = f1_score(true, preds)
    if print_:
        print(':::current prob threshold %.3f '%(threshold))
        print('   positive number: pred %d, true %d' %(sum(preds), sum(true)))
        print('   -f1 %.3f, precision %.3f, recall %.3f' % (f1, precision_score(true, preds), recall_score(true, preds)))
    return -f1 # return negative

Using TensorFlow backend.


### load the saved opts so that the matching saved model can be loaded with them

In [39]:
# Checkpoint prefix; the options pickle shares this prefix and the weights
# file is later derived from opts.resume.
resume = '../../dir_HugeFiles/snap_0311/attention/model_e006_v--0.370'
statename = '{}.pickle'.format(resume)
state = load_pickle(statename)
opts = state['opts']
# Point the restored options at this checkpoint and choose the GPU to use.
opts.resume, opts.gpu = resume, 1

### setup the model, then load the pretrained weights

In [None]:
gpu_id = opts.gpu
print('Current running on GPU number:', gpu_id)
# Expose only the requested physical GPU to this process.
gpu_options = tf.GPUOptions(visible_device_list=str(gpu_id))
# device_count is the maximum NUMBER of devices per type, not a device id.
# Passing gpu_id here would disable the GPU entirely for gpu_id == 0; exactly
# one GPU is visible, so the count is 1.
config = tf.ConfigProto(device_count = {'GPU': 1, 'CPU': 10},
                        gpu_options = gpu_options,
                        intra_op_parallelism_threads = 32,
                        inter_op_parallelism_threads = 32)
sess = tf.Session(config = config)
# Make Keras build and run its graph in this session.
K.set_session(sess)
####
# Dimensions of the encoded data tensor and vocabulary / embedding sizes.
MAX_SENT_LENGTH = 100
MAX_SENTS = 15
MAX_NB_WORDS = 20000
EMBEDDING_DIM = 100
VALIDATION_SPLIT = 0.2

##### customized settings #####
p = os.path.abspath(opts.snapshots)
tag = '/'.join(p.split(os.sep)[3:])# store the path, but w/o prefix workspace/dir_HugeFiles
print('tag of tensor board: %s'%(tag))

reviews, labels = load_pickle(opts.train)

# if want to use less data to train
small = opts.small
if small:
    reviews, labels = reviews[:5000], labels[:5000]

# reviews: per recipe, a list of sentence strings
# texts:   per recipe, its sentences joined into one space-separated string
texts = [' '.join(recipe) for recipe in reviews]

# `num_words` is the Keras 2 name of the deprecated `nb_words` argument.
tokenizer = Tokenizer(num_words=MAX_NB_WORDS)
tokenizer.fit_on_texts(texts)
word_index = tokenizer.word_index

# data[i, j, k] = index of the k-th kept word of sentence j in recipe i (0 = padding)
data = np.zeros((len(texts), MAX_SENTS, MAX_SENT_LENGTH), dtype='int32')

for i, sentences in enumerate(reviews):
    for j, sent in enumerate(sentences):
        if j >= MAX_SENTS:
            break
        k = 0
        for word in text_to_word_sequence(sent):
            if k >= MAX_SENT_LENGTH:
                break
            # Single lookup; unknown or too-rare words are skipped so the
            # kept words stay contiguous from position 0.
            idx = word_index.get(word)
            if idx is not None and idx < MAX_NB_WORDS:
                data[i, j, k] = idx
                k = k + 1

print('Total %s unique tokens.' % len(word_index))

labels = to_categorical(np.asarray(labels))
print('Shape of data tensor:', data.shape)
print('Shape of label tensor:', labels.shape)

# Stratified 60/20/20 train/validation/test split: 20% is held out for test,
# then 25% of the remaining 80% becomes the validation set.
X_train, X_test, y_train, y_test = train_test_split(data, labels, stratify = labels, test_size = 0.2, random_state = opts.random)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, stratify = y_train, test_size = 0.25, random_state = 1 + opts.random)

print('Number of positive and negative reviews in traing and validation set')
# Function-call form prints the same in Python 2 and is valid Python 3,
# consistent with every other print in this notebook.
print(y_train.sum(axis=0))
print(y_val.sum(axis=0))

# delete variable to release memory
del data

# opts.pweight == -1 requests an automatically balanced weight: the ratio of
# class-0 to class-1 training samples, rounded to one decimal.
class_weights = opts.pweight
# Legacy misspelled name, kept so that any code still reading it keeps working.
class_wights = opts.pweight
if class_weights == -1:
    class_01 = y_train.sum(axis= 0)
    class_weights = round(class_01[0]/class_01[1],1)
print('class weight is %.1f' % class_weights)

# Parse the word-vector text file: each line is a token followed by its
# float coefficients, separated by whitespace.
embeddings_index = {}
with open(opts.gloveW2V) as f:
    for line in f:
        values = line.split()
        if not values:
            # tolerate blank lines instead of failing on values[0]
            continue
        word = values[0]
        coefs = np.asarray(values[1:], dtype='float32')
        embeddings_index[word] = coefs

print('Total %s word vectors.' % len(embeddings_index))

# building Hierarchical Attention network
# Row i holds the vector of the word with index i; row 0 is the padding index.
embedding_matrix = np.random.random((len(word_index) + 1, EMBEDDING_DIM))
for word, i in word_index.items():
    embedding_vector = embeddings_index.get(word)
    if embedding_vector is not None:
        # words not found in the embedding index keep their random initialisation.
        embedding_matrix[i] = embedding_vector

embedding_layer = Embedding(len(word_index) + 1,
                            EMBEDDING_DIM,
                            weights=[embedding_matrix],
                            input_length=MAX_SENT_LENGTH,
                            trainable=True,
                            mask_zero=True)

# Word level: encode one sentence (a sequence of word indices) into a vector.
sentence_input = Input(shape=(MAX_SENT_LENGTH,), dtype='int32')
embedded_sequences = embedding_layer(sentence_input)
l_lstm = Bidirectional(GRU(100, return_sequences=True))(embedded_sequences)
l_att = AttLayer(100)(l_lstm)
sentEncoder = Model(sentence_input, l_att)

# Sentence level: apply the sentence encoder to every sentence of a document,
# then encode the sequence of sentence vectors and classify it.
review_input = Input(shape=(MAX_SENTS, MAX_SENT_LENGTH), dtype='int32')
review_encoder = TimeDistributed(sentEncoder)(review_input)
l_lstm_sent = Bidirectional(GRU(100, return_sequences=True))(review_encoder)
l_att_sent = AttLayer(100)(l_lstm_sent)
preds = Dense(2, activation='softmax')(l_att_sent)
model = Model(review_input, preds)

modelname = opts.resume+'_model.h5'
if os.path.isfile(modelname):
    print("=> loading checkpoint '{}'".format(opts.resume))
    model.load_weights(modelname)
else:
    # Without the checkpoint every visualisation below would come from an
    # untrained, randomly initialised network - say so explicitly.
    print("=> WARNING: no checkpoint found at '{}', model weights are untrained".format(modelname))

('Current running on GPU number:', 1)
tag of tensor board: snap_0311/attention


### define a new layer that exports the attention weights over words or sentences

In [None]:
class AttLayer_visual(Layer):
    """Attention layer variant that outputs the attention weights themselves.

    The scores are computed as ``u^T tanh(x W + b)``, exponentiated, masked
    and normalised along axis 1. Instead of the weighted sum of the input,
    ``call`` returns the normalised weights with a trailing axis of size 1.

    Every instance creates its own W/b/u in ``build``. To inspect a trained
    attention layer, copy that layer's weights into this one with
    ``set_weights``.
    """

    def __init__(self, attention_dim):
        self.init = initializers.get('normal')
        self.supports_masking = True
        self.attention_dim = attention_dim
        super(AttLayer_visual, self).__init__()

    def build(self, input_shape):
        """Create W (features x attention_dim), b (attention_dim) and u (attention_dim x 1)."""
        assert len(input_shape) == 3
        self.W = K.variable(self.init((input_shape[-1], self.attention_dim)))
        self.b = K.variable(self.init((self.attention_dim, )))
        self.u = K.variable(self.init((self.attention_dim, 1)))
        self.trainable_weights = [self.W, self.b, self.u]
        super(AttLayer_visual, self).build(input_shape)

    def compute_mask(self, inputs, mask=None):
        """Pass the incoming mask through unchanged."""
        return mask

    def call(self, x, mask=None):
        """Return the normalised attention weights for ``x``."""
        # x is rank 3 (asserted in build); presumably (batch, steps, features).
        # uit = tanh(xW+b)
        uit = K.tanh(K.bias_add(K.dot(x, self.W), self.b))
        ait = K.dot(uit, self.u)
        ait = K.squeeze(ait, -1)

        ait = K.exp(ait)

        if mask is not None:
            # Cast the mask to floatX to avoid float64 upcasting in theano
            ait *= K.cast(mask, K.floatx())
        # epsilon guards against division by zero when every step is masked out
        ait /= K.cast(K.sum(ait, axis=1, keepdims=True) + K.epsilon(), K.floatx())
        # Restore a trailing axis of size 1; the weighted sum of the inputs is
        # not needed here, only the weights are exported.
        return K.expand_dims(ait)

    def compute_output_shape(self, input_shape):
        """One weight per step: (first dim, second dim, 1), matching ``call``."""
        return (input_shape[0], input_shape[1], 1)
    
document_id= 0

# A freshly constructed AttLayer_visual owns new, randomly initialised W/b/u.
# To visualise what the trained model attends to, locate the trained AttLayer
# of each level and copy its parameters into the corresponding visual layer.
trained_sent_att = [layer for layer in model.layers if isinstance(layer, AttLayer)][-1]
trained_word_att = [layer for layer in sentEncoder.layers if isinstance(layer, AttLayer)][-1]

# Sentence-level attention weights for every test document.
sent_att_vis = AttLayer_visual(100)
l_att_sent_vis = sent_att_vis(l_lstm_sent)
# The layer is built by the call above, so its weights can now be overwritten.
sent_att_vis.set_weights(trained_sent_att.get_weights())
model_vis = Model(review_input, l_att_sent_vis)
ait_sent = model_vis.predict(X_test)

# only takes one document
word_att_vis = AttLayer_visual(100)
l_att_vis = word_att_vis(l_lstm)
word_att_vis.set_weights(trained_word_att.get_weights())
sentEncoder_vis = Model(sentence_input, l_att_vis)
# The sentences of the selected document form the batch here.
ait = sentEncoder_vis.predict(X_test[document_id])

### prepare the word-index dictionary

In [None]:
# Invert the tokenizer vocabulary: integer index -> word.
reverse_word_map = {index: word for word, index in tokenizer.word_index.items()}

### visualize sentence importances

In [None]:
def display_sent(document_id, ait_sent, X, y):
    """Print each non-empty sentence of one document with its attention score.

    Args:
        document_id: index of the document in ``X``, ``y`` and ``ait_sent``.
        ait_sent: sentence-level attention weights, indexed as
            ``ait_sent[document][sentence][0]``.
        X: encoded documents; ``X[document][sentence]`` is a sequence of word
            indices in which 0 is treated as padding.
        y: labels; ``y[document][1] == 1`` is reported as "Low GI".
    """
    print('Label as %r Low GI recipe' % (y[document_id][1] == 1))
    # Walk over however many sentence rows the document has instead of a
    # hard-coded 15.
    for i, row in enumerate(X[document_id]):
        sentence = [t for t in row if t!=0]
        if sentence:
            str_sent = ' '.join([reverse_word_map[t] for t in sentence])
            score = ait_sent[document_id][i][0]
            print('Score %.3f: %s' %(score, str_sent))
display_sent(document_id, ait_sent, X_test, y_test)

### visualize sentence- and word-level importances

In [None]:
def display_importance(document_id, ait_sent, ait, X, y):
    """Print sentence scores and per-word weights for one document.

    Args:
        document_id: index of the document in ``X``, ``y`` and ``ait_sent``.
        ait_sent: sentence-level attention weights, indexed as
            ``ait_sent[document][sentence][0]``.
        ait: word-level attention weights of THIS document, indexed as
            ``ait[sentence][word][0]``; the caller must compute it for the
            same ``document_id``.
        X: encoded documents; 0 entries are treated as padding.
        y: labels; ``y[document][1] == 1`` is reported as "Low GI".
    """
    print('Label as %r Low GI recipe' % (y[document_id][1] == 1))
    # Walk over however many sentence rows the document has instead of a
    # hard-coded 15.
    for i, row in enumerate(X[document_id]):
        sentence = [t for t in row if t!=0]
        if sentence:
            # The first len(sentence) weight positions are read for the words.
            weight = [round(w[0],3) for w in ait[i][:len(sentence)]]
            str_sent = ' '.join([reverse_word_map[t] for t in sentence])
            score = ait_sent[document_id][i][0]
            # Combine both levels: sqrt(sentence score) * word weight.
            score_2 = np.sqrt(score)*np.array(weight)
            print('Score %.3f:       %s' %(score, str_sent))
            print('pw:               %s' %(weight))
            print('sqrt ps *pw       %s' %(score_2))
display_importance(document_id, ait_sent, ait, X_test, y_test)

In [None]:
def plt_importance(document_id, ait_sent, ait, X, y):
    """Collect the importances of one document into a DataFrame.

    Every non-empty sentence contributes two rows: first the sentence score
    followed by its words, then ``0`` followed by the combined per-word
    scores ``sqrt(sentence score) * word weight``.

    Args:
        document_id: index of the document in ``X``, ``y`` and ``ait_sent``.
        ait_sent: sentence-level attention weights, indexed as
            ``ait_sent[document][sentence][0]``.
        ait: word-level attention weights of this document, indexed as
            ``ait[sentence][word][0]``.
        X: encoded documents; 0 entries are treated as padding.
        y: labels; ``y[document][1] == 1`` is reported as "Low GI".

    Returns:
        ``pandas.DataFrame`` built from the collected rows.
    """
    print('Label as %r Low GI recipe' % (y[document_id][1] == 1))
    rows = []
    for i, tokens in enumerate(X[document_id]):
        sentence = [t for t in tokens if t!=0]
        if not sentence:
            continue
        score = ait_sent[document_id][i][0]
        # The rows have different lengths; the DataFrame constructor fills
        # the gaps itself. The former `pads*[]` padding never added anything.
        rows.append([score]+[reverse_word_map[t] for t in sentence])

        weight = [round(w[0],3) for w in ait[i][:len(sentence)]]
        score_2 = (np.sqrt(score)*np.array(weight)).tolist()
        rows.append([0]+score_2)
    return pd.DataFrame(rows)
df = plt_importance(document_id, ait_sent, ait, X_test, y_test)

In [None]:
# Softmax output of the loaded model for every test document.
prob = model.predict(x=X_test)

In [None]:
# Select the test document with the highest predicted class-1 probability.
document_id = prob[:, 1].argmax()
print(document_id)
# Word-level weights are per document, so recompute them for the new choice.
ait = sentEncoder_vis.predict(X_test[document_id])

In [None]:
# Report sentence- and word-level importances of the selected document.
display_importance(document_id, ait_sent, ait, X=X_test, y=y_test)