From: https://github.com/bradleypallen/keras-quora-question-pairs/blob/master/keras-quora-question-pairs.py

In [1]:
from __future__ import print_function
import numpy as np
import pandas as pd
import csv, datetime, time, json
from zipfile import ZipFile
from os.path import expanduser, exists
import re
import pickle
import time
from shared_lib import vocabulary
import utils
import scipy.sparse

In [2]:
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.models import Model
from keras.layers import Input, Embedding, TimeDistributed, Dense, Lambda, concatenate, Dropout, BatchNormalization
from keras.layers.embeddings import Embedding
from keras.regularizers import l2
from keras.callbacks import Callback, ModelCheckpoint
from keras.utils.data_utils import get_file
from keras import backend as K
from sklearn.model_selection import train_test_split


Using TensorFlow backend.


In [4]:
# from word embedding example provided by instructors
def pretty_timedelta(fmt="%d:%02d:%02d", since=None, until=None):
    """Pretty-print a timedelta, using the given format string."""
    since = since or time.time()
    until = until or time.time()
    delta_s = until - since
    hours, remainder = divmod(delta_s, 3600)
    minutes, seconds = divmod(remainder, 60)
    return fmt % (hours, minutes, seconds)

In [5]:
# Initialize global variables
#KERAS_DATASETS_DIR = expanduser('~/.keras/datasets/')
KERAS_DATASETS_DIR = 'Data/'
QUESTION_PAIRS_FILE_URL = 'Data/'
QUESTION_PAIRS_FILE = 'train_lite.df.pkl'
#GLOVE_ZIP_FILE_URL = 'http://nlp.stanford.edu/data/glove.840B.300d.zip'
#GLOVE_ZIP_FILE = 'glove.840B.300d.zip'
#GLOVE_FILE = 'glove.840B.300d.txt'
Q1_TRAINING_POS_FILE = 'q1_train_pos.npy'
Q2_TRAINING_POS_FILE = 'q2_train_pos.npy'
LABEL_TRAINING_POS_FILE = 'label_train_pos.npy'
WORD_EMBEDDING_MATRIX_POS_FILE = 'word_embedding_matrix_pos.npy'
#NB_WORDS_POS_FILE = 'nb_words.json'
MAX_NB_WORDS = 200000
V = 200000
MAX_SEQUENCE_LENGTH = 50
EMBEDDING_DIM = 300
MODEL_WEIGHTS_FILE = 'question_pairs_weights_pos.h5'
VALIDATION_SPLIT = 0.1
TEST_SPLIT = 0.1
RNG_SEED = 13371447
NB_EPOCHS = 25
#NB_EPOCHS = 1
DROPOUT = 0.1
BATCH_SIZE = 32
OPTIMIZER = 'adam'

In [6]:
# takes sentence and sentence POS tags, to make 1 sequence...1/2 from words and 1/2 from tags
def build_sent(q, q_pos):
    q = str(q).split()
    q_pos = str(q_pos).split()
    if len(q) > MAX_SEQUENCE_LENGTH/2:
        # cut it off
        q = q[0:MAX_SEQUENCE_LENGTH]
    else:
        # pad to max_time
        amt_to_pad = int((MAX_SEQUENCE_LENGTH/2) - len(q))
        q.extend([0] * amt_to_pad)
    if len(q_pos) > MAX_SEQUENCE_LENGTH/2:
        # cut it off
        q_pos = q_pos[0:MAX_SEQUENCE_LENGTH]
    else:
        # pad to max_time
        amt_to_pad = int((MAX_SEQUENCE_LENGTH/2) - len(q_pos))
        q_pos.extend([0] * amt_to_pad)
    
    q_final = q + q_pos
    q_final = ' '.join(str(w) for w in q_final)
    return q_final

In [None]:
def build_vocab(questions, V=10000):
    #token_feed = (canonicalize_word(w) for w in sentence.split() for sentence in questions)
    #token_feed = (canonicalize_word(w) for sentence in questions for w in sentence.split())
    #print (token_feed)
    
    token_feed = []
    for sentence in questions:
        for w in str(sentence).split():
            token_feed.append(utils.canonicalize_word(w))
    token_feed = set(token_feed)
    
    vocab = vocabulary.Vocabulary(token_feed, size=V)
    
    return vocab

In [7]:
print("Processing Questions")

print ("Reading from source File)")

question1 = []
question2 = []
is_duplicate = []
    
f = "Data/train0.df.pkl.gz"
dev = pd.read_pickle(f)
    
t0 = time.time()
train_pos = pd.DataFrame()
i = 1000
while i < 364000:
    f = "Data/train"+str(i)+".df.pkl.gz"
    print("Starting ", f, i, i+1000, pretty_timedelta(since=t0))
    x = pd.read_pickle(f)
    train_pos = pd.concat([train_pos, x])
    print("Finished ", f, i, i+1000, pretty_timedelta(since=t0))
    i += 1000
    if i == 26000:
        i = 27000 # skip bad file

    
    print (type(train_pos))

Processing Questions
Reading from source File)
Starting  Data/train1000.df.pkl.gz 1000 2000 0:00:00
Finished  Data/train1000.df.pkl.gz 1000 2000 0:00:00
<class 'pandas.core.frame.DataFrame'>
Starting  Data/train2000.df.pkl.gz 2000 3000 0:00:00
Finished  Data/train2000.df.pkl.gz 2000 3000 0:00:00
<class 'pandas.core.frame.DataFrame'>
Starting  Data/train3000.df.pkl.gz 3000 4000 0:00:00
Finished  Data/train3000.df.pkl.gz 3000 4000 0:00:00
<class 'pandas.core.frame.DataFrame'>
Starting  Data/train4000.df.pkl.gz 4000 5000 0:00:00
Finished  Data/train4000.df.pkl.gz 4000 5000 0:00:00
<class 'pandas.core.frame.DataFrame'>
Starting  Data/train5000.df.pkl.gz 5000 6000 0:00:00
Finished  Data/train5000.df.pkl.gz 5000 6000 0:00:00
<class 'pandas.core.frame.DataFrame'>
Starting  Data/train6000.df.pkl.gz 6000 7000 0:00:00
Finished  Data/train6000.df.pkl.gz 6000 7000 0:00:00
<class 'pandas.core.frame.DataFrame'>
Starting  Data/train7000.df.pkl.gz 7000 8000 0:00:00
Finished  Data/train7000.df.pkl.gz 7

Finished  Data/train61000.df.pkl.gz 61000 62000 0:00:01
<class 'pandas.core.frame.DataFrame'>
Starting  Data/train62000.df.pkl.gz 62000 63000 0:00:01
Finished  Data/train62000.df.pkl.gz 62000 63000 0:00:01
<class 'pandas.core.frame.DataFrame'>
Starting  Data/train63000.df.pkl.gz 63000 64000 0:00:01
Finished  Data/train63000.df.pkl.gz 63000 64000 0:00:01
<class 'pandas.core.frame.DataFrame'>
Starting  Data/train64000.df.pkl.gz 64000 65000 0:00:01
Finished  Data/train64000.df.pkl.gz 64000 65000 0:00:01
<class 'pandas.core.frame.DataFrame'>
Starting  Data/train65000.df.pkl.gz 65000 66000 0:00:01
Finished  Data/train65000.df.pkl.gz 65000 66000 0:00:01
<class 'pandas.core.frame.DataFrame'>
Starting  Data/train66000.df.pkl.gz 66000 67000 0:00:01
Finished  Data/train66000.df.pkl.gz 66000 67000 0:00:01
<class 'pandas.core.frame.DataFrame'>
Starting  Data/train67000.df.pkl.gz 67000 68000 0:00:01
Finished  Data/train67000.df.pkl.gz 67000 68000 0:00:01
<class 'pandas.core.frame.DataFrame'>
Starti

Finished  Data/train119000.df.pkl.gz 119000 120000 0:00:03
<class 'pandas.core.frame.DataFrame'>
Starting  Data/train120000.df.pkl.gz 120000 121000 0:00:03
Finished  Data/train120000.df.pkl.gz 120000 121000 0:00:03
<class 'pandas.core.frame.DataFrame'>
Starting  Data/train121000.df.pkl.gz 121000 122000 0:00:03
Finished  Data/train121000.df.pkl.gz 121000 122000 0:00:03
<class 'pandas.core.frame.DataFrame'>
Starting  Data/train122000.df.pkl.gz 122000 123000 0:00:03
Finished  Data/train122000.df.pkl.gz 122000 123000 0:00:03
<class 'pandas.core.frame.DataFrame'>
Starting  Data/train123000.df.pkl.gz 123000 124000 0:00:03
Finished  Data/train123000.df.pkl.gz 123000 124000 0:00:03
<class 'pandas.core.frame.DataFrame'>
Starting  Data/train124000.df.pkl.gz 124000 125000 0:00:03
Finished  Data/train124000.df.pkl.gz 124000 125000 0:00:03
<class 'pandas.core.frame.DataFrame'>
Starting  Data/train125000.df.pkl.gz 125000 126000 0:00:03
Finished  Data/train125000.df.pkl.gz 125000 126000 0:00:03
<clas

Finished  Data/train173000.df.pkl.gz 173000 174000 0:00:07
<class 'pandas.core.frame.DataFrame'>
Starting  Data/train174000.df.pkl.gz 174000 175000 0:00:07
Finished  Data/train174000.df.pkl.gz 174000 175000 0:00:07
<class 'pandas.core.frame.DataFrame'>
Starting  Data/train175000.df.pkl.gz 175000 176000 0:00:07
Finished  Data/train175000.df.pkl.gz 175000 176000 0:00:07
<class 'pandas.core.frame.DataFrame'>
Starting  Data/train176000.df.pkl.gz 176000 177000 0:00:07
Finished  Data/train176000.df.pkl.gz 176000 177000 0:00:07
<class 'pandas.core.frame.DataFrame'>
Starting  Data/train177000.df.pkl.gz 177000 178000 0:00:07
Finished  Data/train177000.df.pkl.gz 177000 178000 0:00:07
<class 'pandas.core.frame.DataFrame'>
Starting  Data/train178000.df.pkl.gz 178000 179000 0:00:07
Finished  Data/train178000.df.pkl.gz 178000 179000 0:00:07
<class 'pandas.core.frame.DataFrame'>
Starting  Data/train179000.df.pkl.gz 179000 180000 0:00:07
Finished  Data/train179000.df.pkl.gz 179000 180000 0:00:07
<clas

Finished  Data/train226000.df.pkl.gz 226000 227000 0:00:12
<class 'pandas.core.frame.DataFrame'>
Starting  Data/train227000.df.pkl.gz 227000 228000 0:00:12
Finished  Data/train227000.df.pkl.gz 227000 228000 0:00:12
<class 'pandas.core.frame.DataFrame'>
Starting  Data/train228000.df.pkl.gz 228000 229000 0:00:12
Finished  Data/train228000.df.pkl.gz 228000 229000 0:00:12
<class 'pandas.core.frame.DataFrame'>
Starting  Data/train229000.df.pkl.gz 229000 230000 0:00:12
Finished  Data/train229000.df.pkl.gz 229000 230000 0:00:12
<class 'pandas.core.frame.DataFrame'>
Starting  Data/train230000.df.pkl.gz 230000 231000 0:00:12
Finished  Data/train230000.df.pkl.gz 230000 231000 0:00:12
<class 'pandas.core.frame.DataFrame'>
Starting  Data/train231000.df.pkl.gz 231000 232000 0:00:12
Finished  Data/train231000.df.pkl.gz 231000 232000 0:00:12
<class 'pandas.core.frame.DataFrame'>
Starting  Data/train232000.df.pkl.gz 232000 233000 0:00:12
Finished  Data/train232000.df.pkl.gz 232000 233000 0:00:12
<clas

Finished  Data/train279000.df.pkl.gz 279000 280000 0:00:18
<class 'pandas.core.frame.DataFrame'>
Starting  Data/train280000.df.pkl.gz 280000 281000 0:00:18
Finished  Data/train280000.df.pkl.gz 280000 281000 0:00:18
<class 'pandas.core.frame.DataFrame'>
Starting  Data/train281000.df.pkl.gz 281000 282000 0:00:18
Finished  Data/train281000.df.pkl.gz 281000 282000 0:00:18
<class 'pandas.core.frame.DataFrame'>
Starting  Data/train282000.df.pkl.gz 282000 283000 0:00:18
Finished  Data/train282000.df.pkl.gz 282000 283000 0:00:18
<class 'pandas.core.frame.DataFrame'>
Starting  Data/train283000.df.pkl.gz 283000 284000 0:00:18
Finished  Data/train283000.df.pkl.gz 283000 284000 0:00:18
<class 'pandas.core.frame.DataFrame'>
Starting  Data/train284000.df.pkl.gz 284000 285000 0:00:18
Finished  Data/train284000.df.pkl.gz 284000 285000 0:00:18
<class 'pandas.core.frame.DataFrame'>
Starting  Data/train285000.df.pkl.gz 285000 286000 0:00:18
Finished  Data/train285000.df.pkl.gz 285000 286000 0:00:18
<clas

Finished  Data/train333000.df.pkl.gz 333000 334000 0:00:25
<class 'pandas.core.frame.DataFrame'>
Starting  Data/train334000.df.pkl.gz 334000 335000 0:00:25
Finished  Data/train334000.df.pkl.gz 334000 335000 0:00:25
<class 'pandas.core.frame.DataFrame'>
Starting  Data/train335000.df.pkl.gz 335000 336000 0:00:25
Finished  Data/train335000.df.pkl.gz 335000 336000 0:00:25
<class 'pandas.core.frame.DataFrame'>
Starting  Data/train336000.df.pkl.gz 336000 337000 0:00:25
Finished  Data/train336000.df.pkl.gz 336000 337000 0:00:25
<class 'pandas.core.frame.DataFrame'>
Starting  Data/train337000.df.pkl.gz 337000 338000 0:00:25
Finished  Data/train337000.df.pkl.gz 337000 338000 0:00:26
<class 'pandas.core.frame.DataFrame'>
Starting  Data/train338000.df.pkl.gz 338000 339000 0:00:26
Finished  Data/train338000.df.pkl.gz 338000 339000 0:00:26
<class 'pandas.core.frame.DataFrame'>
Starting  Data/train339000.df.pkl.gz 339000 340000 0:00:26
Finished  Data/train339000.df.pkl.gz 339000 340000 0:00:26
<clas

In [25]:
print ("Build  Question Pairs with POS, maximizing sequence length to 50")

question1 = []
question2 = []
is_duplicate = []

for index, row in train_pos.iterrows():
    q1=row["question1"]
    q1_pos = row["question1_pos"]
    q2=row["question2"]
    q2_pos = row["question2_pos"]
    question1.append(build_sent(q1,q1_pos))
    question2.append(build_sent(q2,q2_pos))
    
    is_duplicate.append(row['is_duplicate'])

print('Question pairs: %d' % len(question1))

Build  Question Pairs with POS, maximizing sequence length to 50
Question pairs: 361861


In [26]:
print ("Build Tokenized Word Index")
questions = question1 + question2
tokenizer = Tokenizer(num_words=MAX_NB_WORDS)
tokenizer.fit_on_texts(questions)
question1_word_sequences = tokenizer.texts_to_sequences(question1)
question2_word_sequences = tokenizer.texts_to_sequences(question2)
word_index = tokenizer.word_index

print("Words in index: %d" % len(word_index))

# alternative here would be to use sents_to_tokens from vocab

Build Tokenized Word Index
Words in index: 90882


In [27]:
print (len(word_index))
print(dict(list(word_index.items())[0:2]))
print ("Build Vocab and token_ids")
vocab = list(word_index.keys())
token_ids = list(word_index.values())
print (len(vocab))
print (vocab[50881])
V = len(token_ids) + 1
print (len(token_ids))
print (token_ids[50881])

#print "Sample ids: " + str(word_index[:5])

90882
{'ruffle': 54290, '1453': 59775}
Build Vocab and token_ids
90882
chiplun
90882
49113


In [28]:
def cooccurrence_matrix(token_ids, Vsize, K=2):
    # We'll use this as an "accumulator" matrix
    C = scipy.sparse.csc_matrix((Vsize,Vsize), dtype=np.float32)
    print (C.shape)

    for k in range(1, K+1):
        print (u"Counting pairs (i, i \u00B1 %d) ..." % k)
        i = token_ids[:-k]  # current word
        j = token_ids[k:]   # k words ahead
        data = (np.ones_like(i), (i,j))  # values, indices
        Ck_plus = scipy.sparse.coo_matrix(data, shape=C.shape, dtype=np.float32)
        Ck_plus = scipy.sparse.csc_matrix(Ck_plus)
        Ck_minus = Ck_plus.T  # Consider k words behind
        C += Ck_plus + Ck_minus

    print ("Co-occurrence matrix: %d words x %d words" % (C.shape))
    print ("  %.02g nonzero elements" % (C.nnz))
    return C

In [29]:
kw=1
for k in range(1, kw+1):
    i = token_ids[:-k]  # current word
    j = token_ids[k:]   # k words ahead

In [30]:
k = 1
t0 = time.time()
C = cooccurrence_matrix(token_ids, V, K=k)
print ("Computed Co-occurrence matrix in %s" % utils.pretty_timedelta(since=t0), t0)


(90883, 90883)
Counting pairs (i, i ± 1) ...
Co-occurrence matrix: 90883 words x 90883 words
  1.8e+05 nonzero elements
Computed Co-occurrence matrix in 0:00:00 1502909650.3687503


In [31]:
def PPMI(C):
    """Tranform a counts matrix to PPMI.
    
    Args:
      C: scipy.sparse.csc_matrix of counts C_ij
    
    Returns:
      (scipy.sparse.csc_matrix) PPMI(C) as defined above
    """
    Z = float(C.sum())  # total counts
    # sum each column (along rows)
    Zc = np.array(C.sum(axis=0), dtype=np.float64).flatten()
    # sum each row (along columns)
    Zr = np.array(C.sum(axis=1), dtype=np.float64).flatten()
    
    # Get indices of relevant elements
    ii, jj = C.nonzero()  # row, column indices
    Cij = np.array(C[ii,jj], dtype=np.float64).flatten()
    
    ##
    # PMI equation
    pmi = np.log(Cij * Z / (Zr[ii] * Zc[jj]))
    ##
    # Truncate to positive only
    ppmi = np.maximum(0, pmi)  # take positive only
    
    # Re-format as sparse matrix
    ret = scipy.sparse.csc_matrix((ppmi, (ii,jj)), shape=C.shape,
                                  dtype=np.float64)
    ret.eliminate_zeros()  # remove zeros
    return ret

In [32]:
from sklearn.decomposition import TruncatedSVD
def SVD(X, d=100):
    """Returns word vectors from SVD.
    
    Args:
      X: m x n matrix
      d: word vector dimension
      
    Returns:
      Wv : m x d matrix, each row is a word vector.
    """
    transformer = TruncatedSVD(n_components=d, random_state=1)
    Wv = transformer.fit_transform(X)
    # Normalize to unit length
    Wv = Wv / np.linalg.norm(Wv, axis=1).reshape([-1,1])
    return Wv, transformer.explained_variance_

In [33]:
C_ppmi = PPMI(C)
print ("Computed PPMI in %s" % utils.pretty_timedelta(since=t0),t0)
word_embedding_matrix, _ = SVD(C_ppmi, d=EMBEDDING_DIM)
print ("Computed SVD in %s" % utils.pretty_timedelta(since=t0))
print('Shape of embedding matrix:', word_embedding_matrix.shape)

Computed PPMI in 0:00:02 1502909650.3687503
Computed SVD in 0:00:25
Shape of embedding matrix: (90883, 300)


In [34]:
# Prepare training data tensors
q1_data = pad_sequences(question1_word_sequences, maxlen=MAX_SEQUENCE_LENGTH)
q2_data = pad_sequences(question2_word_sequences, maxlen=MAX_SEQUENCE_LENGTH)
labels = np.array(is_duplicate, dtype=int)
print('Shape of question1 data tensor:', q1_data.shape)
print('Shape of question2 data tensor:', q2_data.shape)
print('Shape of label tensor:', labels.shape)

Shape of question1 data tensor: (361861, 50)
Shape of question2 data tensor: (361861, 50)
Shape of label tensor: (361861,)


In [35]:
# Partition the dataset into train and test sets
X = np.stack((q1_data, q2_data), axis=1)
y = labels
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=TEST_SPLIT, random_state=RNG_SEED)
Q1_train = X_train[:,0]
Q2_train = X_train[:,1]
Q1_test = X_test[:,0]
Q2_test = X_test[:,1]

In [36]:
# Define the model
question1 = Input(shape=(MAX_SEQUENCE_LENGTH,))
question2 = Input(shape=(MAX_SEQUENCE_LENGTH,))

q1 = Embedding(V, 
                 EMBEDDING_DIM, 
                 weights=[word_embedding_matrix], 
                 input_length=MAX_SEQUENCE_LENGTH, 
                 trainable=False)(question1)
#print ("q1 shape:", q1.shape)
q1 = TimeDistributed(Dense(EMBEDDING_DIM, activation='relu'))(q1)
#print ("q1 shape:", q1.shape)
q1 = Lambda(lambda x: K.max(x, axis=1), output_shape=(EMBEDDING_DIM, ))(q1)
#print ("q1 shape:", q1.shape)
q2 = Embedding(V, 
                 EMBEDDING_DIM, 
                 weights=[word_embedding_matrix], 
                 input_length=MAX_SEQUENCE_LENGTH, 
                 trainable=False)(question2)
q2 = TimeDistributed(Dense(EMBEDDING_DIM, activation='relu'))(q2)
q2 = Lambda(lambda x: K.max(x, axis=1), output_shape=(EMBEDDING_DIM, ))(q2)

merged = concatenate([q1,q2])
# print(merged.shape)
merged = Dense(200, activation='relu')(merged)
# print(merged.shape)
merged = Dropout(DROPOUT)(merged)
merged = BatchNormalization()(merged)
merged = Dense(200, activation='relu')(merged)
merged = Dropout(DROPOUT)(merged)
merged = BatchNormalization()(merged)
merged = Dense(200, activation='relu')(merged)
merged = Dropout(DROPOUT)(merged)
merged = BatchNormalization()(merged)
merged = Dense(200, activation='relu')(merged)
merged = Dropout(DROPOUT)(merged)
merged = BatchNormalization()(merged)

is_duplicate = Dense(1, activation='sigmoid')(merged)

model = Model(inputs=[question1,question2], outputs=is_duplicate)
model.compile(loss='binary_crossentropy', optimizer=OPTIMIZER, metrics=['accuracy'])


In [37]:
# Train the model, checkpointing weights with best validation accuracy
print("Starting training at", datetime.datetime.now())
t0 = time.time()
callbacks = [ModelCheckpoint(MODEL_WEIGHTS_FILE, monitor='val_acc', save_best_only=True)]
history = model.fit([Q1_train, Q2_train],
                    y_train,
                    epochs=NB_EPOCHS,
                    validation_split=VALIDATION_SPLIT,
                    verbose=2,
                    batch_size=BATCH_SIZE,
                    callbacks=callbacks)
t1 = time.time()
print("Training ended at", datetime.datetime.now())
print("Minutes elapsed: %f" % ((t1 - t0) / 60.))

Starting training at 2017-08-16 14:55:31.118556
Train on 293106 samples, validate on 32568 samples
Epoch 1/25
870s - loss: 0.5667 - acc: 0.7064 - val_loss: 0.5352 - val_acc: 0.7298
Epoch 2/25
901s - loss: 0.5071 - acc: 0.7499 - val_loss: 0.5065 - val_acc: 0.7417
Epoch 3/25
946s - loss: 0.4773 - acc: 0.7693 - val_loss: 0.4842 - val_acc: 0.7620
Epoch 4/25
920s - loss: 0.4580 - acc: 0.7825 - val_loss: 0.5259 - val_acc: 0.7374
Epoch 5/25
942s - loss: 0.4421 - acc: 0.7919 - val_loss: 0.4701 - val_acc: 0.7716
Epoch 6/25
929s - loss: 0.4213 - acc: 0.8055 - val_loss: 0.4650 - val_acc: 0.7758
Epoch 7/25
945s - loss: 0.4028 - acc: 0.8173 - val_loss: 0.4565 - val_acc: 0.7801
Epoch 8/25
871s - loss: 0.3863 - acc: 0.8260 - val_loss: 0.4607 - val_acc: 0.7804
Epoch 9/25
869s - loss: 0.3710 - acc: 0.8348 - val_loss: 0.4643 - val_acc: 0.7776
Epoch 10/25
891s - loss: 0.3569 - acc: 0.8421 - val_loss: 0.4566 - val_acc: 0.7826
Epoch 11/25
913s - loss: 0.3428 - acc: 0.8504 - val_loss: 0.4676 - val_acc: 0.77