In [1]:
import gzip
import numpy as np
import pandas as pd
from collections import defaultdict
import os 
os.environ['KERAS_BACKEND'] = 'cntk'
import requests
import subprocess
import time
from tqdm import tqdm

from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score

from keras.preprocessing.text import Tokenizer, text_to_word_sequence
from keras.preprocessing.sequence import pad_sequences
from keras.utils.np_utils import to_categorical
from keras.optimizers import SGD

from keras.layers import Embedding
from keras.layers import Dense, Input, Flatten
from keras.layers import Conv1D, MaxPooling1D, Embedding, Dropout, LSTM, GRU, Bidirectional, TimeDistributed
from keras.models import Model, load_model

from keras import backend as K
from keras.engine.topology import Layer, InputSpec
from keras import initializers, regularizers, optimizers
from keras.callbacks import History, CSVLogger, ModelCheckpoint

Using CNTK backend


In [2]:
# Read the raw AMiner dump into memory as a list of lines (newlines kept).
# The earlier form called f.readlines() from inside `for x in f`, so the first
# line was consumed by the loop variable and silently dropped, and the file
# handle was never closed. The context manager fixes both.
with open("data/AMiner-Paper.txt", "r") as f:
    data = f.readlines()

In [3]:
# Group the raw lines into records: a line that is exactly '\n' closes the
# current record and starts a new one.
data_i = []
temp = []
for string in tqdm(data):
    if string != '\n':
        temp.append(string)
    else:
        data_i.append(temp)
        temp = []

# Flush the final record when the file does not end with a blank line;
# without this the last paper would be silently discarded.
if temp:
    data_i.append(temp)

100%|██████████| 24206330/24206330 [00:20<00:00, 1168444.78it/s]


In [4]:
# Keep only the records that contain at least one '#!' line.
# any() adds each record at most once; the earlier nested loop appended the
# record once per matching line, duplicating records with several '#!' lines.
processed_data = [
    dp for dp in tqdm(data_i)
    if any(string[:2] == '#!' for string in dp)
]

100%|██████████| 2092356/2092356 [00:04<00:00, 449259.97it/s]


In [5]:
# Extract one (abstract, '#c' value) pair per record.
# Both values are collected per record and appended together, so the two lists
# always stay index-aligned. Filling them independently would shift every later
# row if a record had a missing or repeated '#c' / '#!' line.
abs_list = []
c_list = []
for dp in tqdm(processed_data):
    c_value = None
    abstract = None
    for string in dp:
        tag = string[:2]
        # The payload starts after the two-character tag and one separator character.
        if tag == '#c' and c_value is None:
            c_value = string[3:].strip('\n')
        elif tag == '#!' and abstract is None:
            abstract = string[3:].strip('\n')
    if c_value is not None and abstract is not None:
        c_list.append(c_value)
        abs_list.append(abstract)

100%|██████████| 1534970/1534970 [00:08<00:00, 172709.71it/s]


In [6]:
# Two-column frame: 'abstract' holds the '#!' text and 'pv' the '#c' text
# (presumably the publication venue). Both lists must have the same length.
all_data = pd.DataFrame(data={'abstract': abs_list, 'pv': c_list})

In [7]:
# Keep only the rows whose 'pv' value occurs at least 2000 times in the frame.
filtered = all_data.groupby('pv').filter(lambda x: len(x) >= 2000)

In [8]:
# From here on `all_data` names the filtered frame; the unfiltered frame is no
# longer reachable under this name.
all_data=filtered

In [9]:
# Row count after filtering; drives the permutation and the 90/10 split below.
data_points=len(all_data)

In [10]:
# Seed NumPy's global RNG so the row permutation is repeatable across runs.
np.random.seed(42)
# Reorder all rows by a random permutation of the positions 0..data_points-1.
shuffled = all_data.iloc[np.random.permutation(data_points), :]

In [11]:
# 90/10 split of the shuffled rows: the first 90% train, the remainder test.
split_at = int(data_points * 0.9)
train_data, test_data = shuffled[:split_at], shuffled[split_at:]

In [12]:
import pickle as p

# Persist both splits. The context managers make sure each file is flushed and
# closed; the earlier open(...) calls left the handles to the garbage collector.
with open("data/train.p", "wb") as train_file:
    p.dump(train_data, train_file)
with open("data/test.p", "wb") as test_file:
    p.dump(test_data, test_file)

In [13]:
# Input-shape and vocabulary hyper-parameters used throughout the notebook.
MAX_SENT_LENGTH = 25   # words kept per sentence (last axis of the index tensors)
MAX_SENTS = 10         # sentences kept per document (middle axis of the index tensors)
MAX_NB_WORDS = 6000    # vocabulary cut-off; this value itself is used as the unknown-word index
EMBEDDING_DIM = 100    # word-vector width (word2vec size and embedding-matrix columns)

In [14]:
import nltk
nltk.download('wordnet')
from nltk.stem import WordNetLemmatizer

wnl = WordNetLemmatizer()
nltk.download('punkt')

# Per training document:
#   reviews -> list of lemmatized sentences (each token followed by one space)
#   texts   -> the same sentences joined into one string (used to fit the tokenizer)
#   labels  -> the document's 'pv' value
reviews = []
labels = []
texts = []

train_abstracts = train_data['abstract']
train_pv = train_data['pv']

for idx in tqdm(range(train_data.shape[0])):
    text = train_abstracts.iloc[idx]
    # Split into sentences, then tokenize and lemmatize every sentence.
    l_sentences = [
        "".join(wnl.lemmatize(token) + " "
                for token in nltk.tokenize.word_tokenize(sentence))
        for sentence in nltk.tokenize.sent_tokenize(text)
    ]
    reviews.append(l_sentences)
    texts.append(" ".join(l_sentences))
    labels.append(train_pv.iloc[idx])

[nltk_data] Downloading package wordnet to /home/asr/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt to /home/asr/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
100%|██████████| 187566/187566 [07:28<00:00, 417.89it/s]


In [15]:
# Fit the vocabulary on the lemmatized training texts only (test texts are never seen here).
# NOTE(review): num_words does not shrink tokenizer.word_index (the recorded output
# shows 164800 entries); the MAX_NB_WORDS cut-off is enforced explicitly when the
# index tensors are built.
tokenizer = Tokenizer(num_words=MAX_NB_WORDS)
tokenizer.fit_on_texts(texts)

In [16]:
# Display the number of distinct tokens seen in the training texts.
len(tokenizer.word_index)

164800

In [17]:
# Integer tensor of shape (documents, MAX_SENTS, MAX_SENT_LENGTH); unused slots stay 0.
# NOTE: this rebinds `data`, which earlier held the raw lines of the input file.
data = np.zeros((len(texts), MAX_SENTS, MAX_SENT_LENGTH), dtype='int32')
# Truncated sentences as token lists (out-of-vocabulary words become 'UNK');
# this is the corpus word2vec is trained on.
doc_lst = []

token_ids = tokenizer.word_index
for i, sentences in enumerate(reviews):
    # Only the first MAX_SENTS sentences of a document are encoded.
    for j, sent in enumerate(sentences[:MAX_SENTS]):
        words_in_sent = []
        # Only the first MAX_SENT_LENGTH words of a sentence are encoded.
        for k, word in enumerate(text_to_word_sequence(sent)[:MAX_SENT_LENGTH]):
            word_id = token_ids.get(word)
            if word_id is not None and word_id < MAX_NB_WORDS:
                data[i, j, k] = word_id
                words_in_sent.append(word)
            else:
                # Unknown or too-rare word: shared index MAX_NB_WORDS.
                data[i, j, k] = MAX_NB_WORDS
                words_in_sent.append('UNK')
        doc_lst.append(words_in_sent)

In [18]:
# Map the training label strings to integer class ids. The fitted encoder is
# reused later to transform the test labels.
label_encoder = LabelEncoder()
integer_encoded = label_encoder.fit_transform(labels)

In [19]:
# Full token -> index mapping learned from the training texts.
word_index = tokenizer.word_index
print('Total %s unique tokens.' % len(word_index))

# One-hot encode the integer class ids; the number of columns is inferred from the ids.
y_train = to_categorical(np.asarray(integer_encoded)).astype('float32')
# The training inputs are the (documents, MAX_SENTS, MAX_SENT_LENGTH) index tensor.
x_train = data

print('Shape of data tensor:', x_train.shape)
print('Shape of label tensor:', y_train.shape)

Total 164800 unique tokens.
Shape of data tensor: (187566, 10, 25)
Shape of label tensor: (187566, 54)


In [20]:
# Number of classes = width of the one-hot training label matrix; sizes the softmax layer.
n_classes = y_train.shape[1]

In [21]:
# Same preprocessing as for the training split, applied to the test split:
#   test_reviews -> list of lemmatized sentences per document
#   test_texts   -> those sentences joined into one string
#   test_labels  -> the document's 'pv' value
test_reviews = []
test_labels = []
test_texts = []

test_abstracts = test_data['abstract']
test_pv = test_data['pv']

for idx in tqdm(range(test_data.shape[0])):
    text = test_abstracts.iloc[idx]
    # Split into sentences, then tokenize and lemmatize every sentence.
    l_sentences = [
        "".join(wnl.lemmatize(token) + " "
                for token in nltk.tokenize.word_tokenize(sentence))
        for sentence in nltk.tokenize.sent_tokenize(text)
    ]
    test_reviews.append(l_sentences)
    test_texts.append(" ".join(l_sentences))
    test_labels.append(test_pv.iloc[idx])

100%|██████████| 20841/20841 [00:49<00:00, 419.59it/s]


In [22]:
# Encode the test documents with the vocabulary fitted on the training texts.
# Shape: (test documents, MAX_SENTS, MAX_SENT_LENGTH); unused slots stay 0.
data2 = np.zeros((len(test_texts), MAX_SENTS, MAX_SENT_LENGTH), dtype='int32')

for i, sentences in enumerate(test_reviews):
    for j, sent in enumerate(sentences):
        if j < MAX_SENTS:
            wordTokens = text_to_word_sequence(sent)
            k = 0
            # The per-sentence token list built here previously was never read
            # (word2vec is trained on the training split only), so it is gone.
            for _, word in enumerate(wordTokens):
                if k < MAX_SENT_LENGTH:
                    if (word in tokenizer.word_index) and (tokenizer.word_index[word] < MAX_NB_WORDS):
                        data2[i, j, k] = tokenizer.word_index[word]
                    else:
                        # Unknown or too-rare word: shared index MAX_NB_WORDS.
                        data2[i, j, k] = MAX_NB_WORDS
                    k = k + 1

In [23]:
# Encode the test labels with the encoder fitted on the training labels.
test_integer_encoded = label_encoder.transform(test_labels)
# Pin the one-hot width to the number of classes the encoder knows. Without
# num_classes the width is inferred from the largest id present in the test
# split, which yields too few columns if the highest class id is absent there.
y_test = to_categorical(np.asarray(test_integer_encoded),
                        num_classes=len(label_encoder.classes_)).astype('float32')
x_test = data2

In [24]:
import gensim, logging

# INFO-level logging makes gensim report vocabulary building and per-epoch progress.
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)
# Train word vectors of width EMBEDDING_DIM on the truncated training sentences
# (doc_lst, which includes the 'UNK' placeholder token), using every CPU core.
# NOTE(review): `size=` and `iter=` look like the gensim 3.x keyword spellings;
# confirm the pinned gensim version before upgrading. No seed is passed here.
word2vec_model = gensim.models.Word2Vec(doc_lst, min_count=3, size=EMBEDDING_DIM, sg=1, workers=os.cpu_count(), iter=15)

2019-04-21 09:12:21,315 : INFO : collecting all words and their counts
2019-04-21 09:12:21,315 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
2019-04-21 09:12:21,348 : INFO : PROGRESS: at sentence #10000, processed 196572 words, keeping 5594 word types
2019-04-21 09:12:21,380 : INFO : PROGRESS: at sentence #20000, processed 393666 words, keeping 5925 word types
2019-04-21 09:12:21,412 : INFO : PROGRESS: at sentence #30000, processed 590634 words, keeping 5983 word types
2019-04-21 09:12:21,445 : INFO : PROGRESS: at sentence #40000, processed 786750 words, keeping 5994 word types
2019-04-21 09:12:21,477 : INFO : PROGRESS: at sentence #50000, processed 983657 words, keeping 5997 word types
2019-04-21 09:12:21,509 : INFO : PROGRESS: at sentence #60000, processed 1180753 words, keeping 5998 word types
2019-04-21 09:12:21,541 : INFO : PROGRESS: at sentence #70000, processed 1376987 words, keeping 5999 word types
2019-04-21 09:12:21,573 : INFO : PROGRESS: at sente

2019-04-21 09:12:23,668 : INFO : PROGRESS: at sentence #720000, processed 14168778 words, keeping 6000 word types
2019-04-21 09:12:23,702 : INFO : PROGRESS: at sentence #730000, processed 14365453 words, keeping 6000 word types
2019-04-21 09:12:23,736 : INFO : PROGRESS: at sentence #740000, processed 14563061 words, keeping 6000 word types
2019-04-21 09:12:23,770 : INFO : PROGRESS: at sentence #750000, processed 14759694 words, keeping 6000 word types
2019-04-21 09:12:23,804 : INFO : PROGRESS: at sentence #760000, processed 14957534 words, keeping 6000 word types
2019-04-21 09:12:23,837 : INFO : PROGRESS: at sentence #770000, processed 15153904 words, keeping 6000 word types
2019-04-21 09:12:23,870 : INFO : PROGRESS: at sentence #780000, processed 15351906 words, keeping 6000 word types
2019-04-21 09:12:23,903 : INFO : PROGRESS: at sentence #790000, processed 15547649 words, keeping 6000 word types
2019-04-21 09:12:23,936 : INFO : PROGRESS: at sentence #800000, processed 15745002 words

2019-04-21 09:12:40,828 : INFO : EPOCH 2 - PROGRESS: at 23.91% examples, 1195701 words/s, in_qsize 31, out_qsize 0
2019-04-21 09:12:41,834 : INFO : EPOCH 2 - PROGRESS: at 31.87% examples, 1195656 words/s, in_qsize 31, out_qsize 0
2019-04-21 09:12:42,840 : INFO : EPOCH 2 - PROGRESS: at 40.13% examples, 1204427 words/s, in_qsize 31, out_qsize 0
2019-04-21 09:12:43,854 : INFO : EPOCH 2 - PROGRESS: at 48.24% examples, 1205110 words/s, in_qsize 31, out_qsize 0
2019-04-21 09:12:44,855 : INFO : EPOCH 2 - PROGRESS: at 56.46% examples, 1209602 words/s, in_qsize 29, out_qsize 2
2019-04-21 09:12:45,862 : INFO : EPOCH 2 - PROGRESS: at 64.60% examples, 1210389 words/s, in_qsize 31, out_qsize 0
2019-04-21 09:12:46,874 : INFO : EPOCH 2 - PROGRESS: at 73.01% examples, 1215157 words/s, in_qsize 30, out_qsize 1
2019-04-21 09:12:47,883 : INFO : EPOCH 2 - PROGRESS: at 80.99% examples, 1212783 words/s, in_qsize 31, out_qsize 0
2019-04-21 09:12:48,885 : INFO : EPOCH 2 - PROGRESS: at 89.27% examples, 1215610

2019-04-21 09:13:15,018 : INFO : worker thread finished; awaiting finish of 3 more threads
2019-04-21 09:13:15,021 : INFO : worker thread finished; awaiting finish of 2 more threads
2019-04-21 09:13:15,029 : INFO : worker thread finished; awaiting finish of 1 more threads
2019-04-21 09:13:15,034 : INFO : worker thread finished; awaiting finish of 0 more threads
2019-04-21 09:13:15,035 : INFO : EPOCH - 4 : training on 21030083 raw words (15072634 effective words) took 12.4s, 1214493 effective words/s
2019-04-21 09:13:16,053 : INFO : EPOCH 5 - PROGRESS: at 7.27% examples, 1088597 words/s, in_qsize 30, out_qsize 1
2019-04-21 09:13:17,062 : INFO : EPOCH 5 - PROGRESS: at 15.50% examples, 1158873 words/s, in_qsize 31, out_qsize 0
2019-04-21 09:13:18,063 : INFO : EPOCH 5 - PROGRESS: at 23.53% examples, 1175246 words/s, in_qsize 31, out_qsize 0
2019-04-21 09:13:19,067 : INFO : EPOCH 5 - PROGRESS: at 31.68% examples, 1188076 words/s, in_qsize 31, out_qsize 0
2019-04-21 09:13:20,071 : INFO : EPO

2019-04-21 09:13:52,306 : INFO : worker thread finished; awaiting finish of 11 more threads
2019-04-21 09:13:52,316 : INFO : worker thread finished; awaiting finish of 10 more threads
2019-04-21 09:13:52,322 : INFO : worker thread finished; awaiting finish of 9 more threads
2019-04-21 09:13:52,326 : INFO : worker thread finished; awaiting finish of 8 more threads
2019-04-21 09:13:52,328 : INFO : worker thread finished; awaiting finish of 7 more threads
2019-04-21 09:13:52,331 : INFO : worker thread finished; awaiting finish of 6 more threads
2019-04-21 09:13:52,339 : INFO : worker thread finished; awaiting finish of 5 more threads
2019-04-21 09:13:52,341 : INFO : worker thread finished; awaiting finish of 4 more threads
2019-04-21 09:13:52,343 : INFO : worker thread finished; awaiting finish of 3 more threads
2019-04-21 09:13:52,347 : INFO : worker thread finished; awaiting finish of 2 more threads
2019-04-21 09:13:52,348 : INFO : worker thread finished; awaiting finish of 1 more threa

2019-04-21 09:14:27,373 : INFO : EPOCH 10 - PROGRESS: at 80.56% examples, 1209376 words/s, in_qsize 29, out_qsize 2
2019-04-21 09:14:28,373 : INFO : EPOCH 10 - PROGRESS: at 88.79% examples, 1211984 words/s, in_qsize 31, out_qsize 0
2019-04-21 09:14:29,379 : INFO : EPOCH 10 - PROGRESS: at 97.04% examples, 1214136 words/s, in_qsize 31, out_qsize 0
2019-04-21 09:14:29,646 : INFO : worker thread finished; awaiting finish of 15 more threads
2019-04-21 09:14:29,650 : INFO : worker thread finished; awaiting finish of 14 more threads
2019-04-21 09:14:29,658 : INFO : worker thread finished; awaiting finish of 13 more threads
2019-04-21 09:14:29,661 : INFO : worker thread finished; awaiting finish of 12 more threads
2019-04-21 09:14:29,668 : INFO : worker thread finished; awaiting finish of 11 more threads
2019-04-21 09:14:29,670 : INFO : worker thread finished; awaiting finish of 10 more threads
2019-04-21 09:14:29,673 : INFO : worker thread finished; awaiting finish of 9 more threads
2019-04-2

2019-04-21 09:14:58,628 : INFO : EPOCH 13 - PROGRESS: at 31.58% examples, 1184597 words/s, in_qsize 31, out_qsize 0
2019-04-21 09:14:59,639 : INFO : EPOCH 13 - PROGRESS: at 39.71% examples, 1189874 words/s, in_qsize 30, out_qsize 1
2019-04-21 09:15:00,642 : INFO : EPOCH 13 - PROGRESS: at 47.77% examples, 1193829 words/s, in_qsize 31, out_qsize 0
2019-04-21 09:15:01,653 : INFO : EPOCH 13 - PROGRESS: at 56.04% examples, 1199238 words/s, in_qsize 31, out_qsize 0
2019-04-21 09:15:02,662 : INFO : EPOCH 13 - PROGRESS: at 64.31% examples, 1203746 words/s, in_qsize 31, out_qsize 0
2019-04-21 09:15:03,669 : INFO : EPOCH 13 - PROGRESS: at 72.58% examples, 1207489 words/s, in_qsize 31, out_qsize 0
2019-04-21 09:15:04,671 : INFO : EPOCH 13 - PROGRESS: at 80.75% examples, 1209525 words/s, in_qsize 29, out_qsize 2
2019-04-21 09:15:05,685 : INFO : EPOCH 13 - PROGRESS: at 88.88% examples, 1209384 words/s, in_qsize 31, out_qsize 0
2019-04-21 09:15:06,689 : INFO : EPOCH 13 - PROGRESS: at 97.00% examples

2019-04-21 09:15:31,935 : INFO : worker thread finished; awaiting finish of 2 more threads
2019-04-21 09:15:31,938 : INFO : worker thread finished; awaiting finish of 1 more threads
2019-04-21 09:15:31,943 : INFO : worker thread finished; awaiting finish of 0 more threads
2019-04-21 09:15:31,944 : INFO : EPOCH - 15 : training on 21030083 raw words (15074400 effective words) took 12.4s, 1210858 effective words/s
2019-04-21 09:15:31,944 : INFO : training on a 315451245 raw words (226097042 effective words) took 187.0s, 1209144 effective words/s


In [25]:
# token -> float32 vector lookup, taken from the word2vec model trained above.
embeddings_index = {
    word: np.asarray(word2vec_model.wv[word], dtype='float32')
    for word in word2vec_model.wv.vocab
}

print('Total %s word vectors.' % len(embeddings_index))

Total 6000 word vectors.


In [26]:
# Initial embedding weights: row i holds the word2vec vector of the token with
# tokenizer index i. Row 0 (padding) and tokens without a vector stay all-zero.
embedding_matrix = np.zeros((MAX_NB_WORDS + 1, EMBEDDING_DIM))

for word, i in word_index.items():
    if i >= MAX_NB_WORDS:
        # Tokens beyond the cut-off are encoded as the shared unknown index.
        continue
    embedding_vector = embeddings_index.get(word)
    if embedding_vector is not None:
        embedding_matrix[i] = embedding_vector

# The unknown-word row is filled explicitly. Previously it was only set as a side
# effect of some vocabulary entry having index MAX_NB_WORDS, so a smaller
# vocabulary would have left it at zero without notice.
unk_vector = embeddings_index.get('UNK')
if unk_vector is not None:
    embedding_matrix[MAX_NB_WORDS] = unk_vector

In [27]:
# L2 penalty strength; the same regularizer object is passed to every layer below.
REG_PARAM = 1e-5
l2_reg = regularizers.l2(REG_PARAM)

# Word-embedding layer for one sentence of MAX_SENT_LENGTH token ids.
# It has MAX_NB_WORDS + 1 rows so that the unknown-word index MAX_NB_WORDS is valid,
# starts from the word2vec-derived embedding_matrix, and is fine-tuned (trainable=True).
# mask_zero=False: padding zeros are not masked out for downstream layers.
embedding_layer = Embedding(MAX_NB_WORDS+ 1,
                            EMBEDDING_DIM,
                            input_length=MAX_SENT_LENGTH,
                            trainable=True,
                            mask_zero=False,
                            embeddings_regularizer=l2_reg,
                            weights=[embedding_matrix])

In [28]:
# Width of the attention projection: number of columns of W and length of b and u.
CONTEXT_DIM = 100

class AttLayer(Layer):
    """Attention pooling over the middle axis of a 3-D input.

    Each step is scored as ``tanh(x . W + b) . u``; the scores are exponentiated
    and normalised, and the layer returns the score-weighted combination of the
    inputs, so the output shape is ``(input_shape[0], input_shape[-1])``.

    Args:
        regularizer: optional weight regularizer applied to W, b and u. May be a
            regularizer instance, a serialized config (as produced by
            ``get_config``), or None.
        **kwargs: forwarded to ``keras.engine.topology.Layer``.
    """

    def __init__(self, regularizer=None, **kwargs):
        # regularizers.get() passes None and instances through and rebuilds a
        # regularizer from its serialized form, which is what from_config() supplies.
        self.regularizer = regularizers.get(regularizer)
        self.supports_masking = True
        super(AttLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        """Create the trainable weights W, b and u for a 3-D input shape."""
        assert len(input_shape) == 3
        self.W = self.add_weight(name='W', shape=(input_shape[-1], CONTEXT_DIM), initializer='normal', trainable=True,
                                 regularizer=self.regularizer)
        self.b = self.add_weight(name='b', shape=(CONTEXT_DIM,), initializer='normal', trainable=True,
                                 regularizer=self.regularizer)
        self.u = self.add_weight(name='u', shape=(CONTEXT_DIM,), initializer='normal', trainable=True,
                                 regularizer=self.regularizer)
        super(AttLayer, self).build(input_shape)  # be sure you call this somewhere!

    def call(self, x, mask=None):
        """Return the attention-weighted combination of the steps in ``x``."""
        # Unnormalised score per step: tanh(x . W + b) . u
        eij = K.dot(K.tanh(K.dot(x, self.W) + self.b), K.expand_dims(self.u))
        ai = K.exp(eij)
        alphas = ai / K.sum(ai, axis=1)
        if mask is not None:
            # use only the inputs specified by the mask
            # NOTE(review): the mask is applied after normalisation, so the
            # remaining weights are not rescaled to sum to 1 - confirm intended.
            alphas *= K.expand_dims(mask)
        # NOTE(review): the transpose/reshape below depend on how the backend
        # exposes tensor shapes; this appears to be written for the CNTK backend
        # selected at the top of the notebook - verify before switching backends.
        weighted_input = K.dot(K.transpose(x), alphas)
        return K.reshape(weighted_input, (weighted_input.shape[0],))

    def compute_output_shape(self, input_shape):
        """The pooled axis is dropped: (batch, steps, features) -> (batch, features)."""
        return (input_shape[0], input_shape[-1])

    def get_config(self):
        """Return the layer config, including the regularizer.

        The regularizer was previously left out, so a model rebuilt from its
        saved config lost the attention weights' regularization.
        """
        config = {'regularizer': regularizers.serialize(self.regularizer)}
        base_config = super(AttLayer, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

    def compute_mask(self, inputs, mask):
        """No mask is propagated to the layers that follow."""
        return None

In [29]:
GPU_IMPL = 2       # value passed as the GRU `implementation` argument
GRU_UNITS = 100    # hidden units of both GRU layers

# Settings shared by the word-level and the sentence-level GRU.
gru_kwargs = dict(return_sequences=True, kernel_regularizer=l2_reg,
                  implementation=GPU_IMPL, recurrent_activation='sigmoid')

# --- Sentence encoder: token ids -> embeddings -> GRU -> attention pooling ---
sentence_input = Input(shape=(MAX_SENT_LENGTH,), dtype='int32')
word_vectors = embedding_layer(sentence_input)
word_states = GRU(GRU_UNITS, **gru_kwargs)(word_vectors)
sentence_vector = AttLayer(regularizer=l2_reg)(word_states)
sentEncoder = Model(sentence_input, sentence_vector)

# --- Document model: encode every sentence, then GRU -> attention -> classifier ---
review_input = Input(shape=(MAX_SENTS, MAX_SENT_LENGTH), dtype='int32')
sentence_vectors = TimeDistributed(sentEncoder)(review_input)
sentence_states = GRU(GRU_UNITS, **gru_kwargs)(sentence_vectors)
document_vector = AttLayer(regularizer=l2_reg)(sentence_states)
hidden = Dense(500, activation='sigmoid', kernel_regularizer=l2_reg)(document_vector)
preds = Dense(n_classes, activation='softmax', kernel_regularizer=l2_reg)(hidden)
model = Model(review_input, preds)

In [30]:
# Multi-class objective over the one-hot labels; accuracy is reported as 'acc' / 'val_acc'.
# NOTE(review): nesterov=True is set but no momentum is passed; if SGD's momentum
# defaults to 0, the Nesterov flag has no effect - confirm whether momentum was intended.
model.compile(loss='categorical_crossentropy',
              optimizer=optimizers.SGD(lr=0.1,nesterov=True, clipnorm=1.0),
              metrics=['acc'])

In [31]:
# Print the layer-by-layer structure and parameter counts of the document model.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         (None, 10, 25)            0         
_________________________________________________________________
time_distributed_1 (TimeDist (None, 10, 100)           670600    
_________________________________________________________________
gru_2 (GRU)                  (None, 10, 100)           60300     
_________________________________________________________________
att_layer_2 (AttLayer)       (None, 100)               10200     
_________________________________________________________________
dense_1 (Dense)              (None, 500)               50500     
_________________________________________________________________
dense_2 (Dense)              (None, 54)                27054     
Total params: 818,654
Trainable params: 818,654
Non-trainable params: 0
_________________________________________________________________


In [32]:
# Run tag used in the log file name.
fname = 'det'
# Callback object passed to model.fit alongside the logger and the checkpoint.
history = History()
# Per-epoch metrics go to ./<fname>_<REG_PARAM>.log (e.g. ./det_1e-05.log).
# append=True: rows from repeated runs accumulate in the same file.
csv_logger = CSVLogger('./{0}_{1}.log'.format(fname, REG_PARAM), separator=',', append=True)

In [33]:
# Number of sentences in each training document (before the MAX_SENTS cap).
doc_lengths = [len(r) for r in reviews]
# Indices that order the training documents by that sentence count, shortest first.
ind = np.argsort(doc_lengths)

In [34]:
# Training schedule passed to model.fit.
BATCH_SIZE = 30    # samples per gradient update
NUM_EPOCHS = 100   # passes over the training set

In [35]:
# Checkpoint file pattern; {epoch} and {acc} are filled in from the epoch logs.
# NOTE(review): the file name records the training accuracy ({acc}) while the
# checkpoint is selected on 'val_acc' - confirm which one the name should carry.
filepath = "models/saved-model6-{epoch:02d}-{acc:.2f}.h5"
# Create the target directory up front. The checkpoint writer is not expected to
# create it, and a missing directory would only surface as a failure at the
# first save, after a full epoch of training.
os.makedirs(os.path.dirname(filepath), exist_ok=True)
# Save only when the monitored validation accuracy improves.
checkpoint = ModelCheckpoint(filepath, monitor='val_acc', verbose=1, save_best_only=True)

In [36]:
# Wall-clock time around training; no visible cell reads t1 / t2 afterwards.
t1 = time.time()

# Samples are fed in order of increasing sentence count (`ind`), and shuffle=False
# keeps that order in every epoch.
# NOTE(review): the test split doubles as validation data, and the checkpoint is
# selected on val_acc, so the accuracy reported on x_test afterwards is not from
# held-out data - confirm this is intended.
model.fit(x_train[ind,:,:], y_train[ind,:], epochs=NUM_EPOCHS, batch_size=BATCH_SIZE, shuffle=False, validation_data=[x_test,y_test], 
          callbacks=[history, csv_logger, checkpoint], verbose=1)

t2 = time.time()

Train on 187566 samples, validate on 20841 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100


Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


In [37]:
# Class probabilities for every test document. This rebinds `preds`, which
# earlier named the model's output tensor.
preds = model.predict(x_test)

# Top-1 accuracy: the arg-max of the one-hot labels against the arg-max of the predictions.
true_classes = y_test.argmax(axis=1)
predicted_classes = preds.argmax(axis=1)
print("Accuracy = {0}".format(accuracy_score(true_classes, predicted_classes)))

Accuracy = 0.4927786574540569


In [38]:
# Top-k accuracy: a document counts as correct when its true class is among the
# k highest-scoring predictions. k is defined once so the slice and the printed
# label cannot drift apart.
# NOTE(review): the recorded output for this cell is labelled "Accuracy@5", so it
# appears to come from a run with a different k - re-run to refresh it.
TOP_K = 15

# Indices of the TOP_K largest probabilities per row, best first.
top_k_classes = np.argsort(-preds, axis=1)[:, :TOP_K]
true_classes = y_test.argmax(axis=1)
hits = int((top_k_classes == true_classes[:, None]).any(axis=1).sum())
print("Accuracy@{0} = {1}".format(TOP_K, hits / len(x_test)))

Accuracy@5 = 0.9591190441917374
