In [1]:
import keras
from keras.models import load_model


import sqmutils.data_utils as du

import os
import time 

import pandas as pd
import numpy as np

import json

%load_ext autoreload
%autoreload 2
%matplotlib inline 


Using TensorFlow backend.


# Configs

In [2]:
model_dir = "models"
model_weights = os.path.join(model_dir, "best_val_f1_model.h5")
test_dataset_path = "/home/elkhand/Downloads/test.csv"
embedding_path = "/home/elkhand/datasets/fasttext/wiki.en.vec"
emb_dim = 300


config = du.get_config(None, None, None,  embedding_dimension=emb_dim)
custom_objects= {"f1": du.f1, "recall" : du.recall, "precision" : du.precision}

config
 {'train_dataset_path': None, 'test_size': None, 'val_size': None, 'max_seq_len': 32, 'embedding_dimension': 300, 'batch_size': 3096, 'nb_epochs': 100, 'recurrent_dropout': 0.3, 'dropout': 0.3, 'seed': 7, 'is_debug_on': False} 



# Load pre-trained model

In [3]:
model = load_model(model_weights, custom_objects = custom_objects)

optimizer = keras.optimizers.Nadam()
model.compile(loss='binary_crossentropy', optimizer=optimizer, 
                  metrics=['accuracy', du.f1, du.recall, du.precision])

# Reading test data

In [4]:
start = time.time()
dfTest = pd.read_csv(test_dataset_path, sep=',', encoding='utf-8')
end = time.time()
print("Total time passed", (end - start))

  interactivity=interactivity, compiler=compiler, result=result)


Total time passed 5.666259765625


In [5]:
dfTest[:10]

Unnamed: 0,test_id,question1,question2
0,0,How does the Surface Pro himself 4 compare wit...,Why did Microsoft choose core m3 and not core ...
1,1,Should I have a hair transplant at age 24? How...,How much cost does hair transplant require?
2,2,What but is the best way to send money from Ch...,What you send money to China?
3,3,Which food not emulsifiers?,What foods fibre?
4,4,"How ""aberystwyth"" start reading?",How their can I start reading?
5,5,How are the two wheeler insurance from Bharti ...,I admire I am considering of buying insurance ...
6,6,How can I reduce my belly fat through a diet?,How can I reduce my lower belly fat in one month?
7,7,"By scrapping the 500 and 1000 rupee notes, how...",How will the recent move to declare 500 and 10...
8,8,What are the how best books of all time?,What are some of the military history books of...
9,9,After 12th years old boy and I had sex with a ...,Can a 14 old guy date a 12 year old girl?


# Load embeddings

We will be using Fasttext Wiki word vectors 300D

In [9]:
print("word vectors path", embedding_path)
start = time.time()
w2v = du.load_embedding(embedding_path)
# w2v = {}
end = time.time()
print("Total time passed: ", (end-start))

word vectors path /home/elkhand/datasets/fasttext/wiki.en.vec
embedding size : 2519371
embedding dimension : (300,)
Total time passed:  323.2240672111511


# Convert Test dataset into word vectors

In [8]:
inp = np.zeros((32,300))
print(inp.shape)

limit = 100

start = time.time()
for index, row in dfTest.iterrows():
    #print(row)
    df_test_q1_emb, df_test_q2_emb  = du.load_dataset_single_row(row, w2v, config, index)   
    result = model.predict([df_test_q1_emb, df_test_q2_emb], verbose=0) #, batch_size=None, steps=None
    print(result)
    if index >100:
        break
end = time.time()
print("Total time passed", (end - start))

(32, 300)
df_test_q1_emb.shape (1, 32, 300)
df_test_q2_emb.shape (1, 32, 300)
[[0.6372563]]
df_test_q1_emb.shape (1, 32, 300)
df_test_q2_emb.shape (1, 32, 300)
[[0.6372563]]
df_test_q1_emb.shape (1, 32, 300)
df_test_q2_emb.shape (1, 32, 300)
[[0.6372563]]
df_test_q1_emb.shape (1, 32, 300)
df_test_q2_emb.shape (1, 32, 300)
[[0.6372563]]
df_test_q1_emb.shape (1, 32, 300)
df_test_q2_emb.shape (1, 32, 300)
[[0.6372563]]
df_test_q1_emb.shape (1, 32, 300)
df_test_q2_emb.shape (1, 32, 300)
[[0.6372563]]
df_test_q1_emb.shape (1, 32, 300)
df_test_q2_emb.shape (1, 32, 300)
[[0.6372563]]
df_test_q1_emb.shape (1, 32, 300)
df_test_q2_emb.shape (1, 32, 300)
[[0.6372563]]
df_test_q1_emb.shape (1, 32, 300)
df_test_q2_emb.shape (1, 32, 300)
[[0.6372563]]
df_test_q1_emb.shape (1, 32, 300)
df_test_q2_emb.shape (1, 32, 300)
[[0.6372563]]
df_test_q1_emb.shape (1, 32, 300)
df_test_q2_emb.shape (1, 32, 300)
[[0.6372563]]
df_test_q1_emb.shape (1, 32, 300)
df_test_q2_emb.shape (1, 32, 300)
[[0.6372563]]
df_tes

[[0.6372563]]
df_test_q1_emb.shape (1, 32, 300)
df_test_q2_emb.shape (1, 32, 300)
[[0.6372563]]
df_test_q1_emb.shape (1, 32, 300)
df_test_q2_emb.shape (1, 32, 300)
[[0.6372563]]
Total time passed 18.13885998725891


# Evaluate test dataset

In [None]:
# evaluate loaded model on test data 
# Define X_test & Y_test data first

#score = loaded_model.evaluate(X_test, Y_test, verbose=0)
#print ("%s: %.2f%%" % (loaded_model.metrics_names[1], score[1]*100))
#result = model.evaluate(x=[df_test_q1_emb, df_test_q2_emb], y=df_test_label)

# result = model.predict([df_test_q1_emb, df_test_q2_emb], verbose=1) #, batch_size=None, steps=None
# print(result)
