# Testing DeepER (using Kaggle Kernels)

### Import libraries

In [None]:
%matplotlib inline

import time
import os
from sklearn.metrics import confusion_matrix
from sklearn.metrics import f1_score
from keras.callbacks import EarlyStopping
from IPython.display import SVG
from keras.utils.vis_utils import model_to_dot
import matplotlib.pyplot as plt
import seaborn as sns
from deeper_model import build_model
from deeper_utils import preprocess_data

### Read the dataset

In [None]:
# Load the 'Fodors_Zagats' dataset from the kernel's input directory.
# preprocess_data hands back five objects, unpacked here in order: the
# train / test / validation splits, the embedding matrix, and the words
# for which no embedding was found (meaning inferred from the names bound
# here — see deeper_utils for the authoritative contract).
(trainData,
 testData,
 valData,
 embeddingMatrix,
 wordsWithNoEmbeddings) = preprocess_data('Fodors_Zagats',
                                          baseDir='../input/deeper/deeper/')

Print the words that have no embedding in GloVe

In [None]:
# One word per line, then a blank separator line and the total count.
for word in wordsWithNoEmbeddings:
    print(word)
# The leading "\n" produces the same blank line a bare print() would.
print(f"\nThere are {len(wordsWithNoEmbeddings)} words with no embeddings in GloVe")

Print embedding matrix shape

In [None]:
# Bare expression: the notebook displays the embedding matrix's shape as
# this cell's output.
embeddingMatrix.shape

In [None]:
# Each split unpacks into exactly three parts: left-table data,
# right-table data and the labels.
# NOTE(review): presumably row i of the left and right tables together form
# one candidate pair labelled by labels[i] — confirm in deeper_utils.
leftTableTrainData, rightTableTrainData, trainLabels = trainData
leftTableTestData, rightTableTestData, testLabels = testData
leftTableValData, rightTableValData, valLabels = valData

Print training set size

In [None]:
# Shapes of the left and right training tables, one per line.
print(leftTableTrainData.shape, rightTableTrainData.shape, sep='\n')

Print test set size

In [None]:
# Shapes of the left and right test tables, one per line.
print(leftTableTestData.shape, rightTableTestData.shape, sep='\n')

Print validation set size

In [None]:
# Shapes of the left and right validation tables, one per line.
print(leftTableValData.shape, rightTableValData.shape, sep='\n')

### Build the DeepER model 

In [None]:
# Build the model from the embedding matrix, passing denseUnits=64
# (presumably the width of the dense layer — see deeper_model.build_model).
model = build_model(embeddingMatrix, denseUnits=64)
# Print the layer-by-layer summary of the model.
model.summary()

### Plot DeepER architecture

In [None]:
# Convert the model to a dot graph (with tensor shapes shown), render it to
# SVG through the Graphviz 'dot' program, and display it as the cell output.
SVG(model_to_dot(model, show_shapes=True).create(prog='dot', format='svg'))

### Training

In [None]:
# Fit the model on the training pairs, passing the validation pairs as
# validation_data, and report how long the fit took.

# perf_counter() is a monotonic clock, so the measured duration cannot be
# skewed by system clock adjustments the way a time.time() difference can.
start_time = time.perf_counter()

history = model.fit(
    [leftTableTrainData, rightTableTrainData],
    trainLabels,
    batch_size=16,
    epochs=20,
    validation_data=([leftTableValData, rightTableValData], valLabels),
)

elapsed_time = time.perf_counter() - start_time

# Format HH:MM:SS by hand: strftime()/gmtime() would wrap the hour field
# back to 00 once the run exceeds 24 hours.
hours, remainder = divmod(int(elapsed_time), 3600)
minutes, seconds = divmod(remainder, 60)
t = '{:02d}:{:02d}:{:02d}'.format(hours, minutes, seconds)
print('Elapsed time (learning):', t)

### Plot accuracy and loss

In [None]:
# Extract the per-epoch metrics recorded during training and plot the
# training and validation loss curves.
history_dict = history.history

# Keras reports accuracy under 'acc' or 'accuracy' depending on its version
# and on the metric name given to compile(); accept either spelling instead
# of failing with a KeyError.
acc_key = 'acc' if 'acc' in history_dict else 'accuracy'
acc = history_dict[acc_key]
val_acc = history_dict['val_' + acc_key]
loss = history_dict['loss']
val_loss = history_dict['val_loss']

# Number epochs from 1 on the x axis.
epochs = range(1, len(acc) + 1)

plt.figure(figsize=(10, 10))
plt.plot(epochs, loss, 'b', label='Training loss')
plt.plot(epochs, val_loss, 'g', label='Validation loss')
plt.title('Training and validation loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()

In [None]:
# Accuracy curves on a fresh 10x10 figure: training in blue ('b'),
# validation in green ('g'), one point per epoch.
plt.figure(figsize=(10, 10))

# Labelling first; these calls are independent of the plotted lines.
plt.title('Training and validation accuracy')
plt.ylabel('Accuracy')
plt.xlabel('Epochs')

plt.plot(epochs, acc, 'b', label='Training acc')
plt.plot(epochs, val_acc, 'g', label='Validation acc')

# The legend must come after the plot calls so that it picks up both labels.
plt.legend()

### Predict labels on test data

In [None]:
# Score every test pair, then binarise: outputs strictly above 0.5 become 1,
# everything else 0.
predictedLabels = (
    model.predict(x=[leftTableTestData, rightTableTestData]) > 0.5
).astype(int)

### Compute confusion matrix

In [None]:
# Confusion matrix of true vs. predicted test labels, drawn as an annotated
# heatmap on a 10x10 figure.
fig, ax = plt.subplots(figsize=(10, 10))
cm = confusion_matrix(testLabels, predictedLabels)
sns.heatmap(cm, annot=True, fmt='g', cmap='Greens', ax=ax)

# title, axis labels and tick labels
ax.set_title('Confusion Matrix')
ax.set_ylabel('True labels')
ax.set_xlabel('Predicted labels')
ax.xaxis.set_ticklabels(['non matching', 'matching'])
ax.yaxis.set_ticklabels(['non matching', 'matching'])

### Compute f-measure

In [None]:
# F1 score of the thresholded predictions against the true test labels.
fMeasure = f1_score(testLabels, predictedLabels)
# Bare expression: the notebook displays the score as this cell's output.
fMeasure