## Testing model

In [None]:
import os
import time
import multiprocessing
import numpy as np
import logging
import tensorflow as tf
import char_keras_lm as lm
from process_utils import process_file, UserConfig, decode_data, encode_data


In [None]:
# --- Sequence / vocabulary settings -----------------------------------------
max_len = 120    # maximum number of characters kept per sentence
num_chars = 128  # vocabulary size: the first 128 code points (7-bit ASCII range)

# --- Data locations ---------------------------------------------------------
users_indir = '../data/users_feats'
users_modeldir = '../data/exper3__all__1epoch__bidi_model/users_models'
users_lossdir = '../data/test/users_losses'
users_logidr = '../data/test/users_logs'

# Output directories (losses and logs) are created on demand.
for _out_dir in (users_lossdir, users_logidr):
    if not os.path.exists(_out_dir):
        os.makedirs(_out_dir)

# --- User / day under test --------------------------------------------------
u = 'U13'
day = 27

# Per-user configuration: every path below is derived from the user name.
userConfig = UserConfig()
userConfig.user_name = u
userConfig.feat_dir = '{0}/{1}/'.format(users_indir, u)
userConfig.model_filepath = '{0}/{1}_simple_lm.hdf5'.format(users_modeldir, u)
userConfig.output_base_filepath = '{0}/{1}_losses'.format(users_lossdir, u)
userConfig.log_filepath = '{}/{}_log.txt'.format(users_logidr, u)

In [None]:

# Build the language model wrapper for the configured user and load the
# feature file of the selected day.
char_lm = lm.KerasLM(userConfig)
day_file = '{}.txt'.format(day)
dataset_fname = userConfig.feat_dir + day_file

input_data, target_data, red_events = process_file(dataset_fname, num_chars, max_len)
# print(red_events)
summary = 'evaluating: {} - num events: {}  - red events:{}'
print(summary.format(dataset_fname, len(input_data), len(red_events)))

# Eyeball a small slice of events: show each decoded input next to the
# model's most likely character at every position.
for sample in input_data[10:25]:
    # Prepend a leading axis of size 1 so a single event is fed to predict().
    batch = sample.reshape((1,) + sample.shape)
    print('X:', decode_data(batch).strip())
    probs = char_lm.model.predict(batch, batch_size=1, verbose=2)
    best_codes = (np.argmax(step) for step in probs[0])
    res = ''.join(chr(code) for code in best_codes).strip()
    print('y_:', res)
    print('\n=========================================\n')
    
## testing output for red events
# print('Checking red events...')
# for i, e in red_events:
#     data = e[-1].split('|')
#     text= [','.join(data[0:])]
#     X, _ = encode_data(text, num_chars, max_len)
#     print('X:', decode_data(X).strip())
#     y_ = char_lm.model.predict(X, batch_size=1, verbose=2)
#     res = ''.join([chr(np.argmax(x)) for x in y_[0]]).strip()
#     print('y_:', res)
#     print('\n=========================================\n')


In [None]:
import matplotlib.pyplot as plt
from sklearn.metrics import jaccard_similarity_score

def _predict_chars(model, encoded, limit):
    """Decode the model's prediction for one encoded event into characters.

    Runs ``model.predict`` on ``encoded``, takes the argmax of every element
    of the first returned prediction, maps each index to a character with
    ``chr``, drops newline characters and keeps at most ``limit`` characters.

    Returns a list of single-character strings (possibly shorter than
    ``limit``).
    """
    probs = model.predict(encoded, batch_size=1, verbose=2)
    chars = [chr(np.argmax(step)) for step in probs[0]]
    return [c for c in chars if c != '\n'][:limit]


def _char_match_ratio(expected, predicted):
    """Return the fraction of positions of ``expected`` that ``predicted`` matches.

    NOTE: this replaces sklearn's ``jaccard_similarity_score``, which was
    removed in scikit-learn 0.23 and which, for single-label sequences such
    as these character lists, was equivalent to plain accuracy. Unlike the
    sklearn call, a ``predicted`` sequence shorter than ``expected`` does not
    raise: the missing positions simply count as mismatches.

    An empty ``expected`` yields 0.0 (there is nothing to match).
    """
    if not expected:
        return 0.0
    hits = sum(1 for want, got in zip(expected, predicted) if want == got)
    return hits / float(len(expected))


char_lm = lm.KerasLM(userConfig)
dataset_fname = userConfig.feat_dir+'{}.txt'.format(day)

input_data, target_data, red_events = process_file(dataset_fname, num_chars, max_len)
# print(red_events)
print('evaluating: {} - num events: {}  - red events:{}'.format(dataset_fname, len(input_data), len(red_events)))


## testing similarity of the results
print('\n<<<<<======== Jaccard Similarity =========>>>>>\n')

## testing output for red events
# Despite its name, min_js ends up holding the HIGHEST similarity seen among
# the red events; it is used below as the threshold for flagging events.
min_js = 0
print('Checking red events...')
for _idx, event in red_events:
    fields = event[-1].split('|')
    text = [','.join(fields)]
    X, _ = encode_data(text, num_chars, max_len)
    y = [c for c in text[0] if c != '\n']
    print('y :', ''.join(y))
    y_ = _predict_chars(char_lm.model, X, len(y))
    print('y_:', ''.join(y_).strip())
    js = _char_match_ratio(y, y_)
    min_js = max(min_js, js)
    print('similarity:', js)
    print('\n=========================================\n')

# Without any red-event similarity to calibrate against, fall back to a fixed
# threshold and only report the aggregate numbers.
verbose = True
if min_js == 0:
    min_js = 0.8
    verbose = False

all_sim = []
count = 0
print('Checking all events...')
for X, y in zip(input_data, target_data):
    # Prepend a leading axis of size 1 so a single event can be decoded/predicted.
    X = X.reshape((1,) + X.shape)
    y = y.reshape((1,) + y.shape)
    yd = decode_data(y)
    y = [c for c in yd if c != '\n']
    y_ = _predict_chars(char_lm.model, X, len(y))
    js = _char_match_ratio(y, y_)
    if js < (min_js + 0.001):
        if verbose:
            print('y :', yd.strip())
            print('y_:', ''.join(y_).strip())
            print('similarity:', js)
        count += 1
    all_sim.append(js)

print('\n => # events below {}:{}'.format(min_js+0.001, count))
# np.mean of an empty list warns and yields nan; report nan directly instead.
print('\n => avg similarity:', np.mean(all_sim) if all_sim else float('nan'))
plt.figure()
plt.plot(all_sim)
plt.show()


