<a href="https://colab.research.google.com/github/athensclub/Thai-Word-Cutter/blob/master/test_model_with_fixed_vision.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [7]:
#imports and files 
from google.colab import files
from tensorflow.keras import Model
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.models import load_model
from tensorflow.keras.layers import Input, Dense, Embedding, Concatenate, Flatten,LSTM
import tensorflow as tf
import os
import time
import numpy as np

In [10]:
#Create a mapping from a character to an integer
characters = 'กขฃคฅฆงจฉชซฌญฎฏฐฑฒณดตถทธนบปผฝพฟภมยรฤลฦวศษสหฬอฮฯะัาำิีึืฺุู฿เแโใไๅๆ็่้๊๋์ํ๎๏๐๑๒๓๔๕๖๗๘๙abcdefghijklmnopqrstuvwxyz"\'0123456789,.!?/\\:;%()[]{}+_-*@#><=^$& \t\n'
char_encode = {}
char_decode = {}
i = 1
for c in characters:
  char_encode[c] = i;
  char_decode[i] = c;
  i += 1

def encode(data):
  encoded = []
  data = data.lower()
  for c in data:
    if c in char_encode:
      encoded.append(char_encode[c])
    else:
      encoded.append(0) #unknown character
  return encoded

def decode(data):
  decoded = ''
  for c in data:
    if c != 0:
      decoded = decoded + char_decode[c]
  return decoded

#convert from raw data, a text which words are splitted by '|' will be converted
#to a list of numver encoded by function encode and a list of the position of
#where to cut the word
def convert_data(data):
  splitted = data.split('|')
  encoded = encode(data.replace('|',''))
  ans = np.zeros(len(encoded))
  i = 0;
  for s in splitted:
    if(len(s) > 0):
      i += len(s) 
      ans[i - 1] = 1
  return encoded,ans

#create a data for model with vision of length. used for training, evaluation, and predictions
def create_model_data(encoded,ans,length):
  before = []
  current = []
  after = []
  temp = []
  for i in range(len(encoded)):
    temp.append(encoded[i])
    a = []
    b = []
    for x in range(length):
      if i - x - 1 >= 0:
        a.insert(0,encoded[i-x-1])
      if i + x + 1 < len(encoded):
        b.append(encoded[i+x+1])
    before.append(a)
    current.append(temp.copy())
    after.append(b)
    if ans[i] == 1:
      temp = []
  return sequence.pad_sequences(before,length),sequence.pad_sequences(current,length),sequence.pad_sequences(after,length)

#train the given model with the given vision length with the given raw data
def train(model,data,length):
  (encoded,ans) = convert_data(data)
  (before,current,after) = create_model_data(encoded,ans,length)
  model.fit([before,current,after],np.asarray(ans))

def timed(func):
   def function_timer(*args, **kwargs):
      start = time.time()
      value = func(*args, **kwargs)
      end = time.time()
      runtime = end - start
      msg = "{func} took {time} seconds to complete its execution."
      print(msg.format(func = func.__name__,time = runtime))
      print(value)
   return function_timer

#evaluate the given model with the given vision length with the given raw data
@timed
def evaluate(model,data,length):
  (encoded,ans) = convert_data(data)
  (before,current,after) = create_model_data(encoded,ans,length)
  return model.evaluate([before,current,after],np.asarray(ans))




visions = [10,20,30,50,100,200]
models = []

for v in visions:
  models.append(load_model('model_vision_' + str(v) + '.h5'))
raw_data = "";
for i in range(5):
  target_file = open('train_{:05d}.txt'.format(445+1),'r')
  if(target_file.mode == 'r'):
    raw_data += target_file.read()
  target_file.close()

for j in range(len(models)):
  print('vision ' + str(visions[j]) + ': ')
  evaluate(models[j],raw_data,visions[j])



vision 10: 
evaluate took 18.452951669692993 seconds to complete its execution.
[0.05072725564241409, 0.9812352061271667]
vision 20: 
evaluate took 23.239840269088745 seconds to complete its execution.
[0.06153349205851555, 0.9760732054710388]
vision 30: 
evaluate took 30.30086922645569 seconds to complete its execution.
[0.1072482019662857, 0.9585857391357422]
vision 50: 
evaluate took 45.074501752853394 seconds to complete its execution.
[0.10546562075614929, 0.9583882093429565]
vision 100: 
evaluate took 72.32821106910706 seconds to complete its execution.
[0.10011348873376846, 0.9592441320419312]
vision 200: 
evaluate took 132.64933848381042 seconds to complete its execution.
[0.10267859697341919, 0.9587832689285278]


153120
