In [None]:
!pip install -q tensorflow-model-optimization

In [None]:
import tensorflow as tf
from tensorflow.keras import layers
import numpy as np
import csv
from tensorflow.keras.layers import TextVectorization
import tensorflow_model_optimization as tfmot

In [None]:
# Read the sentence pairs from the CSV file.
# `newline=""` is what the csv module requires so that quoted line breaks are
# parsed correctly, and the explicit encoding keeps the read independent of
# the platform's default locale.
eng_ind, label = [], []
with open("eng-ind.csv", newline="", encoding="utf-8") as f:
    csvreader = csv.reader(f, delimiter=",")
    next(csvreader)  # skip the first row (presumably the header -- confirm)
    for row in csvreader:
        # Column 0 is stored with label 0 and column 1 with label 1, so the
        # two lists stay aligned and the classes alternate.
        eng_ind.append(row[0])
        label.append(0)
        eng_ind.append(row[1])
        label.append(1)

# Fit the word vocabulary on every sentence; each text is later encoded as a
# fixed-length sequence of 32 token ids.
vectorize = TextVectorization(output_sequence_length=32)
vectorize.adapt(tf.data.Dataset.from_tensor_slices(eng_ind))


In [None]:
# Encode the whole corpus with one batched call instead of one Python-level
# layer call per sentence. The result is a single 2-D tensor of token ids
# (one row per sentence), which `from_tensor_slices` slices row by row exactly
# like the former list of per-sentence tensors.
datas = vectorize(tf.constant(eng_ind, dtype=tf.string))

In [None]:
# Pair every encoded sentence with its label, then group the pairs into
# batches of 8 examples.
examples = tf.data.Dataset.from_tensor_slices((datas, label))
dataset = examples.batch(8)

In [None]:
# The first 90% of the batches are used for training; the remaining batches
# form the validation set.
n_train_batches = int(0.9 * len(dataset))
train = dataset.take(n_train_batches)
val = dataset.skip(n_train_batches)

In [None]:
# Show how many entries the fitted vocabulary contains.
vocabulary = vectorize.get_vocabulary()
len(vocabulary)

In [None]:
# Short alias for the tfmot helper that wraps a Keras layer to mark it for
# quantization; it is used on the embedding layer when the model is built.
quantize_annotate_layer = tfmot.quantization.keras.quantize_annotate_layer

In [None]:
# Size the embedding table from the fitted vocabulary. The vectorizer has no
# token limit, so a hard-coded table size could be smaller than the vocabulary
# and leave the higher token ids without a row to look up.
vocab_size = len(vectorize.get_vocabulary())

model = tf.keras.Sequential()
# NOTE(review): the layer is only annotated here; no `quantize_apply` call is
# visible in this notebook, so the annotation presumably has no quantization
# effect on its own -- confirm whether it is still needed.
model.add(quantize_annotate_layer(layers.Embedding(input_dim=vocab_size, output_dim=64, input_shape=(32,))))
# Flatten the (32, 64) embedded sequence into one feature vector per example.
model.add(layers.Flatten())

model.add(layers.Dense(64, activation="tanh"))
model.add(layers.Dense(64,activation="tanh"))
model.add(layers.Dense(32,activation="tanh"))

# Single sigmoid unit: the output is a score in [0, 1] for the class labelled 1.
model.add(layers.Dense(1, activation="sigmoid"))

model.compile(
    loss = "binary_crossentropy",
    optimizer = "RMSprop",
    metrics = ["accuracy"]
)

model.summary()

In [None]:
# Train for 5 epochs on the training batches and evaluate on the validation
# batches after every epoch.
model.fit(
    train,
    epochs=5,
    validation_data=val,
)

## Predict

## A score greater than 0.5 means Indonesian; a score less than 0.5 means English.

In [None]:
# Probe inputs: a short English phrase, a long English sentence and a long
# Indonesian sentence.
sample_texts = [
    "i love you",
    "When using this layer as the first layer in a model, provide an input_shape argument (tuple of integers or None, e.g. (10, 128) for sequences of 10 vectors of 128-dimensional vectors, or (None, 128) for variable-length sequences of 128-dimensional vectors.",
    "Indonesia, dengan nama resmi Republik Indonesia atau lengkapnya Negara Kesatuan Republik Indonesia, adalah sebuah negara kepulauan di Asia Tenggara yang dilintasi garis khatulistiwa dan berada di antara daratan benua Asia dan Oseania, serta antara Samudra Pasifik dan Samudra Hindia",
]
teks = tf.constant(sample_texts)

# Encode the texts with the fitted vectorizer and score them with the Keras model.
encoded_teks = vectorize(teks)
model.predict(encoded_teks)

## Save the model in TensorFlow format

In [None]:
# Write the trained model to the "model" directory; the TFLite converter in
# the next section reads it back from that path.
model.save("model")

## Convert the TensorFlow model to TFLite

In [None]:
# Build a TFLite converter from the model saved on disk and enable the
# default set of converter optimizations.
converter = tf.lite.TFLiteConverter.from_saved_model("model")
converter.optimizations = [tf.lite.Optimize.DEFAULT]

# `convert()` returns the serialized model as bytes.
tflite_model = converter.convert()

# Store the converted model next to the saved model files.
with open("model/model.tflite", "wb") as tflite_file:
    tflite_file.write(tflite_model)

## Export the tokenizer vocabulary to JSON

In [None]:
import json

# Map every vocabulary token to its integer id, i.e. its position in the
# list returned by the vectorizer.
word_index = {
    token: token_id
    for token_id, token in enumerate(vectorize.get_vocabulary())
}

# Persist the lookup table so inference code outside Python can tokenize text.
with open("model/word_index.json", "w") as index_file:
    json.dump(word_index, index_file, indent=4)

## Predict using TFLite and the Keras tokenizer

In [None]:
# Sample inputs for the TFLite checks: the three probe texts used with the
# Keras model plus four short phrases.
sample_texts = [
    "i love you",
    "When using this layer as the first layer in a model, provide an input_shape argument (tuple of integers or None, e.g. (10, 128) for sequences of 10 vectors of 128-dimensional vectors, or (None, 128) for variable-length sequences of 128-dimensional vectors.",
    "Indonesia, dengan nama resmi Republik Indonesia atau lengkapnya Negara Kesatuan Republik Indonesia, adalah sebuah negara kepulauan di Asia Tenggara yang dilintasi garis khatulistiwa dan berada di antara daratan benua Asia dan Oseania, serta antara Samudra Pasifik dan Samudra Hindia",
    "hi kami dari red team",
    "hi we're from red team",
    "mantab pak eko",
    "lu cakep juga yak",
]
teks = tf.constant(sample_texts)


In [None]:
# Load the converted model and reserve memory for its tensors.
interpreter = tf.lite.Interpreter(model_path="model/model.tflite")
interpreter.allocate_tensors()

input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
input_index = input_details[0]['index']
output_index = output_details[0]['index']

# Score one text at a time. The interpreter input must be a 2-D float32
# array, so each encoded text is wrapped in a batch of size one.
for text in teks:
  input_data = np.array([vectorize(text)], dtype=np.float32)

  interpreter.set_tensor(input_index, input_data)
  interpreter.invoke()
  predictions = interpreter.get_tensor(output_index)

  print(text)
  print(predictions)

## Predict with TFLite and the JSON vocabulary instead of the tokenizer

In [None]:
import json

# Read the exported token -> id table back from disk.
with open("model/word_index.json") as index_file:
  kamus = json.loads(index_file.read())

In [None]:
def tf_lower_and_split_punct(text):
  """Lower-case `text`, delete punctuation and split it on whitespace.

  This is the hand-written counterpart of the standardization and splitting
  that the `TextVectorization` layer performs with its default settings, so
  that tokens can be looked up in the exported JSON vocabulary.

  Args:
    text: a string, or string tensor, to tokenize.

  Returns:
    A string tensor holding the tokens of `text`.
  """
  text = tf.strings.lower(text)
  # Raw string so the backslashes reach the regex engine untouched. The
  # character class is the one Keras uses for its default
  # "lower_and_strip_punctuation" standardization; unlike the previous
  # pattern it also removes the backslash character itself.
  text = tf.strings.regex_replace(text, r'[!"#$%&()\*\+,-\./:;<=>?@\[\\\]^_`{|}~\']', '')

  text = tf.strings.strip(text)
  text = tf.strings.split(text)
  return text


def sequenize(teks, max_len=32, oov_index=1):
  """Encode a sequence of token tensors as a fixed-length row of token ids.

  Each token is looked up in the module-level `kamus` table. Tokens that are
  not in the table get `oov_index`. The row is truncated to `max_len` ids and
  right-padded with 0 when there are fewer tokens.

  Args:
    teks: iterable of scalar string tensors (for example the output of
      `tf_lower_and_split_punct`).
    max_len: number of ids in the returned row. The default of 32 matches the
      `output_sequence_length` the vectorizer was created with.
    oov_index: id used for tokens missing from `kamus`. The default of 1 is
      presumably the vectorizer's out-of-vocabulary id -- confirm against the
      exported vocabulary.

  Returns:
    A float32 tensor of shape (1, max_len), ready to be fed to the TFLite
    interpreter.
  """
  hasil = []
  for token in teks:
    # Stop as soon as the row is full; extra tokens are dropped.
    if len(hasil) >= max_len:
      break
    hasil.append(kamus.get(token.numpy().decode(), oov_index))

  # Right-pad short inputs with 0 up to the fixed length.
  hasil.extend([0] * (max_len - len(hasil)))

  return tf.constant([hasil], dtype=tf.float32)

In [None]:
# Sanity check of the hand-written tokenizer: show the tokens and their count
# for a sentence that contains punctuation.
test_uwu = "saya su'ka kamu, uwu!!!!"
tokens_uwu = tf_lower_and_split_punct(test_uwu)
tokens_uwu.numpy(), len(tokens_uwu)

In [None]:
# Encode the tokenized sample sentence with the JSON vocabulary. The input is
# built here from `test_uwu` so the cell runs on a fresh kernel.
uwu = sequenize(tf_lower_and_split_punct(test_uwu))
uwu.numpy()

In [None]:
# Encoding of the first sample text produced by the Keras vectorizer, wrapped
# in a batch of one and cast to float32 -- presumably shown for comparison
# with the `sequenize` output.
np.array([vectorize(teks[0])], dtype=np.float32)

In [None]:
# Load the converted model again and reserve memory for its tensors.
interpreter = tf.lite.Interpreter(model_path="model/model.tflite")
interpreter.allocate_tensors()

input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
input_index = input_details[0]['index']
output_index = output_details[0]['index']

# Score each text using only the hand-written tokenizer and the JSON
# vocabulary. The interpreter input must be a 2-D float32 array.
for text in teks:
  tokens = tf_lower_and_split_punct(text)
  input_data = sequenize(tokens).numpy()

  interpreter.set_tensor(input_index, input_data)
  interpreter.invoke()
  predictions = interpreter.get_tensor(output_index)

  print(text.numpy().decode())
  print(predictions)

## Download all files needed to run inference in Kotlin

In [None]:
# Bundle the whole "model" directory (saved model, TFLite file and JSON
# vocabulary) into one archive.
!zip -r /content/file.zip model

In [None]:
# Colab-only helper: ask the browser to download the archive created above.
from google.colab import files
files.download("/content/file.zip")

## Vanny Ezhaan Nur Sandika