In [5]:
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

df = pd.read_csv('yelp_labelled.txt', names=['sentence', 'label'], sep='\t')
df.shape

(1000, 2)

In [6]:
df.head()

Unnamed: 0,sentence,label
0,Wow... Loved this place.,1
1,Crust is not good.,0
2,Not tasty and the texture was just nasty.,0
3,Stopped by during the late May bank holiday of...,1
4,The selection on the menu was great and so wer...,1


In [7]:
# convert to lowercase
df['sentence'] = df['sentence'].str.lower()

In [10]:
# remove stopwords
from nltk.corpus import stopwords
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [11]:
stop = set(stopwords.words('english'))
df['sentence'] = df['sentence'].apply(lambda x:' '.join([word for word in x.split() if word not in (stop)]))
df.head()

Unnamed: 0,sentence,label
0,wow... loved place.,1
1,crust good.,0
2,tasty texture nasty.,0
3,stopped late may bank holiday rick steve recom...,1
4,selection menu great prices.,1


In [15]:
#tokenisasi
#metadata: word index hasil tokenisasi
vocab_size = 2000
oov_tok = "<OOV>"
filt = '!"#$%&()*+.,-/:;=?@[\]^_`{|}~ ' #remove symbols

from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

tokenizer = Tokenizer(num_words = vocab_size, oov_token = oov_tok, filters = filt)
tokenizer.fit_on_texts(df['sentence'].values)

word2index = tokenizer.word_index
print(len(word2index))

1998


In [17]:
#ubah ke JSON
#serialisasi variabel agar dapat diunduh komputer
import json

with open('word2index.json', 'w') as fp:
    json.dump(word2index, fp)

In [14]:
max_length =  max(len(values.split()) for i, values in enumerate(df['sentence']))
max_length

18

In [18]:
trunc_type='post'

all_seq = tokenizer.texts_to_sequences(df['sentence'].values)
all_padded = pad_sequences(all_seq, maxlen = max_length, padding = trunc_type)
all_padded.shape

(1000, 18)

In [19]:
# split train and test sets
from sklearn.model_selection import train_test_split

X = all_padded
y = df['label']

X_train, X_test, y_train, y_test = train_test_split(X, y,test_size=0.2, random_state=42)
print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(800, 18) (800,)
(200, 18) (200,)


In [20]:
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(input_dim= vocab_size, output_dim=16, input_length= max_length),
    tf.keras.layers.LSTM(64),
    tf.keras.layers.Dense(24, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])
model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 18, 16)            32000     
_________________________________________________________________
lstm (LSTM)                  (None, 64)                20736     
_________________________________________________________________
dense (Dense)                (None, 24)                1560      
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 25        
Total params: 54,321
Trainable params: 54,321
Non-trainable params: 0
_________________________________________________________________


In [21]:
num_epochs = 30
history = model.fit(X_train, y_train, epochs=num_epochs, validation_data=(X_test, y_test))

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [22]:
!pip install tensorflowjs

Collecting tensorflowjs
  Downloading tensorflowjs-3.9.0-py3-none-any.whl (64 kB)
[?25l[K     |█████                           | 10 kB 19.2 MB/s eta 0:00:01[K     |██████████▏                     | 20 kB 25.3 MB/s eta 0:00:01[K     |███████████████▏                | 30 kB 16.3 MB/s eta 0:00:01[K     |████████████████████▎           | 40 kB 11.8 MB/s eta 0:00:01[K     |█████████████████████████▎      | 51 kB 5.5 MB/s eta 0:00:01[K     |██████████████████████████████▍ | 61 kB 5.7 MB/s eta 0:00:01[K     |████████████████████████████████| 64 kB 2.0 MB/s 
Installing collected packages: tensorflowjs
Successfully installed tensorflowjs-3.9.0


In [23]:
saved_model_path = '/content/mymodel/'
tf.saved_model.save(model, saved_model_path)



INFO:tensorflow:Assets written to: /content/mymodel/assets


INFO:tensorflow:Assets written to: /content/mymodel/assets


In [24]:
!tensorflowjs_converter \
  --input_format=tf_saved_model \
  /content/mymodel/ \
  /content/modeltfjs

2021-10-14 06:39:56.350029: E tensorflow/stream_executor/cuda/cuda_driver.cc:271] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected
2021-10-14 06:39:56.350097: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (e446032863da): /proc/driver/nvidia/version does not exist
2021-10-14 06:39:58.102911: I tensorflow/core/grappler/devices.cc:66] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 0
2021-10-14 06:39:58.103190: I tensorflow/core/grappler/clusters/single_machine.cc:357] Starting new session
2021-10-14 06:39:58.112892: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:1137] Optimization results for grappler item: graph_to_optimize
  function_optimizer: Graph size after: 251 nodes (239), 348 edges (336), time = 4.898ms.
  function_optimizer: function_optimizer did nothing. time = 0.087ms.

2021-10-14 06:39:58.316412: I tensorflow/core/grappler/optimizers/meta_optim