In [1]:
from google.colab import drive

drive.mount('/content/drive')

Mounted at /content/drive


In [6]:
import os

base_dir = '/content/drive/MyDrive/Kaggle/sentiment labelled sentences'

In [7]:
%cd "$base_dir"

/content/drive/MyDrive/Kaggle/sentiment labelled sentences


In [9]:
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [10]:
df = pd.read_csv(
    'yelp_labelled.txt',
    names=['sentence','label'],
    sep='\t'
)
df.shape

(1000, 2)

In [11]:
df.head()

Unnamed: 0,sentence,label
0,Wow... Loved this place.,1
1,Crust is not good.,0
2,Not tasty and the texture was just nasty.,0
3,Stopped by during the late May bank holiday of...,1
4,The selection on the menu was great and so wer...,1


In [12]:
df['sentence'] = df['sentence'].str.lower()

In [13]:
import nltk
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


True

In [15]:
from nltk.corpus import stopwords

stop = set(stopwords.words('english'))
df.sentence = df.sentence.apply(
    lambda x: ' '.join(
        [word for word in x.split() if word not in (stop)]
    )
)
df.head()

Unnamed: 0,sentence,label
0,wow... loved place.,1
1,crust good.,0
2,tasty texture nasty.,0
3,stopped late may bank holiday rick steve recom...,1
4,selection menu great prices.,1


In [16]:
import string

vocab_size = 2000
oov_tok = '<OOV>'

In [18]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

tokenizer = Tokenizer(
    num_words=vocab_size,
    oov_token=oov_tok,
    filters=string.punctuation
)
tokenizer.fit_on_texts(df.sentence.values)

word2index = tokenizer.word_index
print(len(word2index))

1985


In [19]:
import json

with open('word2index.json', 'w') as fp:
  json.dump(word2index, fp)

In [20]:
max_length = max(len(values.split()) for i, values in enumerate(df.sentence))
max_length

18

In [21]:
trunc_type = 'post'

all_seq = tokenizer.texts_to_sequences(df.sentence.values)
all_padded = pad_sequences(
    all_seq,
    maxlen=max_length,
    padding=trunc_type
)
all_padded.shape

(1000, 18)

In [22]:
from sklearn.model_selection import train_test_split

x = all_padded
y = df.label

x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
    shuffle=True,
    stratify=y
)

print(x_train.shape, y_train.shape)
print(x_test.shape, y_test.shape)

(800, 18) (800,)
(200, 18) (200,)


In [23]:
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(input_dim= vocab_size, output_dim=16, input_length= max_length),
    tf.keras.layers.LSTM(64),
    tf.keras.layers.Dense(24, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])
model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 18, 16)            32000     
_________________________________________________________________
lstm (LSTM)                  (None, 64)                20736     
_________________________________________________________________
dense (Dense)                (None, 24)                1560      
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 25        
Total params: 54,321
Trainable params: 54,321
Non-trainable params: 0
_________________________________________________________________


In [24]:
num_epochs = 30
history  = model.fit(
    x_train,
    y_train,
    epochs=num_epochs,
    validation_data=(x_test,y_test)
)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [32]:
!pip install tensorflowjs



In [27]:
save_model_path = os.path.join(base_dir, 'Model')
model.save(save_model_path)



INFO:tensorflow:Assets written to: /content/drive/MyDrive/Kaggle/sentiment labelled sentences/Model/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/Kaggle/sentiment labelled sentences/Model/assets


In [30]:
!saved_model_cli show --dir "$save_model_path" --tag_set serve --signature_def serving_default

The given SavedModel SignatureDef contains the following input(s):
  inputs['embedding_input'] tensor_info:
      dtype: DT_FLOAT
      shape: (-1, 18)
      name: serving_default_embedding_input:0
The given SavedModel SignatureDef contains the following output(s):
  outputs['dense_1'] tensor_info:
      dtype: DT_FLOAT
      shape: (-1, 1)
      name: StatefulPartitionedCall:0
Method name is: tensorflow/serving/predict


In [37]:
save_model_path = '/content/drive/MyDrive/Kaggle/sentiment-labelled-sentences/model'
converted_model_path = '/content/drive/MyDrive/Kaggle/sentiment-labelled-sentences/modeltfjs'

In [38]:
!tensorflowjs_converter \
  --input_format=tf_saved_model \
  "$save_model_path" \
  "$converted_model_path"

2021-07-12 19:27:06.143926: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0
2021-07-12 19:27:08.277281: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcuda.so.1
2021-07-12 19:27:08.288843: E tensorflow/stream_executor/cuda/cuda_driver.cc:328] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected
2021-07-12 19:27:08.288899: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (cb4488ee95e3): /proc/driver/nvidia/version does not exist
2021-07-12 19:27:09.907910: I tensorflow/core/grappler/devices.cc:69] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 0
2021-07-12 19:27:09.908137: I tensorflow/core/grappler/clusters/single_machine.cc:357] Starting new session
2021-07-12 19:27:09.908756: I tensorflow/core/platform/profile_utils/cpu_utils.cc:114] CPU Frequen