In [2]:
!nvidia-smi

Fri Jul 24 14:00:39 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 450.51.05    Driver Version: 418.67       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla K80           Off  | 00000000:00:04.0 Off |                    0 |
| N/A   49C    P8    30W / 149W |      0MiB / 11441MiB |      0%      Default |
|                               |                      |                 ERR! |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [None]:
from __future__ import absolute_import, division, print_function, unicode_literals

try:
  !pip uninstall tb-nightly tensorboardX tensorboard
  !pip install tb-nightly

except Exception:
  pass

import tensorflow as tf

import os
import datetime
import tensorflow_datasets as tfds

In [5]:
print(tf.__version__)

2.2.0


In [None]:
dataset, info = tfds.load('amazon_us_reviews/Mobile_Electronics_v1_00',with_info=True)
train_dataset = dataset['train']

In [8]:
len(list(train_dataset))

104975

In [9]:
BUFFER_SIZE = 30000
BATCH_SIZE = 128

Tokenizing and building only an original vocabulary 

In [None]:
tokenizer = tfds.features.text.Tokenizer()

vocabulary_set = set()

for _,reviews in train_dataset.enumerate():
  reviews_text = reviews['data']
  reviews_token = tokenizer.tokenize(reviews_text.get('review_body').numpy())
  vocabulary_set.update(reviews_token)

In [13]:
vocab_size = len(vocabulary_set)
vocab_size

73738

In [14]:
encoder = tfds.features.text.TokenTextEncoder(vocabulary_set)

In [15]:
print(vocabulary_set)



In [16]:
def encode(text_tensor,label_tensor):
  encoded_text = encoder.encode(text_tensor.numpy())
  label = tf.where(label_tensor>3,1,0)
  return encoded_text,label

In [19]:
def encode_map_fn(tensor):

  text = tensor['data'].get('review_body')
  label = tensor['data'].get('star_rating')

  encoded_text, label = tf.py_function(encode,
                                       inp=[text,label],
                                       Tout = [tf.int64,tf.int32])
  
  encoded_text.set_shape([None])
  label.set_shape([])

  return encoded_text,label

In [20]:
ar_encoded_data = train_dataset.map(encode_map_fn)

In [22]:
for encoded_review, rating in ar_encoded_data.take(2):
  print(encoded_review)
  print(rating)

tf.Tensor([39102 51736 70550], shape=(3,), dtype=int64)
tf.Tensor(0, shape=(), dtype=int32)
tf.Tensor(
[35375 26195 21220  2991 71194 15228  7211 19381 60256 25182 35897 57517
 59505 68625 65150 32935 39747 25182 20993 42752  2554 30268 36576 44995
 57017 47717 51736 66734 33822 70122 31569 47516 30268 30452 68690 69318
 59505 24446 26195 30648 70122  7879 27271 70122 15793 47516 26195 21220
 61326 68532 30268 16474 30452 41471 47327 30268 18961  7037  7211 50854
 60256  9439 35760  2991 15228], shape=(65,), dtype=int64)
tf.Tensor(1, shape=(), dtype=int32)


In [24]:
test_size = 10000

train_data = ar_encoded_data.skip(test_size).shuffle(BUFFER_SIZE)
train_data = train_data.padded_batch(BATCH_SIZE)

test_data = ar_encoded_data.take(test_size)
test_data = test_data.padded_batch(BATCH_SIZE)

In [25]:
vocab_size += 1

In [26]:
sample_text, sample_label  = next(iter(test_data))
sample_text[0], sample_label[0]

(<tf.Tensor: shape=(1006,), dtype=int64, numpy=array([39102, 51736, 70550, ...,     0,     0,     0])>,
 <tf.Tensor: shape=(), dtype=int32, numpy=0>)

Bidirectional LSTM modelling

In [27]:
model = tf.keras.Sequential()
model.add(tf.keras.layers.Embedding(vocab_size,128))
model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(128,return_sequences=True)))
model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64)))

for units in [64,64]:
  model.add(tf.keras.layers.Dense(units,activation='relu'))

model.add(tf.keras.layers.Dense(1))

In [30]:
model.compile(optimizer='adam',
             loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
             metrics=['accuracy'])

In [32]:
history = model.fit(train_data,validation_data=test_data,epochs=3)
#1 epoch will take approx 8 mins. so taking only 3 epoch

Epoch 1/3
Epoch 2/3
Epoch 3/3


Saving my model to google drive

In [33]:
from google.colab import drive
drive.mount('/content/gdrive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/gdrive


In [34]:
model_name = 'sentimental_analysis_amazon_reviews.hdf5'

In [38]:
path = F"/content/gdrive/My Drive/{model_name}" 
model.save(path)