<a href="https://colab.research.google.com/github/janShi1105/science/blob/main/ML16.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import tensorflow as tf
# Create a SimpleRNN layer with 2 hidden units that returns the output at
# every time step, then build it for inputs that carry 5 features per step
# (batch size and sequence length are left unspecified).
tf.random.set_seed(1)
rnn_layer = tf.keras.layers.SimpleRNN(
    units=2,
    use_bias=True,
    return_sequences=True,
)
rnn_layer.build(input_shape=(None, None, 5))

# The layer exposes three weights: the input-to-hidden kernel, the recurrent
# (output-to-output) kernel and the bias. They are kept as module-level names
# because the manual forward pass in the next cell reuses them.
w_xh, w_oo, b_h = rnn_layer.weights
for caption, weight in (
    ('W_xh shape:', w_xh),
    ('W_oo shape:', w_oo),
    ('b_h shape:', b_h),
):
  print(caption, weight.shape)

W_xh shape: (5, 2)
W_oo shape: (2, 2)
b_h shape: (2,)


In [2]:
# Reproduce the SimpleRNN forward pass by hand and compare it, step by step,
# with what the Keras layer returns. The recurrence computed below is
#     o_t = tanh(x_t @ W_xh + b_h + o_{t-1} @ W_oo),   with o_{-1} = 0.
x_seq = tf.convert_to_tensor([[1.0]*5, [2.0]*5, [3.0]*5], dtype=tf.float32)

# The layer is fed a single batch holding the whole 3-step sequence.
output = rnn_layer(tf.reshape(x_seq, shape=(1, 3, 5)))

out_man = []  # manually computed outputs, one (1, 2) tensor per time step
for t in range(len(x_seq)):
  xt = tf.reshape(x_seq[t], (1, 5))
  print('Time step {}'.format(t))
  print('    Input           :', xt.numpy())

  # Input contribution only: x_t @ W_xh + b_h (the recurrent term is added below).
  ht = tf.matmul(xt, w_xh) + b_h
  print('    Hidden        :', ht.numpy())

  # The first step has no previous output, so the recurrent input is all zeros.
  if t > 0:
    prev_o = out_man[t-1]
  else:
    prev_o = tf.zeros(shape=ht.shape)

  # Add the recurrent contribution and apply the tanh activation.
  ot = ht + tf.matmul(prev_o, w_oo)
  ot = tf.math.tanh(ot)
  out_man.append(ot)
  print('      Output (manual)   :', ot.numpy())
  # The label has no placeholder, so the former `.format(t)` call was a no-op
  # and has been dropped; the printed text is unchanged.
  print('      SimpleRNN output: ', output[0][t].numpy())
  print()



Time step 0
    Input           : [[1. 1. 1. 1. 1.]]
    Hidden        : [[-1.5800393   0.99679226]]
      Output (manual)   : [[-0.918608   0.7602437]]
      SimpleRNN output:  [-0.918608   0.7602437]

Time step 1
    Input           : [[2. 2. 2. 2. 2.]]
    Hidden        : [[-3.1600785  1.9935845]]
      Output (manual)   : [[-0.99958783  0.9863836 ]]
      SimpleRNN output:  [-0.99958783  0.9863836 ]

Time step 2
    Input           : [[3. 3. 3. 3. 3.]]
    Hidden        : [[-4.7401175  2.990377 ]]
      Output (manual)   : [[-0.9999867   0.99874306]]
      SimpleRNN output:  [-0.9999867   0.99874306]



In [3]:
from google.colab import drive 
# Mount Google Drive at /content/drive so files stored there can be opened by path.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [4]:
import tensorflow as tf
import tensorflow_datasets as tfds
import numpy as np
import pandas as pd

# Load the movie-review CSV from the mounted Drive. The path is relative, so
# it resolves against the current working directory and requires the Drive
# mount from the earlier cell to be in place.
df = pd.read_csv(
    filepath_or_buffer='drive/MyDrive/movie_data.csv',
    encoding='utf-8',
)

In [5]:
# Shell escape: list the current working directory (the mounted `drive` folder should appear here).
!ls

drive  sample_data


In [6]:
# Split the label column off the frame and wrap (features, label) pairs in a
# tf.data.Dataset. NOTE: `df.pop` removes 'sentiment' from `df` in place, so
# this cell cannot be re-run without reloading `df` first.
target = df.pop('sentiment')
ds_raw = tf.data.Dataset.from_tensor_slices((df.values, target.values))

# Preview the first 50 bytes of the first column and the label for three rows.
for features, sentiment in ds_raw.take(3):
  tf.print(features.numpy()[0][:50], sentiment)


b'In 1974, the teenager Martha Moxley (Maggie Grace)' 1
b'OK... so... I really like Kris Kristofferson and h' 0
b'***SPOILER*** Do not read this, if you think about' 0


In [7]:
# Shuffle once with a fixed seed, then carve the stream into test / train /
# validation pieces. `reshuffle_each_iteration=False` keeps the shuffled order
# the same on every pass over the dataset; without it each of the take/skip
# views below would see a different order and the splits could overlap.
tf.random.set_seed(1)
ds_raw = ds_raw.shuffle(buffer_size=50000, reshuffle_each_iteration=False)

# First 25,000 examples -> test set; everything after that -> train + validation.
ds_raw_test = ds_raw.take(count=25000)
ds_raw_train_valid = ds_raw.skip(count=25000)

# Of the remainder: the first 20,000 -> training, whatever is left -> validation.
ds_raw_train = ds_raw_train_valid.take(count=20000)
ds_raw_valid = ds_raw_train_valid.skip(count=20000)

In [26]:
from collections import Counter

# Tokenize every review in the training split and tally how often each token
# occurs. Only the training split is scanned, so the vocabulary is built
# without looking at validation or test data.
tokenizer = tfds.deprecated.text.Tokenizer()
token_counts = Counter()
for review, _label in ds_raw_train:
  token_counts.update(tokenizer.tokenize(review.numpy()[0]))

# Number of distinct tokens seen in the training split.
print('Vocab-size:', len(token_counts))

Vocab-size: 87007


In [None]:
!pip install tensorflow-text

In [None]:
!tensorflow-text --version

In [27]:
# Build an encoder that maps tokens to integer ids, using the tokens counted
# in the training split as its vocabulary, and try it on a short sentence.
encoder = tfds.deprecated.text.TokenTextEncoder(vocab_list=token_counts)

example_str = 'This is an example!'
example_ids = encoder.encode(example_str)
print(example_ids)

[232, 9, 270, 1123]


In [28]:
def encode(text_tensor, label):
  """Encode one example's text with the module-level `encoder`.

  Args:
    text_tensor: object whose `.numpy()` result holds the raw text at index 0.
    label: value passed through untouched.

  Returns:
    A `(encoded_text, label)` tuple, where `encoded_text` is whatever
    `encoder.encode` returns for the raw text.
  """
  raw_text = text_tensor.numpy()[0]
  return encoder.encode(raw_text), label

In [31]:
def encode_map_fn(text, label):
  """Wrap the Python-level `encode` so it can be used in `Dataset.map`.

  `encode` calls `.numpy()` on its input, so it is routed through
  `tf.py_function`; both outputs are declared as int64.
  """
  output_dtypes = (tf.int64, tf.int64)
  return tf.py_function(func=encode, inp=[text, label], Tout=output_dtypes)

# Apply the integer encoding to each raw split (same order as before:
# train, validation, test).
ds_train, ds_valid, ds_test = (
    raw_split.map(encode_map_fn)
    for raw_split in (ds_raw_train, ds_raw_valid, ds_raw_test)
)

# Peek at five shuffled training examples to show that the encoded reviews
# have different lengths.
tf.random.set_seed(1)
shuffled_preview = ds_train.shuffle(1000).take(5)
for example in shuffled_preview:
  # Label text is kept exactly as-is so it matches the recorded cell output.
  print('Sequnece length: ', example[0].shape)

Sequnece length:  (24,)
Sequnece length:  (179,)
Sequnece length:  (262,)
Sequnece length:  (535,)
Sequnece length:  (130,)
