# Project: RNN (Recurrent Neural Network) on TensorFlow

In this project, ...

## Part 1: Import TensorFlow and Setup

In [3]:
from __future__ import absolute_import, division, print_function
import tensorflow as tf

# Eager execution is an imperative mode: operations are evaluated immediately and
# return concrete values instead of building a computational graph to run later.
# The top-level switch only exists in TensorFlow 1.x; guard the call so this cell
# does not raise AttributeError on releases that no longer expose it.
if hasattr(tf, "enable_eager_execution"):
    tf.enable_eager_execution()
import os
import time
import numpy as np
import matplotlib.pyplot as plt

## Part 2: Load and Inspect the DataSet

In [38]:
# Download the Shakespeare dataset into the Keras cache directory (~/.keras)
path_to_file = tf.keras.utils.get_file(
    'shakespeare.txt',
    'https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt')
print(path_to_file)

# Read the file in binary mode and decode the bytes as UTF-8.
# The context manager closes the file handle as soon as the read finishes,
# instead of leaving it open until the garbage collector gets to it.
with open(path_to_file, 'rb') as corpus_file:
    text = corpus_file.read().decode(encoding='utf-8')
print("The length of text is {} characters".format(len(text)))
# Look at the first 200 characters
print(text[:200])

# Collect the distinct characters of the text with set() and sort them into a list
ch = sorted(set(text))
print("There are {} unique charachters".format(len(ch)))
# type(ch)


/Users/Amir/.keras/datasets/shakespeare.txt
The length of text is 1115394 characters
First Citizen:
Before we proceed any further, hear me speak.

All:
Speak, speak.

First Citizen:
You are all resolved rather to die than to famish?

All:
Resolved. resolved.

First Citizen:
First, you
There are 65 unique charachters


list

## Part 3: Vectorize the Dataset

In [46]:
# Turn the text into numbers: every distinct character gets an integer id.

# Pair each character in ch with its position to build the char -> index lookup
ch2idx = dict(zip(ch, range(len(ch))))
print(ch2idx)

# Encode the whole text as an array of character ids
text_as_int = np.array(list(map(ch2idx.__getitem__, text)))

# Sanity check: show the first 13 characters next to their integer encoding
preview_chars = text[:13]
preview_ids = text_as_int[:13]
print('{} mapped to -> {}'.format(repr(preview_chars), preview_ids))

{'\n': 0, ' ': 1, '!': 2, '$': 3, '&': 4, "'": 5, ',': 6, '-': 7, '.': 8, '3': 9, ':': 10, ';': 11, '?': 12, 'A': 13, 'B': 14, 'C': 15, 'D': 16, 'E': 17, 'F': 18, 'G': 19, 'H': 20, 'I': 21, 'J': 22, 'K': 23, 'L': 24, 'M': 25, 'N': 26, 'O': 27, 'P': 28, 'Q': 29, 'R': 30, 'S': 31, 'T': 32, 'U': 33, 'V': 34, 'W': 35, 'X': 36, 'Y': 37, 'Z': 38, 'a': 39, 'b': 40, 'c': 41, 'd': 42, 'e': 43, 'f': 44, 'g': 45, 'h': 46, 'i': 47, 'j': 48, 'k': 49, 'l': 50, 'm': 51, 'n': 52, 'o': 53, 'p': 54, 'q': 55, 'r': 56, 's': 57, 't': 58, 'u': 59, 'v': 60, 'w': 61, 'x': 62, 'y': 63, 'z': 64}
'First Citizen' mapped to -> [18 47 56 57 58  1 15 47 58 47 64 43 52]


## Part 4: Create Training Dataset

In [73]:
# Wrap the integer-encoded text in a tf.data dataset that yields one character id per element
char_dataset = tf.data.Dataset.from_tensor_slices(text_as_int)

# Array of the unique characters, used as the reverse lookup: index -> character
idx2ch = np.array(ch)

# Peek at the first ten elements, translating each id back to its character via idx2ch
for char_id in char_dataset.take(10):
    print(idx2ch[char_id])

# Maximum length, in characters, of a single input sequence
seq_length = 100

# Group consecutive characters into chunks of seq_length + 1 elements;
# drop_remainder=True discards a final chunk that would be shorter than that
sequences = char_dataset.batch(batch_size=seq_length + 1, drop_remainder=True)

# Show the first chunk both as an array of characters and as a single joined string
for chunk in sequences.take(1):
    chunk_chars = idx2ch[chunk.numpy()]
    print(chunk_chars)
    print(repr(''.join(chunk_chars)))

F
i
r
s
t
 
C
i
t
i
['F' 'i' 'r' 's' 't' ' ' 'C' 'i' 't' 'i' 'z' 'e' 'n' ':' '\n' 'B' 'e' 'f'
 'o' 'r' 'e' ' ' 'w' 'e' ' ' 'p' 'r' 'o' 'c' 'e' 'e' 'd' ' ' 'a' 'n' 'y'
 ' ' 'f' 'u' 'r' 't' 'h' 'e' 'r' ',' ' ' 'h' 'e' 'a' 'r' ' ' 'm' 'e' ' '
 's' 'p' 'e' 'a' 'k' '.' '\n' '\n' 'A' 'l' 'l' ':' '\n' 'S' 'p' 'e' 'a'
 'k' ',' ' ' 's' 'p' 'e' 'a' 'k' '.' '\n' '\n' 'F' 'i' 'r' 's' 't' ' ' 'C'
 'i' 't' 'i' 'z' 'e' 'n' ':' '\n' 'Y' 'o' 'u' ' ']
'First Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou '


## Part 3: 