Week 1: Using the Text Tokenizer
Note: the sentences and the tokenized sequences here are plain Python lists rather than NumPy arrays.

In [0]:
import tensorflow as tf

# Training corpus: the tokenizer's vocabulary is learned from these sentences only.
sentences = ['i love you',
             'i need you',
             'dog is the best of all',
             'i am sleeping']
# Held-out sentences used to show how unseen words are handled.
# Every element needs its own trailing comma: Python silently concatenates
# adjacent string literals, which would merge two sentences into one.
test_sentences = ['Jaadu need you',
                  'i dont love darkness',
                  'lets play holi',
                  'god is there with all of us']

# Index 0 is reserved for padding and index 1 is taken by the OOV token.
# num_words = 9 only limits which indices are emitted by texts_to_sequences
# (words ranked at index 9 or above are replaced by the OOV index);
# index_word below still lists the complete fitted vocabulary.
tokenizer = tf.keras.preprocessing.text.Tokenizer(num_words = 9, oov_token = '<00v>')
tokenizer.fit_on_texts(sentences)
print(tokenizer.index_word)

# texts_to_sequences returns a list of lists of word indices, one per sentence.
sequences = tokenizer.texts_to_sequences(test_sentences)
print(sequences)
# Force every sequence to exactly 5 tokens: shorter ones get zeros appended
# (padding = 'post'), longer ones lose tokens from the front (truncating = 'pre').
padded = tf.keras.preprocessing.sequence.pad_sequences(sequences , maxlen = 5 ,padding = 'post', truncating = 'pre')
print(padded)

Week 2: TensorFlow Datasets, the Embedding layer, and reading back the learned weights

In [0]:
import tensorflow as tf
import tensorflow_datasets as tfds
import numpy as np
# Iterating a dataset and calling .numpy() on its tensors requires eager mode.
# TF 1.x has to switch it on explicitly; TF 2.x is eager by default and no
# longer exposes tf.enable_eager_execution, so only call it when eager mode
# is actually off.
if not tf.executing_eagerly():
    tf.enable_eager_execution()

VOCABSIZE = 10000    # vocabulary size limit shared by the tokenizer and the Embedding layer
MAXLEN = 120         # every review is padded / truncated to this many tokens
EMBDIM = 16          # size of each learned word vector
EMBMAXLEN = MAXLEN   # Embedding input_length must equal the padded sequence length

print(tf.__version__)
# download=False: the dataset must already be prepared locally; this call
# does not fetch it.
imdb,info = tfds.load("imdb_reviews",with_info= True, as_supervised= True, download=False)

train_data, test_data = imdb['train'] , imdb['test']

train_sentences = []
train_labels = []
test_sentences = []
test_labels = []

# With as_supervised=True every element is a (text, label) pair of tensors.
# The text tensor holds raw bytes: decode it to get the real review text.
# str() on a bytes object would yield the repr "b'...'" instead, leaking the
# prefix, the quotes and backslash escapes into the tokenizer's vocabulary.
for feature,label in train_data:
    train_sentences.append(feature.numpy().decode('utf-8', errors='replace'))
    train_labels.append(label.numpy())

for feature,label in test_data:
    test_sentences.append(feature.numpy().decode('utf-8', errors='replace'))
    test_labels.append(label.numpy())

# Keras expects the labels as NumPy arrays rather than Python lists.
train_labels = np.array(train_labels)
test_labels = np.array(test_labels)

# Learn the vocabulary from the training reviews only, so test-only words are
# mapped to the out-of-vocabulary token. A visible '<OOV>' marker is used
# instead of an empty string, which would be indistinguishable from nothing
# when sequences are decoded back to text; the integer sequences themselves
# are unaffected by the marker's spelling.
tokenizer = tf.keras.preprocessing.text.Tokenizer(oov_token='<OOV>',num_words = VOCABSIZE)
tokenizer.fit_on_texts(train_sentences)

# Train and test must be padded identically, so the options are defined once:
# zeros are added at the front of short reviews and long reviews keep only
# their last MAXLEN tokens.
pad_options = dict(padding = 'pre', maxlen = MAXLEN, truncating = 'pre')

train_sequences = tokenizer.texts_to_sequences(train_sentences)
train_padded = tf.keras.preprocessing.sequence.pad_sequences(train_sequences, **pad_options)

test_sequences = tokenizer.texts_to_sequences(test_sentences)
test_padded = tf.keras.preprocessing.sequence.pad_sequences(test_sequences, **pad_options)

# Baseline classifier: embed -> average over time -> small dense head.
model = tf.keras.models.Sequential()
# The Embedding layer maps each of the EMBMAXLEN token ids in a review to a
# trainable EMBDIM-sized vector. This is equivalent to multiplying the
# one-hot (EMBMAXLEN x VOCABSIZE) representation of the sentence by a
# VOCABSIZE x EMBDIM embedding matrix, giving an EMBMAXLEN x EMBDIM matrix
# per review.
model.add(tf.keras.layers.Embedding(VOCABSIZE, EMBDIM, input_length=EMBMAXLEN))
# Average the word vectors over the sequence axis: one EMBDIM vector per review.
model.add(tf.keras.layers.GlobalAveragePooling1D())
model.add(tf.keras.layers.Dense(6, activation='relu'))
# Single sigmoid unit for the binary sentiment label.
model.add(tf.keras.layers.Dense(1, activation='sigmoid'))

model.summary()

model.compile(
    optimizer=tf.keras.optimizers.Adam(lr=0.001),
    loss=tf.keras.losses.BinaryCrossentropy(),
    metrics=['accuracy'],
)
# The test split doubles as validation data so both curves can be plotted.
history = model.fit(
    x=train_padded,
    y=train_labels,
    epochs=50,
    validation_data=(test_padded, test_labels),
)

#Check the weights of layers
# model.get_weights() is a flat list over all layers, so element 0 is the
# first layer's weight matrix; the per-layer accessor on the next line reads
# the same matrix directly from model.layers[0].
emb_weights = model.get_weights()[0]
emb_weights2 = model.layers[0].get_weights()[0]

#Plotting the learning curves
import matplotlib.pyplot as plt

# metrics=['accuracy'] is recorded under 'acc' by older tf.keras releases and
# under 'accuracy' by newer ones; use whichever key this run produced.
acc_key = 'acc' if 'acc' in history.history else 'accuracy'

plt.figure('losses')
plt.plot(history.history['loss'], label = 'train loss')
plt.plot(history.history['val_loss'], label = 'validation loss')
plt.title('Loss per epoch')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.legend()  # without this the label= arguments above are never displayed

plt.figure('accuracy')
plt.plot(history.history[acc_key], label = 'train accuracy')
plt.plot(history.history['val_' + acc_key], label = 'validation accuracy')
plt.title('Accuracy per epoch')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.legend()

plt.show()


Week 3: LSTM, GRU, and Conv1D

In [0]:
#Modify the Week2 code as per:
VOCABSIZE = 10000
MAXLEN = 120
EMBDIM = 16
EMBMAXLEN = 120


def build_model(variant):
    """Build one of the Week 3 sentiment-classifier architectures.

    Every variant shares the same Embedding front end and the same
    Dense(6, relu) -> Dense(1, sigmoid) head. Only the layers that reduce
    the embedded (None, 120, 16) sequence to a fixed-size vector differ.

    Args:
        variant: one of 'average', 'bi_lstm', 'stacked_bi_lstm', 'conv',
            'bi_gru'.

    Returns:
        An uncompiled tf.keras Sequential model.

    Raises:
        ValueError: if variant is not one of the names above.
    """
    layers = tf.keras.layers

    if variant == 'average':
        # Original Week 2 model: mean over the time axis.
        encoder = [layers.GlobalAveragePooling1D()]                          # (None, 16)
    elif variant == 'bi_lstm':
        # Per direction: 4 * (64 * (16 + 64) + 64) = 20736 -> 41472 parameters.
        encoder = [layers.Bidirectional(layers.LSTM(64))]                    # (None, 128)
    elif variant == 'stacked_bi_lstm':
        # The first LSTM must return the full sequence so the second has
        # a time axis to consume.
        encoder = [
            layers.Bidirectional(layers.LSTM(64, return_sequences=True)),   # (None, 120, 128)
            layers.Bidirectional(layers.LSTM(64, return_sequences=False)),  # (None, 128)
        ]
    elif variant == 'conv':
        # kernel_size=5 without padding shortens the sequence from 120 to 116.
        encoder = [
            layers.Conv1D(51, kernel_size=5, activation='relu'),            # (None, 116, 51)
            layers.GlobalAveragePooling1D(),                                 # (None, 51)
        ]
    elif variant == 'bi_gru':
        # Per direction: 3 * (64 * (16 + 64) + 64) = 15552 -> 31104 parameters
        # (slightly more, 31488, when the GRU uses reset_after=True).
        encoder = [layers.Bidirectional(layers.GRU(64))]                     # (None, 128)
    else:
        raise ValueError('unknown model variant: %r' % (variant,))

    return tf.keras.models.Sequential(
        [layers.Embedding(VOCABSIZE, EMBDIM, input_length=EMBMAXLEN)]       # (None, 120, 16)
        + encoder
        + [layers.Dense(6, activation='relu'),                              # (None, 6)
           layers.Dense(1, activation='sigmoid')])                          # (None, 1)


# Choose the architecture to train. 'bi_gru' matches what this cell used to
# leave in `model` after its sequence of successive reassignments.
MODEL_VARIANT = 'bi_gru'
model = build_model(MODEL_VARIANT)



