##### Copyright 2020 The TensorFlow Authors.

In [None]:
#@title Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Constructing a Text Generation Model


<table class="tfo-notebook-buttons" align="left">
  <td>
    <a target="_blank" href="https://colab.research.google.com/github/tensorflow/examples/blob/master/courses/udacity_intro_to_tensorflow_for_deep_learning/l10c03_nlp_constructing_text_generation_model.ipynb"><img src="https://www.tensorflow.org/images/colab_logo_32px.png" />Run in Google Colab</a>
  </td>
  <td>
    <a target="_blank" href="https://github.com/tensorflow/examples/blob/master/courses/udacity_intro_to_tensorflow_for_deep_learning/l10c03_nlp_constructing_text_generation_model.ipynb"><img src="https://www.tensorflow.org/images/GitHub-Mark-32px.png" />View source on GitHub</a>
  </td>
</table>

Using most of the techniques you've already learned, it's now possible to generate new text by predicting the next word that follows a given seed word. To practice this method, we'll use the [Kaggle Song Lyrics Dataset](https://www.kaggle.com/mousehead/songlyrics).

## Import TensorFlow and related functions

In [None]:
import tensorflow as tf

from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Other imports for processing data
import string
import numpy as np
import pandas as pd

## Get the Dataset

As noted above, we'll utilize the [Song Lyrics dataset](https://www.kaggle.com/mousehead/songlyrics) on Kaggle.

In [None]:
# !wget --no-check-certificate \
#     https://drive.google.com/uc?id=1LiJFZd41ofrWoBtW-pMYsfz1w8Ny0Bj8 \
#     -O /tmp/songdata.csv

## **First 10 Songs**

Let's first look at just 10 songs from the dataset, and see how things perform.

### Preprocessing

Let's perform some basic preprocessing to get rid of punctuation and make everything lowercase. We'll then split the lyrics up by line and tokenize the lyrics.

In [None]:
def tokenize_corpus(corpus, num_words=-1):
  """Create a Keras `Tokenizer` and fit it on `corpus`.

  Args:
    corpus: Texts handed to `Tokenizer.fit_on_texts`.
    num_words: When greater than -1, the tokenizer is built with
      `Tokenizer(num_words=num_words)`. Otherwise (the default) a
      case-preserving, character-level tokenizer is built.

  Returns:
    The fitted tokenizer.
  """
  wants_vocab_cap = num_words > -1
  if not wants_vocab_cap:
    # Character-level mode: keep case, pass " " as the filter string.
    fitted = Tokenizer(filters=" ", lower=False, char_level=True)
  else:
    fitted = Tokenizer(num_words=num_words)
  fitted.fit_on_texts(corpus)
  return fitted

def create_lyrics_corpus(dataset, field):
  """Turn a text column into a list of cleaned, non-empty lines.

  Punctuation is removed, text is lower-cased, the rows are joined and split
  on newlines, trailing whitespace is stripped and empty lines are dropped.

  Args:
    dataset: pandas DataFrame holding the text. It is not modified.
    field: Name of the column in `dataset` that contains the text.

  Returns:
    A list of strings, one per non-empty line.
  """
  import re  # local import: only this helper needs it

  # Escape the punctuation so `]`, `\`, `^` and `-` are literal members of the
  # character class, and request regex matching explicitly: pandas >= 2.0
  # treats the pattern as a literal string by default.
  punctuation_pattern = '[{}]'.format(re.escape(string.punctuation))
  # Work on a derived Series instead of writing back into the caller's frame,
  # so re-running the cell always starts from the same raw data.
  cleaned = dataset[field].str.replace(punctuation_pattern, '', regex=True)
  cleaned = cleaned.str.lower()
  # Join rows with a newline so the last line of one row is not glued to the
  # first line of the next when a row lacks a trailing newline.
  lyrics = cleaned.str.cat(sep='\n')
  stripped_lines = (line.rstrip() for line in lyrics.split('\n'))
  return [line for line in stripped_lines if line != '']

def create_pass_corpus(passwds):
    """Return the distinct items of `passwds` as a list in ascending order."""
    unique_items = set(passwds)
    ordered = list(unique_items)
    ordered.sort()
    return ordered


In [None]:
# Load the password / e-mail corpora (one entry per line in each file).
# The song-lyrics loader from the original tutorial is kept for reference only:
#   dataset = pd.read_csv('/tmp/songdata.csv', dtype=str)[:10]
#   corpus = create_lyrics_corpus(dataset, 'text')
from itertools import islice

MAX_SAMPLES = 100000  # upper bound on the number of lines read from each file

# `islice` simply stops at end-of-file; calling next() a fixed number of times
# raised StopIteration whenever a file held fewer than MAX_SAMPLES lines.
# The encoding is explicit so the vocabulary does not depend on the platform
# default. '\t' marks the start of a sequence; each line's own '\n' marks its end.
with open("200k_pass_sorted.txt", encoding="utf-8") as myfile:
    pass_corpus = ['\t' + line for line in islice(myfile, MAX_SAMPLES)]
with open("200k_emails_sorted.txt", encoding="utf-8") as myfile:
    # NOTE(review): every line read from the file keeps its own '\n', so the
    # extra '\n' appended here yields two end markers per e-mail. Left
    # unchanged because it determines the e-mail sequence length the model is
    # built with -- confirm whether it is intended.
    email_corpus = ['\t' + line + '\n' for line in islice(myfile, MAX_SAMPLES)]

# Passwords and e-mails are paired by position further down, so both lists
# must have the same length.
paired_count = min(len(pass_corpus), len(email_corpus))
pass_corpus = pass_corpus[:paired_count]
email_corpus = email_corpus[:paired_count]

# Character-level tokenizers, one per alphabet.
pass_tokenizer = tokenize_corpus(pass_corpus)
email_tokenizer = tokenize_corpus(email_corpus)

# +1 because the tokenizer numbers characters from 1; index 0 is the padding slot.
total_pass_letters = len(pass_tokenizer.word_index) + 1
total_email_letters = len(email_tokenizer.word_index) + 1
print(pass_tokenizer.word_index)
print(total_email_letters)
print(total_pass_letters)

{'\t': 1, '\n': 2, 'a': 3, '1': 4, 'e': 5, '0': 6, '2': 7, 'n': 8, 'i': 9, 'o': 10, 'r': 11, 's': 12, '9': 13, 'l': 14, 'm': 15, '3': 16, 't': 17, '8': 18, '5': 19, '4': 20, '7': 21, '6': 22, 'd': 23, 'c': 24, 'h': 25, 'b': 26, 'u': 27, 'k': 28, 'y': 29, 'g': 30, 'p': 31, 'f': 32, 'w': 33, 'j': 34, 'v': 35, 'z': 36, 'x': 37, 'A': 38, 'q': 39, 'E': 40, 'S': 41, 'B': 42, 'M': 43, 'L': 44, 'C': 45, 'R': 46, 'D': 47, 'I': 48, 'N': 49, 'T': 50, 'O': 51, 'H': 52, 'P': 53, 'J': 54, 'G': 55, 'K': 56, '!': 57, 'F': 58, '.': 59, 'U': 60, 'W': 61, 'Y': 62, '-': 63, 'V': 64, '_': 65, 'Z': 66, 'X': 67, 'Q': 68, ' ': 69, '@': 70, '#': 71, '?': 72, '/': 73, '*': 74, '&': 75, '$': 76, '%': 77, ';': 78, '=': 79, '+': 80, ']': 81, ',': 82, '[': 83, 'þ': 84, '`': 85, '^': 86, '"': 87, "'": 88, 'Ø': 89, 'Ã': 90, 'Ÿ': 91, 'Å': 92, 'ý': 93, 'õ': 94, 'ú': 95, 'ß': 96, 'ø': 97, '�': 98, '\\': 99}
85
100


### Create Sequences and Labels

After preprocessing, we next need to create sequences and labels. Creating the sequences themselves is similar to before with `texts_to_sequences`, but also including the use of [N-Grams](https://towardsdatascience.com/introduction-to-language-models-n-gram-e323081503d9); creating the labels will now utilize those sequences as well as utilize one-hot encoding over all potential output words.

In [None]:

def seqs(corpus, tokenizer, corpus_other, tokenizer_other):
    """Expand each line of `corpus` into growing token prefixes.

    Every prefix of length 2, 3, ..., len(tokens) of a line is emitted, and
    each one is paired with the full token list of the entry at the same
    position in `corpus_other`.

    Args:
        corpus: Texts to expand into prefixes.
        tokenizer: Object with `texts_to_sequences`, used for `corpus`.
        corpus_other: Texts aligned by position with `corpus`.
        tokenizer_other: Object with `texts_to_sequences`, used for
            `corpus_other`.

    Returns:
        A tuple `(sequences, sequences_other)` of two equally long lists.
    """
    prefixes = []
    companions = []
    for position, text in enumerate(corpus):
        tokens = tokenizer.texts_to_sequences([text])[0]
        companion_tokens = tokenizer_other.texts_to_sequences([corpus_other[position]])[0]
        for end in range(2, len(tokens) + 1):
            prefixes.append(tokens[:end])
            # The same companion list object is reused for every prefix.
            companions.append(companion_tokens)
    return prefixes, companions




# max_sequence_len_pass = max([len(seq) for seq in pass_corpus])
# max_sequence_len_email = max([len(seq) for seq in email_corpus])
# x,y=seqs(pass_corpus, pass_tokenizer, email_corpus,email_tokenizer)

# print((x[1:10]))
# print(y[1:10])
#zip_seqs(email_corpus, pass_corpus, email_tokenizer, pass_tokenizer)

# email_input_sequences = np.zeros(
#     (len(email_corpus), max_sequence_len_email, total_email_letters), dtype="float32"
# )
# pass_input_sequences = np.zeros(
#     (len(pass_corpus), max_sequence_len_pass,total_pass_letters), dtype="float32"
# )
# one_hot_labels = np.zeros(
#     (len(pass_corpus), max_sequence_len_pass, total_pass_letters), dtype="float32"
# )


# for i, (input_text, target_text) in enumerate(zip(email_corpus, pass_corpus)):
#     for t, char in enumerate(input_text):
#         try:
#           email_input_sequences[i, t,email_tokenizer.word_index[char]]=1.0
#         except:
#           print(t)
#     email_input_sequences[i, t + 1 :,email_tokenizer.word_index[" "]] = 1.0
#     for t, char in enumerate(target_text):
#         # decoder_target_data is ahead of decoder_input_data by one timestep
#         pass_input_sequences[i, t, pass_tokenizer.word_index[char]] = 1.0
#         if t > 0:
#             # decoder_target_data will be ahead by one timestep
#             # and will not include the start character.
#             if(char=='\t'):
#               print("err")
#             one_hot_labels[i, t - 1, pass_tokenizer.word_index[char]] = 1.0
#     pass_input_sequences[i, t + 1 :, pass_tokenizer.word_index[char]] = 1.0
#     one_hot_labels[i, t:, pass_tokenizer.word_index[" "]] = 1.0


#prev lstm!!!

# email_seqs=seqs(email_corpus, email_tokenizer)
# pass_seqs= seqs(pass_corpus, pass_tokenizer)

# Expand every password into its prefixes and pair each prefix with the token
# list of the e-mail at the same position.
pass_seqs, email_seqs = seqs(pass_corpus, pass_tokenizer, email_corpus, email_tokenizer)

# Left-pad both sets to the length of their longest member.
max_sequence_len_pass = max(map(len, pass_seqs))
max_sequence_len_email = max(map(len, email_seqs))
pass_sequences = np.array(
    pad_sequences(pass_seqs, padding='pre', maxlen=max_sequence_len_pass))
email_sequences = np.array(
    pad_sequences(email_seqs, padding='pre', maxlen=max_sequence_len_email))

# The last token of each padded password prefix is the label to predict;
# everything before it is the decoder input. E-mails are used whole.
pass_labels = pass_sequences[:, -1]
pass_input_sequences = pass_sequences[:, :-1]
email_input_sequences = email_sequences

# One-hot encode the labels over the password alphabet.
one_hot_labels = tf.keras.utils.to_categorical(pass_labels, num_classes=total_pass_letters)

# TODO: the samples still need to be shuffled.

In [None]:
# Spot-check how the data is stored.
# The character-level tokenizer maps every character to a single index.
for character in ('8', '\n'):
  print(pass_tokenizer.word_index[character])
print(pass_tokenizer.index_word[4])
# Each input row is a left-padded sequence of character indexes.
for row in (5, 6):
  print(pass_input_sequences[row])
print(email_input_sequences[6])
# Each one-hot label spans the whole password alphabet (plus the padding slot).
for row in (5, 6):
  print(one_hot_labels[row])

print(len(pass_corpus))

18
2
1
[ 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  0  0  0  0  0  0  1 30  5 11  3 14]
[ 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  0  0  0  0  0  1 30  5 11  3 14 23]
[ 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  0  0  0  0  0  0  0  0  9 78 25 25 25 25 25 25 25 10 14  4  1  5  8  6
  7  3  4  2  2]
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0.]
[0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 

### Train a Text Generation Model

Building an RNN to train our text generation model will be very similar to the sentiment models you've built previously. The only real change necessary is to make sure to use Categorical instead of Binary Cross Entropy as the loss function - we could use Binary before since the sentiment was only 0 or 1, but now there are hundreds of categories.

From there, we should also consider using *more* epochs than before, as text generation can take a little longer to converge than sentiment analysis, *and* we aren't working with all that much data yet. I'll set it at 200 epochs here since we're only using part of the dataset, and training will tail off quite a bit over that many epochs.

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Bidirectional, Input, Dropout
from tensorflow.keras import Model
from tensorflow import keras

latent_dim = 256

# ---- Encoder: reads the padded e-mail and condenses it into LSTM states ----
encoder_ins = keras.Input(shape=(max_sequence_len_email,))
# `input_length` is omitted: the previous value (max_sequence_len_email - 1)
# disagreed with the Input shape above, and the sequence length is already
# fixed by `encoder_ins`.
encoder_inputs = Embedding(total_email_letters, latent_dim)(encoder_ins)
encoder = Bidirectional(LSTM(latent_dim, return_state=True, return_sequences=True))
e2 = Bidirectional(LSTM(latent_dim, return_state=True))

# With return_state=True a Bidirectional LSTM returns
# [output, forward_h, forward_c, backward_h, backward_c].
e = encoder(encoder_inputs)
# The first layer's states seed the second layer. Previously the whole list
# was passed as the layer input, which Keras interprets the same way; the
# explicit form makes the wiring visible.
encoder_sequence, first_layer_states = e[0], list(e[1:])
encoder_outputs, fw_h, fw_c, bw_h, bw_c = e2(encoder_sequence, initial_state=first_layer_states)
# Only the final states condition the decoder; `encoder_outputs` is not used
# downstream, so the Dropout that used to be applied to it was dead code.
# Bidirectional hands the first half of `initial_state` to the forward LSTM
# and the second half to the backward LSTM, so the order must be
# [fw_h, fw_c, bw_h, bw_c]. The previous [fw_h, bw_h, fw_c, bw_c] fed a hidden
# state in as a cell state (and vice versa).
encoder_states = [fw_h, fw_c, bw_h, bw_c]

# ---- Decoder: reads the password prefix and predicts the next character ----
decoder_ins = keras.Input(shape=(max_sequence_len_pass - 1,))
decoder_inputs = Embedding(total_pass_letters, latent_dim)(decoder_ins)
decoder = Bidirectional(LSTM(latent_dim, return_sequences=True, return_state=False))
d2 = Bidirectional(LSTM(latent_dim, return_sequences=False, return_state=False))

d = decoder(decoder_inputs, initial_state=encoder_states)

decoder_out = d2(d)
decoder_out = Dropout(.4)(decoder_out)

# One softmax over the password alphabet: the next character after the prefix.
decoder_dense = Dense(total_pass_letters, activation='softmax')
decoder_outs = decoder_dense(decoder_out)

model = Model([encoder_ins, decoder_ins], decoder_outs)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
# summary() prints the table itself and returns None, so it is not wrapped in print().
model.summary()

# Keep the returned History so the training curve can be plotted later.
# NOTE(review): validation_split takes the *last* 20% of the rows without
# shuffling; the input files look sorted, so the validation set is presumably
# not representative -- confirm and shuffle the samples beforehand.
history = model.fit(
    [email_input_sequences, pass_input_sequences],
    one_hot_labels,
    epochs=200,
    validation_split=0.2,
    verbose=1,
    batch_size=32,
)
model.save('/content/drive/MyDrive/pass_seq2')



# unconditional_model = Sequential()
# unconditional_model.add(Embedding(total_pass_letters, 256, input_length=max_sequence_len_pass-1))
# unconditional_model.add(Bidirectional(LSTM(256, return_sequences=True)))
# unconditional_model.add(tf.keras.layers.Dropout(.4))
# unconditional_model.add(Bidirectional(LSTM(256))) #shape here?
# unconditional_model.add(tf.keras.layers.Dropout(.4))
# unconditional_model.add(Dense(total_pass_letters, activation='softmax'))
# unconditional_model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# unconditional_model.summary()
# history = model.fit(input_sequences, one_hot_labels, epochs=1, verbose=1)

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 53)]         0                                            
__________________________________________________________________________________________________
embedding (Embedding)           (None, 53, 256)      21760       input_1[0][0]                    
__________________________________________________________________________________________________
input_2 (InputLayer)            [(None, 36)]         0                                            
__________________________________________________________________________________________________
bidirectional (Bidirectional)   [(None, 53, 512), (N 1050624     embedding[0][0]                  
______________________________________________________________________________________________

KeyboardInterrupt: ignored

In [None]:
# Train the model and keep the returned History object: the
# "View the Training Graph" cell plots `history`, which was never assigned.
history = model.fit(
    [email_input_sequences, pass_input_sequences],
    one_hot_labels,
    epochs=200,
    validation_split=0.2,
    verbose=1,
    batch_size=32,
)
model.save('/content/drive/MyDrive/pass_seq2')

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200

### View the Training Graph

In [None]:
# Mount Google Drive at /content/drive (Colab only).
# NOTE(review): cells above this one already save to /content/drive/MyDrive/...;
# the mount presumably has to happen before those saves -- confirm and consider
# moving this cell to the top of the notebook.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Save inside MyDrive: the notebook later reloads the model from
# "/content/drive/MyDrive/pass_seq", so saving to /content/drive/pass_seq
# wrote to a location the reload never reads.
model.save('/content/drive/MyDrive/pass_seq')

In [None]:
import matplotlib.pyplot as plt

def plot_graphs(history, string):
  """Plot one recorded training metric against the epoch index.

  Args:
    history: Object whose `.history` mapping holds per-epoch metric values.
    string: Key of the metric to plot; also used as the y-axis label.
  """
  metric_values = history.history[string]
  plt.plot(metric_values)
  plt.xlabel("Epochs")
  plt.ylabel(string)
  plt.show()

# `history` must be the History object returned by `model.fit(...)`; make sure
# the training cell assigns it before running this cell.
plot_graphs(history, 'accuracy')

### Non-conditional LSTM Inference


In [None]:
# Expand password prefixes one character at a time, conditioned on one fixed
# e-mail. A prefix is moved to `outputs` when the end marker '\n' is among its
# top candidates.
# `seed_texts` doubles as the work queue: the loop below appends to it while
# iterating over it, so newly created prefixes are visited later in the same loop.
seed_texts = ["\t"]

# Number of top candidates expanded per prefix (reduced after every prefix, see below).
num_words=90
print(seed_texts)
outputs=[]
for i,partial in enumerate(seed_texts):
  token_list = pass_tokenizer.texts_to_sequences([partial])
  # NOTE(review): the training e-mails were wrapped as '\t' + line + '\n'; this
  # literal carries no such markers -- confirm whether that is intended.
  email_t= email_tokenizer.texts_to_sequences( ["chillop.byrne@gmail.com"] )
  email_t=pad_sequences(email_t, max_sequence_len_email)
  token_list = pad_sequences(token_list, maxlen=max_sequence_len_pass-1, padding='pre')
  
  # p[0] holds the model's score for every class of the password alphabet.
  p=model.predict([email_t,token_list])
  
  # Indices of the `num_words` highest-scoring classes (argpartition leaves them unsorted).
  ind = np.argpartition(p[0], -num_words)[-num_words:]
  # print(p[0][ind])
  # `predicted` is not used below.
  predicted = np.argmax(p, axis=-1) #
  # print(predicted)
  # print(ind)
  output_word = ""
  
  # NOTE(review): class 0 is the padding slot and has no character in the
  # tokenizer's word_index; if it lands in `ind`, the index_word lookup below
  # presumably raises KeyError -- confirm.
  for w in ind:
      if pass_tokenizer.index_word[w] ==  '\n':
        # End marker among the candidates: record the current prefix as finished.
        outputs.append(seed_texts[i])
        # NOTE(review): popping slot i while enumerate() is still walking the
        # list shifts later items left, so the item that moves into slot i is
        # skipped by the outer loop -- confirm this is acceptable.
        seed_texts.pop(i)
      else:
        # Queue the prefix extended by this candidate character.
        output_word = pass_tokenizer.index_word[w]
        seed_texts.append(partial +"" + output_word)
  #print(seed_texts)
  # Shrink the branching factor after every processed prefix
  # (90, 46, 24, 13, 7, 4, 3, 2, 2, ...); it never drops below 2.
  num_words= num_words//2 +1
      
print(outputs)
print(len(outputs))



['\tp']
['\tpink', '\tppink', '\tppace', '\tppark', '\tppoop', '\tppppp', '\tpster', '\tpsppp', '\tphopo', '\tpussy', '\tpurta', '\tpanda', '\tpasss', '\tpoops', '\tpolly', '\tpeace', '\tpenny', '\tpillo', '\tplato', '\tppilit', '\tppinky', '\tpprish', '\tpprint', '\tpplone', '\tpploss', '\tpplast', '\tpplate', '\tppacio', '\tpparky', '\tppoppy', '\tppp101', '\tppp120', '\tpppp11', '\tpppppp', '\tpstris', '\tpsalia', '\tpsalla', '\tpsaris', '\tpsarda', '\tpspent', '\tpspert', '\tpspper', '\tpsones', '\tpsolis', '\tpsolla', '\tphanda', '\tphante', '\tphaint', '\tphopon', '\tphoner', '\tphista', '\tphildy', '\tpusten', '\tpustic', '\tpussy1', '\tpurplo', '\tpandel', '\tpantha', '\tpastri', '\tpoolie', '\tpoopin', '\tpoline', '\tpolly1', '\tpearut', '\tpentor', '\tpentit', '\tpilita', '\tpilina', '\tpintal', '\tpintor', '\tpinked', '\tplonit', '\tplondo', '\tplater', '\tplayen', '\tprondo', '\tpronet', '\tpritis', '\tpritty', '\tprinca', '\tppilina', '\tppilito', '\tppinky1', '\tpproner',

In [None]:
# Reload the saved model from Drive and train it for another 100 epochs.
from tensorflow import keras
model = keras.models.load_model("/content/drive/MyDrive/pass_seq")
model.fit(
    x=[email_input_sequences, pass_input_sequences],
    y=one_hot_labels,
    batch_size=32,
    epochs=100,
    validation_split=0.2,
    verbose=1,
)


In [None]:
# from https://keras.io/examples/nlp/lstm_seq2seq/
from tensorflow import keras
# summary() prints the table itself and returns None, so wrapping it in
# print() only appended a stray "None" line.
model.summary()
# encoder_inputs = model.input[0]  # input_1
# e0 = model.layers[1](encoder_inputs)  # lstm_1
# encoder_outputs, state_h_enc, state_c_enc= model.layers[3].output

 
# encoder_states = [state_h_enc, state_c_enc]
# encoder_model = keras.Model(encoder_inputs, encoder_states)
# print(encoder_model.summary())

# decoder_inputs = model.input[1]  # input_2
# decoder_state_input_h = keras.Input(shape=(latent_dim,), name="input_3h")
# decoder_state_input_c = keras.Input(shape=(latent_dim,), name="input_4c")
# decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]
# decoder_lstm_in = model.layers[4]

# d1 = decoder_lstm_in(
#     decoder_inputs, initial_state=decoder_states_inputs
# )

# d1_d2=model.layers[5]
# decoder_outputs, state_h_dec, state_c_dec=d1_d2(d1)

# decoder_states = [state_h_dec, state_c_dec]
# decoder_dense = model.layers[7]
# decoder_dropout= model.layers[6]
# t=decoder_dropout(decoder_outputs)
# decoder_outputs = decoder_dense(t)
# decoder_model = keras.Model(
#     [decoder_inputs] + decoder_states_inputs, [decoder_outputs] + decoder_states
# )


# print(decoder_model.summary())
# # Reverse-lookup token index to decode sequences back to
# # something readable.
# Index -> character lookup tables (the inverse of each tokenizer's word_index).
reverse_input_char_index = {index: char for char, index in email_tokenizer.word_index.items()}
reverse_target_char_index = {index: char for char, index in pass_tokenizer.word_index.items()}


def decode_sequence(input_seq):
    """Greedily generate one password continuation per built-in seed.

    For each seed in ["\\t", " ", "j"] the function repeatedly asks `model`
    for the most likely next character, given `input_seq` and the seed plus
    the characters produced so far. Generation for a seed stops when the end
    marker '\\n' is produced, when the padding class is predicted, or when the
    continuation grows longer than `max_sequence_len_pass`.

    The previous implementation could not run: it called an `encoder_model`
    that is never defined, and it fed one-hot 3-D targets to a model whose
    second input is a left-padded row of integer character indexes and whose
    output is a single next-character distribution per sample.

    Args:
        input_seq: Padded e-mail token rows for a batch of size 1, e.g.
            `email_input_sequences[k : k + 1]`.

    Returns:
        A list of strings, one per seed, holding only the generated
        characters (the seed itself is not included).
    """
    seed_texts = ["\t", " ", "j"]
    # The decoder input is one token shorter than the longest password sequence.
    context_len = max_sequence_len_pass - 1
    outputs = []
    for seed in seed_texts:
        decoded_sentence = ""
        while True:
            # Encode the text so far exactly like the training inputs:
            # integer indexes, left-padded (and left-truncated) to context_len.
            token_list = pass_tokenizer.texts_to_sequences([seed + decoded_sentence])
            token_list = pad_sequences(token_list, maxlen=context_len, padding='pre')

            probabilities = model.predict([input_seq, token_list], verbose=0)[0]
            sampled_token_index = int(np.argmax(probabilities))
            # Index 0 is the padding slot and has no character.
            sampled_char = reverse_target_char_index.get(sampled_token_index, "")
            if sampled_char == "":
                break
            decoded_sentence += sampled_char

            # Exit condition: either find the stop character or hit max length.
            if sampled_char == '\n' or len(decoded_sentence) > max_sequence_len_pass:
                break
        outputs.append(decoded_sentence)

    return outputs

for seq_index in range(70000,70003):
    # Take one row of the training inputs for trying out decoding.
    input_seq = email_input_sequences[seq_index : seq_index + 1]
    decoded_sentence = decode_sequence(input_seq)
    print("-")

    # Probe the model with the fixed prefix "\tpa". The model's second input is
    # a left-padded row of integer character indexes and its output is a single
    # distribution per sample, so the previous one-hot 3-D target and the
    # [0, -1, :] indexing did not match it.
    probe = pass_tokenizer.texts_to_sequences(["\tpa"])
    probe = pad_sequences(probe, maxlen=max_sequence_len_pass - 1, padding='pre')
    test = int(np.argmax(model.predict([input_seq, probe])[0]))
    # Index 0 is the padding slot and has no character.
    print(pass_tokenizer.index_word.get(test, ''))

    # Rebuild the e-mail from the row itself. `email_input_sequences` holds one
    # row per password prefix, so row `seq_index` does not correspond to
    # `email_corpus[seq_index]`.
    input_email = "".join(
        reverse_input_char_index[token] for token in input_seq[0] if token != 0
    )
    print("Input email:", input_email)
    print("Decoded pass:", decoded_sentence)


Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 53)]         0                                            
__________________________________________________________________________________________________
embedding (Embedding)           (None, 53, 256)      21760       input_1[0][0]                    
__________________________________________________________________________________________________
input_2 (InputLayer)            [(None, 36)]         0                                            
__________________________________________________________________________________________________
bidirectional (Bidirectional)   [(None, 53, 512), (N 1050624     embedding[0][0]                  
______________________________________________________________________________________________

NameError: ignored

In [None]:
import tensorflow as tf
from tensorflow import Tensor
import tensorflow.keras as keras


def downsample_residual(x:Tensor, downsample: bool, filters: int, kernel_size: int = 3):
    """Residual block of two convolutions with an optional stride-2 downsample.

    Main path: Conv2D (stride 2 when `downsample`, else 1) -> ReLU -> Conv2D.
    The shortcut is added to the main path and the sum passes through a ReLU.

    Args:
        x: Input feature map (assumed channels-last, i.e. channels on the
            last axis, which is the Keras default -- TODO confirm).
        downsample: When True, halve the spatial size on both paths.
        filters: Number of output channels.
        kernel_size: Side length of the square kernels on the main path.

    Returns:
        The output tensor of the block.
    """
    #https://github.com/lidless-vision/keras-residual-vae-tf2.3/blob/43b558f97b74d187ccb8811c3bc7f9e303f5f6c3/vae.py#L94 has batch norm and dropout
    stride = 2 if downsample else 1
    y = keras.layers.Conv2D(kernel_size=(kernel_size, kernel_size), strides=stride, filters=filters, padding='same')(x)
    y = keras.layers.ReLU()(y)
    y = keras.layers.Conv2D(kernel_size=(kernel_size, kernel_size), strides=(1, 1), filters=filters, padding='same')(y)

    if downsample:
        # padding='same' makes the shortcut produce ceil(n / 2) like the main
        # path; with the default 'valid' padding odd spatial sizes came out one
        # pixel smaller and the Add below failed. Even sizes are unaffected.
        x = keras.layers.Conv2D(kernel_size=2, strides=2, filters=filters, padding='same')(x)
    elif x.shape[-1] != filters:
        # Without downsampling the shortcut kept the input channel count, so
        # the Add failed whenever it differed from `filters`; project with 1x1.
        x = keras.layers.Conv2D(kernel_size=1, strides=1, filters=filters, padding='same')(x)

    # The per-layer shape prints were debugging output and have been removed.
    out = keras.layers.Add()([x, y])
    out = keras.layers.ReLU()(out)
    return out

def upsample_residual(x:Tensor, upsample: bool, filters: int, kernel_size: int = 3):
    """Residual block of two transposed convolutions with an optional 2x upsample.

    Main path: Conv2DTranspose (stride 2 when `upsample`, else 1) -> ReLU ->
    Conv2DTranspose. The shortcut is added to the main path and the sum passes
    through a ReLU.

    Args:
        x: Input feature map (assumed channels-last, i.e. channels on the
            last axis, which is the Keras default -- TODO confirm).
        upsample: When True, double the spatial size on both paths.
        filters: Number of output channels.
        kernel_size: Kernel size of the transposed convolutions on the main path.

    Returns:
        The output tensor of the block.
    """
    stride = 2 if upsample else 1
    y = keras.layers.Conv2DTranspose(filters=filters,
                                     kernel_size=kernel_size,
                                     strides=stride,
                                     padding='same')(x)
    y = keras.layers.ReLU()(y)
    y = keras.layers.Conv2DTranspose(filters=filters,
                                     kernel_size=kernel_size,
                                     strides=1,
                                     padding='same')(y)

    if upsample:
        # Shortcut: a 2x2 transposed convolution with stride 2.
        x = keras.layers.Conv2DTranspose(filters=filters,
                                         kernel_size=2,
                                         strides=2,
                                         )(x)
    elif x.shape[-1] != filters:
        # Without upsampling the shortcut kept the input channel count, so the
        # Add failed whenever it differed from `filters`; project with 1x1.
        x = keras.layers.Conv2DTranspose(filters=filters,
                                         kernel_size=1,
                                         strides=1,
                                         padding='same')(x)

    out = keras.layers.Add()([x, y])
    out = keras.layers.ReLU()(out)

    return out




    
     
def define_discriminator(in_shape: tuple, n_classes: int):
    """Build and compile a label-conditioned discriminator.

    The class label is embedded, projected by a Dense layer to as many values
    as one input image holds, reshaped to `in_shape` and concatenated with the
    image. The result goes through one downsampling residual block, is
    flattened, and ends in a single sigmoid unit.

    Args:
        in_shape: Shape of one input image, e.g. (28, 28, 1).
        n_classes: Number of distinct labels (size of the embedding table).

    Returns:
        A compiled `keras.Model` taking `[image, label]` and returning one
        sigmoid output per sample.
    """
    in_shape = tuple(int(dim) for dim in in_shape)
    # Plain Python product: Dense needs an integer unit count, not a tensor.
    n_nodes = 1
    for dim in in_shape:
        n_nodes *= dim

    # Label branch: embed the label and shape it like an image.
    label_in = keras.Input(shape=(1,))
    label_embedding = keras.layers.Embedding(n_classes, 50)(label_in)
    li = keras.layers.Dense(n_nodes)(label_embedding)
    li = keras.layers.Reshape(in_shape)(li)

    in_img = keras.Input(shape=in_shape)

    # Join image and label map. Previously only `in_img` was concatenated, so
    # the label branch never reached the output.
    merge = keras.layers.Concatenate()([in_img, li])

    ds = downsample_residual(merge, downsample=True, filters=128, kernel_size=3)

    flat = keras.layers.Flatten()(ds)
    flat = keras.layers.Dropout(0.4)(flat)

    belief = keras.layers.Dense(1, activation='sigmoid')(flat)
    model = keras.Model([in_img, label_in], belief)
    # `learning_rate` replaces the deprecated `lr` keyword.
    opt = keras.optimizers.Adam(learning_rate=2e-4, beta_1=0.5)
    #need to look up gan losses
    model.compile(loss='binary_crossentropy', optimizer=opt, metrics=['accuracy'])
    return model

# Build a discriminator for 28x28 single-channel images and 10 labels.
m = define_discriminator((28, 28, 1), 10)

# summary() prints the table itself and returns None, so it is not wrapped in print().
m.summary()