In [1]:
import pandas as pd
import numpy as np
import re

In [2]:
df = pd.read_parquet("hf://datasets/shahules786/PoetryFoundationData/data/train-00000-of-00001-486832872ed96d17.parquet")
print(f"\n\n======\n\n")

print(df.columns)

newyork = df[df['author'].isin(["John Ashbery", "Barbara Guest", "James Schuyler", "Kenneth Koch", "Frank O'Hara"])]
shake = df[df['author'] == 'William Shakespeare']

print(f"Shakespeare: {len(shake)} examples\nNew Yorkers: {len(newyork)} examples")
print(f"Shakespeare avg length: {np.average([len(poem) for poem in shake['content']])}\nNew Yorkers avg length: {np.average([len(poem) for poem in newyork['content']])}")

  from .autonotebook import tqdm as notebook_tqdm






Index(['poem name', 'content', 'author', 'type', 'age'], dtype='object')
Shakespeare: 85 examples
New Yorkers: 81 examples
Shakespeare avg length: 1468.5058823529412
New Yorkers avg length: 1810.6049382716049


In [3]:
def load_embedding_model():
    """ Load GloVe Vectors
        Return:
            wv_from_bin: All 400000 embeddings, each length 50
    """
    import gensim.downloader as api
    wv_from_bin = api.load("glove-wiki-gigaword-50")
    # wv_from_bin = api.load("glove.6B/glove.6B.50d.txt")
    print("Loaded vocab size %i" % len(list(wv_from_bin.index_to_key)))
    return wv_from_bin
wv_from_bin = load_embedding_model()

Loaded vocab size 400000


In [4]:
def process_poem_debug(poem) :
  out = re.sub(r'[\r\n]+', ' ', poem)
  out = re.sub(r'[.?!]+', '.', out)
  out = re.sub(r'\s+', ' ', out)
  out = out.lower()
  sentence_list = out.split('.')
  sentence_list = [sentence for sentence in sentence_list if len(sentence) > 0]
  print(sentence_list)
  for i in range(len(sentence_list)) :
     sentence_list[i] = re.sub(r'[^a-zA-Z ]', '', sentence_list[i])
  return [[word for word in sentence.split(' ') if word != ''] for sentence in sentence_list ]
process_poem_debug("this is a sentence. This is -another :SENTENCE!!!!!\nAND this is a question? again.")

['this is a sentence', ' this is -another :sentence', ' and this is a question', ' again']


[['this', 'is', 'a', 'sentence'],
 ['this', 'is', 'another', 'sentence'],
 ['and', 'this', 'is', 'a', 'question'],
 ['again']]

In [5]:
def process_poem_into_list_of_words(poem) :
  out = re.sub(r'[\r\n]+', ' ', poem)
  out = re.sub(r'[.?!]+', '.', out)
  out = re.sub(r'\s+', ' ', out)
  out = out.lower()
  sentence_list = out.split('.')
  sentence_list = [sentence for sentence in sentence_list if len(sentence) > 0]
  for i in range(len(sentence_list)) :
     sentence_list[i] = re.sub(r'[^a-zA-Z ]', '', sentence_list[i])
  return [[word for word in sentence.split(' ') if word != ''] for sentence in sentence_list ]

newyork_processed = [] # [process_poem_into_list_of_words(newyork['content'].iloc[i]) for i in range(len(newyork))]
for i in range(len(newyork)) :
   newyork_processed += process_poem_into_list_of_words(newyork['content'].iloc[i])
newyork_labels = [0 for i in range(len(newyork_processed))]
shake_processed = [] # [process_poem_into_list_of_words(shake['content'].iloc[i]) for i in range(len(shake))]
for i in range(len(shake)) :
   shake_processed += process_poem_into_list_of_words(shake['content'].iloc[i])
shake_labels = [1 for i in range(len(shake_processed))]

print(f"Number of New Yorker sentences: {len(newyork_processed)} with avg length of {np.mean([len(sentence) for sentence in newyork_processed])}")
print(f"eg:")
for i in range(10) :
   print(f"   {' '.join(newyork_processed[i])}")
print(f"\nNumber of Shakespearean sentences: {len(shake_processed)} with avg length of {np.mean([len(sentence) for sentence in shake_processed])}")
print(f"eg:")
for i in range(10) :
   print(f"   {' '.join(shake_processed[i])}")

processed_poems = newyork_processed + shake_processed
labels = newyork_labels + shake_labels
# perm = np.random.permutation(len(processed_poems))
# processed_poems = processed_poems[perm]
# labels = labels[perm]
newyork_vocab = set([word for poem in newyork_processed for word in poem])
shake_vocab = set([word for poem in shake_processed for word in poem])
vocab = sorted(list(set([word for poem in processed_poems for word in poem])))

print(f"\n\nTotal vocab: {len(vocab)}\nNew York vocab: {len(newyork_vocab)}\nShakespeare Vocab: {len(shake_vocab)}\nIntersection: {len(shake_vocab & newyork_vocab)}")
print(f"New York vocab:")
for i in range(10) :
   print(f"   {list(newyork_vocab)[i]}")
print(f"Shakespeare vocab:")
for i in range(10) :
   print(f"   {list(shake_vocab)[i]}")
print(f"Both vocab:")
for i in range(10) :
   print(f"   {list(newyork_vocab & shake_vocab)[i]}")

word_to_idx = {word:idx for idx, word in enumerate(vocab)}
idx_to_word = {idx:word for idx, word in enumerate(vocab)}

for poem in processed_poems :
  for i in range(len(poem)) :
    poem[i] = word_to_idx[poem[i]]

# print(processed_poems[0])

embedding_matrix = np.zeros((len(vocab), 50))
bad_count = 0
for i, word in enumerate(vocab):
    try:
        embedding_matrix[i] = wv_from_bin.get_vector(word)
    except:
      #   print("this is bad", word)
        bad_count += 1
print(f"\nTotal Bad Words (not in GloVe): {bad_count} out of total vocab {len(vocab)}")

Number of New Yorker sentences: 1412 with avg length of 17.378895184135978
eg:
   is anything central
   orchards flung out on the land urban forests rustic plantations kneehigh hills
   are place names central
   elm grove adcock corner story book farm
   as they concur with a rush at eye level beating themselves into eyes which have had enough thank you no more thank you
   and they come on like scenery mingled with darkness the damp plains overgrown suburbs places of known civic pride of civil obscurity
   these are connected to my version of america but the juice is elsewhere
   this morning as i walked out of your room after breakfast crosshatched with backward and forward glances backward into light forward into unfamiliar light was it our doing and was it the material the lumber of life or of lives we were measuring counting
   a mood soon to be forgotten in crossed girders of light cool downtown shadow in this morning that has seized us again
   i know that i braid too much on 

In [6]:
import tensorflow as tf
from tensorflow.keras.preprocessing.sequence import pad_sequences

sentence_lengths = [len(poem) for poem in processed_poems]
max_length = max(sentence_lengths)
avg_length = np.mean(sentence_lengths)
print(f"Max Length = {max_length}\nAvg Length = {avg_length}")
max_length = 50
padded_poems = pad_sequences(processed_poems, maxlen=max_length, padding='post', truncating='post')
print(f"Padded Poems shape is {np.array(padded_poems).shape}")

Max Length = 381
Avg Length = 20.304543409806566
Padded Poems shape is (2223, 50)


In [7]:
perm = np.random.permutation(len(padded_poems))
shuffled_poems = np.array(padded_poems)[perm]
shuffled_labels = np.array(labels)[perm]

training_data = shuffled_poems[:-100]
training_labels = shuffled_labels[:-100]

validation_data = shuffled_poems[-100:]
validation_labels = shuffled_labels[-100:]

## LSTM Discriminator Model

In [8]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Embedding

## hyperparams
init_lr = 0.1
lr_decay_rate = 0.5
lr_decay_steps = 100
dropout_p = 0.2
l2_lambda = 0.005

model_lstm = tf.keras.Sequential()
e = Embedding(len(vocab), 50, weights=[embedding_matrix], input_length = max_length, trainable=False)
model_lstm.add(e)
model_lstm.add(LSTM(100, input_shape = (max_length, 50)))
model_lstm.add(Dense(100, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(l2_lambda)))
model_lstm.add(Dense(50, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(l2_lambda)))
model_lstm.add(Dense(1, activation='sigmoid'))
lr_schedule = tf.keras.optimizers.schedules.InverseTimeDecay(init_lr, decay_rate=lr_decay_rate, decay_steps=lr_decay_steps)
optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule)
model_lstm.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])
print(model_lstm.summary())

  super().__init__(**kwargs)


None


In [9]:
model_lstm.fit(np.array(training_data), np.array(training_labels), epochs=20, verbose=1)

Epoch 1/20
[1m67/67[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 14ms/step - accuracy: 0.6075 - loss: 2.6031
Epoch 2/20
[1m67/67[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step - accuracy: 0.6356 - loss: 0.7691
Epoch 3/20
[1m67/67[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step - accuracy: 0.6199 - loss: 0.6802
Epoch 4/20
[1m67/67[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step - accuracy: 0.6236 - loss: 0.6687
Epoch 5/20
[1m67/67[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step - accuracy: 0.6361 - loss: 0.6570
Epoch 6/20
[1m67/67[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step - accuracy: 0.6706 - loss: 0.6365
Epoch 7/20
[1m67/67[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step - accuracy: 0.6378 - loss: 0.6567
Epoch 8/20
[1m67/67[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step - accuracy: 0.6558 - loss: 0.6447
Epoch 9/20
[1m67/67[0m [32m━━━━━━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x2c4b55cf2f0>

In [10]:
loss, accuracy = model_lstm.evaluate(np.array(validation_data), np.array(validation_labels), verbose=1)

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5897 - loss: 0.6818  


In [11]:
print(loss, accuracy)

0.6646882891654968 0.6200000047683716


## FCN - Logistic Regression Discriminator

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding, Flatten, Dropout

## hyperparams
init_lr = 0.1
lr_decay_rate = 0.5
lr_decay_steps = 100
dropout_p = 0.2
l2_lambda = 0.005

model_fcn = tf.keras.Sequential()
e = Embedding(len(vocab), 50, weights=[embedding_matrix], input_length = max_length, trainable=False)
model_fcn.add(e)
model_fcn.add(Flatten())
model_fcn.add(Dropout(dropout_p))
model_fcn.add(Dense(1, activation='sigmoid', kernel_regularizer=tf.keras.regularizers.l2(l2_lambda)))
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(init_lr, decay_rate=lr_decay_rate, decay_steps=lr_decay_steps)
optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule)
model_fcn.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])
print(model_fcn.summary())

None


In [13]:
model_fcn.fit(np.array(training_data), np.array(training_labels), epochs=75, verbose=1)

Epoch 1/75
[1m67/67[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.6745 - loss: 3.8762
Epoch 2/75
[1m67/67[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7558 - loss: 1.8242
Epoch 3/75
[1m67/67[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7690 - loss: 1.2614
Epoch 4/75
[1m67/67[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8153 - loss: 0.7983
Epoch 5/75
[1m67/67[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.8272 - loss: 0.5977
Epoch 6/75
[1m67/67[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8382 - loss: 0.5394
Epoch 7/75
[1m67/67[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8625 - loss: 0.4485
Epoch 8/75
[1m67/67[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8645 - loss: 0.4255
Epoch 9/75
[1m67/67[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[

<keras.src.callbacks.history.History at 0x2c4bcb50d40>

In [14]:
loss, accuracy = model_fcn.evaluate(np.array(validation_data), np.array(validation_labels), verbose=1)
print(loss, accuracy)
## best so far is about 85% at around 50 loss

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0s/step - accuracy: 0.8342 - loss: 0.4590   
0.4878181219100952 0.8199999928474426


# Iterative Back Training

First define the two models (S --> NY) and (NY --> S)

In [15]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Embedding, Dropout, Flatten

## hyperparams for both translation models
dropout_p = 0.2
l2_lambda = 0.005
## end hyperparams

In [16]:
model_s_to_ny = tf.keras.Sequential()
e = Embedding(len(vocab), 50, weights=[embedding_matrix], input_length = max_length, trainable=False)
model_s_to_ny.add(e)
model_s_to_ny.add(LSTM(100, input_shape = (max_length, 50)))
model_s_to_ny.add(Dropout(dropout_p))
model_s_to_ny.add(Dense(100, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(l2_lambda)))
model_s_to_ny.add(Dropout(dropout_p))
model_s_to_ny.add(Dense(100, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(l2_lambda)))
model_s_to_ny.add(Dense(len(vocab), activation='softmax'))
print(model_s_to_ny.summary())

None


In [17]:
model_ny_to_s = tf.keras.Sequential()
e = Embedding(len(vocab), 50, weights=[embedding_matrix], input_length = max_length, trainable=False)
model_ny_to_s.add(e)
model_ny_to_s.add(LSTM(100, input_shape = (max_length, 50)))
model_ny_to_s.add(Dropout(dropout_p))
model_ny_to_s.add(Dense(100, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(l2_lambda)))
model_ny_to_s.add(Dropout(dropout_p))
model_ny_to_s.add(Dense(100, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(l2_lambda)))
model_ny_to_s.add(Dense(len(vocab), activation='softmax'))

print(model_ny_to_s.summary())

None


Now define the discriminator model

In [30]:

model_discriminator = tf.keras.Sequential()
e = Embedding(len(vocab), 50, weights=[embedding_matrix], input_length = max_length, trainable=False)
model_discriminator.add(e)
model_discriminator.add(Flatten())
model_discriminator.add(Dropout(dropout_p))
model_discriminator.add(Dense(1, activation='sigmoid', kernel_regularizer=tf.keras.regularizers.l2(l2_lambda)))



Now, define optimizers for both the translation models and the discriminator model

In [19]:
## disc opt hyperparam
trans_init_lr = 0.1
# disc_decay_rate = 0.5
trans_decay_steps = 1000
trans_decay_alpha = 0.0

trans_lr_schedule = tf.keras.optimizers.schedules.CosineDecay(trans_init_lr, trans_decay_steps, alpha=trans_decay_alpha)
optimizer_trans = tf.keras.optimizers.Adam(learning_rate=trans_lr_schedule)

In [20]:
## disc opt hyperparam
disc_init_lr = 0.1
# disc_decay_rate = 0.5
disc_decay_steps = 1000
disc_decay_alpha = 0.0

disc_lr_schedule = tf.keras.optimizers.schedules.CosineDecay(disc_init_lr, disc_decay_steps, alpha=disc_decay_alpha)
optimizer_disc = tf.keras.optimizers.Adam(learning_rate=disc_lr_schedule)

Pretrain the discriminator model for use in the IBT training loop

In [31]:
model_discriminator.compile(optimizer=optimizer_disc, loss='binary_crossentropy', metrics=['accuracy'])
model_discriminator.summary()
model_discriminator.fit(np.array(training_data), np.array(training_labels), epochs=75, verbose=1)
loss, accuracy = model_discriminator.evaluate(np.array(validation_data), np.array(validation_labels), verbose=1)
print(loss, accuracy)

Epoch 1/75
[1m67/67[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.6254 - loss: 8.0983
Epoch 2/75
[1m67/67[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7109 - loss: 2.8355
Epoch 3/75
[1m67/67[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6791 - loss: 4.5700
Epoch 4/75
[1m67/67[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7619 - loss: 2.0303
Epoch 5/75
[1m67/67[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7692 - loss: 1.7464
Epoch 6/75
[1m67/67[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7659 - loss: 1.6173
Epoch 7/75
[1m67/67[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7253 - loss: 1.8871
Epoch 8/75
[1m67/67[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7550 - loss: 1.6226
Epoch 9/75
[1m67/67[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[

Now, define the training loop of IBT (this is simplified from the paper, essentially just a GAN right now, it is not modeling a parallel corpus of data)

In [41]:
e

<Embedding name=embedding_5, built=True>

In [40]:
for epoch in range(num_epochs) : # num epochs
    print(f"Epoch {epoch + 1} starting...")
    curr_loss_trans = 0
    curr_loss_disc = 0
    for x_s_to_ny_b, x_ny_to_s_b in combined_ds :
        print(x_s_to_ny_b.shape, x_ny_to_s_b.shape)
        snyout = model_s_to_ny(x_s_to_ny_b)
        nysout = model_ny_to_s(x_ny_to_s_b)
        print(snyout.shape, nysout.shape)
        snyarg = tf.argmax(snyout, axis=1)
        nysarg = tf.argmax(nysout, axis=1)
        print(snyarg.shape, nysarg.shape)
        snyoh = tf.one_hot(snyarg, depth=len(vocab))
        nysoh = tf.one_hot(nysarg, depth=len(vocab))
        print(snyoh.shape, nysoh.shape)
        snye = e(snyoh)
        nyse = e(nysoh)
        print(snye.shape, nyse.shape)
        break
    break

Epoch 1 starting...
(20, 50) (40, 50)
(20, 8505) (40, 8505)
(20,) (40,)
(20, 8505) (40, 8505)
(20, 8505, 50) (40, 8505, 50)


In [34]:
def embed_outputs(outputs) :
    one_hots = tf.one_hot(tf.argmax(outputs, axis=1), depth=len(vocab))
    embeddings = e(one_hots)
    return embeddings

In [35]:
def train_step(x_s_to_ny, x_ny_to_s, recon_weight=1, adv_weight=1, verbose=False) :
    with tf.GradientTape(persistent = True) as tape :
        # forward pass
        # s to ny forward and reconstruction
        if verbose : print(f"Starting Shakespeare to New Yorker forward pass")
        x_s_to_ny_out = embed_outputs(model_s_to_ny(x_s_to_ny))
        x_s_to_ny_recon = embed_outputs(model_ny_to_s(x_s_to_ny_out))
        x_s_to_ny_out_disc = model_discriminator(x_s_to_ny_out)
        # ny to s forward and reconstruction
        if verbose : print(f"Starting New Yorker to Shakespeare forward pass")
        x_ny_to_s_out = embed_outputs(model_ny_to_s(x_ny_to_s))
        x_ny_to_s_recon = embed_outputs(model_s_to_ny(x_ny_to_s_out))
        x_ny_to_s_out_disc = model_discriminator(x_ny_to_s_out)

        # losses
        if verbose : print(f"Starting loss calculations")
        # reconstruction loss
        loss_s_to_ny_recon = tf.keras.losses.binary_crossentropy(x_s_to_ny, x_s_to_ny_recon)
        loss_ny_to_s_recon = tf.keras.losses.binary_crossentropy(x_ny_to_s, x_ny_to_s_recon)
        loss_recon = loss_s_to_ny_recon + loss_ny_to_s_recon
        if verbose : print(f"Calculated reconstruction loss as {loss_recon}")
        # adversarial loss
        loss_s_to_ny_adv = tf.keras.losses.binary_crossentropy([0 for i in range(len(x_s_to_ny))], x_s_to_ny_out_disc)
        loss_ny_to_s_adv = tf.keras.losses.binary_crossentropy([1 for i in range(len(x_ny_to_s))], x_ny_to_s_out_disc)
        loss_adv = loss_s_to_ny_adv + loss_ny_to_s_adv
        if verbose : print(f"Calculated adversarial loss as {loss_adv}")
        # total loss for translators
        loss_trans = loss_recon*recon_weight + loss_adv*adv_weight
        if verbose : print(f"Calculated total translation loss as {loss_trans}")
        # loss for discriminators
        loss_disc = loss_adv
        if verbose : print(f"Calculated total loss for discriminator as {loss_disc}")
    
    # gradients
    if verbose : print(f"Starting to apply gradients and update parameters for the translator models")
    gradients_trans = tape.gradient(loss_trans, model_s_to_ny.trainable_variables + model_ny_to_s.trainable_variables)
    optimizer_trans.apply_gradients(zip(gradients_trans, model_s_to_ny.trainable_variables + model_ny_to_s.trainable_variables))
    if verbose : print(f"Finished updating the translator models")

    if verbose : print(f"Starting to apply gradients and update parameters for the discriminator model")
    gradients_disc = tape.gradient(loss_disc, model_discriminator.trainable_variables)
    optimizer_disc.apply_gradients(zip(gradients_disc, model_discriminator.trainable_variables))
    if verbose : print(f"Finished updating the discriminator model")

    if verbose : print(f"Finishing training step.\n")
    return loss_trans, loss_disc



Now, train over some batches for a few epochs

In [26]:
num_batches_to_make = 35

raw_x_s_to_ny = [training_data[i] for i in range(len(training_labels)) if labels[i] == 1]
raw_x_ny_to_s = [training_data[i] for i in range(len(training_labels)) if labels[i] == 0]

print(f"x_s_to_ny shape is {np.array(raw_x_s_to_ny).shape}")
print(f"x_ny_to_s shape is {np.array(raw_x_ny_to_s).shape}")

ds_s_to_ny = tf.data.Dataset.from_tensor_slices(
    raw_x_s_to_ny
)
ds_ny_to_s = tf.data.Dataset.from_tensor_slices(
    raw_x_ny_to_s
)

ds_s_to_ny = ds_s_to_ny.batch(np.array(raw_x_s_to_ny).shape[0] // num_batches_to_make)
ds_ny_to_s = ds_ny_to_s.batch(np.array(raw_x_ny_to_s).shape[0] // num_batches_to_make)

for batch in ds_s_to_ny.take(1):  # Take the first batch
    print("Batch size for s to ny:", batch.shape[0])
num_batches = tf.data.experimental.cardinality(ds_s_to_ny).numpy()
print("Number of batches:", num_batches)

for batch in ds_ny_to_s.take(1):  # Take the first batch
    print("Batch size for ny to s:", batch.shape[0])
num_batches = tf.data.experimental.cardinality(ds_ny_to_s).numpy()
print("Number of batches:", num_batches)

combined_ds = tf.data.Dataset.zip((ds_s_to_ny, ds_ny_to_s))
print(combined_ds)

x_s_to_ny shape is (711, 50)
x_ny_to_s shape is (1412, 50)
Batch size for s to ny: 20
Number of batches: 36
Batch size for ny to s: 40
Number of batches: 36
<_ZipDataset element_spec=(TensorSpec(shape=(None, 50), dtype=tf.int32, name=None), TensorSpec(shape=(None, 50), dtype=tf.int32, name=None))>


In [36]:
num_epochs = 1
recon_weight = 1
adv_weight = 1
##
for epoch in range(num_epochs) : # num epochs
    print(f"Epoch {epoch + 1} starting...")
    curr_loss_trans = 0
    curr_loss_disc = 0
    for x_s_to_ny_b, x_ny_to_s_b in combined_ds :
        loss_trans, loss_disc = train_step(x_s_to_ny_b, x_ny_to_s_b, recon_weight=recon_weight, adv_weight=adv_weight, verbose=True)
        curr_loss_trans += loss_trans
        curr_loss_disc += loss_disc
        print(f"   Batch Translator Loss: {loss_trans}\n   Batch Discriminator Loss: {loss_disc}")
    print(f"Total Epoch Translator Loss: {curr_loss_trans}\nTotal Epoch Discriminator Loss: {curr_loss_disc}\n")

Epoch 1 starting...
Starting Shakespeare to New Yorker forward pass


ValueError: Exception encountered when calling Sequential.call().

[1mInvalid input shape for input [[[ 0.21705   0.46515  -0.46757  ... -0.043782  0.41013   0.1796  ]
  [ 0.21705   0.46515  -0.46757  ... -0.043782  0.41013   0.1796  ]
  [ 0.21705   0.46515  -0.46757  ... -0.043782  0.41013   0.1796  ]
  ...
  [ 0.21705   0.46515  -0.46757  ... -0.043782  0.41013   0.1796  ]
  [ 0.21705   0.46515  -0.46757  ... -0.043782  0.41013   0.1796  ]
  [ 0.21705   0.46515  -0.46757  ... -0.043782  0.41013   0.1796  ]]

 [[ 0.21705   0.46515  -0.46757  ... -0.043782  0.41013   0.1796  ]
  [ 0.21705   0.46515  -0.46757  ... -0.043782  0.41013   0.1796  ]
  [ 0.21705   0.46515  -0.46757  ... -0.043782  0.41013   0.1796  ]
  ...
  [ 0.21705   0.46515  -0.46757  ... -0.043782  0.41013   0.1796  ]
  [ 0.21705   0.46515  -0.46757  ... -0.043782  0.41013   0.1796  ]
  [ 0.21705   0.46515  -0.46757  ... -0.043782  0.41013   0.1796  ]]

 [[ 0.21705   0.46515  -0.46757  ... -0.043782  0.41013   0.1796  ]
  [ 0.21705   0.46515  -0.46757  ... -0.043782  0.41013   0.1796  ]
  [ 0.21705   0.46515  -0.46757  ... -0.043782  0.41013   0.1796  ]
  ...
  [ 0.21705   0.46515  -0.46757  ... -0.043782  0.41013   0.1796  ]
  [ 0.21705   0.46515  -0.46757  ... -0.043782  0.41013   0.1796  ]
  [ 0.21705   0.46515  -0.46757  ... -0.043782  0.41013   0.1796  ]]

 ...

 [[ 0.21705   0.46515  -0.46757  ... -0.043782  0.41013   0.1796  ]
  [ 0.21705   0.46515  -0.46757  ... -0.043782  0.41013   0.1796  ]
  [ 0.21705   0.46515  -0.46757  ... -0.043782  0.41013   0.1796  ]
  ...
  [ 0.21705   0.46515  -0.46757  ... -0.043782  0.41013   0.1796  ]
  [ 0.21705   0.46515  -0.46757  ... -0.043782  0.41013   0.1796  ]
  [ 0.21705   0.46515  -0.46757  ... -0.043782  0.41013   0.1796  ]]

 [[ 0.21705   0.46515  -0.46757  ... -0.043782  0.41013   0.1796  ]
  [ 0.21705   0.46515  -0.46757  ... -0.043782  0.41013   0.1796  ]
  [ 0.21705   0.46515  -0.46757  ... -0.043782  0.41013   0.1796  ]
  ...
  [ 0.21705   0.46515  -0.46757  ... -0.043782  0.41013   0.1796  ]
  [ 0.21705   0.46515  -0.46757  ... -0.043782  0.41013   0.1796  ]
  [ 0.21705   0.46515  -0.46757  ... -0.043782  0.41013   0.1796  ]]

 [[ 0.21705   0.46515  -0.46757  ... -0.043782  0.41013   0.1796  ]
  [ 0.21705   0.46515  -0.46757  ... -0.043782  0.41013   0.1796  ]
  [ 0.21705   0.46515  -0.46757  ... -0.043782  0.41013   0.1796  ]
  ...
  [ 0.21705   0.46515  -0.46757  ... -0.043782  0.41013   0.1796  ]
  [ 0.21705   0.46515  -0.46757  ... -0.043782  0.41013   0.1796  ]
  [ 0.21705   0.46515  -0.46757  ... -0.043782  0.41013   0.1796  ]]]. Expected shape (20, 8505), but input has incompatible shape (20, 8505, 50)[0m

Arguments received by Sequential.call():
  • inputs=tf.Tensor(shape=(20, 8505, 50), dtype=float32)
  • training=None
  • mask=None