In [79]:
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, LSTM, Embedding, Bidirectional, Reshape, Flatten
from tensorflow.keras.optimizers import Adam
import numpy as np
from keras.preprocessing.text import Tokenizer
from keras_preprocessing.sequence import pad_sequences


In [80]:
stories = [
""""The Forgotten Map": In a dusty old library, a young researcher discovers a mysterious map hidden within the pages of an ancient book. The map seems to lead to a long-lost kingdom filled with treasures and secrets. As the researcher delves deeper into deciphering the map's clues, they uncover unexpected challenges and adversaries determined to reach the treasure first.""",

""""Whispers in the Woods": In a secluded forest, strange whispers are heard by anyone who dares to enter. A curious teenager, known for their bravery, decides to investigate. As they venture deeper into the woods, they encounter enigmatic creatures and learn that the whispers hold the key to a forgotten prophecy that could change the fate of their world.""",

""""The Last Beacon": In a world plunged into darkness after an ancient catastrophe, a young engineer discovers an ancient lighthouse said to hold the power to restore light to the world. With the help of a group of unlikely allies, the engineer embarks on a perilous journey across treacherous lands, facing formidable challenges and adversaries who seek to keep the world in eternal darkness.""",

""""Echoes of Time": A gifted physicist invents a time-traveling device capable of sending messages to the past. However, the messages sent cause unforeseen ripples through time, altering events in unexpected ways. As the fabric of reality begins to unravel, the physicist races against time to correct the changes before irreparable damage occurs.""",

""""The Dream Catcher": In a town plagued by a series of haunting nightmares, a young artist discovers they have the ability to enter dreams and alter their course. Tasked with unraveling the mystery behind these nightmares, they navigate a surreal dream world, facing manifestations of people's deepest fears and secrets, all while trying to prevent a looming catastrophe that threatens to merge the dream realm with reality.""",
    
    # Add more stories here
]

In [81]:
# this checks the longest story with most number of words
max_len = max(len(story.split()) for story in stories)



In [82]:
vocab_size = 1000  # Choose an appropriate vocabulary size
embedding_dim = 100  # Embedding dimension

In [83]:
tokenizer = Tokenizer(num_words=vocab_size)
tokenizer.fit_on_texts(stories)


In [84]:
sequences = tokenizer.texts_to_sequences(stories)
sequences

[[1,
  22,
  11,
  5,
  2,
  42,
  43,
  44,
  2,
  12,
  23,
  13,
  2,
  45,
  11,
  46,
  47,
  1,
  48,
  4,
  14,
  15,
  49,
  1,
  11,
  50,
  3,
  51,
  3,
  2,
  52,
  53,
  54,
  55,
  9,
  56,
  6,
  24,
  16,
  1,
  23,
  57,
  25,
  17,
  58,
  1,
  59,
  60,
  7,
  61,
  26,
  27,
  6,
  28,
  62,
  3,
  63,
  1,
  64,
  65],
 [18,
  5,
  1,
  29,
  5,
  2,
  66,
  67,
  68,
  18,
  69,
  70,
  30,
  71,
  31,
  72,
  3,
  32,
  2,
  73,
  74,
  75,
  76,
  19,
  77,
  78,
  3,
  79,
  16,
  7,
  80,
  25,
  17,
  1,
  29,
  7,
  81,
  82,
  83,
  6,
  84,
  20,
  1,
  18,
  33,
  1,
  85,
  3,
  2,
  22,
  86,
  20,
  87,
  88,
  1,
  89,
  4,
  19,
  8],
 [1,
  90,
  91,
  5,
  2,
  8,
  92,
  17,
  34,
  93,
  14,
  15,
  35,
  2,
  12,
  36,
  13,
  14,
  15,
  94,
  95,
  3,
  33,
  1,
  96,
  3,
  97,
  98,
  3,
  1,
  8,
  9,
  1,
  99,
  4,
  2,
  100,
  4,
  101,
  102,
  1,
  36,
  103,
  104,
  2,
  105,
  106,
  107,
  108,
  109,
  37,
  110,
  27,
  6,
  28,

In [85]:
# [
#     [1, 2, 3, 4, 1, 5],
#     [1, 6, 7, 8, 9],
#     [10, 11, 12, 4, 1, 13]
# ]
## converts this code to this using
# padded_sequences = pad_sequences(sequences, maxlen=max_len, padding='post')
# [
#     [1, 2, 3, 4, 1, 5],    # Unchanged - already at max_len
#     [1, 6, 7, 8, 9, 0],   # Padded with zeros at the end
#     [10, 11, 12, 4, 1, 13]  # Unchanged - already at max_len
# ]
# # 
padded_sequences = pad_sequences(sequences, maxlen=max_len, padding='post')

In [86]:
# Define generator model
generator = Sequential()
generator.add(Embedding(input_dim=vocab_size, output_dim=embedding_dim, input_length=max_len))
generator.add(Bidirectional(LSTM(128)))
# activation='softmax' is used because the generator's output should represent 
# probabilities for each element in vocab_size in the vocabulary.
generator.add(Dense(vocab_size, activation="softmax"))

# Define discriminator model
discriminator = Sequential()
discriminator.add(Embedding(input_dim=vocab_size, output_dim=embedding_dim, input_length=max_len))
discriminator.add(Bidirectional(LSTM(128)))
discriminator.add(Dense(1, activation="sigmoid"))

discriminator.compile(loss="binary_crossentropy", optimizer=Adam(learning_rate=0.0002, beta_1=0.5))

# Combine models for GAN
discriminator.trainable = False
gan_input = generator.input
gan_output = discriminator(gan_input)
gan = Model(gan_input, gan_output)

gan.compile(loss="binary_crossentropy", optimizer=Adam(learning_rate=0.0002, beta_1=0.5))

In [111]:
np.ones((32, 1))

array([[1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [1.]])

In [87]:
# Train the GAN
epochs = 100
batch_size = 32
for epoch in range(epochs):
    # Generate noise data (fixed for better training stability)
    noise = np.random.randint(0, vocab_size, size=(batch_size, max_len))

    # Generate stories using the generator
    generated_stories = generator.predict(noise)
    max_real_len = real_stories.shape[1]
    generated_stories = generated_stories[:, :max_real_len]

    # Select real stories randomly
    real_stories = padded_sequences[np.random.randint(0, len(padded_sequences), size=batch_size)]

    # Concatenate real and generated stories
    x_combined = np.concatenate([real_stories, generated_stories])

    # Create labels
    y_combined = np.concatenate([np.ones((batch_size, 1)), np.zeros((batch_size, 1))])

    # Train discriminator
    d_loss = discriminator.train_on_batch(x_combined, y_combined)

    # Train generator to fool discriminator
    y_mislabeled = np.ones((batch_size, 1))
    g_loss = gan.train_on_batch(noise, y_mislabeled)

    # Print and track loss
    if epoch % 10 == 0:
        print(f"Epoch: {epoch}, Generator Loss: {g_loss}, Discriminator Loss: {d_loss}")

# Save the generator model for future use
generator.save("text_generation_gan.h5")

Epoch: 0, Generator Loss: 0.6927824020385742, Discriminator Loss: 0.6858948469161987
Epoch: 10, Generator Loss: 0.6665546894073486, Discriminator Loss: 0.6346516609191895
Epoch: 20, Generator Loss: 0.5360705256462097, Discriminator Loss: 0.3151545524597168
Epoch: 30, Generator Loss: 0.06461039930582047, Discriminator Loss: 0.0345764085650444
Epoch: 40, Generator Loss: 0.017264321446418762, Discriminator Loss: 0.009681900963187218
Epoch: 50, Generator Loss: 0.009558825753629208, Discriminator Loss: 0.005782551132142544
Epoch: 60, Generator Loss: 0.006264679133892059, Discriminator Loss: 0.004005702678114176
Epoch: 70, Generator Loss: 0.004504249896854162, Discriminator Loss: 0.0029421320650726557
Epoch: 80, Generator Loss: 0.0034687956795096397, Discriminator Loss: 0.002288751071318984
Epoch: 90, Generator Loss: 0.0027777557261288166, Discriminator Loss: 0.0018840418197214603


In [99]:
# Generate a story
def generate_story(seed_text, max_length=100):
    generated = tokenizer.texts_to_sequences([seed_text])
    for i in range(max_length):
        padded = pad_sequences(generated, maxlen=max_len, padding='post')
        prediction = np.argmax(generator.predict(padded), axis=-1)
        generated[0].append(prediction[0])
        if prediction == 0:
            break
    return ' '.join(tokenizer.sequences_to_texts(generated)[0])

# Generate a story using the trained model
seed_text = "In a land far, far away, one upon a time there was a lion and a tiger."
generated_story = generate_story(seed_text)
print("Generated Story:")
print(generated_story)

Generated Story:
i n   a   a   t i m e   a   a n d   a   t h e   t h e


In [101]:
tokenizer.texts_to_sequences([seed_text])

[[5, 2, 2, 10, 2, 6, 2]]

In [100]:
generated_story

'i n   a   a   t i m e   a   a n d   a   t h e   t h e'

In [97]:
generator

<keras.engine.sequential.Sequential at 0x244334749d0>

In [88]:
from tensorflow.keras.models import load_model

In [89]:
generated_stories

array([[0.00100108, 0.00100863, 0.0009958 , ..., 0.00100204, 0.00100405,
        0.00099891],
       [0.00099262, 0.00099682, 0.00099762, ..., 0.00100302, 0.00099641,
        0.00100109],
       [0.00099899, 0.00100128, 0.00100433, ..., 0.00099843, 0.00099495,
        0.0010049 ],
       ...,
       [0.00099559, 0.00100589, 0.00100231, ..., 0.00099636, 0.00099853,
        0.00100125],
       [0.0009976 , 0.00099541, 0.00100342, ..., 0.00101211, 0.00099718,
        0.00100451],
       [0.00099189, 0.00099955, 0.0009974 , ..., 0.0009991 , 0.00099733,
        0.00099985]], dtype=float32)

In [91]:
real_stories.shape, generated_stories.shape

((32, 67), (32, 67))

In [92]:
g_loss = gan.train_on_batch(noise, y_mislabeled)

g_loss

0.0023494430352002382

In [93]:
x_combined, y_combined.shape

(array([[1.80000000e+01, 5.00000000e+00, 1.00000000e+00, ...,
         0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
        [1.00000000e+00, 2.10000000e+01, 1.42000000e+02, ...,
         1.71000000e+02, 9.00000000e+00, 4.00000000e+01],
        [1.00000000e+00, 2.10000000e+01, 1.42000000e+02, ...,
         1.71000000e+02, 9.00000000e+00, 4.00000000e+01],
        ...,
        [9.95585578e-04, 1.00589253e-03, 1.00230542e-03, ...,
         9.96359158e-04, 9.98530886e-04, 1.00125244e-03],
        [9.97600611e-04, 9.95408744e-04, 1.00342499e-03, ...,
         1.01211248e-03, 9.97176045e-04, 1.00451428e-03],
        [9.91886132e-04, 9.99549171e-04, 9.97404335e-04, ...,
         9.99099924e-04, 9.97333671e-04, 9.99849639e-04]]),
 (64, 1))

In [94]:
hehe = np.random.randint(0, len(padded_sequences), size=batch_size)
hehe
padded_sequences[hehe]

array([[114,   4,  10, ...,   0,   0,   0],
       [  1,  21, 142, ..., 171,   9,  40],
       [ 18,   5,   1, ...,   0,   0,   0],
       ...,
       [ 18,   5,   1, ...,   0,   0,   0],
       [  1,  90,  91, ...,   0,   0,   0],
       [114,   4,  10, ...,   0,   0,   0]])

In [95]:
# x_combined = np.concatenate([real_stories, generated_stories])
generated_stories.shape, generated_stories_padded.shape
generated_stories_padded, generated_stories
    # y_combined = np.concatenate([np.ones((batch_size, 1)), np.zeros((batch_size, 1))])

NameError: name 'generated_stories_padded' is not defined