Developing Generative AI Using Python: A Step-by-Step Guide



Before that, a short example to see what it means to "generate" something out of something else.

This example shows how to generate a random 10-word sentence
from a sample text.

Markov chain: a sequence generator that uses historical data (observed word-to-word transitions) to generate a new sequence.

In [None]:
import random

class MarkovChain:
    """First-order, word-level Markov chain text generator.

    The chain is built from whitespace-separated words of ``text``: for every
    word, the list of words that directly follow it is recorded in
    ``self.states``. Repeated followers are kept, so sampling with
    ``random.choice`` is weighted by how often each transition was observed.
    """

    def __init__(self, text):
        """Build the transition table from ``text``.

        Args:
            text: Training text; it is split on whitespace, punctuation stays
                attached to the words.
        """
        self.text = text
        self.states = {}

        # Split the text into words
        words = text.split()

        # Map each word to the list of words observed right after it.
        for state, next_state in zip(words, words[1:]):
            self.states.setdefault(state, []).append(next_state)

    def generate(self, length):
        """Generate ``length`` words by walking the chain.

        The randomly chosen starting word itself is not part of the output;
        the output consists of the successors sampled along the walk.

        Args:
            length: Number of words to generate.

        Returns:
            The generated words joined by single spaces, or an empty string
            when the training text contained no transitions (fewer than two
            words).
        """
        # Nothing to sample from: avoid random.choice on an empty list.
        if not self.states:
            return ""

        starts = list(self.states)

        # Choose a random starting state
        state = random.choice(starts)

        # Generate a sequence of words
        sentence = []
        for _ in range(length):
            # A word that only occurs at the very end of the text has no
            # recorded successor; restart from a random known state instead
            # of failing with KeyError.
            if state not in self.states:
                state = random.choice(starts)
            state = random.choice(self.states[state])
            sentence.append(state)

        # Return the generated sentence
        return " ".join(sentence)

if __name__ == "__main__":
    # Demo: build a word-level Markov chain from a two-sentence sample text.
    text = "This is a sample text. It is used to train the Markov chain."
    markov_chain = MarkovChain(text)

    # Ask the chain for a 10-word sequence (output differs between runs
    # because the walk is random).
    sentence = markov_chain.generate(10)

    # Print the generated sentence
    print(sentence)

sample text. It is a sample text. It is a


Using RNNs to perform the same task

In [None]:
# pip install tensorflow

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Embedding
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer

class RNNTextGenerator:
    """Word-level next-word predictor trained on a single text.

    The text is tokenized with a Keras ``Tokenizer``, cut into sliding windows
    of ``seq_length`` input words plus one target word, and fed to an
    Embedding -> LSTM -> LSTM -> softmax network. Note that, despite the class
    name, the recurrent layers are LSTM layers.
    """

    def __init__(self, text, seq_length=5):
        """Tokenize ``text``, build the training windows and the model.

        Args:
            text: Training text.
            seq_length: Number of context words used to predict the next word.
        """
        self.text = text
        self.seq_length = seq_length
        self.tokenizer = Tokenizer()
        self.tokenizer.fit_on_texts([text])
        # One extra slot beyond the highest word index.
        self.vocab_size = len(self.tokenizer.word_index) + 1

        # Prepare the sequences
        self.sequences = self.create_sequences()

        # Build the model
        self.model = self.build_model()

    def create_sequences(self):
        """Return an array of sliding windows of ``seq_length + 1`` word ids.

        The last column of each row is the target word; the preceding columns
        are its context. The array is empty when the text has no more than
        ``seq_length`` words.
        """
        words = self.tokenizer.texts_to_sequences([self.text])[0]
        window = self.seq_length + 1
        sequences = [
            words[start:start + window]
            for start in range(len(words) - self.seq_length)
        ]
        return np.array(sequences)

    def build_model(self):
        """Build and compile the Embedding -> LSTM -> LSTM -> softmax model."""
        model = Sequential()
        model.add(Embedding(self.vocab_size, 50, input_length=self.seq_length))
        model.add(LSTM(100, return_sequences=True))
        model.add(LSTM(100))
        model.add(Dense(self.vocab_size, activation='softmax'))

        model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
        return model

    def train(self, epochs=100):
        """Fit the model on the prepared windows.

        Args:
            epochs: Number of passes over the training windows.

        Raises:
            ValueError: If the text was too short to form a single window.
        """
        # Fail with a clear message instead of an opaque IndexError from the
        # 2-D slicing below.
        if self.sequences.size == 0:
            raise ValueError(
                f"text must contain more than seq_length={self.seq_length} "
                "words to build training sequences"
            )

        x, y = self.sequences[:, :-1], self.sequences[:, -1]
        y = tf.keras.utils.to_categorical(y, num_classes=self.vocab_size)

        self.model.fit(x, y, epochs=epochs, verbose=2)

    def generate(self, seed_text, length=10):
        """Greedily generate ``length`` words that continue ``seed_text``.

        Args:
            seed_text: Whitespace-separated starting words.
            length: Number of words to generate.

        Returns:
            The generated words (without the seed) joined by single spaces. A
            predicted id with no word in the tokenizer contributes an empty
            string.
        """
        result = []
        input_text = seed_text.split()
        for _ in range(length):
            # Only the most recent seq_length words are used as context.
            encoded = self.tokenizer.texts_to_sequences([input_text[-self.seq_length:]])[0]
            encoded = pad_sequences([encoded], maxlen=self.seq_length, truncating='pre')

            probabilities = self.model.predict(encoded, verbose=0)
            predicted = int(np.argmax(probabilities, axis=-1)[0])
            # Direct id -> word lookup instead of scanning word_index.
            output_word = self.tokenizer.index_word.get(predicted, '')

            input_text.append(output_word)
            result.append(output_word)

        return ' '.join(result)

if __name__ == "__main__":
    # Create RNN text generator from the same sample text used earlier.
    text = "This is a sample text. It is used to train the Markov chain."
    rnn_generator = RNNTextGenerator(text)

    # Train the model
    rnn_generator.train(epochs=100)

    # Generate 10 words that continue the two-word seed.
    seed_text = "This is"
    sentence = rnn_generator.generate(seed_text, length=10)

    # Print the generated sentence
    print(sentence)


Epoch 1/100
1/1 - 4s - loss: 2.5645 - accuracy: 0.1250 - 4s/epoch - 4s/step
Epoch 2/100
1/1 - 0s - loss: 2.5590 - accuracy: 0.5000 - 14ms/epoch - 14ms/step
Epoch 3/100
1/1 - 0s - loss: 2.5534 - accuracy: 0.5000 - 13ms/epoch - 13ms/step
Epoch 4/100
1/1 - 0s - loss: 2.5473 - accuracy: 0.5000 - 14ms/epoch - 14ms/step
Epoch 5/100
1/1 - 0s - loss: 2.5407 - accuracy: 0.6250 - 14ms/epoch - 14ms/step
Epoch 6/100
1/1 - 0s - loss: 2.5333 - accuracy: 0.6250 - 17ms/epoch - 17ms/step
Epoch 7/100
1/1 - 0s - loss: 2.5249 - accuracy: 0.6250 - 17ms/epoch - 17ms/step
Epoch 8/100
1/1 - 0s - loss: 2.5155 - accuracy: 0.6250 - 14ms/epoch - 14ms/step
Epoch 9/100
1/1 - 0s - loss: 2.5046 - accuracy: 0.6250 - 15ms/epoch - 15ms/step
Epoch 10/100
1/1 - 0s - loss: 2.4922 - accuracy: 0.6250 - 15ms/epoch - 15ms/step
Epoch 11/100
1/1 - 0s - loss: 2.4779 - accuracy: 0.6250 - 14ms/epoch - 14ms/step
Epoch 12/100
1/1 - 0s - loss: 2.4614 - accuracy: 0.6250 - 19ms/epoch - 19ms/step
Epoch 13/100
1/1 - 0s - loss: 2.4423 - ac

It still doesn't make any sense; we need something better and more "contextual".
*   The quality of the generated text by Markov Chains and simple RNNs may often lack coherence, especially with small datasets or limited training epochs. While these models can generate sequences, their ability to capture the intricacies of language and context is limited compared to more advanced models.





In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Embedding
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer

class LSTMTextGenerator:
    """Word-level next-word predictor built from two stacked LSTM layers.

    The text is tokenized with a Keras ``Tokenizer`` and cut into sliding
    windows of ``seq_length`` context words plus one target word; the network
    is Embedding -> LSTM -> LSTM -> softmax over the vocabulary.
    """

    def __init__(self, text, seq_length=5):
        """Tokenize ``text``, build the training windows and the model.

        Args:
            text: Training text.
            seq_length: Number of context words used to predict the next word.
        """
        self.text = text
        self.seq_length = seq_length
        self.tokenizer = Tokenizer()
        self.tokenizer.fit_on_texts([text])
        # One extra slot beyond the highest word index.
        self.vocab_size = len(self.tokenizer.word_index) + 1

        # Prepare the sequences
        self.sequences = self.create_sequences()

        # Build the model
        self.model = self.build_model()

    def create_sequences(self):
        """Return an array of sliding windows of ``seq_length + 1`` word ids.

        The last column of each row is the target word; the preceding columns
        are its context. The array is empty when the text has no more than
        ``seq_length`` words.
        """
        words = self.tokenizer.texts_to_sequences([self.text])[0]
        window = self.seq_length + 1
        sequences = [
            words[start:start + window]
            for start in range(len(words) - self.seq_length)
        ]
        return np.array(sequences)

    def build_model(self):
        """Build and compile the Embedding -> LSTM -> LSTM -> softmax model."""
        model = Sequential()
        model.add(Embedding(self.vocab_size, 50, input_length=self.seq_length))
        model.add(LSTM(100, return_sequences=True))
        model.add(LSTM(100))
        model.add(Dense(self.vocab_size, activation='softmax'))

        model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
        return model

    def train(self, epochs=100):
        """Fit the model on the prepared windows.

        Args:
            epochs: Number of passes over the training windows.

        Raises:
            ValueError: If the text was too short to form a single window.
        """
        # Fail with a clear message instead of an opaque IndexError from the
        # 2-D slicing below.
        if self.sequences.size == 0:
            raise ValueError(
                f"text must contain more than seq_length={self.seq_length} "
                "words to build training sequences"
            )

        x, y = self.sequences[:, :-1], self.sequences[:, -1]
        y = tf.keras.utils.to_categorical(y, num_classes=self.vocab_size)

        self.model.fit(x, y, epochs=epochs, verbose=2)

    def generate(self, seed_text, length=10):
        """Greedily generate ``length`` words that continue ``seed_text``.

        Args:
            seed_text: Whitespace-separated starting words.
            length: Number of words to generate.

        Returns:
            The generated words (without the seed) joined by single spaces. A
            predicted id with no word in the tokenizer contributes an empty
            string.
        """
        result = []
        input_text = seed_text.split()
        for _ in range(length):
            # Only the most recent seq_length words are used as context.
            encoded = self.tokenizer.texts_to_sequences([input_text[-self.seq_length:]])[0]
            encoded = pad_sequences([encoded], maxlen=self.seq_length, truncating='pre')

            probabilities = self.model.predict(encoded, verbose=0)
            predicted = int(np.argmax(probabilities, axis=-1)[0])
            # Direct id -> word lookup instead of scanning word_index.
            output_word = self.tokenizer.index_word.get(predicted, '')

            input_text.append(output_word)
            result.append(output_word)

        return ' '.join(result)

if __name__ == "__main__":
    # Create LSTM text generator from the same sample text used earlier.
    text = "This is a sample text. It is used to train the Markov chain."
    lstm_generator = LSTMTextGenerator(text)

    # Train the model
    lstm_generator.train(epochs=100)

    # Generate 10 words that continue the two-word seed.
    seed_text = "This is"
    sentence = lstm_generator.generate(seed_text, length=10)

    # Print the generated sentence
    print("LSTM Generated Text: ", sentence)


Epoch 1/100
1/1 - 4s - loss: 2.5650 - accuracy: 0.1250 - 4s/epoch - 4s/step
Epoch 2/100
1/1 - 0s - loss: 2.5592 - accuracy: 0.2500 - 14ms/epoch - 14ms/step
Epoch 3/100
1/1 - 0s - loss: 2.5534 - accuracy: 0.2500 - 13ms/epoch - 13ms/step
Epoch 4/100
1/1 - 0s - loss: 2.5471 - accuracy: 0.3750 - 15ms/epoch - 15ms/step
Epoch 5/100
1/1 - 0s - loss: 2.5403 - accuracy: 0.3750 - 14ms/epoch - 14ms/step
Epoch 6/100
1/1 - 0s - loss: 2.5328 - accuracy: 0.3750 - 17ms/epoch - 17ms/step
Epoch 7/100
1/1 - 0s - loss: 2.5243 - accuracy: 0.3750 - 16ms/epoch - 16ms/step
Epoch 8/100
1/1 - 0s - loss: 2.5147 - accuracy: 0.3750 - 13ms/epoch - 13ms/step
Epoch 9/100
1/1 - 0s - loss: 2.5038 - accuracy: 0.3750 - 13ms/epoch - 13ms/step
Epoch 10/100
1/1 - 0s - loss: 2.4913 - accuracy: 0.3750 - 13ms/epoch - 13ms/step
Epoch 11/100
1/1 - 0s - loss: 2.4769 - accuracy: 0.2500 - 14ms/epoch - 14ms/step
Epoch 12/100
1/1 - 0s - loss: 2.4603 - accuracy: 0.2500 - 14ms/epoch - 14ms/step
Epoch 13/100
1/1 - 0s - loss: 2.4412 - ac

Using GRUs

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GRU, Dense, Embedding
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer

class GRUTextGenerator:
    """Word-level next-word predictor built from two stacked GRU layers.

    The text is tokenized with a Keras ``Tokenizer`` and cut into sliding
    windows of ``seq_length`` context words plus one target word; the network
    is Embedding -> GRU -> GRU -> softmax over the vocabulary.
    """

    def __init__(self, text, seq_length=5):
        """Tokenize ``text``, build the training windows and the model.

        Args:
            text: Training text.
            seq_length: Number of context words used to predict the next word.
        """
        self.text = text
        self.seq_length = seq_length
        self.tokenizer = Tokenizer()
        self.tokenizer.fit_on_texts([text])
        # One extra slot beyond the highest word index.
        self.vocab_size = len(self.tokenizer.word_index) + 1

        # Prepare the sequences
        self.sequences = self.create_sequences()

        # Build the model
        self.model = self.build_model()

    def create_sequences(self):
        """Return an array of sliding windows of ``seq_length + 1`` word ids.

        The last column of each row is the target word; the preceding columns
        are its context. The array is empty when the text has no more than
        ``seq_length`` words.
        """
        words = self.tokenizer.texts_to_sequences([self.text])[0]
        window = self.seq_length + 1
        sequences = [
            words[start:start + window]
            for start in range(len(words) - self.seq_length)
        ]
        return np.array(sequences)

    def build_model(self):
        """Build and compile the Embedding -> GRU -> GRU -> softmax model."""
        model = Sequential()
        model.add(Embedding(self.vocab_size, 50, input_length=self.seq_length))
        model.add(GRU(100, return_sequences=True))
        model.add(GRU(100))
        model.add(Dense(self.vocab_size, activation='softmax'))

        model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
        return model

    def train(self, epochs=100):
        """Fit the model on the prepared windows.

        Args:
            epochs: Number of passes over the training windows.

        Raises:
            ValueError: If the text was too short to form a single window.
        """
        # Fail with a clear message instead of an opaque IndexError from the
        # 2-D slicing below.
        if self.sequences.size == 0:
            raise ValueError(
                f"text must contain more than seq_length={self.seq_length} "
                "words to build training sequences"
            )

        x, y = self.sequences[:, :-1], self.sequences[:, -1]
        y = tf.keras.utils.to_categorical(y, num_classes=self.vocab_size)

        self.model.fit(x, y, epochs=epochs, verbose=2)

    def generate(self, seed_text, length=10):
        """Greedily generate ``length`` words that continue ``seed_text``.

        Args:
            seed_text: Whitespace-separated starting words.
            length: Number of words to generate.

        Returns:
            The generated words (without the seed) joined by single spaces. A
            predicted id with no word in the tokenizer contributes an empty
            string.
        """
        result = []
        input_text = seed_text.split()
        for _ in range(length):
            # Only the most recent seq_length words are used as context.
            encoded = self.tokenizer.texts_to_sequences([input_text[-self.seq_length:]])[0]
            encoded = pad_sequences([encoded], maxlen=self.seq_length, truncating='pre')

            probabilities = self.model.predict(encoded, verbose=0)
            predicted = int(np.argmax(probabilities, axis=-1)[0])
            # Direct id -> word lookup instead of scanning word_index.
            output_word = self.tokenizer.index_word.get(predicted, '')

            input_text.append(output_word)
            result.append(output_word)

        return ' '.join(result)

if __name__ == "__main__":
    # Create GRU text generator from the same sample text used earlier.
    text = "This is a sample text. It is used to train the Markov chain."
    gru_generator = GRUTextGenerator(text)

    # Train the model
    gru_generator.train(epochs=100)

    # Generate 10 words that continue the two-word seed.
    seed_text = "This is"
    sentence = gru_generator.generate(seed_text, length=10)

    # Print the generated sentence
    print("GRU Generated Text: ", sentence)


Epoch 1/100
1/1 - 4s - loss: 2.5665 - accuracy: 0.1250 - 4s/epoch - 4s/step
Epoch 2/100
1/1 - 0s - loss: 2.5534 - accuracy: 0.2500 - 12ms/epoch - 12ms/step
Epoch 3/100
1/1 - 0s - loss: 2.5403 - accuracy: 0.2500 - 14ms/epoch - 14ms/step
Epoch 4/100
1/1 - 0s - loss: 2.5265 - accuracy: 0.2500 - 13ms/epoch - 13ms/step
Epoch 5/100
1/1 - 0s - loss: 2.5118 - accuracy: 0.1250 - 14ms/epoch - 14ms/step
Epoch 6/100
1/1 - 0s - loss: 2.4956 - accuracy: 0.1250 - 13ms/epoch - 13ms/step
Epoch 7/100
1/1 - 0s - loss: 2.4778 - accuracy: 0.2500 - 15ms/epoch - 15ms/step
Epoch 8/100
1/1 - 0s - loss: 2.4578 - accuracy: 0.2500 - 13ms/epoch - 13ms/step
Epoch 9/100
1/1 - 0s - loss: 2.4355 - accuracy: 0.2500 - 13ms/epoch - 13ms/step
Epoch 10/100
1/1 - 0s - loss: 2.4104 - accuracy: 0.2500 - 14ms/epoch - 14ms/step
Epoch 11/100
1/1 - 0s - loss: 2.3824 - accuracy: 0.1250 - 14ms/epoch - 14ms/step
Epoch 12/100
1/1 - 0s - loss: 2.3512 - accuracy: 0.1250 - 13ms/epoch - 13ms/step
Epoch 13/100
1/1 - 0s - loss: 2.3167 - ac

Variational Autoencoders (VAEs)

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Dense, Lambda, Embedding, RepeatVector, TimeDistributed, Flatten
from tensorflow.keras.losses import sparse_categorical_crossentropy
from tensorflow.keras import backend as K
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer

class VAETextGenerator:
    """Word-level variational autoencoder over fixed-length word windows.

    The encoder maps a window of ``seq_length`` word ids to the mean and
    log-variance of a ``latent_dim``-dimensional Gaussian; the decoder maps a
    latent vector back to a softmax distribution over the vocabulary for each
    of the ``seq_length`` positions.
    """

    def __init__(self, text, seq_length=5, latent_dim=2):
        """Tokenize ``text``, build the training windows and the three models.

        Args:
            text: Training text.
            seq_length: Number of words per window.
            latent_dim: Size of the latent vector.
        """
        self.text = text
        self.seq_length = seq_length
        self.latent_dim = latent_dim
        self.tokenizer = Tokenizer()
        self.tokenizer.fit_on_texts([text])
        # One extra slot beyond the highest word index.
        self.vocab_size = len(self.tokenizer.word_index) + 1

        # Prepare the sequences
        self.sequences = self.create_sequences()

        # Build the VAE model
        self.encoder, self.decoder, self.vae = self.build_model()

    def create_sequences(self):
        """Return an array of sliding windows of exactly ``seq_length`` word ids.

        Unlike a next-word predictor there is no separate target column: the
        window itself is what the autoencoder reconstructs.
        """
        words = self.tokenizer.texts_to_sequences([self.text])[0]
        sequences = []
        for i in range(self.seq_length, len(words)):
            seq = words[i - self.seq_length:i]
            sequences.append(seq)
        return np.array(sequences)

    def sampling(self, args):
        """Draw a latent sample via the reparameterization trick.

        Args:
            args: Pair ``(z_mean, z_log_var)`` of tensors.

        Returns:
            ``z_mean + exp(0.5 * z_log_var) * epsilon`` with ``epsilon`` drawn
            from a standard normal of shape ``(batch, dim)``.
        """
        z_mean, z_log_var = args
        batch = K.shape(z_mean)[0]
        dim = K.int_shape(z_mean)[1]
        epsilon = K.random_normal(shape=(batch, dim))
        return z_mean + K.exp(0.5 * z_log_var) * epsilon

    def build_model(self):
        """Build the encoder, the decoder and the end-to-end VAE.

        Returns:
            Tuple ``(encoder, decoder, vae)``. The VAE is compiled with Adam
            and carries its loss via ``add_loss``, so it is fitted without
            explicit targets.
        """
        # Encoder
        inputs = Input(shape=(self.seq_length,))
        x = Embedding(self.vocab_size, 50, input_length=self.seq_length)(inputs)
        x = LSTM(100)(x)
        z_mean = Dense(self.latent_dim)(x)
        z_log_var = Dense(self.latent_dim)(x)
        z = Lambda(self.sampling, output_shape=(self.latent_dim,))([z_mean, z_log_var])
        encoder = Model(inputs, [z_mean, z_log_var, z], name='encoder')

        # Decoder
        latent_inputs = Input(shape=(self.latent_dim,))
        x = Dense(100)(latent_inputs)
        # Feed the same projected latent vector to every output position.
        x = RepeatVector(self.seq_length)(x)
        x = LSTM(100, return_sequences=True)(x)
        outputs = TimeDistributed(Dense(self.vocab_size, activation='softmax'))(x)
        decoder = Model(latent_inputs, outputs, name='decoder')

        # VAE
        # Index 2 of the encoder outputs is the sampled z.
        outputs = decoder(encoder(inputs)[2])
        vae = Model(inputs, outputs, name='vae')

        # VAE loss
        def vae_loss(inputs, outputs):
            # Per-position cross-entropy between the input word ids and the
            # reconstructed distributions, summed over axis 1.
            reconstruction_loss = tf.keras.losses.sparse_categorical_crossentropy(inputs, outputs)
            reconstruction_loss = K.sum(reconstruction_loss, axis=1)
            # KL term computed from z_mean / z_log_var of the enclosing scope:
            # -0.5 * sum(1 + log_var - mean^2 - exp(log_var)).
            kl_loss = 1 + z_log_var - K.square(z_mean) - K.exp(z_log_var)
            kl_loss = K.sum(kl_loss, axis=-1)
            kl_loss *= -0.5
            return K.mean(reconstruction_loss + kl_loss)

        vae.add_loss(vae_loss(inputs, outputs))
        vae.compile(optimizer='adam')

        return encoder, decoder, vae

    def train(self, epochs=10):
        """Fit the VAE on the prepared windows.

        Args:
            epochs: Number of passes over the training windows.
        """
        x = pad_sequences(self.sequences, maxlen=self.seq_length)
        # No targets are passed: the loss was attached with add_loss.
        self.vae.fit(x, epochs=epochs, batch_size=32, verbose=2)

    def generate(self, seed_text, length=10):
        """Decode the latent mean of ``seed_text`` into words.

        Args:
            seed_text: Text to encode; it is padded/truncated to ``seq_length``
                word ids.
            length: Maximum number of words to return.

        Returns:
            The arg-max word for each decoded position joined by spaces. The
            decoder emits ``seq_length`` positions, so at most ``seq_length``
            words come back even when ``length`` is larger.
        """
        encoded_seed = self.tokenizer.texts_to_sequences([seed_text])[0]
        encoded_seed = pad_sequences([encoded_seed], maxlen=self.seq_length, truncating='pre')
        # Use the mean of the latent distribution rather than the sampled z.
        z_mean, _, _ = self.encoder.predict(encoded_seed)
        generated = self.decoder.predict(z_mean)
        generated_text = [self.tokenizer.index_word.get(np.argmax(word), '') for word in generated[0]]
        return ' '.join(generated_text[:length])

if __name__ == "__main__":
    # Repeat the sample text 1000 times to get a larger training corpus.
    base_text = "This is a sample text. It is used to train the Markov chain."
    large_text = " ".join([base_text] * 1000)

    # Build and train the VAE text generator.
    vae_generator = VAETextGenerator(large_text)
    vae_generator.train(epochs=10)

    # Encode the seed and decode it back into words.
    seed_text = "This is"
    sentence = vae_generator.generate(seed_text, length=10)
    print("VAE Generated Text: ", sentence)


Epoch 1/10
407/407 - 13s - loss: 8.0805 - 13s/epoch - 31ms/step
Epoch 2/10
407/407 - 5s - loss: 4.6485 - 5s/epoch - 13ms/step
Epoch 3/10
407/407 - 6s - loss: 4.1526 - 6s/epoch - 15ms/step
Epoch 4/10
407/407 - 6s - loss: 4.0109 - 6s/epoch - 15ms/step
Epoch 5/10
407/407 - 5s - loss: 3.8812 - 5s/epoch - 13ms/step
Epoch 6/10
407/407 - 7s - loss: 3.7502 - 7s/epoch - 16ms/step
Epoch 7/10
407/407 - 5s - loss: 3.7159 - 5s/epoch - 13ms/step
Epoch 8/10
407/407 - 7s - loss: 3.7334 - 7s/epoch - 16ms/step
Epoch 9/10
407/407 - 5s - loss: 3.6523 - 5s/epoch - 13ms/step
Epoch 10/10
407/407 - 7s - loss: 3.5987 - 7s/epoch - 16ms/step
VAE Generated Text:  markov chain this is a


GANs

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding, LSTM, Input, Reshape, Flatten, TimeDistributed, GRU
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer

class GANTextGenerator:
    """Toy GAN that generates fixed-length windows of words.

    The generator maps a noise vector of size ``seq_length`` to a softmax
    distribution over the vocabulary for each of ``seq_length`` positions. The
    discriminator receives such per-position distributions (one-hot encoded
    for real text) and outputs the probability that the window is real.
    """

    def __init__(self, text, seq_length=5):
        """Tokenize ``text``, build the training windows and the models.

        Args:
            text: Training text.
            seq_length: Number of words per window; also the noise dimension.
        """
        self.text = text
        self.seq_length = seq_length
        self.tokenizer = Tokenizer()
        self.tokenizer.fit_on_texts([text])
        # One extra slot beyond the highest word index.
        self.vocab_size = len(self.tokenizer.word_index) + 1

        # Prepare the sequences
        self.sequences = self.create_sequences()

        # Build the GAN model
        self.generator = self.build_generator()
        self.discriminator = self.build_discriminator()
        self.gan = self.build_gan()

    def create_sequences(self):
        """Return an array of sliding windows of exactly ``seq_length`` word ids."""
        words = self.tokenizer.texts_to_sequences([self.text])[0]
        sequences = [
            words[start:start + self.seq_length]
            for start in range(len(words) - self.seq_length)
        ]
        return np.array(sequences)

    def build_generator(self):
        """Build the noise -> per-position word distribution generator."""
        model = Sequential()
        model.add(Dense(self.seq_length * 256, input_dim=self.seq_length, activation='relu'))
        model.add(Reshape((self.seq_length, 256)))
        model.add(LSTM(100, return_sequences=True))
        model.add(TimeDistributed(Dense(self.vocab_size, activation='softmax')))
        return model

    def build_discriminator(self):
        """Build and compile the real-vs-generated window classifier."""
        model = Sequential()
        model.add(LSTM(100, input_shape=(self.seq_length, self.vocab_size), return_sequences=True))
        model.add(Flatten())
        model.add(Dense(1, activation='sigmoid'))
        model.compile(optimizer=Adam(), loss='binary_crossentropy', metrics=['accuracy'])
        return model

    def build_gan(self):
        """Stack generator and discriminator into the model that trains the generator."""
        # Marked non-trainable before compiling the stacked model; the
        # discriminator was compiled on its own in build_discriminator.
        self.discriminator.trainable = False
        model = Sequential()
        model.add(self.generator)
        model.add(self.discriminator)
        model.compile(optimizer=Adam(), loss='binary_crossentropy')
        return model

    def train(self, epochs=1000, batch_size=32):
        """Alternate discriminator and generator updates.

        Each iteration trains the discriminator on ``batch_size // 2`` real and
        ``batch_size // 2`` generated windows, then trains the generator on
        ``batch_size`` noise vectors labelled as real. One progress line is
        printed per iteration.

        Args:
            epochs: Number of training iterations.
            batch_size: Generator batch size; the discriminator uses half.

        Raises:
            ValueError: If ``batch_size`` is below 2 or there are no training
                windows.
        """
        half_batch = batch_size // 2
        if half_batch < 1:
            raise ValueError("batch_size must be at least 2")
        if len(self.sequences) == 0:
            raise ValueError(
                f"text must contain more than seq_length={self.seq_length} "
                "words to build training sequences"
            )

        for epoch in range(epochs):
            # Real half-batch: random windows, one-hot encoded to match the
            # generator's per-position distributions.
            idx = np.random.randint(0, self.sequences.shape[0], half_batch)
            real_seqs = self.sequences[idx]
            real_seqs = pad_sequences(real_seqs, maxlen=self.seq_length)
            real_seqs = tf.keras.utils.to_categorical(real_seqs, num_classes=self.vocab_size)

            # Fake half-batch from the current generator.
            noise = np.random.normal(0, 1, (half_batch, self.seq_length))
            gen_seqs = self.generator.predict(noise, verbose=0)

            d_loss_real = self.discriminator.train_on_batch(real_seqs, np.ones((half_batch, 1)))
            d_loss_fake = self.discriminator.train_on_batch(gen_seqs, np.zeros((half_batch, 1)))
            d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

            # Generator step: label generated windows as real.
            noise = np.random.normal(0, 1, (batch_size, self.seq_length))
            valid_y = np.ones((batch_size, 1))
            g_loss = self.gan.train_on_batch(noise, valid_y)

            print(f"{epoch} [D loss: {d_loss[0]}, acc.: {100*d_loss[1]}] [G loss: {g_loss}]")

    def generate(self, length=10):
        """Generate up to ``length`` words from random noise.

        One generator pass yields ``seq_length`` words, so enough independent
        noise vectors are sampled to cover ``length``; the windows are
        concatenated and truncated to ``length``.

        Args:
            length: Number of words to return.

        Returns:
            The arg-max word of every generated position joined by spaces.
        """
        # Ceiling division; always sample at least one window.
        n_windows = max(1, -(-length // self.seq_length))
        noise = np.random.normal(0, 1, (n_windows, self.seq_length))
        generated = self.generator.predict(noise, verbose=0)

        generated_text = []
        for window in generated:
            for word in window:
                generated_text.append(self.tokenizer.index_word.get(int(np.argmax(word)), ''))
        return ' '.join(generated_text[:length])

if __name__ == "__main__":
    # Repeat the sample text 1000 times to get a larger training corpus.
    base_text = "This is a sample text. It is used to train the Markov chain."
    large_text = " ".join([base_text] * 1000)

    # Build the GAN and run 1000 training iterations.
    gan_generator = GANTextGenerator(large_text)
    gan_generator.train(epochs=1000, batch_size=32)

    # Sample words from random noise.
    sentence = gan_generator.generate(length=10)
    print("GAN Generated Text: ", sentence)


0 [D loss: 0.6987886726856232, acc.: 21.875] [G loss: 0.6818361878395081]
1 [D loss: 0.6937481462955475, acc.: 46.875] [G loss: 0.6800020933151245]
2 [D loss: 0.6957064568996429, acc.: 34.375] [G loss: 0.6793464422225952]
3 [D loss: 0.6914526224136353, acc.: 50.0] [G loss: 0.6789439916610718]
4 [D loss: 0.6918761134147644, acc.: 46.875] [G loss: 0.6786562204360962]
5 [D loss: 0.6893258392810822, acc.: 46.875] [G loss: 0.6788207292556763]
6 [D loss: 0.6921645700931549, acc.: 43.75] [G loss: 0.6786749958992004]
7 [D loss: 0.6922245621681213, acc.: 43.75] [G loss: 0.6782711744308472]
8 [D loss: 0.6899897456169128, acc.: 50.0] [G loss: 0.6781689524650574]
9 [D loss: 0.6852372586727142, acc.: 50.0] [G loss: 0.6779918074607849]
10 [D loss: 0.693285346031189, acc.: 34.375] [G loss: 0.6774842739105225]
11 [D loss: 0.6889465749263763, acc.: 46.875] [G loss: 0.6778182983398438]
12 [D loss: 0.6910552382469177, acc.: 43.75] [G loss: 0.6766859292984009]
13 [D loss: 0.6884288787841797, acc.: 46.875]

Transformers

In [None]:
# pip install transformers

In [None]:
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer

class GPT2TextGenerator:
    """Text continuation with a pre-trained GPT-2 model from ``transformers``."""

    def __init__(self, model_name='gpt2'):
        """Load the pre-trained tokenizer and language model.

        Args:
            model_name: Name or path passed to ``from_pretrained``.
        """
        # Load pre-trained model and tokenizer
        self.tokenizer = GPT2Tokenizer.from_pretrained(model_name)
        self.model = GPT2LMHeadModel.from_pretrained(model_name)

    def generate(self, seed_text, max_length=50):
        """Continue ``seed_text`` with the model.

        Args:
            seed_text: Prompt to continue.
            max_length: Passed to ``model.generate`` as ``max_length``.

        Returns:
            The decoded first returned sequence with special tokens removed.
        """
        # Calling the tokenizer (rather than .encode) also yields the
        # attention mask that generate() asks for.
        encoded = self.tokenizer(seed_text, return_tensors='pt')

        # Generate text. The attention mask and pad token id are passed
        # explicitly; without them generate() warns and falls back to using
        # eos_token_id as the pad token.
        output = self.model.generate(
            encoded['input_ids'],
            attention_mask=encoded['attention_mask'],
            pad_token_id=self.tokenizer.eos_token_id,
            max_length=max_length,
            num_return_sequences=1,
            no_repeat_ngram_size=2,
            early_stopping=True,
        )

        # Decode output text
        generated_text = self.tokenizer.decode(output[0], skip_special_tokens=True)
        return generated_text

if __name__ == "__main__":
    # The sample text is used here as the prompt, not as training data.
    seed_text = "This is a sample text. It is used to train the Markov chain."

    # Create GPT-2 text generator
    gpt2_generator = GPT2TextGenerator()

    # Generate a continuation of the prompt.
    sentence = gpt2_generator.generate(seed_text, max_length=50)

    # Print the generated sentence
    print("GPT-2 Generated Text: ", sentence)


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


GPT-2 Generated Text:  This is a sample text. It is used to train the Markov chain.

The Markova chain is the most common chain in the world. The Markovan chain has a very high number of users. In the past, the number was
