In [15]:
!pip install textblob
!pip install tensorflow



In [22]:
# import random:  This line imports the random module, used for generating random numbers
# import numpy as np:  This line imports the numpy library, essential for numerical operations in Python and working with Arrays
# from textblob import TextBlob:  This line imports TextBlob class from the textblob library, used for sentiment analysis
# from tensorflow.keras:  These lines import various components from the tensorflow.keras
# library, a deep learning framework.
# Sequential is used to build the text model layer by layer
# Dense, LSTM, and Embedding are types of layers in the Neural Network
# Adam is an optimization algorithm, and to_categoriacal is a function for converting data into a categorical format

import random
import numpy as np
from textblob import TextBlob
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Embedding
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical

In [23]:
# Parameters
# vocab_size:  This variable sets the size of the vocabulary (the number of unique words the model will know)
# embedding_dim:  This determines the dimensionality of the word embeddings (how words are represented as vectors)
# max_length:  This limits the length of the sentences the model will generate
# target_sentiment:  This specifies the desired sentiment object of the generated text ("positive" in this case)
# vocab:  This is a list of words that the model will use to generate sentences

vocab_size = 100  # Example vocabulary size
embedding_dim = 10  # Dimensionality of embedding layer
max_length = 10  # Length of generated sentences
target_sentiment = "positive"  # Target sentiment for generated text


In [26]:
# Sample vocabulary for text generation
# vocab:  This is the list of words that the GAN Generator model will use initially as random input noise
vocab = ["good", "bad", "happy", "sad", "awesome", "terrible", "nice", "horrible", "love", "hate"]

In [27]:
# build_generator():  This function defines the generator model, which is responsible for creating new text.
# from our list of words or keywords
# It uses a sequential model, meaning layers are added one after another...
# Embedding:  This layer converts words into numerical vectors to generate sentiments from scratch
# LSTM:  This is a Long Short-Term Memory layer, a type of reccurent neural network
# that's good at processing sequences of data like text.
# Dense:  This is a fully connected layer that outputs probabilities for each word in the vocabulary.
### Softmax activation ensures these probabilities sum to 1

# Generator Model
def build_generator():
    model = Sequential()
    # Fix: Removed input_length argument
    model.add(Embedding(input_dim=vocab_size, output_dim=embedding_dim))
    model.add(LSTM(units=64, return_sequences=True))
    model.add(Dense(vocab_size, activation='softmax'))
    return model


In [28]:
# Discriminator Function
# Discriminator(sentence):  This function acts as the discriminator.
# By doing so, it gets and takes a sentence as input and uses
# TextBlob to analyze its sentiment object

def discriminator(sentence):
    blob = TextBlob(sentence)
    sentiment = blob.sentiment.polarity
    if target_sentiment == "positive":
        return 1 if sentiment > 0 else 0
    elif target_sentiment == "negative":
        return 1 if sentiment < 0 else 0
    else:
        return 1 if -0.1 < sentiment < 0.1 else 0

In [29]:
# Convert sequence of indices to sentence
# sequence_to_text(sequence):  This function takes a sequence of numerical indices (representing words)
# and converts it back into a readable sentence using the vocab list

def sequence_to_text(sequence):
    # Ensure sequence is iterable, even if it's a single index
    if not isinstance(sequence, (list, np.ndarray)):
        sequence = [sequence]
    flat_sequence = np.ravel(sequence)
    return " ".join([vocab[idx % len(vocab)] for idx in flat_sequence])

In [32]:
# Training the GAN
# train_gan(generator, epochs=100, batch_size=16):  this function is the main training loop for the GAN
# epochs:  The number of times the training process will iterate over the entire dataset
# batch_size:  The number of samples processed before the model's internal parameters are updated:

# Inside the loop:
# Noise generation:  random noise is generated using np.random.randint, which servers as input to the generator
# Sentence generation:  The generator uses this noise to create a fake sentence
# Sentiment analysis:  The discriminator function is called to check the sentiment of the generated sentence.
# Feedback:  Based on the sentiment analysis, feedback (feedback_score) is provided to the generator.
# A score of 1 indicates the sentence matches the target sentiment, while -1 indicates it does not.
# Generator Update:  The generator's weights are updated based on the feedback, encouraging
# it to generate sentences with the desired sentiment.

def train_gan(generator, epochs=100, batch_size=16):
    for epoch in range(epochs):
        print(f"Epoch {epoch + 1}/{epochs}")
        for _ in range(batch_size):
            # Generate random noise
            noise = np.random.randint(0, vocab_size, (1, max_length))

            # Generate fake sentence
            # Data input into the generator to create fake data
            generated_sequence = generator.predict(noise, verbose=0)

            # Get indices of predicted words and convert to sentence
            # Predicted output (from discriminator) and actual output (from real data or
            # data fromt the generator) are compared and fedback into the system
            predicted_indices = np.argmax(generated_sequence, axis=-1)
            # Ensure predicted_indices[0] is a list or array before converting to text
            if not isinstance(predicted_indices[0], (list, np.ndarray)):
                predicted_indices = [predicted_indices[0]]

            # Moved sentence creation to before sentiment analysis
            # Real and fake data from generator will be the input into discriminator.
            # Both the probability values and actual values are compared ...
            sentence = sequence_to_text(predicted_indices)

            # Calculate and print the polarity and subjectivity
            # Getting the Sentiment at the individual sentence level.
            # and of the Sentiment object
            blob = TextBlob(sentence)  # Now sentence is defined
            polarity = blob.sentiment.polarity
            %precision 3
            subjectivity = blob.sentiment.subjectivity
            print(f"Generated sentence: {sentence}")
            print(f"Polarity: {polarity}, Subjectivity: {subjectivity}")

            # Check sentiment and provide feedback
            # The discriminator function is then called to check the
            # values of the sentiment object of a sentence
            sentiment_match = discriminator(sentence)
            feedback_score = 1 if sentiment_match else -1

            # If condition that uses the generator that created fake data that cannot be detected by the discriminator
            # to print the values of the target_sentiment while the discriminator detects all fake data values
            # of the sentence
            if sentiment_match:
                print(f"Generated sentence matching '{target_sentiment}' sentiment: {sentence}")

            # Update generator weights
            target_sequence = noise + feedback_score

            # Clip values to be within the valid range for to_categorical
            target_sequence = np.clip(target_sequence, 0, vocab_size - 1)
            target_sequence_onehot = to_categorical(target_sequence, num_classes=vocab_size)
            generator.train_on_batch(noise, target_sequence_onehot)

In [33]:
# Instantiate models and start training

# generator = build_generator():  This line creates an instance of the generator model
# using the build_generator() function
generator = build_generator()

# generator.compile(...):  This configures the generator for training, specifying
# the loss function (categorical_crossentropy), optimizer(Adam), and learning rate.
generator.compile(loss='categorical_crossentropy', optimizer=Adam(learning_rate=0.001))

# train_gen(generator, epochs=3): This starts the training process, calling the train_gan()
# function with the generator model and setting the number of training epochs to 3
train_gan(generator, epochs=3)

Epoch 1/3
Generated sentence: nice nice awesome hate hate hate hate awesome awesome happy
Polarity: 0.18, Subjectivity: 0.9600000000000002
Generated sentence matching 'positive' sentiment: nice nice awesome hate hate hate hate awesome awesome happy
Generated sentence: horrible horrible happy happy happy happy happy bad bad bad
Polarity: -0.009999999999999943, Subjectivity: 0.9
Generated sentence: happy happy hate happy happy happy happy happy happy happy
Polarity: 0.6399999999999999, Subjectivity: 0.99
Generated sentence matching 'positive' sentiment: happy happy hate happy happy happy happy happy happy happy
Generated sentence: awesome happy happy happy happy happy happy happy happy happy
Polarity: 0.82, Subjectivity: 1.0
Generated sentence matching 'positive' sentiment: awesome happy happy happy happy happy happy happy happy happy
Generated sentence: happy happy happy happy happy happy happy happy happy happy
Polarity: 0.7999999999999999, Subjectivity: 1.0
Generated sentence matching